Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """Test PDF conversion API locally""" | |
| import requests | |
| import time | |
| import sys | |
def test_pdf_conversion(pdf_path, api_base_url="http://localhost:7860", *,
                        poll_interval=1, max_wait=None, request_timeout=60):
    """Upload a PDF to the conversion API, wait for the result and save it.

    The function posts the file to ``/api/convert``, polls
    ``/api/status/<task_id>`` until the task reports ``completed`` or
    ``failed``, and on success downloads the Markdown to
    ``output_<task_id>.md`` in the current working directory. Progress is
    reported with ``print``.

    Args:
        pdf_path: Path of the PDF to upload (``str`` or ``os.PathLike``).
        api_base_url: Base URL of the API, without a trailing slash.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Maximum number of seconds to keep polling, or ``None``
            (the default) to poll until the task completes or fails.
        request_timeout: Timeout in seconds passed to every HTTP request so
            that a stalled connection cannot block the script forever.

    Returns:
        The name of the Markdown file written on success, otherwise ``None``
        (upload rejected, status check failed, conversion failed, download
        failed, or ``max_wait`` exceeded).

    Raises:
        OSError: If ``pdf_path`` cannot be opened or the output file cannot
            be written.
        requests.exceptions.RequestException: Network errors and timeouts
            are not caught here and propagate to the caller.
    """
    # Imported locally so this function does not depend on a file-level
    # ``import os`` being present.
    import os

    # 1. Upload PDF
    print(f"Uploading PDF: {pdf_path}")
    with open(pdf_path, 'rb') as f:
        # basename() handles path-like objects and the platform's separators,
        # unlike a manual split on '/'.
        files = {'file': (os.path.basename(pdf_path), f, 'application/pdf')}
        response = requests.post(
            f"{api_base_url}/api/convert",
            files=files,
            timeout=request_timeout,
        )

    if response.status_code != 200:
        print(f"Upload failed: {response.status_code}")
        print(response.text)
        return None

    result = response.json()
    task_id = result['task_id']
    print(f"Task ID: {task_id}")
    print(f"Status: {result['status']}")

    # 2. Check status
    print("\nChecking conversion status...")
    started = time.monotonic()
    while True:
        response = requests.get(
            f"{api_base_url}/api/status/{task_id}",
            timeout=request_timeout,
        )
        if response.status_code != 200:
            print(f"Status check failed: {response.status_code}")
            return None

        status = response.json()
        print(f"Status: {status['status']}")

        if status['status'] == 'completed':
            print(f"Download URL: {status['download_url']}")

            # 3. Download result
            response = requests.get(
                f"{api_base_url}{status['download_url']}",
                timeout=request_timeout,
            )
            if response.status_code != 200:
                # Previously this case ended the function without any output.
                print(f"Download failed: {response.status_code}")
                return None

            output_file = f"output_{task_id}.md"
            # Explicit UTF-8 so non-ASCII Markdown does not depend on the
            # platform's default locale encoding.
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(response.text)
            print(f"\nMarkdown saved to: {output_file}")
            print("\nContent preview:")
            print(response.text[:500])
            return output_file

        if status['status'] == 'failed':
            print(f"Conversion failed: {status.get('error', 'Unknown error')}")
            return None

        # Optional upper bound on the total polling time.
        if max_wait is not None and time.monotonic() - started >= max_wait:
            print(f"Timed out after {max_wait} seconds waiting for conversion")
            return None

        time.sleep(poll_interval)
if __name__ == "__main__":
    # Take the PDF from the first command-line argument when one is given;
    # otherwise fall back to the hard-coded local sample file.
    pdf_path = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "/Users/marcos/Documents/projects/pdf2md/batch-files/test-simple.pdf"
    )

    # Exercise the deployed Hugging Face Space instead of the localhost default.
    print("Testing on Hugging Face Space...")
    test_pdf_conversion(pdf_path, "https://marcosremar2-mineru2.hf.space")