Spaces:
Running
Running
| from fastapi import FastAPI, File, UploadFile, HTTPException | |
| from fastapi.responses import JSONResponse | |
| import os | |
| import shutil | |
| import tempfile | |
| import pymupdf4llm | |
# Application instance; the title and description below are handed to FastAPI
# as the API's metadata.
app = FastAPI(
    title="MarkItDown API",
    description="Convierte PDFs a Markdown usando PyMuPDF4LLM (motor de Microsoft MarkItDown)",
)
@app.post("/convert")
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF to Markdown.

    The upload is copied to a temporary ``.pdf`` file on disk, converted with
    ``pymupdf4llm.to_markdown``, and the temporary file is removed afterwards
    regardless of the outcome.

    Args:
        file: The uploaded file, sent as the multipart form field ``file``.

    Returns:
        A dict with ``success`` (always ``True``), ``markdown`` (the converted
        text), ``chars`` (its length) and ``lines`` (its newline count).

    Raises:
        HTTPException: 400 when the upload has no filename or the filename
            does not end in ``.pdf`` (case-insensitive); 500, carrying the
            error message, when saving or converting the file fails.
    """
    # The filename can be absent on an upload; treat that as "not a PDF"
    # instead of failing on ``None.lower()``.
    if not file.filename or not file.filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Solo se aceptan archivos PDF")

    temp_path = None
    try:
        # Save the upload to a temporary file so the converter gets a path.
        fd, temp_path = tempfile.mkstemp(suffix=".pdf")
        with os.fdopen(fd, "wb") as temp_file:
            shutil.copyfileobj(file.file, temp_file)

        # NOTE: this call is synchronous, so the coroutine does not yield
        # while the conversion is running.
        markdown_text = pymupdf4llm.to_markdown(temp_path)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Single cleanup point for both the success and the failure path.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort: a leftover temp file must not mask the real
                # error or fail an otherwise successful conversion.
                pass

    return {
        "success": True,
        "markdown": markdown_text,
        "chars": len(markdown_text),
        "lines": markdown_text.count("\n"),
    }
@app.get("/")
def read_root():
    """Return a static status payload describing the service.

    Returns:
        A dict with ``status``, ``engine`` and a ``message`` explaining how
        to call the conversion endpoint.
    """
    return {
        "status": "online",
        "engine": "PyMuPDF4LLM",
        "message": "Envia POST /convert con form-data 'file' (PDF) para obtener Markdown estructurado",
    }