"""HTTP API that converts uploaded PDF files to Markdown with PyMuPDF4LLM."""

import logging
import os
import shutil
import tempfile
from typing import BinaryIO

import pymupdf4llm
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse

logger = logging.getLogger(__name__)

app = FastAPI(
    title="MarkItDown API",
    description="Convierte PDFs a Markdown usando PyMuPDF4LLM (motor de Microsoft MarkItDown)",
)


def _pdf_stream_to_markdown(stream: BinaryIO) -> str:
    """Spool *stream* to a temporary ``.pdf`` file and convert it to Markdown.

    ``pymupdf4llm.to_markdown`` is called with a filesystem path, so the
    upload is first copied to disk. The temporary file is always removed,
    whether or not the conversion succeeds.

    This function is synchronous and blocking; call it from a worker thread
    when inside an async context.
    """
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    try:
        # os.fdopen takes ownership of fd and closes it when the block exits.
        with os.fdopen(fd, "wb") as temp_file:
            shutil.copyfileobj(stream, temp_file)
        return pymupdf4llm.to_markdown(temp_path)
    finally:
        # Best-effort cleanup: a failure to delete the temp file must not
        # turn a successful conversion into an error (or mask the real one).
        try:
            os.remove(temp_path)
        except OSError:
            logger.warning("Could not remove temporary file %s", temp_path)


@app.post("/convert")
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF to Markdown.

    Args:
        file: The uploaded file, sent as multipart form-data field ``file``.

    Returns:
        A dict with ``success``, the ``markdown`` text, its length in
        characters (``chars``) and its newline count (``lines``).

    Raises:
        HTTPException: 400 if the upload has no filename or the filename
            does not end in ``.pdf``; 500 if saving or converting fails.
    """
    # filename may be missing on some multipart uploads; reject it as a bad
    # request instead of crashing on ``None.lower()``.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Solo se aceptan archivos PDF")

    try:
        # Disk I/O and PDF parsing are blocking; run them in a worker thread
        # so the event loop stays free to serve other requests.
        markdown_text = await run_in_threadpool(_pdf_stream_to_markdown, file.file)
    except Exception as exc:
        logger.exception("PDF conversion failed for %r", filename)
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return {
        "success": True,
        "markdown": markdown_text,
        "chars": len(markdown_text),
        "lines": markdown_text.count("\n"),
    }


@app.get("/")
def read_root():
    """Return a small status payload describing the service and its usage."""
    return {
        "status": "online",
        "engine": "PyMuPDF4LLM",
        "message": "Envia POST /convert con form-data 'file' (PDF) para obtener Markdown estructurado",
    }