# Spaces: Running | File size: 1,523 Bytes | f870935
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import os
import shutil
import tempfile
import pymupdf4llm
# Application instance; the route decorators in this module register
# their handlers on it.
app = FastAPI(
    title="MarkItDown API",
    description="Convierte PDFs a Markdown usando PyMuPDF4LLM (motor de Microsoft MarkItDown)",
)
@app.post("/convert")
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF into Markdown text.

    The upload is copied to a temporary ``.pdf`` file on disk, that path is
    handed to ``pymupdf4llm.to_markdown``, and the temporary file is removed
    again before the response is sent, whether conversion succeeded or not.

    Args:
        file: The uploaded file from the multipart form field ``file``.

    Returns:
        A dict with ``success`` (always ``True``), ``markdown`` (the
        converted text), ``chars`` (its length) and ``lines`` (the number
        of newline characters it contains).

    Raises:
        HTTPException: 400 when the upload has no filename or the filename
            does not end in ``.pdf`` (case-insensitive); 500, carrying the
            underlying error message, when saving or converting fails.
    """
    # The filename can be absent on an upload; treat that as "not a PDF"
    # (400) instead of failing with AttributeError on ``None.lower()``.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Solo se aceptan archivos PDF")

    temp_path = None
    try:
        # Persist the upload to disk so the converter can be given a path.
        fd, temp_path = tempfile.mkstemp(suffix=".pdf")
        with os.fdopen(fd, "wb") as temp_file:
            shutil.copyfileobj(file.file, temp_file)

        # NOTE(review): this call is synchronous inside an async handler, so
        # the event loop is blocked while it runs -- confirm that is
        # acceptable for the expected load.
        markdown_text = pymupdf4llm.to_markdown(temp_path)

        return {
            "success": True,
            "markdown": markdown_text,
            "chars": len(markdown_text),
            "lines": markdown_text.count("\n"),
        }
    except Exception as e:
        # Surface any failure as a 500 while keeping the original cause
        # attached for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Single cleanup point: also runs on cancellation and other
        # BaseException exits that ``except Exception`` does not see.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort: a failed delete must not replace the real
                # result or error of the request.
                pass
@app.get("/")
def read_root():
    """Return a static status payload describing the service."""
    usage_hint = "Envia POST /convert con form-data 'file' (PDF) para obtener Markdown estructurado"
    payload = dict(
        status="online",
        engine="PyMuPDF4LLM",
        message=usage_hint,
    )
    return payload
|