markitdown-api / app.py
C2MV's picture
Upload app.py with huggingface_hub
f870935 verified
raw
history blame contribute delete
1.52 kB
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import os
import shutil
import tempfile
import pymupdf4llm
# ASGI application object; the route decorators in this module register on it.
# NOTE(review): the description presents PyMuPDF4LLM as the engine of
# Microsoft MarkItDown -- confirm that claim before relying on it.
app = FastAPI(
    title="MarkItDown API",
    description="Convierte PDFs a Markdown usando PyMuPDF4LLM (motor de Microsoft MarkItDown)",
)
def _pdf_stream_to_markdown(stream):
    """Spool a binary PDF stream to a temp file and return it as Markdown.

    ``pymupdf4llm.to_markdown`` is called with a filesystem path, so the
    stream is first copied to a temporary ``.pdf`` file. That file is always
    removed afterwards, whether the conversion succeeds or raises.

    This function is blocking (disk I/O plus the conversion itself) and is
    meant to be run in a worker thread, not directly on the event loop.
    """
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    try:
        # Close the handle before converting so the data is fully flushed
        # to disk when the converter opens the path.
        with os.fdopen(fd, "wb") as temp_file:
            shutil.copyfileobj(stream, temp_file)
        return pymupdf4llm.to_markdown(temp_path)
    finally:
        # mkstemp never deletes the file for us; one cleanup point covers
        # both the success and the failure path.
        if os.path.exists(temp_path):
            os.remove(temp_path)


@app.post("/convert")
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF to Markdown.

    Args:
        file: The uploaded file (multipart form field ``file``). Its filename
            must end with ``.pdf`` (case-insensitive).

    Returns:
        A dict with ``success`` (always ``True``), ``markdown`` (the converted
        text), ``chars`` (length of the text) and ``lines`` (number of newline
        characters in the text).

    Raises:
        HTTPException: 400 if the upload has no filename or it does not end
            with ``.pdf``; 500 (with the underlying error message as detail)
            if saving or converting the file fails.
    """
    # Imported here so this block does not depend on edits to the module's
    # import section; after the first call this is a cheap cache lookup.
    from fastapi.concurrency import run_in_threadpool

    # The filename may be missing; treat that as "not a PDF" (400) instead of
    # crashing on None.lower() and reporting a 500.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Solo se aceptan archivos PDF")

    try:
        # Copying the upload and converting it are blocking operations; run
        # them in the thread pool so other requests keep being served.
        markdown_text = await run_in_threadpool(_pdf_stream_to_markdown, file.file)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "success": True,
        "markdown": markdown_text,
        "chars": len(markdown_text),
        "lines": markdown_text.count("\n"),
    }
@app.get("/")
def read_root():
    """Return a static status payload describing the service and its usage."""
    info = dict(
        status="online",
        engine="PyMuPDF4LLM",
        message="Envia POST /convert con form-data 'file' (PDF) para obtener Markdown estructurado",
    )
    return info