File size: 1,523 Bytes
f870935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import os
import shutil
import tempfile
import pymupdf4llm

# Application instance; the title and description are passed to FastAPI as API metadata.
app = FastAPI(
    title="MarkItDown API",
    description="Convierte PDFs a Markdown usando PyMuPDF4LLM (motor de Microsoft MarkItDown)",
)

@app.post("/convert")
async def convert_file(file: UploadFile = File(...)):
    """Convert an uploaded PDF to Markdown.

    The upload is copied to a temporary ``.pdf`` file, converted with
    ``pymupdf4llm.to_markdown``, and the temporary file is removed again
    whether or not the conversion succeeds.

    Args:
        file: The uploaded file; its name must end in ``.pdf``
            (case-insensitive).

    Returns:
        A dict with ``success``, the ``markdown`` text, its length in
        ``chars`` and the number of newline characters in ``lines``.

    Raises:
        HTTPException: 400 when the upload has no filename or the name does
            not end in ``.pdf``; 500 (with the error text as detail) when
            saving or converting the file fails.
    """
    # The filename may be missing (None); treat that as "not a PDF" so the
    # client gets the intended 400 rather than an AttributeError-driven 500.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Solo se aceptan archivos PDF")

    # NOTE(review): the copy and conversion below are synchronous calls made
    # from an async handler, so they run on the event-loop thread; consider
    # offloading them to a worker thread if concurrent requests matter.
    try:
        fd, temp_path = tempfile.mkstemp(suffix=".pdf")
        try:
            # Persist the upload to disk so the converter can read it by path.
            with os.fdopen(fd, "wb") as temp_file:
                shutil.copyfileobj(file.file, temp_file)

            # Convert using PyMuPDF4LLM
            markdown_text = pymupdf4llm.to_markdown(temp_path)
        finally:
            # Single cleanup point: runs on success and on every failure path.
            os.remove(temp_path)

        return {
            "success": True,
            "markdown": markdown_text,
            "chars": len(markdown_text),
            "lines": markdown_text.count("\n")
        }
    except Exception as e:
        # Surface the failure as a 500 while keeping the original cause chained.
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/")
def read_root():
    """Return a static status payload describing the service and how to call it."""
    usage_hint = "Envia POST /convert con form-data 'file' (PDF) para obtener Markdown estructurado"
    return dict(status="online", engine="PyMuPDF4LLM", message=usage_hint)