ExtractDocs / app.py
RemiProAtos's picture
replace pytesseract with PIL preprocessing & Mistral vision model
25af3cf verified
import base64
import gradio as gr
import io
import os
from PIL import Image, ImageEnhance, ImageFilter
from mistralai.client import Mistral
from dotenv import load_dotenv
# Run python-dotenv's loader before the key lookup below
# (presumably reads a local .env file into the environment — confirm for the deployment).
load_dotenv()
# Module-level Mistral client reused by every extraction request. Indexing
# os.environ raises KeyError at import time when MISTRAL_API_KEY is not set.
client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
def preprocess(image: Image.Image) -> Image.Image:
    """Return a grayscale, sharpened, high-contrast copy of *image*.

    Pipeline, in order:
      1. Images narrower than 1500 px are doubled in both dimensions with
         Lanczos resampling (helps low-resolution or distant photos).
      2. Conversion to mode "L" drops colour information.
      3. A SHARPEN filter pass runs before the contrast boost so thin
         strokes are not washed out.
      4. Contrast is enhanced by a factor of 2.0.
      5. A final sharpness enhancement (factor 2.0) crisps letter edges.
    """
    width, height = image.size
    needs_upscale = width < 1500
    if needs_upscale:
        doubled_size = (width * 2, height * 2)
        image = image.resize(doubled_size, Image.LANCZOS)

    grey = image.convert("L")
    edge_sharpened = grey.filter(ImageFilter.SHARPEN)
    high_contrast = ImageEnhance.Contrast(edge_sharpened).enhance(2.0)
    return ImageEnhance.Sharpness(high_contrast).enhance(2.0)
def image_to_base64(image: Image.Image) -> str:
    """Encode *image* as a JPEG (quality 95) and return the bytes as base64 text."""
    with io.BytesIO() as jpeg_stream:
        image.save(jpeg_stream, format="JPEG", quality=95)
        jpeg_bytes = jpeg_stream.getvalue()
    encoded = base64.b64encode(jpeg_bytes)
    return encoded.decode("utf-8")
# Vision model and fixed prompts sent with every extraction request.
_EXTRACTION_MODEL = "pixtral-12b-2409"
_SYSTEM_PROMPT = (
    "Tu es un assistant d'extraction de données. "
    "Extrait les informations demandées et renvoie un objet JSON propre. "
    "N'inclue aucune explication ni mise en forme Markdown. "
    "Formate le résultat en JSON simple en utilisant les clés suivantes uniquement: "
    "Nom, Prénom, Numéro de voie, Type de voie, Nom de la voie, "
    "Complément du numéro de voie, Complément d'adresse, Code postal, Ville. "
    "N'incorpore aucune donnée supplémentaire. "
    "Si tu ne trouves pas la donnée, indique n/a en valeur."
)
_USER_PROMPT = (
    "Extrait les informations structurées de ce document et renvoie uniquement le JSON."
)


def extract_info(image: Image.Image) -> str:
    """Extract name and postal-address fields from a document image as JSON text.

    The image is cleaned up with ``preprocess``, encoded as a base64 JPEG data
    URL and sent to the Mistral chat-completion endpoint. The French system
    prompt asks for a flat JSON object with the keys Nom, Prénom, Numéro de
    voie, Type de voie, Nom de la voie, Complément du numéro de voie,
    Complément d'adresse, Code postal and Ville, using "n/a" for missing
    values.

    Args:
        image: The document image to analyse. ``None`` is rejected explicitly
            (the UI may submit without an uploaded image).

    Returns:
        The content of the model's first completion choice. It is expected to
        be JSON but is returned as-is, without parsing or validation.

    Raises:
        ValueError: If ``image`` is ``None``.
        RuntimeError: If the API response contains no completion choices.
    """
    # Fail early with a readable message instead of an AttributeError on
    # ``image.size`` deep inside the preprocessing step.
    if image is None:
        raise ValueError("No image provided: upload an image before submitting.")

    b64 = image_to_base64(preprocess(image))

    response = client.chat.complete(
        model=_EXTRACTION_MODEL,
        messages=[
            {"role": "system", "content": _SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": f"data:image/jpeg;base64,{b64}",
                    },
                    {"type": "text", "text": _USER_PROMPT},
                ],
            },
        ],
    )

    # Guard the index below so an empty response is reported clearly.
    if not response.choices:
        raise RuntimeError("The model returned no completion choices.")
    return response.choices[0].message.content
# Single-function Gradio UI: one PIL image in, the model's JSON text out.
demo = gr.Interface(
    title="Doc Xtract",
    description="Upload an image to extract structured information as JSON.",
    fn=extract_info,
    inputs=gr.Image(label="Upload Image", type="pil"),
    outputs=gr.Textbox(label="Extracted JSON"),
)


if __name__ == "__main__":
    # "0.0.0.0" binds the server to all network interfaces rather than
    # only localhost.
    demo.launch(server_name="0.0.0.0")