Spaces:

KTXStudio
/

SeleniumShearchBot

Running

App Files Files Community

SeleniumShearchBot / app.py

lea97338

Update app.py

1c2e5c2 verified 1 day ago

Raw

History Blame Contribute Delete

4.25 kB

	import gradio as gr
	import torch
	from ddgs import DDGS
	from deep_translator import GoogleTranslator
	from selenium import webdriver
	from selenium.webdriver.chrome.options import Options
	from selenium.webdriver.chrome.service import Service
	from selenium.webdriver.common.by import By
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# =========================
	# ✅ MODEL
	# =========================
	# Hugging Face Hub identifier of the instruction-tuned model used for summaries.
	model_id = "Qwen/Qwen2.5-0.5B-Instruct"

	# Both objects are created once, at module import, and shared by every request.
	tokenizer = AutoTokenizer.from_pretrained(model_id)

	# device_map="auto" leaves device placement to the library; weights are
	# requested in half precision.
	model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", dtype=torch.float16)

	# =========================
	# ✅ SEARCH (2 text results + 1 image)
	# =========================
	def search_wiki(query, max_text_results=2, max_image_results=2):
	    """Run a text search and an image search for ``query`` through ``DDGS``.

	    Despite the name, the query is forwarded unchanged; nothing here
	    restricts the search to Wikipedia.

	    Args:
	        query: Search terms, passed as-is to both searches.
	        max_text_results: Upper bound on the number of text hits requested.
	        max_image_results: Upper bound on the number of image hits requested.
	            Only the first image hit is used.

	    Returns:
	        A ``(text_results, image_url)`` tuple. ``text_results`` is a list of
	        dicts with the keys ``"title"``, ``"link"`` and ``"description"``
	        (any value may be ``None`` when the hit lacks that field).
	        ``image_url`` is the ``"image"`` field of the first image hit, or
	        ``None`` when the image search returned nothing.
	    """
	    with DDGS() as client:
	        # Re-key each hit to the field names the rest of the app expects.
	        text_results = [
	            {
	                "title": hit.get("title"),
	                "link": hit.get("href"),
	                "description": hit.get("body"),
	            }
	            for hit in client.text(query, max_results=max_text_results)
	        ]

	        images = list(client.images(query, max_results=max_image_results))

	    image_url = images[0].get("image") if images else None
	    return text_results, image_url

	# =========================
	# ✅ PIPELINE
	# =========================
	def _fetch_translated_text(link, max_chars=6000, min_paragraph_chars=50):
	    """Return the French translation of the ``<p>`` paragraphs found at ``link``.

	    The page is loaded in headless Chrome. Paragraphs whose stripped text is
	    not longer than ``min_paragraph_chars`` are ignored. The result is cut to
	    ``max_chars`` characters, and translation stops as soon as that much text
	    has been collected.
	    """
	    options = Options()
	    options.add_argument("--headless")
	    options.add_argument("--no-sandbox")  # required when running in Docker
	    options.add_argument("--disable-dev-shm-usage")  # required when running in Docker

	    # On HF Spaces the driver is installed at /usr/bin/chromedriver.
	    service = Service("/usr/bin/chromedriver")
	    driver = webdriver.Chrome(service=service, options=options)

	    pieces = []
	    collected = 0
	    try:
	        driver.get(link)
	        translator = GoogleTranslator(source='auto', target='fr')

	        for paragraph in driver.find_elements(By.TAG_NAME, "p"):
	            if collected >= max_chars:
	                # Anything further would be discarded by the final slice.
	                break

	            text = paragraph.text.strip()
	            if len(text) <= min_paragraph_chars:
	                continue

	            try:
	                translated = translator.translate(text)
	            except Exception:
	                # Best effort: a paragraph that cannot be translated is
	                # skipped instead of failing the whole page.
	                continue
	            if translated is None:
	                continue

	            pieces.append(translated + "\n")
	            collected += len(translated) + 1
	    finally:
	        # Always shut the browser down, even when loading or scraping fails,
	        # so no Chrome process is left behind.
	        driver.quit()

	    return "".join(pieces)[:max_chars]


	def _summarize(text, max_new_tokens=300):
	    """Ask the module-level model for a French summary of ``text``."""
	    prompt = (
	        "Fais un résumé clair et structuré en français :\n\n"
	        + text
	    )

	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	    outputs = model.generate(
	        **inputs,
	        max_new_tokens=max_new_tokens,
	        temperature=0.7,
	        do_sample=True,
	    )

	    # For a causal LM the generated sequence starts with the prompt tokens;
	    # drop them so only the newly generated summary is decoded.
	    prompt_length = inputs["input_ids"].shape[-1]
	    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)


	def run_pipeline(user_query):
	    """Search for ``user_query``, scrape the top hit and summarise it in French.

	    Steps: search, open the first result that has a link, translate its
	    paragraphs to French, then generate a summary with the language model.

	    Args:
	        user_query: Free-text query typed by the user.

	    Returns:
	        A ``(message, image_url)`` tuple. On success ``message`` holds the
	        source link followed by the summary and ``image_url`` is the image
	        found by the search (possibly ``None``). When nothing is found or
	        any step raises, ``message`` is a user-facing error string and
	        ``image_url`` is ``None``; exceptions are never propagated.
	    """
	    try:
	        results, img = search_wiki(user_query)

	        # Use the first hit that actually carries a URL.
	        link = next((r["link"] for r in results if r.get("link")), None)
	        if link is None:
	            return "❌ Aucun résultat trouvé.", None

	        texte_total = _fetch_translated_text(link)
	        response = _summarize(texte_total)

	        return f"🔗 {link}\n\n📄 {response}", img

	    except Exception as e:
	        # UI boundary: report the failure in the output box instead of raising.
	        return f"❌ Erreur : {e}", None

	# =========================
	# ✅ STYLE
	# =========================
	# Stylesheet passed to gr.Blocks(css=...) below. The ".container" and
	# ".title" classes are the ones referenced by the layout (elem_classes and
	# the Markdown title div); the remaining rules restyle the page body, text
	# areas and buttons.
	css = """
	body { background: #0f1117; color: white; }

	.container {
	max-width: 900px;
	margin: auto;
	padding-top: 40px;
	}

	.title {
	text-align: center;
	font-size: 30px;
	font-weight: bold;
	margin-bottom: 20px;
	}

	textarea {
	background: #1a1d26 !important;
	color: white !important;
	border-radius: 12px !important;
	}

	button {
	background: linear-gradient(90deg, #00c6ff, #0072ff) !important;
	border-radius: 12px !important;
	}
	"""

	# =========================
	# ✅ UI
	# =========================
	with gr.Blocks(css=css) as app:
	    # One column carrying the "container" CSS class holds every widget.
	    with gr.Column(elem_classes="container"):
	        gr.Markdown("<div class='title'>🚀 KTXStudio AI</div>")

	        # Input: free-text query and the button that starts the pipeline.
	        query = gr.Textbox(placeholder="Ex : Ninjago Dragon Rising saison 4")
	        btn = gr.Button("⚡ Générer")

	        # Outputs: summary text and the image returned by the search.
	        output_text = gr.Textbox(lines=15)
	        output_img = gr.Image(label="Image (résultat 5)")

	    # run_pipeline returns (text, image), matching the two outputs in order.
	    btn.click(fn=run_pipeline, inputs=query, outputs=[output_text, output_img])

	# =========================
	# ✅ RUN
	# =========================
	# Start the web server; share=True requests a public share link and the
	# favicon is read from a file next to the app.
	app.launch(
	    share=True,
	    favicon_path="favicon.png",
	)