# --- Scraped page header (web UI residue, not part of the program) ---
# jonathan9879's picture
# Update app.py
# 75a1136 verified
# Raw
# History Blame Contribute Delete
# 9.18 kB
import os
import re
import time
from urllib.parse import urlsplit

import google.generativeai as genai
import gradio as gr
import pandas as pd
import requests
from google.generativeai.types import HarmCategory, HarmBlockThreshold
# --- Constants ---
# Base URL of the scoring service. The code in this file appends
# "/questions", "/submit" and "/files/<task_id>" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Upper bound on ReAct loop iterations per question; once exhausted the agent
# returns an "AGENT_ERROR: Max iterations reached." string instead of an answer.
MAX_ITERATIONS = 7
# --- HYBRID: Re-introducing the WebSearchTool ---
class WebSearchTool:
    """A tool to search the web using the Perplexity API.

    Failures are never raised to the caller: every error path returns a
    string starting with ``"Error:"`` so the agent loop can feed it back to
    the model as an observation.
    """

    def __init__(self, api_key):
        """Store the Perplexity API key and the chat-completions endpoint."""
        self.api_key = api_key
        self.url = "https://api.perplexity.ai/chat/completions"
        print("WebSearchTool initialized.")

    def execute(self, query: str) -> str:
        """Run one web search and return the answer text.

        Args:
            query: Natural-language search query.

        Returns:
            The content of the first choice in the API reply, or an
            ``"Error: ..."`` string when the query is empty, the HTTP request
            fails, or the reply does not have the expected shape.
        """
        # An empty query (e.g. the model emitted `WebSearch[]`) cannot produce
        # a useful answer, so do not spend an API call on it.
        if not query or not query.strip():
            return "Error: Web search failed. Empty query."

        print(f"Executing WebSearchTool with query: {query}")
        # NOTE(review): confirm this model identifier is still served by the
        # Perplexity API; a retired name would surface here as an HTTP error.
        payload = {
            "model": "llama-3-sonar-small-32k-online",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a research assistant. Provide a precise and factual answer to the query.",
                },
                {"role": "user", "content": query},
            ],
        }
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        try:
            response = requests.post(self.url, json=payload, headers=headers, timeout=40)
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except requests.exceptions.RequestException as e:
            return f"Error: Web search failed. {e}"
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # HTTP 200 but the body is not the expected
            # {"choices": [{"message": {"content": ...}}]} structure.
            return f"Error: Web search returned an unexpected response. {e!r}"
# --- The New Hybrid Agent ---
class HybridAgent:
    """ReAct-style agent pairing a Gemini model with an external web-search tool.

    Calling an instance with a question and task id builds a prompt, tries to
    attach any URLs found in the question plus the task's file from the
    scoring service, and then loops: ask the model, return on
    ``Final Answer:``, or run a ``WebSearch[...]`` action and feed the
    observation back in.
    """

    # Pattern used to spot links inside the question text.
    _URL_PATTERN = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
    # Sentence punctuation that the pattern above wrongly keeps at the end of a URL.
    _TRAILING_PUNCTUATION = ".,;:!?'"
    # Path suffix -> MIME type. Extend as more file types are needed.
    _MIME_BY_SUFFIX = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".pdf": "application/pdf",
    }
    _DEFAULT_MIME_TYPE = "application/octet-stream"

    def __init__(self, gemini_api_key: str, pplx_api_key: str, api_url: str):
        """Configure the Gemini client, the search tool and the model.

        Args:
            gemini_api_key: Key passed to ``genai.configure``.
            pplx_api_key: Key handed to the ``WebSearchTool``.
            api_url: Base URL of the scoring service (used for ``/files/<task_id>``).
        """
        print("Initializing HybridAgent...")
        genai.configure(api_key=gemini_api_key)
        self.api_url = api_url
        self.web_search_tool = WebSearchTool(pplx_api_key)
        self.model_name = 'gemini-2.5-flash-preview-05-20'
        # The model's native search tool is deliberately not enabled; searches
        # go through the external WebSearchTool instead.
        self.model = genai.GenerativeModel(
            model_name=self.model_name,
            system_instruction=(
                "You are a powerful reasoning agent. You can understand files and URLs provided to you directly.\n"
                "For general web searches or to find new information, you MUST use the `WebSearch` tool.\n"
                "Follow the ReAct format: Thought, Action, Observation, Final Answer."
            ),
            safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                # Add other categories as needed
            },
        )
        print(f"Agent initialized with {self.model_name} and an external WebSearchTool.")

    def _get_mime_type(self, url: str) -> str:
        """Guess a MIME type from the extension of the URL's path.

        Only the path component is inspected, so a query string or fragment
        (``.../a.png?size=large``) no longer hides the extension. Unknown
        extensions map to ``application/octet-stream``.
        """
        try:
            path = urlsplit(url).path
        except ValueError:
            # Malformed URL: fall back to looking at the raw string.
            path = url
        path = path.lower()
        for suffix, mime_type in self._MIME_BY_SUFFIX.items():
            if path.endswith(suffix):
                return mime_type
        return self._DEFAULT_MIME_TYPE

    def _check_if_file_exists(self, url: str) -> bool:
        """Return True when a HEAD request to *url* (following redirects) answers 200."""
        try:
            response = requests.head(url, timeout=15, allow_redirects=True)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    @classmethod
    def _extract_urls(cls, text: str) -> list:
        """Return the http(s) URLs in *text*, without trailing sentence punctuation.

        A closing parenthesis is stripped only when it is unbalanced, so URLs
        such as ``.../wiki/Foo_(bar)`` are kept intact.
        """
        urls = []
        for raw in cls._URL_PATTERN.findall(text):
            url = raw.rstrip(cls._TRAILING_PUNCTUATION)
            while url.endswith(")") and url.count("(") < url.count(")"):
                url = url[:-1].rstrip(cls._TRAILING_PUNCTUATION)
            urls.append(url)
        return urls

    def _attach_uri(self, prompt_parts: list, uri: str, label: str) -> None:
        """Best-effort: append *uri* to *prompt_parts* as a file part; log and skip on failure.

        *label* ("URL" or "file") only affects the log messages.
        """
        # NOTE(review): `Part.from_uri` is the Vertex AI SDK spelling; confirm
        # that the installed `google.generativeai` package exposes `genai.Part`.
        # If it does not, the AttributeError is caught below and every
        # attachment is skipped, leaving a text-only prompt.
        try:
            mime_type = self._get_mime_type(uri)
            prompt_parts.append(genai.Part.from_uri(uri=uri, mime_type=mime_type))
            print(f"Appended {label} to prompt parts: {uri}")
        except Exception as e:
            print(f"Failed to add {label} {uri}: {e}")

    def __call__(self, question: str, task_id: str) -> str:
        """Answer *question* for *task_id* and return the answer text.

        Returns:
            The text after ``Final Answer:``, or the raw model reply when it
            contains neither a final answer nor a ``WebSearch[...]`` action,
            or an ``"AGENT_ERROR: ..."`` string when the model call fails or
            ``MAX_ITERATIONS`` is exhausted.
        """
        print(f"\n{'='*20}\nProcessing Task ID: {task_id}")

        # --- Multi-modal part preparation ---
        prompt_parts = [
            "You will solve the following question. You have been provided with the question and any relevant files or URLs.",
            "Remember, for web searches, you must use the `WebSearch` tool in the ReAct format (Thought, Action, Observation).",
            f"\n--- QUESTION ---\n{question}",
        ]
        for url in self._extract_urls(question):
            self._attach_uri(prompt_parts, url, "URL")

        file_url = f"{self.api_url}/files/{task_id}"
        if self._check_if_file_exists(file_url):
            self._attach_uri(prompt_parts, file_url, "file")

        # --- ReAct loop ---
        for i in range(MAX_ITERATIONS):
            print(f"\n--- Hybrid Iteration {i+1} ---")
            try:
                response = self.model.generate_content(
                    prompt_parts,
                    generation_config=genai.types.GenerationConfig(temperature=0.1),
                )
                response_text = response.text
            except Exception as e:
                return f"AGENT_ERROR: {e}"
            print(f"LLM Response:\n{response_text}")

            final_answer_match = re.search(r"Final Answer:\s*(.*)", response_text, re.DOTALL)
            if final_answer_match:
                return final_answer_match.group(1).strip()

            action_match = re.search(r"Action:\s*WebSearch\[(.*?)\]", response_text, re.DOTALL)
            if not action_match:
                # The model gave a direct answer without the "Final Answer:" tag.
                return response_text.strip()

            query = action_match.group(1).strip()
            observation = self.web_search_tool.execute(query)
            # Keep everything after the first "Thought:" marker so the Action
            # stays in the transcript. When the model skipped the marker,
            # echo the whole reply instead of raising IndexError.
            _, marker, reasoning = response_text.partition("Thought:")
            if not marker:
                reasoning = response_text
            prompt_parts.append(f"\nThought: {reasoning}")
            prompt_parts.append(f"Observation: {observation}")

        return "AGENT_ERROR: Max iterations reached."
# --- Main run_and_submit_all function ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the agent on each, and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the
            user is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` of task id / question / submitted answer once
        the agent has run, and ``None`` when the run stops before that
        (no login, missing API keys, setup failure).
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        return "Please Login to Hugging Face.", None
    username = f"{profile.username}"

    # Both the Gemini key and the Perplexity key are required.
    gemini_key = os.getenv("GEMINI_API_KEY")
    pplx_key = os.getenv("PPLX_API_KEY")
    if not gemini_key or not pplx_key:
        return "CRITICAL ERROR: GEMINI_API_KEY or PPLX_API_KEY not found.", None

    api_url = DEFAULT_API_URL
    try:
        agent = HybridAgent(gemini_api_key=gemini_key, pplx_api_key=pplx_key, api_url=api_url)
        questions_response = requests.get(f"{api_url}/questions", timeout=15)
        # Surface HTTP errors here instead of trying to iterate an error body.
        questions_response.raise_for_status()
        questions_data = questions_response.json()
    except Exception as e:
        return f"Error during setup: {e}", None
    if not isinstance(questions_data, list):
        return (
            f"Error during setup: unexpected questions payload of type {type(questions_data).__name__}",
            None,
        )

    # Keep only well-formed entries so the loop below knows which one is last.
    tasks = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id, question_text = item.get("task_id"), item.get("question")
        if not task_id or question_text is None:
            continue
        tasks.append((task_id, question_text))

    results_log, answers_payload = [], []
    last_index = len(tasks) - 1
    for index, (task_id, question_text) in enumerate(tasks):
        try:
            submitted_answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT CRASH: {e}"})
        # Pause between questions; there is nothing to wait for after the last one.
        if index < last_index:
            print("--- Waiting for 10 seconds... ---")
            time.sleep(10)

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(f"{api_url}/submit", json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
                        f"Overall Score: {result_data.get('score', 'N/A')}% "
                        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
                        f"Message: {result_data.get('message', 'No message received.')}")
        return final_status, pd.DataFrame(results_log)
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON response body on requests versions
        # whose JSON decode error is not a RequestException.
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio Interface ---
# Build the single-page UI: a login button, a run button, and two outputs
# (status text and a table of per-question answers).
with gr.Blocks() as demo:
    gr.Markdown("# Hybrid GAIA Agent")
    # The description stays model-agnostic: the agent configures its own
    # Gemini model name, and naming a specific model here had drifted out of
    # sync with it.
    gr.Markdown("This agent uses Gemini's native multi-modality (files, URLs) combined with an external Perplexity web search tool.")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No explicit inputs are wired; the handler's two return values fill the
    # status box and the results table.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)