mm1 / src /tools /fetch.py
TheRealHubertus's picture
Upload 49 files
82372e5 verified
Raw
History Blame Contribute Delete
588 Bytes
from __future__ import annotations

import html
import re

import requests
def _html_to_text(markup: str) -> str:
    """Reduce an HTML document to whitespace-normalised plain text."""
    # Remove comments and whole <script>/<style> elements first: stripping only
    # their tags would leave JavaScript/CSS source behind in the output.
    markup = re.sub(r"(?s)<!--.*?-->", " ", markup)
    markup = re.sub(r"(?is)<(script|style)\b[^>]*>.*?</\1\s*>", " ", markup)
    markup = re.sub(r"<[^>]+>", " ", markup)
    # Decode entities only after the tags are gone, so an escaped "&lt;b&gt;"
    # stays literal text instead of being mistaken for markup. Doing it before
    # the whitespace pass also lets "&nbsp;" collapse like any other space.
    markup = html.unescape(markup)
    return re.sub(r"\s+", " ", markup).strip()


def fetch_text(
    url: str, timeout: float = 6.0, *, max_chars: int = 4000
) -> tuple[str | None, str]:
    """Download *url* and return its content as truncated plain text.

    The function never raises; the outcome is reported through the second
    element of the returned pair.

    Args:
        url: Address to fetch. An empty value, or any URL containing
            ``example.invalid``, is not requested at all.
        timeout: Timeout in seconds passed to ``requests.get``.
        max_chars: Maximum number of characters of extracted text to return.
            Negative values are treated as ``0``.

    Returns:
        A ``(text, status)`` pair:

        * ``(None, "fixture_or_empty")`` when the URL was skipped,
        * ``(text, "fetched")`` on success, where ``text`` has markup removed,
          entities decoded and whitespace collapsed,
        * ``(None, "error:<ExceptionClassName>")`` when the request or the
          processing failed.
    """
    if not url or "example.invalid" in url:
        return None, "fixture_or_empty"
    try:
        response = requests.get(
            url, timeout=timeout, headers={"User-Agent": "MM1 prototype"}
        )
        response.raise_for_status()
        text = _html_to_text(response.text)
        return text[: max(max_chars, 0)], "fetched"
    except Exception as exc:
        # Deliberately broad: callers rely on a status string rather than an
        # exception, so every failure is folded into the "error:..." result.
        return None, f"error:{exc.__class__.__name__}"