Spaces:
Running
Running
"""macOS Vision OCR for peeks — shells out to the compiled `recognize/bin/ocr`.
Two jobs: (1) identify a coding-CLI from the text in its prompt/status line — reliable on
dark, small-text terminal screens where CLIP fingerprinting is fuzzy; (2) ground Puck's
quip in the actual on-screen text so it's topical instead of a guess from pixels.
Recognition is REGION-LOCAL (the OCR runs on Puck's peek crop), so it matches what's under
the sprite — unlike a window title, which is window-global and lies under tabbed terminals
(Ghostty) and browsers (Chrome).
"""
| import base64 | |
| import os | |
| import subprocess | |
| import tempfile | |
| from pathlib import Path | |
# Project root: two directory levels above this module's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Compiled OCR helper binary that this module shells out to.
OCR_BIN = ROOT.joinpath("recognize", "bin", "ocr")
# Distinctive CONTENT markers (lowercased) from each CLI's prompt/status — NOT the window
# title. codex & pi both surface "gpt-5.5", so neither uses it: pi is pinned by its
# "(openai-codex)" backend tag / "pi v0." banner, codex by the SPACED "openai codex".
# Keys are tool labels; each value lists substrings that are searched for in the
# lowercased OCR text, so every marker must itself be lowercase.
# NOTE(review): the last amp marker ("β smart β") looks mis-encoded — presumably a dash
# or box-drawing glyph on either side of "smart"; confirm against amp's real status line.
_TOOL_MARKERS: dict[str, list[str]] = {
    "claude-code": ["claude code", "claude max", "auto mode on", "for agents", "/release-notes"],
    "codex": ["openai codex", "/model to change", "codex app", "/fast to enable"],
    "opencode": ["opencode", "glm-5", "z.ai coding", "esc interrupt"],
    "pi": ["pi v0.", "openai-codex", "/272k", "ctrl+c/ctrl+d"],
    "amp": ["welcome to amp", "ctrl+o for help", "- smart -", "β smart β"],
}
# Shell/login chrome that's noise for a topical quip. Entries are lowercase substrings
# tested against the lowercased OCR line.
_NOISE: tuple[str, ...] = ("last login", "cd /", "exec ", "ttys", "fnm_version")
def available() -> bool:
    """Report whether the compiled OCR helper binary is present on disk."""
    helper = OCR_BIN
    return helper.exists()
def ocr_lines(image_data_url: str, timeout: float = 8.0) -> list[str]:
    """Recognized text lines from a data-URL image (empty list on any failure).

    Args:
        image_data_url: A ``data:`` URL; everything after the first comma is treated
            as the base64-encoded image payload.
        timeout: Seconds to wait for the OCR helper before giving up.

    Returns:
        The helper's stdout split into lines, stripped, with blank lines dropped.
        An empty list when the helper binary is missing, the payload is empty or
        not valid base64, or the helper fails or times out.
    """
    if not OCR_BIN.exists():
        return []
    _, _, b64 = image_data_url.partition(",")
    # Decode BEFORE creating the temp file: a malformed payload must yield [] (the
    # documented contract) instead of raising and leaving a stray temp file behind.
    try:
        png = base64.b64decode(b64)
    except (ValueError, TypeError):  # binascii.Error is a ValueError subclass
        return []
    if not png:
        return []  # nothing to recognize; skip spawning the helper
    tmp = None
    try:
        # delete=False because the helper re-opens the file by path after we close it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            tmp = f.name
            f.write(png)
        out = subprocess.run(
            [str(OCR_BIN), tmp], capture_output=True, text=True, timeout=timeout
        )
        return [ln.strip() for ln in out.stdout.splitlines() if ln.strip()]
    except Exception:  # noqa: BLE001 — OCR is best-effort; a failure just means no hint
        return []
    finally:
        if tmp is not None:
            try:
                os.unlink(tmp)
            except OSError:
                pass  # cleanup is best-effort too; never mask the result with it
def detect_tool(lines: list[str]) -> str | None:
    """Pick the tool whose markers appear most often in the OCR text.

    Every marker counts at most once per tool. On a tie the tool listed first in
    ``_TOOL_MARKERS`` wins; ``None`` is returned when no marker matches at all.
    """
    haystack = "\n".join(lines).lower()
    hits = {
        label: sum(marker in haystack for marker in markers)
        for label, markers in _TOOL_MARKERS.items()
    }
    # max() returns the first maximal key, matching a strict ">" scan in dict order.
    winner = max(hits, key=hits.__getitem__, default=None)
    if winner is None or hits[winner] == 0:
        return None
    return winner
def _is_noise(line: str) -> bool:
    """Shell/terminal furniture that crowds out real content in the quip's excerpt.

    A line is noise when it contains any ``_NOISE`` substring (case-insensitive),
    is at most three characters long, or contains a "×" as found in terminal
    title-bar dimensions.
    """
    low = line.lower()
    if any(n in low for n in _NOISE):
        return True
    if len(line) <= 3:  # OCR crumbs of the shell powerline (stray glyphs, time fragments)
        return True
    # Terminal title-bar dimensions, e.g. "140×43". The separator is U+00D7
    # (multiplication sign); the previous literal was a mis-encoded form of it and
    # could never match real OCR output.
    return "×" in line
def topical_excerpt(lines: list[str], cap: int = 240) -> str:
    """A short, denoised snippet of on-screen text to anchor the quip in real words.

    Drops shell/title-bar furniture so content (prompt, output, status) leads the budget.

    Args:
        lines: OCR text lines, in reading order.
        cap: Maximum length of the returned string, in characters.

    Returns:
        The non-noise lines joined with " · " and truncated to ``cap`` characters.
    """
    # Separator is a middle dot (U+00B7); the previous literal was a mis-encoded form
    # that injected a stray Greek capital beta into every excerpt.
    return " · ".join(ln for ln in lines if not _is_noise(ln))[:cap]