"""Anthropic Haiku helper — used only to pick which `` URLs on a listing page are actual photos of the flat (vs. nav icons, badges, ads…). If the API key is missing or the call fails, the caller passes the original candidates straight through, so this is a soft enhancement, not a dependency. """ from __future__ import annotations import logging from typing import Optional import requests from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL logger = logging.getLogger("web.llm") API_URL = "https://api.anthropic.com/v1/messages" API_VERSION = "2023-06-01" TOOL_NAME = "select_flat_images" TOOL_SCHEMA = { "type": "object", "properties": { "urls": { "type": "array", "items": {"type": "string"}, "description": "Subset of the candidate URLs that show the actual flat — " "interior, exterior, floorplan. Keep ordering of input.", }, }, "required": ["urls"], "additionalProperties": False, } SYSTEM_PROMPT = ( "Du bekommst eine Liste von Bild-URLs einer Wohnungsanzeige. Wähle nur " "die URLs aus, die ein Foto der Wohnung zeigen (Innenraum, Außenansicht " "des Gebäudes, Grundriss). Verwerfe Logos, Icons, Banner, Ads, " "Bewertungs-Sterne, Karten/Stadtpläne, Mitarbeiter-Portraits, Tracking-" "Pixel. Behalte die Reihenfolge der Input-Liste bei. Antworte " "ausschließlich über den Tool-Call." ) def select_flat_image_urls(candidates: list[str], page_url: str, timeout: int = 30) -> list[str]: """Return the LLM-filtered subset, or the original list on any failure.""" if not ANTHROPIC_API_KEY or not candidates: return candidates user_text = ( f"Seite: {page_url}\n\n" "Kandidaten-URLs (nummeriert):\n" + "\n".join(f"{i+1}. {u}" for i, u in enumerate(candidates)) ) body = { "model": ANTHROPIC_MODEL, "max_tokens": 1500, "system": SYSTEM_PROMPT, "tools": [{ "name": TOOL_NAME, "description": "Persist the selected flat-photo URLs.", "input_schema": TOOL_SCHEMA, }], "tool_choice": {"type": "tool", "name": TOOL_NAME}, "messages": [{"role": "user", "content": user_text}], } try: r = requests.post( API_URL, headers={ "x-api-key": ANTHROPIC_API_KEY, "anthropic-version": API_VERSION, "content-type": "application/json", }, json=body, timeout=timeout, ) except requests.RequestException as e: logger.warning("anthropic image-select request failed: %s", e) return candidates if r.status_code >= 400: logger.warning("anthropic image-select %s: %s", r.status_code, r.text[:300]) return candidates data = r.json() for block in data.get("content", []): if block.get("type") == "tool_use" and block.get("name") == TOOL_NAME: urls = (block.get("input") or {}).get("urls") or [] # Constrain to the original candidate set so the model can't # invent URLs (it sometimes lightly rewrites them otherwise). allowed = set(candidates) return [u for u in urls if u in allowed] return candidates