"""Minimal Anthropic Messages API wrapper for flat enrichment. Uses tool-use forced output so Haiku returns structured JSON instead of free text we'd have to regex. No SDK — plain `requests` is enough here. """ from __future__ import annotations import logging from typing import Any, Optional import requests from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL logger = logging.getLogger("web.llm") API_URL = "https://api.anthropic.com/v1/messages" API_VERSION = "2023-06-01" TOOL_NAME = "record_flat_details" TOOL_SCHEMA: dict[str, Any] = { "type": "object", "properties": { "address": {"type": ["string", "null"], "description": "Full street address incl. postcode+city if present"}, "rooms": {"type": ["number", "null"], "description": "Number of rooms (decimal ok)"}, "size_sqm": {"type": ["number", "null"], "description": "Size in m²"}, "rent_cold": {"type": ["number", "null"], "description": "Kaltmiete in €"}, "rent_total": {"type": ["number", "null"], "description": "Warm/Gesamtmiete in €"}, "utilities": {"type": ["number", "null"], "description": "Nebenkosten in €"}, "deposit": {"type": ["number", "null"], "description": "Kaution in €"}, "available_from": {"type": ["string", "null"], "description": "Bezugsfrei ab (text)"}, "floor": {"type": ["string", "null"], "description": "Etage (text, z.B. '3. OG')"}, "heating": {"type": ["string", "null"]}, "energy_certificate": {"type": ["string", "null"]}, "energy_value": {"type": ["string", "null"]}, "year_built": {"type": ["string", "null"]}, "wbs_required": {"type": ["boolean", "null"]}, "wbs_type": {"type": ["string", "null"], "description": "WBS-Typ, z.B. '160' oder null"}, "description": { "type": ["string", "null"], "description": "Kurze 2–3-Satz-Beschreibung der Wohnung auf Deutsch. Fakten, keine Werbesprache.", }, "features": { "type": "array", "items": {"type": "string"}, "description": "Ausstattungsmerkmale (z.B. 'Balkon', 'Einbauküche', 'Parkett')", }, "pros": { "type": "array", "items": {"type": "string"}, "description": "2–4 konkrete Vorteile aus Bewerbersicht (keine Werbung)", }, "cons": { "type": "array", "items": {"type": "string"}, "description": "2–4 mögliche Nachteile / Punkte zum Beachten", }, }, "required": [], "additionalProperties": False, } SYSTEM_PROMPT = ( "Du extrahierst strukturierte Wohnungsdaten aus deutschem HTML-Quelltext von " "Berliner Wohnungsbaugesellschaften (howoge, gewobag, degewo, gesobau, wbm, " "stadt-und-land). Antworte AUSSCHLIESSLICH über den bereitgestellten Tool-Call. " "Fehlende Werte → null. Keine Erfindungen — wenn etwas nicht klar aus dem HTML " "hervorgeht, lass das Feld null. Zahlen bitte als Zahlen (nicht als String), " "Beschreibung/Pros/Cons auf Deutsch." ) def extract_flat_details(html: str, url: str, max_html_chars: int = 60_000, timeout: int = 60) -> Optional[dict]: """Call Haiku; return the structured dict or None on failure.""" if not ANTHROPIC_API_KEY: logger.info("skipping enrichment: ANTHROPIC_API_KEY not set") return None user_content = ( f"URL: {url}\n\n" f"HTML-Quellcode (ggf. gekürzt):\n---\n{html[:max_html_chars]}\n---" ) body = { "model": ANTHROPIC_MODEL, "max_tokens": 1500, "system": SYSTEM_PROMPT, "tools": [{ "name": TOOL_NAME, "description": "Persist the extracted flat details.", "input_schema": TOOL_SCHEMA, }], "tool_choice": {"type": "tool", "name": TOOL_NAME}, "messages": [{"role": "user", "content": user_content}], } try: r = requests.post( API_URL, headers={ "x-api-key": ANTHROPIC_API_KEY, "anthropic-version": API_VERSION, "content-type": "application/json", }, json=body, timeout=timeout, ) except requests.RequestException as e: logger.warning("anthropic request failed: %s", e) return None if r.status_code >= 400: logger.warning("anthropic %s: %s", r.status_code, r.text[:300]) return None data = r.json() for block in data.get("content", []): if block.get("type") == "tool_use" and block.get("name") == TOOL_NAME: return block.get("input") or {} logger.warning("anthropic returned no tool_use block: %s", data) return None