lazyflat/web/llm.py
EiSiMo eb66284172 enrichment: Haiku flat details + image gallery on expand
apply service
- POST /internal/fetch-listing: headless Playwright fetch of a listing URL,
  returns {html, image_urls[], final_url}. Uses the same browser
  fingerprint/profile as the apply run so bot guards don't kick in

web service
- New enrichment pipeline (web/enrichment.py):
  /internal/flats → upsert → kick() enrichment in a background thread
    1. POST /internal/fetch-listing on apply
    2. llm.extract_flat_details(html, url) — Haiku tool-use call returns
       structured JSON (address, rooms, rent, description, pros/cons, etc.)
    3. Download each image directly to /data/flats/<slug>/NN.<ext>
    4. Persist enrichment_json + image_count + enrichment_status on the flat
- llm.py: minimal Anthropic /v1/messages wrapper, no SDK
- DB migration v5 adds enrichment_json/_status/_updated_at + image_count
- Admin "Altbestand anreichern" button (POST /actions/enrich-all) queues
  backfill for all pending/failed rows; runs in a detached task
- GET /partials/wohnung/<id> renders _wohnung_detail.html
- GET /flat-images/<slug>/<n> serves the downloaded image

UI
- Chevron on each list row toggles an inline detail pane (HTMX fetch on
  first open, hx-preserve keeps it open across the 3–30 s polls)
- CSS .flat-gallery normalises image tiles to a 4/3 aspect with object-fit:
  cover so different source sizes align cleanly
- "analysiert…" / "?" chips on the list reflect enrichment_status

Config
- ANTHROPIC_API_KEY + ANTHROPIC_MODEL wired into docker-compose's web
  service (default model: claude-haiku-4-5-20251001)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 14:46:12 +02:00

119 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Minimal Anthropic Messages API wrapper for flat enrichment.
Uses tool-use forced output so Haiku returns structured JSON instead of free
text we'd have to regex. No SDK — plain `requests` is enough here.
"""
from __future__ import annotations
import logging
from typing import Any, Optional
import requests
from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL
# Module logger; uses the fixed name "web.llm" rather than __name__.
logger = logging.getLogger("web.llm")
# Anthropic Messages endpoint that extract_flat_details() POSTs to.
API_URL = "https://api.anthropic.com/v1/messages"
# Sent verbatim as the `anthropic-version` request header.
API_VERSION = "2023-06-01"
# Name of the single tool offered to the model. The request forces it via
# tool_choice, and the response is scanned for a tool_use block with this name.
TOOL_NAME = "record_flat_details"
# JSON Schema passed as the tool's `input_schema`. Every scalar field is
# nullable and nothing is required, so the model can leave out or null
# anything the listing does not state. The per-field descriptions are part of
# the prompt the model sees, so their wording matters.
TOOL_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "address": {
            "type": ["string", "null"],
            "description": "Full street address incl. postcode+city if present",
        },
        "rooms": {"type": ["number", "null"], "description": "Number of rooms (decimal ok)"},
        "size_sqm": {"type": ["number", "null"], "description": "Size in m²"},
        "rent_cold": {"type": ["number", "null"], "description": "Kaltmiete in €"},
        "rent_total": {"type": ["number", "null"], "description": "Warm/Gesamtmiete in €"},
        "utilities": {"type": ["number", "null"], "description": "Nebenkosten in €"},
        "deposit": {"type": ["number", "null"], "description": "Kaution in €"},
        "available_from": {"type": ["string", "null"], "description": "Bezugsfrei ab (text)"},
        "floor": {"type": ["string", "null"], "description": "Etage (text, z.B. '3. OG')"},
        "heating": {"type": ["string", "null"]},
        "energy_certificate": {"type": ["string", "null"]},
        "energy_value": {"type": ["string", "null"]},
        "year_built": {"type": ["string", "null"]},
        "wbs_required": {"type": ["boolean", "null"]},
        "wbs_type": {"type": ["string", "null"], "description": "WBS-Typ, z.B. '160' oder null"},
        # The ranges below are written with a plain ASCII hyphen ("2-3",
        # "2-4"). Without a separator they read as "23" / "24", which would
        # ask the model for far more text than intended.
        "description": {
            "type": ["string", "null"],
            "description": "Kurze 2-3-Satz-Beschreibung der Wohnung auf Deutsch. Fakten, keine Werbesprache.",
        },
        "features": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Ausstattungsmerkmale (z.B. 'Balkon', 'Einbauküche', 'Parkett')",
        },
        "pros": {
            "type": "array",
            "items": {"type": "string"},
            "description": "2-4 konkrete Vorteile aus Bewerbersicht (keine Werbung)",
        },
        "cons": {
            "type": "array",
            "items": {"type": "string"},
            "description": "2-4 mögliche Nachteile / Punkte zum Beachten",
        },
    },
    "required": [],
    "additionalProperties": False,
}
# System prompt (German) sent with every extraction request. It is assembled
# one sentence per element and joined with single spaces. It tells the model
# to answer only through the tool call, to use null for anything the HTML does
# not clearly state, and to emit numbers as numbers.
SYSTEM_PROMPT = " ".join((
    "Du extrahierst strukturierte Wohnungsdaten aus deutschem HTML-Quelltext "
    "von Berliner Wohnungsbaugesellschaften "
    "(howoge, gewobag, degewo, gesobau, wbm, stadt-und-land).",
    "Antworte AUSSCHLIESSLICH über den bereitgestellten Tool-Call.",
    "Fehlende Werte → null.",
    "Keine Erfindungen — wenn etwas nicht klar aus dem HTML hervorgeht, "
    "lass das Feld null.",
    "Zahlen bitte als Zahlen (nicht als String), "
    "Beschreibung/Pros/Cons auf Deutsch.",
))
def extract_flat_details(html: str, url: str,
                         max_html_chars: int = 60_000,
                         timeout: int = 60) -> Optional[dict]:
    """Ask the configured Anthropic model to extract flat details from HTML.

    Sends one Messages API request that offers a single tool (``TOOL_NAME``
    with ``TOOL_SCHEMA``) and forces its use via ``tool_choice``, so the
    answer arrives as structured tool input rather than free text.

    Args:
        html: Listing page source. Only the first ``max_html_chars``
            characters are sent.
        url: Listing URL, included in the user message.
        max_html_chars: Upper bound on the number of HTML characters sent.
        timeout: Value passed as ``timeout`` to ``requests.post``.

    Returns:
        The ``input`` dict of the matching ``tool_use`` block (``{}`` if that
        input is empty or missing), or ``None`` when the API key is unset,
        the request fails, the API answers with status >= 400, the body is
        not a JSON object, or no matching ``tool_use`` block is present.
        Failures are logged, not raised.
    """
    if not ANTHROPIC_API_KEY:
        logger.info("skipping enrichment: ANTHROPIC_API_KEY not set")
        return None

    user_content = (
        f"URL: {url}\n\n"
        f"HTML-Quellcode (ggf. gekürzt):\n---\n{html[:max_html_chars]}\n---"
    )
    body = {
        "model": ANTHROPIC_MODEL,
        "max_tokens": 1500,
        "system": SYSTEM_PROMPT,
        "tools": [{
            "name": TOOL_NAME,
            "description": "Persist the extracted flat details.",
            "input_schema": TOOL_SCHEMA,
        }],
        # Force the model to answer through our tool instead of prose.
        "tool_choice": {"type": "tool", "name": TOOL_NAME},
        "messages": [{"role": "user", "content": user_content}],
    }

    try:
        r = requests.post(
            API_URL,
            headers={
                "x-api-key": ANTHROPIC_API_KEY,
                "anthropic-version": API_VERSION,
                "content-type": "application/json",
            },
            json=body,
            timeout=timeout,
        )
    except requests.RequestException as e:
        logger.warning("anthropic request failed: %s", e)
        return None

    if r.status_code >= 400:
        logger.warning("anthropic %s: %s", r.status_code, r.text[:300])
        return None

    # A 2xx answer is not guaranteed to carry JSON (e.g. a proxy error page).
    # Response.json() raises a ValueError subclass in that case; treat it as
    # a failed enrichment instead of letting it escape to the caller.
    try:
        data = r.json()
    except ValueError as e:
        logger.warning("anthropic returned invalid JSON: %s", e)
        return None

    if not isinstance(data, dict):
        logger.warning("anthropic returned unexpected payload: %s", data)
        return None

    # `or []` also covers an explicit `"content": null`.
    for block in data.get("content") or []:
        if not isinstance(block, dict):
            continue
        if block.get("type") == "tool_use" and block.get("name") == TOOL_NAME:
            return block.get("input") or {}

    logger.warning("anthropic returned no tool_use block: %s", data)
    return None