apply service
- POST /internal/fetch-listing: headless Playwright fetch of a listing URL,
returns {html, image_urls[], final_url}. Uses the same browser
fingerprint/profile as the apply run so bot guards don't kick in
web service
- New enrichment pipeline (web/enrichment.py):
/internal/flats → upsert → kick() enrichment in a background thread
1. POST /internal/fetch-listing on apply
2. llm.extract_flat_details(html, url) — Haiku tool-use call returns
structured JSON (address, rooms, rent, description, pros/cons, etc.)
3. Download each image directly to /data/flats/<slug>/NN.<ext>
4. Persist enrichment_json + image_count + enrichment_status on the flat
- llm.py: minimal Anthropic /v1/messages wrapper, no SDK
- DB migration v5 adds enrichment_json/_status/_updated_at + image_count
- Admin "Altbestand anreichern" button (POST /actions/enrich-all) queues
backfill for all pending/failed rows; runs in a detached task
- GET /partials/wohnung/<id> renders _wohnung_detail.html
- GET /flat-images/<slug>/<n> serves the downloaded image
UI
- Chevron on each list row toggles an inline detail pane (HTMX fetch on
first open, hx-preserve keeps it open across the 3–30 s polls)
- CSS .flat-gallery normalises image tiles to a 4/3 aspect with object-fit:
cover so different source sizes align cleanly
- "analysiert…" / "?" chips on the list reflect enrichment_status
Config
- ANTHROPIC_API_KEY + ANTHROPIC_MODEL wired into docker-compose's web
service (default model: claude-haiku-4-5-20251001)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
119 lines
4.7 KiB
Python
119 lines
4.7 KiB
Python
"""Minimal Anthropic Messages API wrapper for flat enrichment.

Uses tool-use forced output so Haiku returns structured JSON instead of free
text we'd have to regex. No SDK — plain `requests` is enough here.
"""
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
from typing import Any, Optional
|
||
|
||
import requests
|
||
|
||
from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL
|
||
|
||
# Module-level logger; the name is fixed (not __name__) so logging config
# keyed on "web.llm" keeps working regardless of how the module is imported.
logger = logging.getLogger("web.llm")

# Anthropic Messages endpoint and the value sent in the `anthropic-version`
# request header.
API_URL = "https://api.anthropic.com/v1/messages"
API_VERSION = "2023-06-01"

# Name of the single tool offered to the model; also used to force the tool
# call and to pick the matching `tool_use` block out of the response.
TOOL_NAME = "record_flat_details"
|
||
# JSON Schema passed as the tool's `input_schema`. Every scalar field is
# nullable and nothing is required, so the model can leave unknown values out
# (or null) instead of inventing them. Description strings are part of the
# request sent to the model — do not reword them casually.
TOOL_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "address": {
            "type": ["string", "null"],
            "description": "Full street address incl. postcode+city if present",
        },
        "rooms": {
            "type": ["number", "null"],
            "description": "Number of rooms (decimal ok)",
        },
        "size_sqm": {
            "type": ["number", "null"],
            "description": "Size in m²",
        },
        "rent_cold": {
            "type": ["number", "null"],
            "description": "Kaltmiete in €",
        },
        "rent_total": {
            "type": ["number", "null"],
            "description": "Warm/Gesamtmiete in €",
        },
        "utilities": {
            "type": ["number", "null"],
            "description": "Nebenkosten in €",
        },
        "deposit": {
            "type": ["number", "null"],
            "description": "Kaution in €",
        },
        "available_from": {
            "type": ["string", "null"],
            "description": "Bezugsfrei ab (text)",
        },
        "floor": {
            "type": ["string", "null"],
            "description": "Etage (text, z.B. '3. OG')",
        },
        "heating": {"type": ["string", "null"]},
        "energy_certificate": {"type": ["string", "null"]},
        "energy_value": {"type": ["string", "null"]},
        "year_built": {"type": ["string", "null"]},
        "wbs_required": {"type": ["boolean", "null"]},
        "wbs_type": {
            "type": ["string", "null"],
            "description": "WBS-Typ, z.B. '160' oder null",
        },
        "description": {
            "type": ["string", "null"],
            "description": "Kurze 2–3-Satz-Beschreibung der Wohnung auf Deutsch. Fakten, keine Werbesprache.",
        },
        "features": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Ausstattungsmerkmale (z.B. 'Balkon', 'Einbauküche', 'Parkett')",
        },
        "pros": {
            "type": "array",
            "items": {"type": "string"},
            "description": "2–4 konkrete Vorteile aus Bewerbersicht (keine Werbung)",
        },
        "cons": {
            "type": "array",
            "items": {"type": "string"},
            "description": "2–4 mögliche Nachteile / Punkte zum Beachten",
        },
    },
    "required": [],
    "additionalProperties": False,
}
|
||
|
||
# System prompt, sent verbatim (in German) with every request. It tells the
# model to answer only through the tool call, to use null for anything not
# clearly present in the HTML, and to emit numbers as numbers.
SYSTEM_PROMPT = (
    "Du extrahierst strukturierte Wohnungsdaten aus deutschem HTML-Quelltext von "
    "Berliner Wohnungsbaugesellschaften (howoge, gewobag, degewo, gesobau, wbm, "
    "stadt-und-land). Antworte AUSSCHLIESSLICH über den bereitgestellten Tool-Call. "
    "Fehlende Werte → null. Keine Erfindungen — wenn etwas nicht klar aus dem HTML "
    "hervorgeht, lass das Feld null. Zahlen bitte als Zahlen (nicht als String), "
    "Beschreibung/Pros/Cons auf Deutsch."
)
|
||
|
||
|
||
def extract_flat_details(html: str, url: str,
                         max_html_chars: int = 60_000,
                         timeout: int = 60) -> Optional[dict]:
    """Ask the configured Anthropic model for structured flat details.

    Sends the listing URL plus the (truncated) HTML to the Messages API with
    a single tool whose use is forced via ``tool_choice``, then returns the
    ``input`` of the matching ``tool_use`` block.

    Args:
        html: Raw HTML of the listing page. ``None`` is treated as empty.
        url: Listing URL, included in the prompt for context.
        max_html_chars: Upper bound on the number of HTML characters sent.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The tool input dict (``{}`` if the model sent an empty input), or
        ``None`` when the API key is missing, the request fails, the API
        answers with status >= 400, the body is not valid JSON, or no
        matching ``tool_use`` block is present. Never raises for these cases.
    """
    if not ANTHROPIC_API_KEY:
        logger.info("skipping enrichment: ANTHROPIC_API_KEY not set")
        return None

    # Truncate up front; tolerate a missing page body instead of raising.
    html_snippet = (html or "")[:max_html_chars]
    user_content = (
        f"URL: {url}\n\n"
        f"HTML-Quellcode (ggf. gekürzt):\n---\n{html_snippet}\n---"
    )
    body = {
        "model": ANTHROPIC_MODEL,
        "max_tokens": 1500,
        "system": SYSTEM_PROMPT,
        "tools": [{
            "name": TOOL_NAME,
            "description": "Persist the extracted flat details.",
            "input_schema": TOOL_SCHEMA,
        }],
        # Force the tool call so the answer is structured, never free text.
        "tool_choice": {"type": "tool", "name": TOOL_NAME},
        "messages": [{"role": "user", "content": user_content}],
    }
    try:
        r = requests.post(
            API_URL,
            headers={
                "x-api-key": ANTHROPIC_API_KEY,
                "anthropic-version": API_VERSION,
                "content-type": "application/json",
            },
            json=body,
            timeout=timeout,
        )
    except requests.RequestException as e:
        logger.warning("anthropic request failed: %s", e)
        return None

    if r.status_code >= 400:
        logger.warning("anthropic %s: %s", r.status_code, r.text[:300])
        return None

    # A 2xx response with a non-JSON body (proxy error page, truncated
    # stream) must not escape as an exception: decode errors raised by
    # Response.json() are ValueError subclasses in all requests versions.
    try:
        data = r.json()
    except ValueError as e:
        logger.warning("anthropic returned invalid JSON: %s", e)
        return None

    # Guard against unexpected payload shapes before calling .get on them.
    content = data.get("content") if isinstance(data, dict) else None
    if not isinstance(content, list):
        content = []
    for block in content:
        if not isinstance(block, dict):
            continue
        if block.get("type") == "tool_use" and block.get("name") == TOOL_NAME:
            return block.get("input") or {}
    logger.warning("anthropic returned no tool_use block: %s", data)
    return None
|