enrichment: Haiku flat details + image gallery on expand
apply service
- POST /internal/fetch-listing: headless Playwright fetch of a listing URL,
returns {html, image_urls[], final_url}. Uses the same browser
fingerprint/profile as the apply run so bot guards don't kick in
web service
- New enrichment pipeline (web/enrichment.py):
/internal/flats → upsert → kick() enrichment in a background thread
1. POST /internal/fetch-listing on apply
2. llm.extract_flat_details(html, url) — Haiku tool-use call returns
structured JSON (address, rooms, rent, description, pros/cons, etc.)
3. Download each image directly to /data/flats/<slug>/NN.<ext>
4. Persist enrichment_json + image_count + enrichment_status on the flat
- llm.py: minimal Anthropic /v1/messages wrapper, no SDK
- DB migration v5 adds enrichment_json/_status/_updated_at + image_count
- Admin "Altbestand anreichern" button (POST /actions/enrich-all) queues
backfill for all pending/failed rows; runs in a detached task
- GET /partials/wohnung/<id> renders _wohnung_detail.html
- GET /flat-images/<slug>/<n> serves the downloaded image
UI
- Chevron on each list row toggles an inline detail pane (HTMX fetch on
first open, hx-preserve keeps it open across the 3–30 s polls)
- CSS .flat-gallery normalises image tiles to a 4/3 aspect with object-fit:
cover so different source sizes align cleanly
- "analysiert…" / "?" chips on the list reflect enrichment_status
Config
- ANTHROPIC_API_KEY + ANTHROPIC_MODEL wired into docker-compose's web
service (default model: claude-haiku-4-5-20251001)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2609d3504a
commit
eb66284172
11 changed files with 688 additions and 44 deletions
|
|
@ -1,8 +1,9 @@
|
|||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from fastapi import Depends, FastAPI, Header, HTTPException, status
|
||||
from playwright.async_api import ViewportSize, async_playwright
|
||||
from pydantic import BaseModel, Field
|
||||
from rich.console import Console
|
||||
from rich.logging import RichHandler
|
||||
|
|
@ -13,7 +14,7 @@ from classes.application_result import ApplicationResult
|
|||
from classes.profile import Profile
|
||||
from language import _
|
||||
from providers._provider import ApplyContext
|
||||
from settings import INTERNAL_API_KEY
|
||||
from settings import BROWSER_HEIGHT, BROWSER_LOCALE, BROWSER_WIDTH, HEADLESS, INTERNAL_API_KEY
|
||||
|
||||
|
||||
def setup_logging():
|
||||
|
|
@ -125,3 +126,93 @@ async def apply(req: ApplyRequest):
|
|||
application_id=req.application_id,
|
||||
forensics=recorder.to_json(),
|
||||
)
|
||||
|
||||
|
||||
class FetchListingRequest(BaseModel):
    # Request body for POST /internal/fetch-listing.
    # `url` is the listing page to open in the headless browser; the handler
    # strips it and answers 400 when nothing is left.
    url: str
|
||||
|
||||
|
||||
class FetchListingResponse(BaseModel):
    # Response body for POST /internal/fetch-listing.
    # final_url:  page URL after navigation (read from `page.url`).
    # html:       rendered page source, cut to MAX_FETCH_HTML_BYTES characters.
    # image_urls: absolute, de-duplicated image URLs (at most MAX_FETCH_IMAGES).
    final_url: str
    html: str
    image_urls: list[str]
|
||||
|
||||
|
||||
MAX_FETCH_HTML_BYTES = 400_000
|
||||
MAX_FETCH_IMAGES = 30
|
||||
|
||||
|
||||
def _select_image_urls(raw_imgs: list[str], base_url: str) -> list[str]:
    """Absolutize, de-duplicate and filter scraped image candidates.

    Drops empty values and data: URIs, resolves the rest against `base_url`,
    skips anything whose URL looks like page chrome (logo/icon/sprite/svg)
    and stops after MAX_FETCH_IMAGES entries. Input order is preserved.
    """
    seen: set[str] = set()
    image_urls: list[str] = []
    for u in raw_imgs:
        if not u or u.startswith("data:"):
            continue
        absu = urljoin(base_url, u)
        if absu in seen:
            continue
        seen.add(absu)
        lower = absu.lower()
        # Plain substring heuristic: cheap, but may also drop real photos whose
        # URL happens to contain one of these words.
        if any(x in lower for x in ("logo", "favicon", "sprite", "icon", ".svg")):
            continue
        image_urls.append(absu)
        if len(image_urls) >= MAX_FETCH_IMAGES:
            break
    return image_urls


@app.post(
    "/internal/fetch-listing",
    response_model=FetchListingResponse,
    dependencies=[Depends(require_api_key)],
)
async def fetch_listing(req: FetchListingRequest):
    """Headless Playwright fetch of a flat listing.

    Returns the rendered page HTML plus absolute image URLs. Used by the web
    service's LLM enrichment pipeline so the request looks like a real browser
    and does not get bounced by bot guards.

    Raises:
        HTTPException: 400 when `url` is blank, unparsable or not http(s).
    """
    url = req.url.strip()
    if not url:
        raise HTTPException(400, "url required")
    # Chromium will also open file://, chrome:// and similar schemes; only real
    # web pages are legitimate listing URLs, so refuse everything else up front.
    try:
        scheme = urlparse(url).scheme.lower()
    except ValueError:
        raise HTTPException(400, "invalid url") from None
    if scheme not in ("http", "https"):
        raise HTTPException(400, "url must be http(s)")
    logger.info("fetch-listing url=%s", url)

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=HEADLESS,
            args=["--disable-blink-features=AutomationControlled"],
        )
        try:
            context = await browser.new_context(
                viewport=ViewportSize({"width": BROWSER_WIDTH, "height": BROWSER_HEIGHT}),
                locale=BROWSER_LOCALE,
            )
            page = await context.new_page()
            await page.goto(url, timeout=30_000)
            try:
                await page.wait_for_load_state("networkidle", timeout=10_000)
            except Exception:
                # Best effort: pages with long-polling or analytics never go
                # idle. Continue with whatever has rendered so far.
                logger.debug("fetch-listing: networkidle wait gave up for %s", url)
            final_url = page.url
            html = await page.content()
            # Collect image candidates: <img src> + <img data-src> + srcset first URL.
            raw_imgs: list[str] = await page.evaluate(
                """() => {
                    const out = [];
                    document.querySelectorAll('img').forEach((img) => {
                        if (img.src) out.push(img.src);
                        const ds = img.getAttribute('data-src');
                        if (ds) out.push(ds);
                        const ss = img.getAttribute('srcset');
                        if (ss) {
                            const first = ss.split(',')[0].trim().split(' ')[0];
                            if (first) out.push(first);
                        }
                    });
                    return out;
                }"""
            )
        finally:
            # Closing the browser also disposes the context and page.
            await browser.close()

    return FetchListingResponse(
        final_url=final_url,
        # NOTE: this slice counts characters, not bytes, despite the constant's name.
        html=html[:MAX_FETCH_HTML_BYTES],
        image_urls=_select_image_urls(raw_imgs, final_url),
    )
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ services:
|
|||
- SMTP_PASSWORD=${SMTP_PASSWORD:-}
|
||||
- SMTP_FROM=${SMTP_FROM:-lazyflat@localhost}
|
||||
- SMTP_STARTTLS=${SMTP_STARTTLS:-true}
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- ANTHROPIC_MODEL=${ANTHROPIC_MODEL:-claude-haiku-4-5-20251001}
|
||||
volumes:
|
||||
- lazyflat_data:/data
|
||||
expose:
|
||||
|
|
|
|||
66
web/app.py
66
web/app.py
|
|
@ -15,6 +15,7 @@ import hmac
|
|||
import io
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import sqlite3
|
||||
import zipfile
|
||||
from contextlib import asynccontextmanager
|
||||
|
|
@ -33,6 +34,7 @@ except Exception:
|
|||
BERLIN_TZ = timezone.utc
|
||||
|
||||
import db
|
||||
import enrichment
|
||||
import notifications
|
||||
import retention
|
||||
from apply_client import ApplyClient, _row_to_profile
|
||||
|
|
@ -119,6 +121,7 @@ def _iso_utc(s: str | None) -> str:
|
|||
|
||||
templates.env.filters["de_dt"] = _de_dt
|
||||
templates.env.filters["iso_utc"] = _iso_utc
|
||||
templates.env.filters["flat_slug"] = lambda s: enrichment.flat_slug(str(s or ""))
|
||||
|
||||
|
||||
@app.middleware("http")
|
||||
|
|
@ -473,6 +476,53 @@ def partial_wohnungen(request: Request, user=Depends(require_user)):
|
|||
return templates.TemplateResponse("_wohnungen_body.html", ctx)
|
||||
|
||||
|
||||
@app.get("/partials/wohnung/{flat_id:path}", response_class=HTMLResponse)
def partial_wohnung_detail(request: Request, flat_id: str, user=Depends(require_user)):
    """Render the expandable detail pane (_wohnung_detail.html) for one flat.

    Answers 404 when the flat is unknown. Stored enrichment JSON that cannot
    be decoded is treated as absent instead of failing the request.
    """
    flat = db.get_flat(flat_id)
    if not flat:
        raise HTTPException(404)

    # Decode the persisted LLM result; any decode problem degrades to None.
    enrichment_data = None
    if flat["enrichment_json"]:
        try:
            enrichment_data = json.loads(flat["enrichment_json"])
        except Exception:
            enrichment_data = None

    # Images are addressed by the flat's slug and a 1-based index.
    slug = enrichment.flat_slug(flat_id)
    n_images = int(flat["image_count"] or 0)
    image_urls = [f"/flat-images/{slug}/{i}" for i in range(1, n_images + 1)]

    return templates.TemplateResponse(
        "_wohnung_detail.html",
        {
            "request": request,
            "flat": flat,
            "enrichment": enrichment_data,
            "enrichment_status": flat["enrichment_status"],
            "image_urls": image_urls,
        },
    )
|
||||
|
||||
|
||||
@app.get("/flat-images/{slug}/{index}")
def flat_image(slug: str, index: int):
    """Serve one downloaded flat image, addressed by slug and 1-based index.

    `slug` is expected to be the value produced by enrichment.flat_slug(); it
    is still checked to be purely alphanumeric before it is joined into a
    filesystem path. Every miss (bad slug, index outside 1..99, unknown
    directory, no matching file) answers 404.
    """
    if not (slug.isalnum() and 1 <= index <= 99):
        raise HTTPException(404)

    image_dir = enrichment.IMAGES_DIR / slug
    if not image_dir.exists():
        raise HTTPException(404)

    # Files are stored as NN.<ext>; the extension is unknown here, so match on
    # the zero-padded number and take the first hit.
    wanted = f"{index:02d}."
    hit = next((p for p in image_dir.iterdir() if p.name.startswith(wanted)), None)
    if hit is None:
        raise HTTPException(404)

    media = mimetypes.guess_type(hit.name)[0] or "image/jpeg"
    return Response(content=hit.read_bytes(), media_type=media,
                    headers={"Cache-Control": "public, max-age=3600"})
|
||||
|
||||
|
||||
@app.post("/actions/filters")
|
||||
async def action_save_filters(
|
||||
request: Request,
|
||||
|
|
@ -974,6 +1024,19 @@ async def action_users_disable(
|
|||
return RedirectResponse("/einstellungen/benutzer", status_code=303)
|
||||
|
||||
|
||||
@app.post("/actions/enrich-all")
async def action_enrich_all(
    request: Request,
    csrf: str = Form(...),
    admin=Depends(require_admin),
):
    """Admin action: start an enrichment backfill for pending/failed flats.

    Verifies the CSRF token, kicks the backfill, writes an audit entry with
    the number of queued flats and returns the refreshed flat list (partial
    or redirect, as decided by the shared helper).
    """
    require_csrf(admin["id"], csrf)
    queued_count = enrichment.kick_backfill()
    audit_detail = f"queued={queued_count}"
    db.log_audit(
        admin["username"],
        "enrichment.backfill",
        audit_detail,
        user_id=admin["id"],
        ip=client_ip(request),
    )
    return _wohnungen_partial_or_redirect(request, admin)
|
||||
|
||||
|
||||
@app.post("/actions/users/delete")
|
||||
async def action_users_delete(
|
||||
request: Request,
|
||||
|
|
@ -1010,6 +1073,9 @@ async def internal_submit_flat(
|
|||
if not is_new:
|
||||
return {"status": "duplicate"}
|
||||
|
||||
# Kick LLM enrichment + image download for this fresh flat.
|
||||
enrichment.kick(str(payload["id"]))
|
||||
|
||||
for u in db.list_users():
|
||||
if u["disabled"]:
|
||||
continue
|
||||
|
|
|
|||
32
web/db.py
32
web/db.py
|
|
@ -195,6 +195,13 @@ MIGRATIONS: list[str] = [
|
|||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_rejections_user ON flat_rejections(user_id);
|
||||
""",
|
||||
# 0005: LLM enrichment — extracted details + downloaded image count per flat
|
||||
"""
|
||||
ALTER TABLE flats ADD COLUMN enrichment_json TEXT;
|
||||
ALTER TABLE flats ADD COLUMN enrichment_status TEXT NOT NULL DEFAULT 'pending';
|
||||
ALTER TABLE flats ADD COLUMN enrichment_updated_at TEXT;
|
||||
ALTER TABLE flats ADD COLUMN image_count INTEGER NOT NULL DEFAULT 0;
|
||||
""",
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -447,6 +454,31 @@ def get_flat(flat_id: str) -> Optional[sqlite3.Row]:
|
|||
return _conn.execute("SELECT * FROM flats WHERE id = ?", (flat_id,)).fetchone()
|
||||
|
||||
|
||||
def set_flat_enrichment(flat_id: str, status: str,
                        enrichment: Optional[dict] = None,
                        image_count: int = 0) -> None:
    """Store the outcome of an enrichment run on the flat row.

    All four enrichment columns are overwritten on every call: the status,
    the JSON-encoded details (SQL NULL when `enrichment` is None), the
    current timestamp and the image count.
    """
    sql = """UPDATE flats SET enrichment_status = ?,
                              enrichment_json = ?,
                              enrichment_updated_at = ?,
                              image_count = ?
             WHERE id = ?"""
    with _lock:
        encoded = None if enrichment is None else json.dumps(enrichment)
        params = (status, encoded, now_iso(), image_count, flat_id)
        _conn.execute(sql, params)
|
||||
|
||||
|
||||
def flats_needing_enrichment(limit: int = 100) -> list[sqlite3.Row]:
    """Return up to `limit` flats (id, link) whose enrichment is pending or failed.

    Rows are ordered by discovered_at, newest first.
    """
    query = """SELECT id, link FROM flats
               WHERE enrichment_status IN ('pending', 'failed')
               ORDER BY discovered_at DESC LIMIT ?"""
    cursor = _conn.execute(query, (limit,))
    rows = cursor.fetchall()
    return list(rows)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Applications
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
168
web/enrichment.py
Normal file
168
web/enrichment.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
"""Flat-enrichment pipeline.
|
||||
|
||||
For each new flat we:
|
||||
1. Ask the apply service to fetch the listing via Playwright (bypasses bot guards)
|
||||
2. Feed the HTML to Haiku via `llm.extract_flat_details` → structured dict
|
||||
3. Download each image URL directly into /data/flats/<slug>/NN.<ext>
|
||||
4. Persist result on the flat row (enrichment_json + image_count + status)
|
||||
|
||||
Kicked as a detached asyncio task from /internal/flats so scraping stays fast.
|
||||
A small queue cap + per-call lock would be next steps if we ever need them.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
import db
|
||||
import llm
|
||||
from settings import DATA_DIR, INTERNAL_API_KEY
|
||||
|
||||
logger = logging.getLogger("web.enrichment")
|
||||
|
||||
APPLY_FETCH_URL = os.environ.get("APPLY_URL", "http://apply:8000") + "/internal/fetch-listing"
|
||||
IMAGES_DIR = DATA_DIR / "flats"
|
||||
IMAGES_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
MAX_IMAGES = 12
|
||||
MAX_IMAGE_BYTES = 3_000_000 # 3 MB per image
|
||||
IMAGE_TIMEOUT = 15
|
||||
|
||||
|
||||
def flat_slug(flat_id: str) -> str:
    """Return a filesystem-safe short identifier for a flat (IDs are URLs).

    The slug is the first 16 hex characters of the SHA-1 of the UTF-8 encoded
    id. The hash is only used as a stable name, not for security, which is
    declared explicitly so the call also works on FIPS-restricted builds.
    """
    digest = hashlib.sha1(flat_id.encode("utf-8"), usedforsecurity=False)
    return digest.hexdigest()[:16]
|
||||
|
||||
|
||||
def flat_image_dir(flat_id: str) -> Path:
    """Return the image directory of a flat, creating it if it is missing."""
    target = IMAGES_DIR / flat_slug(flat_id)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def _fetch_listing(url: str) -> Optional[dict]:
    """Ask the apply service to fetch a listing page via Playwright.

    Returns the decoded JSON object on success. Every failure mode — transport
    error, HTTP status >= 400, a body that is not JSON, or JSON that is not an
    object — is logged and reported as None so the caller has a single
    "failed" path to handle.
    """
    try:
        r = requests.post(
            APPLY_FETCH_URL,
            headers={"X-Internal-Api-Key": INTERNAL_API_KEY},
            json={"url": url},
            timeout=90,
        )
    except requests.RequestException as e:
        logger.warning("fetch-listing request failed for %s: %s", url, e)
        return None
    if r.status_code >= 400:
        logger.warning("fetch-listing %s: %s", r.status_code, r.text[:300])
        return None
    try:
        payload = r.json()
    except ValueError as e:
        # A proxy error page or truncated body must not crash the pipeline.
        logger.warning("fetch-listing returned invalid JSON for %s: %s", url, e)
        return None
    if not isinstance(payload, dict):
        logger.warning("fetch-listing returned unexpected payload type %s for %s",
                       type(payload).__name__, url)
        return None
    return payload
|
||||
|
||||
|
||||
def _ext_from_response(resp: requests.Response, url: str) -> str:
    """Pick a file extension (with leading dot) for a downloaded image.

    Preference order:
      1. the extension registered for the response's Content-Type, with the
         JPEG aliases ".jpe"/".jpeg" normalised to ".jpg";
      2. the lower-cased extension of the URL path;
      3. ".jpg" when neither yields anything.
    """
    ct = resp.headers.get("content-type", "").split(";")[0].strip().lower()
    if ct:
        ext = mimetypes.guess_extension(ct) or ""
        if ext:
            # Exact match on purpose: a substring replace of ".jpe" would turn
            # ".jpeg" into ".jpgg".
            return ".jpg" if ext in (".jpe", ".jpeg") else ext
    _, ext = os.path.splitext(urlparse(url).path)
    return ext.lower() or ".jpg"
|
||||
|
||||
|
||||
def _download_images(flat_id: str, urls: list[str], referer: str) -> int:
    """Download up to MAX_IMAGES images into the flat's image directory.

    Files are written as NN.<ext> with NN counting successfully saved images
    from 01, so the numbering has no gaps. A candidate is skipped when the
    request fails, the status is >= 400, the Content-Type is not image/*, the
    body is empty, or the body exceeds MAX_IMAGE_BYTES (an over-long image is
    discarded rather than stored truncated).

    Returns:
        Number of images saved.
    """
    d = flat_image_dir(flat_id)
    # Clear any previous attempts so re-enrichment doesn't pile up dupes.
    for old in d.iterdir():
        try:
            old.unlink()
        except OSError:
            pass  # best effort; a leftover file must not abort enrichment

    saved = 0
    for raw_url in urls[:MAX_IMAGES]:
        path: Optional[Path] = None
        try:
            # The context manager releases the streamed connection on every
            # exit path, including the early `continue`s below.
            with requests.get(
                raw_url,
                headers={"Referer": referer,
                         "User-Agent": "Mozilla/5.0 (lazyflat enricher)"},
                timeout=IMAGE_TIMEOUT,
                stream=True,
            ) as r:
                if r.status_code >= 400:
                    continue
                ct = r.headers.get("content-type", "").split(";")[0].strip().lower()
                if not ct.startswith("image/"):
                    continue
                path = d / f"{saved + 1:02d}{_ext_from_response(r, raw_url)}"
                total = 0
                with open(path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=65_536):
                        if not chunk:
                            continue
                        total += len(chunk)
                        if total > MAX_IMAGE_BYTES:
                            break
                        f.write(chunk)
            if total == 0 or total > MAX_IMAGE_BYTES:
                # Empty or over-sized: do not keep a useless/corrupt file.
                path.unlink(missing_ok=True)
                continue
            saved += 1
        except (requests.RequestException, OSError) as e:
            logger.info("image download failed %s: %s", raw_url, e)
            if path is not None:
                # Remove a partially written file so it cannot be served or
                # shadow the next image that receives the same number.
                try:
                    path.unlink(missing_ok=True)
                except OSError:
                    pass
            continue
    return saved
|
||||
|
||||
|
||||
def enrich_flat_sync(flat_id: str) -> None:
    """Run the full enrichment pipeline for one flat. Blocking.

    Steps: fetch the listing through the apply service, extract structured
    details with the LLM, download the images, then persist the result with
    status "ok". If the listing cannot be fetched or the LLM yields nothing,
    the flat is marked "failed". Any unexpected exception is logged and also
    ends in "failed", so a row never stays "pending" after a crashed run.
    Unknown flat ids are ignored.
    """
    flat = db.get_flat(flat_id)
    if not flat:
        return
    url = flat["link"]
    logger.info("enrich start flat=%s url=%s", flat_id, url)
    try:
        listing = _fetch_listing(url)
        if not listing:
            db.set_flat_enrichment(flat_id, "failed")
            return

        details = llm.extract_flat_details(listing.get("html") or "",
                                           listing.get("final_url") or url)
        if details is None:
            db.set_flat_enrichment(flat_id, "failed")
            return

        image_urls = listing.get("image_urls") or []
        image_count = _download_images(flat_id, image_urls, referer=url)

        db.set_flat_enrichment(flat_id, "ok", enrichment=details, image_count=image_count)
    except Exception:
        # Top-level boundary of a background job: nobody awaits the result, so
        # record the failure here instead of letting it vanish with the task.
        logger.exception("enrich crashed flat=%s", flat_id)
        db.set_flat_enrichment(flat_id, "failed")
        return
    logger.info("enrich done flat=%s images=%d", flat_id, image_count)
|
||||
|
||||
|
||||
# Strong references to in-flight enrichment tasks. The event loop only keeps
# weak references, so an unreferenced task may be garbage-collected mid-run.
_enrich_tasks: set = set()


def kick(flat_id: str) -> None:
    """Fire-and-forget enrichment of one flat in a background thread.

    Must be called while an event loop is running (asyncio.create_task
    requires one). The task is held in a module-level set until it finishes.
    """
    task = asyncio.create_task(asyncio.to_thread(enrich_flat_sync, flat_id))
    _enrich_tasks.add(task)
    task.add_done_callback(_enrich_tasks.discard)
|
||||
|
||||
|
||||
# Strong references to running backfill tasks; without them the event loop's
# weak reference is not enough to keep a detached task alive.
_backfill_tasks: set = set()


async def _backfill_runner(rows: Optional[list] = None) -> None:
    """Enrich the given flats one after another, each in a worker thread.

    Args:
        rows: rows with an "id" key to process; when omitted, the current
            pending/failed flats (up to 200) are loaded from the database.

    A failure on one flat is logged and does not stop the remaining ones.
    """
    if rows is None:
        rows = db.flats_needing_enrichment(limit=200)
    logger.info("enrich backfill: %d flats queued", len(rows))
    for row in rows:
        try:
            await asyncio.to_thread(enrich_flat_sync, row["id"])
        except Exception:
            logger.exception("backfill step failed flat=%s", row["id"])


def kick_backfill() -> int:
    """Queue enrichment for every flat still pending/failed.

    Returns how many flats are queued; the actual work happens in a detached
    task so the admin UI doesn't block for minutes. The snapshot that is
    counted is the same one handed to the runner, so the returned number
    matches what gets processed. Requires a running event loop.
    """
    pending = db.flats_needing_enrichment(limit=200)
    task = asyncio.create_task(_backfill_runner(pending))
    _backfill_tasks.add(task)
    task.add_done_callback(_backfill_tasks.discard)
    return len(pending)
|
||||
119
web/llm.py
Normal file
119
web/llm.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
"""Minimal Anthropic Messages API wrapper for flat enrichment.
|
||||
|
||||
Uses tool-use forced output so Haiku returns structured JSON instead of free
|
||||
text we'd have to regex. No SDK — plain `requests` is enough here.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL
|
||||
|
||||
logger = logging.getLogger("web.llm")
|
||||
|
||||
API_URL = "https://api.anthropic.com/v1/messages"
|
||||
API_VERSION = "2023-06-01"
|
||||
|
||||
TOOL_NAME = "record_flat_details"
|
||||
TOOL_SCHEMA: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"address": {"type": ["string", "null"],
|
||||
"description": "Full street address incl. postcode+city if present"},
|
||||
"rooms": {"type": ["number", "null"], "description": "Number of rooms (decimal ok)"},
|
||||
"size_sqm": {"type": ["number", "null"], "description": "Size in m²"},
|
||||
"rent_cold": {"type": ["number", "null"], "description": "Kaltmiete in €"},
|
||||
"rent_total": {"type": ["number", "null"], "description": "Warm/Gesamtmiete in €"},
|
||||
"utilities": {"type": ["number", "null"], "description": "Nebenkosten in €"},
|
||||
"deposit": {"type": ["number", "null"], "description": "Kaution in €"},
|
||||
"available_from": {"type": ["string", "null"], "description": "Bezugsfrei ab (text)"},
|
||||
"floor": {"type": ["string", "null"], "description": "Etage (text, z.B. '3. OG')"},
|
||||
"heating": {"type": ["string", "null"]},
|
||||
"energy_certificate": {"type": ["string", "null"]},
|
||||
"energy_value": {"type": ["string", "null"]},
|
||||
"year_built": {"type": ["string", "null"]},
|
||||
"wbs_required": {"type": ["boolean", "null"]},
|
||||
"wbs_type": {"type": ["string", "null"], "description": "WBS-Typ, z.B. '160' oder null"},
|
||||
"description": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Kurze 2–3-Satz-Beschreibung der Wohnung auf Deutsch. Fakten, keine Werbesprache.",
|
||||
},
|
||||
"features": {
|
||||
"type": "array", "items": {"type": "string"},
|
||||
"description": "Ausstattungsmerkmale (z.B. 'Balkon', 'Einbauküche', 'Parkett')",
|
||||
},
|
||||
"pros": {
|
||||
"type": "array", "items": {"type": "string"},
|
||||
"description": "2–4 konkrete Vorteile aus Bewerbersicht (keine Werbung)",
|
||||
},
|
||||
"cons": {
|
||||
"type": "array", "items": {"type": "string"},
|
||||
"description": "2–4 mögliche Nachteile / Punkte zum Beachten",
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT = (
|
||||
"Du extrahierst strukturierte Wohnungsdaten aus deutschem HTML-Quelltext von "
|
||||
"Berliner Wohnungsbaugesellschaften (howoge, gewobag, degewo, gesobau, wbm, "
|
||||
"stadt-und-land). Antworte AUSSCHLIESSLICH über den bereitgestellten Tool-Call. "
|
||||
"Fehlende Werte → null. Keine Erfindungen — wenn etwas nicht klar aus dem HTML "
|
||||
"hervorgeht, lass das Feld null. Zahlen bitte als Zahlen (nicht als String), "
|
||||
"Beschreibung/Pros/Cons auf Deutsch."
|
||||
)
|
||||
|
||||
|
||||
def extract_flat_details(html: str, url: str,
                         max_html_chars: int = 60_000,
                         timeout: int = 60) -> Optional[dict]:
    """Call Haiku and return the structured flat details, or None on failure.

    Args:
        html: listing page source; only the first `max_html_chars` characters
            are sent.
        url: listing URL, passed to the model as context.
        max_html_chars: cap on the HTML excerpt length.
        timeout: HTTP timeout in seconds for the API request.

    Returns:
        The `input` object of the forced tool call (an empty dict when the
        tool call carries no usable input), or None when the API key is
        missing, the request fails, the API answers with an error status, the
        body is not valid JSON, or no matching tool_use block is present.
    """
    if not ANTHROPIC_API_KEY:
        logger.info("skipping enrichment: ANTHROPIC_API_KEY not set")
        return None

    user_content = (
        f"URL: {url}\n\n"
        f"HTML-Quellcode (ggf. gekürzt):\n---\n{html[:max_html_chars]}\n---"
    )
    body = {
        "model": ANTHROPIC_MODEL,
        "max_tokens": 1500,
        "system": SYSTEM_PROMPT,
        "tools": [{
            "name": TOOL_NAME,
            "description": "Persist the extracted flat details.",
            "input_schema": TOOL_SCHEMA,
        }],
        # Force the tool call so the answer is structured JSON, never prose.
        "tool_choice": {"type": "tool", "name": TOOL_NAME},
        "messages": [{"role": "user", "content": user_content}],
    }
    try:
        r = requests.post(
            API_URL,
            headers={
                "x-api-key": ANTHROPIC_API_KEY,
                "anthropic-version": API_VERSION,
                "content-type": "application/json",
            },
            json=body,
            timeout=timeout,
        )
    except requests.RequestException as e:
        logger.warning("anthropic request failed: %s", e)
        return None

    if r.status_code >= 400:
        logger.warning("anthropic %s: %s", r.status_code, r.text[:300])
        return None

    try:
        data = r.json()
    except ValueError as e:
        logger.warning("anthropic returned invalid JSON: %s", e)
        return None

    # Be tolerant about the response shape: a missing or null `content`, or
    # blocks that are not objects, are treated as "no tool call".
    blocks = data.get("content") if isinstance(data, dict) else None
    for block in blocks or []:
        if (isinstance(block, dict)
                and block.get("type") == "tool_use"
                and block.get("name") == TOOL_NAME):
            tool_input = block.get("input")
            return tool_input if isinstance(tool_input, dict) else {}
    logger.warning("anthropic returned no tool_use block: %s", data)
    return None
|
||||
|
|
@ -63,3 +63,7 @@ SMTP_STARTTLS: bool = getenv("SMTP_STARTTLS", "true").lower() in ("true", "1", "
|
|||
|
||||
# --- App URL (used to build links in notifications) ---------------------------
|
||||
PUBLIC_URL: str = getenv("PUBLIC_URL", "https://flat.lab.moritz.run")
|
||||
|
||||
# --- LLM enrichment (Anthropic Haiku) -----------------------------------------
|
||||
ANTHROPIC_API_KEY: str = getenv("ANTHROPIC_API_KEY", "")
|
||||
ANTHROPIC_MODEL: str = getenv("ANTHROPIC_MODEL", "claude-haiku-4-5-20251001")
|
||||
|
|
|
|||
|
|
@ -47,3 +47,37 @@ document.addEventListener("DOMContentLoaded", tick);
|
|||
document.body && document.body.addEventListener("htmx:afterSwap", tick);
|
||||
setInterval(updateCountdowns, 1000);
|
||||
setInterval(updateRelativeTimes, 5000);
|
||||
|
||||
// Flat detail expand — lazily fetches /partials/wohnung/<id> into the sibling
|
||||
// .flat-detail container on first open, toggles visibility on subsequent clicks.
|
||||
// Event delegation survives HTMX swaps without re-binding on each poll.
|
||||
document.addEventListener("click", (ev) => {
  const btn = ev.target.closest(".flat-expand-btn");
  if (!btn) return;
  const row = btn.closest(".flat-row");
  if (!row) return;
  const pane = row.querySelector(".flat-detail");
  if (!pane) return;

  // Read the open/closed state from the pane, not from the button. The pane
  // carries hx-preserve; the button does not, so it is presumably re-rendered
  // without its "open" class on every list swap (NOTE(review): confirm).
  // Trusting the pane means one click always toggles what the user sees.
  const isOpen = pane.style.display === "block";
  if (isOpen) {
    pane.style.display = "none";
    btn.classList.remove("open");
    return;
  }
  btn.classList.add("open");
  pane.style.display = "block";
  if (pane.dataset.loaded) return;

  pane.innerHTML = '<div class="px-4 py-5 text-sm text-slate-500">lädt…</div>';
  const flatId = btn.dataset.flatId || "";
  fetch("/partials/wohnung/" + encodeURIComponent(flatId),
        { headers: { "HX-Request": "true" } })
    .then((r) => {
      // fetch() resolves for 4xx/5xx too; treat those as failures so an error
      // page is never injected and cached as the detail view.
      if (!r.ok) throw new Error("HTTP " + r.status);
      return r.text();
    })
    .then((html) => {
      pane.innerHTML = html;
      pane.dataset.loaded = "1";
    })
    .catch(() => {
      // `loaded` stays unset, so closing and re-opening retries the request.
      pane.innerHTML = '<div class="px-4 py-5 text-sm text-slate-500">Detail konnte nicht geladen werden.</div>';
    });
});
|
||||
|
|
|
|||
82
web/templates/_wohnung_detail.html
Normal file
82
web/templates/_wohnung_detail.html
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
{# Expanded detail for a single flat, loaded into #flat-detail-<id> via HTMX. #}
|
||||
{% if enrichment_status == 'pending' %}
|
||||
<div class="px-4 py-5 text-sm text-slate-500">Analyse läuft – kommt in wenigen Augenblicken zurück…</div>
|
||||
{% elif enrichment_status == 'failed' %}
|
||||
<div class="px-4 py-5 text-sm text-slate-500">
|
||||
Detail-Analyse konnte nicht abgerufen werden.
|
||||
<a href="{{ flat.link }}" target="_blank" rel="noopener">Zur Original-Anzeige →</a>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="px-4 py-4 space-y-4">
|
||||
{% if image_urls %}
|
||||
<div class="flat-gallery">
|
||||
{% for src in image_urls %}
|
||||
<a class="flat-gallery-tile" href="{{ src }}" target="_blank" rel="noopener">
|
||||
<img src="{{ src }}" loading="lazy" alt="Foto {{ loop.index }}">
|
||||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if enrichment and enrichment.description %}
|
||||
<p class="text-sm text-slate-700">{{ enrichment.description }}</p>
|
||||
{% endif %}
|
||||
|
||||
{% if enrichment %}
|
||||
<div class="grid grid-cols-2 md:grid-cols-3 gap-x-6 gap-y-1.5 text-xs">
|
||||
{% macro kv(label, value) %}
|
||||
{% if value is not none and value != '' %}
|
||||
<div class="flex justify-between gap-3 border-b border-soft py-1">
|
||||
<span class="text-slate-500">{{ label }}</span>
|
||||
<span class="text-slate-800 text-right">{{ value }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endmacro %}
|
||||
{{ kv('Adresse', enrichment.address) }}
|
||||
{{ kv('Zimmer', enrichment.rooms) }}
|
||||
{{ kv('Größe', enrichment.size_sqm ~ ' m²' if enrichment.size_sqm else none) }}
|
||||
{{ kv('Kaltmiete', enrichment.rent_cold ~ ' €' if enrichment.rent_cold else none) }}
|
||||
{{ kv('Nebenkosten', enrichment.utilities ~ ' €' if enrichment.utilities else none) }}
|
||||
{{ kv('Gesamtmiete', enrichment.rent_total ~ ' €' if enrichment.rent_total else none) }}
|
||||
{{ kv('Kaution', enrichment.deposit ~ ' €' if enrichment.deposit else none) }}
|
||||
{{ kv('Bezugsfrei ab', enrichment.available_from) }}
|
||||
{{ kv('Etage', enrichment.floor) }}
|
||||
{{ kv('Heizung', enrichment.heating) }}
|
||||
{{ kv('Energieausweis', enrichment.energy_certificate) }}
|
||||
{{ kv('Energiewert', enrichment.energy_value) }}
|
||||
{{ kv('Baujahr', enrichment.year_built) }}
|
||||
{{ kv('WBS', 'erforderlich' if enrichment.wbs_required else ('nicht erforderlich' if enrichment.wbs_required == false else none)) }}
|
||||
{{ kv('WBS-Typ', enrichment.wbs_type) }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if enrichment and enrichment.features %}
|
||||
<div class="flex flex-wrap gap-1.5">
|
||||
{% for f in enrichment.features %}<span class="chip chip-info">{{ f }}</span>{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
{% if enrichment and enrichment.pros %}
|
||||
<div>
|
||||
<div class="text-xs uppercase tracking-wide text-slate-500 mb-1">Pro</div>
|
||||
<ul class="text-sm space-y-1">
|
||||
{% for p in enrichment.pros %}<li>+ {{ p }}</li>{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if enrichment and enrichment.cons %}
|
||||
<div>
|
||||
<div class="text-xs uppercase tracking-wide text-slate-500 mb-1">Contra</div>
|
||||
<ul class="text-sm space-y-1">
|
||||
{% for c in enrichment.cons %}<li>− {{ c }}</li>{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="text-xs">
|
||||
<a href="{{ flat.link }}" target="_blank" rel="noopener">Zur Original-Anzeige →</a>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
|
@ -106,53 +106,63 @@
|
|||
|
||||
<!-- Liste -->
|
||||
<section class="view-list card">
|
||||
<div class="divide-y divide-soft">
|
||||
<div>
|
||||
{% for item in flats %}
|
||||
{% set f = item.row %}
|
||||
<div class="px-4 py-3 flex flex-col md:flex-row md:items-center gap-3">
|
||||
<div class="flex-1 min-w-0">
|
||||
<div class="flex items-center gap-2 flex-wrap">
|
||||
<a class="font-medium truncate" href="{{ f.link }}" target="_blank" rel="noopener noreferrer">
|
||||
{{ f.address or f.link }}
|
||||
</a>
|
||||
{% if item.last and item.last.finished_at is none %}
|
||||
<span class="chip chip-warn">läuft…</span>
|
||||
{% elif item.last and item.last.success == 1 %}<span class="chip chip-ok">beworben</span>
|
||||
{% elif item.last and item.last.success == 0 %}<span class="chip chip-bad">fehlgeschlagen</span>
|
||||
<div class="flat-row">
|
||||
<div class="px-4 py-3 flex flex-col md:flex-row md:items-center gap-3">
|
||||
<div class="flex-1 min-w-0">
|
||||
<div class="flex items-center gap-2 flex-wrap">
|
||||
<a class="font-medium truncate" href="{{ f.link }}" target="_blank" rel="noopener noreferrer">
|
||||
{{ f.address or f.link }}
|
||||
</a>
|
||||
{% if item.last and item.last.finished_at is none %}
|
||||
<span class="chip chip-warn">läuft…</span>
|
||||
{% elif item.last and item.last.success == 1 %}<span class="chip chip-ok">beworben</span>
|
||||
{% elif item.last and item.last.success == 0 %}<span class="chip chip-bad">fehlgeschlagen</span>
|
||||
{% endif %}
|
||||
{% if f.enrichment_status == 'pending' %}<span class="chip">analysiert…</span>
|
||||
{% elif f.enrichment_status == 'failed' %}<span class="chip chip-warn" title="Detail-Analyse fehlgeschlagen">?</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="text-xs text-slate-500 mt-0.5">
|
||||
{% if f.rooms %}{{ "%.1f"|format(f.rooms) }} Z{% endif %}
|
||||
{% if f.size %} · {{ "%.0f"|format(f.size) }} m²{% endif %}
|
||||
{% if f.total_rent %} · {{ "%.0f"|format(f.total_rent) }} €{% endif %}
|
||||
{% if f.wbs %} · WBS: {{ f.wbs }}{% endif %}
|
||||
· <span data-rel-utc="{{ f.discovered_at|iso_utc }}" title="{{ f.discovered_at|de_dt }}">…</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex gap-2 items-center">
|
||||
{% if apply_allowed and not (item.last and item.last.success == 1) %}
|
||||
{% set is_running = item.last and item.last.finished_at is none %}
|
||||
<form method="post" action="/actions/apply"
|
||||
hx-post="/actions/apply" hx-target="#wohnungen-body" hx-swap="outerHTML">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
<input type="hidden" name="flat_id" value="{{ f.id }}">
|
||||
<button class="btn btn-primary text-sm" type="submit"
|
||||
{% if is_running %}disabled{% endif %}
|
||||
hx-confirm="Bewerbung für {{ (f.address or f.link)|e }} starten?">
|
||||
{% if is_running %}läuft…{% else %}Bewerben{% endif %}
|
||||
</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="text-xs text-slate-500 mt-0.5">
|
||||
{% if f.rooms %}{{ "%.1f"|format(f.rooms) }} Z{% endif %}
|
||||
{% if f.size %} · {{ "%.0f"|format(f.size) }} m²{% endif %}
|
||||
{% if f.total_rent %} · {{ "%.0f"|format(f.total_rent) }} €{% endif %}
|
||||
{% if f.wbs %} · WBS: {{ f.wbs }}{% endif %}
|
||||
· <span data-rel-utc="{{ f.discovered_at|iso_utc }}" title="{{ f.discovered_at|de_dt }}">…</span>
|
||||
<form method="post" action="/actions/reject"
|
||||
hx-post="/actions/reject" hx-target="#wohnungen-body" hx-swap="outerHTML">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
<input type="hidden" name="flat_id" value="{{ f.id }}">
|
||||
<button class="btn btn-ghost text-sm" type="submit"
|
||||
hx-confirm="Ablehnen und aus der Liste entfernen?">
|
||||
Ablehnen
|
||||
</button>
|
||||
</form>
|
||||
<button type="button" class="flat-expand-btn" aria-label="Details"
|
||||
data-flat-id="{{ f.id }}">
|
||||
<svg width="14" height="14" viewBox="0 0 20 20" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polyline points="5 8 10 13 15 8"/></svg>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex gap-2">
|
||||
{% if apply_allowed and not (item.last and item.last.success == 1) %}
|
||||
{% set is_running = item.last and item.last.finished_at is none %}
|
||||
<form method="post" action="/actions/apply"
|
||||
hx-post="/actions/apply" hx-target="#wohnungen-body" hx-swap="outerHTML">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
<input type="hidden" name="flat_id" value="{{ f.id }}">
|
||||
<button class="btn btn-primary text-sm" type="submit"
|
||||
{% if is_running %}disabled{% endif %}
|
||||
hx-confirm="Bewerbung für {{ (f.address or f.link)|e }} starten?">
|
||||
{% if is_running %}läuft…{% else %}Bewerben{% endif %}
|
||||
</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
<form method="post" action="/actions/reject"
|
||||
hx-post="/actions/reject" hx-target="#wohnungen-body" hx-swap="outerHTML">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
<input type="hidden" name="flat_id" value="{{ f.id }}">
|
||||
<button class="btn btn-ghost text-sm" type="submit"
|
||||
hx-confirm="Ablehnen und aus der Liste entfernen?">
|
||||
Ablehnen
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
<div class="flat-detail" id="flat-detail-{{ f.id|flat_slug }}" hx-preserve="true"></div>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="px-4 py-8 text-center text-slate-500">
|
||||
|
|
@ -166,6 +176,19 @@
|
|||
</div>
|
||||
</section>
|
||||
|
||||
{% if is_admin %}{# Admin-only control: posts to /actions/enrich-all (plain form post, or hx-post when HTMX is active); with hx-swap=outerHTML the response replaces #wohnungen-body #}
|
||||
<section class="flex justify-end">
|
||||
<form method="post" action="/actions/enrich-all"
|
||||
hx-post="/actions/enrich-all" hx-target="#wohnungen-body" hx-swap="outerHTML">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
<button class="btn btn-ghost text-xs" type="submit"
|
||||
hx-confirm="Altbestand jetzt durch Haiku nachträglich anreichern? Kann einige Minuten dauern.">
|
||||
Altbestand anreichern
|
||||
</button>
|
||||
</form>
|
||||
</section>
|
||||
{% endif %}
|
||||
|
||||
{% if rejected_flats %}
|
||||
<section class="card">
|
||||
<details class="group">
|
||||
|
|
|
|||
|
|
@ -83,6 +83,29 @@
|
|||
body:has(#v_map:checked) .view-map { display: block; }
|
||||
#flats-map { height: 520px; border-radius: 10px; }
|
||||
|
||||
/* Flat detail expand: row separators, the round expand button, and the inline detail pane */
|
||||
.flat-row { border-top: 1px solid var(--border); }
|
||||
.flat-row:first-child { border-top: 0; } /* no separator above the first row */
|
||||
.flat-expand-btn { width: 1.75rem; height: 1.75rem; border-radius: 999px; /* equal width/height + large radius = circular button */
|
||||
display: inline-flex; align-items: center; justify-content: center;
|
||||
border: 1px solid var(--border); background: var(--surface);
|
||||
color: var(--muted); cursor: pointer; transition: transform .2s, background .15s; } /* animates the rotation and the hover background */
|
||||
.flat-expand-btn:hover { background: var(--ghost); color: var(--text); }
|
||||
.flat-expand-btn.open { transform: rotate(180deg); } /* flip the button, and the icon inside it, while the "open" class is set */
|
||||
.flat-detail { background: #fafcfe; border-top: 1px solid var(--border); }
|
||||
.flat-detail:empty { display: none; } /* hide the pane (including its border) while it has no content */
|
||||
|
||||
/* Normalised image gallery — every tile has the same aspect ratio */
|
||||
.flat-gallery { display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(160px, 1fr)); /* as many columns as fit, each at least 160px wide */
|
||||
gap: 8px; }
|
||||
.flat-gallery-tile { aspect-ratio: 4 / 3; overflow: hidden; /* overflow: hidden clips the hover zoom below to the tile */
|
||||
border-radius: 8px; border: 1px solid var(--border);
|
||||
background: #f0f5fa; display: block; }
|
||||
.flat-gallery-tile img { width: 100%; height: 100%; object-fit: cover; /* fill the tile, cropping instead of distorting */
|
||||
display: block; transition: transform .3s; }
|
||||
.flat-gallery-tile:hover img { transform: scale(1.04); } /* slight zoom on hover */
|
||||
|
||||
/* Leaflet popup — match the site's visual style */
|
||||
.leaflet-popup-content-wrapper { border-radius: 12px; box-shadow: 0 6px 20px rgba(16,37,63,.15); }
|
||||
.leaflet-popup-content { margin: 12px 14px; min-width: 220px; color: var(--text); }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue