enrichment: Haiku flat details + image gallery on expand
apply service
- POST /internal/fetch-listing: headless Playwright fetch of a listing URL,
returns {html, image_urls[], final_url}. Uses the same browser
fingerprint/profile as the apply run so bot guards don't kick in
web service
- New enrichment pipeline (web/enrichment.py):
/internal/flats → upsert → kick() enrichment in a background thread
1. POST /internal/fetch-listing on the apply service
2. llm.extract_flat_details(html, url) — Haiku tool-use call returns
structured JSON (address, rooms, rent, description, pros/cons, etc.)
3. Download each image directly to /data/flats/<slug>/NN.<ext>
4. Persist enrichment_json + image_count + enrichment_status on the flat
- llm.py: minimal Anthropic /v1/messages wrapper, no SDK
- DB migration v5 adds enrichment_json/_status/_updated_at + image_count
- Admin "Altbestand anreichern" button (POST /actions/enrich-all) queues
backfill for all pending/failed rows; runs in a detached task
- GET /partials/wohnung/<id> renders _wohnung_detail.html
- GET /flat-images/<slug>/<n> serves the downloaded image
UI
- Chevron on each list row toggles an inline detail pane (HTMX fetch on
first open, hx-preserve keeps it open across the 3–30 s polls)
- CSS .flat-gallery normalises image tiles to a 4/3 aspect with object-fit:
cover so different source sizes align cleanly
- "analysiert…" / "?" chips on the list reflect enrichment_status
Config
- ANTHROPIC_API_KEY + ANTHROPIC_MODEL wired into docker-compose's web
service (default model: claude-haiku-4-5-20251001)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2609d3504a
commit
eb66284172
11 changed files with 688 additions and 44 deletions
66
web/app.py
66
web/app.py
|
|
@ -15,6 +15,7 @@ import hmac
|
|||
import io
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import sqlite3
|
||||
import zipfile
|
||||
from contextlib import asynccontextmanager
|
||||
|
|
@ -33,6 +34,7 @@ except Exception:
|
|||
BERLIN_TZ = timezone.utc
|
||||
|
||||
import db
|
||||
import enrichment
|
||||
import notifications
|
||||
import retention
|
||||
from apply_client import ApplyClient, _row_to_profile
|
||||
|
|
@ -119,6 +121,7 @@ def _iso_utc(s: str | None) -> str:
|
|||
|
||||
templates.env.filters["de_dt"] = _de_dt
|
||||
templates.env.filters["iso_utc"] = _iso_utc
|
||||
# Jinja filter "flat_slug": coerces the value to a string (None/empty -> "")
# and passes it to enrichment.flat_slug, so templates can derive the same slug
# that the /flat-images/<slug>/<n> URLs are built from.
templates.env.filters["flat_slug"] = lambda s: enrichment.flat_slug(str(s or ""))
|
||||
|
||||
|
||||
@app.middleware("http")
|
||||
|
|
@ -473,6 +476,53 @@ def partial_wohnungen(request: Request, user=Depends(require_user)):
|
|||
return templates.TemplateResponse("_wohnungen_body.html", ctx)
|
||||
|
||||
|
||||
@app.get("/partials/wohnung/{flat_id:path}", response_class=HTMLResponse)
def partial_wohnung_detail(request: Request, flat_id: str, user=Depends(require_user)):
    """Render the detail partial (_wohnung_detail.html) for a single flat.

    Looks the flat up via db.get_flat, decodes its stored enrichment JSON
    (falling back to None when it is missing or cannot be parsed) and builds
    one /flat-images/<slug>/<n> URL per stored image, numbered from 1.

    Raises:
        HTTPException: 404 when db.get_flat returns nothing for `flat_id`.
    """
    flat = db.get_flat(flat_id)
    if not flat:
        raise HTTPException(404)

    # Best-effort decode: an unparsable payload is treated like "no data".
    parsed = None
    if flat["enrichment_json"]:
        try:
            parsed = json.loads(flat["enrichment_json"])
        except Exception:
            parsed = None

    slug = enrichment.flat_slug(flat_id)
    total_images = int(flat["image_count"] or 0)
    gallery = [f"/flat-images/{slug}/{number}" for number in range(1, total_images + 1)]

    return templates.TemplateResponse(
        "_wohnung_detail.html",
        {
            "request": request,
            "flat": flat,
            "enrichment": parsed,
            "enrichment_status": flat["enrichment_status"],
            "image_urls": gallery,
        },
    )
|
||||
|
||||
|
||||
@app.get("/flat-images/{slug}/{index}")
def flat_image(slug: str, index: int):
    """Serve a downloaded flat image by slug + 1-based index.

    `slug` is expected to be the value produced by
    enrichment.flat_slug(flat_id). Only ASCII alphanumeric slugs are accepted,
    so the value cannot contain path separators or dots when it is joined onto
    enrichment.IMAGES_DIR.

    Args:
        slug: Name of the per-flat directory under enrichment.IMAGES_DIR.
        index: 1-based image number; only 1..99 is accepted.

    Returns:
        A Response carrying the file bytes, an image/* media type and a
        one-hour public Cache-Control header.

    Raises:
        HTTPException: 404 when the slug or index is rejected, the directory
            does not exist, or no regular file named NN.<ext> is found.
    """
    # NOTE(review): unlike partial_wohnung_detail this route declares no
    # require_user dependency — confirm that serving images unauthenticated
    # is intentional.
    # str.isalnum() alone also accepts non-ASCII letters/digits; require ASCII
    # so the accepted set matches the filesystem-safe slug this route expects.
    if not (slug.isascii() and slug.isalnum()) or not 1 <= index <= 99:
        raise HTTPException(404)

    image_dir = enrichment.IMAGES_DIR / slug
    # is_dir() rather than exists(): iterating a non-directory would raise
    # and surface as a 500 instead of a 404.
    if not image_dir.is_dir():
        raise HTTPException(404)

    # Files are named NN.<ext>; the extension is not known up front, so match
    # on the zero-padded prefix. Sorting makes the pick deterministic if more
    # than one file shares the same index.
    prefix = f"{index:02d}."
    for candidate in sorted(image_dir.iterdir()):
        if not candidate.name.startswith(prefix) or not candidate.is_file():
            continue
        media = mimetypes.guess_type(candidate.name)[0] or ""
        # The files are downloaded third-party content; never echo a guessed
        # non-image type (e.g. text/html) back to the browser.
        if not media.startswith("image/"):
            media = "image/jpeg"
        return Response(
            content=candidate.read_bytes(),
            media_type=media,
            headers={"Cache-Control": "public, max-age=3600"},
        )
    raise HTTPException(404)
|
||||
|
||||
|
||||
@app.post("/actions/filters")
|
||||
async def action_save_filters(
|
||||
request: Request,
|
||||
|
|
@ -974,6 +1024,19 @@ async def action_users_disable(
|
|||
return RedirectResponse("/einstellungen/benutzer", status_code=303)
|
||||
|
||||
|
||||
@app.post("/actions/enrich-all")
async def action_enrich_all(
    request: Request,
    csrf: str = Form(...),
    admin=Depends(require_admin),
):
    """Start the enrichment backfill and record it in the audit log.

    Validates the CSRF token for the admin resolved by require_admin, calls
    enrichment.kick_backfill(), writes an "enrichment.backfill" audit entry
    containing the value it returned, and answers with whatever
    _wohnungen_partial_or_redirect produces for this request.
    """
    require_csrf(admin["id"], csrf)

    queued_total = enrichment.kick_backfill()
    audit_detail = f"queued={queued_total}"
    db.log_audit(
        admin["username"],
        "enrichment.backfill",
        audit_detail,
        user_id=admin["id"],
        ip=client_ip(request),
    )

    return _wohnungen_partial_or_redirect(request, admin)
|
||||
|
||||
|
||||
@app.post("/actions/users/delete")
|
||||
async def action_users_delete(
|
||||
request: Request,
|
||||
|
|
@ -1010,6 +1073,9 @@ async def internal_submit_flat(
|
|||
if not is_new:
|
||||
return {"status": "duplicate"}
|
||||
|
||||
# Kick LLM enrichment + image download for this fresh flat.
|
||||
enrichment.kick(str(payload["id"]))
|
||||
|
||||
for u in db.list_users():
|
||||
if u["disabled"]:
|
||||
continue
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue