enrichment: drop LLM for structured info, dedup images by sha + phash
Per user request, the LLM is no longer asked to extract rooms/size/rent/WBS — those come from the inberlinwohnen.de scraper, which is reliable. Haiku is now used for one narrow job: pick which <img> URLs from the listing page are actual flat photos (vs. logos, badges, ads, employee portraits). On any LLM failure the unfiltered candidate list passes through.

Image dedup runs in two tiers:
1. SHA256 of bytes — drops different URLs that point to byte-identical files
2. Perceptual hash (Pillow + imagehash, Hamming distance ≤ 5) — drops the "same image at a different resolution" duplicates from srcset / CDN variants that were filling galleries with 2–4× copies

UI:
- Wohnungsliste falls back to scraper-only display (rooms/size/rent/wbs)
- Detail panel only shows images + "Zur Original-Anzeige →"; description / features / pros & cons / kv table are gone
- Per-row "erneut versuchen" link + the "analysiert…/?" status chips were tied to LLM extraction and are removed; the header "Bilder nachladen (N)" button still surfaces pending/failed batches for admins

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
374368e4af
commit
0aa4c6c2bb
6 changed files with 137 additions and 233 deletions
|
|
@@ -1,14 +1,13 @@
|
|||
{# Expanded detail for a single flat, loaded into #flat-detail-<id> via HTMX. #}
|
||||
{# Expanded detail for a single flat — only shows downloaded images. #}
|
||||
{% if enrichment_status == 'pending' %}
|
||||
<div class="px-4 py-5 text-sm text-slate-500">Analyse läuft – kommt in wenigen Augenblicken zurück…</div>
|
||||
<div class="px-4 py-5 text-sm text-slate-500">Bilder werden abgerufen…</div>
|
||||
{% elif enrichment_status == 'failed' %}
|
||||
<div class="px-4 py-5 text-sm text-slate-500">
|
||||
Detail-Analyse konnte nicht abgerufen werden.
|
||||
<a href="{{ flat.link }}" target="_blank" rel="noopener">Zur Original-Anzeige →</a>
|
||||
Bilder konnten nicht geladen werden.
|
||||
<a href="{{ flat.link }}" target="_blank" rel="noopener" class="ml-1">Zur Original-Anzeige →</a>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="px-4 py-4 space-y-4">
|
||||
{% if image_urls %}
|
||||
{% elif image_urls %}
|
||||
<div class="px-4 py-4 space-y-3">
|
||||
<div class="flat-gallery">
|
||||
{% for src in image_urls %}
|
||||
<a class="flat-gallery-tile" href="{{ src }}" target="_blank" rel="noopener">
|
||||
|
|
@@ -16,67 +15,13 @@
|
|||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if enrichment and enrichment.description %}
|
||||
<p class="text-sm text-slate-700">{{ enrichment.description }}</p>
|
||||
{% endif %}
|
||||
|
||||
{% if enrichment %}
|
||||
<div class="grid grid-cols-2 md:grid-cols-3 gap-x-6 gap-y-1.5 text-xs">
|
||||
{% macro kv(label, value) %}
|
||||
{% if value is not none and value != '' %}
|
||||
<div class="flex justify-between gap-3 border-b border-soft py-1">
|
||||
<span class="text-slate-500">{{ label }}</span>
|
||||
<span class="text-slate-800 text-right">{{ value }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endmacro %}
|
||||
{{ kv('Adresse', enrichment.address) }}
|
||||
{{ kv('Zimmer', enrichment.rooms) }}
|
||||
{{ kv('Größe', enrichment.size_sqm ~ ' m²' if enrichment.size_sqm else none) }}
|
||||
{{ kv('Kaltmiete', enrichment.rent_cold ~ ' €' if enrichment.rent_cold else none) }}
|
||||
{{ kv('Nebenkosten', enrichment.utilities ~ ' €' if enrichment.utilities else none) }}
|
||||
{{ kv('Gesamtmiete', enrichment.rent_total ~ ' €' if enrichment.rent_total else none) }}
|
||||
{{ kv('Kaution', enrichment.deposit ~ ' €' if enrichment.deposit else none) }}
|
||||
{{ kv('Bezugsfrei ab', enrichment.available_from) }}
|
||||
{{ kv('Etage', enrichment.floor) }}
|
||||
{{ kv('Heizung', enrichment.heating) }}
|
||||
{{ kv('Energieausweis', enrichment.energy_certificate) }}
|
||||
{{ kv('Energiewert', enrichment.energy_value) }}
|
||||
{{ kv('Baujahr', enrichment.year_built) }}
|
||||
{{ kv('WBS', 'erforderlich' if enrichment.wbs_required else ('nicht erforderlich' if enrichment.wbs_required == false else none)) }}
|
||||
{{ kv('WBS-Typ', enrichment.wbs_type) }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if enrichment and enrichment.features %}
|
||||
<div class="flex flex-wrap gap-1.5">
|
||||
{% for f in enrichment.features %}<span class="chip chip-info">{{ f }}</span>{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
{% if enrichment and enrichment.pros %}
|
||||
<div>
|
||||
<div class="text-xs uppercase tracking-wide text-slate-500 mb-1">Pro</div>
|
||||
<ul class="text-sm space-y-1">
|
||||
{% for p in enrichment.pros %}<li>+ {{ p }}</li>{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if enrichment and enrichment.cons %}
|
||||
<div>
|
||||
<div class="text-xs uppercase tracking-wide text-slate-500 mb-1">Contra</div>
|
||||
<ul class="text-sm space-y-1">
|
||||
{% for c in enrichment.cons %}<li>− {{ c }}</li>{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="text-xs">
|
||||
<a href="{{ flat.link }}" target="_blank" rel="noopener">Zur Original-Anzeige →</a>
|
||||
</div>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="px-4 py-5 text-sm text-slate-500">
|
||||
Keine Bilder gefunden.
|
||||
<a href="{{ flat.link }}" target="_blank" rel="noopener" class="ml-1">Zur Original-Anzeige →</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
|
|
|||
|
|
@@ -90,8 +90,8 @@
|
|||
hx-post="/actions/enrich-all" hx-target="#wohnungen-body" hx-swap="outerHTML">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
<button class="btn btn-ghost text-xs" type="submit"
|
||||
hx-confirm="Altbestand jetzt durch Haiku nachträglich anreichern? Kann einige Minuten dauern.">
|
||||
Anreichern ({{ enrichment_counts.pending + enrichment_counts.failed }})
|
||||
hx-confirm="Bilder für ausstehende Wohnungen nachladen? Kann einige Minuten dauern.">
|
||||
Bilder nachladen ({{ enrichment_counts.pending + enrichment_counts.failed }})
|
||||
</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
|
|
@@ -133,52 +133,16 @@
|
|||
{% elif item.last and item.last.success == 1 %}<span class="chip chip-ok">beworben</span>
|
||||
{% elif item.last and item.last.success == 0 %}<span class="chip chip-bad">fehlgeschlagen</span>
|
||||
{% endif %}
|
||||
{% if f.enrichment_status == 'pending' %}<span class="chip">analysiert…</span>
|
||||
{% elif f.enrichment_status == 'failed' %}<span class="chip chip-warn" title="Detail-Analyse fehlgeschlagen">?</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="text-xs text-slate-500 mt-0.5">
|
||||
{% if f.enrichment_status == 'pending' %}
|
||||
Infos werden abgerufen…
|
||||
· <span data-rel-utc="{{ f.discovered_at|iso_utc }}" title="{{ f.discovered_at|de_dt }}">…</span>
|
||||
{% elif f.enrichment_status == 'failed' %}
|
||||
{% set err = (item.enrichment or {}).get('_error') or 'unbekannt' %}
|
||||
<span title="{{ err }}">Fehler beim Abrufen der Infos</span>
|
||||
{% if is_admin %}
|
||||
<form method="post" action="/actions/enrich-flat" class="inline"
|
||||
hx-post="/actions/enrich-flat" hx-target="#wohnungen-body" hx-swap="outerHTML">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
<input type="hidden" name="flat_id" value="{{ f.id }}">
|
||||
<button type="submit" class="underline text-slate-600 hover:text-slate-900 ml-1">erneut versuchen</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
· <span data-rel-utc="{{ f.discovered_at|iso_utc }}" title="{{ f.discovered_at|de_dt }}">…</span>
|
||||
{% else %}
|
||||
{# LLM first, scraper as fallback. The scraper data
|
||||
from inberlinwohnen.de is reliable; we only
|
||||
replace it when the LLM has a concrete value. #}
|
||||
{% set e = item.enrichment or {} %}
|
||||
{% set rooms = e.rooms if e.rooms is not none else f.rooms %}
|
||||
{% set size = e.size_sqm if e.size_sqm is not none else f.size %}
|
||||
{% set rent = e.rent_total or e.rent_cold or f.total_rent %}
|
||||
{% if e.wbs_required is sameas true %}
|
||||
{% set wbs_label = 'WBS: ' ~ (e.wbs_type or 'erforderlich') %}
|
||||
{% elif e.wbs_required is sameas false %}
|
||||
{% set wbs_label = 'ohne WBS' %}
|
||||
{% elif f.wbs == 'erforderlich' %}
|
||||
{% set wbs_label = 'WBS: erforderlich' %}
|
||||
{% elif f.wbs == 'nicht erforderlich' %}
|
||||
{% set wbs_label = 'ohne WBS' %}
|
||||
{% else %}
|
||||
{% set wbs_label = '' %}
|
||||
{% endif %}
|
||||
{% set parts = [] %}
|
||||
{% if rooms %}{% set _ = parts.append('%g Z'|format(rooms)) %}{% endif %}
|
||||
{% if size %}{% set _ = parts.append('%.0f m²'|format(size)) %}{% endif %}
|
||||
{% if rent %}{% set _ = parts.append('%.0f €'|format(rent)) %}{% endif %}
|
||||
{% if wbs_label %}{% set _ = parts.append(wbs_label) %}{% endif %}
|
||||
{{ parts|join(' · ') }}{% if parts %} · {% endif %}<span data-rel-utc="{{ f.discovered_at|iso_utc }}" title="{{ f.discovered_at|de_dt }}">…</span>
|
||||
{% set parts = [] %}
|
||||
{% if f.rooms %}{% set _ = parts.append('%g Z'|format(f.rooms)) %}{% endif %}
|
||||
{% if f.size %}{% set _ = parts.append('%.0f m²'|format(f.size)) %}{% endif %}
|
||||
{% if f.total_rent %}{% set _ = parts.append('%.0f €'|format(f.total_rent)) %}{% endif %}
|
||||
{% if f.wbs == 'erforderlich' %}{% set _ = parts.append('WBS: erforderlich') %}
|
||||
{% elif f.wbs == 'nicht erforderlich' %}{% set _ = parts.append('ohne WBS') %}
|
||||
{% endif %}
|
||||
{{ parts|join(' · ') }}{% if parts %} · {% endif %}<span data-rel-utc="{{ f.discovered_at|iso_utc }}" title="{{ f.discovered_at|de_dt }}">…</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex gap-2 items-center">
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue