secrets tab, drop commute filter, favicon, robust error reports

1. Admin → Geheimnisse sub-tab lets you edit ANTHROPIC_API_KEY +
   BERLIN_WOHNEN_USERNAME/PASSWORD at runtime. Migration v7 adds a
   secrets(key,value,updated_at) table; startup seeds missing keys from
   env (idempotent). web reads secrets DB-first (env fallback) via
   llm._api_key(); alert fetches them from web /internal/secrets on each
   scan, passes them into Scraper(). Rotating creds no longer needs a
   redeploy.
   Masked display: 6 leading + 4 trailing chars, "…" in the middle.
   Blank form fields leave the stored value untouched.

2. Drop the max_morning_commute filter from UI + server + FILTER_KEYS +
   filter summary (the underlying Maps.calculate_score code stays for
   potential future re-enable).

3. /static/didi.webp wired as favicon via <link rel="icon"> in base.html.

4. apply.open_page wraps page.goto in try/except so a failed load still
   produces a "goto.failed" step + screenshot instead of returning an
   empty forensics blob. networkidle + post-submission sleep are also
   made best-effort. The error ZIP export already writes screenshot+HTML
   per step and final_html — with this change every apply run leaves a
   reconstructable trail even when the listing is already offline.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
EiSiMo 2026-04-21 17:56:57 +02:00
parent 9fbe1ce728
commit 3bb04210c4
12 changed files with 211 additions and 27 deletions

View file

@ -71,7 +71,13 @@ class FlatAlerter:
def scan(self):
logger.info("starting scan")
scraper = Scraper()
# Pull fresh creds from web each scan so admin edits take effect
# without a redeploy.
secrets = self.web.fetch_secrets()
scraper = Scraper(
username=secrets.get("BERLIN_WOHNEN_USERNAME", ""),
password=secrets.get("BERLIN_WOHNEN_PASSWORD", ""),
)
if not scraper.login():
return
flats_data = scraper.get_flats()

View file

@ -2,7 +2,7 @@ import requests
import re
import logging
from bs4 import BeautifulSoup
from settings import BERLIN_WOHNEN_USERNAME, BERLIN_WOHNEN_PASSWORD
from settings import BERLIN_WOHNEN_USERNAME, BERLIN_WOHNEN_PASSWORD # env fallback
logger = logging.getLogger("flat-alert")
@ -19,11 +19,16 @@ class Scraper:
'Upgrade-Insecure-Requests': '1',
}
def __init__(self):
def __init__(self, username: str = "", password: str = ""):
    """Set up credentials and a requests session for the scraper.

    Args:
        username: Login name; when empty, the module-level
            BERLIN_WOHNEN_USERNAME setting is used instead.
        password: Login password; when empty, the module-level
            BERLIN_WOHNEN_PASSWORD setting is used instead.
    """
    # Explicit arguments win; falsy values fall back to the settings import.
    self.username = username if username else BERLIN_WOHNEN_USERNAME
    self.password = password if password else BERLIN_WOHNEN_PASSWORD
    # One shared session carrying the class-level default headers.
    http_session = requests.Session()
    http_session.headers.update(self.HEADERS)
    self.session = http_session
def login(self):
if not self.username or not self.password:
logger.critical("BERLIN_WOHNEN credentials missing — nothing to log in with")
return False
logger.info("fetching inberlinwohnen.de login page")
resp_login_page = self.session.get(self.URL_LOGIN, timeout=30)
token_search = re.search(r'name="csrf-token" content="([^"]+)"', resp_login_page.text)
@ -34,8 +39,8 @@ class Scraper:
payload_login = {
'_token': csrf_token,
'email': BERLIN_WOHNEN_USERNAME,
'password': BERLIN_WOHNEN_PASSWORD,
'email': self.username,
'password': self.password,
'remember': 'on'
}
headers_login = self.HEADERS.copy()

View file

@ -20,7 +20,9 @@ TIME_INTERVALL: int = int(getenv("SLEEP_INTERVALL", "60"))
WEB_URL: str = getenv("WEB_URL", "http://web:8000")
INTERNAL_API_KEY: str = _required("INTERNAL_API_KEY")
# secrets
# secrets — BERLIN_WOHNEN_* env acts as bootstrap only; the web service
# owns the live creds in its DB (admin UI), alert fetches them on each
# scan via /internal/secrets. GMAPS_API_KEY is still env-only.
GMAPS_API_KEY: str = _required("GMAPS_API_KEY")
BERLIN_WOHNEN_USERNAME: str = _required("BERLIN_WOHNEN_USERNAME")
BERLIN_WOHNEN_PASSWORD: str = _required("BERLIN_WOHNEN_PASSWORD")
BERLIN_WOHNEN_USERNAME: str = getenv("BERLIN_WOHNEN_USERNAME", "")
BERLIN_WOHNEN_PASSWORD: str = getenv("BERLIN_WOHNEN_PASSWORD", "")

View file

@ -37,3 +37,18 @@ class WebClient:
)
except requests.RequestException:
pass
def fetch_secrets(self) -> dict:
    """Pull the current runtime secrets dict from web.

    Issues a GET against ``{base_url}/internal/secrets`` using the
    client's headers and a 5 second timeout.

    Returns:
        The decoded JSON object. An empty dict is returned on any failure
        (network error, non-2xx status, body that is not valid JSON, or a
        JSON payload that is not an object) so the caller can fall back
        to env values.
    """
    try:
        r = requests.get(
            f"{self.base_url}/internal/secrets",
            headers=self.headers,
            timeout=5,
        )
        if not r.ok:
            # Surface auth/routing problems instead of failing silently.
            logger.warning("secrets fetch failed: HTTP %s", r.status_code)
            return {}
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body on requests versions
        # where the decode error is not a RequestException subclass.
        logger.warning("secrets fetch failed: %s", e)
        return {}
    if not isinstance(data, dict):
        # Callers use .get() on the result; never hand them a list/None.
        if data:
            logger.warning(
                "secrets fetch failed: unexpected payload type %s",
                type(data).__name__,
            )
        return {}
    return data

View file

@ -188,11 +188,25 @@ async def open_page(url: str, recorder: Optional[Recorder] = None):
if recorder:
recorder.step("goto", detail=url)
await page.goto(url)
await page.wait_for_load_state("networkidle")
if recorder:
recorder.step("loaded", detail=page.url)
await recorder.snap(page, "loaded")
goto_failed = False
try:
await page.goto(url, timeout=45_000)
except Exception as e:
goto_failed = True
if recorder:
recorder.step("goto.failed", "error", f"{type(e).__name__}: {e}")
try:
await recorder.snap(page, "goto.failed")
except Exception:
logger.exception("snap after goto.failed failed")
if not goto_failed:
try:
await page.wait_for_load_state("networkidle", timeout=15_000)
except Exception:
pass
if recorder:
recorder.step("loaded", detail=page.url)
await recorder.snap(page, "loaded")
try:
yield page
@ -202,7 +216,10 @@ async def open_page(url: str, recorder: Optional[Recorder] = None):
await recorder.finalize(page)
except Exception:
logger.exception("recorder.finalize failed")
await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS)
try:
await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS)
except Exception:
pass
await browser.close()

View file

@ -78,6 +78,7 @@ apply_client = ApplyClient()
@asynccontextmanager
async def lifespan(_app: FastAPI):
db.init_db()
db.seed_secrets_from_env()
bootstrap_admin()
retention.start()
logger.info("web service ready")
@ -220,7 +221,7 @@ def _last_scrape_utc() -> str:
FILTER_KEYS = ("rooms_min", "rooms_max", "max_rent", "min_size",
"max_morning_commute", "wbs_required", "max_age_hours")
"wbs_required", "max_age_hours")
def _has_filters(f) -> bool:
@ -263,8 +264,6 @@ def _filter_summary(f) -> str:
parts.append(f"{int(f['max_rent'])}")
if f["min_size"]:
parts.append(f"{int(f['min_size'])}")
if f["max_morning_commute"]:
parts.append(f"{int(f['max_morning_commute'])} min")
if f["wbs_required"] == "yes":
parts.append("WBS")
elif f["wbs_required"] == "no":
@ -552,7 +551,6 @@ async def action_save_filters(
rooms_max: str = Form(""),
max_rent: str = Form(""),
min_size: str = Form(""),
max_morning_commute: str = Form(""),
wbs_required: str = Form(""),
max_age_hours: str = Form(""),
user=Depends(require_user),
@ -575,7 +573,6 @@ async def action_save_filters(
"rooms_max": _f(rooms_max),
"max_rent": _f(max_rent),
"min_size": _f(min_size),
"max_morning_commute": _f(max_morning_commute),
"wbs_required": (wbs_required or "").strip(),
"max_age_hours": _i(max_age_hours),
})
@ -825,7 +822,15 @@ def tab_logs_legacy():
return RedirectResponse("/admin/protokoll", status_code=301)
ADMIN_SECTIONS = ("protokoll", "benutzer")
ADMIN_SECTIONS = ("protokoll", "benutzer", "geheimnisse")
def _mask_secret(value: str) -> str:
if not value:
return ""
if len(value) <= 10:
return "" * len(value)
return value[:6] + "" + value[-4:]
@app.get("/admin", response_class=HTMLResponse)
@ -857,6 +862,10 @@ def tab_admin(request: Request, section: str):
})
elif section == "benutzer":
ctx["users"] = db.list_users()
elif section == "geheimnisse":
secrets = db.all_secrets()
ctx["secrets_masked"] = {k: _mask_secret(secrets.get(k, "")) for k in db.SECRET_KEYS}
ctx["secret_flash"] = request.query_params.get("ok")
return templates.TemplateResponse("admin.html", ctx)
@ -1073,6 +1082,23 @@ async def action_users_disable(
return RedirectResponse("/admin/benutzer", status_code=303)
@app.post("/actions/secrets")
async def action_secrets(request: Request, admin=Depends(require_admin)):
    """Store the secrets submitted from the admin form.

    Validates the CSRF token, then writes every key from db.SECRET_KEYS
    whose form field is non-blank after stripping. Blank fields are
    skipped, so the stored value stays as it was. An audit entry lists the
    written keys ("no-op" when none), and the client is redirected back to
    the secrets tab.
    """
    form = await request.form()
    require_csrf(admin["id"], form.get("csrf", ""))

    updated_keys = []
    for secret_name in db.SECRET_KEYS:
        submitted = (form.get(secret_name) or "").strip()
        if submitted:
            db.set_secret(secret_name, submitted)
            updated_keys.append(secret_name)

    audit_detail = ",".join(updated_keys) or "no-op"
    db.log_audit(
        admin["username"],
        "secrets.updated",
        audit_detail,
        user_id=admin["id"],
        ip=client_ip(request),
    )
    return RedirectResponse("/admin/geheimnisse?ok=1", status_code=303)
@app.post("/actions/enrich-all")
async def action_enrich_all(
request: Request,
@ -1181,3 +1207,10 @@ async def internal_report_error(
context=payload.get("context"),
)
return {"status": "ok"}
@app.get("/internal/secrets")
async def internal_secrets(_g: None = Depends(require_internal)):
    """Return the stored runtime secrets to sibling services (alert).

    Lets credentials edited in the admin UI reach other services without
    a redeploy. Access is gated by the require_internal dependency.
    """
    current_secrets = db.all_secrets()
    return current_secrets

View file

@ -206,6 +206,14 @@ MIGRATIONS: list[str] = [
"""
ALTER TABLE user_filters ADD COLUMN max_age_hours INTEGER;
""",
# 0007: secrets table — API keys / scraper creds editable from admin UI
"""
CREATE TABLE IF NOT EXISTS secrets (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at TEXT NOT NULL
);
""",
]
@ -237,6 +245,47 @@ def now_iso() -> str:
return datetime.now(timezone.utc).isoformat(timespec="seconds")
# ---------------------------------------------------------------------------
# Secrets (admin-editable, source of truth for runtime creds)
# ---------------------------------------------------------------------------
SECRET_KEYS = ("ANTHROPIC_API_KEY", "BERLIN_WOHNEN_USERNAME", "BERLIN_WOHNEN_PASSWORD")
def get_secret(key: str) -> Optional[str]:
    """Fetch the stored value for one secret key, or None if absent."""
    cursor = _conn.execute("SELECT value FROM secrets WHERE key = ?", (key,))
    found = cursor.fetchone()
    if not found:
        return None
    return found["value"]
def set_secret(key: str, value: str) -> None:
    """Insert or overwrite one secret, stamping it with the current time."""
    upsert_sql = (
        "INSERT INTO secrets(key, value, updated_at) VALUES (?, ?, ?) "
        "ON CONFLICT(key) DO UPDATE SET value = excluded.value, "
        " updated_at = excluded.updated_at"
    )
    # Writes are made while holding the module lock.
    with _lock:
        _conn.execute(upsert_sql, (key, value, now_iso()))
def all_secrets() -> dict[str, str]:
    """Return every stored secret as a key -> value mapping."""
    collected: dict[str, str] = {}
    for record in _conn.execute("SELECT key, value FROM secrets").fetchall():
        collected[record["key"]] = record["value"]
    return collected
def seed_secrets_from_env() -> None:
    """Fill missing secrets in the DB from same-named environment variables.

    For each key in SECRET_KEYS that has no non-empty stored value, a
    non-empty environment value is written to the DB. Keys that already
    hold a value are left alone, so repeated calls change nothing further.
    """
    import os

    for name in SECRET_KEYS:
        if get_secret(name):
            # Already populated; never overwrite what the DB holds.
            continue
        from_env = os.environ.get(name, "")
        if not from_env:
            continue
        set_secret(name, from_env)
        logger.info("seeded secret %s from env", name)
# ---------------------------------------------------------------------------
# System state
# ---------------------------------------------------------------------------

View file

@ -12,10 +12,22 @@ from typing import Optional
import requests
import db
from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL
logger = logging.getLogger("web.llm")
def _api_key() -> str:
    """Return the Anthropic API key, DB-first with the env setting as fallback.

    Reading from the DB first lets the admin UI rotate the key without a
    redeploy.

    Returns:
        The stored ANTHROPIC_API_KEY secret when it is non-empty,
        otherwise the ANTHROPIC_API_KEY value imported from settings.
    """
    try:
        stored = db.get_secret("ANTHROPIC_API_KEY")
    except Exception:
        # Best-effort lookup: a DB problem must not break the caller, but
        # leave a trace so a silently ignored rotated key can be diagnosed.
        logger.warning(
            "reading ANTHROPIC_API_KEY from DB failed; using env fallback",
            exc_info=True,
        )
        stored = None
    return stored or ANTHROPIC_API_KEY
API_URL = "https://api.anthropic.com/v1/messages"
API_VERSION = "2023-06-01"
@ -54,7 +66,8 @@ SYSTEM_PROMPT = (
def select_flat_image_urls(candidates: list[str], page_url: str,
timeout: int = 30) -> list[str]:
"""Return the LLM-filtered subset, or the original list on any failure."""
if not ANTHROPIC_API_KEY or not candidates:
key = _api_key()
if not key or not candidates:
return candidates
user_text = (
@ -78,7 +91,7 @@ def select_flat_image_urls(candidates: list[str], page_url: str,
r = requests.post(
API_URL,
headers={
"x-api-key": ANTHROPIC_API_KEY,
"x-api-key": key,
"anthropic-version": API_VERSION,
"content-type": "application/json",
},

View file

@ -0,0 +1,46 @@
<h2 class="font-semibold mb-2">Geheimnisse</h2>
<p class="text-sm text-slate-600 mb-4">
Hier hinterlegte Werte überschreiben die entsprechenden Umgebungsvariablen zur Laufzeit.
Leerlassen bedeutet: der gespeicherte Wert bleibt unverändert.
</p>
{% if secret_flash %}
<div class="chip chip-ok mb-4">Gespeichert.</div>
{% endif %}
<form method="post" action="/actions/secrets" class="space-y-5 max-w-xl"
autocomplete="off" data-lpignore="true" data-1p-ignore data-bwignore data-form-type="other">
<input type="hidden" name="csrf" value="{{ csrf }}">
<div>
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">Anthropic API Key</label>
<input class="input" type="text" name="ANTHROPIC_API_KEY"
placeholder="{{ secrets_masked.ANTHROPIC_API_KEY or 'nicht gesetzt' }}"
autocomplete="off" data-lpignore="true" data-1p-ignore data-bwignore>
<p class="text-xs text-slate-500 mt-1">Wird für die Bild-URL-Auswahl durch Haiku verwendet.</p>
</div>
<div class="border-t border-soft pt-4">
<h3 class="font-semibold text-sm mb-3">inberlinwohnen.de Login</h3>
<div class="grid grid-cols-1 md:grid-cols-2 gap-3">
<div>
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">Benutzer / E-Mail</label>
<input class="input" type="text" name="BERLIN_WOHNEN_USERNAME"
placeholder="{{ secrets_masked.BERLIN_WOHNEN_USERNAME or 'nicht gesetzt' }}"
autocomplete="off" data-lpignore="true" data-1p-ignore data-bwignore>
</div>
<div>
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">Passwort</label>
<input class="input" type="password" name="BERLIN_WOHNEN_PASSWORD"
placeholder="{{ secrets_masked.BERLIN_WOHNEN_PASSWORD or 'nicht gesetzt' }}"
autocomplete="new-password" data-lpignore="true" data-1p-ignore data-bwignore>
</div>
</div>
<p class="text-xs text-slate-500 mt-2">
Wird vom Scraper beim Login auf inberlinwohnen.de verwendet. Änderungen greifen
automatisch beim nächsten Scrape-Zyklus.
</p>
</div>
<button class="btn btn-primary" type="submit">Speichern</button>
</form>

View file

@ -26,10 +26,6 @@
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">min Größe (m²)</label>
<input class="input" name="min_size" value="{{ filters.min_size if filters.min_size is not none else '' }}">
</div>
<div>
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">max Anfahrt morgens (min)</label>
<input class="input" name="max_morning_commute" value="{{ filters.max_morning_commute if filters.max_morning_commute is not none else '' }}">
</div>
<div>
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">WBS benötigt</label>
<select class="input" name="wbs_required">

View file

@ -6,7 +6,7 @@
{% block content %}
<section class="card">
<nav class="flex flex-wrap border-b border-soft px-4">
{% set sections = [('protokoll','Protokoll'), ('benutzer','Benutzer')] %}
{% set sections = [('protokoll','Protokoll'), ('benutzer','Benutzer'), ('geheimnisse','Geheimnisse')] %}
{% for key, label in sections %}
<a href="/admin/{{ key }}"
class="tab {% if section == key %}active{% endif %}">{{ label }}</a>
@ -16,6 +16,7 @@
<div class="p-5">
{% if section == 'protokoll' %}{% include "_admin_logs.html" %}
{% elif section == 'benutzer' %}{% include "_settings_users.html" %}
{% elif section == 'geheimnisse' %}{% include "_admin_secrets.html" %}
{% endif %}
</div>
</section>

View file

@ -4,6 +4,7 @@
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="robots" content="noindex, nofollow">
<link rel="icon" type="image/webp" href="/static/didi.webp">
<title>{% block title %}wohnungsdidi{% endblock %}</title>
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://unpkg.com/htmx.org@2.0.3"></script>