From 3bb04210c4118c2041da1742abb2d9486c7ac1ad Mon Sep 17 00:00:00 2001 From: EiSiMo Date: Tue, 21 Apr 2026 17:56:57 +0200 Subject: [PATCH] secrets tab, drop commute filter, favicon, robust error reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Admin → Geheimnisse sub-tab lets you edit ANTHROPIC_API_KEY + BERLIN_WOHNEN_USERNAME/PASSWORD at runtime. Migration v7 adds a secrets(key,value,updated_at) table; startup seeds missing keys from env (idempotent). web reads secrets DB-first (env fallback) via llm._api_key(); alert fetches them from web /internal/secrets on each scan, passes them into Scraper(). Rotating creds no longer needs a redeploy. Masked display: 6 leading + 4 trailing chars, "…" in the middle. Blank form fields leave the stored value untouched. 2. Drop the max_morning_commute filter from UI + server + FILTER_KEYS + filter summary (the underlying Maps.calculate_score code stays for potential future re-enable). 3. /static/didi.webp wired as favicon via a <link> tag in base.html. 4. apply.open_page wraps page.goto in try/except so a failed load still produces a "goto.failed" step + screenshot instead of returning an empty forensics blob. networkidle + post-submission sleep are also made best-effort. The error ZIP export already writes screenshot+HTML per step and final_html — with this change every apply run leaves a reconstructable trail even when the listing is already offline. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- alert/main.py | 8 ++++- alert/scraper.py | 13 +++++--- alert/settings.py | 8 +++-- alert/web_client.py | 15 +++++++++ apply/actions.py | 29 +++++++++++++---- web/app.py | 45 ++++++++++++++++++++++---- web/db.py | 49 +++++++++++++++++++++++++++++ web/llm.py | 17 ++++++++-- web/templates/_admin_secrets.html | 46 +++++++++++++++++++++++++++ web/templates/_settings_filter.html | 4 --- web/templates/admin.html | 3 +- web/templates/base.html | 1 + 12 files changed, 211 insertions(+), 27 deletions(-) create mode 100644 web/templates/_admin_secrets.html diff --git a/alert/main.py b/alert/main.py index 42b4b66..0b63345 100644 --- a/alert/main.py +++ b/alert/main.py @@ -71,7 +71,13 @@ class FlatAlerter: def scan(self): logger.info("starting scan") - scraper = Scraper() + # Pull fresh creds from web each scan so admin edits take effect + # without a redeploy. + secrets = self.web.fetch_secrets() + scraper = Scraper( + username=secrets.get("BERLIN_WOHNEN_USERNAME", ""), + password=secrets.get("BERLIN_WOHNEN_PASSWORD", ""), + ) if not scraper.login(): return flats_data = scraper.get_flats() diff --git a/alert/scraper.py b/alert/scraper.py index bef85d7..3662e90 100644 --- a/alert/scraper.py +++ b/alert/scraper.py @@ -2,7 +2,7 @@ import requests import re import logging from bs4 import BeautifulSoup -from settings import BERLIN_WOHNEN_USERNAME, BERLIN_WOHNEN_PASSWORD +from settings import BERLIN_WOHNEN_USERNAME, BERLIN_WOHNEN_PASSWORD # env fallback logger = logging.getLogger("flat-alert") @@ -19,11 +19,16 @@ class Scraper: 'Upgrade-Insecure-Requests': '1', } - def __init__(self): + def __init__(self, username: str = "", password: str = ""): + self.username = username or BERLIN_WOHNEN_USERNAME + self.password = password or BERLIN_WOHNEN_PASSWORD self.session = requests.Session() self.session.headers.update(self.HEADERS) def login(self): + if not self.username or not self.password: + logger.critical("BERLIN_WOHNEN credentials 
missing — nothing to log in with") + return False logger.info("fetching inberlinwohnen.de login page") resp_login_page = self.session.get(self.URL_LOGIN, timeout=30) token_search = re.search(r'name="csrf-token" content="([^"]+)"', resp_login_page.text) @@ -34,8 +39,8 @@ class Scraper: payload_login = { '_token': csrf_token, - 'email': BERLIN_WOHNEN_USERNAME, - 'password': BERLIN_WOHNEN_PASSWORD, + 'email': self.username, + 'password': self.password, 'remember': 'on' } headers_login = self.HEADERS.copy() diff --git a/alert/settings.py b/alert/settings.py index f6eec2c..0a04d1f 100644 --- a/alert/settings.py +++ b/alert/settings.py @@ -20,7 +20,9 @@ TIME_INTERVALL: int = int(getenv("SLEEP_INTERVALL", "60")) WEB_URL: str = getenv("WEB_URL", "http://web:8000") INTERNAL_API_KEY: str = _required("INTERNAL_API_KEY") -# secrets +# secrets — BERLIN_WOHNEN_* env acts as bootstrap only; the web service +# owns the live creds in its DB (admin UI), alert fetches them on each +# scan via /internal/secrets. GMAPS_API_KEY is still env-only. GMAPS_API_KEY: str = _required("GMAPS_API_KEY") -BERLIN_WOHNEN_USERNAME: str = _required("BERLIN_WOHNEN_USERNAME") -BERLIN_WOHNEN_PASSWORD: str = _required("BERLIN_WOHNEN_PASSWORD") +BERLIN_WOHNEN_USERNAME: str = getenv("BERLIN_WOHNEN_USERNAME", "") +BERLIN_WOHNEN_PASSWORD: str = getenv("BERLIN_WOHNEN_PASSWORD", "") diff --git a/alert/web_client.py b/alert/web_client.py index 47fec9f..353be30 100644 --- a/alert/web_client.py +++ b/alert/web_client.py @@ -37,3 +37,18 @@ class WebClient: ) except requests.RequestException: pass + + def fetch_secrets(self) -> dict: + """Pull the current runtime secrets dict from web. 
Empty on failure + — caller falls back to env values.""" + try: + r = requests.get( + f"{self.base_url}/internal/secrets", + headers=self.headers, + timeout=5, + ) + if r.ok: + return r.json() or {} + except requests.RequestException as e: + logger.warning(f"secrets fetch failed: {e}") + return {} diff --git a/apply/actions.py b/apply/actions.py index c1aab40..318c7a2 100644 --- a/apply/actions.py +++ b/apply/actions.py @@ -188,11 +188,25 @@ async def open_page(url: str, recorder: Optional[Recorder] = None): if recorder: recorder.step("goto", detail=url) - await page.goto(url) - await page.wait_for_load_state("networkidle") - if recorder: - recorder.step("loaded", detail=page.url) - await recorder.snap(page, "loaded") + goto_failed = False + try: + await page.goto(url, timeout=45_000) + except Exception as e: + goto_failed = True + if recorder: + recorder.step("goto.failed", "error", f"{type(e).__name__}: {e}") + try: + await recorder.snap(page, "goto.failed") + except Exception: + logger.exception("snap after goto.failed failed") + if not goto_failed: + try: + await page.wait_for_load_state("networkidle", timeout=15_000) + except Exception: + pass + if recorder: + recorder.step("loaded", detail=page.url) + await recorder.snap(page, "loaded") try: yield page @@ -202,7 +216,10 @@ async def open_page(url: str, recorder: Optional[Recorder] = None): await recorder.finalize(page) except Exception: logger.exception("recorder.finalize failed") - await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS) + try: + await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS) + except Exception: + pass await browser.close() diff --git a/web/app.py b/web/app.py index 1730bd8..b2ee03e 100644 --- a/web/app.py +++ b/web/app.py @@ -78,6 +78,7 @@ apply_client = ApplyClient() @asynccontextmanager async def lifespan(_app: FastAPI): db.init_db() + db.seed_secrets_from_env() bootstrap_admin() retention.start() logger.info("web service ready") @@ -220,7 +221,7 @@ def _last_scrape_utc() -> str: 
FILTER_KEYS = ("rooms_min", "rooms_max", "max_rent", "min_size", - "max_morning_commute", "wbs_required", "max_age_hours") + "wbs_required", "max_age_hours") def _has_filters(f) -> bool: @@ -263,8 +264,6 @@ def _filter_summary(f) -> str: parts.append(f"≤ {int(f['max_rent'])} €") if f["min_size"]: parts.append(f"≥ {int(f['min_size'])} m²") - if f["max_morning_commute"]: - parts.append(f"≤ {int(f['max_morning_commute'])} min") if f["wbs_required"] == "yes": parts.append("WBS") elif f["wbs_required"] == "no": @@ -552,7 +551,6 @@ async def action_save_filters( rooms_max: str = Form(""), max_rent: str = Form(""), min_size: str = Form(""), - max_morning_commute: str = Form(""), wbs_required: str = Form(""), max_age_hours: str = Form(""), user=Depends(require_user), @@ -575,7 +573,6 @@ async def action_save_filters( "rooms_max": _f(rooms_max), "max_rent": _f(max_rent), "min_size": _f(min_size), - "max_morning_commute": _f(max_morning_commute), "wbs_required": (wbs_required or "").strip(), "max_age_hours": _i(max_age_hours), }) @@ -825,7 +822,15 @@ def tab_logs_legacy(): return RedirectResponse("/admin/protokoll", status_code=301) -ADMIN_SECTIONS = ("protokoll", "benutzer") +ADMIN_SECTIONS = ("protokoll", "benutzer", "geheimnisse") + + +def _mask_secret(value: str) -> str: + if not value: + return "" + if len(value) <= 10: + return "•" * len(value) + return value[:6] + "…" + value[-4:] @app.get("/admin", response_class=HTMLResponse) @@ -857,6 +862,10 @@ def tab_admin(request: Request, section: str): }) elif section == "benutzer": ctx["users"] = db.list_users() + elif section == "geheimnisse": + secrets = db.all_secrets() + ctx["secrets_masked"] = {k: _mask_secret(secrets.get(k, "")) for k in db.SECRET_KEYS} + ctx["secret_flash"] = request.query_params.get("ok") return templates.TemplateResponse("admin.html", ctx) @@ -1073,6 +1082,23 @@ async def action_users_disable( return RedirectResponse("/admin/benutzer", status_code=303) +@app.post("/actions/secrets") +async def 
action_secrets(request: Request, admin=Depends(require_admin)): + form = await request.form() + require_csrf(admin["id"], form.get("csrf", "")) + changed = [] + for key in db.SECRET_KEYS: + raw = (form.get(key) or "").strip() + if not raw: + continue + db.set_secret(key, raw) + changed.append(key) + db.log_audit(admin["username"], "secrets.updated", + ",".join(changed) or "no-op", + user_id=admin["id"], ip=client_ip(request)) + return RedirectResponse("/admin/geheimnisse?ok=1", status_code=303) + + @app.post("/actions/enrich-all") async def action_enrich_all( request: Request, @@ -1181,3 +1207,10 @@ async def internal_report_error( context=payload.get("context"), ) return {"status": "ok"} + + +@app.get("/internal/secrets") +async def internal_secrets(_g: None = Depends(require_internal)): + """Give sibling services (alert) the current runtime creds that the admin + may have edited via the UI, so no redeploy is needed when rotating.""" + return db.all_secrets() diff --git a/web/db.py b/web/db.py index 362f162..3adb02d 100644 --- a/web/db.py +++ b/web/db.py @@ -206,6 +206,14 @@ MIGRATIONS: list[str] = [ """ ALTER TABLE user_filters ADD COLUMN max_age_hours INTEGER; """, + # 0007: secrets table — API keys / scraper creds editable from admin UI + """ + CREATE TABLE IF NOT EXISTS secrets ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + """, ] @@ -237,6 +245,47 @@ def now_iso() -> str: return datetime.now(timezone.utc).isoformat(timespec="seconds") +# --------------------------------------------------------------------------- +# Secrets (admin-editable, source of truth for runtime creds) +# --------------------------------------------------------------------------- + +SECRET_KEYS = ("ANTHROPIC_API_KEY", "BERLIN_WOHNEN_USERNAME", "BERLIN_WOHNEN_PASSWORD") + + +def get_secret(key: str) -> Optional[str]: + row = _conn.execute("SELECT value FROM secrets WHERE key = ?", (key,)).fetchone() + return row["value"] if row else None + + +def 
set_secret(key: str, value: str) -> None: + with _lock: + _conn.execute( + "INSERT INTO secrets(key, value, updated_at) VALUES (?, ?, ?) " + "ON CONFLICT(key) DO UPDATE SET value = excluded.value, " + " updated_at = excluded.updated_at", + (key, value, now_iso()), + ) + + +def all_secrets() -> dict[str, str]: + rows = _conn.execute("SELECT key, value FROM secrets").fetchall() + return {r["key"]: r["value"] for r in rows} + + +def seed_secrets_from_env() -> None: + """Copy env values into the DB for any secret key that's still empty. + Idempotent: existing DB values are never overwritten.""" + import os + for k in SECRET_KEYS: + existing = get_secret(k) + if existing: + continue + env_val = os.environ.get(k, "") + if env_val: + set_secret(k, env_val) + logger.info("seeded secret %s from env", k) + + # --------------------------------------------------------------------------- # System state # --------------------------------------------------------------------------- diff --git a/web/llm.py b/web/llm.py index de9299b..79b7527 100644 --- a/web/llm.py +++ b/web/llm.py @@ -12,10 +12,22 @@ from typing import Optional import requests +import db from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL logger = logging.getLogger("web.llm") + +def _api_key() -> str: + """DB-first so the admin UI can rotate the key without a redeploy.""" + try: + k = db.get_secret("ANTHROPIC_API_KEY") + if k: + return k + except Exception: + pass + return ANTHROPIC_API_KEY + API_URL = "https://api.anthropic.com/v1/messages" API_VERSION = "2023-06-01" @@ -54,7 +66,8 @@ SYSTEM_PROMPT = ( def select_flat_image_urls(candidates: list[str], page_url: str, timeout: int = 30) -> list[str]: """Return the LLM-filtered subset, or the original list on any failure.""" - if not ANTHROPIC_API_KEY or not candidates: + key = _api_key() + if not key or not candidates: return candidates user_text = ( @@ -78,7 +91,7 @@ def select_flat_image_urls(candidates: list[str], page_url: str, r = requests.post( API_URL, 
headers={ - "x-api-key": ANTHROPIC_API_KEY, + "x-api-key": key, "anthropic-version": API_VERSION, "content-type": "application/json", }, diff --git a/web/templates/_admin_secrets.html b/web/templates/_admin_secrets.html new file mode 100644 index 0000000..ce5937e --- /dev/null +++ b/web/templates/_admin_secrets.html @@ -0,0 +1,46 @@ +

Geheimnisse

+

+ Hier hinterlegte Werte überschreiben die entsprechenden Umgebungsvariablen zur Laufzeit. + Leerlassen bedeutet: der gespeicherte Wert bleibt unverändert. +

+ +{% if secret_flash %} +
Gespeichert.
+{% endif %} + +
+ + +
+ + +

Wird für die Bild-URL-Auswahl durch Haiku verwendet.

+
+ +
+

inberlinwohnen.de Login

+
+
+ + +
+
+ + +
+
+

+ Wird vom Scraper beim Login auf inberlinwohnen.de verwendet. Änderungen greifen + automatisch beim nächsten Scrape-Zyklus. +

+
+ + +
diff --git a/web/templates/_settings_filter.html b/web/templates/_settings_filter.html index a21604e..a78726f 100644 --- a/web/templates/_settings_filter.html +++ b/web/templates/_settings_filter.html @@ -26,10 +26,6 @@ -
- - -