secrets tab, drop commute filter, favicon, robust error reports
1. Admin → Geheimnisse sub-tab lets you edit ANTHROPIC_API_KEY + BERLIN_WOHNEN_USERNAME/PASSWORD at runtime. Migration v7 adds a secrets(key,value,updated_at) table; startup seeds missing keys from env (idempotent). web reads secrets DB-first (env fallback) via llm._api_key(); alert fetches them from web /internal/secrets on each scan, passes them into Scraper(). Rotating creds no longer needs a redeploy. Masked display: 6 leading + 4 trailing chars, "…" in the middle. Blank form fields leave the stored value untouched. 2. Drop the max_morning_commute filter from UI + server + FILTER_KEYS + filter summary (the underlying Maps.calculate_score code stays for potential future re-enable). 3. /static/didi.webp wired as favicon via <link rel="icon"> in base.html. 4. apply.open_page wraps page.goto in try/except so a failed load still produces a "goto.failed" step + screenshot instead of returning an empty forensics blob. networkidle + post-submission sleep are also made best-effort. The error ZIP export already writes screenshot+HTML per step and final_html — with this change every apply run leaves a reconstructable trail even when the listing is already offline. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9fbe1ce728
commit
3bb04210c4
12 changed files with 211 additions and 27 deletions
|
|
@ -71,7 +71,13 @@ class FlatAlerter:
|
|||
|
||||
def scan(self):
|
||||
logger.info("starting scan")
|
||||
scraper = Scraper()
|
||||
# Pull fresh creds from web each scan so admin edits take effect
|
||||
# without a redeploy.
|
||||
secrets = self.web.fetch_secrets()
|
||||
scraper = Scraper(
|
||||
username=secrets.get("BERLIN_WOHNEN_USERNAME", ""),
|
||||
password=secrets.get("BERLIN_WOHNEN_PASSWORD", ""),
|
||||
)
|
||||
if not scraper.login():
|
||||
return
|
||||
flats_data = scraper.get_flats()
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import requests
|
|||
import re
|
||||
import logging
|
||||
from bs4 import BeautifulSoup
|
||||
from settings import BERLIN_WOHNEN_USERNAME, BERLIN_WOHNEN_PASSWORD
|
||||
from settings import BERLIN_WOHNEN_USERNAME, BERLIN_WOHNEN_PASSWORD # env fallback
|
||||
|
||||
logger = logging.getLogger("flat-alert")
|
||||
|
||||
|
|
@ -19,11 +19,16 @@ class Scraper:
|
|||
'Upgrade-Insecure-Requests': '1',
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, username: str = "", password: str = ""):
|
||||
self.username = username or BERLIN_WOHNEN_USERNAME
|
||||
self.password = password or BERLIN_WOHNEN_PASSWORD
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(self.HEADERS)
|
||||
|
||||
def login(self):
|
||||
if not self.username or not self.password:
|
||||
logger.critical("BERLIN_WOHNEN credentials missing — nothing to log in with")
|
||||
return False
|
||||
logger.info("fetching inberlinwohnen.de login page")
|
||||
resp_login_page = self.session.get(self.URL_LOGIN, timeout=30)
|
||||
token_search = re.search(r'name="csrf-token" content="([^"]+)"', resp_login_page.text)
|
||||
|
|
@ -34,8 +39,8 @@ class Scraper:
|
|||
|
||||
payload_login = {
|
||||
'_token': csrf_token,
|
||||
'email': BERLIN_WOHNEN_USERNAME,
|
||||
'password': BERLIN_WOHNEN_PASSWORD,
|
||||
'email': self.username,
|
||||
'password': self.password,
|
||||
'remember': 'on'
|
||||
}
|
||||
headers_login = self.HEADERS.copy()
|
||||
|
|
|
|||
|
|
@ -20,7 +20,9 @@ TIME_INTERVALL: int = int(getenv("SLEEP_INTERVALL", "60"))
|
|||
WEB_URL: str = getenv("WEB_URL", "http://web:8000")
|
||||
INTERNAL_API_KEY: str = _required("INTERNAL_API_KEY")
|
||||
|
||||
# secrets
|
||||
# secrets — BERLIN_WOHNEN_* env acts as bootstrap only; the web service
|
||||
# owns the live creds in its DB (admin UI), alert fetches them on each
|
||||
# scan via /internal/secrets. GMAPS_API_KEY is still env-only.
|
||||
GMAPS_API_KEY: str = _required("GMAPS_API_KEY")
|
||||
BERLIN_WOHNEN_USERNAME: str = _required("BERLIN_WOHNEN_USERNAME")
|
||||
BERLIN_WOHNEN_PASSWORD: str = _required("BERLIN_WOHNEN_PASSWORD")
|
||||
BERLIN_WOHNEN_USERNAME: str = getenv("BERLIN_WOHNEN_USERNAME", "")
|
||||
BERLIN_WOHNEN_PASSWORD: str = getenv("BERLIN_WOHNEN_PASSWORD", "")
|
||||
|
|
|
|||
|
|
@ -37,3 +37,18 @@ class WebClient:
|
|||
)
|
||||
except requests.RequestException:
|
||||
pass
|
||||
|
||||
def fetch_secrets(self) -> dict:
    """Pull the current runtime secrets dict from the web service.

    Issues a GET to ``{base_url}/internal/secrets`` using the client's
    headers and a 5 second timeout.

    Returns:
        The decoded JSON object. An empty dict is returned on any failure
        (transport error, non-2xx status, undecodable body, or a body that
        is not a JSON object) so the caller can fall back to env values.
    """
    try:
        r = requests.get(
            f"{self.base_url}/internal/secrets",
            headers=self.headers,
            timeout=5,
        )
    except requests.RequestException as e:
        logger.warning("secrets fetch failed: %s", e)
        return {}

    if not r.ok:
        # Previously silent; an auth or routing problem should be visible.
        logger.warning("secrets fetch failed: HTTP %s", r.status_code)
        return {}

    try:
        data = r.json()
    except ValueError as e:
        # Catches the JSON decode error on every requests version: it is
        # always a ValueError, but only a RequestException on newer ones.
        logger.warning("secrets fetch failed: invalid JSON: %s", e)
        return {}

    if not isinstance(data, dict):
        # Callers use .get() on the result, so anything but an object
        # (null, list, string) is treated as "no secrets".
        if data:
            logger.warning(
                "secrets fetch failed: expected JSON object, got %s",
                type(data).__name__,
            )
        return {}
    return data
|
||||
|
|
|
|||
|
|
@ -188,11 +188,25 @@ async def open_page(url: str, recorder: Optional[Recorder] = None):
|
|||
|
||||
if recorder:
|
||||
recorder.step("goto", detail=url)
|
||||
await page.goto(url)
|
||||
await page.wait_for_load_state("networkidle")
|
||||
if recorder:
|
||||
recorder.step("loaded", detail=page.url)
|
||||
await recorder.snap(page, "loaded")
|
||||
goto_failed = False
|
||||
try:
|
||||
await page.goto(url, timeout=45_000)
|
||||
except Exception as e:
|
||||
goto_failed = True
|
||||
if recorder:
|
||||
recorder.step("goto.failed", "error", f"{type(e).__name__}: {e}")
|
||||
try:
|
||||
await recorder.snap(page, "goto.failed")
|
||||
except Exception:
|
||||
logger.exception("snap after goto.failed failed")
|
||||
if not goto_failed:
|
||||
try:
|
||||
await page.wait_for_load_state("networkidle", timeout=15_000)
|
||||
except Exception:
|
||||
pass
|
||||
if recorder:
|
||||
recorder.step("loaded", detail=page.url)
|
||||
await recorder.snap(page, "loaded")
|
||||
|
||||
try:
|
||||
yield page
|
||||
|
|
@ -202,7 +216,10 @@ async def open_page(url: str, recorder: Optional[Recorder] = None):
|
|||
await recorder.finalize(page)
|
||||
except Exception:
|
||||
logger.exception("recorder.finalize failed")
|
||||
await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS)
|
||||
try:
|
||||
await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS)
|
||||
except Exception:
|
||||
pass
|
||||
await browser.close()
|
||||
|
||||
|
||||
|
|
|
|||
45
web/app.py
45
web/app.py
|
|
@ -78,6 +78,7 @@ apply_client = ApplyClient()
|
|||
@asynccontextmanager
|
||||
async def lifespan(_app: FastAPI):
|
||||
db.init_db()
|
||||
db.seed_secrets_from_env()
|
||||
bootstrap_admin()
|
||||
retention.start()
|
||||
logger.info("web service ready")
|
||||
|
|
@ -220,7 +221,7 @@ def _last_scrape_utc() -> str:
|
|||
|
||||
|
||||
FILTER_KEYS = ("rooms_min", "rooms_max", "max_rent", "min_size",
|
||||
"max_morning_commute", "wbs_required", "max_age_hours")
|
||||
"wbs_required", "max_age_hours")
|
||||
|
||||
|
||||
def _has_filters(f) -> bool:
|
||||
|
|
@ -263,8 +264,6 @@ def _filter_summary(f) -> str:
|
|||
parts.append(f"≤ {int(f['max_rent'])} €")
|
||||
if f["min_size"]:
|
||||
parts.append(f"≥ {int(f['min_size'])} m²")
|
||||
if f["max_morning_commute"]:
|
||||
parts.append(f"≤ {int(f['max_morning_commute'])} min")
|
||||
if f["wbs_required"] == "yes":
|
||||
parts.append("WBS")
|
||||
elif f["wbs_required"] == "no":
|
||||
|
|
@ -552,7 +551,6 @@ async def action_save_filters(
|
|||
rooms_max: str = Form(""),
|
||||
max_rent: str = Form(""),
|
||||
min_size: str = Form(""),
|
||||
max_morning_commute: str = Form(""),
|
||||
wbs_required: str = Form(""),
|
||||
max_age_hours: str = Form(""),
|
||||
user=Depends(require_user),
|
||||
|
|
@ -575,7 +573,6 @@ async def action_save_filters(
|
|||
"rooms_max": _f(rooms_max),
|
||||
"max_rent": _f(max_rent),
|
||||
"min_size": _f(min_size),
|
||||
"max_morning_commute": _f(max_morning_commute),
|
||||
"wbs_required": (wbs_required or "").strip(),
|
||||
"max_age_hours": _i(max_age_hours),
|
||||
})
|
||||
|
|
@ -825,7 +822,15 @@ def tab_logs_legacy():
|
|||
return RedirectResponse("/admin/protokoll", status_code=301)
|
||||
|
||||
|
||||
ADMIN_SECTIONS = ("protokoll", "benutzer")
|
||||
ADMIN_SECTIONS = ("protokoll", "benutzer", "geheimnisse")
|
||||
|
||||
|
||||
def _mask_secret(value: str) -> str:
|
||||
if not value:
|
||||
return ""
|
||||
if len(value) <= 10:
|
||||
return "•" * len(value)
|
||||
return value[:6] + "…" + value[-4:]
|
||||
|
||||
|
||||
@app.get("/admin", response_class=HTMLResponse)
|
||||
|
|
@ -857,6 +862,10 @@ def tab_admin(request: Request, section: str):
|
|||
})
|
||||
elif section == "benutzer":
|
||||
ctx["users"] = db.list_users()
|
||||
elif section == "geheimnisse":
|
||||
secrets = db.all_secrets()
|
||||
ctx["secrets_masked"] = {k: _mask_secret(secrets.get(k, "")) for k in db.SECRET_KEYS}
|
||||
ctx["secret_flash"] = request.query_params.get("ok")
|
||||
return templates.TemplateResponse("admin.html", ctx)
|
||||
|
||||
|
||||
|
|
@ -1073,6 +1082,23 @@ async def action_users_disable(
|
|||
return RedirectResponse("/admin/benutzer", status_code=303)
|
||||
|
||||
|
||||
@app.post("/actions/secrets")
async def action_secrets(request: Request, admin=Depends(require_admin)):
    """Store the secrets submitted from the admin form.

    The CSRF token from the form is checked first. Each key in
    ``db.SECRET_KEYS`` is then read from the form and stripped; blank
    fields are skipped so the stored value stays untouched. An audit entry
    listing the changed keys (or ``no-op``) is written, and the client is
    redirected back to the secrets tab with ``ok=1``.
    """
    form = await request.form()
    require_csrf(admin["id"], form.get("csrf", ""))

    updated_keys = []
    for name in db.SECRET_KEYS:
        submitted = (form.get(name) or "").strip()
        if submitted:
            db.set_secret(name, submitted)
            updated_keys.append(name)

    summary = ",".join(updated_keys) or "no-op"
    db.log_audit(
        admin["username"],
        "secrets.updated",
        summary,
        user_id=admin["id"],
        ip=client_ip(request),
    )
    return RedirectResponse("/admin/geheimnisse?ok=1", status_code=303)
|
||||
|
||||
|
||||
@app.post("/actions/enrich-all")
|
||||
async def action_enrich_all(
|
||||
request: Request,
|
||||
|
|
@ -1181,3 +1207,10 @@ async def internal_report_error(
|
|||
context=payload.get("context"),
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.get("/internal/secrets")
async def internal_secrets(_g: None = Depends(require_internal)):
    """Return every stored secret to sibling services (alert).

    Lets credentials edited in the admin UI reach the other services
    without a redeploy. Access is gated by the ``require_internal``
    dependency.
    """
    current = db.all_secrets()
    return current
|
||||
|
|
|
|||
49
web/db.py
49
web/db.py
|
|
@ -206,6 +206,14 @@ MIGRATIONS: list[str] = [
|
|||
"""
|
||||
ALTER TABLE user_filters ADD COLUMN max_age_hours INTEGER;
|
||||
""",
|
||||
# 0007: secrets table — API keys / scraper creds editable from admin UI
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS secrets (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL,
|
||||
updated_at TEXT NOT NULL
|
||||
);
|
||||
""",
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -237,6 +245,47 @@ def now_iso() -> str:
|
|||
return datetime.now(timezone.utc).isoformat(timespec="seconds")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Secrets (admin-editable, source of truth for runtime creds)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SECRET_KEYS = ("ANTHROPIC_API_KEY", "BERLIN_WOHNEN_USERNAME", "BERLIN_WOHNEN_PASSWORD")
|
||||
|
||||
|
||||
def get_secret(key: str) -> Optional[str]:
    """Look up one secret by key; return its value, or None if no row exists."""
    cursor = _conn.execute("SELECT value FROM secrets WHERE key = ?", (key,))
    match = cursor.fetchone()
    if not match:
        return None
    return match["value"]
|
||||
|
||||
|
||||
def set_secret(key: str, value: str) -> None:
    """Insert the secret, or overwrite value and updated_at if the key exists.

    The statement runs while holding the module-level ``_lock``; the
    timestamp comes from ``now_iso()``.
    """
    upsert_sql = (
        "INSERT INTO secrets(key, value, updated_at) VALUES (?, ?, ?) "
        "ON CONFLICT(key) DO UPDATE SET value = excluded.value, "
        " updated_at = excluded.updated_at"
    )
    with _lock:
        params = (key, value, now_iso())
        _conn.execute(upsert_sql, params)
|
||||
|
||||
|
||||
def all_secrets() -> dict[str, str]:
    """Return every row of the secrets table as a key -> value mapping."""
    result: dict[str, str] = {}
    for row in _conn.execute("SELECT key, value FROM secrets").fetchall():
        result[row["key"]] = row["value"]
    return result
|
||||
|
||||
|
||||
def seed_secrets_from_env() -> None:
    """Fill missing secrets in the DB from same-named environment variables.

    For each key in ``SECRET_KEYS``: a non-empty stored value is left alone
    (a stored empty string counts as missing); otherwise a non-empty env
    value is written and the seeding is logged. Running it again changes
    nothing for keys that already hold a value.
    """
    import os

    for name in SECRET_KEYS:
        if get_secret(name):
            continue
        from_env = os.environ.get(name, "")
        if not from_env:
            continue
        set_secret(name, from_env)
        logger.info("seeded secret %s from env", name)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# System state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
17
web/llm.py
17
web/llm.py
|
|
@ -12,10 +12,22 @@ from typing import Optional
|
|||
|
||||
import requests
|
||||
|
||||
import db
|
||||
from settings import ANTHROPIC_API_KEY, ANTHROPIC_MODEL
|
||||
|
||||
logger = logging.getLogger("web.llm")
|
||||
|
||||
|
||||
def _api_key() -> str:
    """Return the Anthropic API key, preferring the DB over the environment.

    The DB is consulted first so the admin UI can rotate the key without a
    redeploy. If the lookup raises, or yields no value, the key imported
    from settings is returned.

    Returns:
        The stored ``ANTHROPIC_API_KEY`` secret when present and non-empty,
        otherwise ``ANTHROPIC_API_KEY`` from settings.
    """
    try:
        k = db.get_secret("ANTHROPIC_API_KEY")
    except Exception:
        # Still best-effort, but leave a trace: a silent fallback would hide
        # a broken secrets table behind a possibly stale env key.
        logger.warning(
            "reading ANTHROPIC_API_KEY from DB failed; falling back to env",
            exc_info=True,
        )
        return ANTHROPIC_API_KEY
    return k or ANTHROPIC_API_KEY
|
||||
|
||||
API_URL = "https://api.anthropic.com/v1/messages"
|
||||
API_VERSION = "2023-06-01"
|
||||
|
||||
|
|
@ -54,7 +66,8 @@ SYSTEM_PROMPT = (
|
|||
def select_flat_image_urls(candidates: list[str], page_url: str,
|
||||
timeout: int = 30) -> list[str]:
|
||||
"""Return the LLM-filtered subset, or the original list on any failure."""
|
||||
if not ANTHROPIC_API_KEY or not candidates:
|
||||
key = _api_key()
|
||||
if not key or not candidates:
|
||||
return candidates
|
||||
|
||||
user_text = (
|
||||
|
|
@ -78,7 +91,7 @@ def select_flat_image_urls(candidates: list[str], page_url: str,
|
|||
r = requests.post(
|
||||
API_URL,
|
||||
headers={
|
||||
"x-api-key": ANTHROPIC_API_KEY,
|
||||
"x-api-key": key,
|
||||
"anthropic-version": API_VERSION,
|
||||
"content-type": "application/json",
|
||||
},
|
||||
|
|
|
|||
46
web/templates/_admin_secrets.html
Normal file
46
web/templates/_admin_secrets.html
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
<h2 class="font-semibold mb-2">Geheimnisse</h2>
|
||||
<p class="text-sm text-slate-600 mb-4">
|
||||
Hier hinterlegte Werte überschreiben die entsprechenden Umgebungsvariablen zur Laufzeit.
|
||||
Leerlassen bedeutet: der gespeicherte Wert bleibt unverändert.
|
||||
</p>
|
||||
|
||||
{% if secret_flash %}
|
||||
<div class="chip chip-ok mb-4">Gespeichert.</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="post" action="/actions/secrets" class="space-y-5 max-w-xl"
|
||||
autocomplete="off" data-lpignore="true" data-1p-ignore data-bwignore data-form-type="other">
|
||||
<input type="hidden" name="csrf" value="{{ csrf }}">
|
||||
|
||||
<div>
|
||||
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">Anthropic API Key</label>
|
||||
<input class="input" type="text" name="ANTHROPIC_API_KEY"
|
||||
placeholder="{{ secrets_masked.ANTHROPIC_API_KEY or 'nicht gesetzt' }}"
|
||||
autocomplete="off" data-lpignore="true" data-1p-ignore data-bwignore>
|
||||
<p class="text-xs text-slate-500 mt-1">Wird für die Bild-URL-Auswahl durch Haiku verwendet.</p>
|
||||
</div>
|
||||
|
||||
<div class="border-t border-soft pt-4">
|
||||
<h3 class="font-semibold text-sm mb-3">inberlinwohnen.de Login</h3>
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 gap-3">
|
||||
<div>
|
||||
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">Benutzer / E-Mail</label>
|
||||
<input class="input" type="text" name="BERLIN_WOHNEN_USERNAME"
|
||||
placeholder="{{ secrets_masked.BERLIN_WOHNEN_USERNAME or 'nicht gesetzt' }}"
|
||||
autocomplete="off" data-lpignore="true" data-1p-ignore data-bwignore>
|
||||
</div>
|
||||
<div>
|
||||
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">Passwort</label>
|
||||
<input class="input" type="password" name="BERLIN_WOHNEN_PASSWORD"
|
||||
placeholder="{{ secrets_masked.BERLIN_WOHNEN_PASSWORD or 'nicht gesetzt' }}"
|
||||
autocomplete="new-password" data-lpignore="true" data-1p-ignore data-bwignore>
|
||||
</div>
|
||||
</div>
|
||||
<p class="text-xs text-slate-500 mt-2">
|
||||
Wird vom Scraper beim Login auf inberlinwohnen.de verwendet. Änderungen greifen
|
||||
automatisch beim nächsten Scrape-Zyklus.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary" type="submit">Speichern</button>
|
||||
</form>
|
||||
|
|
@ -26,10 +26,6 @@
|
|||
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">min Größe (m²)</label>
|
||||
<input class="input" name="min_size" value="{{ filters.min_size if filters.min_size is not none else '' }}">
|
||||
</div>
|
||||
<div>
|
||||
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">max Anfahrt morgens (min)</label>
|
||||
<input class="input" name="max_morning_commute" value="{{ filters.max_morning_commute if filters.max_morning_commute is not none else '' }}">
|
||||
</div>
|
||||
<div>
|
||||
<label class="block text-xs uppercase tracking-wide text-slate-500 mb-1">WBS benötigt</label>
|
||||
<select class="input" name="wbs_required">
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
{% block content %}
|
||||
<section class="card">
|
||||
<nav class="flex flex-wrap border-b border-soft px-4">
|
||||
{% set sections = [('protokoll','Protokoll'), ('benutzer','Benutzer')] %}
|
||||
{% set sections = [('protokoll','Protokoll'), ('benutzer','Benutzer'), ('geheimnisse','Geheimnisse')] %}
|
||||
{% for key, label in sections %}
|
||||
<a href="/admin/{{ key }}"
|
||||
class="tab {% if section == key %}active{% endif %}">{{ label }}</a>
|
||||
|
|
@ -16,6 +16,7 @@
|
|||
<div class="p-5">
|
||||
{% if section == 'protokoll' %}{% include "_admin_logs.html" %}
|
||||
{% elif section == 'benutzer' %}{% include "_settings_users.html" %}
|
||||
{% elif section == 'geheimnisse' %}{% include "_admin_secrets.html" %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</section>
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="robots" content="noindex, nofollow">
|
||||
<link rel="icon" type="image/webp" href="/static/didi.webp">
|
||||
<title>{% block title %}wohnungsdidi{% endblock %}</title>
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
<script src="https://unpkg.com/htmx.org@2.0.3"></script>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue