lazyflat/apply/actions.py
EiSiMo 3bb04210c4 secrets tab, drop commute filter, favicon, robust error reports
1. Admin → Geheimnisse sub-tab lets you edit ANTHROPIC_API_KEY +
   BERLIN_WOHNEN_USERNAME/PASSWORD at runtime. Migration v7 adds a
   secrets(key,value,updated_at) table; startup seeds missing keys from
   env (idempotent). web reads secrets DB-first (env fallback) via
   llm._api_key(); alert fetches them from web /internal/secrets on each
   scan, passes them into Scraper(). Rotating creds no longer needs a
   redeploy.
   Masked display: 6 leading + 4 trailing chars, "…" in the middle.
   Blank form fields leave the stored value untouched.

2. Drop the max_morning_commute filter from UI + server + FILTER_KEYS +
   filter summary (the underlying Maps.calculate_score code stays for
   potential future re-enable).

3. /static/didi.webp wired as favicon via <link rel="icon"> in base.html.

4. apply.open_page wraps page.goto in try/except so a failed load still
   produces a "goto.failed" step + screenshot instead of returning an
   empty forensics blob. networkidle + post-submission sleep are also
   made best-effort. The error ZIP export already writes screenshot+HTML
   per step and final_html — with this change every apply run leaves a
   reconstructable trail even when the listing is already offline.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 17:56:57 +02:00

230 lines
8 KiB
Python

"""
Playwright actions + forensic recorder.
The recorder captures everything a downstream AI agent would need to diagnose
a broken application flow:
* a structured `step_log` (one entry per `recorder.step(...)`)
* browser console logs
* browser errors
* every network request + selective response bodies
* page HTML at finalize time
* screenshots at key moments
Payloads are capped so SQLite stays healthy. Screenshots are base64 JPEGs.
"""
import asyncio
import base64
import logging
import time
from contextlib import asynccontextmanager
from typing import Optional
from playwright.async_api import ViewportSize, async_playwright
from reportlab.pdfgen import canvas
from settings import BROWSER_HEIGHT, BROWSER_LOCALE, BROWSER_WIDTH, HEADLESS, POST_SUBMISSION_SLEEP_MS
# Module-wide logger; Recorder.step and the snapshot helpers report through it.
logger = logging.getLogger("flat-apply")
# Caps on how much telemetry a single Recorder keeps for one run.
MAX_CONSOLE_ENTRIES = 200  # max console messages; also reused as the cap for page errors
MAX_NETWORK_ENTRIES = 150  # max request + response entries combined
MAX_BODY_SNIPPET = 2000  # characters of body kept for responses with status >= 400
MAX_HTML_DUMP = 200_000  # characters of page HTML kept per dump (roughly 200 KB)
MAX_SCREENSHOTS = 40  # max snapshots (screenshot + HTML) stored per run
SCREENSHOT_JPEG_QUALITY = 60  # JPEG quality passed to page.screenshot
class Recorder:
    """Captures browser + step telemetry for one apply run.

    One instance collects, for a single URL:

    * ``steps``       - structured step log, one entry per ``step(...)`` call
    * ``console``     - browser console messages
    * ``errors``      - uncaught page errors
    * ``network``     - requests and responses (body snippet for status >= 400)
    * ``screenshots`` - base64 JPEG + HTML snapshots taken via ``snap(...)``
    * ``final_url`` / ``final_html`` - page state recorded by ``finalize(...)``

    Every list is capped by the module-level ``MAX_*`` constants. All ``ts``
    values are seconds elapsed since the recorder was created.
    """

    def __init__(self, url: str):
        """Start the clock and create empty telemetry buffers for ``url``."""
        self.started_at = time.time()
        self.url = url
        self.steps: list[dict] = []
        self.console: list[dict] = []
        self.errors: list[dict] = []
        self.network: list[dict] = []
        self.screenshots: list[dict] = []
        self.final_html: Optional[str] = None
        self.final_url: Optional[str] = None
        # Strong references to in-flight response-capture tasks. The event
        # loop only keeps weak references to tasks, so without this set a
        # task could be garbage-collected before it finishes.
        self._pending_tasks: set = set()

    def _elapsed(self) -> float:
        """Return seconds since the recorder was created, rounded to ms."""
        return round(time.time() - self.started_at, 3)

    # --- step log -----------------------------------------------------------
    def step(self, step_name: str, status: str = "ok", detail: str = "") -> None:
        """Append one entry to the step log and mirror it to the logger.

        ``detail`` is stringified and truncated to 500 characters in the
        stored entry. Status ``"ok"`` is logged at INFO, anything else at
        WARNING.
        """
        entry = {
            "ts": self._elapsed(),
            "step": step_name,
            "status": status,
            "detail": str(detail)[:500],
        }
        self.steps.append(entry)
        log = logger.info if status == "ok" else logger.warning
        log("step %-20s %-4s %s", step_name, status, detail)

    # --- browser hooks ------------------------------------------------------
    def _attach(self, page) -> None:
        """Register console / pageerror / request / response listeners on ``page``."""

        def on_console(msg):
            try:
                text = msg.text
            except Exception:
                text = "<unavailable>"
            if len(self.console) < MAX_CONSOLE_ENTRIES:
                self.console.append({
                    "ts": self._elapsed(),
                    "type": getattr(msg, "type", "?"),
                    "text": text[:500],
                })

        def on_pageerror(err):
            # Page errors share the console cap.
            if len(self.errors) < MAX_CONSOLE_ENTRIES:
                self.errors.append({
                    "ts": self._elapsed(),
                    "message": str(err)[:1000],
                })

        def on_request(req):
            if len(self.network) < MAX_NETWORK_ENTRIES:
                self.network.append({
                    "ts": self._elapsed(),
                    "kind": "request",
                    "method": req.method,
                    "url": req.url,
                    "resource_type": req.resource_type,
                })

        async def on_response(resp):
            # Cheap early exit so no body is fetched once the log is full.
            if len(self.network) >= MAX_NETWORK_ENTRIES:
                return
            try:
                snippet = ""
                if resp.status >= 400:
                    # Only failing responses get their body captured.
                    try:
                        body = await resp.text()
                        snippet = body[:MAX_BODY_SNIPPET]
                    except Exception:
                        snippet = ""
                # Re-check the cap: other handlers may have appended entries
                # while this one was suspended in ``await resp.text()``.
                if len(self.network) < MAX_NETWORK_ENTRIES:
                    self.network.append({
                        "ts": self._elapsed(),
                        "kind": "response",
                        "status": resp.status,
                        "url": resp.url,
                        "body_snippet": snippet,
                    })
            except Exception:
                # Telemetry is best-effort; never let it break the apply run.
                pass

        def schedule_response(resp):
            # Keep the task referenced until it completes, then drop it.
            task = asyncio.create_task(on_response(resp))
            self._pending_tasks.add(task)
            task.add_done_callback(self._pending_tasks.discard)

        page.on("console", on_console)
        page.on("pageerror", on_pageerror)
        page.on("request", on_request)
        page.on("response", schedule_response)

    # --- screenshots + html dump -------------------------------------------
    async def snap(self, page, label: str) -> None:
        """Capture screenshot + full page HTML for this moment.

        Does nothing once ``MAX_SCREENSHOTS`` entries exist. Screenshot and
        HTML are captured independently: if either fails, a warning is logged
        and the entry is still stored with that part left empty.
        """
        if len(self.screenshots) >= MAX_SCREENSHOTS:
            return
        entry = {
            "ts": self._elapsed(),
            "label": label,
            "url": page.url,
            "b64_jpeg": "",
            "size": 0,
            "html": "",
            "html_size": 0,
        }
        try:
            img = await page.screenshot(
                type="jpeg",
                quality=SCREENSHOT_JPEG_QUALITY,
                full_page=False,
                timeout=5000,
            )
            entry["b64_jpeg"] = base64.b64encode(img).decode("ascii")
            entry["size"] = len(img)
        except Exception as e:
            logger.warning("snap screenshot failed (%s): %s", label, e)
        try:
            html = await page.content()
            entry["html"] = html[:MAX_HTML_DUMP]
            # html_size records the untruncated length.
            entry["html_size"] = len(html)
        except Exception as e:
            logger.warning("snap html failed (%s): %s", label, e)
        self.screenshots.append(entry)

    async def step_snap(self, page, name: str, detail: str = "", status: str = "ok") -> None:
        """Log a step AND capture a screenshot + HTML for it."""
        self.step(name, status, detail)
        await self.snap(page, name)

    async def finalize(self, page) -> None:
        """Record the final URL and (truncated) HTML, then take a "final" snapshot."""
        try:
            self.final_url = page.url
            html = await page.content()
            self.final_html = html[:MAX_HTML_DUMP]
        except Exception as e:
            logger.warning("finalize html dump failed: %s", e)
        await self.snap(page, "final")

    def to_json(self) -> dict:
        """Return everything recorded so far as a plain dict.

        ``duration_s`` is computed at call time, relative to construction.
        """
        return {
            "url": self.url,
            "final_url": self.final_url,
            "duration_s": self._elapsed(),
            "steps": self.steps,
            "console": self.console,
            "errors": self.errors,
            "network": self.network,
            "screenshots": self.screenshots,
            "final_html": self.final_html,
        }
@asynccontextmanager
async def open_page(url: str, recorder: Optional[Recorder] = None):
    """Launch Chromium, navigate to ``url`` and yield the Playwright page.

    Navigation is best-effort: if ``page.goto`` raises, a "goto.failed" step
    and snapshot are recorded (when a recorder is given) and the page is
    yielded anyway, so the caller still gets a forensic trail. On exit the
    recorder is finalized, the post-submission wait is attempted, and the
    browser is closed.
    """
    async with async_playwright() as pw:
        browser = await pw.chromium.launch(
            args=["--disable-blink-features=AutomationControlled"],
            headless=HEADLESS,
        )
        viewport = ViewportSize({"width": BROWSER_WIDTH, "height": BROWSER_HEIGHT})
        context = await browser.new_context(viewport=viewport, locale=BROWSER_LOCALE)
        page = await context.new_page()

        if recorder:
            recorder._attach(page)
            recorder.step("launch", detail=f"headless={HEADLESS}")
            recorder.step("goto", detail=url)

        try:
            await page.goto(url, timeout=45_000)
        except Exception as exc:
            # Failed load: leave a step + snapshot behind instead of aborting.
            if recorder:
                recorder.step("goto.failed", "error", f"{type(exc).__name__}: {exc}")
                try:
                    await recorder.snap(page, "goto.failed")
                except Exception:
                    logger.exception("snap after goto.failed failed")
        else:
            # Successful load: waiting for network idle is only best-effort.
            try:
                await page.wait_for_load_state("networkidle", timeout=15_000)
            except Exception:
                pass
            if recorder:
                recorder.step("loaded", detail=page.url)
                await recorder.snap(page, "loaded")

        try:
            yield page
        finally:
            if recorder:
                try:
                    await recorder.finalize(page)
                except Exception:
                    logger.exception("recorder.finalize failed")
            # The post-submission pause must never prevent browser shutdown.
            try:
                await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS)
            except Exception:
                pass
            await browser.close()
def create_dummy_pdf(filename: str = "DummyPDF.pdf") -> None:
    """Write a placeholder PDF containing a single line of text.

    Args:
        filename: Path of the PDF to create. Defaults to "DummyPDF.pdf",
            resolved against the current working directory, which is the
            location this function always wrote to before the parameter
            existed.
    """
    logger.info("creating dummy pdf")
    pdf = canvas.Canvas(filename)
    pdf.drawString(100, 750, "Hello! This is a dummy PDF file.")
    pdf.save()