1. Admin → Geheimnisse sub-tab lets you edit ANTHROPIC_API_KEY + BERLIN_WOHNEN_USERNAME/PASSWORD at runtime. Migration v7 adds a secrets(key,value,updated_at) table; startup seeds missing keys from env (idempotent). web reads secrets DB-first (env fallback) via llm._api_key(); alert fetches them from web /internal/secrets on each scan, passes them into Scraper(). Rotating creds no longer needs a redeploy. Masked display: 6 leading + 4 trailing chars, "…" in the middle. Blank form fields leave the stored value untouched. 2. Drop the max_morning_commute filter from UI + server + FILTER_KEYS + filter summary (the underlying Maps.calculate_score code stays for potential future re-enable). 3. /static/didi.webp wired as favicon via <link rel="icon"> in base.html. 4. apply.open_page wraps page.goto in try/except so a failed load still produces a "goto.failed" step + screenshot instead of returning an empty forensics blob. networkidle + post-submission sleep are also made best-effort. The error ZIP export already writes screenshot+HTML per step and final_html — with this change every apply run leaves a reconstructable trail even when the listing is already offline. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
230 lines
8 KiB
Python
230 lines
8 KiB
Python
"""
Playwright actions + forensic recorder.

The recorder captures everything a downstream AI agent would need to diagnose
a broken application flow:

* a structured `step_log` (one entry per `recorder.step(...)`)
* browser console logs
* browser errors
* every network request + selective response bodies
* page HTML at finalize time
* screenshots at key moments

Payloads are capped so SQLite stays healthy. Screenshots are base64 JPEGs.
"""
|
|
import asyncio
|
|
import base64
|
|
import logging
|
|
import time
|
|
from contextlib import asynccontextmanager
|
|
from typing import Optional
|
|
|
|
from playwright.async_api import ViewportSize, async_playwright
|
|
from reportlab.pdfgen import canvas
|
|
|
|
from settings import BROWSER_HEIGHT, BROWSER_LOCALE, BROWSER_WIDTH, HEADLESS, POST_SUBMISSION_SLEEP_MS
|
|
|
|
logger = logging.getLogger("flat-apply")
|
|
|
|
# Upper bound on stored console messages; the same limit is applied to the
# page-error list.
MAX_CONSOLE_ENTRIES = 200
# Upper bound on stored network entries (requests and responses share one list).
MAX_NETWORK_ENTRIES = 150
# Characters of response body kept for responses with status >= 400.
MAX_BODY_SNIPPET = 2000
# Characters of page HTML kept per snapshot and for the final dump.
MAX_HTML_DUMP = 200_000  # 200 KB
# Upper bound on stored screenshot/HTML snapshots.
MAX_SCREENSHOTS = 40
# JPEG quality passed to page.screenshot().
SCREENSHOT_JPEG_QUALITY = 60
|
|
|
|
|
|
class Recorder:
    """Captures browser + step telemetry for one apply run.

    Collected data (every ``ts`` is seconds since the recorder was created,
    rounded to milliseconds):

    * ``steps``: structured step log, one entry per :meth:`step` call
    * ``console`` / ``errors``: browser console messages and page errors
    * ``network``: request and response entries; responses with a status
      >= 400 also carry a truncated body snippet
    * ``screenshots``: base64 JPEG + HTML snapshots taken by :meth:`snap`
    * ``final_url`` / ``final_html``: page state stored by :meth:`finalize`

    Each list is capped by the module-level ``MAX_*`` constants; entries
    arriving after a cap is reached are dropped.
    """

    def __init__(self, url: str):
        self.started_at = time.time()
        self.url = url
        self.steps: list[dict] = []
        self.console: list[dict] = []
        self.errors: list[dict] = []
        self.network: list[dict] = []
        self.screenshots: list[dict] = []
        self.final_html: Optional[str] = None
        self.final_url: Optional[str] = None
        # Strong references to in-flight response-capture tasks. The event
        # loop only keeps weak references to tasks, so a fire-and-forget task
        # could otherwise be garbage-collected before it finishes.
        self._pending_tasks: set[asyncio.Task] = set()

    def _elapsed(self) -> float:
        """Return seconds since the recorder was created, rounded to ms."""
        return round(time.time() - self.started_at, 3)

    # --- step log -----------------------------------------------------------
    def step(self, step_name: str, status: str = "ok", detail: str = "") -> None:
        """Append one entry to the step log and mirror it to the logger.

        ``detail`` is stringified and truncated to 500 characters in the
        stored entry. Any status other than ``"ok"`` is logged as a warning.
        """
        entry = {
            "ts": self._elapsed(),
            "step": step_name,
            "status": status,
            "detail": str(detail)[:500],
        }
        self.steps.append(entry)
        log = logger.info if status == "ok" else logger.warning
        log("step %-20s %-4s %s", step_name, status, detail)

    # --- browser hooks ------------------------------------------------------
    def _attach(self, page) -> None:
        """Register console, pageerror, request and response listeners on *page*."""

        def on_console(msg):
            try:
                text = msg.text
            except Exception:
                text = "<unavailable>"
            if len(self.console) < MAX_CONSOLE_ENTRIES:
                self.console.append({
                    "ts": self._elapsed(),
                    "type": getattr(msg, "type", "?"),
                    "text": text[:500],
                })

        def on_pageerror(err):
            # Page errors share the console cap.
            if len(self.errors) < MAX_CONSOLE_ENTRIES:
                self.errors.append({
                    "ts": self._elapsed(),
                    "message": str(err)[:1000],
                })

        def on_request(req):
            if len(self.network) < MAX_NETWORK_ENTRIES:
                self.network.append({
                    "ts": self._elapsed(),
                    "kind": "request",
                    "method": req.method,
                    "url": req.url,
                    "resource_type": req.resource_type,
                })

        async def on_response(resp):
            # Cheap early exit so no body is fetched once the log is full.
            if len(self.network) >= MAX_NETWORK_ENTRIES:
                return
            try:
                snippet = ""
                if resp.status >= 400:
                    try:
                        body = await resp.text()
                        snippet = body[:MAX_BODY_SNIPPET]
                    except Exception:
                        snippet = ""
                    # Other handlers may have filled the log while this one
                    # was suspended on the body read; re-check so the cap
                    # is never exceeded.
                    if len(self.network) >= MAX_NETWORK_ENTRIES:
                        return
                self.network.append({
                    "ts": self._elapsed(),
                    "kind": "response",
                    "status": resp.status,
                    "url": resp.url,
                    "body_snippet": snippet,
                })
            except Exception:
                # Telemetry is best-effort: never let capture break the run,
                # but leave a trace for debugging.
                logger.debug("response capture failed", exc_info=True)

        def schedule_response(resp):
            # Hold a strong reference until the task completes.
            task = asyncio.create_task(on_response(resp))
            self._pending_tasks.add(task)
            task.add_done_callback(self._pending_tasks.discard)

        page.on("console", on_console)
        page.on("pageerror", on_pageerror)
        page.on("request", on_request)
        page.on("response", schedule_response)

    # --- screenshots + html dump -------------------------------------------
    async def snap(self, page, label: str) -> None:
        """Capture a viewport screenshot + page HTML for this moment.

        Does nothing once ``MAX_SCREENSHOTS`` entries exist. A failing
        screenshot or HTML read is logged and leaves the corresponding
        fields at their empty defaults; the entry is still appended.
        """
        if len(self.screenshots) >= MAX_SCREENSHOTS:
            return
        ts = self._elapsed()
        entry = {"ts": ts, "label": label, "url": page.url,
                 "b64_jpeg": "", "size": 0, "html": "", "html_size": 0}
        try:
            img = await page.screenshot(type="jpeg", quality=SCREENSHOT_JPEG_QUALITY,
                                        full_page=False, timeout=5000)
            entry["b64_jpeg"] = base64.b64encode(img).decode("ascii")
            entry["size"] = len(img)
        except Exception as e:
            logger.warning("snap screenshot failed (%s): %s", label, e)
        try:
            html = await page.content()
            entry["html"] = html[:MAX_HTML_DUMP]
            # html_size records the untruncated length.
            entry["html_size"] = len(html)
        except Exception as e:
            logger.warning("snap html failed (%s): %s", label, e)
        self.screenshots.append(entry)

    async def step_snap(self, page, name: str, detail: str = "", status: str = "ok") -> None:
        """Log a step AND capture a screenshot + HTML for it."""
        self.step(name, status, detail)
        await self.snap(page, name)

    async def finalize(self, page) -> None:
        """Store the final URL and truncated HTML, then take a "final" snapshot."""
        try:
            self.final_url = page.url
            html = await page.content()
            self.final_html = html[:MAX_HTML_DUMP]
        except Exception as e:
            logger.warning("finalize html dump failed: %s", e)
        await self.snap(page, "final")

    def to_json(self) -> dict:
        """Return the collected telemetry as a dict.

        The lists are returned by reference, not copied. ``duration_s`` is
        measured at call time.
        """
        return {
            "url": self.url,
            "final_url": self.final_url,
            "duration_s": self._elapsed(),
            "steps": self.steps,
            "console": self.console,
            "errors": self.errors,
            "network": self.network,
            "screenshots": self.screenshots,
            "final_html": self.final_html,
        }
|
|
|
|
|
|
@asynccontextmanager
async def open_page(url: str, recorder: Optional[Recorder] = None):
    """Launch Chromium, navigate to *url* and yield the Playwright page.

    Navigation is best-effort: a failing ``page.goto`` is recorded as a
    ``goto.failed`` step (plus snapshot) when a recorder is given, and the
    page is still yielded so the caller's run leaves a forensic trail.
    The ``networkidle`` wait and the post-submission sleep are best-effort
    too.

    On exit the recorder (if any) is finalized, the page is kept open for
    ``POST_SUBMISSION_SLEEP_MS`` and the browser is closed. The browser is
    closed even when setup fails or the caller's block raises.

    Args:
        url: Address to open.
        recorder: Optional telemetry recorder to attach to the page.

    Yields:
        The Playwright page.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=HEADLESS,
            args=["--disable-blink-features=AutomationControlled"],
        )
        # Outer try/finally: close the browser on every exit path, including
        # setup failures and cancellation during the post-submission sleep.
        try:
            context = await browser.new_context(
                viewport=ViewportSize({"width": BROWSER_WIDTH, "height": BROWSER_HEIGHT}),
                locale=BROWSER_LOCALE,
            )
            page = await context.new_page()

            if recorder:
                recorder._attach(page)
                recorder.step("launch", detail=f"headless={HEADLESS}")
                recorder.step("goto", detail=url)

            try:
                await page.goto(url, timeout=45_000)
            except Exception as e:
                if recorder:
                    recorder.step("goto.failed", "error", f"{type(e).__name__}: {e}")
                    try:
                        await recorder.snap(page, "goto.failed")
                    except Exception:
                        logger.exception("snap after goto.failed failed")
                else:
                    # Without a recorder the failure would otherwise vanish.
                    logger.warning("goto failed: %s: %s", type(e).__name__, e)
            else:
                try:
                    await page.wait_for_load_state("networkidle", timeout=15_000)
                except Exception as e:
                    # Pages that never go idle are fine; carry on.
                    logger.debug("networkidle wait skipped: %s", e)
                if recorder:
                    recorder.step("loaded", detail=page.url)
                    await recorder.snap(page, "loaded")

            try:
                yield page
            finally:
                if recorder:
                    try:
                        await recorder.finalize(page)
                    except Exception:
                        logger.exception("recorder.finalize failed")
                try:
                    await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS)
                except Exception as e:
                    # The page may already be gone; the sleep is optional.
                    logger.debug("post-submission sleep skipped: %s", e)
        finally:
            await browser.close()
|
|
|
|
|
|
def create_dummy_pdf():
    """Write a single-line placeholder PDF to the relative path ``DummyPDF.pdf``."""
    logger.info("creating dummy pdf")
    pdf = canvas.Canvas("DummyPDF.pdf")
    pdf.drawString(100, 750, "Hello! This is a dummy PDF file.")
    pdf.save()
|