multi-user: users, per-user profiles/filters/notifications, tab UI, apply forensics
* DB: users + user_profiles/filters/notifications/preferences; applications gets user_id + forensics_json + profile_snapshot_json; new errors table with 14d retention; schema versioning via MIGRATIONS list
* auth: password hashes in DB (argon2); env vars seed first admin; per-user sessions; CSRF bound to user id
* apply: personal info/WBS moved out of env into the request body; providers take an ApplyContext with Profile + submit_forms; full Playwright recorder (step log, console, page errors, network, screenshots, final HTML)
* web: five top-level tabs (Wohnungen/Bewerbungen/Logs/Fehler/Einstellungen); settings sub-tabs profil/filter/benachrichtigungen/account/benutzer; per-user matching, auto-apply and notifications (UI/Telegram/SMTP); red auto-apply switch on Wohnungen tab; forensics detail view for bewerbungen and fehler; retention background thread

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e663386a19
commit
c630b500ef
36 changed files with 2763 additions and 1113 deletions
182
apply/actions.py
182
apply/actions.py
|
|
@ -1,38 +1,198 @@
|
|||
"""
|
||||
Playwright actions + forensic recorder.
|
||||
|
||||
The recorder captures everything a downstream AI agent would need to diagnose
|
||||
a broken application flow:
|
||||
|
||||
* a structured `step_log` (one entry per `recorder.step(...)`)
|
||||
* browser console logs
|
||||
* browser errors
|
||||
* every network request + selective response bodies
|
||||
* page HTML at finalize time
|
||||
* screenshots at key moments
|
||||
|
||||
Payloads are capped so SQLite stays healthy. Screenshots are base64 JPEGs.
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
import time
|
||||
from contextlib import asynccontextmanager
|
||||
from playwright.async_api import async_playwright, ViewportSize
|
||||
from typing import Optional
|
||||
|
||||
from playwright.async_api import ViewportSize, async_playwright
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
from settings import *
|
||||
import logging
|
||||
from settings import BROWSER_HEIGHT, BROWSER_LOCALE, BROWSER_WIDTH, HEADLESS, POST_SUBMISSION_SLEEP_MS
|
||||
|
||||
# Logger used by the recorder and the page helpers in this module.
logger = logging.getLogger("flat-apply")

# Caps applied by the Recorder to keep the stored forensic payload bounded.
MAX_CONSOLE_ENTRIES = 200   # max console messages kept; also reused as the cap for page errors
MAX_NETWORK_ENTRIES = 150   # max request + response entries kept (shared list)
MAX_BODY_SNIPPET = 2000     # max characters kept from a captured response body
MAX_HTML_DUMP = 200_000     # max characters kept from the final page HTML (~200 KB)
SCREENSHOT_JPEG_QUALITY = 60  # JPEG quality passed to page.screenshot()
|
||||
|
||||
|
||||
class Recorder:
    """Capture browser and step telemetry for a single apply run.

    One instance collects, for the page it is attached to:

    * ``steps``       - structured entries added through :meth:`step`
    * ``console``     - browser console messages (capped, text truncated)
    * ``errors``      - page errors (capped, message truncated)
    * ``network``     - request and response entries in one shared, capped
      list; a response body snippet is stored only for HTTP status >= 400
    * ``screenshots`` - base64-encoded JPEG viewport captures (not capped)
    * ``final_html`` / ``final_url`` - page state captured by :meth:`finalize`

    All ``ts`` values are seconds elapsed since the recorder was created.
    """

    # Upper bound on how long finalize() waits for in-flight response handlers.
    _DRAIN_TIMEOUT_S = 2.0

    def __init__(self, url: str):
        """Start the clock and create empty telemetry buffers for ``url``."""
        self.started_at = time.time()
        self.url = url
        self.steps: list[dict] = []
        self.console: list[dict] = []
        self.errors: list[dict] = []
        self.network: list[dict] = []
        self.screenshots: list[dict] = []
        self.final_html: Optional[str] = None
        self.final_url: Optional[str] = None
        # Strong references to running response-handler tasks. The event loop
        # only keeps weak references, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._pending: set = set()

    def _elapsed(self) -> float:
        """Return seconds since the recorder was created, rounded to ms."""
        return round(time.time() - self.started_at, 3)

    # --- step log -----------------------------------------------------------
    def step(self, step_name: str, status: str = "ok", detail: str = "") -> None:
        """Append one entry to the step log and mirror it to the logger.

        ``detail`` is stringified and truncated to 500 characters in the
        stored entry. Status ``"ok"`` logs at INFO, anything else at WARNING.
        """
        self.steps.append({
            "ts": self._elapsed(),
            "step": step_name,
            "status": status,
            "detail": str(detail)[:500],
        })
        log = logger.info if status == "ok" else logger.warning
        log("step %-20s %-4s %s", step_name, status, detail)

    # --- browser hooks ------------------------------------------------------
    def _attach(self, page) -> None:
        """Register console, pageerror, request and response listeners on ``page``."""

        def on_console(msg):
            if len(self.console) >= MAX_CONSOLE_ENTRIES:
                return
            try:
                text = msg.text
            except Exception:
                text = "<unavailable>"
            self.console.append({
                "ts": self._elapsed(),
                "type": getattr(msg, "type", "?"),
                "text": str(text)[:500],
            })

        def on_pageerror(err):
            # Page errors share the console cap.
            if len(self.errors) < MAX_CONSOLE_ENTRIES:
                self.errors.append({
                    "ts": self._elapsed(),
                    "message": str(err)[:1000],
                })

        def on_request(req):
            if len(self.network) < MAX_NETWORK_ENTRIES:
                self.network.append({
                    "ts": self._elapsed(),
                    "kind": "request",
                    "method": req.method,
                    "url": req.url,
                    "resource_type": req.resource_type,
                })

        async def on_response(resp):
            # Cheap early exit so no body is fetched once the list is full.
            if len(self.network) >= MAX_NETWORK_ENTRIES:
                return
            try:
                snippet = ""
                if resp.status >= 400:
                    try:
                        snippet = (await resp.text())[:MAX_BODY_SNIPPET]
                    except Exception:
                        snippet = ""
                # Other handlers may have filled the list while the body was
                # being awaited; re-check so the cap is really enforced.
                if len(self.network) >= MAX_NETWORK_ENTRIES:
                    return
                self.network.append({
                    "ts": self._elapsed(),
                    "kind": "response",
                    "status": resp.status,
                    "url": resp.url,
                    "body_snippet": snippet,
                })
            except Exception:
                # Telemetry is best-effort: never let it break the apply flow,
                # but leave a trace for debugging.
                logger.debug("response capture failed", exc_info=True)

        def schedule_response(resp):
            task = asyncio.create_task(on_response(resp))
            self._pending.add(task)
            task.add_done_callback(self._pending.discard)

        page.on("console", on_console)
        page.on("pageerror", on_pageerror)
        page.on("request", on_request)
        page.on("response", schedule_response)

    # --- screenshots --------------------------------------------------------
    async def snap(self, page, label: str) -> None:
        """Store a viewport JPEG screenshot of ``page`` under ``label``.

        Failures are logged as warnings and otherwise ignored.
        """
        try:
            data = await page.screenshot(
                type="jpeg",
                quality=SCREENSHOT_JPEG_QUALITY,
                full_page=False,
                timeout=5000,
            )
            self.screenshots.append({
                "ts": self._elapsed(),
                "label": label,
                "url": page.url,
                "b64_jpeg": base64.b64encode(data).decode("ascii"),
                "size": len(data),
            })
        except Exception as e:
            logger.warning("snap failed (%s): %s", label, e)

    async def finalize(self, page) -> None:
        """Capture final URL, truncated HTML and a ``"final"`` screenshot.

        Afterwards waits, for at most ``_DRAIN_TIMEOUT_S`` seconds, for
        response handlers that are still running so their entries are not
        lost when the caller closes the browser.
        """
        try:
            self.final_url = page.url
            html = await page.content()
            self.final_html = html[:MAX_HTML_DUMP]
        except Exception as e:
            logger.warning("finalize html dump failed: %s", e)
        await self.snap(page, "final")

        if self._pending:
            try:
                # Pass a copy: done-callbacks mutate self._pending while waiting.
                await asyncio.wait(set(self._pending), timeout=self._DRAIN_TIMEOUT_S)
            except Exception as e:
                logger.warning("finalize drain failed: %s", e)

    def to_json(self) -> dict:
        """Return all captured telemetry as a plain dict.

        ``duration_s`` is computed at call time, relative to construction.
        """
        return {
            "url": self.url,
            "final_url": self.final_url,
            "duration_s": self._elapsed(),
            "steps": self.steps,
            "console": self.console,
            "errors": self.errors,
            "network": self.network,
            "screenshots": self.screenshots,
            "final_html": self.final_html,
        }
|
||||
|
||||
|
||||
@asynccontextmanager
async def open_page(url: str, recorder: Optional["Recorder"] = None):
    """Launch Chromium, navigate to ``url`` and yield the loaded page.

    Args:
        url: Address to open; the page is yielded after the ``networkidle``
            load state is reached.
        recorder: Optional recorder. When given, it is attached to the page
            before navigation, receives ``launch`` / ``goto`` / ``loaded``
            steps and a ``"loaded"`` screenshot, and is finalized on exit.

    Yields:
        The Playwright page.

    Navigation happens inside the guarded region, so the recorder is
    finalized and the browser is closed even when ``goto`` or the load wait
    fails; the original exception still propagates to the caller.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=HEADLESS,
            args=["--disable-blink-features=AutomationControlled"],
        )
        try:
            context = await browser.new_context(
                viewport=ViewportSize({"width": BROWSER_WIDTH, "height": BROWSER_HEIGHT}),
                locale=BROWSER_LOCALE,
            )
            page = await context.new_page()

            if recorder:
                recorder._attach(page)
                recorder.step("launch", detail=f"headless={HEADLESS}")

            try:
                if recorder:
                    recorder.step("goto", detail=url)
                await page.goto(url)
                await page.wait_for_load_state("networkidle")
                if recorder:
                    recorder.step("loaded", detail=page.url)
                    await recorder.snap(page, "loaded")

                yield page
            finally:
                if recorder:
                    try:
                        await recorder.finalize(page)
                    except Exception:
                        logger.exception("recorder.finalize failed")
                try:
                    await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS)
                except Exception as e:
                    # A closed or crashed page must not mask the caller's
                    # exception or prevent the browser from being closed.
                    logger.warning("post-submission wait failed: %s", e)
        finally:
            await browser.close()
|
||||
|
||||
|
||||
def create_dummy_pdf():
|
||||
logger.info("creating dummy pdf")
|
||||
c = canvas.Canvas("DummyPDF.pdf")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue