""" Playwright actions + forensic recorder. The recorder captures everything a downstream AI agent would need to diagnose a broken application flow: * a structured `step_log` (one entry per `recorder.step(...)`) * browser console logs * browser errors * every network request + selective response bodies * page HTML at finalize time * screenshots at key moments Payloads are capped so SQLite stays healthy. Screenshots are base64 JPEGs. """ import asyncio import base64 import logging import time from contextlib import asynccontextmanager from typing import Optional from playwright.async_api import ViewportSize, async_playwright from reportlab.pdfgen import canvas from settings import BROWSER_HEIGHT, BROWSER_LOCALE, BROWSER_WIDTH, HEADLESS, POST_SUBMISSION_SLEEP_MS logger = logging.getLogger("flat-apply") MAX_CONSOLE_ENTRIES = 200 MAX_NETWORK_ENTRIES = 150 MAX_BODY_SNIPPET = 2000 MAX_HTML_DUMP = 200_000 # 200 KB SCREENSHOT_JPEG_QUALITY = 60 class Recorder: """Captures browser + step telemetry for one apply run.""" def __init__(self, url: str): self.started_at = time.time() self.url = url self.steps: list[dict] = [] self.console: list[dict] = [] self.errors: list[dict] = [] self.network: list[dict] = [] self.screenshots: list[dict] = [] self.final_html: Optional[str] = None self.final_url: Optional[str] = None # --- step log ----------------------------------------------------------- def step(self, step_name: str, status: str = "ok", detail: str = "") -> None: entry = { "ts": round(time.time() - self.started_at, 3), "step": step_name, "status": status, "detail": str(detail)[:500], } self.steps.append(entry) log = logger.info if status == "ok" else logger.warning log("step %-20s %-4s %s", step_name, status, detail) # --- browser hooks ------------------------------------------------------ def _attach(self, page) -> None: def on_console(msg): try: text = msg.text except Exception: text = "" if len(self.console) < MAX_CONSOLE_ENTRIES: self.console.append({ "ts": round(time.time() - self.started_at, 3), "type": getattr(msg, "type", "?"), "text": text[:500], }) def on_pageerror(err): if len(self.errors) < MAX_CONSOLE_ENTRIES: self.errors.append({ "ts": round(time.time() - self.started_at, 3), "message": str(err)[:1000], }) def on_request(req): if len(self.network) < MAX_NETWORK_ENTRIES: self.network.append({ "ts": round(time.time() - self.started_at, 3), "kind": "request", "method": req.method, "url": req.url, "resource_type": req.resource_type, }) async def on_response(resp): if len(self.network) >= MAX_NETWORK_ENTRIES: return try: snippet = "" if resp.status >= 400: try: body = await resp.text() snippet = body[:MAX_BODY_SNIPPET] except Exception: snippet = "" self.network.append({ "ts": round(time.time() - self.started_at, 3), "kind": "response", "status": resp.status, "url": resp.url, "body_snippet": snippet, }) except Exception: pass page.on("console", on_console) page.on("pageerror", on_pageerror) page.on("request", on_request) page.on("response", lambda r: asyncio.create_task(on_response(r))) # --- screenshots -------------------------------------------------------- async def snap(self, page, label: str) -> None: try: data = await page.screenshot(type="jpeg", quality=SCREENSHOT_JPEG_QUALITY, full_page=False, timeout=5000) b64 = base64.b64encode(data).decode("ascii") self.screenshots.append({ "ts": round(time.time() - self.started_at, 3), "label": label, "url": page.url, "b64_jpeg": b64, "size": len(data), }) except Exception as e: logger.warning("snap failed (%s): %s", label, e) async def finalize(self, page) -> None: try: self.final_url = page.url html = await page.content() self.final_html = html[:MAX_HTML_DUMP] except Exception as e: logger.warning("finalize html dump failed: %s", e) await self.snap(page, "final") def to_json(self) -> dict: return { "url": self.url, "final_url": self.final_url, "duration_s": round(time.time() - self.started_at, 3), "steps": self.steps, "console": self.console, "errors": self.errors, "network": self.network, "screenshots": self.screenshots, "final_html": self.final_html, } @asynccontextmanager async def open_page(url: str, recorder: Optional[Recorder] = None): async with async_playwright() as p: browser = await p.chromium.launch( headless=HEADLESS, args=["--disable-blink-features=AutomationControlled"], ) context = await browser.new_context( viewport=ViewportSize({"width": BROWSER_WIDTH, "height": BROWSER_HEIGHT}), locale=BROWSER_LOCALE, ) page = await context.new_page() if recorder: recorder._attach(page) recorder.step("launch", detail=f"headless={HEADLESS}") if recorder: recorder.step("goto", detail=url) await page.goto(url) await page.wait_for_load_state("networkidle") if recorder: recorder.step("loaded", detail=page.url) await recorder.snap(page, "loaded") try: yield page finally: if recorder: try: await recorder.finalize(page) except Exception: logger.exception("recorder.finalize failed") await page.wait_for_timeout(POST_SUBMISSION_SLEEP_MS) await browser.close() def create_dummy_pdf(): logger.info("creating dummy pdf") c = canvas.Canvas("DummyPDF.pdf") c.drawString(100, 750, "Hello! This is a dummy PDF file.") c.save()