correctness batch: atomic writes, task refs, hmac, import-star, pickle

Per review §2:

- web/db.py: new _tx() context manager wraps multi-statement writers in
  BEGIN IMMEDIATE … COMMIT/ROLLBACK (our connections run in autocommit
  mode, so plain `with _lock:` doesn't give atomicity). partnership_accept
  (UPDATE + DELETE) and cleanup_retention (3 deletes/updates) now use it.
- Fire-and-forget tasks: add module-level _bg_tasks sets in web/app.py and
  web/enrichment.py. A _spawn() helper holds a strong ref until the task
  finishes so the GC can't drop it mid-flight (CPython's event loop only
  weakly references pending tasks).
- apply/main.py: require_api_key uses hmac.compare_digest, matching web's
  check. Also imports now use explicit names instead of `from settings import *`.
- apply/language.py: replace `from settings import *` + `from paths import *`
  with explicit imports — this is the pattern that caused the LANGUAGE
  NameError earlier.
- alert/utils.py: pickle-based hash_any_object → deterministic JSON+sha256.
  Cheaper, portable across Python versions, no pickle attack surface.
- web/notifications.py: /fehler links repointed to /bewerbungen (the
  former page doesn't exist).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
EiSiMo 2026-04-21 19:14:26 +02:00
parent 77098c82df
commit eb73b5e415
7 changed files with 66 additions and 19 deletions

View file

@ -69,6 +69,17 @@ logger = logging.getLogger("web")
apply_client = ApplyClient()
# Strong refs for fire-and-forget tasks. asyncio.create_task only weakly
# references tasks from the event loop — the GC could drop them mid-flight.
_bg_tasks: set[asyncio.Task] = set()
def _spawn(coro) -> asyncio.Task:
t = asyncio.create_task(coro)
_bg_tasks.add(t)
t.add_done_callback(_bg_tasks.discard)
return t
# ---------------------------------------------------------------------------
# App + Jinja
@ -326,7 +337,7 @@ def _kick_apply(user_id: int, flat_id: str, url: str, triggered_by: str) -> None
profile_snapshot=profile,
)
asyncio.create_task(asyncio.to_thread(
_spawn(asyncio.to_thread(
_finish_apply_background, app_id, user_id, flat_id, url, profile, submit_forms,
))

View file

@ -9,6 +9,7 @@ import json
import logging
import sqlite3
import threading
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from typing import Any, Iterable, Optional
@ -42,6 +43,23 @@ def _get_conn() -> sqlite3.Connection:
return c
@contextmanager
def _tx():
    """Wrap a writer in an explicit BEGIN IMMEDIATE … COMMIT/ROLLBACK.

    Our connections run in autocommit mode (isolation_level=None), so a
    multi-statement write gets no implicit transaction — this manager makes
    the whole body atomic.  Combine with _lock to serialize writers and
    avoid SQLITE_BUSY.
    """
    conn = _get_conn()
    conn.execute("BEGIN IMMEDIATE")
    try:
        yield conn
    except Exception:
        conn.execute("ROLLBACK")
        raise
    conn.execute("COMMIT")
# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------
@ -845,13 +863,13 @@ def partnership_accept(request_id: int, user_id: int) -> bool:
if get_accepted_partnership(row["from_user_id"]) or get_accepted_partnership(user_id):
return False
partner_id = row["from_user_id"]
with _lock:
_get_conn().execute(
with _lock, _tx() as c:
c.execute(
"UPDATE partnerships SET status = 'accepted', accepted_at = ? WHERE id = ?",
(now_iso(), request_id),
)
# clean up any stale pending requests touching either user
_get_conn().execute(
c.execute(
"""DELETE FROM partnerships
WHERE status = 'pending'
AND (from_user_id IN (?, ?) OR to_user_id IN (?, ?))""",
@ -899,12 +917,12 @@ def partner_flat_actions(partner_id: int) -> dict:
def cleanup_retention() -> dict:
cutoff = (datetime.now(timezone.utc) - timedelta(days=RETENTION_DAYS)).isoformat(timespec="seconds")
stats = {}
with _lock:
with _lock, _tx() as c:
for table in ("errors", "audit_log"):
cur = _get_conn().execute(f"DELETE FROM {table} WHERE timestamp < ?", (cutoff,))
cur = c.execute(f"DELETE FROM {table} WHERE timestamp < ?", (cutoff,))
stats[table] = cur.rowcount
# Drop forensics from old applications (but keep the row itself for history)
cur = _get_conn().execute(
cur = c.execute(
"UPDATE applications SET forensics_json = NULL WHERE started_at < ? AND forensics_json IS NOT NULL",
(cutoff,),
)

View file

@ -218,8 +218,20 @@ def _record_failure(flat_id: str, step: str, reason: str) -> None:
pass
# Holding strong references to every spawned task so asyncio doesn't GC them
# mid-flight. create_task only weakly references tasks from the event loop.
_bg_tasks: set[asyncio.Task] = set()
def _spawn(coro) -> asyncio.Task:
t = asyncio.create_task(coro)
_bg_tasks.add(t)
t.add_done_callback(_bg_tasks.discard)
return t
def kick(flat_id: str) -> None:
asyncio.create_task(asyncio.to_thread(enrich_flat_sync, flat_id))
_spawn(asyncio.to_thread(enrich_flat_sync, flat_id))
async def _backfill_runner() -> None:
@ -234,5 +246,5 @@ async def _backfill_runner() -> None:
def kick_backfill() -> int:
pending = db.flats_needing_enrichment(limit=200)
asyncio.create_task(_backfill_runner())
_spawn(_backfill_runner())
return len(pending)

View file

@ -84,8 +84,8 @@ def on_apply_ok(user_id: int, flat: dict, message: str) -> None:
def on_apply_fail(user_id: int, flat: dict, message: str) -> None:
addr = flat.get("address") or flat.get("link")
body = f"Bewerbung fehlgeschlagen: {addr}\n{message}\n{PUBLIC_URL}/fehler"
body = f"Bewerbung fehlgeschlagen: {addr}\n{message}\n{PUBLIC_URL}/bewerbungen"
md = (f"*Bewerbung fehlgeschlagen*\n{addr}\n{message}\n"
f"[Fehler ansehen]({PUBLIC_URL}/fehler)")
f"[Bewerbungen ansehen]({PUBLIC_URL}/bewerbungen)")
notify_user(user_id, "apply_fail", subject="[wohnungsdidi] Bewerbung fehlgeschlagen",
body_plain=body, body_markdown=md)