correctness batch: atomic writes, task refs, hmac, import-star, pickle

Per review §2:

- web/db.py: new _tx() context manager wraps multi-statement writers in
  BEGIN IMMEDIATE … COMMIT/ROLLBACK (our connections run in autocommit
  mode, so plain `with _lock:` doesn't give atomicity). partnership_accept
  (UPDATE + DELETE) and cleanup_retention (3 deletes/updates) now use it.
- Fire-and-forget tasks: add module-level _bg_tasks sets in web/app.py and
  web/enrichment.py. A _spawn() helper holds a strong ref until the task
  finishes so the GC can't drop it mid-flight (CPython's event loop only
  weakly references pending tasks).
- apply/main.py: require_api_key uses hmac.compare_digest, matching web's
  check. Also imports now use explicit names instead of `from settings import *`.
- apply/language.py: replace `from settings import *` + `from paths import *`
  with explicit imports — this is the pattern that caused the LANGUAGE
  NameError earlier.
- alert/utils.py: pickle-based hash_any_object → deterministic JSON+sha256.
  Cheaper, portable across Python versions, no pickle attack surface.
- web/notifications.py: /fehler links repointed to /bewerbungen (the
  former page doesn't exist).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
EiSiMo 2026-04-21 19:14:26 +02:00
parent 77098c82df
commit eb73b5e415
7 changed files with 66 additions and 19 deletions

View file

@ -1,6 +1,11 @@
import hashlib import hashlib
import pickle import json
def hash_any_object(var) -> str:
    """Deterministic sha256 fingerprint of a JSON-encodable value.

    Replaces the older pickle-based hash: pickle output is nondeterministic
    across Python versions / dict-ordering tweaks and carries more attack
    surface than a plain change-detection fingerprint needs.

    Note: ``default=str`` stringifies anything json can't encode natively,
    so arbitrary objects hash by their ``str()`` form.
    """
    payload = json.dumps(var, sort_keys=True, ensure_ascii=False, default=str)
    digest = hashlib.sha256(payload.encode("utf-8"))
    return digest.hexdigest()

View file

@ -1,9 +1,9 @@
import tomllib import tomllib
from settings import *
from paths import *
import logging import logging
from settings import LANGUAGE
from paths import TRANSLATIONS_FILE
logger = logging.getLogger("flat-apply") logger = logging.getLogger("flat-apply")
with open(TRANSLATIONS_FILE, "rb") as f: with open(TRANSLATIONS_FILE, "rb") as f:

View file

@ -1,3 +1,4 @@
import hmac
import logging import logging
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from urllib.parse import urljoin, urlparse from urllib.parse import urljoin, urlparse
@ -71,7 +72,7 @@ class ApplyResponse(BaseModel):
def require_api_key(x_internal_api_key: str | None = Header(default=None)) -> None:
    """FastAPI dependency guarding internal endpoints with the shared key.

    Raises:
        HTTPException 503: INTERNAL_API_KEY is not configured on this instance.
        HTTPException 401: header missing or key mismatch.
    """
    if not INTERNAL_API_KEY:
        raise HTTPException(status_code=503, detail="INTERNAL_API_KEY not configured")
    # compare_digest gives a constant-time comparison. Compare as bytes:
    # the str overload raises TypeError on non-ASCII input, so a hostile
    # header value would otherwise turn into a 500 instead of a 401.
    if not x_internal_api_key or not hmac.compare_digest(
        x_internal_api_key.encode("utf-8"), INTERNAL_API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="invalid api key")

View file

@ -69,6 +69,17 @@ logger = logging.getLogger("web")
apply_client = ApplyClient() apply_client = ApplyClient()
# Strong refs for fire-and-forget tasks. asyncio.create_task only weakly
# references tasks from the event loop — the GC could drop them mid-flight.
_bg_tasks: set[asyncio.Task] = set()
def _spawn(coro) -> asyncio.Task:
t = asyncio.create_task(coro)
_bg_tasks.add(t)
t.add_done_callback(_bg_tasks.discard)
return t
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# App + Jinja # App + Jinja
@ -326,7 +337,7 @@ def _kick_apply(user_id: int, flat_id: str, url: str, triggered_by: str) -> None
profile_snapshot=profile, profile_snapshot=profile,
) )
asyncio.create_task(asyncio.to_thread( _spawn(asyncio.to_thread(
_finish_apply_background, app_id, user_id, flat_id, url, profile, submit_forms, _finish_apply_background, app_id, user_id, flat_id, url, profile, submit_forms,
)) ))

View file

@ -9,6 +9,7 @@ import json
import logging import logging
import sqlite3 import sqlite3
import threading import threading
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Any, Iterable, Optional from typing import Any, Iterable, Optional
@ -42,6 +43,23 @@ def _get_conn() -> sqlite3.Connection:
return c return c
@contextmanager
def _tx():
    """Atomic BEGIN IMMEDIATE … COMMIT/ROLLBACK over the thread's connection.

    Required for multi-statement writes because our connections run in
    autocommit mode (isolation_level=None), so a bare `with _lock:` gives
    serialization but not atomicity. Combine with _lock to serialize writers
    and avoid SQLITE_BUSY.

    Yields:
        The sqlite3.Connection with an open IMMEDIATE (write) transaction.
    """
    c = _get_conn()
    c.execute("BEGIN IMMEDIATE")
    try:
        yield c
    except BaseException:
        # BaseException, not Exception: GeneratorExit/KeyboardInterrupt thrown
        # into the generator must also roll back, or the connection is left
        # holding an open write transaction that blocks every later writer.
        c.execute("ROLLBACK")
        raise
    else:
        c.execute("COMMIT")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Schema # Schema
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -845,13 +863,13 @@ def partnership_accept(request_id: int, user_id: int) -> bool:
if get_accepted_partnership(row["from_user_id"]) or get_accepted_partnership(user_id): if get_accepted_partnership(row["from_user_id"]) or get_accepted_partnership(user_id):
return False return False
partner_id = row["from_user_id"] partner_id = row["from_user_id"]
with _lock: with _lock, _tx() as c:
_get_conn().execute( c.execute(
"UPDATE partnerships SET status = 'accepted', accepted_at = ? WHERE id = ?", "UPDATE partnerships SET status = 'accepted', accepted_at = ? WHERE id = ?",
(now_iso(), request_id), (now_iso(), request_id),
) )
# clean up any stale pending requests touching either user # clean up any stale pending requests touching either user
_get_conn().execute( c.execute(
"""DELETE FROM partnerships """DELETE FROM partnerships
WHERE status = 'pending' WHERE status = 'pending'
AND (from_user_id IN (?, ?) OR to_user_id IN (?, ?))""", AND (from_user_id IN (?, ?) OR to_user_id IN (?, ?))""",
@ -899,12 +917,12 @@ def partner_flat_actions(partner_id: int) -> dict:
def cleanup_retention() -> dict: def cleanup_retention() -> dict:
cutoff = (datetime.now(timezone.utc) - timedelta(days=RETENTION_DAYS)).isoformat(timespec="seconds") cutoff = (datetime.now(timezone.utc) - timedelta(days=RETENTION_DAYS)).isoformat(timespec="seconds")
stats = {} stats = {}
with _lock: with _lock, _tx() as c:
for table in ("errors", "audit_log"): for table in ("errors", "audit_log"):
cur = _get_conn().execute(f"DELETE FROM {table} WHERE timestamp < ?", (cutoff,)) cur = c.execute(f"DELETE FROM {table} WHERE timestamp < ?", (cutoff,))
stats[table] = cur.rowcount stats[table] = cur.rowcount
# Drop forensics from old applications (but keep the row itself for history) # Drop forensics from old applications (but keep the row itself for history)
cur = _get_conn().execute( cur = c.execute(
"UPDATE applications SET forensics_json = NULL WHERE started_at < ? AND forensics_json IS NOT NULL", "UPDATE applications SET forensics_json = NULL WHERE started_at < ? AND forensics_json IS NOT NULL",
(cutoff,), (cutoff,),
) )

View file

@ -218,8 +218,20 @@ def _record_failure(flat_id: str, step: str, reason: str) -> None:
pass pass
# Holding strong references to every spawned task so asyncio doesn't GC them
# mid-flight. create_task only weakly references tasks from the event loop.
_bg_tasks: set[asyncio.Task] = set()
def _spawn(coro) -> asyncio.Task:
t = asyncio.create_task(coro)
_bg_tasks.add(t)
t.add_done_callback(_bg_tasks.discard)
return t
def kick(flat_id: str) -> None:
    """Fire-and-forget: enrich one flat on a worker thread, ref held by _spawn."""
    _spawn(asyncio.to_thread(enrich_flat_sync, flat_id))
async def _backfill_runner() -> None: async def _backfill_runner() -> None:
@ -234,5 +246,5 @@ async def _backfill_runner() -> None:
def kick_backfill() -> int:
    """Start a background backfill pass over flats still needing enrichment.

    Returns the number of pending flats — a snapshot taken before the runner
    starts, so it is informational only.
    """
    pending = db.flats_needing_enrichment(limit=200)
    _spawn(_backfill_runner())
    return len(pending)

View file

@ -84,8 +84,8 @@ def on_apply_ok(user_id: int, flat: dict, message: str) -> None:
def on_apply_fail(user_id: int, flat: dict, message: str) -> None:
    """Send a failed-application notification (plain + markdown) to *user_id*."""
    # Prefer the street address; fall back to the listing link.
    addr = flat.get("address") or flat.get("link")
    body = f"Bewerbung fehlgeschlagen: {addr}\n{message}\n{PUBLIC_URL}/bewerbungen"
    md = (f"*Bewerbung fehlgeschlagen*\n{addr}\n{message}\n"
          f"[Bewerbungen ansehen]({PUBLIC_URL}/bewerbungen)")
    notify_user(
        user_id,
        "apply_fail",
        subject="[wohnungsdidi] Bewerbung fehlgeschlagen",
        body_plain=body,
        body_markdown=md,
    )