correctness batch: atomic writes, task refs, hmac, import-star, pickle
Per review §2: - web/db.py: new _tx() context manager wraps multi-statement writers in BEGIN IMMEDIATE … COMMIT/ROLLBACK (our connections run in autocommit mode, so plain `with _lock:` doesn't give atomicity). partnership_accept (UPDATE + DELETE) and cleanup_retention (3 deletes/updates) now use it. - Fire-and-forget tasks: add module-level _bg_tasks sets in web/app.py and web/enrichment.py. A _spawn() helper holds a strong ref until the task finishes so the GC can't drop it mid-flight (CPython's event loop only weakly references pending tasks). - apply/main.py: require_api_key uses hmac.compare_digest, matching web's check. Also imports now use explicit names instead of `from settings *`. - apply/language.py: replace `from settings import *` + `from paths import *` with explicit imports — this is the pattern that caused the LANGUAGE NameError earlier. - alert/utils.py: pickle-based hash_any_object → deterministic JSON+sha256. Cheaper, portable across Python versions, no pickle attack surface. - web/notifications.py: /fehler links repointed to /bewerbungen (the former page doesn't exist). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
77098c82df
commit
eb73b5e415
7 changed files with 66 additions and 19 deletions
|
|
@ -1,6 +1,11 @@
|
|||
import hashlib
|
||||
import pickle
|
||||
import json
|
||||
|
||||
def hash_any_object(var):
|
||||
byte_stream = pickle.dumps(var)
|
||||
return hashlib.sha256(byte_stream).hexdigest()
|
||||
|
||||
def hash_any_object(var) -> str:
|
||||
"""Deterministic sha256 over a JSON-encodable value. Replaces the older
|
||||
pickle-based hash: pickle is nondeterministic across Python versions /
|
||||
dict-ordering tweaks and pulls more attack surface than we need for a
|
||||
plain change-detection fingerprint."""
|
||||
encoded = json.dumps(var, sort_keys=True, ensure_ascii=False, default=str)
|
||||
return hashlib.sha256(encoded.encode("utf-8")).hexdigest()
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
import tomllib
|
||||
|
||||
from settings import *
|
||||
from paths import *
|
||||
import logging
|
||||
|
||||
from settings import LANGUAGE
|
||||
from paths import TRANSLATIONS_FILE
|
||||
|
||||
logger = logging.getLogger("flat-apply")
|
||||
|
||||
with open(TRANSLATIONS_FILE, "rb") as f:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import hmac
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
|
@ -71,7 +72,7 @@ class ApplyResponse(BaseModel):
|
|||
def require_api_key(x_internal_api_key: str | None = Header(default=None)) -> None:
|
||||
if not INTERNAL_API_KEY:
|
||||
raise HTTPException(status_code=503, detail="INTERNAL_API_KEY not configured")
|
||||
if x_internal_api_key != INTERNAL_API_KEY:
|
||||
if not x_internal_api_key or not hmac.compare_digest(x_internal_api_key, INTERNAL_API_KEY):
|
||||
raise HTTPException(status_code=401, detail="invalid api key")
|
||||
|
||||
|
||||
|
|
|
|||
13
web/app.py
13
web/app.py
|
|
@ -69,6 +69,17 @@ logger = logging.getLogger("web")
|
|||
|
||||
apply_client = ApplyClient()
|
||||
|
||||
# Strong refs for fire-and-forget tasks. asyncio.create_task only weakly
|
||||
# references tasks from the event loop — the GC could drop them mid-flight.
|
||||
_bg_tasks: set[asyncio.Task] = set()
|
||||
|
||||
|
||||
def _spawn(coro) -> asyncio.Task:
|
||||
t = asyncio.create_task(coro)
|
||||
_bg_tasks.add(t)
|
||||
t.add_done_callback(_bg_tasks.discard)
|
||||
return t
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App + Jinja
|
||||
|
|
@ -326,7 +337,7 @@ def _kick_apply(user_id: int, flat_id: str, url: str, triggered_by: str) -> None
|
|||
profile_snapshot=profile,
|
||||
)
|
||||
|
||||
asyncio.create_task(asyncio.to_thread(
|
||||
_spawn(asyncio.to_thread(
|
||||
_finish_apply_background, app_id, user_id, flat_id, url, profile, submit_forms,
|
||||
))
|
||||
|
||||
|
|
|
|||
30
web/db.py
30
web/db.py
|
|
@ -9,6 +9,7 @@ import json
|
|||
import logging
|
||||
import sqlite3
|
||||
import threading
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
|
|
@ -42,6 +43,23 @@ def _get_conn() -> sqlite3.Connection:
|
|||
return c
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _tx():
|
||||
"""Atomic BEGIN IMMEDIATE … COMMIT/ROLLBACK. Required for multi-statement
|
||||
writes because our connections run in autocommit mode (isolation_level=None).
|
||||
Combine with _lock to serialize writers and avoid BUSY.
|
||||
"""
|
||||
c = _get_conn()
|
||||
c.execute("BEGIN IMMEDIATE")
|
||||
try:
|
||||
yield c
|
||||
except Exception:
|
||||
c.execute("ROLLBACK")
|
||||
raise
|
||||
else:
|
||||
c.execute("COMMIT")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -845,13 +863,13 @@ def partnership_accept(request_id: int, user_id: int) -> bool:
|
|||
if get_accepted_partnership(row["from_user_id"]) or get_accepted_partnership(user_id):
|
||||
return False
|
||||
partner_id = row["from_user_id"]
|
||||
with _lock:
|
||||
_get_conn().execute(
|
||||
with _lock, _tx() as c:
|
||||
c.execute(
|
||||
"UPDATE partnerships SET status = 'accepted', accepted_at = ? WHERE id = ?",
|
||||
(now_iso(), request_id),
|
||||
)
|
||||
# clean up any stale pending requests touching either user
|
||||
_get_conn().execute(
|
||||
c.execute(
|
||||
"""DELETE FROM partnerships
|
||||
WHERE status = 'pending'
|
||||
AND (from_user_id IN (?, ?) OR to_user_id IN (?, ?))""",
|
||||
|
|
@ -899,12 +917,12 @@ def partner_flat_actions(partner_id: int) -> dict:
|
|||
def cleanup_retention() -> dict:
|
||||
cutoff = (datetime.now(timezone.utc) - timedelta(days=RETENTION_DAYS)).isoformat(timespec="seconds")
|
||||
stats = {}
|
||||
with _lock:
|
||||
with _lock, _tx() as c:
|
||||
for table in ("errors", "audit_log"):
|
||||
cur = _get_conn().execute(f"DELETE FROM {table} WHERE timestamp < ?", (cutoff,))
|
||||
cur = c.execute(f"DELETE FROM {table} WHERE timestamp < ?", (cutoff,))
|
||||
stats[table] = cur.rowcount
|
||||
# Drop forensics from old applications (but keep the row itself for history)
|
||||
cur = _get_conn().execute(
|
||||
cur = c.execute(
|
||||
"UPDATE applications SET forensics_json = NULL WHERE started_at < ? AND forensics_json IS NOT NULL",
|
||||
(cutoff,),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -218,8 +218,20 @@ def _record_failure(flat_id: str, step: str, reason: str) -> None:
|
|||
pass
|
||||
|
||||
|
||||
# Holding strong references to every spawned task so asyncio doesn't GC them
|
||||
# mid-flight. create_task only weakly references tasks from the event loop.
|
||||
_bg_tasks: set[asyncio.Task] = set()
|
||||
|
||||
|
||||
def _spawn(coro) -> asyncio.Task:
|
||||
t = asyncio.create_task(coro)
|
||||
_bg_tasks.add(t)
|
||||
t.add_done_callback(_bg_tasks.discard)
|
||||
return t
|
||||
|
||||
|
||||
def kick(flat_id: str) -> None:
|
||||
asyncio.create_task(asyncio.to_thread(enrich_flat_sync, flat_id))
|
||||
_spawn(asyncio.to_thread(enrich_flat_sync, flat_id))
|
||||
|
||||
|
||||
async def _backfill_runner() -> None:
|
||||
|
|
@ -234,5 +246,5 @@ async def _backfill_runner() -> None:
|
|||
|
||||
def kick_backfill() -> int:
|
||||
pending = db.flats_needing_enrichment(limit=200)
|
||||
asyncio.create_task(_backfill_runner())
|
||||
_spawn(_backfill_runner())
|
||||
return len(pending)
|
||||
|
|
|
|||
|
|
@ -84,8 +84,8 @@ def on_apply_ok(user_id: int, flat: dict, message: str) -> None:
|
|||
|
||||
def on_apply_fail(user_id: int, flat: dict, message: str) -> None:
|
||||
addr = flat.get("address") or flat.get("link")
|
||||
body = f"Bewerbung fehlgeschlagen: {addr}\n{message}\n{PUBLIC_URL}/fehler"
|
||||
body = f"Bewerbung fehlgeschlagen: {addr}\n{message}\n{PUBLIC_URL}/bewerbungen"
|
||||
md = (f"*Bewerbung fehlgeschlagen*\n{addr}\n{message}\n"
|
||||
f"[Fehler ansehen]({PUBLIC_URL}/fehler)")
|
||||
f"[Bewerbungen ansehen]({PUBLIC_URL}/bewerbungen)")
|
||||
notify_user(user_id, "apply_fail", subject="[wohnungsdidi] Bewerbung fehlgeschlagen",
|
||||
body_plain=body, body_markdown=md)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue