diff --git a/src/rights_filter/server/http_app.py b/src/rights_filter/server/http_app.py index e61e757..26993b1 100644 --- a/src/rights_filter/server/http_app.py +++ b/src/rights_filter/server/http_app.py @@ -1,14 +1,16 @@ from __future__ import annotations import base64 +import hmac import json import mimetypes import re import sqlite3 from http import HTTPStatus +from http.cookies import SimpleCookie from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path -from urllib.parse import parse_qs, unquote, urlparse +from urllib.parse import unquote, urlparse from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES from rights_filter.server.sqlite_store import CopyrighterStore @@ -33,15 +35,21 @@ class _PayloadTooLarge(Exception): """Raised when a request body exceeds _MAX_BODY_BYTES.""" -# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set. -# The static GUI shell and /health stay open so the operator can bootstrap. -_PROTECTED_PREFIXES = ( - "/api/", +# Media routes loaded via /, which cannot send an Authorization +# header — these rely on the session cookie instead. +_MEDIA_PREFIXES = ( "/media/", "/knowledge-media/", "/collected-media/", "/face-crop-media/", ) +# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set. +# The static GUI shell and /health stay open so the operator can bootstrap. +_PROTECTED_PREFIXES = ("/api/", *_MEDIA_PREFIXES) + +# HttpOnly cookie minted after a header-authenticated request so the browser can +# load media without ever carrying the token in a URL. +_SESSION_COOKIE = "copyrighter_session" def build_server( @@ -249,12 +257,19 @@ def build_server( if not any(path.startswith(prefix) for prefix in _PROTECTED_PREFIXES): return True header = self.headers.get("Authorization", "") - if header == f"Bearer {auth_token}": - return True - # /media cannot send headers, so accept ?token= for those. - query_token = parse_qs(urlparse(self.path).query).get("token", [""])[0] - if query_token == auth_token: + if hmac.compare_digest(header, f"Bearer {auth_token}"): + # First header-authenticated request mints an HttpOnly session + # cookie so the browser can load /media (which cannot send an + # Authorization header) without ever putting the token in a URL. + self._issue_session_cookie = True return True + cookie_header = self.headers.get("Cookie", "") + if cookie_header: + jar = SimpleCookie() + jar.load(cookie_header) + morsel = jar.get(_SESSION_COOKIE) + if morsel is not None and hmac.compare_digest(morsel.value, auth_token): + return True self._json({"error": "unauthorized"}, HTTPStatus.UNAUTHORIZED) return False @@ -269,11 +284,19 @@ def build_server( except json.JSONDecodeError as exc: raise ValueError("invalid JSON body") from exc + def _maybe_set_session_cookie(self) -> None: + if getattr(self, "_issue_session_cookie", False): + self.send_header( + "Set-Cookie", + f"{_SESSION_COOKIE}={auth_token}; HttpOnly; SameSite=Strict; Path=/", + ) + def _json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None: data = json.dumps(payload, ensure_ascii=False).encode("utf-8") self.send_response(status) self.send_header("Content-Type", "application/json; charset=utf-8") self.send_header("Content-Length", str(len(data))) + self._maybe_set_session_cookie() self.end_headers() self.wfile.write(data) @@ -313,6 +336,11 @@ def build_server( "Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'; img-src 'self' data:; sandbox", ) + # Biometric/personal media: keep it out of intermediary caches and + # don't leak the URL via Referer. + self.send_header("Cache-Control", "no-store") + self.send_header("Referrer-Policy", "no-referrer") + self._maybe_set_session_cookie() self.end_headers() self.wfile.write(data) diff --git a/tests/rights_filter/server/test_http_app.py b/tests/rights_filter/server/test_http_app.py index 7d4d896..c0b7a3a 100644 --- a/tests/rights_filter/server/test_http_app.py +++ b/tests/rights_filter/server/test_http_app.py @@ -1325,8 +1325,23 @@ def test_protected_routes_require_token_when_configured(tmp_path: Path): urlopen(base + "/api/bootstrap") assert exc.value.code == 401 + # Header auth succeeds and mints an HttpOnly session cookie. authed = Request(base + "/api/bootstrap", headers={"Authorization": "Bearer secret"}) - assert urlopen(authed).status == 200 + response = urlopen(authed) + assert response.status == 200 + set_cookie = response.headers.get("Set-Cookie") + assert set_cookie and "copyrighter_session=secret" in set_cookie + assert "HttpOnly" in set_cookie + + # The cookie alone authenticates (mimics a browser media load). + cookie_req = Request(base + "/api/bootstrap", headers={"Cookie": "copyrighter_session=secret"}) + assert urlopen(cookie_req).status == 200 + + # A wrong cookie is rejected; the token never travels in the URL. + bad = Request(base + "/api/bootstrap", headers={"Cookie": "copyrighter_session=nope"}) + with pytest.raises(HTTPError) as exc_bad: + urlopen(bad) + assert exc_bad.value.code == 401 # The GUI shell and liveness probe stay open so the operator can bootstrap. assert urlopen(base + "/health").status == 200 @@ -1379,6 +1394,33 @@ def test_malformed_json_body_returns_400(tmp_path: Path): server.shutdown() +def test_upload_and_import_validation_returns_400(tmp_path: Path): + static_dir, image_store, store = _fixtures(tmp_path) + server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir) + _start(server) + base = f"http://127.0.0.1:{server.server_port}" + + def post_expect(path: str, body: dict, expected: int) -> None: + data = json.dumps(body).encode("utf-8") + request = Request(base + path, data=data, method="POST", headers={"Content-Type": "application/json"}) + with pytest.raises(HTTPError) as exc: + urlopen(request, timeout=10) + assert exc.value.code == expected + + try: + good_b64 = base64.b64encode(b"x").decode("ascii") + # unsupported image type + post_expect("/api/submissions/upload-image", {"image": {"filename": "x.txt", "data": good_b64}}, 400) + # data is not valid base64 + post_expect("/api/submissions/upload-image", {"image": {"filename": "x.png", "data": "!!notbase64!!"}}, 400) + # empty image data + post_expect("/api/submissions/upload-image", {"image": {"filename": "x.png", "data": ""}}, 400) + # import folder that does not exist + post_expect("/api/submissions/import-folder", {"path": str(tmp_path / "does-not-exist")}, 400) + finally: + server.shutdown() + + def test_operational_error_surfaces_as_503(tmp_path: Path, monkeypatch): static_dir, image_store, store = _fixtures(tmp_path)