fix: cookie-based operator auth keeps token out of URLs

Address commit security review: replace the ?token= query fallback (which
leaked the token into logs/referrers) with an HttpOnly, SameSite=Strict
session cookie minted on the first header-authenticated request, so <img>
media loads authenticate without a URL token. Use hmac.compare_digest for
constant-time comparison and add Cache-Control: no-store + Referrer-Policy:
no-referrer on untrusted biometric media. Also cover upload/import boundary
validation (400) at the HTTP layer.
This commit is contained in:
유창욱 2026-06-20 18:43:53 +09:00
parent 62e2d183f8
commit 1abb1107a2
2 changed files with 81 additions and 11 deletions

View file

@ -1,14 +1,16 @@
from __future__ import annotations
import base64
import hmac
import json
import mimetypes
import re
import sqlite3
from http import HTTPStatus
from http.cookies import SimpleCookie
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from urllib.parse import parse_qs, unquote, urlparse
from urllib.parse import unquote, urlparse
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
from rights_filter.server.sqlite_store import CopyrighterStore
@ -33,15 +35,21 @@ class _PayloadTooLarge(Exception):
"""Raised when a request body exceeds _MAX_BODY_BYTES."""
# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set.
# The static GUI shell and /health stay open so the operator can bootstrap.
_PROTECTED_PREFIXES = (
"/api/",
# Media routes loaded via <img>/<link>, which cannot send an Authorization
# header — these rely on the session cookie instead.
_MEDIA_PREFIXES = (
"/media/",
"/knowledge-media/",
"/collected-media/",
"/face-crop-media/",
)
# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set.
# The static GUI shell and /health stay open so the operator can bootstrap.
_PROTECTED_PREFIXES = ("/api/", *_MEDIA_PREFIXES)
# HttpOnly cookie minted after a header-authenticated request so the browser can
# load media without ever carrying the token in a URL.
_SESSION_COOKIE = "copyrighter_session"
def build_server(
@ -249,12 +257,19 @@ def build_server(
if not any(path.startswith(prefix) for prefix in _PROTECTED_PREFIXES):
return True
header = self.headers.get("Authorization", "")
if header == f"Bearer {auth_token}":
return True
# <img>/media cannot send headers, so accept ?token= for those.
query_token = parse_qs(urlparse(self.path).query).get("token", [""])[0]
if query_token == auth_token:
if hmac.compare_digest(header, f"Bearer {auth_token}"):
# First header-authenticated request mints an HttpOnly session
# cookie so the browser can load <img>/media (which cannot send an
# Authorization header) without ever putting the token in a URL.
self._issue_session_cookie = True
return True
cookie_header = self.headers.get("Cookie", "")
if cookie_header:
jar = SimpleCookie()
jar.load(cookie_header)
morsel = jar.get(_SESSION_COOKIE)
if morsel is not None and hmac.compare_digest(morsel.value, auth_token):
return True
self._json({"error": "unauthorized"}, HTTPStatus.UNAUTHORIZED)
return False
@ -269,11 +284,19 @@ def build_server(
except json.JSONDecodeError as exc:
raise ValueError("invalid JSON body") from exc
def _maybe_set_session_cookie(self) -> None:
if getattr(self, "_issue_session_cookie", False):
self.send_header(
"Set-Cookie",
f"{_SESSION_COOKIE}={auth_token}; HttpOnly; SameSite=Strict; Path=/",
)
def _json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None:
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
self.send_response(status)
self.send_header("Content-Type", "application/json; charset=utf-8")
self.send_header("Content-Length", str(len(data)))
self._maybe_set_session_cookie()
self.end_headers()
self.wfile.write(data)
@ -313,6 +336,11 @@ def build_server(
"Content-Security-Policy",
"default-src 'none'; style-src 'unsafe-inline'; img-src 'self' data:; sandbox",
)
# Biometric/personal media: keep it out of intermediary caches and
# don't leak the URL via Referer.
self.send_header("Cache-Control", "no-store")
self.send_header("Referrer-Policy", "no-referrer")
self._maybe_set_session_cookie()
self.end_headers()
self.wfile.write(data)

View file

@ -1325,8 +1325,23 @@ def test_protected_routes_require_token_when_configured(tmp_path: Path):
urlopen(base + "/api/bootstrap")
assert exc.value.code == 401
# Header auth succeeds and mints an HttpOnly session cookie.
authed = Request(base + "/api/bootstrap", headers={"Authorization": "Bearer secret"})
assert urlopen(authed).status == 200
response = urlopen(authed)
assert response.status == 200
set_cookie = response.headers.get("Set-Cookie")
assert set_cookie and "copyrighter_session=secret" in set_cookie
assert "HttpOnly" in set_cookie
# The cookie alone authenticates (mimics a browser <img> media load).
cookie_req = Request(base + "/api/bootstrap", headers={"Cookie": "copyrighter_session=secret"})
assert urlopen(cookie_req).status == 200
# A wrong cookie is rejected; the token never travels in the URL.
bad = Request(base + "/api/bootstrap", headers={"Cookie": "copyrighter_session=nope"})
with pytest.raises(HTTPError) as exc_bad:
urlopen(bad)
assert exc_bad.value.code == 401
# The GUI shell and liveness probe stay open so the operator can bootstrap.
assert urlopen(base + "/health").status == 200
@ -1379,6 +1394,33 @@ def test_malformed_json_body_returns_400(tmp_path: Path):
server.shutdown()
def test_upload_and_import_validation_returns_400(tmp_path: Path):
static_dir, image_store, store = _fixtures(tmp_path)
server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
_start(server)
base = f"http://127.0.0.1:{server.server_port}"
def post_expect(path: str, body: dict, expected: int) -> None:
data = json.dumps(body).encode("utf-8")
request = Request(base + path, data=data, method="POST", headers={"Content-Type": "application/json"})
with pytest.raises(HTTPError) as exc:
urlopen(request, timeout=10)
assert exc.value.code == expected
try:
good_b64 = base64.b64encode(b"x").decode("ascii")
# unsupported image type
post_expect("/api/submissions/upload-image", {"image": {"filename": "x.txt", "data": good_b64}}, 400)
# data is not valid base64
post_expect("/api/submissions/upload-image", {"image": {"filename": "x.png", "data": "!!notbase64!!"}}, 400)
# empty image data
post_expect("/api/submissions/upload-image", {"image": {"filename": "x.png", "data": ""}}, 400)
# import folder that does not exist
post_expect("/api/submissions/import-folder", {"path": str(tmp_path / "does-not-exist")}, 400)
finally:
server.shutdown()
def test_operational_error_surfaces_as_503(tmp_path: Path, monkeypatch):
static_dir, image_store, store = _fixtures(tmp_path)