From e9a15e8110fc88e74d0a7cb0faef6a20e924206b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9C=A0=EC=B0=BD=EC=9A=B1?= Date: Sat, 20 Jun 2026 18:18:54 +0900 Subject: [PATCH] fix: harden operator HTTP server Remove wildcard CORS (to prevent cross-origin reads of biometric/case data from localhost), add optional shared-token auth gate on data routes (COPYRIGHTER_AUTH_TOKEN; GUI shell + /health stay open), cap request body size (413), and map malformed JSON to 400 and SQLite lock contention to 503. --- .env.example | 7 ++ src/rights_filter/server/__main__.py | 5 + src/rights_filter/server/http_app.py | 70 +++++++++++++- tests/rights_filter/server/test_http_app.py | 100 ++++++++++++++++++++ 4 files changed, 179 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index 0fc9ed2..e8a5ec1 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,10 @@ +# Shared bearer token for the operator server's data routes (/api, media). +# Unset = open access (single-host dev only). Set for any networked deployment. +COPYRIGHTER_AUTH_TOKEN= +# Face-crop (biometric) retention in days; expired crops are purged with an +# audit event. 0 = keep indefinitely. 
+COPYRIGHTER_FACE_CROP_RETENTION_DAYS=90 + NAVER_CLIENT_ID= NAVER_CLIENT_SECRET= NAVER_SEARCH_DISPLAY=10 diff --git a/src/rights_filter/server/__main__.py b/src/rights_filter/server/__main__.py index ef54a2b..5fd8a90 100644 --- a/src/rights_filter/server/__main__.py +++ b/src/rights_filter/server/__main__.py @@ -27,12 +27,17 @@ def main() -> None: store = CopyrighterStore(Path(args.db), provider_runtime=provider_runtime) store.initialize() + auth_token = os.environ.get("COPYRIGHTER_AUTH_TOKEN", "").strip() or None + if auth_token is None: + print("WARNING: COPYRIGHTER_AUTH_TOKEN not set — data routes are unauthenticated.") + server = build_server( host=args.host, port=args.port, store=store, image_store=image_store, static_dir=Path(args.static), + auth_token=auth_token, ) print(f"Copyrighter API server listening on http://{args.host}:{args.port}") print(f"SQLite DB: {Path(args.db).resolve()}") diff --git a/src/rights_filter/server/http_app.py b/src/rights_filter/server/http_app.py index 3cad9fd..e61e757 100644 --- a/src/rights_filter/server/http_app.py +++ b/src/rights_filter/server/http_app.py @@ -4,10 +4,11 @@ import base64 import json import mimetypes import re +import sqlite3 from http import HTTPStatus from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path -from urllib.parse import unquote, urlparse +from urllib.parse import parse_qs, unquote, urlparse from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES from rights_filter.server.sqlite_store import CopyrighterStore @@ -22,6 +23,26 @@ mimetypes.add_type("font/woff", ".woff") _IMMUTABLE_SUFFIXES = {".woff2", ".woff"} _IMMUTABLE_CACHE_CONTROL = "public, max-age=31536000, immutable" +# An operator JSON request body is small (decisions, memos, knowledge edits); +# uploads come base64-encoded inside the body, so allow headroom but never an +# unbounded read that a single request could use to exhaust memory. 
+_MAX_BODY_BYTES = 10 * 1024 * 1024 + + +class _PayloadTooLarge(Exception): + """Raised when a request body exceeds _MAX_BODY_BYTES.""" + + +# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set. +# The static GUI shell and /health stay open so the operator can bootstrap. +_PROTECTED_PREFIXES = ( + "/api/", + "/media/", + "/knowledge-media/", + "/collected-media/", + "/face-crop-media/", +) + def build_server( host: str, @@ -29,6 +50,7 @@ def build_server( store: CopyrighterStore, image_store: LocalSubmissionImageStore, static_dir: Path | str, + auth_token: str | None = None, ) -> ThreadingHTTPServer: static_root = Path(static_dir).resolve() @@ -37,6 +59,8 @@ def build_server( def do_GET(self) -> None: path = _path(self.path) + if not self._require_auth(): + return active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it try: if path == "/health": @@ -64,6 +88,10 @@ def build_server( self._file(store.face_crop_media_path(unquote(path.removeprefix("/face-crop-media/"))), untrusted=True) else: self._static(path, static_root) + except _PayloadTooLarge: + self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE) + except sqlite3.OperationalError: + self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE) except KeyError: self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) except ValueError as exc: @@ -71,6 +99,8 @@ def build_server( def do_POST(self) -> None: path = _path(self.path) + if not self._require_auth(): + return active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it try: body = self._body() @@ -164,6 +194,10 @@ def build_server( self._json(store.emergency_disable_external_providers()) else: self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) + except _PayloadTooLarge: + self._json({"error": "payload too 
large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE) + except sqlite3.OperationalError: + self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE) except KeyError: self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) except ValueError as exc: @@ -171,6 +205,8 @@ def build_server( def do_PATCH(self) -> None: path = _path(self.path) + if not self._require_auth(): + return try: body = self._body() if path.startswith("/api/providers/"): @@ -190,6 +226,10 @@ def build_server( self._json(store.update_knowledge_entry(entry_id, body)) else: self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) + except _PayloadTooLarge: + self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE) + except sqlite3.OperationalError: + self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE) except KeyError: self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) except ValueError as exc: @@ -198,18 +238,42 @@ def build_server( def log_message(self, format: str, *args: object) -> None: return + def _require_auth(self) -> bool: + # No token configured -> open access (dev / single-host default). + if not auth_token: + return True + path = _path(self.path) + # The GUI shell (static assets) and liveness probe must load + # unauthenticated so the operator can enter the token; only data + # routes are protected. + if not any(path.startswith(prefix) for prefix in _PROTECTED_PREFIXES): + return True + header = self.headers.get("Authorization", "") + if header == f"Bearer {auth_token}": + return True + # /media cannot send headers, so accept ?token= for those. 
+ query_token = parse_qs(urlparse(self.path).query).get("token", [""])[0] + if query_token == auth_token: + return True + self._json({"error": "unauthorized"}, HTTPStatus.UNAUTHORIZED) + return False + def _body(self) -> dict[str, object]: length = int(self.headers.get("Content-Length", "0") or "0") + if length > _MAX_BODY_BYTES: + raise _PayloadTooLarge() if not length: return {} - return json.loads(self.rfile.read(length).decode("utf-8")) + try: + return json.loads(self.rfile.read(length).decode("utf-8")) + except json.JSONDecodeError as exc: + raise ValueError("invalid JSON body") from exc def _json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None: data = json.dumps(payload, ensure_ascii=False).encode("utf-8") self.send_response(status) self.send_header("Content-Type", "application/json; charset=utf-8") self.send_header("Content-Length", str(len(data))) - self.send_header("Access-Control-Allow-Origin", "*") self.end_headers() self.wfile.write(data) diff --git a/tests/rights_filter/server/test_http_app.py b/tests/rights_filter/server/test_http_app.py index b2f80dd..7d4d896 100644 --- a/tests/rights_filter/server/test_http_app.py +++ b/tests/rights_filter/server/test_http_app.py @@ -1,8 +1,11 @@ from pathlib import Path from threading import Thread +from urllib.error import HTTPError from urllib.request import Request, urlopen import base64 import json +import socket +import sqlite3 import pytest @@ -1295,3 +1298,100 @@ def test_rerun_enrichment_records_evidence_diff(tmp_path: Path): assert second_diff["scoreBefore"] == second_diff["scoreAfter"] finally: server.shutdown() + + +def test_json_responses_do_not_send_wildcard_cors(tmp_path: Path): + static_dir, image_store, store = _fixtures(tmp_path) + server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir) + _start(server) + base = f"http://127.0.0.1:{server.server_port}" + try: + response = urlopen(base + "/api/bootstrap") + assert 
response.headers.get("Access-Control-Allow-Origin") is None + finally: + server.shutdown() + + +def test_protected_routes_require_token_when_configured(tmp_path: Path): + static_dir, image_store, store = _fixtures(tmp_path) + server = build_server( + host="127.0.0.1", port=0, store=store, image_store=image_store, + static_dir=static_dir, auth_token="secret", + ) + _start(server) + base = f"http://127.0.0.1:{server.server_port}" + try: + with pytest.raises(HTTPError) as exc: + urlopen(base + "/api/bootstrap") + assert exc.value.code == 401 + + authed = Request(base + "/api/bootstrap", headers={"Authorization": "Bearer secret"}) + assert urlopen(authed).status == 200 + + # The GUI shell and liveness probe stay open so the operator can bootstrap. + assert urlopen(base + "/health").status == 200 + assert urlopen(base + "/").status == 200 + finally: + server.shutdown() + + +def test_oversized_body_rejected_with_413(tmp_path: Path): + static_dir, image_store, store = _fixtures(tmp_path) + server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir) + _start(server) + try: + # The server rejects on the Content-Length header before reading the + # body, so send only headers (a real 11MB upload would race the close). 
+ big_len = 11 * 1024 * 1024 + request = ( + "POST /api/knowledge/manual HTTP/1.1\r\n" + "Host: 127.0.0.1\r\n" + "Content-Type: application/json\r\n" + f"Content-Length: {big_len}\r\n" + "Connection: close\r\n" + "\r\n" + ).encode("ascii") + with socket.create_connection(("127.0.0.1", server.server_port), timeout=10) as sock: + sock.sendall(request) + response = b"" + while b"\r\n\r\n" not in response: + chunk = sock.recv(4096) + if not chunk: + break + response += chunk + assert b"413" in response.split(b"\r\n", 1)[0] + finally: + server.shutdown() + + +def test_malformed_json_body_returns_400(tmp_path: Path): + static_dir, image_store, store = _fixtures(tmp_path) + server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir) + _start(server) + base = f"http://127.0.0.1:{server.server_port}" + try: + request = Request(base + "/api/knowledge/manual", data=b"not json", method="POST", + headers={"Content-Type": "application/json"}) + with pytest.raises(HTTPError) as exc: + urlopen(request) + assert exc.value.code == 400 + finally: + server.shutdown() + + +def test_operational_error_surfaces_as_503(tmp_path: Path, monkeypatch): + static_dir, image_store, store = _fixtures(tmp_path) + + def boom(): + raise sqlite3.OperationalError("database is locked") + + monkeypatch.setattr(store, "bootstrap", boom) + server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir) + _start(server) + base = f"http://127.0.0.1:{server.server_port}" + try: + with pytest.raises(HTTPError) as exc: + urlopen(base + "/api/bootstrap") + assert exc.value.code == 503 + finally: + server.shutdown()