fix: harden operator HTTP server

Remove the wildcard CORS header (this prevents cross-origin reads of
biometric/case data served from localhost), add an optional shared-token auth
gate on data routes (COPYRIGHTER_AUTH_TOKEN; GUI shell + /health stay open),
cap request body size (413), and map malformed JSON to 400 and SQLite lock
contention to 503.
This commit is contained in:
유창욱 2026-06-20 18:18:54 +09:00
parent 62c13faafa
commit e9a15e8110
4 changed files with 179 additions and 3 deletions

View file

@ -1,3 +1,10 @@
# Shared bearer token for the operator server's data routes (/api, media).
# Unset = open access (single-host dev only). Set for any networked deployment.
COPYRIGHTER_AUTH_TOKEN=
# Face-crop (biometric) retention in days; expired crops are purged with an
# audit event. 0 = keep indefinitely.
COPYRIGHTER_FACE_CROP_RETENTION_DAYS=90
NAVER_CLIENT_ID= NAVER_CLIENT_ID=
NAVER_CLIENT_SECRET= NAVER_CLIENT_SECRET=
NAVER_SEARCH_DISPLAY=10 NAVER_SEARCH_DISPLAY=10

View file

@ -27,12 +27,17 @@ def main() -> None:
store = CopyrighterStore(Path(args.db), provider_runtime=provider_runtime) store = CopyrighterStore(Path(args.db), provider_runtime=provider_runtime)
store.initialize() store.initialize()
auth_token = os.environ.get("COPYRIGHTER_AUTH_TOKEN", "").strip() or None
if auth_token is None:
print("WARNING: COPYRIGHTER_AUTH_TOKEN not set — data routes are unauthenticated.")
server = build_server( server = build_server(
host=args.host, host=args.host,
port=args.port, port=args.port,
store=store, store=store,
image_store=image_store, image_store=image_store,
static_dir=Path(args.static), static_dir=Path(args.static),
auth_token=auth_token,
) )
print(f"Copyrighter API server listening on http://{args.host}:{args.port}") print(f"Copyrighter API server listening on http://{args.host}:{args.port}")
print(f"SQLite DB: {Path(args.db).resolve()}") print(f"SQLite DB: {Path(args.db).resolve()}")

View file

@ -4,10 +4,11 @@ import base64
import json import json
import mimetypes import mimetypes
import re import re
import sqlite3
from http import HTTPStatus from http import HTTPStatus
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path from pathlib import Path
from urllib.parse import unquote, urlparse from urllib.parse import parse_qs, unquote, urlparse
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
from rights_filter.server.sqlite_store import CopyrighterStore from rights_filter.server.sqlite_store import CopyrighterStore
@ -22,6 +23,26 @@ mimetypes.add_type("font/woff", ".woff")
_IMMUTABLE_SUFFIXES = {".woff2", ".woff"} _IMMUTABLE_SUFFIXES = {".woff2", ".woff"}
_IMMUTABLE_CACHE_CONTROL = "public, max-age=31536000, immutable" _IMMUTABLE_CACHE_CONTROL = "public, max-age=31536000, immutable"
# An operator JSON request body is small (decisions, memos, knowledge edits);
# uploads come base64-encoded inside the body, so allow headroom but never an
# unbounded read that a single request could use to exhaust memory.
_MAX_BODY_BYTES = 10 * 1024 * 1024
class _PayloadTooLarge(Exception):
    """Signal that a request's declared body size exceeds _MAX_BODY_BYTES.

    Raised while reading the request body; the request handlers catch it and
    answer with HTTP 413 (Request Entity Too Large).
    """
# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set.
# The static GUI shell and /health stay open so the operator can bootstrap.
_PROTECTED_PREFIXES = (
"/api/",
"/media/",
"/knowledge-media/",
"/collected-media/",
"/face-crop-media/",
)
def build_server( def build_server(
host: str, host: str,
@ -29,6 +50,7 @@ def build_server(
store: CopyrighterStore, store: CopyrighterStore,
image_store: LocalSubmissionImageStore, image_store: LocalSubmissionImageStore,
static_dir: Path | str, static_dir: Path | str,
auth_token: str | None = None,
) -> ThreadingHTTPServer: ) -> ThreadingHTTPServer:
static_root = Path(static_dir).resolve() static_root = Path(static_dir).resolve()
@ -37,6 +59,8 @@ def build_server(
def do_GET(self) -> None: def do_GET(self) -> None:
path = _path(self.path) path = _path(self.path)
if not self._require_auth():
return
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
try: try:
if path == "/health": if path == "/health":
@ -64,6 +88,10 @@ def build_server(
self._file(store.face_crop_media_path(unquote(path.removeprefix("/face-crop-media/"))), untrusted=True) self._file(store.face_crop_media_path(unquote(path.removeprefix("/face-crop-media/"))), untrusted=True)
else: else:
self._static(path, static_root) self._static(path, static_root)
except _PayloadTooLarge:
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
except sqlite3.OperationalError:
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
except KeyError: except KeyError:
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
except ValueError as exc: except ValueError as exc:
@ -71,6 +99,8 @@ def build_server(
def do_POST(self) -> None: def do_POST(self) -> None:
path = _path(self.path) path = _path(self.path)
if not self._require_auth():
return
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
try: try:
body = self._body() body = self._body()
@ -164,6 +194,10 @@ def build_server(
self._json(store.emergency_disable_external_providers()) self._json(store.emergency_disable_external_providers())
else: else:
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
except _PayloadTooLarge:
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
except sqlite3.OperationalError:
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
except KeyError: except KeyError:
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
except ValueError as exc: except ValueError as exc:
@ -171,6 +205,8 @@ def build_server(
def do_PATCH(self) -> None: def do_PATCH(self) -> None:
path = _path(self.path) path = _path(self.path)
if not self._require_auth():
return
try: try:
body = self._body() body = self._body()
if path.startswith("/api/providers/"): if path.startswith("/api/providers/"):
@ -190,6 +226,10 @@ def build_server(
self._json(store.update_knowledge_entry(entry_id, body)) self._json(store.update_knowledge_entry(entry_id, body))
else: else:
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
except _PayloadTooLarge:
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
except sqlite3.OperationalError:
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
except KeyError: except KeyError:
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND) self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
except ValueError as exc: except ValueError as exc:
@ -198,18 +238,42 @@ def build_server(
def log_message(self, format: str, *args: object) -> None:
    """Discard the message; this override deliberately logs nothing."""
    return None
def _require_auth(self) -> bool:
    """Gate the current request on the shared token.

    Returns:
        True when the request may proceed: no token is configured, the
        request path is not under one of ``_PROTECTED_PREFIXES``, or the
        caller presented the token either as ``Authorization: Bearer <token>``
        or as a ``?token=`` query parameter.
        False after a 401 JSON response has been written; the caller must
        stop handling the request in that case.
    """
    import hmac  # local import: only needed for the constant-time comparison

    # No token configured -> open access (dev / single-host default).
    if not auth_token:
        return True
    path = _path(self.path)
    # The GUI shell (static assets) and liveness probe must load
    # unauthenticated so the operator can enter the token; only data
    # routes are protected.
    if not path.startswith(_PROTECTED_PREFIXES):
        return True

    def _same(candidate: str, expected: str) -> bool:
        # A plain ``==`` returns as soon as the first differing character is
        # found, so response timing can leak how much of the secret matched.
        # compare_digest avoids that; compare bytes because it rejects
        # non-ASCII ``str`` arguments.
        return hmac.compare_digest(candidate.encode("utf-8"), expected.encode("utf-8"))

    header = self.headers.get("Authorization", "")
    if _same(header, f"Bearer {auth_token}"):
        return True
    # <img>/media cannot send headers, so accept ?token= for those.
    # NOTE(review): a token in the URL can end up in browser history and
    # proxy logs; consider limiting this fallback to the media prefixes.
    query_token = parse_qs(urlparse(self.path).query).get("token", [""])[0]
    if _same(query_token, auth_token):
        return True
    self._json({"error": "unauthorized"}, HTTPStatus.UNAUTHORIZED)
    return False
def _body(self) -> dict[str, object]:
    """Read the request body and decode it as UTF-8 JSON.

    Returns:
        The decoded JSON value, or an empty dict when the request declares
        no body (missing, empty, or zero ``Content-Length``).

    Raises:
        _PayloadTooLarge: the declared length exceeds ``_MAX_BODY_BYTES``.
        ValueError: ``Content-Length`` is not an integer or is negative, or
            the body is not valid UTF-8 encoded JSON.
    """
    length = int(self.headers.get("Content-Length", "0") or "0")
    if length < 0:
        # A negative size makes rfile.read() read until EOF, which would
        # bypass the size cap and block on a connection that stays open.
        raise ValueError("invalid Content-Length")
    if length > _MAX_BODY_BYTES:
        raise _PayloadTooLarge()
    if not length:
        return {}
    try:
        return json.loads(self.rfile.read(length).decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        # Both are ValueError subclasses already; re-raise with one stable,
        # client-safe message instead of parser/codec internals.
        raise ValueError("invalid JSON body") from exc
def _json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None:
    """Serialize *payload* as UTF-8 JSON and send it as the whole response.

    Only Content-Type and Content-Length headers are emitted; no
    Access-Control-Allow-Origin header is sent.
    """
    encoded = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    response_headers = (
        ("Content-Type", "application/json; charset=utf-8"),
        ("Content-Length", str(len(encoded))),
    )
    self.send_response(status)
    for header_name, header_value in response_headers:
        self.send_header(header_name, header_value)
    self.end_headers()
    self.wfile.write(encoded)

View file

@ -1,8 +1,11 @@
from pathlib import Path from pathlib import Path
from threading import Thread from threading import Thread
from urllib.error import HTTPError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
import base64 import base64
import json import json
import socket
import sqlite3
import pytest import pytest
@ -1295,3 +1298,100 @@ def test_rerun_enrichment_records_evidence_diff(tmp_path: Path):
assert second_diff["scoreBefore"] == second_diff["scoreAfter"] assert second_diff["scoreBefore"] == second_diff["scoreAfter"]
finally: finally:
server.shutdown() server.shutdown()
def test_json_responses_do_not_send_wildcard_cors(tmp_path: Path):
    """A JSON API response carries no Access-Control-Allow-Origin header."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(
        host="127.0.0.1",
        port=0,
        store=store,
        image_store=image_store,
        static_dir=static_dir,
    )
    _start(server)
    bootstrap_url = f"http://127.0.0.1:{server.server_port}" + "/api/bootstrap"
    try:
        cors_header = urlopen(bootstrap_url).headers.get("Access-Control-Allow-Origin")
        assert cors_header is None
    finally:
        server.shutdown()
def test_protected_routes_require_token_when_configured(tmp_path: Path):
    """With a token configured, data routes need it while /health and / do not."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(
        host="127.0.0.1",
        port=0,
        store=store,
        image_store=image_store,
        static_dir=static_dir,
        auth_token="secret",
    )
    _start(server)
    origin = f"http://127.0.0.1:{server.server_port}"
    try:
        # An anonymous request to a data route is refused.
        with pytest.raises(HTTPError) as denied:
            urlopen(origin + "/api/bootstrap")
        assert denied.value.code == 401

        # The same route succeeds once the bearer token is presented.
        with_token = Request(
            origin + "/api/bootstrap",
            headers={"Authorization": "Bearer secret"},
        )
        assert urlopen(with_token).status == 200

        # The GUI shell and liveness probe stay open so the operator can bootstrap.
        for open_route in ("/health", "/"):
            assert urlopen(origin + open_route).status == 200
    finally:
        server.shutdown()
def test_oversized_body_rejected_with_413(tmp_path: Path):
    """A request declaring an 11 MiB Content-Length is answered with 413."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(
        host="127.0.0.1",
        port=0,
        store=store,
        image_store=image_store,
        static_dir=static_dir,
    )
    _start(server)
    try:
        # The server rejects on the Content-Length header before reading the
        # body, so send only headers (a real 11MB upload would race the close).
        big_len = 11 * 1024 * 1024
        raw_request = (
            "POST /api/knowledge/manual HTTP/1.1\r\n"
            "Host: 127.0.0.1\r\n"
            "Content-Type: application/json\r\n"
            f"Content-Length: {big_len}\r\n"
            "Connection: close\r\n"
            "\r\n"
        ).encode("ascii")
        received = b""
        with socket.create_connection(("127.0.0.1", server.server_port), timeout=10) as conn:
            conn.sendall(raw_request)
            # Read until the header terminator arrives or the peer closes.
            while b"\r\n\r\n" not in received and (chunk := conn.recv(4096)):
                received += chunk
        status_line, _, _ = received.partition(b"\r\n")
        assert b"413" in status_line
    finally:
        server.shutdown()
def test_malformed_json_body_returns_400(tmp_path: Path):
    """A POST whose body is not valid JSON is answered with 400."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(
        host="127.0.0.1",
        port=0,
        store=store,
        image_store=image_store,
        static_dir=static_dir,
    )
    _start(server)
    endpoint = f"http://127.0.0.1:{server.server_port}" + "/api/knowledge/manual"
    try:
        bad_post = Request(
            endpoint,
            data=b"not json",
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with pytest.raises(HTTPError) as rejected:
            urlopen(bad_post)
        assert rejected.value.code == 400
    finally:
        server.shutdown()
def test_operational_error_surfaces_as_503(tmp_path: Path, monkeypatch):
    """A sqlite3.OperationalError raised by the store is reported as 503."""
    static_dir, image_store, store = _fixtures(tmp_path)

    def locked_bootstrap():
        raise sqlite3.OperationalError("database is locked")

    # Make the store call behind /api/bootstrap fail like a locked database.
    monkeypatch.setattr(store, "bootstrap", locked_bootstrap)
    server = build_server(
        host="127.0.0.1",
        port=0,
        store=store,
        image_store=image_store,
        static_dir=static_dir,
    )
    _start(server)
    bootstrap_url = f"http://127.0.0.1:{server.server_port}" + "/api/bootstrap"
    try:
        with pytest.raises(HTTPError) as unavailable:
            urlopen(bootstrap_url)
        assert unavailable.value.code == 503
    finally:
        server.shutdown()