fix: harden operator HTTP server
Remove the wildcard CORS header (this prevents cross-origin reads of biometric/case data from localhost), add an optional shared-token auth gate on data routes (COPYRIGHTER_AUTH_TOKEN; GUI shell + /health stay open), cap request body size (413), and map malformed JSON to 400 and SQLite lock contention to 503.
This commit is contained in:
parent
62c13faafa
commit
e9a15e8110
4 changed files with 179 additions and 3 deletions
|
|
@ -1,3 +1,10 @@
|
|||
# Shared bearer token for the operator server's data routes (/api, media).
|
||||
# Unset = open access (single-host dev only). Set for any networked deployment.
|
||||
COPYRIGHTER_AUTH_TOKEN=
|
||||
# Face-crop (biometric) retention in days; expired crops are purged with an
|
||||
# audit event. 0 = keep indefinitely.
|
||||
COPYRIGHTER_FACE_CROP_RETENTION_DAYS=90
|
||||
|
||||
NAVER_CLIENT_ID=
|
||||
NAVER_CLIENT_SECRET=
|
||||
NAVER_SEARCH_DISPLAY=10
|
||||
|
|
|
|||
|
|
@ -27,12 +27,17 @@ def main() -> None:
|
|||
store = CopyrighterStore(Path(args.db), provider_runtime=provider_runtime)
|
||||
store.initialize()
|
||||
|
||||
auth_token = os.environ.get("COPYRIGHTER_AUTH_TOKEN", "").strip() or None
|
||||
if auth_token is None:
|
||||
print("WARNING: COPYRIGHTER_AUTH_TOKEN not set — data routes are unauthenticated.")
|
||||
|
||||
server = build_server(
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
store=store,
|
||||
image_store=image_store,
|
||||
static_dir=Path(args.static),
|
||||
auth_token=auth_token,
|
||||
)
|
||||
print(f"Copyrighter API server listening on http://{args.host}:{args.port}")
|
||||
print(f"SQLite DB: {Path(args.db).resolve()}")
|
||||
|
|
|
|||
|
|
@ -4,10 +4,11 @@ import base64
|
|||
import json
|
||||
import mimetypes
|
||||
import re
|
||||
import sqlite3
|
||||
from http import HTTPStatus
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote, urlparse
|
||||
from urllib.parse import parse_qs, unquote, urlparse
|
||||
|
||||
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
|
||||
from rights_filter.server.sqlite_store import CopyrighterStore
|
||||
|
|
@ -22,6 +23,26 @@ mimetypes.add_type("font/woff", ".woff")
|
|||
_IMMUTABLE_SUFFIXES = {".woff2", ".woff"}
|
||||
_IMMUTABLE_CACHE_CONTROL = "public, max-age=31536000, immutable"
|
||||
|
||||
# An operator JSON request body is small (decisions, memos, knowledge edits);
|
||||
# uploads come base64-encoded inside the body, so allow headroom but never an
|
||||
# unbounded read that a single request could use to exhaust memory.
|
||||
_MAX_BODY_BYTES = 10 * 1024 * 1024
|
||||
|
||||
|
||||
class _PayloadTooLarge(Exception):
|
||||
"""Raised when a request body exceeds _MAX_BODY_BYTES."""
|
||||
|
||||
|
||||
# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set.
|
||||
# The static GUI shell and /health stay open so the operator can bootstrap.
|
||||
_PROTECTED_PREFIXES = (
|
||||
"/api/",
|
||||
"/media/",
|
||||
"/knowledge-media/",
|
||||
"/collected-media/",
|
||||
"/face-crop-media/",
|
||||
)
|
||||
|
||||
|
||||
def build_server(
|
||||
host: str,
|
||||
|
|
@ -29,6 +50,7 @@ def build_server(
|
|||
store: CopyrighterStore,
|
||||
image_store: LocalSubmissionImageStore,
|
||||
static_dir: Path | str,
|
||||
auth_token: str | None = None,
|
||||
) -> ThreadingHTTPServer:
|
||||
static_root = Path(static_dir).resolve()
|
||||
|
||||
|
|
@ -37,6 +59,8 @@ def build_server(
|
|||
|
||||
def do_GET(self) -> None:
|
||||
path = _path(self.path)
|
||||
if not self._require_auth():
|
||||
return
|
||||
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
|
||||
try:
|
||||
if path == "/health":
|
||||
|
|
@ -64,6 +88,10 @@ def build_server(
|
|||
self._file(store.face_crop_media_path(unquote(path.removeprefix("/face-crop-media/"))), untrusted=True)
|
||||
else:
|
||||
self._static(path, static_root)
|
||||
except _PayloadTooLarge:
|
||||
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
|
||||
except sqlite3.OperationalError:
|
||||
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
|
||||
except KeyError:
|
||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||
except ValueError as exc:
|
||||
|
|
@ -71,6 +99,8 @@ def build_server(
|
|||
|
||||
def do_POST(self) -> None:
|
||||
path = _path(self.path)
|
||||
if not self._require_auth():
|
||||
return
|
||||
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
|
||||
try:
|
||||
body = self._body()
|
||||
|
|
@ -164,6 +194,10 @@ def build_server(
|
|||
self._json(store.emergency_disable_external_providers())
|
||||
else:
|
||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||
except _PayloadTooLarge:
|
||||
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
|
||||
except sqlite3.OperationalError:
|
||||
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
|
||||
except KeyError:
|
||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||
except ValueError as exc:
|
||||
|
|
@ -171,6 +205,8 @@ def build_server(
|
|||
|
||||
def do_PATCH(self) -> None:
|
||||
path = _path(self.path)
|
||||
if not self._require_auth():
|
||||
return
|
||||
try:
|
||||
body = self._body()
|
||||
if path.startswith("/api/providers/"):
|
||||
|
|
@ -190,6 +226,10 @@ def build_server(
|
|||
self._json(store.update_knowledge_entry(entry_id, body))
|
||||
else:
|
||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||
except _PayloadTooLarge:
|
||||
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
|
||||
except sqlite3.OperationalError:
|
||||
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
|
||||
except KeyError:
|
||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||
except ValueError as exc:
|
||||
|
|
@ -198,18 +238,42 @@ def build_server(
|
|||
def log_message(self, format: str, *args: object) -> None:
|
||||
return
|
||||
|
||||
def _require_auth(self) -> bool:
    """Check the shared token on protected routes.

    Returns:
        True when the request may proceed. False when a 401 JSON response
        has already been written, in which case the caller must stop
        handling the request.
    """
    # Function-scope import keeps this method self-contained.
    from hmac import compare_digest

    # No token configured -> open access (dev / single-host default).
    if not auth_token:
        return True
    path = _path(self.path)
    # The GUI shell (static assets) and liveness probe must load
    # unauthenticated so the operator can enter the token; only data
    # routes are protected.
    if not path.startswith(_PROTECTED_PREFIXES):
        return True
    # Compare as bytes with a constant-time primitive so response timing
    # does not reveal how much of a guessed token matched. (compare_digest
    # only accepts ASCII-only str, hence the explicit encode.)
    expected_header = f"Bearer {auth_token}".encode("utf-8")
    header = self.headers.get("Authorization", "")
    if compare_digest(header.encode("utf-8"), expected_header):
        return True
    # <img>/media cannot send headers, so accept ?token= for those.
    query_token = parse_qs(urlparse(self.path).query).get("token", [""])[0]
    if compare_digest(query_token.encode("utf-8"), auth_token.encode("utf-8")):
        return True
    self._json({"error": "unauthorized"}, HTTPStatus.UNAUTHORIZED)
    return False
|
||||
|
||||
def _body(self) -> dict[str, object]:
    """Read and decode the JSON request body, bounded by _MAX_BODY_BYTES.

    Returns:
        The decoded JSON object, or {} when the request has no body.

    Raises:
        _PayloadTooLarge: the declared Content-Length exceeds the cap.
        ValueError: Content-Length is not a non-negative integer, or the
            body is not a UTF-8 encoded JSON object.
    """
    raw_length = self.headers.get("Content-Length", "0") or "0"
    try:
        length = int(raw_length)
    except ValueError as exc:
        raise ValueError("invalid Content-Length header") from exc
    if length < 0:
        # rfile.read(-1) reads until EOF, which would bypass the size cap
        # and could block on a kept-alive connection.
        raise ValueError("invalid Content-Length header")
    if length > _MAX_BODY_BYTES:
        raise _PayloadTooLarge()
    if not length:
        return {}
    try:
        payload = json.loads(self.rfile.read(length).decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise ValueError("invalid JSON body") from exc
    if not isinstance(payload, dict):
        # Callers are typed to receive a mapping; reject arrays/scalars up
        # front instead of failing later with an unrelated error.
        raise ValueError("invalid JSON body")
    return payload
|
||||
|
||||
def _json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None:
|
||||
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||||
self.send_response(status)
|
||||
self.send_header("Content-Type", "application/json; charset=utf-8")
|
||||
self.send_header("Content-Length", str(len(data)))
|
||||
self.send_header("Access-Control-Allow-Origin", "*")
|
||||
self.end_headers()
|
||||
self.wfile.write(data)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from urllib.error import HTTPError
|
||||
from urllib.request import Request, urlopen
|
||||
import base64
|
||||
import json
|
||||
import socket
|
||||
import sqlite3
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -1295,3 +1298,100 @@ def test_rerun_enrichment_records_evidence_diff(tmp_path: Path):
|
|||
assert second_diff["scoreBefore"] == second_diff["scoreAfter"]
|
||||
finally:
|
||||
server.shutdown()
|
||||
|
||||
|
||||
def test_json_responses_do_not_send_wildcard_cors(tmp_path: Path):
    """JSON API responses must not carry an Access-Control-Allow-Origin header."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
    _start(server)
    base = f"http://127.0.0.1:{server.server_port}"
    try:
        # Close the response deterministically instead of leaving the
        # socket for the garbage collector (avoids ResourceWarning noise).
        with urlopen(base + "/api/bootstrap") as response:
            assert response.headers.get("Access-Control-Allow-Origin") is None
    finally:
        server.shutdown()
|
||||
|
||||
|
||||
def test_protected_routes_require_token_when_configured(tmp_path: Path):
    """With a token configured, /api needs it while /health and the GUI shell stay open."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(
        host="127.0.0.1", port=0, store=store, image_store=image_store,
        static_dir=static_dir, auth_token="secret",
    )
    _start(server)
    base = f"http://127.0.0.1:{server.server_port}"
    try:
        with pytest.raises(HTTPError) as exc:
            urlopen(base + "/api/bootstrap")
        # HTTPError wraps the open response; read the code, then release it.
        status = exc.value.code
        exc.value.close()
        assert status == 401

        authed = Request(base + "/api/bootstrap", headers={"Authorization": "Bearer secret"})
        with urlopen(authed) as response:
            assert response.status == 200

        # The GUI shell and liveness probe stay open so the operator can bootstrap.
        with urlopen(base + "/health") as response:
            assert response.status == 200
        with urlopen(base + "/") as response:
            assert response.status == 200
    finally:
        server.shutdown()
|
||||
|
||||
|
||||
def test_oversized_body_rejected_with_413(tmp_path: Path):
    """A Content-Length above the body cap is answered with 413."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
    _start(server)
    try:
        # The server rejects on the Content-Length header before reading the
        # body, so only headers are sent (a real 11MB upload would race the
        # close).
        big_len = 11 * 1024 * 1024
        header_lines = [
            "POST /api/knowledge/manual HTTP/1.1",
            "Host: 127.0.0.1",
            "Content-Type: application/json",
            f"Content-Length: {big_len}",
            "Connection: close",
            "",
            "",
        ]
        raw_request = "\r\n".join(header_lines).encode("ascii")
        received = bytearray()
        with socket.create_connection(("127.0.0.1", server.server_port), timeout=10) as conn:
            conn.sendall(raw_request)
            # Read until the response head is complete or the peer closes.
            while b"\r\n\r\n" not in received and (chunk := conn.recv(4096)):
                received += chunk
        status_line = bytes(received).split(b"\r\n", 1)[0]
        assert b"413" in status_line
    finally:
        server.shutdown()
|
||||
|
||||
|
||||
def test_malformed_json_body_returns_400(tmp_path: Path):
    """A body that is not valid JSON is answered with 400."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
    _start(server)
    base = f"http://127.0.0.1:{server.server_port}"
    try:
        request = Request(base + "/api/knowledge/manual", data=b"not json", method="POST",
                          headers={"Content-Type": "application/json"})
        with pytest.raises(HTTPError) as exc:
            urlopen(request)
        # HTTPError wraps the open response; read the code, then release it.
        status = exc.value.code
        exc.value.close()
        assert status == 400
    finally:
        server.shutdown()
|
||||
|
||||
|
||||
def test_operational_error_surfaces_as_503(tmp_path: Path, monkeypatch):
    """A sqlite3.OperationalError raised by the store is answered with 503."""
    static_dir, image_store, store = _fixtures(tmp_path)

    def boom():
        raise sqlite3.OperationalError("database is locked")

    monkeypatch.setattr(store, "bootstrap", boom)
    server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
    _start(server)
    base = f"http://127.0.0.1:{server.server_port}"
    try:
        with pytest.raises(HTTPError) as exc:
            urlopen(base + "/api/bootstrap")
        # HTTPError wraps the open response; read the code, then release it.
        status = exc.value.code
        exc.value.close()
        assert status == 503
    finally:
        server.shutdown()
|
||||
|
|
|
|||
Loading…
Reference in a new issue