fix: harden operator HTTP server
Remove the wildcard CORS header (it let any origin read biometric/case data from a server running on localhost), add an optional shared-token auth gate on data routes (COPYRIGHTER_AUTH_TOKEN; the GUI shell and /health stay open), cap request body size (oversized requests get 413), and map malformed JSON to 400 and SQLite operational errors such as lock contention to 503.
This commit is contained in:
parent
62c13faafa
commit
e9a15e8110
4 changed files with 179 additions and 3 deletions
|
|
@ -1,3 +1,10 @@
|
||||||
|
# Shared bearer token for the operator server's data routes (/api, media).
|
||||||
|
# Unset = open access (single-host dev only). Set for any networked deployment.
|
||||||
|
COPYRIGHTER_AUTH_TOKEN=
|
||||||
|
# Face-crop (biometric) retention in days; expired crops are purged with an
|
||||||
|
# audit event. 0 = keep indefinitely.
|
||||||
|
COPYRIGHTER_FACE_CROP_RETENTION_DAYS=90
|
||||||
|
|
||||||
NAVER_CLIENT_ID=
|
NAVER_CLIENT_ID=
|
||||||
NAVER_CLIENT_SECRET=
|
NAVER_CLIENT_SECRET=
|
||||||
NAVER_SEARCH_DISPLAY=10
|
NAVER_SEARCH_DISPLAY=10
|
||||||
|
|
|
||||||
|
|
@ -27,12 +27,17 @@ def main() -> None:
|
||||||
store = CopyrighterStore(Path(args.db), provider_runtime=provider_runtime)
|
store = CopyrighterStore(Path(args.db), provider_runtime=provider_runtime)
|
||||||
store.initialize()
|
store.initialize()
|
||||||
|
|
||||||
|
auth_token = os.environ.get("COPYRIGHTER_AUTH_TOKEN", "").strip() or None
|
||||||
|
if auth_token is None:
|
||||||
|
print("WARNING: COPYRIGHTER_AUTH_TOKEN not set — data routes are unauthenticated.")
|
||||||
|
|
||||||
server = build_server(
|
server = build_server(
|
||||||
host=args.host,
|
host=args.host,
|
||||||
port=args.port,
|
port=args.port,
|
||||||
store=store,
|
store=store,
|
||||||
image_store=image_store,
|
image_store=image_store,
|
||||||
static_dir=Path(args.static),
|
static_dir=Path(args.static),
|
||||||
|
auth_token=auth_token,
|
||||||
)
|
)
|
||||||
print(f"Copyrighter API server listening on http://{args.host}:{args.port}")
|
print(f"Copyrighter API server listening on http://{args.host}:{args.port}")
|
||||||
print(f"SQLite DB: {Path(args.db).resolve()}")
|
print(f"SQLite DB: {Path(args.db).resolve()}")
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,11 @@ import base64
|
||||||
import json
|
import json
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import re
|
import re
|
||||||
|
import sqlite3
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from urllib.parse import unquote, urlparse
|
from urllib.parse import parse_qs, unquote, urlparse
|
||||||
|
|
||||||
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
|
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
|
||||||
from rights_filter.server.sqlite_store import CopyrighterStore
|
from rights_filter.server.sqlite_store import CopyrighterStore
|
||||||
|
|
@ -22,6 +23,26 @@ mimetypes.add_type("font/woff", ".woff")
|
||||||
_IMMUTABLE_SUFFIXES = {".woff2", ".woff"}
|
_IMMUTABLE_SUFFIXES = {".woff2", ".woff"}
|
||||||
_IMMUTABLE_CACHE_CONTROL = "public, max-age=31536000, immutable"
|
_IMMUTABLE_CACHE_CONTROL = "public, max-age=31536000, immutable"
|
||||||
|
|
||||||
|
# An operator JSON request body is small (decisions, memos, knowledge edits);
|
||||||
|
# uploads come base64-encoded inside the body, so allow headroom but never an
|
||||||
|
# unbounded read that a single request could use to exhaust memory.
|
||||||
|
_MAX_BODY_BYTES = 10 * 1024 * 1024
|
||||||
|
|
||||||
|
|
||||||
|
class _PayloadTooLarge(Exception):
|
||||||
|
"""Raised when a request body exceeds _MAX_BODY_BYTES."""
|
||||||
|
|
||||||
|
|
||||||
|
# Data routes requiring the shared token when COPYRIGHTER_AUTH_TOKEN is set.
|
||||||
|
# The static GUI shell and /health stay open so the operator can bootstrap.
|
||||||
|
_PROTECTED_PREFIXES = (
|
||||||
|
"/api/",
|
||||||
|
"/media/",
|
||||||
|
"/knowledge-media/",
|
||||||
|
"/collected-media/",
|
||||||
|
"/face-crop-media/",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_server(
|
def build_server(
|
||||||
host: str,
|
host: str,
|
||||||
|
|
@ -29,6 +50,7 @@ def build_server(
|
||||||
store: CopyrighterStore,
|
store: CopyrighterStore,
|
||||||
image_store: LocalSubmissionImageStore,
|
image_store: LocalSubmissionImageStore,
|
||||||
static_dir: Path | str,
|
static_dir: Path | str,
|
||||||
|
auth_token: str | None = None,
|
||||||
) -> ThreadingHTTPServer:
|
) -> ThreadingHTTPServer:
|
||||||
static_root = Path(static_dir).resolve()
|
static_root = Path(static_dir).resolve()
|
||||||
|
|
||||||
|
|
@ -37,6 +59,8 @@ def build_server(
|
||||||
|
|
||||||
def do_GET(self) -> None:
|
def do_GET(self) -> None:
|
||||||
path = _path(self.path)
|
path = _path(self.path)
|
||||||
|
if not self._require_auth():
|
||||||
|
return
|
||||||
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
|
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
|
||||||
try:
|
try:
|
||||||
if path == "/health":
|
if path == "/health":
|
||||||
|
|
@ -64,6 +88,10 @@ def build_server(
|
||||||
self._file(store.face_crop_media_path(unquote(path.removeprefix("/face-crop-media/"))), untrusted=True)
|
self._file(store.face_crop_media_path(unquote(path.removeprefix("/face-crop-media/"))), untrusted=True)
|
||||||
else:
|
else:
|
||||||
self._static(path, static_root)
|
self._static(path, static_root)
|
||||||
|
except _PayloadTooLarge:
|
||||||
|
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
|
|
@ -71,6 +99,8 @@ def build_server(
|
||||||
|
|
||||||
def do_POST(self) -> None:
|
def do_POST(self) -> None:
|
||||||
path = _path(self.path)
|
path = _path(self.path)
|
||||||
|
if not self._require_auth():
|
||||||
|
return
|
||||||
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
|
active_store = lambda: store.active_submission_image_store(image_store.root) # noqa: E731 - lazy: only opens a DB connection on routes that use it
|
||||||
try:
|
try:
|
||||||
body = self._body()
|
body = self._body()
|
||||||
|
|
@ -164,6 +194,10 @@ def build_server(
|
||||||
self._json(store.emergency_disable_external_providers())
|
self._json(store.emergency_disable_external_providers())
|
||||||
else:
|
else:
|
||||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||||
|
except _PayloadTooLarge:
|
||||||
|
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
|
|
@ -171,6 +205,8 @@ def build_server(
|
||||||
|
|
||||||
def do_PATCH(self) -> None:
|
def do_PATCH(self) -> None:
|
||||||
path = _path(self.path)
|
path = _path(self.path)
|
||||||
|
if not self._require_auth():
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
body = self._body()
|
body = self._body()
|
||||||
if path.startswith("/api/providers/"):
|
if path.startswith("/api/providers/"):
|
||||||
|
|
@ -190,6 +226,10 @@ def build_server(
|
||||||
self._json(store.update_knowledge_entry(entry_id, body))
|
self._json(store.update_knowledge_entry(entry_id, body))
|
||||||
else:
|
else:
|
||||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||||
|
except _PayloadTooLarge:
|
||||||
|
self._json({"error": "payload too large"}, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
self._json({"error": "service busy, retry"}, HTTPStatus.SERVICE_UNAVAILABLE)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
|
|
@ -198,18 +238,42 @@ def build_server(
|
||||||
def log_message(self, format: str, *args: object) -> None:
|
def log_message(self, format: str, *args: object) -> None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def _require_auth(self) -> bool:
    """Check the shared token on protected routes.

    Returns True when the request may proceed. Every request is allowed
    when no token is configured, and paths outside ``_PROTECTED_PREFIXES``
    are always allowed. Otherwise the token must arrive either as an
    ``Authorization: Bearer <token>`` header or as a ``?token=`` query
    parameter. On failure a 401 JSON response is written and False is
    returned, so the caller must stop handling the request.
    """
    import hmac  # local import: only this method needs it

    # No token configured -> open access (dev / single-host default).
    if not auth_token:
        return True
    path = _path(self.path)
    # The GUI shell (static assets) and liveness probe must load
    # unauthenticated so the operator can enter the token; only data
    # routes are protected.
    if not path.startswith(_PROTECTED_PREFIXES):
        return True

    def _same(candidate: str, expected: str) -> bool:
        # Constant-time comparison so response timing does not reveal how
        # much of a guessed token is correct. Compare bytes because
        # compare_digest rejects non-ASCII str arguments.
        return hmac.compare_digest(
            candidate.encode("utf-8", "surrogateescape"),
            expected.encode("utf-8", "surrogateescape"),
        )

    header = self.headers.get("Authorization", "")
    if _same(header, f"Bearer {auth_token}"):
        return True
    # Elements such as <img> cannot send an Authorization header, so a
    # ?token= query parameter is accepted as well. This applies to every
    # protected route, not only the media prefixes.
    query_token = parse_qs(urlparse(self.path).query).get("token", [""])[0]
    if _same(query_token, auth_token):
        return True
    self._json({"error": "unauthorized"}, HTTPStatus.UNAUTHORIZED)
    return False
|
||||||
|
|
||||||
def _body(self) -> dict[str, object]:
    """Read and decode the JSON request body, bounded by ``_MAX_BODY_BYTES``.

    Returns an empty dict when the request declares no body.

    Raises:
        _PayloadTooLarge: the declared Content-Length exceeds the cap.
        ValueError: Content-Length is not a non-negative integer, or the
            body is not valid UTF-8 encoded JSON.
    """
    length = int(self.headers.get("Content-Length", "0") or "0")
    if length < 0:
        # rfile.read() with a negative size reads until EOF, which would
        # bypass the size cap below and could block on a kept-open socket.
        raise ValueError("invalid Content-Length")
    if length > _MAX_BODY_BYTES:
        # Reject on the declared size before reading anything.
        raise _PayloadTooLarge()
    if not length:
        return {}
    try:
        return json.loads(self.rfile.read(length).decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # Both are ValueError subclasses; re-raise with one stable message
        # for the handlers' ValueError branch.
        raise ValueError("invalid JSON body") from exc
|
||||||
|
|
||||||
def _json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None:
|
def _json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None:
|
||||||
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||||||
self.send_response(status)
|
self.send_response(status)
|
||||||
self.send_header("Content-Type", "application/json; charset=utf-8")
|
self.send_header("Content-Type", "application/json; charset=utf-8")
|
||||||
self.send_header("Content-Length", str(len(data)))
|
self.send_header("Content-Length", str(len(data)))
|
||||||
self.send_header("Access-Control-Allow-Origin", "*")
|
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write(data)
|
self.wfile.write(data)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
from urllib.error import HTTPError
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
|
import socket
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
@ -1295,3 +1298,100 @@ def test_rerun_enrichment_records_evidence_diff(tmp_path: Path):
|
||||||
assert second_diff["scoreBefore"] == second_diff["scoreAfter"]
|
assert second_diff["scoreBefore"] == second_diff["scoreAfter"]
|
||||||
finally:
|
finally:
|
||||||
server.shutdown()
|
server.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
def test_json_responses_do_not_send_wildcard_cors(tmp_path: Path):
|
||||||
|
static_dir, image_store, store = _fixtures(tmp_path)
|
||||||
|
server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
|
||||||
|
_start(server)
|
||||||
|
base = f"http://127.0.0.1:{server.server_port}"
|
||||||
|
try:
|
||||||
|
response = urlopen(base + "/api/bootstrap")
|
||||||
|
assert response.headers.get("Access-Control-Allow-Origin") is None
|
||||||
|
finally:
|
||||||
|
server.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
def test_protected_routes_require_token_when_configured(tmp_path: Path):
    """With a token configured, data routes need it; the shell and /health do not."""
    static_dir, image_store, store = _fixtures(tmp_path)
    server = build_server(
        host="127.0.0.1", port=0, store=store, image_store=image_store,
        static_dir=static_dir, auth_token="secret",
    )
    _start(server)
    base = f"http://127.0.0.1:{server.server_port}"
    try:
        # No credentials at all.
        with pytest.raises(HTTPError) as exc:
            urlopen(base + "/api/bootstrap")
        assert exc.value.code == 401

        # A Bearer header carrying the wrong token must be rejected too;
        # without this case the test would pass even if any Bearer value
        # were accepted.
        wrong = Request(base + "/api/bootstrap", headers={"Authorization": "Bearer not-the-secret"})
        with pytest.raises(HTTPError) as exc:
            urlopen(wrong)
        assert exc.value.code == 401

        authed = Request(base + "/api/bootstrap", headers={"Authorization": "Bearer secret"})
        assert urlopen(authed).status == 200

        # The GUI shell and liveness probe stay open so the operator can bootstrap.
        assert urlopen(base + "/health").status == 200
        assert urlopen(base + "/").status == 200
    finally:
        server.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
def test_oversized_body_rejected_with_413(tmp_path: Path):
|
||||||
|
static_dir, image_store, store = _fixtures(tmp_path)
|
||||||
|
server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
|
||||||
|
_start(server)
|
||||||
|
try:
|
||||||
|
# The server rejects on the Content-Length header before reading the
|
||||||
|
# body, so send only headers (a real 11MB upload would race the close).
|
||||||
|
big_len = 11 * 1024 * 1024
|
||||||
|
request = (
|
||||||
|
"POST /api/knowledge/manual HTTP/1.1\r\n"
|
||||||
|
"Host: 127.0.0.1\r\n"
|
||||||
|
"Content-Type: application/json\r\n"
|
||||||
|
f"Content-Length: {big_len}\r\n"
|
||||||
|
"Connection: close\r\n"
|
||||||
|
"\r\n"
|
||||||
|
).encode("ascii")
|
||||||
|
with socket.create_connection(("127.0.0.1", server.server_port), timeout=10) as sock:
|
||||||
|
sock.sendall(request)
|
||||||
|
response = b""
|
||||||
|
while b"\r\n\r\n" not in response:
|
||||||
|
chunk = sock.recv(4096)
|
||||||
|
if not chunk:
|
||||||
|
break
|
||||||
|
response += chunk
|
||||||
|
assert b"413" in response.split(b"\r\n", 1)[0]
|
||||||
|
finally:
|
||||||
|
server.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
def test_malformed_json_body_returns_400(tmp_path: Path):
|
||||||
|
static_dir, image_store, store = _fixtures(tmp_path)
|
||||||
|
server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
|
||||||
|
_start(server)
|
||||||
|
base = f"http://127.0.0.1:{server.server_port}"
|
||||||
|
try:
|
||||||
|
request = Request(base + "/api/knowledge/manual", data=b"not json", method="POST",
|
||||||
|
headers={"Content-Type": "application/json"})
|
||||||
|
with pytest.raises(HTTPError) as exc:
|
||||||
|
urlopen(request)
|
||||||
|
assert exc.value.code == 400
|
||||||
|
finally:
|
||||||
|
server.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
def test_operational_error_surfaces_as_503(tmp_path: Path, monkeypatch):
|
||||||
|
static_dir, image_store, store = _fixtures(tmp_path)
|
||||||
|
|
||||||
|
def boom():
|
||||||
|
raise sqlite3.OperationalError("database is locked")
|
||||||
|
|
||||||
|
monkeypatch.setattr(store, "bootstrap", boom)
|
||||||
|
server = build_server(host="127.0.0.1", port=0, store=store, image_store=image_store, static_dir=static_dir)
|
||||||
|
_start(server)
|
||||||
|
base = f"http://127.0.0.1:{server.server_port}"
|
||||||
|
try:
|
||||||
|
with pytest.raises(HTTPError) as exc:
|
||||||
|
urlopen(base + "/api/bootstrap")
|
||||||
|
assert exc.value.code == 503
|
||||||
|
finally:
|
||||||
|
server.shutdown()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue