refactor: extract URL helpers into store_url_utils

Move the SQLite store's 5 URL helpers (_decoded_nested_url, _is_http_url,
_url_path_has_image_suffix, _url_has_image_format_hint, _url_looks_like_image)
into a focused module and import them back. Pure relocation, no behavior
change. First step of splitting the 5300-line sqlite_store god file.
This commit is contained in:
유창욱 2026-06-20 20:35:21 +09:00
parent 7317bfb2b3
commit e66f9d5001
2 changed files with 59 additions and 38 deletions

View file

@ -45,6 +45,13 @@ from rights_filter.integrations.env_clients import ProviderRuntime, build_provid
from rights_filter.integrations.external_policy import ExternalApiPolicy from rights_filter.integrations.external_policy import ExternalApiPolicy
from rights_filter.jobs.batch_analyzer import BatchAnalyzer, SubmissionImage from rights_filter.jobs.batch_analyzer import BatchAnalyzer, SubmissionImage
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
from rights_filter.server.store_url_utils import (
_decoded_nested_url,
_is_http_url,
_url_has_image_format_hint,
_url_looks_like_image,
_url_path_has_image_suffix,
)
EVIDENCE_OPERATOR_STATUSES = { EVIDENCE_OPERATOR_STATUSES = {
@ -4947,44 +4954,6 @@ def _decoded_url_reference(value: str) -> str:
return raw return raw
def _decoded_nested_url(value: str) -> str:
candidate = str(value).strip()
for _ in range(3):
decoded = unquote(candidate).strip()
if decoded == candidate:
break
candidate = decoded
return candidate
def _is_http_url(url: str) -> bool:
parsed = urlparse(url)
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
def _url_path_has_image_suffix(url: str) -> bool:
    """Return True when the path component of *url* ends in a supported image suffix.

    Only the parsed path is examined (query string and fragment are ignored),
    and the suffix is lower-cased before the membership test against
    SUPPORTED_IMAGE_SUFFIXES.
    """
    url_path = urlparse(url).path
    extension = Path(url_path).suffix.lower()
    return extension in SUPPORTED_IMAGE_SUFFIXES
def _url_has_image_format_hint(url: str) -> bool:
    """Return True when a format-style query parameter of *url* names an image type.

    Only query keys from a fixed set (``format``, ``fm``, ``ext``,
    ``extension``, ``mime``, ``output``, ``type``) are inspected. Each value
    is lower-cased, cut at the first ``;``, stripped of whitespace and leading
    dots, and relieved of a leading ``image/`` prefix before being compared
    with the SUPPORTED_IMAGE_SUFFIXES entries (leading dots removed).
    """
    # Compare against bare format names, i.e. the suffixes without their dot.
    image_formats = {suffix.lstrip(".") for suffix in SUPPORTED_IMAGE_SUFFIXES}
    image_format_keys = {"format", "fm", "ext", "extension", "mime", "output", "type"}
    # keep_blank_values=False: parameters with an empty value are not yielded.
    for key, hint in parse_qsl(urlparse(url).query, keep_blank_values=False):
        # Keys are matched case-insensitively, with "-" treated as "_".
        if key.lower().replace("-", "_") not in image_format_keys:
            continue
        # Drop any ";parameter" tail, surrounding whitespace and leading dots.
        normalized = hint.lower().split(";", 1)[0].strip().lstrip(".")
        if normalized.startswith("image/"):
            normalized = normalized.split("/", 1)[1]
        # Drop a structured-syntax tail after "+" (e.g. "svg+xml" -> "svg").
        # NOTE(review): assumed to run for every hint, not only "image/..."
        # values — confirm the nesting against the repository copy.
        normalized = normalized.split("+", 1)[0]
        if normalized in image_formats:
            return True
    return False
def _url_looks_like_image(url: str) -> bool:
    """Return True when *url* appears to reference an image.

    The path suffix is checked first; the query-string format hint is only
    consulted when the suffix check fails.
    """
    if _url_path_has_image_suffix(url):
        return True
    return _url_has_image_format_hint(url)
def _submission_payload( def _submission_payload(
record: dict[str, Any], record: dict[str, Any],
score: int, score: int,

View file

@ -0,0 +1,52 @@
"""URL helpers used by the SQLite store's evidence/image handling.
Extracted from sqlite_store.py to keep that module focused. Behavior is
unchanged. Note these intentionally treat the local-submission suffix set
(SUPPORTED_IMAGE_SUFFIXES, which includes .svg) as image-like; the integration
adapters keep their own stricter suffix policy for external results.
"""
from __future__ import annotations
from pathlib import Path
from urllib.parse import parse_qsl, unquote, urlparse
from rights_filter.server.image_store import SUPPORTED_IMAGE_SUFFIXES
def _decoded_nested_url(value: str) -> str:
candidate = str(value).strip()
for _ in range(3):
decoded = unquote(candidate).strip()
if decoded == candidate:
break
candidate = decoded
return candidate
def _is_http_url(url: str) -> bool:
parsed = urlparse(url)
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
def _url_path_has_image_suffix(url: str) -> bool:
    """Return True when the path component of *url* ends in a supported image suffix.

    Only the parsed path is examined (query string and fragment are ignored),
    and the suffix is lower-cased before the membership test against
    SUPPORTED_IMAGE_SUFFIXES.
    """
    url_path = urlparse(url).path
    extension = Path(url_path).suffix.lower()
    return extension in SUPPORTED_IMAGE_SUFFIXES
def _url_has_image_format_hint(url: str) -> bool:
    """Return True when a format-style query parameter of *url* names an image type.

    Only query keys from a fixed set (``format``, ``fm``, ``ext``,
    ``extension``, ``mime``, ``output``, ``type``) are inspected. Each value
    is lower-cased, cut at the first ``;``, stripped of whitespace and leading
    dots, and relieved of a leading ``image/`` prefix before being compared
    with the SUPPORTED_IMAGE_SUFFIXES entries (leading dots removed).
    """
    # Compare against bare format names, i.e. the suffixes without their dot.
    image_formats = {suffix.lstrip(".") for suffix in SUPPORTED_IMAGE_SUFFIXES}
    image_format_keys = {"format", "fm", "ext", "extension", "mime", "output", "type"}
    # keep_blank_values=False: parameters with an empty value are not yielded.
    for key, hint in parse_qsl(urlparse(url).query, keep_blank_values=False):
        # Keys are matched case-insensitively, with "-" treated as "_".
        if key.lower().replace("-", "_") not in image_format_keys:
            continue
        # Drop any ";parameter" tail, surrounding whitespace and leading dots.
        normalized = hint.lower().split(";", 1)[0].strip().lstrip(".")
        if normalized.startswith("image/"):
            normalized = normalized.split("/", 1)[1]
        # Drop a structured-syntax tail after "+" (e.g. "svg+xml" -> "svg").
        # NOTE(review): assumed to run for every hint, not only "image/..."
        # values — confirm the nesting against the repository copy.
        normalized = normalized.split("+", 1)[0]
        if normalized in image_formats:
            return True
    return False
def _url_looks_like_image(url: str) -> bool:
    """Return True when *url* appears to reference an image.

    The path suffix is checked first; the query-string format hint is only
    consulted when the suffix check fails.
    """
    if _url_path_has_image_suffix(url):
        return True
    return _url_has_image_format_hint(url)