refactor: extract URL helpers into store_url_utils
Move the SQLite store's 5 URL helpers (_decoded_nested_url, _is_http_url, _url_path_has_image_suffix, _url_has_image_format_hint, _url_looks_like_image) into a focused module and import them back. Pure relocation, no behavior change. First step of splitting the 5300-line sqlite_store god file.
This commit is contained in:
parent
7317bfb2b3
commit
e66f9d5001
2 changed files with 59 additions and 38 deletions
|
|
@ -45,6 +45,13 @@ from rights_filter.integrations.env_clients import ProviderRuntime, build_provid
|
|||
from rights_filter.integrations.external_policy import ExternalApiPolicy
|
||||
from rights_filter.jobs.batch_analyzer import BatchAnalyzer, SubmissionImage
|
||||
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
|
||||
from rights_filter.server.store_url_utils import (
|
||||
_decoded_nested_url,
|
||||
_is_http_url,
|
||||
_url_has_image_format_hint,
|
||||
_url_looks_like_image,
|
||||
_url_path_has_image_suffix,
|
||||
)
|
||||
|
||||
|
||||
EVIDENCE_OPERATOR_STATUSES = {
|
||||
|
|
@ -4947,44 +4954,6 @@ def _decoded_url_reference(value: str) -> str:
|
|||
return raw
|
||||
|
||||
|
||||
def _decoded_nested_url(value: str) -> str:
|
||||
candidate = str(value).strip()
|
||||
for _ in range(3):
|
||||
decoded = unquote(candidate).strip()
|
||||
if decoded == candidate:
|
||||
break
|
||||
candidate = decoded
|
||||
return candidate
|
||||
|
||||
|
||||
def _is_http_url(url: str) -> bool:
|
||||
parsed = urlparse(url)
|
||||
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
|
||||
|
||||
|
||||
def _url_path_has_image_suffix(url: str) -> bool:
|
||||
return Path(urlparse(url).path).suffix.lower() in SUPPORTED_IMAGE_SUFFIXES
|
||||
|
||||
|
||||
def _url_has_image_format_hint(url: str) -> bool:
|
||||
image_formats = {suffix.lstrip(".") for suffix in SUPPORTED_IMAGE_SUFFIXES}
|
||||
image_format_keys = {"format", "fm", "ext", "extension", "mime", "output", "type"}
|
||||
for key, hint in parse_qsl(urlparse(url).query, keep_blank_values=False):
|
||||
if key.lower().replace("-", "_") not in image_format_keys:
|
||||
continue
|
||||
normalized = hint.lower().split(";", 1)[0].strip().lstrip(".")
|
||||
if normalized.startswith("image/"):
|
||||
normalized = normalized.split("/", 1)[1]
|
||||
normalized = normalized.split("+", 1)[0]
|
||||
if normalized in image_formats:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _url_looks_like_image(url: str) -> bool:
|
||||
return _url_path_has_image_suffix(url) or _url_has_image_format_hint(url)
|
||||
|
||||
|
||||
def _submission_payload(
|
||||
record: dict[str, Any],
|
||||
score: int,
|
||||
|
|
|
|||
52
src/rights_filter/server/store_url_utils.py
Normal file
52
src/rights_filter/server/store_url_utils.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""URL helpers used by the SQLite store's evidence/image handling.
|
||||
|
||||
Extracted from sqlite_store.py to keep that module focused. Behavior is
|
||||
unchanged. Note these intentionally treat the local-submission suffix set
|
||||
(SUPPORTED_IMAGE_SUFFIXES, which includes .svg) as image-like; the integration
|
||||
adapters keep their own stricter suffix policy for external results.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from urllib.parse import parse_qsl, unquote, urlparse
|
||||
|
||||
from rights_filter.server.image_store import SUPPORTED_IMAGE_SUFFIXES
|
||||
|
||||
|
||||
def _decoded_nested_url(value: str) -> str:
|
||||
candidate = str(value).strip()
|
||||
for _ in range(3):
|
||||
decoded = unquote(candidate).strip()
|
||||
if decoded == candidate:
|
||||
break
|
||||
candidate = decoded
|
||||
return candidate
|
||||
|
||||
|
||||
def _is_http_url(url: str) -> bool:
|
||||
parsed = urlparse(url)
|
||||
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
|
||||
|
||||
|
||||
def _url_path_has_image_suffix(url: str) -> bool:
    """Return True when the path component of ``url`` ends in a supported image suffix.

    Only the URL path is inspected (query and fragment are ignored); the
    suffix is lower-cased before being looked up in
    ``SUPPORTED_IMAGE_SUFFIXES``. A URL that ``urlparse`` rejects with
    ``ValueError`` is treated as having no image suffix instead of raising.
    """
    try:
        path = urlparse(url).path
    except ValueError:
        return False
    return Path(path).suffix.lower() in SUPPORTED_IMAGE_SUFFIXES
|
||||
|
||||
|
||||
def _url_has_image_format_hint(url: str) -> bool:
    """Return True when a query parameter of ``url`` names a supported image format.

    Only parameters whose key (case-insensitive, with ``-`` read as ``_``) is
    one of ``format``, ``fm``, ``ext``, ``extension``, ``mime``, ``output`` or
    ``type`` are considered; blank values are skipped. Each value is
    lower-cased, cut at the first ``;``, stripped of a leading dot and an
    ``image/`` prefix, and cut at any ``+suffix`` before being compared with
    the suffixes in ``SUPPORTED_IMAGE_SUFFIXES`` (without their dots).

    A URL that ``urlparse`` rejects with ``ValueError`` yields False.
    """
    try:
        query = urlparse(url).query
    except ValueError:
        # A URL that cannot be split cannot carry a usable format hint.
        return False

    image_formats = {suffix.lstrip(".") for suffix in SUPPORTED_IMAGE_SUFFIXES}
    image_format_keys = {"format", "fm", "ext", "extension", "mime", "output", "type"}
    for key, hint in parse_qsl(query, keep_blank_values=False):
        if key.lower().replace("-", "_") not in image_format_keys:
            continue
        normalized = hint.lower().split(";", 1)[0].strip().lstrip(".")
        if normalized.startswith("image/"):
            normalized = normalized.split("/", 1)[1]
        # parse_qsl decodes a literal "+" in the query string as a space, so
        # "mime=image/svg+xml" arrives here as "image/svg xml". Treat the
        # space like "+" so the structured-syntax suffix is still removed.
        normalized = normalized.replace(" ", "+").split("+", 1)[0]
        if normalized in image_formats:
            return True
    return False
|
||||
|
||||
|
||||
def _url_looks_like_image(url: str) -> bool:
    """Return True when ``url`` looks like an image by path suffix or query hint.

    The path-suffix check runs first; the query-parameter format hint is only
    consulted when the suffix check does not match.
    """
    if _url_path_has_image_suffix(url):
        return True
    return _url_has_image_format_hint(url)
|
||||
Loading…
Reference in a new issue