refactor: extract URL helpers into store_url_utils
Move the SQLite store's 5 URL helpers (_decoded_nested_url, _is_http_url, _url_path_has_image_suffix, _url_has_image_format_hint, _url_looks_like_image) into a focused module and import them back. Pure relocation, no behavior change. First step of splitting the 5300-line sqlite_store god file.
This commit is contained in:
parent
7317bfb2b3
commit
e66f9d5001
2 changed files with 59 additions and 38 deletions
|
|
@ -45,6 +45,13 @@ from rights_filter.integrations.env_clients import ProviderRuntime, build_provid
|
||||||
from rights_filter.integrations.external_policy import ExternalApiPolicy
|
from rights_filter.integrations.external_policy import ExternalApiPolicy
|
||||||
from rights_filter.jobs.batch_analyzer import BatchAnalyzer, SubmissionImage
|
from rights_filter.jobs.batch_analyzer import BatchAnalyzer, SubmissionImage
|
||||||
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
|
from rights_filter.server.image_store import LocalSubmissionImageStore, SUPPORTED_IMAGE_SUFFIXES
|
||||||
|
from rights_filter.server.store_url_utils import (
|
||||||
|
_decoded_nested_url,
|
||||||
|
_is_http_url,
|
||||||
|
_url_has_image_format_hint,
|
||||||
|
_url_looks_like_image,
|
||||||
|
_url_path_has_image_suffix,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
EVIDENCE_OPERATOR_STATUSES = {
|
EVIDENCE_OPERATOR_STATUSES = {
|
||||||
|
|
@ -4947,44 +4954,6 @@ def _decoded_url_reference(value: str) -> str:
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
|
|
||||||
def _decoded_nested_url(value: str) -> str:
|
|
||||||
candidate = str(value).strip()
|
|
||||||
for _ in range(3):
|
|
||||||
decoded = unquote(candidate).strip()
|
|
||||||
if decoded == candidate:
|
|
||||||
break
|
|
||||||
candidate = decoded
|
|
||||||
return candidate
|
|
||||||
|
|
||||||
|
|
||||||
def _is_http_url(url: str) -> bool:
|
|
||||||
parsed = urlparse(url)
|
|
||||||
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
|
|
||||||
|
|
||||||
|
|
||||||
def _url_path_has_image_suffix(url: str) -> bool:
    """Return True when the path component of *url* ends in a supported image suffix.

    Only the URL path is inspected (query string and fragment are ignored),
    and the suffix is lower-cased before the membership test against
    ``SUPPORTED_IMAGE_SUFFIXES``.

    Inputs that ``urlparse`` rejects with ``ValueError`` (e.g. an unmatched
    ``[`` in the netloc) yield False instead of raising.
    """
    # NOTE(review): assumes SUPPORTED_IMAGE_SUFFIXES holds lower-case,
    # dot-prefixed entries such as ".png" -- confirm in image_store.
    try:
        path = urlparse(url).path
    except ValueError:
        # Unparseable URL: it cannot be said to point at an image.
        return False
    return Path(path).suffix.lower() in SUPPORTED_IMAGE_SUFFIXES
|
|
||||||
|
|
||||||
|
|
||||||
def _url_has_image_format_hint(url: str) -> bool:
    """Return True when a query parameter of *url* names a supported image format.

    Only parameters whose key is one of ``format``, ``fm``, ``ext``,
    ``extension``, ``mime``, ``output`` or ``type`` (case-insensitive) are
    inspected; blank values are skipped.  Each value is reduced to a bare
    format token -- anything after ``;`` is dropped, a leading ``.`` and an
    ``image/`` prefix are removed, and a trailing ``+suffix`` such as ``+xml``
    is cut off -- and compared against ``SUPPORTED_IMAGE_SUFFIXES`` with their
    leading dots stripped.

    Inputs that ``urlparse`` rejects with ``ValueError`` yield False instead
    of raising.
    """
    try:
        query = urlparse(url).query
    except ValueError:
        # Unparseable URL: there is no query string to take a hint from.
        return False

    image_formats = {suffix.lstrip(".") for suffix in SUPPORTED_IMAGE_SUFFIXES}
    image_format_keys = {"format", "fm", "ext", "extension", "mime", "output", "type"}
    for key, hint in parse_qsl(query, keep_blank_values=False):
        if key.lower().replace("-", "_") not in image_format_keys:
            continue
        normalized = hint.lower().split(";", 1)[0].strip().lstrip(".")
        if normalized.startswith("image/"):
            normalized = normalized.split("/", 1)[1]
        # parse_qsl decodes a literal "+" to a space, so "image/svg+xml" can
        # arrive as "image/svg xml"; treat a space like "+" so both the raw
        # and the %2B-encoded spelling reduce to "svg".
        normalized = normalized.replace(" ", "+").split("+", 1)[0]
        if normalized in image_formats:
            return True
    return False
|
|
||||||
|
|
||||||
|
|
||||||
def _url_looks_like_image(url: str) -> bool:
    """Return True when *url* appears to reference an image.

    The path suffix is checked first; the query-string format hint is only
    consulted when the suffix check fails.
    """
    if _url_path_has_image_suffix(url):
        return True
    return _url_has_image_format_hint(url)
|
|
||||||
|
|
||||||
|
|
||||||
def _submission_payload(
|
def _submission_payload(
|
||||||
record: dict[str, Any],
|
record: dict[str, Any],
|
||||||
score: int,
|
score: int,
|
||||||
|
|
|
||||||
52
src/rights_filter/server/store_url_utils.py
Normal file
52
src/rights_filter/server/store_url_utils.py
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
"""URL helpers used by the SQLite store's evidence/image handling.
|
||||||
|
|
||||||
|
Extracted from sqlite_store.py to keep that module focused. Behavior is
|
||||||
|
unchanged. Note these intentionally treat the local-submission suffix set
|
||||||
|
(SUPPORTED_IMAGE_SUFFIXES, which includes .svg) as image-like; the integration
|
||||||
|
adapters keep their own stricter suffix policy for external results.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from urllib.parse import parse_qsl, unquote, urlparse
|
||||||
|
|
||||||
|
from rights_filter.server.image_store import SUPPORTED_IMAGE_SUFFIXES
|
||||||
|
|
||||||
|
|
||||||
|
def _decoded_nested_url(value: str) -> str:
|
||||||
|
candidate = str(value).strip()
|
||||||
|
for _ in range(3):
|
||||||
|
decoded = unquote(candidate).strip()
|
||||||
|
if decoded == candidate:
|
||||||
|
break
|
||||||
|
candidate = decoded
|
||||||
|
return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def _is_http_url(url: str) -> bool:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
|
||||||
|
|
||||||
|
|
||||||
|
def _url_path_has_image_suffix(url: str) -> bool:
    """Return True when the path component of *url* ends in a supported image suffix.

    Only the URL path is inspected (query string and fragment are ignored),
    and the suffix is lower-cased before the membership test against
    ``SUPPORTED_IMAGE_SUFFIXES``.

    Inputs that ``urlparse`` rejects with ``ValueError`` (e.g. an unmatched
    ``[`` in the netloc) yield False instead of raising.
    """
    # NOTE(review): assumes SUPPORTED_IMAGE_SUFFIXES holds lower-case,
    # dot-prefixed entries such as ".png" -- confirm in image_store.
    try:
        path = urlparse(url).path
    except ValueError:
        # Unparseable URL: it cannot be said to point at an image.
        return False
    return Path(path).suffix.lower() in SUPPORTED_IMAGE_SUFFIXES
|
||||||
|
|
||||||
|
|
||||||
|
def _url_has_image_format_hint(url: str) -> bool:
    """Return True when a query parameter of *url* names a supported image format.

    Only parameters whose key is one of ``format``, ``fm``, ``ext``,
    ``extension``, ``mime``, ``output`` or ``type`` (case-insensitive) are
    inspected; blank values are skipped.  Each value is reduced to a bare
    format token -- anything after ``;`` is dropped, a leading ``.`` and an
    ``image/`` prefix are removed, and a trailing ``+suffix`` such as ``+xml``
    is cut off -- and compared against ``SUPPORTED_IMAGE_SUFFIXES`` with their
    leading dots stripped.

    Inputs that ``urlparse`` rejects with ``ValueError`` yield False instead
    of raising.
    """
    try:
        query = urlparse(url).query
    except ValueError:
        # Unparseable URL: there is no query string to take a hint from.
        return False

    image_formats = {suffix.lstrip(".") for suffix in SUPPORTED_IMAGE_SUFFIXES}
    image_format_keys = {"format", "fm", "ext", "extension", "mime", "output", "type"}
    for key, hint in parse_qsl(query, keep_blank_values=False):
        if key.lower().replace("-", "_") not in image_format_keys:
            continue
        normalized = hint.lower().split(";", 1)[0].strip().lstrip(".")
        if normalized.startswith("image/"):
            normalized = normalized.split("/", 1)[1]
        # parse_qsl decodes a literal "+" to a space, so "image/svg+xml" can
        # arrive as "image/svg xml"; treat a space like "+" so both the raw
        # and the %2B-encoded spelling reduce to "svg".
        normalized = normalized.replace(" ", "+").split("+", 1)[0]
        if normalized in image_formats:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _url_looks_like_image(url: str) -> bool:
    """Return True when *url* appears to reference an image.

    The path suffix is checked first; the query-string format hint is only
    consulted when the suffix check fails.
    """
    if _url_path_has_image_suffix(url):
        return True
    return _url_has_image_format_hint(url)
|
||||||
Loading…
Reference in a new issue