fix: separate Naver blog/web auto-search dedup; drop dead helpers
_auto_naver_search previously used a shared 'continue' for its duplicate checks, so a duplicate blog query skipped the rest of that iteration, including its web search. The checks are now split into independent per-signature guards, so blog and web auto-searches are deduplicated separately. Also remove unused private helpers left over from the god-file split: _is_generic_data_image_attr, _search_result_page_image_urls, _count, and _set_active_queue (all grep-verified to have no callers).
This commit is contained in:
parent
35c63ce37a
commit
71a6da10a3
5 changed files with 10 additions and 31 deletions
|
|
@ -651,10 +651,11 @@ class StoreEnrichmentMixin:
|
|||
submission_id,
|
||||
image_store,
|
||||
)
|
||||
if blog_query_count < self.provider_runtime.auto_naver_blog_query_limit:
|
||||
blog_signature = _naver_blog_query_signature(query)
|
||||
if blog_signature in existing_signatures:
|
||||
continue
|
||||
if (
|
||||
blog_query_count < self.provider_runtime.auto_naver_blog_query_limit
|
||||
and blog_signature not in existing_signatures
|
||||
):
|
||||
existing_signatures.add(blog_signature)
|
||||
blog_query_count += 1
|
||||
|
||||
|
|
@ -703,10 +704,11 @@ class StoreEnrichmentMixin:
|
|||
}
|
||||
)
|
||||
|
||||
if web_query_count < self.provider_runtime.auto_naver_web_query_limit:
|
||||
web_signature = _naver_web_query_signature(query)
|
||||
if web_signature in existing_signatures:
|
||||
continue
|
||||
if (
|
||||
web_query_count < self.provider_runtime.auto_naver_web_query_limit
|
||||
and web_signature not in existing_signatures
|
||||
):
|
||||
existing_signatures.add(web_signature)
|
||||
web_query_count += 1
|
||||
|
||||
|
|
|
|||
|
|
@ -254,10 +254,6 @@ def _srcset_descriptor_score(value: str) -> float:
|
|||
return 0.0
|
||||
|
||||
|
||||
def _is_generic_data_image_attr(name: str, value: str) -> bool:
|
||||
return bool(_data_attribute_image_urls(name, value))
|
||||
|
||||
|
||||
def _data_attribute_image_urls(
|
||||
name: str,
|
||||
value: str,
|
||||
|
|
|
|||
|
|
@ -16,11 +16,6 @@ from rights_filter.server.store_serialization import _validate_payload, _validat
|
|||
|
||||
|
||||
class StorePersistenceMixin:
|
||||
def _count(self, table: str) -> int:
|
||||
_validate_table(table)
|
||||
with self._connect() as conn:
|
||||
return int(conn.execute(f"select count(*) from {table}").fetchone()[0])
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(self.db_path)
|
||||
conn.execute("pragma foreign_keys = on")
|
||||
|
|
|
|||
|
|
@ -26,14 +26,6 @@ class StoreQueueMixin:
|
|||
def _normalize_queue_folder(folder_path: Path | str) -> Path:
|
||||
return Path(folder_path).resolve()
|
||||
|
||||
def _set_active_queue(self, queue_id: str) -> None:
|
||||
with self._connect() as conn:
|
||||
conn.execute("update submission_queues set is_active = 0")
|
||||
conn.execute(
|
||||
"update submission_queues set is_active = 1 where id = ?",
|
||||
(queue_id,),
|
||||
)
|
||||
|
||||
def _queue_row_by_id(self, queue_id: str) -> dict[str, Any] | None:
|
||||
with self._connect() as conn:
|
||||
row = conn.execute(
|
||||
|
|
|
|||
|
|
@ -458,12 +458,6 @@ class StoreSearchCandidatesMixin:
|
|||
return image_urls
|
||||
return image_urls
|
||||
|
||||
def _search_result_page_image_urls(self, source_evidence: Evidence) -> list[str]:
|
||||
return [
|
||||
image_url
|
||||
for image_url, _candidate_source in self._search_result_page_image_candidates(source_evidence)
|
||||
]
|
||||
|
||||
def _increment_knowledge_contribution_counts(
|
||||
self,
|
||||
submission_id: str,
|
||||
|
|
|
|||
Loading…
Reference in a new issue