fix: separate Naver blog/web auto-search dedup; drop dead helpers
_auto_naver_search previously used a shared 'continue', so a duplicate blog query skipped that iteration's web search entirely. Split the check into independent per-signature guards so that blog and web auto-searches are deduplicated separately. Also remove unused private helpers left over from the god-file split: _is_generic_data_image_attr, _search_result_page_image_urls, _count, and _set_active_queue (all grep-verified to have no callers).
This commit is contained in:
parent
35c63ce37a
commit
71a6da10a3
5 changed files with 10 additions and 31 deletions
|
|
@ -651,10 +651,11 @@ class StoreEnrichmentMixin:
|
||||||
submission_id,
|
submission_id,
|
||||||
image_store,
|
image_store,
|
||||||
)
|
)
|
||||||
if blog_query_count < self.provider_runtime.auto_naver_blog_query_limit:
|
|
||||||
blog_signature = _naver_blog_query_signature(query)
|
blog_signature = _naver_blog_query_signature(query)
|
||||||
if blog_signature in existing_signatures:
|
if (
|
||||||
continue
|
blog_query_count < self.provider_runtime.auto_naver_blog_query_limit
|
||||||
|
and blog_signature not in existing_signatures
|
||||||
|
):
|
||||||
existing_signatures.add(blog_signature)
|
existing_signatures.add(blog_signature)
|
||||||
blog_query_count += 1
|
blog_query_count += 1
|
||||||
|
|
||||||
|
|
@ -703,10 +704,11 @@ class StoreEnrichmentMixin:
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
if web_query_count < self.provider_runtime.auto_naver_web_query_limit:
|
|
||||||
web_signature = _naver_web_query_signature(query)
|
web_signature = _naver_web_query_signature(query)
|
||||||
if web_signature in existing_signatures:
|
if (
|
||||||
continue
|
web_query_count < self.provider_runtime.auto_naver_web_query_limit
|
||||||
|
and web_signature not in existing_signatures
|
||||||
|
):
|
||||||
existing_signatures.add(web_signature)
|
existing_signatures.add(web_signature)
|
||||||
web_query_count += 1
|
web_query_count += 1
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -254,10 +254,6 @@ def _srcset_descriptor_score(value: str) -> float:
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
def _is_generic_data_image_attr(name: str, value: str) -> bool:
|
|
||||||
return bool(_data_attribute_image_urls(name, value))
|
|
||||||
|
|
||||||
|
|
||||||
def _data_attribute_image_urls(
|
def _data_attribute_image_urls(
|
||||||
name: str,
|
name: str,
|
||||||
value: str,
|
value: str,
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,6 @@ from rights_filter.server.store_serialization import _validate_payload, _validat
|
||||||
|
|
||||||
|
|
||||||
class StorePersistenceMixin:
|
class StorePersistenceMixin:
|
||||||
def _count(self, table: str) -> int:
|
|
||||||
_validate_table(table)
|
|
||||||
with self._connect() as conn:
|
|
||||||
return int(conn.execute(f"select count(*) from {table}").fetchone()[0])
|
|
||||||
|
|
||||||
def _connect(self) -> sqlite3.Connection:
|
def _connect(self) -> sqlite3.Connection:
|
||||||
conn = sqlite3.connect(self.db_path)
|
conn = sqlite3.connect(self.db_path)
|
||||||
conn.execute("pragma foreign_keys = on")
|
conn.execute("pragma foreign_keys = on")
|
||||||
|
|
|
||||||
|
|
@ -26,14 +26,6 @@ class StoreQueueMixin:
|
||||||
def _normalize_queue_folder(folder_path: Path | str) -> Path:
|
def _normalize_queue_folder(folder_path: Path | str) -> Path:
|
||||||
return Path(folder_path).resolve()
|
return Path(folder_path).resolve()
|
||||||
|
|
||||||
def _set_active_queue(self, queue_id: str) -> None:
|
|
||||||
with self._connect() as conn:
|
|
||||||
conn.execute("update submission_queues set is_active = 0")
|
|
||||||
conn.execute(
|
|
||||||
"update submission_queues set is_active = 1 where id = ?",
|
|
||||||
(queue_id,),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _queue_row_by_id(self, queue_id: str) -> dict[str, Any] | None:
|
def _queue_row_by_id(self, queue_id: str) -> dict[str, Any] | None:
|
||||||
with self._connect() as conn:
|
with self._connect() as conn:
|
||||||
row = conn.execute(
|
row = conn.execute(
|
||||||
|
|
|
||||||
|
|
@ -458,12 +458,6 @@ class StoreSearchCandidatesMixin:
|
||||||
return image_urls
|
return image_urls
|
||||||
return image_urls
|
return image_urls
|
||||||
|
|
||||||
def _search_result_page_image_urls(self, source_evidence: Evidence) -> list[str]:
|
|
||||||
return [
|
|
||||||
image_url
|
|
||||||
for image_url, _candidate_source in self._search_result_page_image_candidates(source_evidence)
|
|
||||||
]
|
|
||||||
|
|
||||||
def _increment_knowledge_contribution_counts(
|
def _increment_knowledge_contribution_counts(
|
||||||
self,
|
self,
|
||||||
submission_id: str,
|
submission_id: str,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue