From 71a6da10a35d4ce7a844926b1fd42fc9d59f6fa9 Mon Sep 17 00:00:00 2001 From: changukyu Date: Mon, 22 Jun 2026 10:00:09 +0900 Subject: [PATCH] fix: separate Naver blog/web auto-search dedup; drop dead helpers _auto_naver_search previously used a shared 'continue' so a duplicate blog query skipped that iteration's web search entirely. Split into independent per-signature guards so blog and web auto-searches dedup separately. Also remove unused private helpers left by the god-file split: _is_generic_data_image_attr, _search_result_page_image_urls, _count, _set_active_queue (all grep-verified to have no callers). --- src/rights_filter/server/store_enrichment.py | 18 ++++++++++-------- src/rights_filter/server/store_page_scrape.py | 4 ---- src/rights_filter/server/store_persistence.py | 5 ----- src/rights_filter/server/store_queue.py | 8 -------- .../server/store_search_candidates.py | 6 ------ 5 files changed, 10 insertions(+), 31 deletions(-) diff --git a/src/rights_filter/server/store_enrichment.py b/src/rights_filter/server/store_enrichment.py index d9a2663..23d6904 100644 --- a/src/rights_filter/server/store_enrichment.py +++ b/src/rights_filter/server/store_enrichment.py @@ -651,10 +651,11 @@ class StoreEnrichmentMixin: submission_id, image_store, ) - if blog_query_count < self.provider_runtime.auto_naver_blog_query_limit: - blog_signature = _naver_blog_query_signature(query) - if blog_signature in existing_signatures: - continue + blog_signature = _naver_blog_query_signature(query) + if ( + blog_query_count < self.provider_runtime.auto_naver_blog_query_limit + and blog_signature not in existing_signatures + ): existing_signatures.add(blog_signature) blog_query_count += 1 @@ -703,10 +704,11 @@ class StoreEnrichmentMixin: } ) - if web_query_count < self.provider_runtime.auto_naver_web_query_limit: - web_signature = _naver_web_query_signature(query) - if web_signature in existing_signatures: - continue + web_signature = _naver_web_query_signature(query) + if ( + web_query_count < self.provider_runtime.auto_naver_web_query_limit + and web_signature not in existing_signatures + ): existing_signatures.add(web_signature) web_query_count += 1 diff --git a/src/rights_filter/server/store_page_scrape.py b/src/rights_filter/server/store_page_scrape.py index 98f577b..f05e957 100644 --- a/src/rights_filter/server/store_page_scrape.py +++ b/src/rights_filter/server/store_page_scrape.py @@ -254,10 +254,6 @@ def _srcset_descriptor_score(value: str) -> float: return 0.0 -def _is_generic_data_image_attr(name: str, value: str) -> bool: - return bool(_data_attribute_image_urls(name, value)) - - def _data_attribute_image_urls( name: str, value: str, diff --git a/src/rights_filter/server/store_persistence.py b/src/rights_filter/server/store_persistence.py index c09707a..03b3b93 100644 --- a/src/rights_filter/server/store_persistence.py +++ b/src/rights_filter/server/store_persistence.py @@ -16,11 +16,6 @@ from rights_filter.server.store_serialization import _validate_payload, _validat class StorePersistenceMixin: - def _count(self, table: str) -> int: - _validate_table(table) - with self._connect() as conn: - return int(conn.execute(f"select count(*) from {table}").fetchone()[0]) - def _connect(self) -> sqlite3.Connection: conn = sqlite3.connect(self.db_path) conn.execute("pragma foreign_keys = on") diff --git a/src/rights_filter/server/store_queue.py b/src/rights_filter/server/store_queue.py index 923bbcc..e4de0d6 100644 --- a/src/rights_filter/server/store_queue.py +++ b/src/rights_filter/server/store_queue.py @@ -26,14 +26,6 @@ class StoreQueueMixin: def _normalize_queue_folder(folder_path: Path | str) -> Path: return Path(folder_path).resolve() - def _set_active_queue(self, queue_id: str) -> None: - with self._connect() as conn: - conn.execute("update submission_queues set is_active = 0") - conn.execute( - "update submission_queues set is_active = 1 where id = ?", - (queue_id,), - ) - def _queue_row_by_id(self, queue_id: str) -> dict[str, Any] | None: with self._connect() as conn: row = conn.execute( diff --git a/src/rights_filter/server/store_search_candidates.py b/src/rights_filter/server/store_search_candidates.py index 3914143..6f26a58 100644 --- a/src/rights_filter/server/store_search_candidates.py +++ b/src/rights_filter/server/store_search_candidates.py @@ -458,12 +458,6 @@ class StoreSearchCandidatesMixin: return image_urls return image_urls - def _search_result_page_image_urls(self, source_evidence: Evidence) -> list[str]: - return [ - image_url - for image_url, _candidate_source in self._search_result_page_image_candidates(source_evidence) - ] - def _increment_knowledge_contribution_counts( self, submission_id: str,