refactor: extract SQLite persistence primitives into StorePersistenceMixin
Move connection/transaction management and the generic _put/_get/_all row access plus evidence-read helpers into a mixin; CopyrighterStore now inherits it. Methods rely on the host class's self.db_path / self._write_lock. Behavior-preserving. sqlite_store.py 3368 -> 3072 lines.
This commit is contained in:
parent
3be7b016ce
commit
40501e13f1
2 changed files with 315 additions and 298 deletions
|
|
@ -2,14 +2,12 @@ from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import html
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import threading
|
import threading
|
||||||
from contextlib import contextmanager
|
|
||||||
from dataclasses import replace
|
from dataclasses import replace
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -62,6 +60,7 @@ from rights_filter.server.store_page_scrape import (
|
||||||
_normalized_remote_image_url,
|
_normalized_remote_image_url,
|
||||||
_search_result_direct_image_urls,
|
_search_result_direct_image_urls,
|
||||||
)
|
)
|
||||||
|
from rights_filter.server.store_persistence import StorePersistenceMixin
|
||||||
from rights_filter.server.store_schema import (
|
from rights_filter.server.store_schema import (
|
||||||
_ensure_constrained_schema,
|
_ensure_constrained_schema,
|
||||||
_ensure_queue_schema,
|
_ensure_queue_schema,
|
||||||
|
|
@ -116,7 +115,7 @@ from rights_filter.server.store_url_utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CopyrighterStore:
|
class CopyrighterStore(StorePersistenceMixin):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
db_path: Path | str,
|
db_path: Path | str,
|
||||||
|
|
@ -3071,298 +3070,3 @@ class CopyrighterStore:
|
||||||
for submission in self._all("submissions", queue_id=queue_id):
|
for submission in self._all("submissions", queue_id=queue_id):
|
||||||
self._rescore_submission(str(submission["id"]))
|
self._rescore_submission(str(submission["id"]))
|
||||||
|
|
||||||
def _count(self, table: str) -> int:
|
|
||||||
_validate_table(table)
|
|
||||||
with self._connect() as conn:
|
|
||||||
return int(conn.execute(f"select count(*) from {table}").fetchone()[0])
|
|
||||||
|
|
||||||
def _connect(self) -> sqlite3.Connection:
|
|
||||||
conn = sqlite3.connect(self.db_path)
|
|
||||||
conn.execute("pragma foreign_keys = on")
|
|
||||||
# The server is a ThreadingHTTPServer: concurrent operator requests open
|
|
||||||
# their own connection. Without a busy timeout the second writer raises
|
|
||||||
# "database is locked" immediately; wait for the lock instead.
|
|
||||||
conn.execute("pragma busy_timeout = 5000")
|
|
||||||
conn.row_factory = sqlite3.Row
|
|
||||||
return conn
|
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def _conn_ctx(self, conn: sqlite3.Connection | None = None):
|
|
||||||
# When a caller passes an open connection (inside _transaction), reuse it
|
|
||||||
# and let the caller own commit/rollback. Otherwise behave exactly like
|
|
||||||
# `with self._connect() as conn:` — sqlite3 commits on success / rolls
|
|
||||||
# back on error.
|
|
||||||
if conn is not None:
|
|
||||||
yield conn
|
|
||||||
return
|
|
||||||
with self._connect() as owned:
|
|
||||||
yield owned
|
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def _transaction(self):
|
|
||||||
# A single connection + single commit for multi-step writes that must be
|
|
||||||
# atomic (e.g. a state change and its audit event), so a mid-sequence
|
|
||||||
# failure leaves no partial state.
|
|
||||||
conn = self._connect()
|
|
||||||
try:
|
|
||||||
yield conn
|
|
||||||
conn.commit()
|
|
||||||
except Exception:
|
|
||||||
conn.rollback()
|
|
||||||
raise
|
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
def _put(
|
|
||||||
self,
|
|
||||||
table: str,
|
|
||||||
id_value: str,
|
|
||||||
payload: dict[str, Any],
|
|
||||||
*,
|
|
||||||
conn: sqlite3.Connection | None = None,
|
|
||||||
) -> None:
|
|
||||||
_validate_payload(table, id_value, payload)
|
|
||||||
payload_json = json.dumps(payload, ensure_ascii=False)
|
|
||||||
with self._conn_ctx(conn) as conn:
|
|
||||||
if table == "submissions":
|
|
||||||
conn.execute(
|
|
||||||
"""
|
|
||||||
insert into submissions
|
|
||||||
(id, title, risk_score, risk_band, decision_status, submitted_epoch, queue_id, payload)
|
|
||||||
values (?, ?, ?, ?, ?, ?, ?, ?)
|
|
||||||
on conflict(id) do update set
|
|
||||||
title = excluded.title,
|
|
||||||
risk_score = excluded.risk_score,
|
|
||||||
risk_band = excluded.risk_band,
|
|
||||||
decision_status = excluded.decision_status,
|
|
||||||
submitted_epoch = excluded.submitted_epoch,
|
|
||||||
queue_id = excluded.queue_id,
|
|
||||||
payload = excluded.payload
|
|
||||||
""",
|
|
||||||
(
|
|
||||||
id_value,
|
|
||||||
str(payload.get("title", "")),
|
|
||||||
int(payload.get("riskScore", 0) or 0),
|
|
||||||
str(payload.get("riskBand", "low")),
|
|
||||||
str(payload.get("decisionStatus", "unreviewed")),
|
|
||||||
int(payload.get("submittedEpoch", 0) or 0),
|
|
||||||
str(payload.get("queueId", "")),
|
|
||||||
payload_json,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return
|
|
||||||
if table == "evidence":
|
|
||||||
conn.execute(
|
|
||||||
"""
|
|
||||||
insert into evidence
|
|
||||||
(id, submission_id, source, confidence, status, contributed, payload)
|
|
||||||
values (?, ?, ?, ?, ?, ?, ?)
|
|
||||||
on conflict(id) do update set
|
|
||||||
submission_id = excluded.submission_id,
|
|
||||||
source = excluded.source,
|
|
||||||
confidence = excluded.confidence,
|
|
||||||
status = excluded.status,
|
|
||||||
contributed = excluded.contributed,
|
|
||||||
payload = excluded.payload
|
|
||||||
""",
|
|
||||||
(
|
|
||||||
id_value,
|
|
||||||
str(payload["submission_id"]),
|
|
||||||
str(payload.get("source", "")),
|
|
||||||
# Clamp to the column's CHECK (confidence >= 0 and <= 1).
|
|
||||||
# Some sources (e.g. Google Vision web-entity relevance
|
|
||||||
# scores) are unbounded and would raise IntegrityError.
|
|
||||||
max(0.0, min(1.0, float(payload.get("confidence", 0) or 0))),
|
|
||||||
str(payload.get("status", "active")),
|
|
||||||
1 if bool(payload.get("contributed", True)) else 0,
|
|
||||||
payload_json,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return
|
|
||||||
if table == "providers":
|
|
||||||
conn.execute(
|
|
||||||
"""
|
|
||||||
insert into providers
|
|
||||||
(id, name, enabled, usage, quota, payload)
|
|
||||||
values (?, ?, ?, ?, ?, ?)
|
|
||||||
on conflict(id) do update set
|
|
||||||
name = excluded.name,
|
|
||||||
enabled = excluded.enabled,
|
|
||||||
usage = excluded.usage,
|
|
||||||
quota = excluded.quota,
|
|
||||||
payload = excluded.payload
|
|
||||||
""",
|
|
||||||
(
|
|
||||||
id_value,
|
|
||||||
str(payload.get("name", "")),
|
|
||||||
1 if bool(payload.get("enabled", False)) else 0,
|
|
||||||
int(payload.get("usage", 0) or 0),
|
|
||||||
int(payload.get("quota", 0) or 0),
|
|
||||||
payload_json,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return
|
|
||||||
if table == "knowledge_entries":
|
|
||||||
conn.execute(
|
|
||||||
"""
|
|
||||||
insert into knowledge_entries
|
|
||||||
(id, name, entry_type, entry_status, active, source_submission_id, payload)
|
|
||||||
values (?, ?, ?, ?, ?, ?, ?)
|
|
||||||
on conflict(id) do update set
|
|
||||||
name = excluded.name,
|
|
||||||
entry_type = excluded.entry_type,
|
|
||||||
entry_status = excluded.entry_status,
|
|
||||||
active = excluded.active,
|
|
||||||
source_submission_id = excluded.source_submission_id,
|
|
||||||
payload = excluded.payload
|
|
||||||
""",
|
|
||||||
(
|
|
||||||
id_value,
|
|
||||||
str(payload.get("name", "")),
|
|
||||||
str(payload.get("type", "other")),
|
|
||||||
str(payload.get("entryStatus", "confirmed")),
|
|
||||||
1 if bool(payload.get("active", True)) else 0,
|
|
||||||
str(payload.get("sourceSubmissionId", "")),
|
|
||||||
payload_json,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return
|
|
||||||
if table == "collection_candidates":
|
|
||||||
conn.execute(
|
|
||||||
"""
|
|
||||||
insert into collection_candidates
|
|
||||||
(id, provider, query, status, collected_epoch, payload)
|
|
||||||
values (?, ?, ?, ?, ?, ?)
|
|
||||||
on conflict(id) do update set
|
|
||||||
provider = excluded.provider,
|
|
||||||
query = excluded.query,
|
|
||||||
status = excluded.status,
|
|
||||||
collected_epoch = excluded.collected_epoch,
|
|
||||||
payload = excluded.payload
|
|
||||||
""",
|
|
||||||
(
|
|
||||||
id_value,
|
|
||||||
str(payload.get("provider", "")),
|
|
||||||
str(payload.get("query", "")),
|
|
||||||
str(payload.get("status", "candidate")),
|
|
||||||
int(payload.get("collectedEpoch", 0) or 0),
|
|
||||||
payload_json,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return
|
|
||||||
if table == "corrections":
|
|
||||||
conn.execute(
|
|
||||||
"""
|
|
||||||
insert into corrections
|
|
||||||
(id, decision_id, status, payload)
|
|
||||||
values (?, ?, ?, ?)
|
|
||||||
on conflict(id) do update set
|
|
||||||
decision_id = excluded.decision_id,
|
|
||||||
status = excluded.status,
|
|
||||||
payload = excluded.payload
|
|
||||||
""",
|
|
||||||
(
|
|
||||||
id_value,
|
|
||||||
str(payload.get("decisionId", payload.get("decision_id", ""))),
|
|
||||||
str(payload.get("status", "")),
|
|
||||||
payload_json,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return
|
|
||||||
conn.execute(
|
|
||||||
f"insert or replace into {table} (id, payload) values (?, ?)",
|
|
||||||
(id_value, payload_json),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _get(
|
|
||||||
self,
|
|
||||||
table: str,
|
|
||||||
id_value: str,
|
|
||||||
*,
|
|
||||||
conn: sqlite3.Connection | None = None,
|
|
||||||
) -> dict[str, Any]:
|
|
||||||
_validate_table(table)
|
|
||||||
with self._conn_ctx(conn) as conn:
|
|
||||||
row = conn.execute(f"select payload from {table} where id = ?", (id_value,)).fetchone()
|
|
||||||
if row is None:
|
|
||||||
raise KeyError(id_value)
|
|
||||||
return json.loads(row["payload"])
|
|
||||||
|
|
||||||
def _apply_provider_usage_delta(
|
|
||||||
self, provider_id: str, count: int, payload: dict[str, Any]
|
|
||||||
) -> None:
|
|
||||||
# Re-read current usage under the write lock and apply the delta so
|
|
||||||
# concurrent provider calls don't lose increments. The rest of `payload`
|
|
||||||
# (lastSuccess / lastFailure) is last-writer-wins, which is acceptable for
|
|
||||||
# status fields.
|
|
||||||
with self._write_lock:
|
|
||||||
current = self._get("providers", provider_id)
|
|
||||||
payload["usage"] = int(current.get("usage", 0) or 0) + int(count)
|
|
||||||
self._put("providers", provider_id, payload)
|
|
||||||
|
|
||||||
def _clear_collection_candidates(self) -> None:
|
|
||||||
with self._connect() as conn:
|
|
||||||
conn.execute("delete from collection_candidates")
|
|
||||||
|
|
||||||
def _all(self, table: str, queue_id: str | None = None) -> list[dict[str, Any]]:
|
|
||||||
_validate_table(table)
|
|
||||||
with self._connect() as conn:
|
|
||||||
if table == "submissions" and queue_id is not None:
|
|
||||||
rows = conn.execute(
|
|
||||||
"select payload from submissions where queue_id = ? order by id",
|
|
||||||
(queue_id,),
|
|
||||||
).fetchall()
|
|
||||||
else:
|
|
||||||
rows = conn.execute(f"select payload from {table} order by id").fetchall()
|
|
||||||
values = [json.loads(row["payload"]) for row in rows]
|
|
||||||
if table == "providers":
|
|
||||||
order = {"internal": 0, "naver": 1, "google": 2, "google_search": 3, "llm": 4}
|
|
||||||
values.sort(key=lambda item: order.get(item["id"], 99))
|
|
||||||
if table == "collection_candidates":
|
|
||||||
values.sort(key=lambda item: int(item.get("collectedEpoch", 0)), reverse=True)
|
|
||||||
return values
|
|
||||||
|
|
||||||
def _evidence_rows(self, queue_id: str | None = None):
|
|
||||||
with self._connect() as conn:
|
|
||||||
if queue_id is None:
|
|
||||||
rows = conn.execute("select submission_id, payload from evidence order by id").fetchall()
|
|
||||||
else:
|
|
||||||
rows = conn.execute(
|
|
||||||
"""
|
|
||||||
select e.submission_id, e.payload
|
|
||||||
from evidence e
|
|
||||||
join submissions s on s.id = e.submission_id
|
|
||||||
where s.queue_id = ?
|
|
||||||
order by e.id
|
|
||||||
""",
|
|
||||||
(queue_id,),
|
|
||||||
).fetchall()
|
|
||||||
return rows
|
|
||||||
|
|
||||||
def _evidence_for_submission(self, submission_id: str) -> list[dict[str, Any]]:
|
|
||||||
# Read only this submission's evidence. _rescore_submission previously
|
|
||||||
# called _evidence_by_submission(), which read and JSON-parsed the ENTIRE
|
|
||||||
# evidence table on every single-submission rescore (a hot path inside the
|
|
||||||
# seed / auto-search / similarity loops).
|
|
||||||
with self._connect() as conn:
|
|
||||||
rows = conn.execute(
|
|
||||||
"select payload from evidence where submission_id = ? order by id",
|
|
||||||
(submission_id,),
|
|
||||||
).fetchall()
|
|
||||||
evidence: list[dict[str, Any]] = []
|
|
||||||
for row in rows:
|
|
||||||
payload = json.loads(row["payload"])
|
|
||||||
payload.pop("submission_id", None)
|
|
||||||
evidence.append(payload)
|
|
||||||
return evidence
|
|
||||||
|
|
||||||
def _evidence_by_submission(self, queue_id: str | None = None) -> dict[str, list[dict[str, Any]]]:
|
|
||||||
rows = self._evidence_rows(queue_id=queue_id)
|
|
||||||
grouped: dict[str, list[dict[str, Any]]] = {}
|
|
||||||
for row in rows:
|
|
||||||
payload = json.loads(row["payload"])
|
|
||||||
payload.pop("submission_id", None)
|
|
||||||
grouped.setdefault(row["submission_id"], []).append(payload)
|
|
||||||
return grouped
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
313
src/rights_filter/server/store_persistence.py
Normal file
313
src/rights_filter/server/store_persistence.py
Normal file
|
|
@ -0,0 +1,313 @@
|
||||||
|
"""SQLite persistence primitives for CopyrighterStore, as a mixin.
|
||||||
|
|
||||||
|
Connection/transaction management and the generic _put/_get/_all row access plus
|
||||||
|
the evidence-read helpers. Mixed into CopyrighterStore; methods rely on the host
|
||||||
|
class providing `self.db_path` and `self._write_lock`. Behavior unchanged.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from rights_filter.server.store_serialization import _validate_payload, _validate_table
|
||||||
|
|
||||||
|
|
||||||
|
class StorePersistenceMixin:
|
||||||
|
def _count(self, table: str) -> int:
|
||||||
|
_validate_table(table)
|
||||||
|
with self._connect() as conn:
|
||||||
|
return int(conn.execute(f"select count(*) from {table}").fetchone()[0])
|
||||||
|
|
||||||
|
def _connect(self) -> sqlite3.Connection:
|
||||||
|
conn = sqlite3.connect(self.db_path)
|
||||||
|
conn.execute("pragma foreign_keys = on")
|
||||||
|
# The server is a ThreadingHTTPServer: concurrent operator requests open
|
||||||
|
# their own connection. Without a busy timeout the second writer raises
|
||||||
|
# "database is locked" immediately; wait for the lock instead.
|
||||||
|
conn.execute("pragma busy_timeout = 5000")
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
return conn
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _conn_ctx(self, conn: sqlite3.Connection | None = None):
|
||||||
|
# When a caller passes an open connection (inside _transaction), reuse it
|
||||||
|
# and let the caller own commit/rollback. Otherwise behave exactly like
|
||||||
|
# `with self._connect() as conn:` — sqlite3 commits on success / rolls
|
||||||
|
# back on error.
|
||||||
|
if conn is not None:
|
||||||
|
yield conn
|
||||||
|
return
|
||||||
|
with self._connect() as owned:
|
||||||
|
yield owned
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _transaction(self):
|
||||||
|
# A single connection + single commit for multi-step writes that must be
|
||||||
|
# atomic (e.g. a state change and its audit event), so a mid-sequence
|
||||||
|
# failure leaves no partial state.
|
||||||
|
conn = self._connect()
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
conn.commit()
|
||||||
|
except Exception:
|
||||||
|
conn.rollback()
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
def _put(
|
||||||
|
self,
|
||||||
|
table: str,
|
||||||
|
id_value: str,
|
||||||
|
payload: dict[str, Any],
|
||||||
|
*,
|
||||||
|
conn: sqlite3.Connection | None = None,
|
||||||
|
) -> None:
|
||||||
|
_validate_payload(table, id_value, payload)
|
||||||
|
payload_json = json.dumps(payload, ensure_ascii=False)
|
||||||
|
with self._conn_ctx(conn) as conn:
|
||||||
|
if table == "submissions":
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
insert into submissions
|
||||||
|
(id, title, risk_score, risk_band, decision_status, submitted_epoch, queue_id, payload)
|
||||||
|
values (?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
on conflict(id) do update set
|
||||||
|
title = excluded.title,
|
||||||
|
risk_score = excluded.risk_score,
|
||||||
|
risk_band = excluded.risk_band,
|
||||||
|
decision_status = excluded.decision_status,
|
||||||
|
submitted_epoch = excluded.submitted_epoch,
|
||||||
|
queue_id = excluded.queue_id,
|
||||||
|
payload = excluded.payload
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
id_value,
|
||||||
|
str(payload.get("title", "")),
|
||||||
|
int(payload.get("riskScore", 0) or 0),
|
||||||
|
str(payload.get("riskBand", "low")),
|
||||||
|
str(payload.get("decisionStatus", "unreviewed")),
|
||||||
|
int(payload.get("submittedEpoch", 0) or 0),
|
||||||
|
str(payload.get("queueId", "")),
|
||||||
|
payload_json,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
if table == "evidence":
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
insert into evidence
|
||||||
|
(id, submission_id, source, confidence, status, contributed, payload)
|
||||||
|
values (?, ?, ?, ?, ?, ?, ?)
|
||||||
|
on conflict(id) do update set
|
||||||
|
submission_id = excluded.submission_id,
|
||||||
|
source = excluded.source,
|
||||||
|
confidence = excluded.confidence,
|
||||||
|
status = excluded.status,
|
||||||
|
contributed = excluded.contributed,
|
||||||
|
payload = excluded.payload
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
id_value,
|
||||||
|
str(payload["submission_id"]),
|
||||||
|
str(payload.get("source", "")),
|
||||||
|
# Clamp to the column's CHECK (confidence >= 0 and <= 1).
|
||||||
|
# Some sources (e.g. Google Vision web-entity relevance
|
||||||
|
# scores) are unbounded and would raise IntegrityError.
|
||||||
|
max(0.0, min(1.0, float(payload.get("confidence", 0) or 0))),
|
||||||
|
str(payload.get("status", "active")),
|
||||||
|
1 if bool(payload.get("contributed", True)) else 0,
|
||||||
|
payload_json,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
if table == "providers":
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
insert into providers
|
||||||
|
(id, name, enabled, usage, quota, payload)
|
||||||
|
values (?, ?, ?, ?, ?, ?)
|
||||||
|
on conflict(id) do update set
|
||||||
|
name = excluded.name,
|
||||||
|
enabled = excluded.enabled,
|
||||||
|
usage = excluded.usage,
|
||||||
|
quota = excluded.quota,
|
||||||
|
payload = excluded.payload
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
id_value,
|
||||||
|
str(payload.get("name", "")),
|
||||||
|
1 if bool(payload.get("enabled", False)) else 0,
|
||||||
|
int(payload.get("usage", 0) or 0),
|
||||||
|
int(payload.get("quota", 0) or 0),
|
||||||
|
payload_json,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
if table == "knowledge_entries":
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
insert into knowledge_entries
|
||||||
|
(id, name, entry_type, entry_status, active, source_submission_id, payload)
|
||||||
|
values (?, ?, ?, ?, ?, ?, ?)
|
||||||
|
on conflict(id) do update set
|
||||||
|
name = excluded.name,
|
||||||
|
entry_type = excluded.entry_type,
|
||||||
|
entry_status = excluded.entry_status,
|
||||||
|
active = excluded.active,
|
||||||
|
source_submission_id = excluded.source_submission_id,
|
||||||
|
payload = excluded.payload
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
id_value,
|
||||||
|
str(payload.get("name", "")),
|
||||||
|
str(payload.get("type", "other")),
|
||||||
|
str(payload.get("entryStatus", "confirmed")),
|
||||||
|
1 if bool(payload.get("active", True)) else 0,
|
||||||
|
str(payload.get("sourceSubmissionId", "")),
|
||||||
|
payload_json,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
if table == "collection_candidates":
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
insert into collection_candidates
|
||||||
|
(id, provider, query, status, collected_epoch, payload)
|
||||||
|
values (?, ?, ?, ?, ?, ?)
|
||||||
|
on conflict(id) do update set
|
||||||
|
provider = excluded.provider,
|
||||||
|
query = excluded.query,
|
||||||
|
status = excluded.status,
|
||||||
|
collected_epoch = excluded.collected_epoch,
|
||||||
|
payload = excluded.payload
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
id_value,
|
||||||
|
str(payload.get("provider", "")),
|
||||||
|
str(payload.get("query", "")),
|
||||||
|
str(payload.get("status", "candidate")),
|
||||||
|
int(payload.get("collectedEpoch", 0) or 0),
|
||||||
|
payload_json,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
if table == "corrections":
|
||||||
|
conn.execute(
|
||||||
|
"""
|
||||||
|
insert into corrections
|
||||||
|
(id, decision_id, status, payload)
|
||||||
|
values (?, ?, ?, ?)
|
||||||
|
on conflict(id) do update set
|
||||||
|
decision_id = excluded.decision_id,
|
||||||
|
status = excluded.status,
|
||||||
|
payload = excluded.payload
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
id_value,
|
||||||
|
str(payload.get("decisionId", payload.get("decision_id", ""))),
|
||||||
|
str(payload.get("status", "")),
|
||||||
|
payload_json,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
conn.execute(
|
||||||
|
f"insert or replace into {table} (id, payload) values (?, ?)",
|
||||||
|
(id_value, payload_json),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _get(
|
||||||
|
self,
|
||||||
|
table: str,
|
||||||
|
id_value: str,
|
||||||
|
*,
|
||||||
|
conn: sqlite3.Connection | None = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
_validate_table(table)
|
||||||
|
with self._conn_ctx(conn) as conn:
|
||||||
|
row = conn.execute(f"select payload from {table} where id = ?", (id_value,)).fetchone()
|
||||||
|
if row is None:
|
||||||
|
raise KeyError(id_value)
|
||||||
|
return json.loads(row["payload"])
|
||||||
|
|
||||||
|
def _apply_provider_usage_delta(
|
||||||
|
self, provider_id: str, count: int, payload: dict[str, Any]
|
||||||
|
) -> None:
|
||||||
|
# Re-read current usage under the write lock and apply the delta so
|
||||||
|
# concurrent provider calls don't lose increments. The rest of `payload`
|
||||||
|
# (lastSuccess / lastFailure) is last-writer-wins, which is acceptable for
|
||||||
|
# status fields.
|
||||||
|
with self._write_lock:
|
||||||
|
current = self._get("providers", provider_id)
|
||||||
|
payload["usage"] = int(current.get("usage", 0) or 0) + int(count)
|
||||||
|
self._put("providers", provider_id, payload)
|
||||||
|
|
||||||
|
def _clear_collection_candidates(self) -> None:
|
||||||
|
with self._connect() as conn:
|
||||||
|
conn.execute("delete from collection_candidates")
|
||||||
|
|
||||||
|
def _all(self, table: str, queue_id: str | None = None) -> list[dict[str, Any]]:
|
||||||
|
_validate_table(table)
|
||||||
|
with self._connect() as conn:
|
||||||
|
if table == "submissions" and queue_id is not None:
|
||||||
|
rows = conn.execute(
|
||||||
|
"select payload from submissions where queue_id = ? order by id",
|
||||||
|
(queue_id,),
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(f"select payload from {table} order by id").fetchall()
|
||||||
|
values = [json.loads(row["payload"]) for row in rows]
|
||||||
|
if table == "providers":
|
||||||
|
order = {"internal": 0, "naver": 1, "google": 2, "google_search": 3, "llm": 4}
|
||||||
|
values.sort(key=lambda item: order.get(item["id"], 99))
|
||||||
|
if table == "collection_candidates":
|
||||||
|
values.sort(key=lambda item: int(item.get("collectedEpoch", 0)), reverse=True)
|
||||||
|
return values
|
||||||
|
|
||||||
|
def _evidence_rows(self, queue_id: str | None = None):
|
||||||
|
with self._connect() as conn:
|
||||||
|
if queue_id is None:
|
||||||
|
rows = conn.execute("select submission_id, payload from evidence order by id").fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(
|
||||||
|
"""
|
||||||
|
select e.submission_id, e.payload
|
||||||
|
from evidence e
|
||||||
|
join submissions s on s.id = e.submission_id
|
||||||
|
where s.queue_id = ?
|
||||||
|
order by e.id
|
||||||
|
""",
|
||||||
|
(queue_id,),
|
||||||
|
).fetchall()
|
||||||
|
return rows
|
||||||
|
|
||||||
|
def _evidence_for_submission(self, submission_id: str) -> list[dict[str, Any]]:
|
||||||
|
# Read only this submission's evidence. _rescore_submission previously
|
||||||
|
# called _evidence_by_submission(), which read and JSON-parsed the ENTIRE
|
||||||
|
# evidence table on every single-submission rescore (a hot path inside the
|
||||||
|
# seed / auto-search / similarity loops).
|
||||||
|
with self._connect() as conn:
|
||||||
|
rows = conn.execute(
|
||||||
|
"select payload from evidence where submission_id = ? order by id",
|
||||||
|
(submission_id,),
|
||||||
|
).fetchall()
|
||||||
|
evidence: list[dict[str, Any]] = []
|
||||||
|
for row in rows:
|
||||||
|
payload = json.loads(row["payload"])
|
||||||
|
payload.pop("submission_id", None)
|
||||||
|
evidence.append(payload)
|
||||||
|
return evidence
|
||||||
|
|
||||||
|
def _evidence_by_submission(self, queue_id: str | None = None) -> dict[str, list[dict[str, Any]]]:
|
||||||
|
rows = self._evidence_rows(queue_id=queue_id)
|
||||||
|
grouped: dict[str, list[dict[str, Any]]] = {}
|
||||||
|
for row in rows:
|
||||||
|
payload = json.loads(row["payload"])
|
||||||
|
payload.pop("submission_id", None)
|
||||||
|
grouped.setdefault(row["submission_id"], []).append(payload)
|
||||||
|
return grouped
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in a new issue