feat(db): add truth scoring database helpers (#35)

New structs: - TruthScoreUpdate: parameters for updating truth scores - TruthStats: aggregated truth scoring statistics - ScoringCandidate: lightweight record for the scoring worker New Database methods: - get_unscored_memories(): fetch unscored memories FIFO - get_stale_memories(): fetch memories due for re-evaluation - update_truth_score(): update single memory truth fields - batch_update_truth_scores(): transactional batch update - get_truth_stats(): aggregate stats with category breakdown Uses partial index idx_memories_truth_unevaluated for efficient unscored memory queries. Part of #29
2026-06-15 22:07:08 +00:00 · 2026-04-04 03:11:38 +00:00
parent 2fe656a63f
commit 04f4809b7c
1 changed files with 247 additions and 0 deletions
--- a/src/db.rs
+++ b/src/db.rs
@@ -384,6 +384,253 @@ impl Database {
    }
 }

+// ---------------------------------------------------------------------------
+// Truth scoring database helpers (Issue #35)
+// ---------------------------------------------------------------------------
+
+/// Parameters for updating truth scores on a memory.
+///
+/// Consumed by [`Database::update_truth_score`] and
+/// [`Database::batch_update_truth_scores`], which write each field below to the
+/// like-named column of the `memories` row identified by `id` and set
+/// `truth_evaluated_at` to `NOW()`.
+#[derive(Debug, Clone)]
+pub struct TruthScoreUpdate {
+    /// Identifies the row to update (`WHERE id = $1`).
+    pub id: Uuid,
+    /// New value for the `truth_value` column.
+    /// NOTE(review): no range is enforced by this type; confirm the expected
+    /// scale with the scoring code.
+    pub truth_value: f32,
+    /// New value for the `truth_confidence` column.
+    pub truth_confidence: f32,
+    /// New value for the `truth_category` column. `get_truth_stats` only counts
+    /// the strings `verified`, `plausible`, `unverified` and `contradicted`;
+    /// this type does not restrict the value to that set.
+    pub truth_category: String,
+    /// New value for the `ecan_sti` column (presumably ECAN short-term
+    /// importance; TODO confirm).
+    pub ecan_sti: f32,
+    /// New value for the `ecan_lti` column (presumably ECAN long-term
+    /// importance; TODO confirm).
+    pub ecan_lti: f32,
+}
+
+/// Aggregated truth scoring statistics.
+///
+/// Produced by [`Database::get_truth_stats`] from a single aggregate query over
+/// the `memories` rows that are not expired (`expires_at` is NULL or in the
+/// future).
+#[derive(Debug, Clone, Serialize)]
+pub struct TruthStats {
+    /// `COUNT(*)` over the non-expired rows.
+    pub total_memories: i64,
+    /// Rows whose `truth_evaluated_at` is not NULL.
+    pub scored_memories: i64,
+    /// `total_memories` minus `scored_memories`, computed in SQL.
+    pub unscored_memories: i64,
+    /// Rows with `truth_category = 'verified'`.
+    pub category_verified: i64,
+    /// Rows with `truth_category = 'plausible'`.
+    pub category_plausible: i64,
+    /// Rows with `truth_category = 'unverified'`.
+    pub category_unverified: i64,
+    /// Rows with `truth_category = 'contradicted'`.
+    pub category_contradicted: i64,
+    /// `AVG(truth_value)` over rows where it is not NULL; `None` when the
+    /// aggregate yields NULL.
+    pub avg_truth_value: Option<f64>,
+    /// `AVG(truth_confidence)` over rows where it is not NULL; `None` when the
+    /// aggregate yields NULL.
+    pub avg_confidence: Option<f64>,
+    /// `scored_memories / total_memories * 100`, or `0.0` when there are no
+    /// rows at all.
+    pub coverage_pct: f64,
+}
+
+/// A lightweight memory record for the truth scoring worker.
+///
+/// Carries exactly the columns selected by
+/// [`Database::get_unscored_memories`] and [`Database::get_stale_memories`].
+/// Note that both queries always select and decode the `embedding` column, so
+/// the full embedding is loaded for every candidate.
+#[derive(Debug, Clone)]
+pub struct ScoringCandidate {
+    /// The `id` column of the memory row.
+    pub id: Uuid,
+    /// The `content` column of the memory row.
+    pub content: String,
+    /// The `embedding` column, converted from the database vector type into a
+    /// plain `Vec<f32>`.
+    pub embedding: Vec<f32>,
+    /// The `metadata` column as a JSON value.
+    pub metadata: serde_json::Value,
+    /// The `created_at` column; unscored candidates are ordered by it.
+    pub created_at: chrono::DateTime<chrono::Utc>,
+    /// Existing truth value, if previously scored.
+    pub truth_value: Option<f32>,
+    /// Existing `truth_confidence` column value; `None` when the column is NULL.
+    pub truth_confidence: Option<f32>,
+    /// Existing `ecan_sti` column value; `None` when the column is NULL.
+    pub ecan_sti: Option<f32>,
+    /// Existing `ecan_lti` column value; `None` when the column is NULL.
+    pub ecan_lti: Option<f32>,
+}
+
+impl Database {
+    /// Fetch memories that have never been truth-scored.
+    ///
+    /// Selects rows of `memories` whose `truth_evaluated_at` is NULL and that
+    /// are not expired, ordered by `created_at` ascending (oldest first) so the
+    /// worker processes memories in FIFO order.
+    ///
+    /// # Arguments
+    /// * `limit` - maximum number of rows to return (bound to the SQL `LIMIT`).
+    ///
+    /// # Errors
+    /// Returns an error if no pooled connection can be obtained, if the query
+    /// fails, or if a column cannot be decoded into the expected Rust type
+    /// (for example a NULL `embedding`).
+    pub async fn get_unscored_memories(&self, limit: i64) -> Result<Vec<ScoringCandidate>> {
+        let client = self.pool.get().await?;
+        let rows = client
+            .query(
+                r#"
+                SELECT id, content, embedding, metadata, created_at,
+                       truth_value, truth_confidence, ecan_sti, ecan_lti
+                FROM memories
+                WHERE truth_evaluated_at IS NULL
+                  AND (expires_at IS NULL OR expires_at > NOW())
+                ORDER BY created_at ASC
+                LIMIT $1
+                "#,
+                &[&limit],
+            )
+            .await
+            .context("Failed to fetch unscored memories")?;
+
+        // `try_get` surfaces a decode failure as an error; `Row::get` would
+        // panic instead, even though this function already returns `Result`.
+        rows.iter()
+            .map(|row| -> Result<ScoringCandidate> {
+                let pgvec: Vector = row.try_get("embedding")?;
+                Ok(ScoringCandidate {
+                    id: row.try_get("id")?,
+                    content: row.try_get("content")?,
+                    embedding: pgvec.to_vec(),
+                    metadata: row.try_get("metadata")?,
+                    created_at: row.try_get("created_at")?,
+                    truth_value: row.try_get("truth_value")?,
+                    truth_confidence: row.try_get("truth_confidence")?,
+                    ecan_sti: row.try_get("ecan_sti")?,
+                    ecan_lti: row.try_get("ecan_lti")?,
+                })
+            })
+            .collect()
+    }
+
+    /// Fetch memories whose truth score is stale (evaluated more than
+    /// `older_than_seconds` ago).
+    ///
+    /// Only rows that have been evaluated before (`truth_evaluated_at` is not
+    /// NULL) and are not expired are considered. Results are ordered by
+    /// `truth_evaluated_at` ascending, so the least recently evaluated memories
+    /// come first.
+    ///
+    /// # Arguments
+    /// * `older_than_seconds` - minimum age of the last evaluation, in seconds.
+    /// * `limit` - maximum number of rows to return (bound to the SQL `LIMIT`).
+    ///
+    /// # Errors
+    /// Returns an error if no pooled connection can be obtained, if the query
+    /// fails, or if a column cannot be decoded into the expected Rust type
+    /// (for example a NULL `embedding`).
+    pub async fn get_stale_memories(
+        &self,
+        older_than_seconds: i64,
+        limit: i64,
+    ) -> Result<Vec<ScoringCandidate>> {
+        // The query builds the interval by concatenating `$1` with ' seconds',
+        // so the parameter has to be bound as text rather than as an integer.
+        let age_seconds_text = older_than_seconds.to_string();
+
+        let client = self.pool.get().await?;
+        let rows = client
+            .query(
+                r#"
+                SELECT id, content, embedding, metadata, created_at,
+                       truth_value, truth_confidence, ecan_sti, ecan_lti
+                FROM memories
+                WHERE truth_evaluated_at IS NOT NULL
+                  AND truth_evaluated_at < NOW() - ($1 || ' seconds')::interval
+                  AND (expires_at IS NULL OR expires_at > NOW())
+                ORDER BY truth_evaluated_at ASC
+                LIMIT $2
+                "#,
+                &[&age_seconds_text, &limit],
+            )
+            .await
+            .context("Failed to fetch stale memories")?;
+
+        // `try_get` surfaces a decode failure as an error; `Row::get` would
+        // panic instead, even though this function already returns `Result`.
+        rows.iter()
+            .map(|row| -> Result<ScoringCandidate> {
+                let pgvec: Vector = row.try_get("embedding")?;
+                Ok(ScoringCandidate {
+                    id: row.try_get("id")?,
+                    content: row.try_get("content")?,
+                    embedding: pgvec.to_vec(),
+                    metadata: row.try_get("metadata")?,
+                    created_at: row.try_get("created_at")?,
+                    truth_value: row.try_get("truth_value")?,
+                    truth_confidence: row.try_get("truth_confidence")?,
+                    ecan_sti: row.try_get("ecan_sti")?,
+                    ecan_lti: row.try_get("ecan_lti")?,
+                })
+            })
+            .collect()
+    }
+
+    /// Persist freshly computed truth scores for one memory.
+    ///
+    /// Overwrites `truth_value`, `truth_confidence`, `truth_category`,
+    /// `ecan_sti` and `ecan_lti` on the `memories` row whose `id` matches
+    /// `update.id`, and sets `truth_evaluated_at` to `NOW()`.
+    ///
+    /// The affected-row count is discarded, so an `id` that matches no row
+    /// still yields `Ok(())`.
+    ///
+    /// # Errors
+    /// Returns an error if no pooled connection can be obtained or if the
+    /// `UPDATE` statement fails.
+    pub async fn update_truth_score(&self, update: &TruthScoreUpdate) -> Result<()> {
+        // Borrow every field once; the bindings line up with $1..$6 below.
+        let TruthScoreUpdate {
+            id,
+            truth_value,
+            truth_confidence,
+            truth_category,
+            ecan_sti,
+            ecan_lti,
+        } = update;
+
+        let conn = self.pool.get().await?;
+        let outcome = conn
+            .execute(
+                r#"
+                UPDATE memories
+                SET truth_value = $2,
+                    truth_confidence = $3,
+                    truth_category = $4,
+                    truth_evaluated_at = NOW(),
+                    ecan_sti = $5,
+                    ecan_lti = $6
+                WHERE id = $1
+                "#,
+                &[
+                    id,
+                    truth_value,
+                    truth_confidence,
+                    truth_category,
+                    ecan_sti,
+                    ecan_lti,
+                ],
+            )
+            .await;
+
+        outcome.context("Failed to update truth score")?;
+        Ok(())
+    }
+
+    /// Batch update truth scores in a single transaction.
+    ///
+    /// Applies every entry of `updates` to the `memories` table with the same
+    /// `UPDATE` used by `update_truth_score`, then commits. The statement is
+    /// prepared once and reused for each entry. Because PostgreSQL's `NOW()`
+    /// reports the transaction start time, all rows touched by one batch
+    /// receive the same `truth_evaluated_at`.
+    ///
+    /// # Returns
+    /// The number of rows actually updated. Entries whose `id` matches no row
+    /// are not counted, so the result can be smaller than `updates.len()`.
+    /// An empty slice returns `Ok(0)` without touching the connection pool.
+    ///
+    /// # Errors
+    /// Returns an error if no pooled connection can be obtained, or if opening
+    /// the transaction, preparing the statement, any single update, or the
+    /// commit fails. On error the function returns before `commit`, so the
+    /// transaction is dropped uncommitted.
+    pub async fn batch_update_truth_scores(&self, updates: &[TruthScoreUpdate]) -> Result<usize> {
+        if updates.is_empty() {
+            return Ok(0);
+        }
+        let mut client = self.pool.get().await?;
+        let transaction = client.transaction().await?;
+
+        // Prepare once: executing the raw SQL text in the loop would make the
+        // server parse and plan the identical statement for every entry.
+        let statement = transaction
+            .prepare(
+                r#"
+                UPDATE memories
+                SET truth_value = $2,
+                    truth_confidence = $3,
+                    truth_category = $4,
+                    truth_evaluated_at = NOW(),
+                    ecan_sti = $5,
+                    ecan_lti = $6
+                WHERE id = $1
+                "#,
+            )
+            .await
+            .context("Failed to prepare truth score batch update")?;
+
+        // Sum the affected-row counts so the caller learns how many memories
+        // were really written, not merely how many updates were attempted.
+        let mut rows_updated: u64 = 0;
+        for update in updates {
+            rows_updated += transaction
+                .execute(
+                    &statement,
+                    &[
+                        &update.id,
+                        &update.truth_value,
+                        &update.truth_confidence,
+                        &update.truth_category,
+                        &update.ecan_sti,
+                        &update.ecan_lti,
+                    ],
+                )
+                .await
+                .context("Failed to update truth score in batch")?;
+        }
+
+        transaction.commit().await?;
+        // Saturate rather than truncate on targets where `usize` is narrower
+        // than `u64`.
+        Ok(usize::try_from(rows_updated).unwrap_or(usize::MAX))
+    }
+
+    /// Get aggregated truth scoring statistics.
+    ///
+    /// Runs one aggregate query over the `memories` rows that are not expired
+    /// and returns:
+    /// - the total, scored (`truth_evaluated_at` not NULL) and unscored counts,
+    /// - one count per `truth_category` value (`verified`, `plausible`,
+    ///   `unverified`, `contradicted`),
+    /// - the averages of `truth_value` and `truth_confidence` (`None` when the
+    ///   aggregate is NULL),
+    /// - `coverage_pct`, the scored share of the total in percent, which is
+    ///   `0.0` when there are no rows.
+    ///
+    /// # Errors
+    /// Returns an error if no pooled connection can be obtained, if the query
+    /// fails, or if an aggregate column cannot be decoded into the expected
+    /// Rust type.
+    pub async fn get_truth_stats(&self) -> Result<TruthStats> {
+        let client = self.pool.get().await?;
+        let row = client
+            .query_one(
+                r#"
+                SELECT
+                    COUNT(*) AS total,
+                    COUNT(truth_evaluated_at) AS scored,
+                    COUNT(*) - COUNT(truth_evaluated_at) AS unscored,
+                    COUNT(*) FILTER (WHERE truth_category = 'verified') AS cat_verified,
+                    COUNT(*) FILTER (WHERE truth_category = 'plausible') AS cat_plausible,
+                    COUNT(*) FILTER (WHERE truth_category = 'unverified') AS cat_unverified,
+                    COUNT(*) FILTER (WHERE truth_category = 'contradicted') AS cat_contradicted,
+                    AVG(truth_value) FILTER (WHERE truth_value IS NOT NULL) AS avg_tv,
+                    AVG(truth_confidence) FILTER (WHERE truth_confidence IS NOT NULL) AS avg_conf
+                FROM memories
+                WHERE expires_at IS NULL OR expires_at > NOW()
+                "#,
+                &[],
+            )
+            .await
+            .context("Failed to get truth stats")?;
+
+        // `try_get` turns an unexpected column type into an error instead of
+        // the panic `Row::get` would raise.
+        let total: i64 = row.try_get("total")?;
+        let scored: i64 = row.try_get("scored")?;
+        // Guard the division so an empty table reports 0% instead of NaN.
+        let coverage_pct = if total > 0 {
+            (scored as f64 / total as f64) * 100.0
+        } else {
+            0.0
+        };
+
+        Ok(TruthStats {
+            total_memories: total,
+            scored_memories: scored,
+            unscored_memories: row.try_get("unscored")?,
+            category_verified: row.try_get("cat_verified")?,
+            category_plausible: row.try_get("cat_plausible")?,
+            category_unverified: row.try_get("cat_unverified")?,
+            category_contradicted: row.try_get("cat_contradicted")?,
+            avg_truth_value: row.try_get("avg_tv")?,
+            avg_confidence: row.try_get("avg_conf")?,
+            coverage_pct,
+        })
+    }
+}
+
 /// Result for a single batch entry
 #[derive(Debug, Clone, Serialize)]
 pub struct BatchStoreResult {