feat(db): add truth scoring database helpers (#35)

New structs: - TruthScoreUpdate: parameters for updating truth scores - TruthStats: aggregated truth scoring statistics - ScoringCandidate: lightweight record for the scoring worker New Database methods: - get_unscored_memories(): fetch unscored memories FIFO - get_stale_memories(): fetch memories due for re-evaluation - update_truth_score(): update single memory truth fields - batch_update_truth_scores(): transactional batch update - get_truth_stats(): aggregate stats with category breakdown Uses partial index idx_memories_truth_unevaluated for efficient unscored memory queries. Part of #29
2026-06-15 22:07:08 +00:00 · 2026-04-04 03:11:38 +00:00
parent 2fe656a63f
commit 04f4809b7c
1 changed files with 247 additions and 0 deletions
--- a/src/db.rs
+++ b/src/db.rs
@@ -384,6 +384,253 @@ impl Database {
    }
 }
 // ---------------------------------------------------------------------------
 // Truth scoring database helpers (Issue #35)
 // ---------------------------------------------------------------------------
 /// Parameters for updating truth scores on a memory.
 #[derive(Debug, Clone)]
 pub struct TruthScoreUpdate {
    pub id: Uuid,
    pub truth_value: f32,
    pub truth_confidence: f32,
    pub truth_category: String,
    pub ecan_sti: f32,
    pub ecan_lti: f32,
 }
 /// Aggregated truth scoring statistics.
 #[derive(Debug, Clone, Serialize)]
 pub struct TruthStats {
    pub total_memories: i64,
    pub scored_memories: i64,
    pub unscored_memories: i64,
    pub category_verified: i64,
    pub category_plausible: i64,
    pub category_unverified: i64,
    pub category_contradicted: i64,
    pub avg_truth_value: Option<f64>,
    pub avg_confidence: Option<f64>,
    pub coverage_pct: f64,
 }
 /// A lightweight memory record for the truth scoring worker.
 /// Contains only the fields needed for scoring (avoids fetching full embeddings
 /// unless cross-referencing requires them).
 #[derive(Debug, Clone)]
 pub struct ScoringCandidate {
    pub id: Uuid,
    pub content: String,
    pub embedding: Vec<f32>,
    pub metadata: serde_json::Value,
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Existing truth value, if previously scored.
    pub truth_value: Option<f32>,
    pub truth_confidence: Option<f32>,
    pub ecan_sti: Option<f32>,
    pub ecan_lti: Option<f32>,
 }
 impl Database {
    /// Fetch memories that have never been truth-scored.
    ///
    /// Returns up to `limit` memories ordered by creation time (oldest first),
    /// so the worker processes memories in FIFO order.
    pub async fn get_unscored_memories(&self, limit: i64) -> Result<Vec<ScoringCandidate>> {
        let client = self.pool.get().await?;
        let rows = client
            .query(
                r#"
                SELECT id, content, embedding, metadata, created_at,
                       truth_value, truth_confidence, ecan_sti, ecan_lti
                FROM memories
                WHERE truth_evaluated_at IS NULL
                  AND (expires_at IS NULL OR expires_at > NOW())
                ORDER BY created_at ASC
                LIMIT $1
                "#,
                &[&limit],
            )
            .await
            .context("Failed to fetch unscored memories")?;
        Ok(rows
            .iter()
            .map(|row| {
                let pgvec: Vector = row.get("embedding");
                ScoringCandidate {
                    id: row.get("id"),
                    content: row.get("content"),
                    embedding: pgvec.to_vec(),
                    metadata: row.get("metadata"),
                    created_at: row.get("created_at"),
                    truth_value: row.get("truth_value"),
                    truth_confidence: row.get("truth_confidence"),
                    ecan_sti: row.get("ecan_sti"),
                    ecan_lti: row.get("ecan_lti"),
                }
            })
            .collect())
    }
    /// Fetch memories whose truth score is stale (evaluated more than
    /// `older_than_seconds` ago).
    pub async fn get_stale_memories(
        &self,
        older_than_seconds: i64,
        limit: i64,
    ) -> Result<Vec<ScoringCandidate>> {
        let client = self.pool.get().await?;
        let rows = client
            .query(
                r#"
                SELECT id, content, embedding, metadata, created_at,
                       truth_value, truth_confidence, ecan_sti, ecan_lti
                FROM memories
                WHERE truth_evaluated_at IS NOT NULL
                  AND truth_evaluated_at < NOW() - ($1 || ' seconds')::interval
                  AND (expires_at IS NULL OR expires_at > NOW())
                ORDER BY truth_evaluated_at ASC
                LIMIT $2
                "#,
                &[&older_than_seconds.to_string(), &limit],
            )
            .await
            .context("Failed to fetch stale memories")?;
        Ok(rows
            .iter()
            .map(|row| {
                let pgvec: Vector = row.get("embedding");
                ScoringCandidate {
                    id: row.get("id"),
                    content: row.get("content"),
                    embedding: pgvec.to_vec(),
                    metadata: row.get("metadata"),
                    created_at: row.get("created_at"),
                    truth_value: row.get("truth_value"),
                    truth_confidence: row.get("truth_confidence"),
                    ecan_sti: row.get("ecan_sti"),
                    ecan_lti: row.get("ecan_lti"),
                }
            })
            .collect())
    }
    /// Update truth scores for a single memory.
    pub async fn update_truth_score(&self, update: &TruthScoreUpdate) -> Result<()> {
        let client = self.pool.get().await?;
        client
            .execute(
                r#"
                UPDATE memories
                SET truth_value = $2,
                    truth_confidence = $3,
                    truth_category = $4,
                    truth_evaluated_at = NOW(),
                    ecan_sti = $5,
                    ecan_lti = $6
                WHERE id = $1
                "#,
                &[
                    &update.id,
                    &update.truth_value,
                    &update.truth_confidence,
                    &update.truth_category,
                    &update.ecan_sti,
                    &update.ecan_lti,
                ],
            )
            .await
            .context("Failed to update truth score")?;
        Ok(())
    }
    /// Batch update truth scores in a single transaction.
    pub async fn batch_update_truth_scores(&self, updates: &[TruthScoreUpdate]) -> Result<usize> {
        if updates.is_empty() {
            return Ok(0);
        }
        let mut client = self.pool.get().await?;
        let transaction = client.transaction().await?;
        for update in updates {
            transaction
                .execute(
                    r#"
                    UPDATE memories
                    SET truth_value = $2,
                        truth_confidence = $3,
                        truth_category = $4,
                        truth_evaluated_at = NOW(),
                        ecan_sti = $5,
                        ecan_lti = $6
                    WHERE id = $1
                    "#,
                    &[
                        &update.id,
                        &update.truth_value,
                        &update.truth_confidence,
                        &update.truth_category,
                        &update.ecan_sti,
                        &update.ecan_lti,
                    ],
                )
                .await
                .context("Failed to update truth score in batch")?;
        }
        transaction.commit().await?;
        Ok(updates.len())
    }
    /// Get aggregated truth scoring statistics.
    pub async fn get_truth_stats(&self) -> Result<TruthStats> {
        let client = self.pool.get().await?;
        let row = client
            .query_one(
                r#"
                SELECT
                    COUNT(*) AS total,
                    COUNT(truth_evaluated_at) AS scored,
                    COUNT(*) - COUNT(truth_evaluated_at) AS unscored,
                    COUNT(*) FILTER (WHERE truth_category = 'verified') AS cat_verified,
                    COUNT(*) FILTER (WHERE truth_category = 'plausible') AS cat_plausible,
                    COUNT(*) FILTER (WHERE truth_category = 'unverified') AS cat_unverified,
                    COUNT(*) FILTER (WHERE truth_category = 'contradicted') AS cat_contradicted,
                    AVG(truth_value) FILTER (WHERE truth_value IS NOT NULL) AS avg_tv,
                    AVG(truth_confidence) FILTER (WHERE truth_confidence IS NOT NULL) AS avg_conf
                FROM memories
                WHERE expires_at IS NULL OR expires_at > NOW()
                "#,
                &[],
            )
            .await
            .context("Failed to get truth stats")?;
        let total: i64 = row.get("total");
        let scored: i64 = row.get("scored");
        let coverage_pct = if total > 0 {
            (scored as f64 / total as f64) * 100.0
        } else {
            0.0
        };
        Ok(TruthStats {
            total_memories: total,
            scored_memories: scored,
            unscored_memories: row.get("unscored"),
            category_verified: row.get("cat_verified"),
            category_plausible: row.get("cat_plausible"),
            category_unverified: row.get("cat_unverified"),
            category_contradicted: row.get("cat_contradicted"),
            avg_truth_value: row.get("avg_tv"),
            avg_confidence: row.get("avg_conf"),
            coverage_pct,
        })
    }
 }
 /// Result for a single batch entry
 #[derive(Debug, Clone, Serialize)]
 pub struct BatchStoreResult {