mirror of
https://gitea.ingwaz.work/Ingwaz/openbrain-mcp.git
synced 2026-06-15 22:07:08 +00:00
feat(db): add truth scoring database helpers (#35)
New structs:
- TruthScoreUpdate: parameters for updating truth scores
- TruthStats: aggregated truth scoring statistics
- ScoringCandidate: lightweight record for the scoring worker

New Database methods:
- get_unscored_memories(): fetch unscored memories FIFO
- get_stale_memories(): fetch memories due for re-evaluation
- update_truth_score(): update single memory truth fields
- batch_update_truth_scores(): transactional batch update
- get_truth_stats(): aggregate stats with category breakdown

Uses partial index idx_memories_truth_unevaluated for efficient unscored memory queries.

Part of #29
This commit is contained in:
247
src/db.rs
247
src/db.rs
@@ -384,6 +384,253 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Truth scoring database helpers (Issue #35)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Parameters for updating truth scores on a memory.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TruthScoreUpdate {
|
||||
pub id: Uuid,
|
||||
pub truth_value: f32,
|
||||
pub truth_confidence: f32,
|
||||
pub truth_category: String,
|
||||
pub ecan_sti: f32,
|
||||
pub ecan_lti: f32,
|
||||
}
|
||||
|
||||
/// Aggregated truth scoring statistics.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct TruthStats {
|
||||
pub total_memories: i64,
|
||||
pub scored_memories: i64,
|
||||
pub unscored_memories: i64,
|
||||
pub category_verified: i64,
|
||||
pub category_plausible: i64,
|
||||
pub category_unverified: i64,
|
||||
pub category_contradicted: i64,
|
||||
pub avg_truth_value: Option<f64>,
|
||||
pub avg_confidence: Option<f64>,
|
||||
pub coverage_pct: f64,
|
||||
}
|
||||
|
||||
/// A lightweight memory record for the truth scoring worker.
|
||||
/// Contains only the fields needed for scoring (avoids fetching full embeddings
|
||||
/// unless cross-referencing requires them).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScoringCandidate {
|
||||
pub id: Uuid,
|
||||
pub content: String,
|
||||
pub embedding: Vec<f32>,
|
||||
pub metadata: serde_json::Value,
|
||||
pub created_at: chrono::DateTime<chrono::Utc>,
|
||||
/// Existing truth value, if previously scored.
|
||||
pub truth_value: Option<f32>,
|
||||
pub truth_confidence: Option<f32>,
|
||||
pub ecan_sti: Option<f32>,
|
||||
pub ecan_lti: Option<f32>,
|
||||
}
|
||||
|
||||
impl Database {
|
||||
/// Fetch memories that have never been truth-scored.
|
||||
///
|
||||
/// Returns up to `limit` memories ordered by creation time (oldest first),
|
||||
/// so the worker processes memories in FIFO order.
|
||||
pub async fn get_unscored_memories(&self, limit: i64) -> Result<Vec<ScoringCandidate>> {
|
||||
let client = self.pool.get().await?;
|
||||
let rows = client
|
||||
.query(
|
||||
r#"
|
||||
SELECT id, content, embedding, metadata, created_at,
|
||||
truth_value, truth_confidence, ecan_sti, ecan_lti
|
||||
FROM memories
|
||||
WHERE truth_evaluated_at IS NULL
|
||||
AND (expires_at IS NULL OR expires_at > NOW())
|
||||
ORDER BY created_at ASC
|
||||
LIMIT $1
|
||||
"#,
|
||||
&[&limit],
|
||||
)
|
||||
.await
|
||||
.context("Failed to fetch unscored memories")?;
|
||||
|
||||
Ok(rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
let pgvec: Vector = row.get("embedding");
|
||||
ScoringCandidate {
|
||||
id: row.get("id"),
|
||||
content: row.get("content"),
|
||||
embedding: pgvec.to_vec(),
|
||||
metadata: row.get("metadata"),
|
||||
created_at: row.get("created_at"),
|
||||
truth_value: row.get("truth_value"),
|
||||
truth_confidence: row.get("truth_confidence"),
|
||||
ecan_sti: row.get("ecan_sti"),
|
||||
ecan_lti: row.get("ecan_lti"),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Fetch memories whose truth score is stale (evaluated more than
|
||||
/// `older_than_seconds` ago).
|
||||
pub async fn get_stale_memories(
|
||||
&self,
|
||||
older_than_seconds: i64,
|
||||
limit: i64,
|
||||
) -> Result<Vec<ScoringCandidate>> {
|
||||
let client = self.pool.get().await?;
|
||||
let rows = client
|
||||
.query(
|
||||
r#"
|
||||
SELECT id, content, embedding, metadata, created_at,
|
||||
truth_value, truth_confidence, ecan_sti, ecan_lti
|
||||
FROM memories
|
||||
WHERE truth_evaluated_at IS NOT NULL
|
||||
AND truth_evaluated_at < NOW() - ($1 || ' seconds')::interval
|
||||
AND (expires_at IS NULL OR expires_at > NOW())
|
||||
ORDER BY truth_evaluated_at ASC
|
||||
LIMIT $2
|
||||
"#,
|
||||
&[&older_than_seconds.to_string(), &limit],
|
||||
)
|
||||
.await
|
||||
.context("Failed to fetch stale memories")?;
|
||||
|
||||
Ok(rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
let pgvec: Vector = row.get("embedding");
|
||||
ScoringCandidate {
|
||||
id: row.get("id"),
|
||||
content: row.get("content"),
|
||||
embedding: pgvec.to_vec(),
|
||||
metadata: row.get("metadata"),
|
||||
created_at: row.get("created_at"),
|
||||
truth_value: row.get("truth_value"),
|
||||
truth_confidence: row.get("truth_confidence"),
|
||||
ecan_sti: row.get("ecan_sti"),
|
||||
ecan_lti: row.get("ecan_lti"),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Update truth scores for a single memory.
|
||||
pub async fn update_truth_score(&self, update: &TruthScoreUpdate) -> Result<()> {
|
||||
let client = self.pool.get().await?;
|
||||
client
|
||||
.execute(
|
||||
r#"
|
||||
UPDATE memories
|
||||
SET truth_value = $2,
|
||||
truth_confidence = $3,
|
||||
truth_category = $4,
|
||||
truth_evaluated_at = NOW(),
|
||||
ecan_sti = $5,
|
||||
ecan_lti = $6
|
||||
WHERE id = $1
|
||||
"#,
|
||||
&[
|
||||
&update.id,
|
||||
&update.truth_value,
|
||||
&update.truth_confidence,
|
||||
&update.truth_category,
|
||||
&update.ecan_sti,
|
||||
&update.ecan_lti,
|
||||
],
|
||||
)
|
||||
.await
|
||||
.context("Failed to update truth score")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Batch update truth scores in a single transaction.
|
||||
pub async fn batch_update_truth_scores(&self, updates: &[TruthScoreUpdate]) -> Result<usize> {
|
||||
if updates.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
let mut client = self.pool.get().await?;
|
||||
let transaction = client.transaction().await?;
|
||||
|
||||
for update in updates {
|
||||
transaction
|
||||
.execute(
|
||||
r#"
|
||||
UPDATE memories
|
||||
SET truth_value = $2,
|
||||
truth_confidence = $3,
|
||||
truth_category = $4,
|
||||
truth_evaluated_at = NOW(),
|
||||
ecan_sti = $5,
|
||||
ecan_lti = $6
|
||||
WHERE id = $1
|
||||
"#,
|
||||
&[
|
||||
&update.id,
|
||||
&update.truth_value,
|
||||
&update.truth_confidence,
|
||||
&update.truth_category,
|
||||
&update.ecan_sti,
|
||||
&update.ecan_lti,
|
||||
],
|
||||
)
|
||||
.await
|
||||
.context("Failed to update truth score in batch")?;
|
||||
}
|
||||
|
||||
transaction.commit().await?;
|
||||
Ok(updates.len())
|
||||
}
|
||||
|
||||
/// Get aggregated truth scoring statistics.
|
||||
pub async fn get_truth_stats(&self) -> Result<TruthStats> {
|
||||
let client = self.pool.get().await?;
|
||||
let row = client
|
||||
.query_one(
|
||||
r#"
|
||||
SELECT
|
||||
COUNT(*) AS total,
|
||||
COUNT(truth_evaluated_at) AS scored,
|
||||
COUNT(*) - COUNT(truth_evaluated_at) AS unscored,
|
||||
COUNT(*) FILTER (WHERE truth_category = 'verified') AS cat_verified,
|
||||
COUNT(*) FILTER (WHERE truth_category = 'plausible') AS cat_plausible,
|
||||
COUNT(*) FILTER (WHERE truth_category = 'unverified') AS cat_unverified,
|
||||
COUNT(*) FILTER (WHERE truth_category = 'contradicted') AS cat_contradicted,
|
||||
AVG(truth_value) FILTER (WHERE truth_value IS NOT NULL) AS avg_tv,
|
||||
AVG(truth_confidence) FILTER (WHERE truth_confidence IS NOT NULL) AS avg_conf
|
||||
FROM memories
|
||||
WHERE expires_at IS NULL OR expires_at > NOW()
|
||||
"#,
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
.context("Failed to get truth stats")?;
|
||||
|
||||
let total: i64 = row.get("total");
|
||||
let scored: i64 = row.get("scored");
|
||||
let coverage_pct = if total > 0 {
|
||||
(scored as f64 / total as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
Ok(TruthStats {
|
||||
total_memories: total,
|
||||
scored_memories: scored,
|
||||
unscored_memories: row.get("unscored"),
|
||||
category_verified: row.get("cat_verified"),
|
||||
category_plausible: row.get("cat_plausible"),
|
||||
category_unverified: row.get("cat_unverified"),
|
||||
category_contradicted: row.get("cat_contradicted"),
|
||||
avg_truth_value: row.get("avg_tv"),
|
||||
avg_confidence: row.get("avg_conf"),
|
||||
coverage_pct,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Result for a single batch entry
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct BatchStoreResult {
|
||||
|
||||
Reference in New Issue
Block a user