mirror of
https://gitea.ingwaz.work/Ingwaz/openbrain-mcp.git
synced 2026-06-15 22:07:08 +00:00
feat(db): add truth scoring database helpers (#35)
New structs:
- TruthScoreUpdate: parameters for updating truth scores
- TruthStats: aggregated truth scoring statistics
- ScoringCandidate: lightweight record for the scoring worker

New Database methods:
- get_unscored_memories(): fetch unscored memories FIFO
- get_stale_memories(): fetch memories due for re-evaluation
- update_truth_score(): update single memory truth fields
- batch_update_truth_scores(): transactional batch update
- get_truth_stats(): aggregate stats with category breakdown

Uses partial index idx_memories_truth_unevaluated for efficient unscored memory queries.

Part of #29
This commit is contained in:
247
src/db.rs
247
src/db.rs
@@ -384,6 +384,253 @@ impl Database {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Truth scoring database helpers (Issue #35)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Parameters for updating truth scores on a memory.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct TruthScoreUpdate {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub truth_value: f32,
|
||||||
|
pub truth_confidence: f32,
|
||||||
|
pub truth_category: String,
|
||||||
|
pub ecan_sti: f32,
|
||||||
|
pub ecan_lti: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aggregated truth scoring statistics.
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub struct TruthStats {
|
||||||
|
pub total_memories: i64,
|
||||||
|
pub scored_memories: i64,
|
||||||
|
pub unscored_memories: i64,
|
||||||
|
pub category_verified: i64,
|
||||||
|
pub category_plausible: i64,
|
||||||
|
pub category_unverified: i64,
|
||||||
|
pub category_contradicted: i64,
|
||||||
|
pub avg_truth_value: Option<f64>,
|
||||||
|
pub avg_confidence: Option<f64>,
|
||||||
|
pub coverage_pct: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A lightweight memory record for the truth scoring worker.
|
||||||
|
/// Contains only the fields needed for scoring (avoids fetching full embeddings
|
||||||
|
/// unless cross-referencing requires them).
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct ScoringCandidate {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub content: String,
|
||||||
|
pub embedding: Vec<f32>,
|
||||||
|
pub metadata: serde_json::Value,
|
||||||
|
pub created_at: chrono::DateTime<chrono::Utc>,
|
||||||
|
/// Existing truth value, if previously scored.
|
||||||
|
pub truth_value: Option<f32>,
|
||||||
|
pub truth_confidence: Option<f32>,
|
||||||
|
pub ecan_sti: Option<f32>,
|
||||||
|
pub ecan_lti: Option<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Database {
|
||||||
|
/// Fetch memories that have never been truth-scored.
|
||||||
|
///
|
||||||
|
/// Returns up to `limit` memories ordered by creation time (oldest first),
|
||||||
|
/// so the worker processes memories in FIFO order.
|
||||||
|
pub async fn get_unscored_memories(&self, limit: i64) -> Result<Vec<ScoringCandidate>> {
|
||||||
|
let client = self.pool.get().await?;
|
||||||
|
let rows = client
|
||||||
|
.query(
|
||||||
|
r#"
|
||||||
|
SELECT id, content, embedding, metadata, created_at,
|
||||||
|
truth_value, truth_confidence, ecan_sti, ecan_lti
|
||||||
|
FROM memories
|
||||||
|
WHERE truth_evaluated_at IS NULL
|
||||||
|
AND (expires_at IS NULL OR expires_at > NOW())
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
LIMIT $1
|
||||||
|
"#,
|
||||||
|
&[&limit],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("Failed to fetch unscored memories")?;
|
||||||
|
|
||||||
|
Ok(rows
|
||||||
|
.iter()
|
||||||
|
.map(|row| {
|
||||||
|
let pgvec: Vector = row.get("embedding");
|
||||||
|
ScoringCandidate {
|
||||||
|
id: row.get("id"),
|
||||||
|
content: row.get("content"),
|
||||||
|
embedding: pgvec.to_vec(),
|
||||||
|
metadata: row.get("metadata"),
|
||||||
|
created_at: row.get("created_at"),
|
||||||
|
truth_value: row.get("truth_value"),
|
||||||
|
truth_confidence: row.get("truth_confidence"),
|
||||||
|
ecan_sti: row.get("ecan_sti"),
|
||||||
|
ecan_lti: row.get("ecan_lti"),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetch memories whose truth score is stale (evaluated more than
|
||||||
|
/// `older_than_seconds` ago).
|
||||||
|
pub async fn get_stale_memories(
|
||||||
|
&self,
|
||||||
|
older_than_seconds: i64,
|
||||||
|
limit: i64,
|
||||||
|
) -> Result<Vec<ScoringCandidate>> {
|
||||||
|
let client = self.pool.get().await?;
|
||||||
|
let rows = client
|
||||||
|
.query(
|
||||||
|
r#"
|
||||||
|
SELECT id, content, embedding, metadata, created_at,
|
||||||
|
truth_value, truth_confidence, ecan_sti, ecan_lti
|
||||||
|
FROM memories
|
||||||
|
WHERE truth_evaluated_at IS NOT NULL
|
||||||
|
AND truth_evaluated_at < NOW() - ($1 || ' seconds')::interval
|
||||||
|
AND (expires_at IS NULL OR expires_at > NOW())
|
||||||
|
ORDER BY truth_evaluated_at ASC
|
||||||
|
LIMIT $2
|
||||||
|
"#,
|
||||||
|
&[&older_than_seconds.to_string(), &limit],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("Failed to fetch stale memories")?;
|
||||||
|
|
||||||
|
Ok(rows
|
||||||
|
.iter()
|
||||||
|
.map(|row| {
|
||||||
|
let pgvec: Vector = row.get("embedding");
|
||||||
|
ScoringCandidate {
|
||||||
|
id: row.get("id"),
|
||||||
|
content: row.get("content"),
|
||||||
|
embedding: pgvec.to_vec(),
|
||||||
|
metadata: row.get("metadata"),
|
||||||
|
created_at: row.get("created_at"),
|
||||||
|
truth_value: row.get("truth_value"),
|
||||||
|
truth_confidence: row.get("truth_confidence"),
|
||||||
|
ecan_sti: row.get("ecan_sti"),
|
||||||
|
ecan_lti: row.get("ecan_lti"),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update truth scores for a single memory.
|
||||||
|
pub async fn update_truth_score(&self, update: &TruthScoreUpdate) -> Result<()> {
|
||||||
|
let client = self.pool.get().await?;
|
||||||
|
client
|
||||||
|
.execute(
|
||||||
|
r#"
|
||||||
|
UPDATE memories
|
||||||
|
SET truth_value = $2,
|
||||||
|
truth_confidence = $3,
|
||||||
|
truth_category = $4,
|
||||||
|
truth_evaluated_at = NOW(),
|
||||||
|
ecan_sti = $5,
|
||||||
|
ecan_lti = $6
|
||||||
|
WHERE id = $1
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
&update.id,
|
||||||
|
&update.truth_value,
|
||||||
|
&update.truth_confidence,
|
||||||
|
&update.truth_category,
|
||||||
|
&update.ecan_sti,
|
||||||
|
&update.ecan_lti,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("Failed to update truth score")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Batch update truth scores in a single transaction.
|
||||||
|
pub async fn batch_update_truth_scores(&self, updates: &[TruthScoreUpdate]) -> Result<usize> {
|
||||||
|
if updates.is_empty() {
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
let mut client = self.pool.get().await?;
|
||||||
|
let transaction = client.transaction().await?;
|
||||||
|
|
||||||
|
for update in updates {
|
||||||
|
transaction
|
||||||
|
.execute(
|
||||||
|
r#"
|
||||||
|
UPDATE memories
|
||||||
|
SET truth_value = $2,
|
||||||
|
truth_confidence = $3,
|
||||||
|
truth_category = $4,
|
||||||
|
truth_evaluated_at = NOW(),
|
||||||
|
ecan_sti = $5,
|
||||||
|
ecan_lti = $6
|
||||||
|
WHERE id = $1
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
&update.id,
|
||||||
|
&update.truth_value,
|
||||||
|
&update.truth_confidence,
|
||||||
|
&update.truth_category,
|
||||||
|
&update.ecan_sti,
|
||||||
|
&update.ecan_lti,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("Failed to update truth score in batch")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
transaction.commit().await?;
|
||||||
|
Ok(updates.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get aggregated truth scoring statistics.
|
||||||
|
pub async fn get_truth_stats(&self) -> Result<TruthStats> {
|
||||||
|
let client = self.pool.get().await?;
|
||||||
|
let row = client
|
||||||
|
.query_one(
|
||||||
|
r#"
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS total,
|
||||||
|
COUNT(truth_evaluated_at) AS scored,
|
||||||
|
COUNT(*) - COUNT(truth_evaluated_at) AS unscored,
|
||||||
|
COUNT(*) FILTER (WHERE truth_category = 'verified') AS cat_verified,
|
||||||
|
COUNT(*) FILTER (WHERE truth_category = 'plausible') AS cat_plausible,
|
||||||
|
COUNT(*) FILTER (WHERE truth_category = 'unverified') AS cat_unverified,
|
||||||
|
COUNT(*) FILTER (WHERE truth_category = 'contradicted') AS cat_contradicted,
|
||||||
|
AVG(truth_value) FILTER (WHERE truth_value IS NOT NULL) AS avg_tv,
|
||||||
|
AVG(truth_confidence) FILTER (WHERE truth_confidence IS NOT NULL) AS avg_conf
|
||||||
|
FROM memories
|
||||||
|
WHERE expires_at IS NULL OR expires_at > NOW()
|
||||||
|
"#,
|
||||||
|
&[],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("Failed to get truth stats")?;
|
||||||
|
|
||||||
|
let total: i64 = row.get("total");
|
||||||
|
let scored: i64 = row.get("scored");
|
||||||
|
let coverage_pct = if total > 0 {
|
||||||
|
(scored as f64 / total as f64) * 100.0
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(TruthStats {
|
||||||
|
total_memories: total,
|
||||||
|
scored_memories: scored,
|
||||||
|
unscored_memories: row.get("unscored"),
|
||||||
|
category_verified: row.get("cat_verified"),
|
||||||
|
category_plausible: row.get("cat_plausible"),
|
||||||
|
category_unverified: row.get("cat_unverified"),
|
||||||
|
category_contradicted: row.get("cat_contradicted"),
|
||||||
|
avg_truth_value: row.get("avg_tv"),
|
||||||
|
avg_confidence: row.get("avg_conf"),
|
||||||
|
coverage_pct,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Result for a single batch entry
|
/// Result for a single batch entry
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct BatchStoreResult {
|
pub struct BatchStoreResult {
|
||||||
|
|||||||
Reference in New Issue
Block a user