From 38a621da55a8287cec3a84043bc9cc2fc544d3bf Mon Sep 17 00:00:00 2001 From: Agent Zero Date: Sat, 4 Apr 2026 04:05:11 +0000 Subject: [PATCH 1/3] feat(tools): add evaluate MCP tool (#37) --- src/tools/evaluate.rs | 209 ++++++++++++++++++++++++++++++++++++++++++ src/tools/mod.rs | 20 ++++ 2 files changed, 229 insertions(+) create mode 100644 src/tools/evaluate.rs diff --git a/src/tools/evaluate.rs b/src/tools/evaluate.rs new file mode 100644 index 0000000..7ff8bf3 --- /dev/null +++ b/src/tools/evaluate.rs @@ -0,0 +1,209 @@ +//! Evaluate Tool - Score a claim's truthfulness against the memory store + +use anyhow::{anyhow, Context, Result}; +use serde_json::Value; +use std::sync::Arc; +use tracing::info; + +use crate::auth::PUBLIC_AUTH_SCOPE; +use crate::tools::INTERNAL_AUTH_SCOPE_ARG; +use crate::truth::ecan::EcanParams; +use crate::truth::scorer::{score_memory, RelatedMemory, ScorerConfig}; +use crate::AppState; + +/// Execute the evaluate tool +pub async fn execute(state: &Arc, arguments: Value) -> Result { + // Get embedding engine, return error if not ready + let embedding_engine = state + .get_embedding() + .await + .ok_or_else(|| anyhow!("Embedding engine not ready - service is still initializing"))?; + + // Extract parameters + let claim = arguments + .get("claim") + .and_then(|v| v.as_str()) + .context("Missing required parameter: claim")?; + + let context = arguments + .get("context") + .and_then(|v| v.as_str()); + + let auth_scope = arguments + .get(INTERNAL_AUTH_SCOPE_ARG) + .and_then(|v| v.as_str()) + .unwrap_or(PUBLIC_AUTH_SCOPE); + + // Build the text to embed: claim + optional context + let embed_text = match context { + Some(ctx) => format!("{} {}", claim, ctx), + None => claim.to_string(), + }; + + info!( + "Evaluating claim for auth scope '{}': '{}' ({} chars)", + auth_scope, + &claim[..claim.len().min(100)], + claim.len() + ); + + // Generate embedding for the claim + let claim_embedding = embedding_engine + .embed(&embed_text) + .context("Failed to generate claim embedding")?; + + // Find related memories using query_memories + let truth_config = &state.config.truth; + let matches = state + .db + .query_memories( + auth_scope, + None, // no source_agent_id filter + claim, // use claim text for hybrid search + &claim_embedding, + truth_config.cross_ref_limit, // limit from config + 0.3, // low threshold to cast a wide net for scoring + 0.6, // vector_weight + 0.4, // text_weight + ) + .await + .context("Failed to query related memories")?; + + let related_count = matches.len(); + + info!("Found {} related memories for scoring", related_count); + + // Convert MemoryMatch results to RelatedMemory for the scorer + let related: Vec = matches + .iter() + .map(|m| RelatedMemory { + similarity: m.similarity, + content: m.record.content.clone(), + truth_value: m.record.truth_value, + truth_confidence: m.record.truth_confidence, + }) + .collect(); + + // Build ScorerConfig from TruthConfig + let scorer_config = ScorerConfig { + pln_base_confidence: truth_config.pln_base_confidence, + contradiction_threshold: truth_config.contradiction_threshold, + verification_threshold: truth_config.verification_threshold, + ecan: EcanParams::new( + truth_config.ecan_decay_rate, + truth_config.ecan_spread_factor, + ), + }; + + // Score the claim (no existing ECAN values since this is an on-demand evaluation) + let result = score_memory(&scorer_config, claim, &related, None, None); + + // Build human-readable reasoning + let reasoning = build_reasoning(claim, &result, related_count); + + info!( + "Claim scored: tv={:.3}, conf={:.3}, category={}, related={}", + result.truth_value, + result.truth_confidence, + result.category, + related_count + ); + + Ok(serde_json::json!({ + "success": true, + "truth_value": result.truth_value, + "truth_confidence": result.truth_confidence, + "truth_category": result.category.as_str(), + "ecan_sti": result.ecan_sti, + "ecan_lti": result.ecan_lti, + "related_count": related_count, + "confirmation_count": result.confirmation_count, + "contradiction_count": result.contradiction_count, + "reasoning": reasoning + }) + .to_string()) +} + +/// Build a human-readable explanation of the scoring result. +fn build_reasoning( + claim: &str, + result: &crate::truth::scorer::ScoringResult, + related_count: usize, +) -> String { + let mut parts = Vec::new(); + + // Describe evidence base + if related_count == 0 { + parts.push("No related memories found in the store.".to_string()); + } else { + parts.push(format!( + "Found {} related memor{} in the store.", + related_count, + if related_count == 1 { "y" } else { "ies" } + )); + } + + // Describe confirmations/contradictions + if result.confirmation_count > 0 { + parts.push(format!( + "{} memor{} confirm{} this claim.", + result.confirmation_count, + if result.confirmation_count == 1 { "y" } else { "ies" }, + if result.confirmation_count == 1 { "s" } else { "" } + )); + } + if result.contradiction_count > 0 { + parts.push(format!( + "{} memor{} contradict{} this claim.", + result.contradiction_count, + if result.contradiction_count == 1 { "y" } else { "ies" }, + if result.contradiction_count == 1 { "s" } else { "" } + )); + } + + // Describe category + let category_desc = match result.category.as_str() { + "verified" => format!( + "The claim '{}' is VERIFIED with truth value {:.2} and confidence {:.2}.", + truncate_claim(claim), + result.truth_value, + result.truth_confidence + ), + "plausible" => format!( + "The claim '{}' is PLAUSIBLE with truth value {:.2} and confidence {:.2}.", + truncate_claim(claim), + result.truth_value, + result.truth_confidence + ), + "unverified" => format!( + "The claim '{}' is UNVERIFIED — insufficient evidence. Truth value {:.2}, confidence {:.2}.", + truncate_claim(claim), + result.truth_value, + result.truth_confidence + ), + "contradicted" => format!( + "The claim '{}' is CONTRADICTED by existing memories. Truth value {:.2}, confidence {:.2}.", + truncate_claim(claim), + result.truth_value, + result.truth_confidence + ), + other => format!( + "The claim scored with category '{}', truth value {:.2}, confidence {:.2}.", + other, + result.truth_value, + result.truth_confidence + ), + }; + parts.push(category_desc); + + parts.join(" ") +} + +/// Truncate a claim for display in reasoning text. +fn truncate_claim(claim: &str) -> &str { + if claim.len() <= 80 { + claim + } else { + &claim[..80] + } +} diff --git a/src/tools/mod.rs b/src/tools/mod.rs index a70fae7..875302e 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -1,6 +1,7 @@ //! MCP Tools for OpenBrain pub mod batch_store; +pub mod evaluate; pub mod purge; pub mod query; pub mod store; @@ -139,6 +140,24 @@ pub fn get_tool_definitions() -> Vec { "required": ["confirm"] } }), + json!({ + "name": "evaluate", + "description": "Score a claim's truthfulness against the memory store using neuro-symbolic reasoning (PLN + ECAN)", + "inputSchema": { + "type": "object", + "properties": { + "claim": { + "type": "string", + "description": "The text claim to evaluate for truthfulness" + }, + "context": { + "type": "string", + "description": "Optional additional context to improve scoring accuracy" + } + }, + "required": ["claim"] + } + }), ] } @@ -152,6 +171,7 @@ pub async fn execute_tool( "batch_store" => batch_store::execute(state, arguments).await, "query" => query::execute(state, arguments).await, "purge" => purge::execute(state, arguments).await, + "evaluate" => evaluate::execute(state, arguments).await, _ => anyhow::bail!("Unknown tool: {}", tool_name), } } From 8d0f94451380f2a9eb5875a745b0cd6d6aa04b3b Mon Sep 17 00:00:00 2001 From: Agent Zero Date: Sat, 4 Apr 2026 04:07:58 +0000 Subject: [PATCH 2/3] feat(truth): add background truth scoring worker (#36) --- src/db.rs | 46 ++++++++++++++++++ src/lib.rs | 65 +++++++++++++++++++++++++ src/truth/mod.rs | 3 ++ src/truth/worker.rs | 112 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 src/truth/worker.rs diff --git a/src/db.rs b/src/db.rs index e4a126b..e844a69 100644 --- a/src/db.rs +++ b/src/db.rs @@ -640,6 +640,52 @@ impl Database { coverage_pct, }) } + + /// Find memories related to the given embedding vector, excluding the source memory. + /// Used by the truth scoring worker for cross-referencing. + pub async fn find_related_memories( + &self, + candidate_embedding: &[f32], + exclude_id: Uuid, + limit: i64, + ) -> Result> { + let vector = pgvector::Vector::from(candidate_embedding.to_vec()); + let client = self.pool.get().await?; + let rows = client + .query( + r#" + SELECT id, content, truth_value, truth_confidence, + 1 - (embedding <=> $1) AS similarity + FROM memories + WHERE id != $2 + AND (expires_at IS NULL OR expires_at > NOW()) + ORDER BY embedding <=> $1 + LIMIT $3 + "#, + &[&vector, &exclude_id, &limit], + ) + .await + .context("Failed to find related memories")?; + + Ok(rows + .iter() + .map(|row| RelatedMemoryRow { + similarity: row.get("similarity"), + content: row.get("content"), + truth_value: row.get("truth_value"), + truth_confidence: row.get("truth_confidence"), + }) + .collect()) + } +} + +/// A row returned from the related memories query. +#[derive(Debug, Clone)] +pub struct RelatedMemoryRow { + pub similarity: f64, + pub content: String, + pub truth_value: Option, + pub truth_confidence: Option, } /// Result for a single batch entry diff --git a/src/lib.rs b/src/lib.rs index 52be22a..24fb76d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,6 +143,71 @@ pub async fn run_server(config: Config, db: Database) -> Result<()> { }); } + // Spawn truth scoring background worker if enabled + if config.truth.enabled { + let truth_state = state.clone(); + let truth_config = config.truth.clone(); + let scoring_interval = config.truth.scoring_interval_seconds; + info!( + "Truth scoring enabled (interval={}s, batch={})", + scoring_interval, truth_config.batch_size + ); + tokio::spawn(async move { + let mut interval = tokio::time::interval( + tokio::time::Duration::from_secs(scoring_interval), + ); + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + + // Wait for the embedding engine to be ready before starting + loop { + let readiness = truth_state.readiness.read().await; + match &*readiness { + ReadinessState::Ready => break, + ReadinessState::Failed(_) => { + error!("Embedding engine failed — truth scoring worker exiting"); + return; + } + _ => { + drop(readiness); + tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; + } + } + } + info!("Truth scoring worker started"); + + loop { + interval.tick().await; + // Acquire embedding reference for this cycle + let embedding_guard = truth_state.embedding.read().await; + let embedding = match &*embedding_guard { + Some(e) => e.clone(), + None => { + warn!("Embedding engine not available — skipping truth scoring cycle"); + continue; + } + }; + drop(embedding_guard); + + match truth::worker::run_scoring_cycle( + &truth_state.db, + &embedding, + &truth_config, + ) + .await + { + Ok(scored) if scored > 0 => { + info!("Truth scoring cycle complete: {} memories scored", scored); + } + Ok(_) => {} + Err(err) => { + error!("Truth scoring cycle failed: {:?}", err); + } + } + } + }); + } + + // Create MCP state for SSE transport let mcp_state = McpState::new(state.clone()); diff --git a/src/truth/mod.rs b/src/truth/mod.rs index 3cc2415..0406297 100644 --- a/src/truth/mod.rs +++ b/src/truth/mod.rs @@ -10,7 +10,10 @@ //! truth values from evidence chains. //! - **ECAN** (Economic Attention Network): Manages short-term and long-term //! importance of memories, enabling natural prioritization of verified knowledge. +//! - **Scorer**: Orchestrates PLN and ECAN into a unified scoring pipeline. +//! - **Worker**: Background daemon that periodically scores unscored and stale memories. pub mod ecan; pub mod pln; pub mod scorer; +pub mod worker; diff --git a/src/truth/worker.rs b/src/truth/worker.rs new file mode 100644 index 0000000..2995df3 --- /dev/null +++ b/src/truth/worker.rs @@ -0,0 +1,112 @@ +//! Background truth scoring worker. +//! +//! Periodically fetches unscored and stale memories, runs them through +//! the scoring pipeline (PLN + ECAN + cross-referencing), and writes +//! truth scores back to the database. + +use std::sync::Arc; + +use anyhow::Result; +use tracing::{debug, error, info, warn}; + +use crate::config::TruthConfig; +use crate::db::{Database, TruthScoreUpdate}; +use crate::embedding::EmbeddingEngine; +use crate::truth::ecan::EcanParams; +use crate::truth::scorer::{RelatedMemory, ScorerConfig, score_memory}; + +/// Run a single scoring cycle: fetch candidates, score them, write results. +/// +/// Returns the number of memories scored in this cycle. +pub async fn run_scoring_cycle( + db: &Database, + _embedding: &Arc, + config: &TruthConfig, +) -> Result { + let scorer_config = ScorerConfig { + pln_base_confidence: config.pln_base_confidence, + contradiction_threshold: config.contradiction_threshold, + verification_threshold: config.verification_threshold, + ecan: EcanParams::new(config.ecan_decay_rate, config.ecan_spread_factor), + }; + + let batch_size = config.batch_size as i64; + let rescore_after = config.rescore_after_seconds as i64; + let cross_ref_limit = config.cross_ref_limit as i64; + + // Fetch candidates: unscored first, then stale + let mut candidates = db.get_unscored_memories(batch_size).await?; + let unscored_count = candidates.len(); + + if candidates.len() < batch_size as usize { + let remaining = batch_size - candidates.len() as i64; + let stale = db.get_stale_memories(rescore_after, remaining).await?; + candidates.extend(stale); + } + + if candidates.is_empty() { + debug!("No memories to score this cycle"); + return Ok(0); + } + + info!( + "Scoring {} memories ({} unscored, {} stale)", + candidates.len(), + unscored_count, + candidates.len() - unscored_count + ); + + let mut updates: Vec = Vec::with_capacity(candidates.len()); + + for candidate in &candidates { + // Cross-reference: find related memories using vector similarity + let related_rows = match db + .find_related_memories(&candidate.embedding, candidate.id, cross_ref_limit) + .await + { + Ok(r) => r, + Err(err) => { + warn!( + "Failed to cross-reference memory {}: {:?}", + candidate.id, err + ); + Vec::new() + } + }; + + // Convert DB rows to scorer's RelatedMemory type + let related: Vec = related_rows + .into_iter() + .map(|row| RelatedMemory { + similarity: row.similarity, + content: row.content, + truth_value: row.truth_value, + truth_confidence: row.truth_confidence, + }) + .collect(); + + // Score the memory + let result = score_memory( + &scorer_config, + &candidate.content, + &related, + candidate.ecan_sti, + candidate.ecan_lti, + ); + + updates.push(TruthScoreUpdate { + id: candidate.id, + truth_value: result.truth_value, + truth_confidence: result.truth_confidence, + truth_category: result.category.to_string(), + ecan_sti: result.ecan_sti, + ecan_lti: result.ecan_lti, + }); + } + + // Batch write scores + let count = db.batch_update_truth_scores(&updates).await?; + info!("Updated truth scores for {} memories", count); + + Ok(count) +} From e181dad8c73e664aace8b0b1e1e2b1523ff8c701 Mon Sep 17 00:00:00 2001 From: Agent Zero Date: Sat, 4 Apr 2026 09:47:10 +0000 Subject: [PATCH 3/3] fix: add missing warn import, cast f64 similarity to f32 --- src/lib.rs | 2 +- src/truth/worker.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 24fb76d..51e4092 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use tokio::net::TcpListener; use tower_http::cors::{Any, CorsLayer}; use tower_http::trace::TraceLayer; -use tracing::{error, info}; +use tracing::{error, info, warn}; use crate::auth::auth_middleware; use crate::config::Config; diff --git a/src/truth/worker.rs b/src/truth/worker.rs index 2995df3..6305d3d 100644 --- a/src/truth/worker.rs +++ b/src/truth/worker.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use anyhow::Result; -use tracing::{debug, error, info, warn}; +use tracing::{debug, info, warn}; use crate::config::TruthConfig; use crate::db::{Database, TruthScoreUpdate}; @@ -78,7 +78,7 @@ pub async fn run_scoring_cycle( let related: Vec = related_rows .into_iter() .map(|row| RelatedMemory { - similarity: row.similarity, + similarity: row.similarity as f32, content: row.content, truth_value: row.truth_value, truth_confidence: row.truth_confidence,