From 38a621da55a8287cec3a84043bc9cc2fc544d3bf Mon Sep 17 00:00:00 2001
From: Agent Zero
Date: Sat, 4 Apr 2026 04:05:11 +0000
Subject: [PATCH] feat(tools): add evaluate MCP tool (#37)

---
 src/tools/evaluate.rs | 239 ++++++++++++++++++++++++++++++++++++++++++
 src/tools/mod.rs      |  20 ++++
 2 files changed, 259 insertions(+)
 create mode 100644 src/tools/evaluate.rs

diff --git a/src/tools/evaluate.rs b/src/tools/evaluate.rs
new file mode 100644
index 0000000..7ff8bf3
--- /dev/null
+++ b/src/tools/evaluate.rs
@@ -0,0 +1,239 @@
+//! Evaluate Tool - Score a claim's truthfulness against the memory store
+
+use anyhow::{anyhow, Context, Result};
+use serde_json::Value;
+use std::sync::Arc;
+use tracing::info;
+
+use crate::auth::PUBLIC_AUTH_SCOPE;
+use crate::tools::INTERNAL_AUTH_SCOPE_ARG;
+use crate::truth::ecan::EcanParams;
+use crate::truth::scorer::{score_memory, RelatedMemory, ScorerConfig};
+use crate::AppState;
+
+/// Execute the evaluate tool.
+///
+/// Reads the required `claim` argument and the optional `context` argument,
+/// embeds the claim (plus the context, when given), queries related memories
+/// within the auth scope taken from the arguments (the public scope when none
+/// is supplied) and scores the claim against them.
+///
+/// Returns a JSON string with the truth value, confidence, category, ECAN
+/// values, evidence counts and a human-readable reasoning text.
+///
+/// # Errors
+///
+/// Fails when the embedding engine is not ready, when `claim` is missing or
+/// not a string, or when embedding or the memory query fails.
+pub async fn execute(state: &Arc<AppState>, arguments: Value) -> Result<String> {
+    // Get embedding engine, return error if not ready
+    let embedding_engine = state
+        .get_embedding()
+        .await
+        .ok_or_else(|| anyhow!("Embedding engine not ready - service is still initializing"))?;
+
+    // Extract parameters
+    let claim = arguments
+        .get("claim")
+        .and_then(|v| v.as_str())
+        .context("Missing required parameter: claim")?;
+
+    let context = arguments
+        .get("context")
+        .and_then(|v| v.as_str());
+
+    let auth_scope = arguments
+        .get(INTERNAL_AUTH_SCOPE_ARG)
+        .and_then(|v| v.as_str())
+        .unwrap_or(PUBLIC_AUTH_SCOPE);
+
+    // Build the text to embed: claim + optional context
+    let embed_text = match context {
+        Some(ctx) => format!("{} {}", claim, ctx),
+        None => claim.to_string(),
+    };
+
+    info!(
+        "Evaluating claim for auth scope '{}': '{}' ({} chars)",
+        auth_scope,
+        truncate_on_char_boundary(claim, 100),
+        claim.len()
+    );
+
+    // Generate embedding for the claim
+    let claim_embedding = embedding_engine
+        .embed(&embed_text)
+        .context("Failed to generate claim embedding")?;
+
+    // Find related memories using query_memories
+    let truth_config = &state.config.truth;
+    let matches = state
+        .db
+        .query_memories(
+            auth_scope,
+            None, // no source_agent_id filter
+            claim, // use claim text for hybrid search
+            &claim_embedding,
+            truth_config.cross_ref_limit, // limit from config
+            0.3, // low threshold to cast a wide net for scoring
+            0.6, // vector_weight
+            0.4, // text_weight
+        )
+        .await
+        .context("Failed to query related memories")?;
+
+    let related_count = matches.len();
+
+    info!("Found {} related memories for scoring", related_count);
+
+    // Convert MemoryMatch results to RelatedMemory for the scorer
+    let related: Vec<RelatedMemory> = matches
+        .iter()
+        .map(|m| RelatedMemory {
+            similarity: m.similarity,
+            content: m.record.content.clone(),
+            truth_value: m.record.truth_value,
+            truth_confidence: m.record.truth_confidence,
+        })
+        .collect();
+
+    // Build ScorerConfig from TruthConfig
+    let scorer_config = ScorerConfig {
+        pln_base_confidence: truth_config.pln_base_confidence,
+        contradiction_threshold: truth_config.contradiction_threshold,
+        verification_threshold: truth_config.verification_threshold,
+        ecan: EcanParams::new(
+            truth_config.ecan_decay_rate,
+            truth_config.ecan_spread_factor,
+        ),
+    };
+
+    // Score the claim (no existing ECAN values since this is an on-demand evaluation)
+    let result = score_memory(&scorer_config, claim, &related, None, None);
+
+    // Build human-readable reasoning
+    let reasoning = build_reasoning(claim, &result, related_count);
+
+    info!(
+        "Claim scored: tv={:.3}, conf={:.3}, category={}, related={}",
+        result.truth_value,
+        result.truth_confidence,
+        result.category,
+        related_count
+    );
+
+    Ok(serde_json::json!({
+        "success": true,
+        "truth_value": result.truth_value,
+        "truth_confidence": result.truth_confidence,
+        "truth_category": result.category.as_str(),
+        "ecan_sti": result.ecan_sti,
+        "ecan_lti": result.ecan_lti,
+        "related_count": related_count,
+        "confirmation_count": result.confirmation_count,
+        "contradiction_count": result.contradiction_count,
+        "reasoning": reasoning
+    })
+    .to_string())
+}
+
+/// Build a human-readable explanation of the scoring result.
+///
+/// The text states how many related memories were found, how many confirm or
+/// contradict the claim, and ends with a sentence describing the category.
+fn build_reasoning(
+    claim: &str,
+    result: &crate::truth::scorer::ScoringResult,
+    related_count: usize,
+) -> String {
+    let mut parts = Vec::new();
+
+    // Describe evidence base
+    if related_count == 0 {
+        parts.push("No related memories found in the store.".to_string());
+    } else {
+        parts.push(format!(
+            "Found {} related memor{} in the store.",
+            related_count,
+            if related_count == 1 { "y" } else { "ies" }
+        ));
+    }
+
+    // Describe confirmations/contradictions
+    if result.confirmation_count > 0 {
+        parts.push(format!(
+            "{} memor{} confirm{} this claim.",
+            result.confirmation_count,
+            if result.confirmation_count == 1 { "y" } else { "ies" },
+            if result.confirmation_count == 1 { "s" } else { "" }
+        ));
+    }
+    if result.contradiction_count > 0 {
+        parts.push(format!(
+            "{} memor{} contradict{} this claim.",
+            result.contradiction_count,
+            if result.contradiction_count == 1 { "y" } else { "ies" },
+            if result.contradiction_count == 1 { "s" } else { "" }
+        ));
+    }
+
+    // Describe category
+    let category_desc = match result.category.as_str() {
+        "verified" => format!(
+            "The claim '{}' is VERIFIED with truth value {:.2} and confidence {:.2}.",
+            truncate_claim(claim),
+            result.truth_value,
+            result.truth_confidence
+        ),
+        "plausible" => format!(
+            "The claim '{}' is PLAUSIBLE with truth value {:.2} and confidence {:.2}.",
+            truncate_claim(claim),
+            result.truth_value,
+            result.truth_confidence
+        ),
+        "unverified" => format!(
+            "The claim '{}' is UNVERIFIED — insufficient evidence. Truth value {:.2}, confidence {:.2}.",
+            truncate_claim(claim),
+            result.truth_value,
+            result.truth_confidence
+        ),
+        "contradicted" => format!(
+            "The claim '{}' is CONTRADICTED by existing memories. Truth value {:.2}, confidence {:.2}.",
+            truncate_claim(claim),
+            result.truth_value,
+            result.truth_confidence
+        ),
+        other => format!(
+            "The claim scored with category '{}', truth value {:.2}, confidence {:.2}.",
+            other,
+            result.truth_value,
+            result.truth_confidence
+        ),
+    };
+    parts.push(category_desc);
+
+    parts.join(" ")
+}
+
+/// Truncate a claim for display in reasoning text.
+fn truncate_claim(claim: &str) -> &str {
+    truncate_on_char_boundary(claim, 80)
+}
+
+/// Return the longest prefix of `text` that is at most `max_bytes` long and
+/// ends on a UTF-8 character boundary.
+///
+/// Slicing a `&str` at an arbitrary byte offset panics when the offset falls
+/// inside a multi-byte character, so the cut point is moved back to the
+/// nearest boundary instead.
+fn truncate_on_char_boundary(text: &str, max_bytes: usize) -> &str {
+    if text.len() <= max_bytes {
+        return text;
+    }
+    // Offset 0 is always a character boundary, so this loop terminates.
+    let mut end = max_bytes;
+    while !text.is_char_boundary(end) {
+        end -= 1;
+    }
+    &text[..end]
+}
diff --git a/src/tools/mod.rs b/src/tools/mod.rs
index a70fae7..875302e 100644
--- a/src/tools/mod.rs
+++ b/src/tools/mod.rs
@@ -1,6 +1,7 @@
 //! MCP Tools for OpenBrain
 
 pub mod batch_store;
+pub mod evaluate;
 pub mod purge;
 pub mod query;
 pub mod store;
@@ -139,6 +140,24 @@ pub fn get_tool_definitions() -> Vec<Value> {
                 "required": ["confirm"]
             }
         }),
+        json!({
+            "name": "evaluate",
+            "description": "Score a claim's truthfulness against the memory store using neuro-symbolic reasoning (PLN + ECAN)",
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "claim": {
+                        "type": "string",
+                        "description": "The text claim to evaluate for truthfulness"
+                    },
+                    "context": {
+                        "type": "string",
+                        "description": "Optional additional context to improve scoring accuracy"
+                    }
+                },
+                "required": ["claim"]
+            }
+        }),
     ]
 }
 
@@ -152,6 +171,7 @@ pub async fn execute_tool(
         "batch_store" => batch_store::execute(state, arguments).await,
         "query" => query::execute(state, arguments).await,
         "purge" => purge::execute(state, arguments).await,
+        "evaluate" => evaluate::execute(state, arguments).await,
         _ => anyhow::bail!("Unknown tool: {}", tool_name),
     }
 }