feat(tools): add evaluate MCP tool (#37)

2026-06-15 22:07:08 +00:00 · 2026-04-04 04:05:11 +00:00
parent 50d0a944b5
commit 38a621da55
2 changed files with 229 additions and 0 deletions
--- a/src/tools/evaluate.rs
+++ b/src/tools/evaluate.rs
@@ -0,0 +1,209 @@
+//! Evaluate Tool - Score a claim's truthfulness against the memory store
+
+use anyhow::{anyhow, Context, Result};
+use serde_json::Value;
+use std::sync::Arc;
+use tracing::info;
+
+use crate::auth::PUBLIC_AUTH_SCOPE;
+use crate::tools::INTERNAL_AUTH_SCOPE_ARG;
+use crate::truth::ecan::EcanParams;
+use crate::truth::scorer::{score_memory, RelatedMemory, ScorerConfig};
+use crate::AppState;
+
+/// Execute the `evaluate` tool: score `claim` against related memories in the store.
+///
+/// Reads `claim` (required string) and `context` (optional string) from `arguments` and
+/// returns the scoring result as a JSON string. Errors if the embedding engine is not
+/// ready, `claim` is missing or not a string, or the embedding or memory query fails.
+pub async fn execute(state: &Arc<AppState>, arguments: Value) -> Result<String> {
+    let embedding_engine = state
+        .get_embedding()
+        .await
+        .ok_or_else(|| anyhow!("Embedding engine not ready - service is still initializing"))?;
+
+    // Extract parameters; the auth scope falls back to the public scope when absent.
+    let claim = arguments
+        .get("claim")
+        .and_then(|v| v.as_str())
+        .context("Missing required parameter: claim")?;
+    let context = arguments.get("context").and_then(|v| v.as_str());
+    let auth_scope = arguments
+        .get(INTERNAL_AUTH_SCOPE_ARG)
+        .and_then(|v| v.as_str())
+        .unwrap_or(PUBLIC_AUTH_SCOPE);
+
+    // Build the text to embed: claim + optional context
+    let embed_text = match context {
+        Some(ctx) => format!("{} {}", claim, ctx),
+        None => claim.to_string(),
+    };
+
+    // Log at most 100 bytes of the claim. Slicing at a fixed byte offset panics when it
+    // lands inside a multi-byte UTF-8 character, so back up to a character boundary.
+    let mut preview_end = claim.len().min(100);
+    while !claim.is_char_boundary(preview_end) {
+        preview_end -= 1;
+    }
+    info!(
+        "Evaluating claim for auth scope '{}': '{}' ({} chars)",
+        auth_scope,
+        &claim[..preview_end],
+        claim.chars().count()
+    );
+
+    let claim_embedding = embedding_engine
+        .embed(&embed_text)
+        .context("Failed to generate claim embedding")?;
+
+    // Find related memories using query_memories
+    let truth_config = &state.config.truth;
+    let matches = state
+        .db
+        .query_memories(
+            auth_scope,
+            None,            // no source_agent_id filter
+            claim,           // use claim text for hybrid search
+            &claim_embedding,
+            truth_config.cross_ref_limit, // limit from config
+            0.3,             // low threshold to cast a wide net for scoring
+            0.6,             // vector_weight
+            0.4,             // text_weight
+        )
+        .await
+        .context("Failed to query related memories")?;
+
+    let related_count = matches.len();
+    info!("Found {} related memories for scoring", related_count);
+
+    // Convert MemoryMatch results to RelatedMemory for the scorer
+    let related: Vec<RelatedMemory> = matches
+        .iter()
+        .map(|m| RelatedMemory {
+            similarity: m.similarity,
+            content: m.record.content.clone(),
+            truth_value: m.record.truth_value,
+            truth_confidence: m.record.truth_confidence,
+        })
+        .collect();
+
+    // Build ScorerConfig from TruthConfig
+    let scorer_config = ScorerConfig {
+        pln_base_confidence: truth_config.pln_base_confidence,
+        contradiction_threshold: truth_config.contradiction_threshold,
+        verification_threshold: truth_config.verification_threshold,
+        ecan: EcanParams::new(
+            truth_config.ecan_decay_rate,
+            truth_config.ecan_spread_factor,
+        ),
+    };
+
+    // Score the claim (no existing ECAN values: on-demand evaluation), then explain it.
+    let result = score_memory(&scorer_config, claim, &related, None, None);
+    let reasoning = build_reasoning(claim, &result, related_count);
+    info!(
+        "Claim scored: tv={:.3}, conf={:.3}, category={}, related={}",
+        result.truth_value,
+        result.truth_confidence,
+        result.category,
+        related_count
+    );
+
+    Ok(serde_json::json!({
+        "success": true,
+        "truth_value": result.truth_value,
+        "truth_confidence": result.truth_confidence,
+        "truth_category": result.category.as_str(),
+        "ecan_sti": result.ecan_sti,
+        "ecan_lti": result.ecan_lti,
+        "related_count": related_count,
+        "confirmation_count": result.confirmation_count,
+        "contradiction_count": result.contradiction_count,
+        "reasoning": reasoning
+    })
+    .to_string())
+}
+
+/// Assemble the prose explanation that accompanies a scoring result.
+///
+/// The output is a space-joined run of sentences: the number of related memories found,
+/// the confirmation and contradiction counts (each only when non-zero), and a closing
+/// sentence selected by the result's category string.
+fn build_reasoning(
+    claim: &str,
+    result: &crate::truth::scorer::ScoringResult,
+    related_count: usize,
+) -> String {
+    let tv = &result.truth_value;
+    let conf = &result.truth_confidence;
+    // Lazy on purpose: the fallback category arm never reads the claim text.
+    let shown = || truncate_claim(claim);
+
+    // Opening sentence: size of the evidence base.
+    let evidence = if related_count == 0 {
+        String::from("No related memories found in the store.")
+    } else {
+        let noun_tail = if related_count == 1 { "y" } else { "ies" };
+        format!("Found {} related memor{} in the store.", related_count, noun_tail)
+    };
+    let mut sentences = vec![evidence];
+
+    // Supporting and opposing counts, inflected for singular vs. plural.
+    if result.confirmation_count > 0 {
+        let (noun_tail, verb_tail) = if result.confirmation_count == 1 {
+            ("y", "s")
+        } else {
+            ("ies", "")
+        };
+        sentences.push(format!(
+            "{} memor{} confirm{} this claim.",
+            result.confirmation_count, noun_tail, verb_tail
+        ));
+    }
+    if result.contradiction_count > 0 {
+        let (noun_tail, verb_tail) = if result.contradiction_count == 1 {
+            ("y", "s")
+        } else {
+            ("ies", "")
+        };
+        sentences.push(format!(
+            "{} memor{} contradict{} this claim.",
+            result.contradiction_count, noun_tail, verb_tail
+        ));
+    }
+
+    // Closing sentence, chosen by category; unrecognized categories get a generic form.
+    sentences.push(match result.category.as_str() {
+        "verified" => format!(
+            "The claim '{}' is VERIFIED with truth value {:.2} and confidence {:.2}.",
+            shown(), tv, conf
+        ),
+        "plausible" => format!(
+            "The claim '{}' is PLAUSIBLE with truth value {:.2} and confidence {:.2}.",
+            shown(), tv, conf
+        ),
+        "unverified" => format!(
+            "The claim '{}' is UNVERIFIED — insufficient evidence. Truth value {:.2}, confidence {:.2}.",
+            shown(), tv, conf
+        ),
+        "contradicted" => format!(
+            "The claim '{}' is CONTRADICTED by existing memories. Truth value {:.2}, confidence {:.2}.",
+            shown(), tv, conf
+        ),
+        other => format!(
+            "The claim scored with category '{}', truth value {:.2}, confidence {:.2}.",
+            other, tv, conf
+        ),
+    });
+
+    sentences.join(" ")
+}
+
+/// Truncate a claim to at most 80 bytes for display, never splitting a UTF-8 character.
+fn truncate_claim(claim: &str) -> &str {
+    let mut end = claim.len().min(80);
+    while !claim.is_char_boundary(end) {
+        end -= 1; // index 0 is always a boundary, so this terminates
+    }
+    &claim[..end]
+}
--- a/src/tools/mod.rs
+++ b/src/tools/mod.rs
@@ -1,6 +1,7 @@
 //! MCP Tools for OpenBrain

 pub mod batch_store;
+pub mod evaluate;
 pub mod purge;
 pub mod query;
 pub mod store;
@@ -139,6 +140,24 @@ pub fn get_tool_definitions() -> Vec<Value> {
                "required": ["confirm"]
            }
        }),
+        json!({
+            "name": "evaluate",
+            "description": "Score a claim's truthfulness against the memory store using neuro-symbolic reasoning (PLN + ECAN)",
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "claim": {
+                        "type": "string",
+                        "description": "The text claim to evaluate for truthfulness"
+                    },
+                    "context": {
+                        "type": "string",
+                        "description": "Optional additional context to improve scoring accuracy"
+                    }
+                },
+                "required": ["claim"]
+            }
+        }),
    ]
 }

@@ -152,6 +171,7 @@ pub async fn execute_tool(
        "batch_store" => batch_store::execute(state, arguments).await,
        "query" => query::execute(state, arguments).await,
        "purge" => purge::execute(state, arguments).await,
+        "evaluate" => evaluate::execute(state, arguments).await,
        _ => anyhow::bail!("Unknown tool: {}", tool_name),
    }
 }