Files
openbrain-mcp/src/tools/evaluate.rs
2026-04-04 04:05:11 +00:00

210 lines
6.7 KiB
Rust

//! Evaluate Tool - Score a claim's truthfulness against the memory store
use anyhow::{anyhow, Context, Result};
use serde_json::Value;
use std::sync::Arc;
use tracing::info;
use crate::auth::PUBLIC_AUTH_SCOPE;
use crate::tools::INTERNAL_AUTH_SCOPE_ARG;
use crate::truth::ecan::EcanParams;
use crate::truth::scorer::{score_memory, RelatedMemory, ScorerConfig};
use crate::AppState;
/// Execute the evaluate tool.
///
/// Embeds the claim (plus the optional context), queries the memory store for
/// related memories within the caller's auth scope, scores the claim against
/// them, and returns the outcome serialized as a JSON string.
///
/// # Arguments
/// * `state` - shared application state; supplies the embedding engine, the
///   database handle and the truth-scoring configuration.
/// * `arguments` - JSON object with the tool arguments:
///   - `claim` (required string): the statement to evaluate.
///   - `context` (optional string): appended to the claim before embedding.
///   - the key named by `INTERNAL_AUTH_SCOPE_ARG` (optional string): auth
///     scope to query under; falls back to `PUBLIC_AUTH_SCOPE` when absent.
///
/// # Returns
/// A JSON object string with the keys `success`, `truth_value`,
/// `truth_confidence`, `truth_category`, `ecan_sti`, `ecan_lti`,
/// `related_count`, `confirmation_count`, `contradiction_count` and
/// `reasoning`.
///
/// # Errors
/// Fails when the embedding engine is not ready yet, when `claim` is missing
/// or not a string, when embedding generation fails, or when the memory query
/// fails.
pub async fn execute(state: &Arc<AppState>, arguments: Value) -> Result<String> {
    // Get embedding engine, return error if not ready
    let embedding_engine = state
        .get_embedding()
        .await
        .ok_or_else(|| anyhow!("Embedding engine not ready - service is still initializing"))?;

    // Extract parameters
    let claim = arguments
        .get("claim")
        .and_then(|v| v.as_str())
        .context("Missing required parameter: claim")?;
    let context = arguments.get("context").and_then(|v| v.as_str());
    let auth_scope = arguments
        .get(INTERNAL_AUTH_SCOPE_ARG)
        .and_then(|v| v.as_str())
        .unwrap_or(PUBLIC_AUTH_SCOPE);

    // Build the text to embed: claim + optional context
    let embed_text = match context {
        Some(ctx) => format!("{} {}", claim, ctx),
        None => claim.to_string(),
    };

    // Log only a short preview of the claim. The cut must land on a UTF-8 char
    // boundary: slicing a `str` at an arbitrary byte offset panics when that
    // offset falls inside a multi-byte character. Index 0 is always a boundary,
    // so the search cannot come up empty.
    let preview_end = (0..=claim.len().min(100))
        .rev()
        .find(|&idx| claim.is_char_boundary(idx))
        .unwrap_or(0);
    info!(
        "Evaluating claim for auth scope '{}': '{}' ({} chars)",
        auth_scope,
        &claim[..preview_end],
        // NOTE: this is the byte length of the claim, not a character count.
        claim.len()
    );

    // Generate embedding for the claim (including context, when given)
    let claim_embedding = embedding_engine
        .embed(&embed_text)
        .context("Failed to generate claim embedding")?;

    // Find related memories using query_memories
    let truth_config = &state.config.truth;
    let matches = state
        .db
        .query_memories(
            auth_scope,
            None,  // no source_agent_id filter
            claim, // use claim text (without context) for hybrid search
            &claim_embedding,
            truth_config.cross_ref_limit, // limit from config
            0.3,                          // low threshold to cast a wide net for scoring
            0.6,                          // vector_weight
            0.4,                          // text_weight
        )
        .await
        .context("Failed to query related memories")?;
    let related_count = matches.len();
    info!("Found {} related memories for scoring", related_count);

    // Convert MemoryMatch results to RelatedMemory for the scorer
    let related: Vec<RelatedMemory> = matches
        .iter()
        .map(|m| RelatedMemory {
            similarity: m.similarity,
            content: m.record.content.clone(),
            truth_value: m.record.truth_value,
            truth_confidence: m.record.truth_confidence,
        })
        .collect();

    // Build ScorerConfig from TruthConfig
    let scorer_config = ScorerConfig {
        pln_base_confidence: truth_config.pln_base_confidence,
        contradiction_threshold: truth_config.contradiction_threshold,
        verification_threshold: truth_config.verification_threshold,
        ecan: EcanParams::new(
            truth_config.ecan_decay_rate,
            truth_config.ecan_spread_factor,
        ),
    };

    // Score the claim (no existing ECAN values since this is an on-demand evaluation)
    let result = score_memory(&scorer_config, claim, &related, None, None);

    // Build human-readable reasoning
    let reasoning = build_reasoning(claim, &result, related_count);
    info!(
        "Claim scored: tv={:.3}, conf={:.3}, category={}, related={}",
        result.truth_value,
        result.truth_confidence,
        result.category,
        related_count
    );

    Ok(serde_json::json!({
        "success": true,
        "truth_value": result.truth_value,
        "truth_confidence": result.truth_confidence,
        "truth_category": result.category.as_str(),
        "ecan_sti": result.ecan_sti,
        "ecan_lti": result.ecan_lti,
        "related_count": related_count,
        "confirmation_count": result.confirmation_count,
        "contradiction_count": result.contradiction_count,
        "reasoning": reasoning
    })
    .to_string())
}
/// Compose the prose explanation that accompanies a scoring result.
///
/// The text is a sequence of sentences joined by single spaces:
/// 1. how many related memories were found (always present),
/// 2. how many memories confirm the claim (only when at least one does),
/// 3. how many memories contradict the claim (only when at least one does),
/// 4. a verdict sentence chosen by the result's category string.
///
/// For the four known categories the verdict quotes the claim, shortened via
/// `truncate_claim`; any other category is reported by name without quoting
/// the claim.
fn build_reasoning(
    claim: &str,
    result: &crate::truth::scorer::ScoringResult,
    related_count: usize,
) -> String {
    // "memor" + y/ies and verb + s/"" depending on whether the count is one.
    let noun_ending = |singular: bool| if singular { "y" } else { "ies" };
    let verb_ending = |singular: bool| if singular { "s" } else { "" };

    // Evidence base
    let evidence = match related_count {
        0 => "No related memories found in the store.".to_string(),
        n => format!(
            "Found {} related memor{} in the store.",
            n,
            noun_ending(n == 1)
        ),
    };

    // Confirmations / contradictions, each omitted when the count is zero
    let confirmations = (result.confirmation_count > 0).then(|| {
        let singular = result.confirmation_count == 1;
        format!(
            "{} memor{} confirm{} this claim.",
            result.confirmation_count,
            noun_ending(singular),
            verb_ending(singular)
        )
    });
    let contradictions = (result.contradiction_count > 0).then(|| {
        let singular = result.contradiction_count == 1;
        format!(
            "{} memor{} contradict{} this claim.",
            result.contradiction_count,
            noun_ending(singular),
            verb_ending(singular)
        )
    });

    // Verdict sentence. The claim is shortened lazily so that only the arms
    // that actually quote it invoke the truncation.
    let quoted = || truncate_claim(claim);
    let verdict = match result.category.as_str() {
        "verified" => format!(
            "The claim '{}' is VERIFIED with truth value {:.2} and confidence {:.2}.",
            quoted(),
            result.truth_value,
            result.truth_confidence
        ),
        "plausible" => format!(
            "The claim '{}' is PLAUSIBLE with truth value {:.2} and confidence {:.2}.",
            quoted(),
            result.truth_value,
            result.truth_confidence
        ),
        "unverified" => format!(
            "The claim '{}' is UNVERIFIED — insufficient evidence. Truth value {:.2}, confidence {:.2}.",
            quoted(),
            result.truth_value,
            result.truth_confidence
        ),
        "contradicted" => format!(
            "The claim '{}' is CONTRADICTED by existing memories. Truth value {:.2}, confidence {:.2}.",
            quoted(),
            result.truth_value,
            result.truth_confidence
        ),
        unknown => format!(
            "The claim scored with category '{}', truth value {:.2}, confidence {:.2}.",
            unknown,
            result.truth_value,
            result.truth_confidence
        ),
    };

    // Stitch the present sentences together in their fixed order.
    std::iter::once(evidence)
        .chain(confirmations)
        .chain(contradictions)
        .chain(std::iter::once(verdict))
        .collect::<Vec<String>>()
        .join(" ")
}
/// Truncate a claim for display in reasoning text.
///
/// Returns `claim` unchanged when it is at most 80 bytes long. Otherwise
/// returns the longest prefix that is no more than 80 bytes and ends on a
/// UTF-8 character boundary, so multi-byte text never causes a slicing panic
/// and no character is split.
fn truncate_claim(claim: &str) -> &str {
    const MAX_BYTES: usize = 80;

    if claim.len() <= MAX_BYTES {
        return claim;
    }

    // Walk back from the byte limit to the nearest char boundary. Index 0 is
    // always a boundary, so the search always succeeds.
    let end = (0..=MAX_BYTES)
        .rev()
        .find(|&idx| claim.is_char_boundary(idx))
        .unwrap_or(0);
    &claim[..end]
}