mirror of
https://gitea.ingwaz.work/Ingwaz/openbrain-mcp.git
synced 2026-06-15 22:07:08 +00:00
feat(tools): add evaluate MCP tool (#37)
This commit is contained in:
209
src/tools/evaluate.rs
Normal file
209
src/tools/evaluate.rs
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
//! Evaluate Tool - Score a claim's truthfulness against the memory store
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::auth::PUBLIC_AUTH_SCOPE;
|
||||||
|
use crate::tools::INTERNAL_AUTH_SCOPE_ARG;
|
||||||
|
use crate::truth::ecan::EcanParams;
|
||||||
|
use crate::truth::scorer::{score_memory, RelatedMemory, ScorerConfig};
|
||||||
|
use crate::AppState;
|
||||||
|
|
||||||
|
/// Execute the evaluate tool
|
||||||
|
pub async fn execute(state: &Arc<AppState>, arguments: Value) -> Result<String> {
|
||||||
|
// Get embedding engine, return error if not ready
|
||||||
|
let embedding_engine = state
|
||||||
|
.get_embedding()
|
||||||
|
.await
|
||||||
|
.ok_or_else(|| anyhow!("Embedding engine not ready - service is still initializing"))?;
|
||||||
|
|
||||||
|
// Extract parameters
|
||||||
|
let claim = arguments
|
||||||
|
.get("claim")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.context("Missing required parameter: claim")?;
|
||||||
|
|
||||||
|
let context = arguments
|
||||||
|
.get("context")
|
||||||
|
.and_then(|v| v.as_str());
|
||||||
|
|
||||||
|
let auth_scope = arguments
|
||||||
|
.get(INTERNAL_AUTH_SCOPE_ARG)
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or(PUBLIC_AUTH_SCOPE);
|
||||||
|
|
||||||
|
// Build the text to embed: claim + optional context
|
||||||
|
let embed_text = match context {
|
||||||
|
Some(ctx) => format!("{} {}", claim, ctx),
|
||||||
|
None => claim.to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"Evaluating claim for auth scope '{}': '{}' ({} chars)",
|
||||||
|
auth_scope,
|
||||||
|
&claim[..claim.len().min(100)],
|
||||||
|
claim.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Generate embedding for the claim
|
||||||
|
let claim_embedding = embedding_engine
|
||||||
|
.embed(&embed_text)
|
||||||
|
.context("Failed to generate claim embedding")?;
|
||||||
|
|
||||||
|
// Find related memories using query_memories
|
||||||
|
let truth_config = &state.config.truth;
|
||||||
|
let matches = state
|
||||||
|
.db
|
||||||
|
.query_memories(
|
||||||
|
auth_scope,
|
||||||
|
None, // no source_agent_id filter
|
||||||
|
claim, // use claim text for hybrid search
|
||||||
|
&claim_embedding,
|
||||||
|
truth_config.cross_ref_limit, // limit from config
|
||||||
|
0.3, // low threshold to cast a wide net for scoring
|
||||||
|
0.6, // vector_weight
|
||||||
|
0.4, // text_weight
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("Failed to query related memories")?;
|
||||||
|
|
||||||
|
let related_count = matches.len();
|
||||||
|
|
||||||
|
info!("Found {} related memories for scoring", related_count);
|
||||||
|
|
||||||
|
// Convert MemoryMatch results to RelatedMemory for the scorer
|
||||||
|
let related: Vec<RelatedMemory> = matches
|
||||||
|
.iter()
|
||||||
|
.map(|m| RelatedMemory {
|
||||||
|
similarity: m.similarity,
|
||||||
|
content: m.record.content.clone(),
|
||||||
|
truth_value: m.record.truth_value,
|
||||||
|
truth_confidence: m.record.truth_confidence,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Build ScorerConfig from TruthConfig
|
||||||
|
let scorer_config = ScorerConfig {
|
||||||
|
pln_base_confidence: truth_config.pln_base_confidence,
|
||||||
|
contradiction_threshold: truth_config.contradiction_threshold,
|
||||||
|
verification_threshold: truth_config.verification_threshold,
|
||||||
|
ecan: EcanParams::new(
|
||||||
|
truth_config.ecan_decay_rate,
|
||||||
|
truth_config.ecan_spread_factor,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Score the claim (no existing ECAN values since this is an on-demand evaluation)
|
||||||
|
let result = score_memory(&scorer_config, claim, &related, None, None);
|
||||||
|
|
||||||
|
// Build human-readable reasoning
|
||||||
|
let reasoning = build_reasoning(claim, &result, related_count);
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"Claim scored: tv={:.3}, conf={:.3}, category={}, related={}",
|
||||||
|
result.truth_value,
|
||||||
|
result.truth_confidence,
|
||||||
|
result.category,
|
||||||
|
related_count
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(serde_json::json!({
|
||||||
|
"success": true,
|
||||||
|
"truth_value": result.truth_value,
|
||||||
|
"truth_confidence": result.truth_confidence,
|
||||||
|
"truth_category": result.category.as_str(),
|
||||||
|
"ecan_sti": result.ecan_sti,
|
||||||
|
"ecan_lti": result.ecan_lti,
|
||||||
|
"related_count": related_count,
|
||||||
|
"confirmation_count": result.confirmation_count,
|
||||||
|
"contradiction_count": result.contradiction_count,
|
||||||
|
"reasoning": reasoning
|
||||||
|
})
|
||||||
|
.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a human-readable explanation of the scoring result.
|
||||||
|
fn build_reasoning(
|
||||||
|
claim: &str,
|
||||||
|
result: &crate::truth::scorer::ScoringResult,
|
||||||
|
related_count: usize,
|
||||||
|
) -> String {
|
||||||
|
let mut parts = Vec::new();
|
||||||
|
|
||||||
|
// Describe evidence base
|
||||||
|
if related_count == 0 {
|
||||||
|
parts.push("No related memories found in the store.".to_string());
|
||||||
|
} else {
|
||||||
|
parts.push(format!(
|
||||||
|
"Found {} related memor{} in the store.",
|
||||||
|
related_count,
|
||||||
|
if related_count == 1 { "y" } else { "ies" }
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Describe confirmations/contradictions
|
||||||
|
if result.confirmation_count > 0 {
|
||||||
|
parts.push(format!(
|
||||||
|
"{} memor{} confirm{} this claim.",
|
||||||
|
result.confirmation_count,
|
||||||
|
if result.confirmation_count == 1 { "y" } else { "ies" },
|
||||||
|
if result.confirmation_count == 1 { "s" } else { "" }
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if result.contradiction_count > 0 {
|
||||||
|
parts.push(format!(
|
||||||
|
"{} memor{} contradict{} this claim.",
|
||||||
|
result.contradiction_count,
|
||||||
|
if result.contradiction_count == 1 { "y" } else { "ies" },
|
||||||
|
if result.contradiction_count == 1 { "s" } else { "" }
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Describe category
|
||||||
|
let category_desc = match result.category.as_str() {
|
||||||
|
"verified" => format!(
|
||||||
|
"The claim '{}' is VERIFIED with truth value {:.2} and confidence {:.2}.",
|
||||||
|
truncate_claim(claim),
|
||||||
|
result.truth_value,
|
||||||
|
result.truth_confidence
|
||||||
|
),
|
||||||
|
"plausible" => format!(
|
||||||
|
"The claim '{}' is PLAUSIBLE with truth value {:.2} and confidence {:.2}.",
|
||||||
|
truncate_claim(claim),
|
||||||
|
result.truth_value,
|
||||||
|
result.truth_confidence
|
||||||
|
),
|
||||||
|
"unverified" => format!(
|
||||||
|
"The claim '{}' is UNVERIFIED — insufficient evidence. Truth value {:.2}, confidence {:.2}.",
|
||||||
|
truncate_claim(claim),
|
||||||
|
result.truth_value,
|
||||||
|
result.truth_confidence
|
||||||
|
),
|
||||||
|
"contradicted" => format!(
|
||||||
|
"The claim '{}' is CONTRADICTED by existing memories. Truth value {:.2}, confidence {:.2}.",
|
||||||
|
truncate_claim(claim),
|
||||||
|
result.truth_value,
|
||||||
|
result.truth_confidence
|
||||||
|
),
|
||||||
|
other => format!(
|
||||||
|
"The claim scored with category '{}', truth value {:.2}, confidence {:.2}.",
|
||||||
|
other,
|
||||||
|
result.truth_value,
|
||||||
|
result.truth_confidence
|
||||||
|
),
|
||||||
|
};
|
||||||
|
parts.push(category_desc);
|
||||||
|
|
||||||
|
parts.join(" ")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Truncate a claim for display in reasoning text.
///
/// Returns `claim` unchanged when it is at most 80 bytes long. Longer claims
/// are cut to a prefix of at most 80 bytes; the cut is moved back to the
/// nearest UTF-8 character boundary so that a multi-byte character is never
/// split (slicing a `str` off a boundary would panic).
fn truncate_claim(claim: &str) -> &str {
    const MAX_BYTES: usize = 80;

    if claim.len() <= MAX_BYTES {
        return claim;
    }

    // Index 0 is always a character boundary, so this loop terminates.
    let mut end = MAX_BYTES;
    while !claim.is_char_boundary(end) {
        end -= 1;
    }
    &claim[..end]
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
//! MCP Tools for OpenBrain
|
//! MCP Tools for OpenBrain
|
||||||
|
|
||||||
pub mod batch_store;
|
pub mod batch_store;
|
||||||
|
pub mod evaluate;
|
||||||
pub mod purge;
|
pub mod purge;
|
||||||
pub mod query;
|
pub mod query;
|
||||||
pub mod store;
|
pub mod store;
|
||||||
@@ -139,6 +140,24 @@ pub fn get_tool_definitions() -> Vec<Value> {
|
|||||||
"required": ["confirm"]
|
"required": ["confirm"]
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
json!({
|
||||||
|
"name": "evaluate",
|
||||||
|
"description": "Score a claim's truthfulness against the memory store using neuro-symbolic reasoning (PLN + ECAN)",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"claim": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The text claim to evaluate for truthfulness"
|
||||||
|
},
|
||||||
|
"context": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Optional additional context to improve scoring accuracy"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["claim"]
|
||||||
|
}
|
||||||
|
}),
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -152,6 +171,7 @@ pub async fn execute_tool(
|
|||||||
"batch_store" => batch_store::execute(state, arguments).await,
|
"batch_store" => batch_store::execute(state, arguments).await,
|
||||||
"query" => query::execute(state, arguments).await,
|
"query" => query::execute(state, arguments).await,
|
||||||
"purge" => purge::execute(state, arguments).await,
|
"purge" => purge::execute(state, arguments).await,
|
||||||
|
"evaluate" => evaluate::execute(state, arguments).await,
|
||||||
_ => anyhow::bail!("Unknown tool: {}", tool_name),
|
_ => anyhow::bail!("Unknown tool: {}", tool_name),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user