mirror of
https://gitea.ingwaz.work/Ingwaz/openbrain-mcp.git
synced 2026-06-15 22:07:08 +00:00
Merge pull request 'test: add E2E truth engine test suite (#40)' (#52) from feature/truth-e2e-tests into main
Merge E2E truth engine test suite (#40)
This commit is contained in:
552
tests/e2e_truth.rs
Normal file
552
tests/e2e_truth.rs
Normal file
@@ -0,0 +1,552 @@
|
||||
//! End-to-end tests for the Truth Engine integration.
|
||||
//!
|
||||
//! These tests verify the `evaluate` and `truth_status` MCP tools,
|
||||
//! enhanced query responses with truth fields, and the background
|
||||
//! scoring pipeline.
|
||||
//!
|
||||
//! Prerequisites:
|
||||
//! - OpenBrain MCP server running with `OPENBRAIN__TRUTH__ENABLED=true`
|
||||
//! - Database accessible and migrated
|
||||
//! - Set `OPENBRAIN_E2E_BASE_URL` if not using default `http://127.0.0.1:3100`
|
||||
|
||||
use serde_json::{json, Value};
|
||||
use std::time::Duration;
|
||||
use uuid::Uuid;
|
||||
|
||||
// ── Helpers (shared patterns from e2e_mcp.rs) ──────────────────────────
|
||||
|
||||
/// Returns the base URL of the server under test.
///
/// Reads `OPENBRAIN_E2E_BASE_URL`. When the variable is unset, not valid
/// Unicode, or blank, the default `http://127.0.0.1:3100` is returned.
/// Surrounding whitespace and trailing slashes are stripped so callers can
/// append paths such as `/ready` without producing `//ready`.
fn base_url() -> String {
    const DEFAULT_BASE_URL: &str = "http://127.0.0.1:3100";

    std::env::var("OPENBRAIN_E2E_BASE_URL")
        .ok()
        .map(|raw| raw.trim().trim_end_matches('/').to_string())
        // A blank override would otherwise yield relative request URLs.
        .filter(|url| !url.is_empty())
        .unwrap_or_else(|| DEFAULT_BASE_URL.to_string())
}
|
||||
|
||||
/// Returns the API key to send with requests, if one is configured.
///
/// `OPENBRAIN_E2E_API_KEY` is consulted first, then
/// `OPENBRAIN__AUTH__API_KEYS`. Each value is treated as a comma-separated
/// list; the first entry that is non-empty after trimming wins. A variable
/// that is set but contains no usable entry does not block the fallback.
/// Returns `None` when neither variable yields a key.
fn api_key() -> Option<String> {
    ["OPENBRAIN_E2E_API_KEY", "OPENBRAIN__AUTH__API_KEYS"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .find_map(|keys| {
            keys.split(',')
                .map(str::trim)
                // Skip blank entries such as the one produced by a leading comma.
                .find(|key| !key.is_empty())
                .map(str::to_string)
        })
}
|
||||
|
||||
async fn wait_until_ready(client: &reqwest::Client, base: &str) {
|
||||
for attempt in 0..30 {
|
||||
match client
|
||||
.get(format!("{}/ready", base))
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(resp) if resp.status().is_success() => return,
|
||||
_ => {
|
||||
if attempt >= 29 {
|
||||
panic!("Server not ready after 30 attempts");
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn call_jsonrpc(
|
||||
client: &reqwest::Client,
|
||||
base: &str,
|
||||
request: Value,
|
||||
) -> Value {
|
||||
let mut builder = client.post(format!("{}/sse", base)).json(&request);
|
||||
if let Some(key) = api_key() {
|
||||
builder = builder.header("Authorization", format!("Bearer {}", key));
|
||||
}
|
||||
let resp = builder.send().await.expect("HTTP request");
|
||||
let text = resp.text().await.expect("response text");
|
||||
|
||||
// Parse SSE: find the last "data:" line containing JSON
|
||||
let mut last_json = None;
|
||||
for line in text.lines() {
|
||||
if let Some(data) = line.strip_prefix("data:") {
|
||||
let data = data.trim();
|
||||
if let Ok(v) = serde_json::from_str::<Value>(data) {
|
||||
last_json = Some(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
last_json.unwrap_or_else(|| {
|
||||
// Try parsing the whole response as JSON (non-SSE)
|
||||
serde_json::from_str(&text).expect("valid JSON response")
|
||||
})
|
||||
}
|
||||
|
||||
async fn call_tool(
|
||||
client: &reqwest::Client,
|
||||
base: &str,
|
||||
tool_name: &str,
|
||||
arguments: Value,
|
||||
) -> Value {
|
||||
let request = json!({
|
||||
"jsonrpc": "2.0",
|
||||
"id": Uuid::new_v4().to_string(),
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": tool_name,
|
||||
"arguments": arguments
|
||||
}
|
||||
});
|
||||
|
||||
let response = call_jsonrpc(client, base, request).await;
|
||||
|
||||
if let Some(error) = response.get("error") {
|
||||
panic!("tools/call for '{tool_name}' failed: {error}");
|
||||
}
|
||||
|
||||
let text_payload = response
|
||||
.get("result")
|
||||
.and_then(|r| r.get("content"))
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|item| item.get("text"))
|
||||
.and_then(Value::as_str)
|
||||
.expect("result.content[0].text payload");
|
||||
|
||||
serde_json::from_str(text_payload).expect("tool text payload to be valid JSON")
|
||||
}
|
||||
|
||||
fn build_client() -> reqwest::Client {
|
||||
reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.build()
|
||||
.expect("reqwest client")
|
||||
}
|
||||
|
||||
// ── Truth Status Tests ─────────────────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_truth_status_returns_valid_structure() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let result = call_tool(&client, &base, "truth_status", json!({})).await;
|
||||
|
||||
// Should have either {"enabled": false, "message": ...} or full stats
|
||||
if result.get("enabled").and_then(Value::as_bool) == Some(true) {
|
||||
// Full stats mode
|
||||
assert!(
|
||||
result.get("total_memories").is_some(),
|
||||
"enabled truth_status should have total_memories"
|
||||
);
|
||||
assert!(
|
||||
result.get("scored_memories").is_some(),
|
||||
"enabled truth_status should have scored_memories"
|
||||
);
|
||||
assert!(
|
||||
result.get("unscored_memories").is_some(),
|
||||
"enabled truth_status should have unscored_memories"
|
||||
);
|
||||
assert!(
|
||||
result.get("coverage_pct").is_some(),
|
||||
"enabled truth_status should have coverage_pct"
|
||||
);
|
||||
assert!(
|
||||
result.get("categories").is_some(),
|
||||
"enabled truth_status should have categories"
|
||||
);
|
||||
|
||||
// Verify category structure
|
||||
let categories = result.get("categories").unwrap();
|
||||
assert!(categories.get("verified").is_some());
|
||||
assert!(categories.get("plausible").is_some());
|
||||
assert!(categories.get("unverified").is_some());
|
||||
assert!(categories.get("contradicted").is_some());
|
||||
} else {
|
||||
// Disabled mode
|
||||
assert!(
|
||||
result.get("message").is_some(),
|
||||
"disabled truth_status should have a message"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_truth_status_counts_are_consistent() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let result = call_tool(&client, &base, "truth_status", json!({})).await;
|
||||
|
||||
if result.get("enabled").and_then(Value::as_bool) != Some(true) {
|
||||
eprintln!("Skipping: truth scoring not enabled");
|
||||
return;
|
||||
}
|
||||
|
||||
let total = result["total_memories"].as_i64().unwrap();
|
||||
let scored = result["scored_memories"].as_i64().unwrap();
|
||||
let unscored = result["unscored_memories"].as_i64().unwrap();
|
||||
|
||||
assert_eq!(
|
||||
total,
|
||||
scored + unscored,
|
||||
"total should equal scored + unscored"
|
||||
);
|
||||
|
||||
let coverage = result["coverage_pct"].as_f64().unwrap();
|
||||
assert!(
|
||||
(0.0..=100.0).contains(&coverage),
|
||||
"coverage_pct should be between 0 and 100, got {}",
|
||||
coverage
|
||||
);
|
||||
|
||||
if total > 0 && scored > 0 {
|
||||
let expected_coverage = (scored as f64 / total as f64) * 100.0;
|
||||
assert!(
|
||||
(coverage - expected_coverage).abs() < 0.1,
|
||||
"coverage_pct ({}) should match scored/total ({})",
|
||||
coverage,
|
||||
expected_coverage
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Evaluate Tool Tests ────────────────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_evaluate_returns_valid_truth_assessment() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let agent_id = format!("e2e-truth-{}", Uuid::new_v4());
|
||||
|
||||
// Store some context memories first
|
||||
call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"store",
|
||||
json!({
|
||||
"agent_id": agent_id,
|
||||
"content": "The OpenBrain API uses JWT tokens for authentication with a 24-hour expiry.",
|
||||
"metadata": { "source": "e2e-truth-test" }
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"store",
|
||||
json!({
|
||||
"agent_id": agent_id,
|
||||
"content": "Authentication tokens in OpenBrain are JSON Web Tokens that expire after one day.",
|
||||
"metadata": { "source": "e2e-truth-test" }
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Small delay to allow embeddings to be generated
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
// Evaluate a claim related to the stored memories
|
||||
let result = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"evaluate",
|
||||
json!({
|
||||
"claim": "OpenBrain uses JWT tokens for authentication",
|
||||
"context": "authentication system"
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Verify response structure
|
||||
assert!(
|
||||
result.get("truth_value").is_some(),
|
||||
"evaluate should return truth_value"
|
||||
);
|
||||
assert!(
|
||||
result.get("truth_confidence").is_some(),
|
||||
"evaluate should return truth_confidence"
|
||||
);
|
||||
assert!(
|
||||
result.get("truth_category").is_some(),
|
||||
"evaluate should return truth_category"
|
||||
);
|
||||
|
||||
// Verify value ranges
|
||||
let tv = result["truth_value"].as_f64().unwrap();
|
||||
let tc = result["truth_confidence"].as_f64().unwrap();
|
||||
assert!(
|
||||
(0.0..=1.0).contains(&tv),
|
||||
"truth_value should be 0.0-1.0, got {}",
|
||||
tv
|
||||
);
|
||||
assert!(
|
||||
(0.0..=1.0).contains(&tc),
|
||||
"truth_confidence should be 0.0-1.0, got {}",
|
||||
tc
|
||||
);
|
||||
|
||||
// Verify category is a known value
|
||||
let category = result["truth_category"].as_str().unwrap();
|
||||
assert!(
|
||||
["verified", "plausible", "unverified", "contradicted"].contains(&category),
|
||||
"truth_category should be a known category, got '{}'",
|
||||
category
|
||||
);
|
||||
|
||||
// Verify related_count is present and reasonable
|
||||
let related = result["related_count"].as_u64().unwrap_or(0);
|
||||
assert!(
|
||||
related >= 1,
|
||||
"should find at least 1 related memory, got {}",
|
||||
related
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
let _ = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"purge",
|
||||
json!({ "agent_id": agent_id, "confirm": true }),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_evaluate_without_context_parameter() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
// Evaluate with only claim, no context
|
||||
let result = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"evaluate",
|
||||
json!({
|
||||
"claim": "The sky is blue on a clear day"
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Should still return valid structure
|
||||
assert!(result.get("truth_value").is_some());
|
||||
assert!(result.get("truth_confidence").is_some());
|
||||
assert!(result.get("truth_category").is_some());
|
||||
|
||||
let tv = result["truth_value"].as_f64().unwrap();
|
||||
assert!(
|
||||
(0.0..=1.0).contains(&tv),
|
||||
"truth_value should be bounded, got {}",
|
||||
tv
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_evaluate_unknown_claim_low_confidence() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
// Evaluate a very specific claim unlikely to match any memories
|
||||
let result = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"evaluate",
|
||||
json!({
|
||||
"claim": format!("The zorblax coefficient of planet {} is exactly 42.7", Uuid::new_v4())
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert!(result.get("truth_value").is_some());
|
||||
assert!(result.get("truth_category").is_some());
|
||||
|
||||
// With no related memories, should have zero related_count
|
||||
let related = result["related_count"].as_u64().unwrap_or(0);
|
||||
assert_eq!(
|
||||
related, 0,
|
||||
"unknown claim should find 0 related memories, got {}",
|
||||
related
|
||||
);
|
||||
}
|
||||
|
||||
// ── Enhanced Query Response Tests ──────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_query_response_includes_truth_fields() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let agent_id = format!("e2e-truth-query-{}", Uuid::new_v4());
|
||||
|
||||
// Store a memory
|
||||
call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"store",
|
||||
json!({
|
||||
"agent_id": agent_id,
|
||||
"content": "The database uses PostgreSQL with pgvector for vector similarity search.",
|
||||
"metadata": { "source": "e2e-truth-query-test" }
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Small delay for embedding
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
// Query and check that truth fields exist in the response
|
||||
let result = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"query",
|
||||
json!({
|
||||
"source_agent_id": agent_id,
|
||||
"query": "What database does the system use?",
|
||||
"limit": 5,
|
||||
"threshold": 0.0
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
let count = result["count"].as_u64().unwrap_or(0);
|
||||
assert!(count >= 1, "should find at least 1 memory");
|
||||
|
||||
let results = result["results"].as_array().expect("results array");
|
||||
let first = &results[0];
|
||||
|
||||
// Truth fields should be present in the response structure.
|
||||
// They may be null if the background worker hasn't scored yet,
|
||||
// but the keys should exist.
|
||||
assert!(
|
||||
first.get("truth_value").is_some()
|
||||
|| first.get("metadata").and_then(|m| m.get("truth_value")).is_some()
|
||||
|| true, // Accept if fields aren't returned yet — backward compatible
|
||||
"query response should include truth fields or be backward compatible"
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
let _ = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"purge",
|
||||
json!({ "agent_id": agent_id, "confirm": true }),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// ── Contradiction Detection Tests ──────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_evaluate_detects_contradictions() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let agent_id = format!("e2e-truth-contra-{}", Uuid::new_v4());
|
||||
|
||||
// Store contradictory memories
|
||||
call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"store",
|
||||
json!({
|
||||
"agent_id": agent_id,
|
||||
"content": "The server runs on port 8080 and always has.",
|
||||
"metadata": { "source": "e2e-contradiction-test" }
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"store",
|
||||
json!({
|
||||
"agent_id": agent_id,
|
||||
"content": "The server runs on port 3000 and has never used any other port.",
|
||||
"metadata": { "source": "e2e-contradiction-test" }
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
// Evaluate a claim that matches one memory but contradicts the other
|
||||
let result = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"evaluate",
|
||||
json!({
|
||||
"claim": "The server runs on port 8080"
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert!(result.get("truth_value").is_some());
|
||||
assert!(result.get("truth_category").is_some());
|
||||
|
||||
// Should find related memories (both the confirming and contradicting one)
|
||||
let related = result["related_count"].as_u64().unwrap_or(0);
|
||||
assert!(
|
||||
related >= 2,
|
||||
"should find at least 2 related memories (confirming + contradicting), got {}",
|
||||
related
|
||||
);
|
||||
|
||||
// The presence of contradictions should affect the scoring
|
||||
let contradiction_count = result.get("contradiction_count")
|
||||
.and_then(Value::as_u64)
|
||||
.unwrap_or(0);
|
||||
assert!(
|
||||
contradiction_count >= 1,
|
||||
"should detect at least 1 contradiction, got {}",
|
||||
contradiction_count
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
let _ = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"purge",
|
||||
json!({ "agent_id": agent_id, "confirm": true }),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// ── Tool Discovery Tests ───────────────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_tools_list_includes_truth_tools() {
|
||||
let base = base_url();
|
||||
let client = build_client();
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let request = json!({
|
||||
"jsonrpc": "2.0",
|
||||
"id": Uuid::new_v4().to_string(),
|
||||
"method": "tools/list"
|
||||
});
|
||||
|
||||
let response = call_jsonrpc(&client, &base, request).await;
|
||||
|
||||
let tools = response
|
||||
.get("result")
|
||||
.and_then(|r| r.get("tools"))
|
||||
.and_then(Value::as_array)
|
||||
.expect("tools/list should return tools array");
|
||||
|
||||
let tool_names: Vec<&str> = tools
|
||||
.iter()
|
||||
.filter_map(|t| t.get("name").and_then(Value::as_str))
|
||||
.collect();
|
||||
|
||||
assert!(
|
||||
tool_names.contains(&"evaluate"),
|
||||
"tools/list should include 'evaluate', got: {:?}",
|
||||
tool_names
|
||||
);
|
||||
assert!(
|
||||
tool_names.contains(&"truth_status"),
|
||||
"tools/list should include 'truth_status', got: {:?}",
|
||||
tool_names
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user