mirror of
https://gitea.ingwaz.work/Ingwaz/openbrain-mcp.git
synced 2026-03-31 14:49:06 +00:00
Add server-side deduplication on ingest
This commit is contained in:
219
tests/e2e_mcp.rs
219
tests/e2e_mcp.rs
@@ -1242,5 +1242,224 @@ async fn e2e_existing_store_unchanged() -> anyhow::Result<()> {
|
||||
.await;
|
||||
|
||||
assert!(result["success"].as_bool().unwrap_or(false));
|
||||
assert_eq!(result["deduplicated"].as_bool(), Some(false));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
// =============================================================================
|
||||
// Deduplication Tests (Issue #14)
|
||||
// =============================================================================
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_store_deduplicates_and_merges_metadata() -> anyhow::Result<()> {
|
||||
let base = base_url();
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(20))
|
||||
.build()
|
||||
.expect("reqwest client");
|
||||
|
||||
ensure_schema().await;
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let agent = format!("dedup_{}", uuid::Uuid::new_v4());
|
||||
let content = format!("Dedup fact {} prefers concise replies", uuid::Uuid::new_v4());
|
||||
let _ = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"purge",
|
||||
json!({ "agent_id": agent.clone(), "confirm": true }),
|
||||
)
|
||||
.await;
|
||||
|
||||
let first = call_tool(&client, &base, "store", json!({
|
||||
"agent_id": agent.clone(),
|
||||
"content": content.clone(),
|
||||
"metadata": {
|
||||
"source": "first",
|
||||
"keep": true,
|
||||
"override": "old"
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
|
||||
assert_eq!(first["deduplicated"].as_bool(), Some(false));
|
||||
|
||||
let first_query = call_tool(&client, &base, "query", json!({
|
||||
"agent_id": agent.clone(),
|
||||
"query": content.clone(),
|
||||
"limit": 5,
|
||||
"threshold": 0.0
|
||||
}))
|
||||
.await;
|
||||
let first_created_at = first_query["results"]
|
||||
.as_array()
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|item| item.get("created_at"))
|
||||
.and_then(Value::as_str)
|
||||
.expect("first created_at")
|
||||
.to_string();
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(1100)).await;
|
||||
|
||||
let second = call_tool(&client, &base, "store", json!({
|
||||
"agent_id": agent.clone(),
|
||||
"content": content.clone(),
|
||||
"metadata": {
|
||||
"override": "new",
|
||||
"second": true
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
|
||||
assert_eq!(second["deduplicated"].as_bool(), Some(true));
|
||||
assert_eq!(second["id"], first["id"]);
|
||||
|
||||
let query = call_tool(&client, &base, "query", json!({
|
||||
"agent_id": agent.clone(),
|
||||
"query": content.clone(),
|
||||
"limit": 5,
|
||||
"threshold": 0.0
|
||||
}))
|
||||
.await;
|
||||
|
||||
assert_eq!(query["count"].as_u64(), Some(1));
|
||||
let stored = query["results"]
|
||||
.as_array()
|
||||
.and_then(|items| items.first())
|
||||
.expect("dedup query result");
|
||||
|
||||
assert_eq!(stored["metadata"]["source"], "first");
|
||||
assert_eq!(stored["metadata"]["keep"], true);
|
||||
assert_eq!(stored["metadata"]["override"], "new");
|
||||
assert_eq!(stored["metadata"]["second"], true);
|
||||
|
||||
let second_created_at = stored["created_at"]
|
||||
.as_str()
|
||||
.expect("second created_at");
|
||||
assert!(
|
||||
second_created_at > first_created_at.as_str(),
|
||||
"deduplicated write should refresh created_at"
|
||||
);
|
||||
|
||||
let _ = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"purge",
|
||||
json!({ "agent_id": agent, "confirm": true }),
|
||||
)
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_store_dedup_is_agent_scoped() -> anyhow::Result<()> {
|
||||
let base = base_url();
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(20))
|
||||
.build()
|
||||
.expect("reqwest client");
|
||||
|
||||
ensure_schema().await;
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let agent_a = format!("dedup_scope_a_{}", uuid::Uuid::new_v4());
|
||||
let agent_b = format!("dedup_scope_b_{}", uuid::Uuid::new_v4());
|
||||
let content = format!("Shared cross-agent fact {}", uuid::Uuid::new_v4());
|
||||
|
||||
let _ = call_tool(&client, &base, "purge", json!({ "agent_id": agent_a.clone(), "confirm": true })).await;
|
||||
let _ = call_tool(&client, &base, "purge", json!({ "agent_id": agent_b.clone(), "confirm": true })).await;
|
||||
|
||||
let first = call_tool(&client, &base, "store", json!({
|
||||
"agent_id": agent_a.clone(),
|
||||
"content": content.clone()
|
||||
}))
|
||||
.await;
|
||||
let second = call_tool(&client, &base, "store", json!({
|
||||
"agent_id": agent_b.clone(),
|
||||
"content": content.clone()
|
||||
}))
|
||||
.await;
|
||||
|
||||
assert_eq!(first["deduplicated"].as_bool(), Some(false));
|
||||
assert_eq!(second["deduplicated"].as_bool(), Some(false));
|
||||
assert_ne!(first["id"], second["id"]);
|
||||
|
||||
let _ = call_tool(&client, &base, "purge", json!({ "agent_id": agent_a, "confirm": true })).await;
|
||||
let _ = call_tool(&client, &base, "purge", json!({ "agent_id": agent_b, "confirm": true })).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn e2e_batch_store_deduplicates_within_batch() -> anyhow::Result<()> {
|
||||
let base = base_url();
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(20))
|
||||
.build()
|
||||
.expect("reqwest client");
|
||||
|
||||
ensure_schema().await;
|
||||
wait_until_ready(&client, &base).await;
|
||||
|
||||
let agent = format!("batch_dedup_{}", uuid::Uuid::new_v4());
|
||||
let content = format!("Batch dedup fact {}", uuid::Uuid::new_v4());
|
||||
let _ = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"purge",
|
||||
json!({ "agent_id": agent.clone(), "confirm": true }),
|
||||
)
|
||||
.await;
|
||||
|
||||
let result = call_tool(&client, &base, "batch_store", json!({
|
||||
"agent_id": agent.clone(),
|
||||
"entries": [
|
||||
{
|
||||
"content": content.clone(),
|
||||
"metadata": { "source": "first", "keep": "yes" }
|
||||
},
|
||||
{
|
||||
"content": content.clone(),
|
||||
"metadata": { "source": "second", "merged": "yes" }
|
||||
}
|
||||
]
|
||||
}))
|
||||
.await;
|
||||
|
||||
let results = result["results"].as_array().expect("batch results");
|
||||
assert_eq!(result["count"].as_u64(), Some(2));
|
||||
assert_eq!(results[0]["deduplicated"].as_bool(), Some(false));
|
||||
assert_eq!(results[0]["status"], "stored");
|
||||
assert_eq!(results[1]["deduplicated"].as_bool(), Some(true));
|
||||
assert_eq!(results[1]["status"], "deduplicated");
|
||||
assert_eq!(results[0]["id"], results[1]["id"]);
|
||||
|
||||
let query = call_tool(&client, &base, "query", json!({
|
||||
"agent_id": agent.clone(),
|
||||
"query": content.clone(),
|
||||
"limit": 5,
|
||||
"threshold": 0.0
|
||||
}))
|
||||
.await;
|
||||
|
||||
assert_eq!(query["count"].as_u64(), Some(1));
|
||||
let stored = query["results"]
|
||||
.as_array()
|
||||
.and_then(|items| items.first())
|
||||
.expect("batch dedup query result");
|
||||
assert_eq!(stored["metadata"]["source"], "second");
|
||||
assert_eq!(stored["metadata"]["keep"], "yes");
|
||||
assert_eq!(stored["metadata"]["merged"], "yes");
|
||||
|
||||
let _ = call_tool(
|
||||
&client,
|
||||
&base,
|
||||
"purge",
|
||||
json!({ "agent_id": agent, "confirm": true }),
|
||||
)
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user