Add server-side deduplication on ingest

This commit is contained in:
Agent Zero
2026-03-24 05:40:30 +00:00
parent 5d5c042dd1
commit 61d6448b44
8 changed files with 421 additions and 23 deletions

View File

@@ -1242,5 +1242,224 @@ async fn e2e_existing_store_unchanged() -> anyhow::Result<()> {
.await;
assert!(result["success"].as_bool().unwrap_or(false));
assert_eq!(result["deduplicated"].as_bool(), Some(false));
Ok(())
}
// =============================================================================
// Deduplication Tests (Issue #14)
// =============================================================================
/// End-to-end check that storing identical content twice for the same agent
/// is reported as a deduplicated write.
///
/// The test verifies that the second `store` call:
/// - reports `deduplicated: true` and returns the same `id` as the first call,
/// - leaves exactly one result for the agent when queried,
/// - merges metadata (keys only in the first write survive, keys present in
///   both take the second write's value, new keys are added),
/// - yields a `created_at` that compares greater than the original one.
///
/// The agent is purged before and after the test; the purge results are
/// deliberately ignored.
#[tokio::test]
async fn e2e_store_deduplicates_and_merges_metadata() -> anyhow::Result<()> {
    let endpoint = base_url();
    let http = reqwest::Client::builder()
        .timeout(Duration::from_secs(20))
        .build()
        .expect("reqwest client");
    ensure_schema().await;
    wait_until_ready(&http, &endpoint).await;

    // Unique agent and content per run so earlier runs cannot interfere.
    let agent = format!("dedup_{}", uuid::Uuid::new_v4());
    let content = format!("Dedup fact {} prefers concise replies", uuid::Uuid::new_v4());

    // Best-effort cleanup before starting; the outcome is intentionally ignored.
    let _ = call_tool(
        &http,
        &endpoint,
        "purge",
        json!({ "agent_id": &agent, "confirm": true }),
    )
    .await;

    // First write: must be stored as a brand-new entry.
    let initial_payload = json!({
        "agent_id": &agent,
        "content": &content,
        "metadata": {
            "source": "first",
            "keep": true,
            "override": "old"
        }
    });
    let first = call_tool(&http, &endpoint, "store", initial_payload).await;
    assert_eq!(first["deduplicated"].as_bool(), Some(false));

    // The same lookup is issued before and after the duplicate write.
    let lookup = json!({
        "agent_id": &agent,
        "query": &content,
        "limit": 5,
        "threshold": 0.0
    });

    let first_query = call_tool(&http, &endpoint, "query", lookup.clone()).await;
    let first_created_at = first_query["results"]
        .get(0)
        .and_then(|hit| hit.get("created_at"))
        .and_then(|stamp| stamp.as_str())
        .expect("first created_at")
        .to_owned();

    // Presumably long enough for `created_at` to differ even at one-second
    // resolution — TODO confirm the server's timestamp precision.
    tokio::time::sleep(Duration::from_millis(1100)).await;

    // Second write with identical content but different metadata.
    let repeat_payload = json!({
        "agent_id": &agent,
        "content": &content,
        "metadata": {
            "override": "new",
            "second": true
        }
    });
    let second = call_tool(&http, &endpoint, "store", repeat_payload).await;
    assert_eq!(second["deduplicated"].as_bool(), Some(true));
    assert_eq!(second["id"], first["id"]);

    let query = call_tool(&http, &endpoint, "query", lookup).await;
    assert_eq!(query["count"].as_u64(), Some(1));
    let stored = query["results"].get(0).expect("dedup query result");

    // Metadata from both writes is merged; the later write wins on conflicts.
    let merged = &stored["metadata"];
    assert_eq!(merged["source"], "first");
    assert_eq!(merged["keep"], true);
    assert_eq!(merged["override"], "new");
    assert_eq!(merged["second"], true);

    // NOTE(review): timestamps are compared as plain strings, which assumes a
    // lexicographically sortable format — confirm against the server output.
    let second_created_at = stored["created_at"]
        .as_str()
        .expect("second created_at");
    assert!(
        first_created_at.as_str() < second_created_at,
        "deduplicated write should refresh created_at"
    );

    // Best-effort cleanup; the outcome is intentionally ignored.
    let _ = call_tool(
        &http,
        &endpoint,
        "purge",
        json!({ "agent_id": &agent, "confirm": true }),
    )
    .await;
    Ok(())
}
/// End-to-end check that deduplication does not cross agent boundaries.
///
/// The same content is stored once for each of two freshly named agents.
/// Both writes must report `deduplicated: false` and must return different
/// ids. Both agents are purged before and after the test; the purge results
/// are deliberately ignored.
#[tokio::test]
async fn e2e_store_dedup_is_agent_scoped() -> anyhow::Result<()> {
    let endpoint = base_url();
    let http = reqwest::Client::builder()
        .timeout(Duration::from_secs(20))
        .build()
        .expect("reqwest client");
    ensure_schema().await;
    wait_until_ready(&http, &endpoint).await;

    // Unique agents and content per run so earlier runs cannot interfere.
    let agent_a = format!("dedup_scope_a_{}", uuid::Uuid::new_v4());
    let agent_b = format!("dedup_scope_b_{}", uuid::Uuid::new_v4());
    let content = format!("Shared cross-agent fact {}", uuid::Uuid::new_v4());

    // Best-effort cleanup for both agents (A first, then B).
    for owner in [&agent_a, &agent_b] {
        let _ = call_tool(
            &http,
            &endpoint,
            "purge",
            json!({ "agent_id": owner, "confirm": true }),
        )
        .await;
    }

    // Identical content, different agents.
    let payload_a = json!({
        "agent_id": &agent_a,
        "content": &content
    });
    let first = call_tool(&http, &endpoint, "store", payload_a).await;

    let payload_b = json!({
        "agent_id": &agent_b,
        "content": &content
    });
    let second = call_tool(&http, &endpoint, "store", payload_b).await;

    // Neither write may be treated as a duplicate of the other.
    assert_eq!(first["deduplicated"].as_bool(), Some(false));
    assert_eq!(second["deduplicated"].as_bool(), Some(false));
    assert_ne!(first["id"], second["id"]);

    // Best-effort cleanup for both agents (A first, then B).
    for owner in [&agent_a, &agent_b] {
        let _ = call_tool(
            &http,
            &endpoint,
            "purge",
            json!({ "agent_id": owner, "confirm": true }),
        )
        .await;
    }
    Ok(())
}
/// End-to-end check that `batch_store` deduplicates identical entries that
/// appear within a single batch.
///
/// Two entries with the same content but different metadata are submitted in
/// one call. The test verifies that:
/// - the response reports a `count` of 2,
/// - the first entry has status `stored` and the second `deduplicated`,
/// - both entries share one id,
/// - a follow-up query returns a single result whose metadata is the merge
///   of both entries, with the second entry winning on the shared key.
///
/// The agent is purged before and after the test; the purge results are
/// deliberately ignored.
#[tokio::test]
async fn e2e_batch_store_deduplicates_within_batch() -> anyhow::Result<()> {
    let endpoint = base_url();
    let http = reqwest::Client::builder()
        .timeout(Duration::from_secs(20))
        .build()
        .expect("reqwest client");
    ensure_schema().await;
    wait_until_ready(&http, &endpoint).await;

    // Unique agent and content per run so earlier runs cannot interfere.
    let agent = format!("batch_dedup_{}", uuid::Uuid::new_v4());
    let content = format!("Batch dedup fact {}", uuid::Uuid::new_v4());

    // Best-effort cleanup before starting; the outcome is intentionally ignored.
    let _ = call_tool(
        &http,
        &endpoint,
        "purge",
        json!({ "agent_id": &agent, "confirm": true }),
    )
    .await;

    // One batch holding the same content twice with differing metadata.
    let batch = json!({
        "agent_id": &agent,
        "entries": [
            {
                "content": &content,
                "metadata": { "source": "first", "keep": "yes" }
            },
            {
                "content": &content,
                "metadata": { "source": "second", "merged": "yes" }
            }
        ]
    });
    let result = call_tool(&http, &endpoint, "batch_store", batch).await;
    let results = result["results"].as_array().expect("batch results");
    assert_eq!(result["count"].as_u64(), Some(2));

    // The first occurrence is stored as a new entry...
    let stored_entry = &results[0];
    assert_eq!(stored_entry["deduplicated"].as_bool(), Some(false));
    assert_eq!(stored_entry["status"], "stored");

    // ...and the second occurrence is folded into it.
    let duplicate_entry = &results[1];
    assert_eq!(duplicate_entry["deduplicated"].as_bool(), Some(true));
    assert_eq!(duplicate_entry["status"], "deduplicated");
    assert_eq!(stored_entry["id"], duplicate_entry["id"]);

    let lookup = json!({
        "agent_id": &agent,
        "query": &content,
        "limit": 5,
        "threshold": 0.0
    });
    let query = call_tool(&http, &endpoint, "query", lookup).await;
    assert_eq!(query["count"].as_u64(), Some(1));
    let stored = query["results"].get(0).expect("batch dedup query result");

    // Metadata from both entries is merged; the later entry wins on conflicts.
    let merged = &stored["metadata"];
    assert_eq!(merged["source"], "second");
    assert_eq!(merged["keep"], "yes");
    assert_eq!(merged["merged"], "yes");

    // Best-effort cleanup; the outcome is intentionally ignored.
    let _ = call_tool(
        &http,
        &endpoint,
        "purge",
        json!({ "agent_id": &agent, "confirm": true }),
    )
    .await;
    Ok(())
}