Add server-side deduplication on ingest

This commit is contained in:
Agent Zero
2026-03-24 05:40:30 +00:00
parent 5d5c042dd1
commit 61d6448b44
8 changed files with 421 additions and 23 deletions

View File

@@ -103,7 +103,7 @@ pub async fn execute(state: &Arc<AppState>, arguments: Value) -> Result<String>
// 5. Batch DB insert (single transaction for atomicity)
let results = state
.db
.batch_store_memories(agent_id, processed_entries)
.batch_store_memories(agent_id, processed_entries, state.config.dedup.threshold)
.await
.context("Failed to batch store memories")?;

View File

@@ -15,7 +15,7 @@ pub fn get_tool_definitions() -> Vec<Value> {
vec![
json!({
"name": "store",
"description": "Store a memory with automatic embedding generation and keyword extraction. The memory will be associated with the agent_id for isolated retrieval.",
"description": "Store a memory with automatic embedding generation and keyword extraction. Near-duplicate memories for the same agent are deduplicated automatically by similarity, with metadata merged and timestamps refreshed.",
"inputSchema": {
"type": "object",
"properties": {
@@ -41,7 +41,7 @@ pub fn get_tool_definitions() -> Vec<Value> {
}),
json!({
"name": "batch_store",
"description": "Store multiple memories with automatic embedding generation and keyword extraction. Accepts 1-50 entries and stores them atomically in a single transaction.",
"description": "Store multiple memories with automatic embedding generation and keyword extraction. Accepts 1-50 entries, stores them atomically in a single transaction, and applies the same automatic deduplication rules as single-store.",
"inputSchema": {
"type": "object",
"properties": {

View File

@@ -60,20 +60,26 @@ pub async fn execute(state: &Arc<AppState>, arguments: Value) -> Result<String>
&keywords,
metadata,
expires_at.clone(),
state.config.dedup.threshold,
)
.await
.context("Failed to store memory")?;
info!("Memory stored with ID: {}", id);
info!(
"Memory {} with ID: {}",
if id.deduplicated { "deduplicated" } else { "stored" },
id.id
);
Ok(serde_json::json!({
"success": true,
"id": id.to_string(),
"id": id.id.to_string(),
"agent_id": agent_id,
"deduplicated": id.deduplicated,
"keywords": keywords,
"embedding_dimension": embedding.len(),
"ttl": ttl,
"expires_at": expires_at.as_ref().map(|ts| ts.to_rfc3339())
"expires_at": id.expires_at.as_ref().map(|ts| ts.to_rfc3339())
})
.to_string())
}