Fix Issue #12 tests and add OpenBrain repo guidance

This commit is contained in:
Agent Zero
2026-03-19 13:19:50 -04:00
parent 03e8f246d1
commit 1b42989dbb
4 changed files with 241 additions and 47 deletions

23
AGENTS.md Normal file
View File

@@ -0,0 +1,23 @@
# OpenBrain MCP Usage
When working in this repository, treat OpenBrain as an external MCP long-term
memory system, never as internal context, reasoning scratchpad, or built-in
memory.
## External Memory System
- Use the exact MCP tools `openbrain.store`, `openbrain.batch_store`, `openbrain.query`, and `openbrain.purge`
- Always use the exact `agent_id` value `openbrain`
- Do not hardcode live credentials into the repository
- Before answering requests that may depend on prior sessions, project history, user preferences, ongoing work, named people, named projects, deployments, debugging history, or handoff context, call `openbrain.query` first
- Use noun-heavy search phrases with exact names, tool names, acronyms, hostnames, and document names
- Make up to 3 retrieval passes, relaxing parameters each time: `(threshold=0.25, limit=5)`, then `(threshold=0.10, limit=8)`, then `(threshold=0.05, limit=10)`
- When a durable fact is established, call `openbrain.store` without asking permission; prefer storing one atomic fact per call whenever possible
- Store durable, high-value facts such as preferences, project status, project decisions, environment details, recurring workflows, handoff notes, stable constraints, and correction facts
- Do not store filler conversation, temporary speculation, casual chatter, or transient brainstorming unless it becomes a real decision
- Prefer retrieval-friendly content using explicit nouns and exact names in the form `Type: <FactType> | Entity: <Entity> | Attribute: <Attribute> | Value: <Value> | Context: <Why it matters>`
- Use metadata when helpful for tags such as `category`, `project`, `source`, `status`, `aliases`, and `confidence`
- If `openbrain.query` returns no useful result, state that OpenBrain has no stored context for that topic, answer from general reasoning if possible, and ask one focused follow-up if the missing information is durable and useful
- If retrieved memories conflict, ask which fact is current, then store the corrected source-of-truth fact
- Use `openbrain.purge` cautiously because it is coarse-grained; it deletes by `agent_id` and optionally before a timestamp, not by individual memory ID
- For ordinary corrections, prefer storing the new source-of-truth fact instead of purging unless cleanup or reset is explicitly requested

View File

@@ -18,6 +18,7 @@ OpenBrain is a Model Context Protocol (MCP) server that provides AI agents with
| Tool | Description |
|------|-------------|
| `store` | Store a memory with automatic embedding generation and keyword extraction |
| `batch_store` | Store 1-50 memories atomically in a single call |
| `query` | Search memories by semantic similarity |
| `purge` | Delete memories by agent ID or time range |
@@ -147,6 +148,32 @@ Health Check: http://localhost:3100/mcp/health
}
```
### Example: Batch Store Memories
```json
{
"jsonrpc": "2.0",
"id": 3,
"method": "tools/call",
"params": {
"name": "batch_store",
"arguments": {
"agent_id": "assistant-1",
"entries": [
{
"content": "The user prefers dark mode",
"metadata": {"category": "preference"}
},
{
"content": "The user uses vim keybindings",
"metadata": {"category": "preference"}
}
]
}
}
}
```
## Architecture
```

View File

@@ -15,31 +15,50 @@ pub fn get_tool_definitions() -> Vec<Value> {
vec![
json!({
"name": "store",
"description": "Store a memory with automatic embedding generation",
"description": "Store a memory with automatic embedding generation and keyword extraction. The memory will be associated with the agent_id for isolated retrieval.",
"inputSchema": {
"type": "object",
"properties": {
"content": {"type": "string"},
"agent_id": {"type": "string"},
"metadata": {"type": "object"}
"content": {
"type": "string",
"description": "The text content to store as a memory"
},
"agent_id": {
"type": "string",
"description": "Unique identifier for the agent storing the memory (default: 'default')"
},
"metadata": {
"type": "object",
"description": "Optional metadata to attach to the memory"
}
},
"required": ["content"]
}
}),
json!({
"name": "batch_store",
"description": "Store multiple memories in a single call (1-50 entries)",
"description": "Store multiple memories with automatic embedding generation and keyword extraction. Accepts 1-50 entries and stores them atomically in a single transaction.",
"inputSchema": {
"type": "object",
"properties": {
"agent_id": {"type": "string"},
"agent_id": {
"type": "string",
"description": "Unique identifier for the agent storing the memories (default: 'default')"
},
"entries": {
"type": "array",
"description": "Array of 1-50 memory entries to store atomically",
"items": {
"type": "object",
"properties": {
"content": {"type": "string"},
"metadata": {"type": "object"}
"content": {
"type": "string",
"description": "The text content to store as a memory"
},
"metadata": {
"type": "object",
"description": "Optional metadata to attach to the memory"
}
},
"required": ["content"]
}
@@ -50,27 +69,48 @@ pub fn get_tool_definitions() -> Vec<Value> {
}),
json!({
"name": "query",
"description": "Query memories by semantic similarity",
"description": "Query stored memories using semantic similarity search. Returns the most relevant memories based on the query text.",
"inputSchema": {
"type": "object",
"properties": {
"query": {"type": "string"},
"agent_id": {"type": "string"},
"limit": {"type": "integer"},
"threshold": {"type": "number"}
"query": {
"type": "string",
"description": "The search query text"
},
"agent_id": {
"type": "string",
"description": "Agent ID to search within (default: 'default')"
},
"limit": {
"type": "integer",
"description": "Maximum number of results to return (default: 10)"
},
"threshold": {
"type": "number",
"description": "Minimum similarity threshold 0.0-1.0 (default: 0.5)"
}
},
"required": ["query"]
}
}),
json!({
"name": "purge",
"description": "Delete memories by agent_id",
"description": "Delete memories for an agent. Can delete all memories or those before a specific timestamp.",
"inputSchema": {
"type": "object",
"properties": {
"agent_id": {"type": "string"},
"before": {"type": "string"},
"confirm": {"type": "boolean"}
"agent_id": {
"type": "string",
"description": "Agent ID whose memories to delete (required)"
},
"before": {
"type": "string",
"description": "Optional ISO8601 timestamp - delete memories created before this time"
},
"confirm": {
"type": "boolean",
"description": "Must be true to confirm deletion"
}
},
"required": ["agent_id", "confirm"]
}

View File

@@ -879,60 +879,155 @@ async fn e2e_auth_enabled_accepts_test_key() {
#[tokio::test]
async fn e2e_batch_store_basic() -> anyhow::Result<()> {
let agent = format!("batch_{}", uuid::Uuid::new_v4());
let _ = db.purge_memories(&agent, None).await;
let base = base_url();
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.build()
.expect("reqwest client");
let resp = client.call_tool("batch_store", serde_json::json!({
"agent_id": agent.clone(),
ensure_schema().await;
wait_until_ready(&client, &base).await;
let agent = format!("batch_{}", uuid::Uuid::new_v4());
let _ = call_tool(&client, &base, "purge", json!({ "agent_id": agent, "confirm": true })).await;
let result = call_tool(&client, &base, "batch_store", serde_json::json!({
"agent_id": agent,
"entries": [
{"content": "Fact alpha for batch test"},
{"content": "Fact beta for batch test"},
{"content": "Fact gamma for batch test"}
]
})).await?;
})).await;
let result: Value = serde_json::from_str(&resp.content[0].text)?;
assert!(result["success"].as_bool().unwrap_or(false));
assert_eq!(result["count"].as_i64().unwrap_or(0), 3);
db.purge_memories(&agent, None).await?;
Ok(())
}
#[tokio::test]
async fn e2e_batch_store_empty_rejected() -> anyhow::Result<()> {
let resp = client.call_tool("batch_store", serde_json::json!({
"entries": []
})).await;
assert!(resp.is_err() || resp.as_ref().unwrap().is_error());
let base = base_url();
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.build()
.expect("reqwest client");
wait_until_ready(&client, &base).await;
let response = call_jsonrpc(
&client,
&base,
json!({
"jsonrpc": "2.0",
"id": "batch-empty-1",
"method": "tools/call",
"params": {
"name": "batch_store",
"arguments": {
"entries": []
}
}
}),
)
.await;
assert!(response.get("error").is_some(), "empty batch_store should return an error");
Ok(())
}
#[tokio::test]
async fn e2e_batch_store_exceeds_max() -> anyhow::Result<()> {
let base = base_url();
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.build()
.expect("reqwest client");
wait_until_ready(&client, &base).await;
let entries: Vec<Value> = (0..51).map(|i| serde_json::json!({"content": format!("Entry {}", i)})).collect();
let resp = client.call_tool("batch_store", serde_json::json!({
"entries": entries
})).await;
assert!(resp.is_err() || resp.as_ref().unwrap().is_error());
let response = call_jsonrpc(
&client,
&base,
json!({
"jsonrpc": "2.0",
"id": "batch-too-large-1",
"method": "tools/call",
"params": {
"name": "batch_store",
"arguments": {
"entries": entries
}
}
}),
)
.await;
assert!(response.get("error").is_some(), "oversized batch_store should return an error");
Ok(())
}
#[tokio::test]
async fn e2e_batch_store_missing_content() -> anyhow::Result<()> {
let resp = client.call_tool("batch_store", serde_json::json!({
"entries": [{"content": "Valid entry"}, {"metadata": {}}]
})).await;
assert!(resp.is_err() || resp.as_ref().unwrap().is_error());
let base = base_url();
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.build()
.expect("reqwest client");
wait_until_ready(&client, &base).await;
let response = call_jsonrpc(
&client,
&base,
json!({
"jsonrpc": "2.0",
"id": "batch-missing-content-1",
"method": "tools/call",
"params": {
"name": "batch_store",
"arguments": {
"entries": [{"content": "Valid entry"}, {"metadata": {}}]
}
}
}),
)
.await;
assert!(response.get("error").is_some(), "missing batch entry content should return an error");
Ok(())
}
#[tokio::test]
async fn e2e_batch_store_appears_in_tools() -> anyhow::Result<()> {
let tools = client.list_tools().await?;
let parsed: Value = serde_json::from_str(&tools.content[0].text)?;
let names: Vec<&str> = parsed.as_array().unwrap().iter()
.filter_map(|t| t.get("name").and_then(|n| n.as_str()))
let base = base_url();
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.build()
.expect("reqwest client");
wait_until_ready(&client, &base).await;
let response = call_jsonrpc(
&client,
&base,
json!({
"jsonrpc": "2.0",
"id": "batch-tools-list-1",
"method": "tools/list",
"params": {}
}),
)
.await;
let names: Vec<&str> = response
.get("result")
.and_then(|value| value.get("tools"))
.and_then(Value::as_array)
.expect("tools/list result.tools")
.iter()
.filter_map(|t| t.get("name").and_then(Value::as_str))
.collect();
assert!(names.contains(&"batch_store"));
Ok(())
@@ -940,14 +1035,23 @@ async fn e2e_batch_store_appears_in_tools() -> anyhow::Result<()> {
#[tokio::test]
async fn e2e_existing_store_unchanged() -> anyhow::Result<()> {
let base = base_url();
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.build()
.expect("reqwest client");
ensure_schema().await;
wait_until_ready(&client, &base).await;
let agent = format!("compat_{}", uuid::Uuid::new_v4());
let _ = db.purge_memories(&agent, None).await;
let resp = client.call_tool("store", serde_json::json!({
"agent_id": agent.clone(),
let _ = call_tool(&client, &base, "purge", json!({ "agent_id": agent, "confirm": true })).await;
let result = call_tool(&client, &base, "store", serde_json::json!({
"agent_id": agent,
"content": "Original store still works"
})).await?;
let result: Value = serde_json::from_str(&resp.content[0].text)?;
})).await;
assert!(result["success"].as_bool().unwrap_or(false));
db.purge_memories(&agent, None).await?;
Ok(())
}