Files
openbrain-mcp/src/db.rs
2026-03-24 03:20:10 +00:00

297 lines
9.5 KiB
Rust

//! Database module for PostgreSQL with pgvector support
//!
//! Provides connection pooling and query helpers for vector operations.
use anyhow::{Context, Result};
use deadpool_postgres::{Config, Pool, Runtime};
use pgvector::Vector;
use tokio_postgres::NoTls;
use tracing::info;
use uuid::Uuid;
use serde::Serialize;
use serde_json::Value;
use crate::config::DatabaseConfig;
/// Database wrapper with connection pool
///
/// Every query method on this type checks a client out of `pool` before
/// talking to PostgreSQL.
// NOTE(review): `Clone` is derived; cloning presumably shares the same
// underlying pool rather than opening a new one — confirm against deadpool docs.
#[derive(Clone)]
pub struct Database {
/// Connection pool created in `Database::new`.
pool: Pool,
}
/// A memory record stored in the database
///
/// Mirrors the columns of the `memories` table that this module reads and
/// writes.
#[derive(Debug, Clone)]
pub struct MemoryRecord {
/// Primary key; the store methods in this module generate it as a v4 UUID.
pub id: Uuid,
/// Owner of the memory; queries, counts and purges are filtered by this value.
pub agent_id: String,
/// Text content of the memory.
pub content: String,
/// Embedding vector. `Database::query_memories` leaves this empty because
/// query responses do not include raw embedding payloads.
pub embedding: Vec<f32>,
/// Keywords stored alongside the content.
pub keywords: Vec<String>,
/// Arbitrary JSON metadata attached to the memory.
pub metadata: serde_json::Value,
/// Creation timestamp, read from the `created_at` column.
// NOTE(review): the INSERT statements in this file never set `created_at`,
// so it is presumably filled by a column default — confirm against the schema.
pub created_at: chrono::DateTime<chrono::Utc>,
/// Optional expiry. `None` means the memory never expires; rows whose expiry
/// is in the past are excluded by the query and count methods.
pub expires_at: Option<chrono::DateTime<chrono::Utc>>,
}
/// Query result with similarity score
///
/// Produced by `Database::query_memories`, one per returned row.
#[derive(Debug, Clone)]
pub struct MemoryMatch {
/// The matched memory (with an empty `embedding`).
pub record: MemoryRecord,
/// Overall score; `query_memories` fills this from the same `hybrid_score`
/// column as the `hybrid_score` field.
pub similarity: f32,
/// Vector component, computed in SQL as `1 - (embedding <=> query_vector)`.
pub vector_score: f32,
/// Full-text component: the `ts_rank` of the row when it matches the text
/// query, otherwise `0`.
pub text_score: f32,
/// Weighted combination of `vector_score` and `text_score` when at least one
/// candidate row matched the text query; otherwise equal to `vector_score`.
pub hybrid_score: f32,
}
impl Database {
/// Create a new database connection pool
pub async fn new(config: &DatabaseConfig) -> Result<Self> {
let mut cfg = Config::new();
cfg.host = Some(config.host.clone());
cfg.port = Some(config.port);
cfg.dbname = Some(config.name.clone());
cfg.user = Some(config.user.clone());
cfg.password = Some(config.password.clone());
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context("Failed to create database pool")?;
// Test connection
let client = pool.get().await.context("Failed to get database connection")?;
client
.simple_query("SELECT 1")
.await
.context("Failed to execute test query")?;
info!("Database connection pool created with {} connections", config.pool_size);
Ok(Self { pool })
}
/// Insert a single memory row and return its newly generated id.
///
/// The id is a fresh v4 UUID. `embedding` is copied into a pgvector `Vector`
/// for the insert; `expires_at` may be `None` for a memory without an expiry.
///
/// # Errors
///
/// Fails if no pooled connection is available or if the INSERT is rejected
/// (the latter is reported as "Failed to store memory").
pub async fn store_memory(
    &self,
    agent_id: &str,
    content: &str,
    embedding: &[f32],
    keywords: &[String],
    metadata: serde_json::Value,
    expires_at: Option<chrono::DateTime<chrono::Utc>>,
) -> Result<Uuid> {
    const INSERT_MEMORY: &str = r#"
INSERT INTO memories (id, agent_id, content, embedding, keywords, metadata, expires_at)
VALUES ($1, $2, $3, $4, $5, $6, $7)
"#;

    let conn = self.pool.get().await?;
    let memory_id = Uuid::new_v4();
    let pg_vector = Vector::from(embedding.to_vec());

    conn.execute(
        INSERT_MEMORY,
        &[
            &memory_id,
            &agent_id,
            &content,
            &pg_vector,
            &keywords,
            &metadata,
            &expires_at,
        ],
    )
    .await
    .context("Failed to store memory")?;

    Ok(memory_id)
}
/// Search an agent's unexpired memories with a hybrid vector + full-text query.
///
/// For every candidate row the SQL computes:
/// - `vector_score` = `1 - (embedding <=> query_vector)`;
/// - `text_score` = `ts_rank` against `plainto_tsquery('pg_catalog.english', query_text)`
///   when the row's `tsv` matches, otherwise `0` (also `0` when the parsed
///   text query is empty).
///
/// If at least one candidate has a positive `text_score`, rows are ranked by
/// `vector_weight * vector_score + text_weight * text_score`; otherwise by
/// `vector_score` alone. A row is returned when its `vector_score` reaches
/// `threshold` or it has a text match. At most `limit` rows come back, best
/// score first.
///
/// The returned records carry an empty `embedding`, and `similarity` holds the
/// same value as `hybrid_score`.
///
/// # Errors
///
/// Fails if no pooled connection is available or if the query is rejected
/// (the latter is reported as "Failed to query memories").
pub async fn query_memories(
    &self,
    agent_id: &str,
    query_text: &str,
    embedding: &[f32],
    limit: i64,
    threshold: f32,
    vector_weight: f32,
    text_weight: f32,
) -> Result<Vec<MemoryMatch>> {
    // Parameters: $1 query vector, $2 query text, $3 agent id, $4 threshold,
    // $5 vector weight, $6 text weight, $7 row limit.
    const HYBRID_SEARCH_SQL: &str = r#"
WITH search_query AS (
SELECT
NULLIF(plainto_tsquery('pg_catalog.english', $2)::text, '') AS query_text,
plainto_tsquery('pg_catalog.english', $2) AS ts_query
),
scored AS (
SELECT
id,
agent_id,
content,
keywords,
metadata,
created_at,
expires_at,
(1 - (embedding <=> $1))::real AS vector_score,
CASE
WHEN search_query.query_text IS NULL THEN 0::real
WHEN memories.tsv @@ search_query.ts_query
THEN ts_rank(memories.tsv, search_query.ts_query, 32)::real
ELSE 0::real
END AS text_score
FROM memories
CROSS JOIN search_query
WHERE memories.agent_id = $3
AND (memories.expires_at IS NULL OR memories.expires_at > NOW())
),
ranked AS (
SELECT
*,
MAX(CASE WHEN text_score > 0 THEN 1 ELSE 0 END) OVER () AS has_text_match
FROM scored
)
SELECT
id,
agent_id,
content,
keywords,
metadata,
created_at,
expires_at,
vector_score,
text_score,
CASE
WHEN has_text_match = 1
THEN (($5 * vector_score) + ($6 * text_score))::real
ELSE vector_score
END AS hybrid_score
FROM ranked
WHERE vector_score >= $4 OR text_score > 0
ORDER BY hybrid_score DESC, vector_score DESC
LIMIT $7
"#;

    let conn = self.pool.get().await?;
    let query_vector = Vector::from(embedding.to_vec());

    let rows = conn
        .query(
            HYBRID_SEARCH_SQL,
            &[
                &query_vector,
                &query_text,
                &agent_id,
                &threshold,
                &vector_weight,
                &text_weight,
                &limit,
            ],
        )
        .await
        .context("Failed to query memories")?;

    let mut found = Vec::with_capacity(rows.len());
    for row in &rows {
        let record = MemoryRecord {
            id: row.get("id"),
            agent_id: row.get("agent_id"),
            content: row.get("content"),
            // Query responses do not include raw embedding payloads.
            embedding: Vec::new(),
            keywords: row.get("keywords"),
            metadata: row.get("metadata"),
            created_at: row.get("created_at"),
            expires_at: row.get("expires_at"),
        };
        // `similarity` and `hybrid_score` are both fed from the same column.
        let hybrid: f32 = row.get("hybrid_score");
        found.push(MemoryMatch {
            record,
            similarity: hybrid,
            vector_score: row.get("vector_score"),
            text_score: row.get("text_score"),
            hybrid_score: hybrid,
        });
    }

    Ok(found)
}
/// Delete an agent's memories, optionally only those created before a cutoff.
///
/// With `before = Some(ts)` only rows whose `created_at` is earlier than `ts`
/// are removed; with `None` every row belonging to `agent_id` is removed.
/// Expiry is not considered either way.
///
/// Returns the number of rows deleted.
pub async fn purge_memories(
    &self,
    agent_id: &str,
    before: Option<chrono::DateTime<chrono::Utc>>,
) -> Result<u64> {
    let conn = self.pool.get().await?;

    let deleted = match before {
        Some(cutoff) => {
            conn.execute(
                "DELETE FROM memories WHERE agent_id = $1 AND created_at < $2",
                &[&agent_id, &cutoff],
            )
            .await?
        }
        None => {
            conn.execute("DELETE FROM memories WHERE agent_id = $1", &[&agent_id])
                .await?
        }
    };

    Ok(deleted)
}
/// Count the memories of `agent_id` that have not expired.
///
/// Rows with no `expires_at`, or with an `expires_at` later than the
/// database's `NOW()`, are counted.
pub async fn count_memories(&self, agent_id: &str) -> Result<i64> {
    const COUNT_ACTIVE: &str = "SELECT COUNT(*) as count FROM memories WHERE agent_id = $1 AND (expires_at IS NULL OR expires_at > NOW())";

    let conn = self.pool.get().await?;
    let total: i64 = conn.query_one(COUNT_ACTIVE, &[&agent_id]).await?.get("count");
    Ok(total)
}
/// Remove every memory, for all agents, whose `expires_at` is set and is not
/// later than the database's `NOW()`.
///
/// Returns the number of rows deleted. A failed DELETE is reported as
/// "Failed to cleanup expired memories".
pub async fn cleanup_expired_memories(&self) -> Result<u64> {
    const DELETE_EXPIRED: &str =
        "DELETE FROM memories WHERE expires_at IS NOT NULL AND expires_at <= NOW()";

    let conn = self.pool.get().await?;
    conn.execute(DELETE_EXPIRED, &[])
        .await
        .context("Failed to cleanup expired memories")
}
}
/// Result for a single batch entry
///
/// Serializable summary returned by `Database::batch_store_memories`, one per
/// stored entry and in the same order as the input.
#[derive(Debug, Clone, Serialize)]
pub struct BatchStoreResult {
/// The generated memory id, rendered as a UUID string.
pub id: String,
/// Outcome label; `batch_store_memories` always sets this to `"stored"`.
pub status: String,
/// The entry's expiry formatted as RFC 3339, or `None` when it has no expiry.
pub expires_at: Option<String>,
}
impl Database {
/// Store multiple memories in a single transaction
///
/// Each entry is a tuple of `(content, metadata, embedding, keywords,
/// expires_at)`. Every entry gets a fresh v4 UUID and is inserted for
/// `agent_id` inside one transaction that is committed only after all inserts
/// succeed; on the first failure the function returns before the commit.
///
/// An empty `entries` list returns an empty result without touching the
/// database.
///
/// Returns one `BatchStoreResult` per entry, in input order.
///
/// # Errors
///
/// Fails if no pooled connection is available, or if the transaction cannot
/// be started, the statement cannot be prepared, any insert is rejected, or
/// the commit fails.
pub async fn batch_store_memories(
    &self,
    agent_id: &str,
    entries: Vec<(
        String,
        Value,
        Vec<f32>,
        Vec<String>,
        Option<chrono::DateTime<chrono::Utc>>,
    )>,
) -> Result<Vec<BatchStoreResult>> {
    // Nothing to insert: skip the pool checkout and an empty transaction.
    if entries.is_empty() {
        return Ok(Vec::new());
    }

    let mut client = self
        .pool
        .get()
        .await
        .context("Failed to get database connection")?;
    let transaction = client
        .transaction()
        .await
        .context("Failed to begin batch transaction")?;

    // Prepare the INSERT once. Passing the SQL text to `execute` inside the
    // loop would re-prepare the same statement for every entry.
    let insert = transaction
        .prepare(
            r#"INSERT INTO memories (id, agent_id, content, embedding, keywords, metadata, expires_at) VALUES ($1, $2, $3, $4, $5, $6, $7)"#,
        )
        .await
        .context("Failed to prepare batch insert")?;

    let mut results = Vec::with_capacity(entries.len());
    for (content, metadata, embedding, keywords, expires_at) in entries {
        let id = Uuid::new_v4();
        let vector = Vector::from(embedding);
        transaction
            .execute(
                &insert,
                &[&id, &agent_id, &content, &vector, &keywords, &metadata, &expires_at],
            )
            .await
            .context("Failed to store memory")?;
        results.push(BatchStoreResult {
            id: id.to_string(),
            status: "stored".to_string(),
            expires_at: expires_at.map(|ts| ts.to_rfc3339()),
        });
    }

    transaction
        .commit()
        .await
        .context("Failed to commit batch transaction")?;
    Ok(results)
}
}