mirror of
https://gitea.ingwaz.work/Ingwaz/openbrain-mcp.git
synced 2026-06-16 06:17:08 +00:00
Add server-side deduplication on ingest
This commit is contained in:
@@ -12,6 +12,7 @@ pub struct Config {
|
||||
pub database: DatabaseConfig,
|
||||
pub embedding: EmbeddingConfig,
|
||||
pub query: QueryConfig,
|
||||
pub dedup: DedupConfig,
|
||||
pub ttl: TtlConfig,
|
||||
pub auth: AuthConfig,
|
||||
}
|
||||
@@ -56,6 +57,13 @@ pub struct QueryConfig {
|
||||
pub text_weight: f32,
|
||||
}
|
||||
|
||||
/// Deduplication configuration
///
/// Holds the single tunable used by the deduplication feature. The loader
/// code elsewhere in this file seeds it under the `dedup.threshold` key and
/// lets the `DEDUP_THRESHOLD` environment variable override it.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct DedupConfig {
|
||||
/// Deduplication threshold; filled in by `default_dedup_threshold()` when
/// the field is missing from the deserialized input.
/// NOTE(review): presumably a similarity cutoff above which an ingested
/// item counts as a duplicate; no range validation is visible here, so
/// confirm the expected bounds against the ingest path.
#[serde(default = "default_dedup_threshold")]
|
||||
pub threshold: f32,
|
||||
}
|
||||
|
||||
/// TTL / expiry configuration
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct TtlConfig {
|
||||
@@ -106,6 +114,7 @@ fn default_model_path() -> String { "models/all-MiniLM-L6-v2".to_string() }
|
||||
/// Fallback embedding dimension: always `384`.
fn default_embedding_dim() -> usize {
    const EMBEDDING_DIM: usize = 384;
    EMBEDDING_DIM
}
|
||||
/// Fallback value for `query.vector_weight`: always `0.6`.
fn default_vector_weight() -> f32 {
    const VECTOR_WEIGHT: f32 = 0.6;
    VECTOR_WEIGHT
}
|
||||
/// Fallback value for `query.text_weight`: always `0.4`.
fn default_text_weight() -> f32 {
    const TEXT_WEIGHT: f32 = 0.4;
    TEXT_WEIGHT
}
|
||||
/// Fallback value for `dedup.threshold`: always `0.90`.
///
/// Referenced by name from the serde `default` attribute on
/// `DedupConfig::threshold`, so the function name must stay as is.
fn default_dedup_threshold() -> f32 {
    const DEDUP_THRESHOLD: f32 = 0.90;
    DEDUP_THRESHOLD
}
|
||||
/// Fallback value for `ttl.cleanup_interval_seconds`: always `300`.
fn default_cleanup_interval_seconds() -> u64 {
    const CLEANUP_INTERVAL_SECONDS: u64 = 300;
    CLEANUP_INTERVAL_SECONDS
}
|
||||
/// Fallback for the auth-enabled flag: always `false`.
fn default_auth_enabled() -> bool {
    const AUTH_ENABLED: bool = false;
    AUTH_ENABLED
}
|
||||
|
||||
@@ -128,6 +137,8 @@ impl Config {
|
||||
// Query settings
|
||||
.set_default("query.vector_weight", default_vector_weight() as f64)?
|
||||
.set_default("query.text_weight", default_text_weight() as f64)?
|
||||
// Dedup settings
|
||||
.set_default("dedup.threshold", default_dedup_threshold() as f64)?
|
||||
// TTL settings
|
||||
.set_default(
|
||||
"ttl.cleanup_interval_seconds",
|
||||
@@ -156,6 +167,11 @@ impl Config {
|
||||
config.query.text_weight = parsed;
|
||||
}
|
||||
}
|
||||
if let Ok(dedup_threshold) = std::env::var("DEDUP_THRESHOLD") {
|
||||
if let Ok(parsed) = dedup_threshold.parse::<f32>() {
|
||||
config.dedup.threshold = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
@@ -184,6 +200,9 @@ impl Default for Config {
|
||||
vector_weight: default_vector_weight(),
|
||||
text_weight: default_text_weight(),
|
||||
},
|
||||
dedup: DedupConfig {
|
||||
threshold: default_dedup_threshold(),
|
||||
},
|
||||
ttl: TtlConfig {
|
||||
cleanup_interval_seconds: default_cleanup_interval_seconds(),
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user