mirror of
https://gitea.ingwaz.work/Ingwaz/openbrain-mcp.git
synced 2026-03-31 14:49:06 +00:00
Initial public release
This commit is contained in:
92
.gitea/download-model.sh
Executable file
92
.gitea/download-model.sh
Executable file
@@ -0,0 +1,92 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Download ONNX embedding model for OpenBrain MCP
|
||||
# Downloads all-MiniLM-L6-v2 from Hugging Face
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
DEPLOY_DIR="${DEPLOY_DIR:-/opt/openbrain-mcp}"
|
||||
MODEL_DIR="$DEPLOY_DIR/models/all-MiniLM-L6-v2"
|
||||
MODEL_NAME="sentence-transformers/all-MiniLM-L6-v2"
|
||||
|
||||
# Colors
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
|
||||
|
||||
# Skip the download entirely when both the model and its tokenizer are
# already in place from a previous run.
if [[ -f "$MODEL_DIR/model.onnx" && -f "$MODEL_DIR/tokenizer.json" ]]; then
  log_info "Model already exists at $MODEL_DIR"
  exit 0
fi

log_info "Downloading embedding model to $MODEL_DIR..."
mkdir -p "$MODEL_DIR"
# Method 1: huggingface-cli, if installed, fetches the needed files in one
# call while excluding the large PyTorch/TF weight formats.
if command -v huggingface-cli &> /dev/null; then
  log_info "Using huggingface-cli to download model..."
  huggingface-cli download "$MODEL_NAME" \
    --local-dir "$MODEL_DIR" \
    --include "*.onnx" "*.json" "*.txt" \
    --exclude "*.bin" "*.safetensors" "*.h5"
else
  # Method 2: fetch the individual files directly from Hugging Face.
  log_info "Downloading directly from Hugging Face..."

  BASE_URL="https://huggingface.co/$MODEL_NAME/resolve/main"

  # The pre-exported ONNX graph lives under the repo's onnx/ directory.
  log_info "Downloading model.onnx..."
  # curl stderr is silenced on purpose: a 404 here simply means we fall
  # back to exporting the model from the PyTorch weights below.
  if ! curl -fSL "$BASE_URL/onnx/model.onnx" -o "$MODEL_DIR/model.onnx" 2>/dev/null; then
    # Fallback: export to ONNX locally (requires python3 + pip).
    log_warn "ONNX model not found, will need to convert from PyTorch..."
    log_warn "Installing optimum for ONNX export..."
    # Quoted so the [exporters] extras marker is never treated as a shell
    # glob character class.
    pip install --quiet 'optimum[exporters]' onnx onnxruntime

    python3 << PYEOF
from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer

model = ORTModelForFeatureExtraction.from_pretrained("$MODEL_NAME", export=True)
tokenizer = AutoTokenizer.from_pretrained("$MODEL_NAME")

model.save_pretrained("$MODEL_DIR")
tokenizer.save_pretrained("$MODEL_DIR")
print("Model exported to ONNX successfully!")
PYEOF
  fi

  # Tokenizer/support files. Each fetch is deliberately best-effort
  # (|| true): not every file exists in every revision of the repo, and
  # the final verification step only requires model.onnx.
  for asset in tokenizer.json tokenizer_config.json config.json vocab.txt special_tokens_map.json; do
    log_info "Downloading $asset..."
    curl -fSL "$BASE_URL/$asset" -o "$MODEL_DIR/$asset" 2>/dev/null || true
  done
fi
# Final sanity check: the script only succeeds if model.onnx actually
# landed on disk, whichever download path was taken.
if [[ -f "$MODEL_DIR/model.onnx" ]]; then
  model_size=$(du -h "$MODEL_DIR/model.onnx" | cut -f1)
  log_info "Model downloaded successfully! Size: $model_size"
  ls -la "$MODEL_DIR/"
else
  log_warn "Warning: model.onnx not found after download"
  exit 1
fi
Reference in New Issue
Block a user