From 6e40b712ef36013dbe030ef453d9e5ea3fd68898 Mon Sep 17 00:00:00 2001 From: samzong Date: Fri, 5 Dec 2025 11:46:52 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20docs(milvus):=20add=20milvus=20helm?= =?UTF-8?q?=20install=20and=20semantic=20cache=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: samzong --- tools/milvus/README.md | 53 ++ tools/milvus/test-milvus-deployment.sh | 459 +++++++++++ website/docs/installation/milvus.md | 723 ++++++++++++++++++ .../tutorials/semantic-cache/hybrid-cache.md | 6 +- .../tutorials/semantic-cache/milvus-cache.md | 1 + website/sidebars.ts | 1 + 6 files changed, 1240 insertions(+), 3 deletions(-) create mode 100644 tools/milvus/README.md create mode 100755 tools/milvus/test-milvus-deployment.sh create mode 100644 website/docs/installation/milvus.md diff --git a/tools/milvus/README.md b/tools/milvus/README.md new file mode 100644 index 000000000..ebf7d8f4f --- /dev/null +++ b/tools/milvus/README.md @@ -0,0 +1,53 @@ +# Milvus Installation Validation Script + +Validates commands in `website/docs/installation/milvus.md`. + +## Features + +1. Prerequisites check (kubectl, kind, helm) +2. Create Kind cluster (`make create-cluster`) +3. Deploy Milvus (Standalone or Cluster mode) +4. Verify deployment +5. Apply client config & network policies +6. Connection tests + +## Deployment Modes + +| Mode | Use Case | +| -------------- | ------------------- | +| **Standalone** | Development/testing | +| **Cluster** | Production (HA) | + +## Usage + +**Interactive:** + +```bash +./tools/milvus/test-milvus-deployment.sh +``` + +**Non-Interactive (CI/CD):** + +```bash +MILVUS_MODE=standalone RECREATE_CLUSTER=false CLEANUP=false ./tools/milvus/test-milvus-deployment.sh +``` + +### Environment Variables + +| Variable | Values | Description | +| ------------------ | ----------------------- | ------------------------------- | +| `MILVUS_MODE` | `standalone`, `cluster` | Deployment mode | +| `RECREATE_CLUSTER` | `true`, `false` | Recreate Kind cluster if exists | +| `CLEANUP` | `true`, `false` | Cleanup after test | + +## Troubleshooting + +**ServiceMonitor CRD Not Found:** +```bash +# Add: --set metrics.serviceMonitor.enabled=false +``` + +**Both Pulsar versions running:** +```bash +# Add: --set pulsar.enabled=false --set pulsarv3.enabled=true +``` diff --git a/tools/milvus/test-milvus-deployment.sh b/tools/milvus/test-milvus-deployment.sh new file mode 100755 index 000000000..99a01a795 --- /dev/null +++ b/tools/milvus/test-milvus-deployment.sh @@ -0,0 +1,459 @@ +#!/usr/bin/env bash + +# This script validates all commands in website/docs/installation/milvus.md +# +# Usage: +# # Non-interactive standalone deployment +# MILVUS_MODE=standalone RECREATE_CLUSTER=false CLEANUP=false ./tools/milvus/test-milvus-deployment.sh +# +# # Non-interactive cluster deployment with cleanup +# MILVUS_MODE=cluster RECREATE_CLUSTER=true CLEANUP=true ./tools/milvus/test-milvus-deployment.sh + +set -e # Exit on error +set -u # Exit on undefined variable + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +NAMESPACE="vllm-semantic-router-system" +CLUSTER_NAME="semantic-router-cluster" +RELEASE_NAME="milvus-semantic-cache" + +# Environment variable defaults (empty means interactive) +MILVUS_MODE="${MILVUS_MODE:-}" # standalone or cluster +RECREATE_CLUSTER="${RECREATE_CLUSTER:-}" # true or false +CLEANUP="${CLEANUP:-}" # true or false + +# Helper 
functions +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +check_command() { + if ! command -v "$1" &> /dev/null; then + log_error "$1 is not installed" + return 1 + fi +} + +# Section: Prerequisites Check +section_prerequisites() { + log_info "=== Checking Prerequisites ===" + + check_command kubectl || exit 1 + check_command kind || exit 1 + check_command helm || exit 1 + + log_success "All prerequisites are installed" +} + +# Section: Create Kind Cluster +section_create_cluster() { + log_info "=== Creating Kind Cluster ===" + + if kind get clusters | grep -q "^${CLUSTER_NAME}$"; then + log_warning "Cluster ${CLUSTER_NAME} already exists" + + local recreate="$RECREATE_CLUSTER" + if [ -z "$recreate" ]; then + read -r -p "Delete and recreate? (y/N): " confirm + [[ "$confirm" =~ ^[Yy]$ ]] && recreate="true" || recreate="false" + fi + + if [ "$recreate" = "true" ]; then + log_info "Recreating cluster..." + make delete-cluster + else + log_info "Using existing cluster" + return 0 + fi + fi + + make create-cluster + log_success "Cluster created successfully" +} + +# Section: Deploy Milvus with Helm +section_deploy_milvus_helm() { + local mode=$1 + log_info "=== Deploying Milvus ${mode^} Mode with Helm ===" + + # Add Milvus Helm repo + log_info "Adding Milvus Helm repository..." + helm repo add milvus https://zilliztech.github.io/milvus-helm/ + helm repo update + + # Check if release exists + if helm list -n "$NAMESPACE" | grep -q "$RELEASE_NAME"; then + log_warning "Helm release '$RELEASE_NAME' already exists" + log_info "Uninstalling existing release..." + helm uninstall "$RELEASE_NAME" -n "$NAMESPACE" || true + sleep 10 + fi + + # Create namespace if not exists + kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - + + # Install Milvus based on mode + log_info "Installing Milvus ${mode} mode..." + + if [ "$mode" = "cluster" ]; then + log_info "Cluster mode: Using Pulsar v3 (disabling old Pulsar)" + helm install "$RELEASE_NAME" milvus/milvus \ + --set cluster.enabled=true \ + --set etcd.replicaCount=3 \ + --set minio.mode=distributed \ + --set pulsar.enabled=false \ + --set pulsarv3.enabled=true \ + --set metrics.serviceMonitor.enabled=false \ + --namespace "$NAMESPACE" \ + --wait --timeout=15m + else + helm install "$RELEASE_NAME" milvus/milvus \ + --set cluster.enabled=false \ + --set etcd.replicaCount=1 \ + --set minio.mode=standalone \ + --set pulsar.enabled=false \ + --set metrics.serviceMonitor.enabled=false \ + --namespace "$NAMESPACE" \ + --wait --timeout=15m + fi + + log_info "Note: ServiceMonitor is disabled. To enable, install Prometheus Operator first." + + log_success "Milvus ${mode} mode deployed successfully" +} + +# Section: Verify Milvus Deployment +section_verify_milvus() { + log_info "=== Verifying Milvus Deployment ===" + + # Wait for Milvus pods + log_info "Waiting for Milvus pods to be ready..." 
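+ # Block until every pod carrying the Milvus app label reports Ready; the
+ # 10-minute timeout covers image pulls on a fresh Kind cluster, and the
+ # failure branch below dumps pod status so a stuck component is visible.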
+ kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=milvus \
+ -n "$NAMESPACE" --timeout=600s || {
+ log_error "Milvus pods not ready"
+ kubectl get pods -n "$NAMESPACE"
+ return 1
+ }
+
+ # Check pods
+ log_info "Milvus pods:"
+ kubectl get pods -l app.kubernetes.io/name=milvus -n "$NAMESPACE"
+
+ # Check services
+ log_info "Milvus services:"
+ kubectl get svc -l app.kubernetes.io/name=milvus -n "$NAMESPACE"
+
+ log_success "Milvus deployment verified"
+}
+
+# Section: Apply Milvus Client Config
+section_apply_client_config() {
+ log_info "=== Applying Milvus Client Config ==="
+
+ # Example client config consumed by the router; adjust the host, port, and
+ # any cache settings to match your router deployment. The service name is
+ # assumed to equal the Helm release name; change it if yours differs.
+ kubectl apply -n "$NAMESPACE" -f - <<EOF
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: milvus-client-config
+data:
+  milvus.yaml: |
+    connection:
+      host: "${RELEASE_NAME}.${NAMESPACE}.svc.cluster.local"
+      port: 19530
+EOF
+
+ log_success "Milvus client config applied"
+}
+
+# Section: Networking & Security
+section_networking_security() {
+ log_info "=== Applying Networking & Security Resources ==="
+
+ # Restrict ingress to Milvus: only router pods (label app=semantic-router)
+ # may reach the gRPC port 19530.
+ kubectl apply -n "$NAMESPACE" -f - <<EOF
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-router-to-milvus
+spec:
+  podSelector:
+    matchLabels:
+      app.kubernetes.io/name: milvus
+  policyTypes:
+    - Ingress
+  ingress:
+    - from:
+        - podSelector:
+            matchLabels:
+              app: semantic-router
+      ports:
+        - protocol: TCP
+          port: 19530
+EOF
+
+ # Milvus ships with root/Milvus as the default credentials; store them in a
+ # Secret and rotate them for production.
+ kubectl create secret generic milvus-auth \
+ --from-literal=username=root \
+ --from-literal=password=Milvus \
+ -n "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
+
+ log_success "Network policy and auth secret applied"
+}
+
+# Section: Monitoring
+section_monitoring() {
+ log_info "=== Checking Monitoring Setup ==="
+
+ if kubectl get crd servicemonitors.monitoring.coreos.com &> /dev/null; then
+ log_info "Prometheus Operator detected"
+ log_info "Note: this script installs Milvus with metrics.serviceMonitor.enabled=false; re-install without that flag to create a ServiceMonitor"
+ kubectl get servicemonitor -n "$NAMESPACE" 2>/dev/null && log_success "ServiceMonitor found" || log_warning "ServiceMonitor not yet created"
+ else
+ log_warning "Prometheus Operator not installed, ServiceMonitor will not be created"
+ fi
+}
+
+# Section: Connection Tests
+section_connection_tests() {
+ log_info "=== Testing Milvus Connection ==="
+
+ # Get Milvus service
+ local milvus_svc
+ milvus_svc=$(kubectl get svc -n "$NAMESPACE" -l app.kubernetes.io/name=milvus -o jsonpath='{.items[0].metadata.name}')
+
+ if [ -z "$milvus_svc" ]; then
+ log_error "Milvus service not found"
+ return 1
+ fi
+
+ log_info "Milvus service: $milvus_svc"
+
+ # Port forward for testing
+ log_info "Setting up port-forward for testing..."
+ kubectl port-forward -n "$NAMESPACE" "svc/$milvus_svc" 19530:19530 &
+ local pf_pid=$!
+ sleep 5
+
+ # Test connection with nc
+ log_info "Testing connection with netcat..."
+ if command -v nc &> /dev/null; then
+ if nc -zv localhost 19530 2>&1 | grep -q "succeeded"; then
+ log_success "Connection test passed"
+ else
+ log_warning "Connection test failed (this is expected if Milvus is still starting)"
+ fi
+ else
+ log_warning "netcat not installed, skipping connection test"
+ fi
+
+ # Cleanup port-forward
+ kill $pf_pid 2>/dev/null || true
+}
+
+# Section: Troubleshooting Commands
+section_troubleshooting() {
+ log_info "=== Running Troubleshooting Commands ==="
+
+ # Overall health check
+ log_info "Overall health check:"
+ kubectl get all -l app.kubernetes.io/name=milvus -n "$NAMESPACE"
+
+ # Check PVC status
+ log_info "PVC status:"
+ kubectl get pvc -n "$NAMESPACE"
+
+ # Check StorageClass
+ log_info "StorageClass:"
+ kubectl get sc
+
+ # Check NetworkPolicy
+ log_info "NetworkPolicy:"
+ kubectl get networkpolicy -n "$NAMESPACE"
+
+ # Component logs (last 20 lines)
+ log_info "Milvus logs (last 20 lines):"
+ kubectl logs -l app.kubernetes.io/name=milvus -n "$NAMESPACE" --tail=20 || {
+ log_warning "Could not retrieve logs"
+ }
+
+ log_success "Troubleshooting commands completed"
+}
+
+# Section: Cleanup
+section_cleanup() {
+ log_info "=== Cleanup ==="
+
+ local do_cleanup="$CLEANUP"
+ if [ -z "$do_cleanup" ]; then
+ read -r -p "Do you want to cleanup Milvus deployment? (y/N): " confirm
+ [[ "$confirm" =~ ^[Yy]$ ]] && do_cleanup="true" || do_cleanup="false"
+ fi
+
+ if [ "$do_cleanup" = "true" ]; then
+ log_info "Uninstalling Milvus..."
+ helm uninstall "$RELEASE_NAME" -n "$NAMESPACE" || true
+
+ log_info "Deleting namespace resources..."
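+ # helm uninstall only removes chart-managed objects; the ConfigMap,
+ # NetworkPolicy, and Secret below were applied directly by this script,
+ # so they have to be deleted explicitly.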
+ kubectl delete configmap milvus-client-config -n "$NAMESPACE" --ignore-not-found=true || true + kubectl delete networkpolicy allow-router-to-milvus -n "$NAMESPACE" --ignore-not-found=true || true + kubectl delete secret milvus-auth -n "$NAMESPACE" --ignore-not-found=true || true + + log_success "Cleanup completed" + else + log_info "Skipping cleanup" + fi +} + +# Section: Select Deployment Mode +section_select_mode() { + log_info "=== Select Milvus Deployment Mode ===" + + # Validate if MILVUS_MODE is set + if [ -n "${MILVUS_MODE}" ]; then + if [ "${MILVUS_MODE}" != "standalone" ] && [ "${MILVUS_MODE}" != "cluster" ]; then + log_error "Invalid MILVUS_MODE: ${MILVUS_MODE}. Must be 'standalone' or 'cluster'" + exit 1 + fi + log_info "Using MILVUS_MODE from environment: ${MILVUS_MODE}" + else + # Interactive mode selection + echo "Available deployment modes:" + echo " 1) Standalone - Single instance (development/testing)" + echo " 2) Cluster - High availability (production)" + echo "" + read -r -p "Select mode (1/2) [default: 1]: " mode_choice + + case "$mode_choice" in + 2) + MILVUS_MODE="cluster" + ;; + 1|"") + MILVUS_MODE="standalone" + ;; + *) + log_error "Invalid choice" + exit 1 + ;; + esac + fi + + log_info "Selected mode: ${MILVUS_MODE}" + + if [ "${MILVUS_MODE}" = "cluster" ]; then + log_warning "Cluster mode requires more resources (etcd, minio, pulsar)" + fi +} + +# Main execution +main() { + log_info "Starting Milvus Installation Validation" + log_info "This script validates commands from website/docs/installation/milvus.md" + echo "" + + # Run sections + section_prerequisites + echo "" + + section_create_cluster + echo "" + + section_select_mode + echo "" + + section_deploy_milvus_helm "$MILVUS_MODE" + echo "" + + section_verify_milvus + echo "" + + section_apply_client_config + echo "" + + section_networking_security + echo "" + + section_monitoring + echo "" + + section_connection_tests + echo "" + + section_troubleshooting + echo "" + + log_success "All validation steps completed!" + echo "" + log_info "Summary:" + echo " - Cluster: $CLUSTER_NAME" + echo " - Namespace: $NAMESPACE" + echo " - Milvus Mode: $MILVUS_MODE" + echo " - Deployment: Helm release '$RELEASE_NAME'" + echo "" + log_info "Next steps:" + echo " - Test Milvus connection: kubectl port-forward -n $NAMESPACE svc/$RELEASE_NAME 19530:19530" + echo " - View logs: kubectl logs -l app.kubernetes.io/name=milvus -n $NAMESPACE -f" + + if [ "$MILVUS_MODE" = "cluster" ]; then + echo " - Check etcd: kubectl get pods -l app.kubernetes.io/name=etcd -n $NAMESPACE" + echo " - Check minio: kubectl get pods -l app.kubernetes.io/name=minio -n $NAMESPACE" + echo " - Check pulsar: kubectl get pods -l app.kubernetes.io/name=pulsar -n $NAMESPACE" + fi + + echo " - Cleanup: Run this script with cleanup option or manually uninstall" + echo "" + + section_cleanup +} + +# Run main function +main "$@" diff --git a/website/docs/installation/milvus.md b/website/docs/installation/milvus.md new file mode 100644 index 000000000..423be8901 --- /dev/null +++ b/website/docs/installation/milvus.md @@ -0,0 +1,723 @@ +--- +sidebar_position: 5 +--- + +# Milvus Semantic Cache + +This guide covers deploying Milvus as the semantic cache backend for the Semantic Router in Kubernetes. Milvus provides persistent, scalable vector storage compared to the default in-memory cache. + +:::note +Milvus is optional. The router works with the default memory backend out of the box. 
Use Milvus when you need persistence, horizontal scaling, or cache sharing across router replicas. +::: + +## Deployment Options + +Two approaches are available: + +- **Helm**: Quick start and parameterized deployments +- **Milvus Operator**: Production-grade lifecycle management, rolling upgrades, health checks, and dependency orchestration + +## Prerequisites + +- Kubernetes cluster with `kubectl` configured +- Default `StorageClass` available +- Helm 3.x installed + +:::note[ServiceMonitor Requirement] +The default Helm values enable ServiceMonitor for Prometheus metrics collection, which requires [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) to be installed first. + +**For testing without Prometheus Operator**, disable ServiceMonitor using `--set metrics.serviceMonitor.enabled=false` (see deployment commands below). +::: + +## Deploy with Helm + +### Standalone Mode + +Suitable for development and small-scale deployments: + +```bash +helm repo add milvus https://zilliztech.github.io/milvus-helm/ +helm repo update +``` + +**Without Prometheus Operator** (for testing/development): + +```bash +helm install milvus-semantic-cache milvus/milvus \ + --set cluster.enabled=false \ + --set etcd.replicaCount=1 \ + --set minio.mode=standalone \ + --set pulsar.enabled=false \ + --set metrics.serviceMonitor.enabled=false \ + --namespace vllm-semantic-router-system --create-namespace +``` + +**With Prometheus Operator** (production with monitoring): + +```bash +helm install milvus-semantic-cache milvus/milvus \ + --set cluster.enabled=false \ + --set etcd.replicaCount=1 \ + --set minio.mode=standalone \ + --set pulsar.enabled=false \ + --namespace vllm-semantic-router-system --create-namespace +``` + +### Cluster Mode + +Recommended for production with high availability: + +```bash +helm repo add milvus https://zilliztech.github.io/milvus-helm/ +helm repo update +``` + +:::note[Pulsar Version] +Milvus 2.4+ uses Pulsar v3 by default. The values below disable the old Pulsar to avoid conflicts. +::: + +**Without Prometheus Operator** (for testing): + +```bash +helm install milvus-semantic-cache milvus/milvus \ + --set cluster.enabled=true \ + --set etcd.replicaCount=3 \ + --set minio.mode=distributed \ + --set pulsar.enabled=false \ + --set pulsarv3.enabled=true \ + --set metrics.serviceMonitor.enabled=false \ + --namespace vllm-semantic-router-system --create-namespace +``` + +**With Prometheus Operator** (production with monitoring): + +```bash +helm install milvus-semantic-cache milvus/milvus \ + --set cluster.enabled=true \ + --set etcd.replicaCount=3 \ + --set minio.mode=distributed \ + --set pulsar.enabled=false \ + --set pulsarv3.enabled=true \ + --namespace vllm-semantic-router-system --create-namespace +``` + +## Deploy with Milvus Operator + +1. Install Milvus Operator following the [official instructions](https://github.com/zilliztech/milvus-operator) + +2. 
Apply the Custom Resource:
+
+**Standalone:**
+
+```bash
+kubectl apply -n vllm-semantic-router-system -f - <<EOF
+apiVersion: milvus.io/v1beta1
+kind: Milvus
+metadata:
+  name: milvus-standalone
+spec:
+  mode: standalone
+EOF
+```
+
+**Cluster:**
+
+```bash
+kubectl apply -n vllm-semantic-router-system -f - <<EOF
+apiVersion: milvus.io/v1beta1
+kind: Milvus
+metadata:
+  name: milvus-cluster
+spec:
+  mode: cluster
+EOF
+```
+
+:::note
+These are minimal CRs; the operator fills in default dependencies (etcd, MinIO, message queue). See the [operator samples](https://github.com/zilliztech/milvus-operator/tree/main/config/samples) for tuned production specs.
+:::
+
+## Configure the Router
+
+Point the semantic cache at Milvus in the router's `config.yaml`. The exact fields follow the [Milvus cache tutorial](../tutorials/semantic-cache/milvus-cache.md); adjust to your release:
+
+```yaml
+semantic_cache:
+  enabled: true
+  backend_type: "milvus"
+  similarity_threshold: 0.8
+```
+
+Then restart the router so it picks up the new backend:
+
+```bash
+kubectl rollout restart deployment/semantic-router -n vllm-semantic-router-system
+```
+
+## Validation
+
+### Send Test Requests
+
+```bash
+# First request populates the cache
+curl -X POST http://<ROUTER_ADDRESS>/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{"model": "test", "messages": [{"role": "user", "content": "Hello"}]}'
+
+# Repeat request to verify cache hit
+curl -X POST http://<ROUTER_ADDRESS>/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{"model": "test", "messages": [{"role": "user", "content": "Hello"}]}'
+```
+
+### Monitor Metrics
+
+- Cache hit ratio should stabilize after warm-up
+- Latency: Milvus adds ~1-5 ms per lookup over the memory cache
+- Error rate should remain at baseline
+
+### Rollback
+
+```bash
+# Revert to memory backend
+# NOTE: --type merge replaces the whole config.yaml key with only the cache
+# block shown here; restore your full configuration afterwards.
+kubectl patch configmap semantic-router-config -n vllm-semantic-router-system \
+ --type merge -p '{"data":{"config.yaml":"semantic_cache:\n backend_type: \"memory\""}}'
+
+# Restart router
+kubectl rollout restart deployment/semantic-router -n vllm-semantic-router-system
+
+# Verify
+kubectl logs -l app=semantic-router -n vllm-semantic-router-system | grep -i "cache"
+```
+
+:::note
+Data in Milvus is preserved and can be reused when switching back.
+:::
+
+## Backup and Recovery
+
+### Backup Strategies
+
+**1. Milvus Native Backup (Recommended)**
+
+Use [milvus-backup](https://github.com/zilliztech/milvus-backup):
+
+```bash
+# Install
+wget https://github.com/zilliztech/milvus-backup/releases/latest/download/milvus-backup_Linux_x86_64.tar.gz
+tar -xzf milvus-backup_Linux_x86_64.tar.gz
+
+# Create backup
+./milvus-backup create -n semantic_cache_backup \
+ --milvus.address milvus-cluster.vllm-semantic-router-system.svc.cluster.local:19530
+
+# List / Restore
+./milvus-backup list
+./milvus-backup restore -n semantic_cache_backup
+```
+
+**2. Storage-Level Backup**
+
+Use volume snapshots (requires CSI snapshot controller):
+
+```yaml
+apiVersion: snapshot.storage.k8s.io/v1
+kind: VolumeSnapshot
+metadata:
+  name: milvus-data-snapshot
+  namespace: vllm-semantic-router-system
+spec:
+  volumeSnapshotClassName: csi-snapclass
+  source:
+    persistentVolumeClaimName: milvus-data
+```
+
+**3. MinIO/S3 Backup (Cluster Mode)**
+
+Configure bucket versioning and replication:
+
+```bash
+mc version enable myminio/milvus-bucket
+mc replicate add myminio/milvus-bucket --remote-bucket milvus-bucket-dr \
+ --arn "arn:minio:replication::..."
+```
+
+### Recovery Procedures
+
+**From milvus-backup:**
+
+```bash
+# Stop router
+kubectl scale deployment/semantic-router -n vllm-semantic-router-system --replicas=0
+
+# Restore
+./milvus-backup restore -n semantic_cache_backup --restore_index
+
+# Restart router
+kubectl scale deployment/semantic-router -n vllm-semantic-router-system --replicas=3
+```
+
+**From VolumeSnapshot:**
+
+```bash
+# Delete the old PVC first, or restore under a new name; the requested size
+# must match the original volume.
+kubectl apply -f - <<EOF
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: milvus-data
+  namespace: vllm-semantic-router-system
+spec:
+  dataSource:
+    name: milvus-data-snapshot
+    kind: VolumeSnapshot
+    apiGroup: snapshot.storage.k8s.io
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 50Gi
+EOF
+```
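+After a restore, confirm Milvus is serving traffic before scaling the router back up. A minimal sketch, assuming the Helm service name `milvus-semantic-cache` and Milvus's default health/metrics port `9091` (for the operator CRs above the service is typically `<name>-milvus`):
+
+```bash
+# Forward the Milvus health/metrics port (9091 by default)
+kubectl port-forward -n vllm-semantic-router-system \
+  svc/milvus-semantic-cache 9091:9091 &
+PF_PID=$!
+sleep 3
+
+# /healthz returns "OK" once all components are up
+curl -sf http://localhost:9091/healthz && echo " Milvus healthy"
+
+# Stop the port-forward
+kill $PF_PID
+```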