6.8 多网关部署

多网关部署允许在同一台机器或不同机器上运行多个 Gateway 实例,实现环境隔离、负载分担和高可用。

概述

多网关部署的常见场景:

  • 环境隔离 - 开发、测试、生产环境分离
  • 用户隔离 - 不同用户使用独立 Gateway
  • 负载均衡 - 分散请求到多个实例
  • 高可用 - 主备模式提高可靠性

同机多实例

隔离要求

每个实例必须使用不同的:

  • OPENCLAW_CONFIG_PATH - 配置文件路径
  • OPENCLAW_STATE_DIR - 状态目录
  • agents.defaults.workspace - 工作区目录
  • gateway.port - 监听端口

配置示例

实例 1 - 开发环境

# ~/.openclaw-dev/openclaw.json
{
  "gateway": {
    "mode": "local",
    "port": 19001,
    "auth": {
      "token": "dev-token"
    }
  },
  "agents": {
    "defaults": {
      "workspace": "~/.openclaw-dev/workspace"
    }
  }
}

实例 2 - 生产环境

# ~/.openclaw/openclaw.json
{
  "gateway": {
    "mode": "local",
    "port": 18789,
    "auth": {
      "token": "prod-token"
    }
  },
  "agents": {
    "defaults": {
      "workspace": "~/.openclaw/workspace"
    }
  }
}

启动脚本

#!/bin/bash

# Launch one gateway with its own profile name and state directory.
# The config file is read from openclaw.json inside that state directory.
#   $1 - profile name
#   $2 - state directory
start_gateway() {
    local profile=$1
    local state_dir=$2

    OPENCLAW_PROFILE="$profile" \
    OPENCLAW_STATE_DIR="$state_dir" \
    OPENCLAW_CONFIG_PATH="$state_dir/openclaw.json" \
    openclaw gateway start
}

# Development instance
start_gateway dev ~/.openclaw-dev

# Production instance
start_gateway prod ~/.openclaw

使用 Profile

Dev Profile

开发 profile 自动配置隔离环境:

# 自动使用 ~/.openclaw-dev 和端口 19001
OPENCLAW_PROFILE=dev openclaw gateway start --dev

# 或
openclaw gateway start --profile dev --dev

自定义 Profile

# Create a custom profile
export OPENCLAW_PROFILE=staging
export OPENCLAW_STATE_DIR=~/.openclaw-staging
# Set the config path explicitly so this instance never shares a config
# file with another instance on the same machine.
export OPENCLAW_CONFIG_PATH=~/.openclaw-staging/openclaw.json
export OPENCLAW_GATEWAY_PORT=18800

openclaw gateway start

跨机器部署

架构设计

┌─────────────┐
│   Gateway 1 │ - 主节点(机器 A)
│  :18789     │
└─────────────┘
       │
       │ 健康检查
       ↓
┌─────────────┐
│   Gateway 2 │ - 备节点(机器 B)
│  :18789     │
└─────────────┘
       │
       ↓
┌─────────────┐
│   Redis     │ - 分布式锁
└─────────────┘

主节点配置

# 机器 A: gateway-primary.json
{
  "gateway": {
    "mode": "local",
    "port": 18789,
    "bind": "0.0.0.0",
    "ha": {
      "enabled": true,
      "role": "primary",
      "priority": 100
    },
    "lock": {
      "type": "redis",
      "redis": {
        "url": "redis://redis-server:6379",
        "key": "openclaw:gateway:lock"
      }
    }
  }
}

备节点配置

# 机器 B: gateway-secondary.json
{
  "gateway": {
    "mode": "local",
    "port": 18789,
    "bind": "0.0.0.0",
    "ha": {
      "enabled": true,
      "role": "secondary",
      "priority": 50,
      "primaryCheck": {
        "url": "http://gateway-primary:18789/health",
        "interval": "10s"
      }
    },
    "lock": {
      "type": "redis",
      "redis": {
        "url": "redis://redis-server:6379",
        "key": "openclaw:gateway:lock"
      }
    }
  }
}

负载均衡

Nginx 配置

# Backend pool: each request goes to the server with the fewest active
# connections, with the per-server weights taken into account.
upstream openclaw_gateway {
    least_conn;
    server gateway1.local:18789 weight=3;
    server gateway2.local:18789 weight=2;
    server gateway3.local:18789 weight=1;
}

# Public entry point that proxies everything to the backend pool.
server {
    listen 80;
    server_name gateway.example.com;

    location / {
        proxy_pass http://openclaw_gateway;
        # HTTP/1.1 plus the Upgrade/Connection headers let WebSocket
        # handshakes pass through the proxy.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        # Forward the original Host header and the client address.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
    }
}

健康检查

# NOTE: the check, check_http_send and check_http_expect_alive directives
# are not part of stock nginx. They come from the third-party
# nginx_upstream_check_module (bundled with Tengine), which must be
# compiled in; otherwise nginx rejects this configuration.
upstream openclaw_gateway {
    # Passive checks: after 3 failed attempts within 30s the server is
    # considered unavailable for 30s.
    server gateway1.local:18789 max_fails=3 fail_timeout=30s;
    server gateway2.local:18789 max_fails=3 fail_timeout=30s;
    
    # Active health check: probe every 5000 ms with a 1000 ms timeout;
    # 2 consecutive successes mark a server up, 3 consecutive failures
    # mark it down.
    check interval=5000 rise=2 fall=3 timeout=1000 type=http;
    check_http_send "GET /health HTTP/1.0\r\n\r\n";
    check_http_expect_alive http_2xx http_3xx;
}

Kubernetes 部署

Deployment

# Gateway Deployment: three replicas sharing a Redis-backed lock.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: openclaw-gateway
spec:
  replicas: 3
  selector:
    matchLabels:
      app: openclaw-gateway
  template:
    metadata:
      labels:
        app: openclaw-gateway
    spec:
      containers:
      - name: gateway
        image: openclaw/gateway:latest
        ports:
        - containerPort: 18789
        env:
        - name: OPENCLAW_GATEWAY_PORT
          value: "18789"
        - name: OPENCLAW_LOCK_TYPE
          value: "redis"
        - name: REDIS_URL
          valueFrom:
            secretKeyRef:
              name: openclaw-secrets
              key: redis-url
        # CPU and memory requests are required for utilization-based
        # autoscaling: a HorizontalPodAutoscaler computes utilization as a
        # percentage of the request and takes no action for a metric whose
        # request is missing. The values below are examples; tune them to
        # the real workload.
        resources:
          requests:
            cpu: 250m
            memory: 256Mi
          limits:
            cpu: "1"
            memory: 512Mi
        # Restart the container when /health stops answering.
        livenessProbe:
          httpGet:
            path: /health
            port: 18789
          initialDelaySeconds: 30
          periodSeconds: 10
        # Only route traffic to the pod once /ready answers.
        readinessProbe:
          httpGet:
            path: /ready
            port: 18789
          initialDelaySeconds: 10
          periodSeconds: 5

Service

apiVersion: v1
kind: Service
metadata:
  name: openclaw-gateway
spec:
  type: LoadBalancer
  selector:
    app: openclaw-gateway
  ports:
  - protocol: TCP
    port: 80
    targetPort: 18789

HorizontalPodAutoscaler

apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: openclaw-gateway-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: openclaw-gateway
  minReplicas: 2
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80

Docker Compose

多实例配置

version: '3.8'

services:
  gateway1:
    image: openclaw/gateway:latest
    container_name: openclaw-gateway-1
    ports:
      - "18789:18789"
    environment:
      - OPENCLAW_STATE_DIR=/data
      - OPENCLAW_GATEWAY_PORT=18789
      - OPENCLAW_LOCK_TYPE=redis
      - REDIS_URL=redis://redis:6379
    volumes:
      - ./data/gateway1:/data
    depends_on:
      - redis

  gateway2:
    image: openclaw/gateway:latest
    container_name: openclaw-gateway-2
    ports:
      - "18790:18789"
    environment:
      - OPENCLAW_STATE_DIR=/data
      - OPENCLAW_GATEWAY_PORT=18789
      - OPENCLAW_LOCK_TYPE=redis
      - REDIS_URL=redis://redis:6379
    volumes:
      - ./data/gateway2:/data
    depends_on:
      - redis

  redis:
    image: redis:alpine
    container_name: openclaw-redis
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data

volumes:
  redis-data:

监控多实例

查看所有实例

# List running instances. The [o] bracket keeps grep from matching its
# own process in the ps output.
ps aux | grep '[o]penclaw'

# Check which process is listening on each gateway port
lsof -i :18789
lsof -i :19001

# Docker environments
docker ps --filter "name=openclaw-gateway"

健康监控

#!/bin/bash
# check-gateways.sh
#
# Probe the /health endpoint of every gateway listed in GATEWAYS and print
# one status line per gateway.
# Exit status: 0 when every gateway answers HTTP 200, 1 otherwise, so the
# script can be used directly from cron or a monitoring agent.

GATEWAYS=(
    "http://gateway1:18789"
    "http://gateway2:18789"
    "http://gateway3:18789"
)

failed=0

for gw in "${GATEWAYS[@]}"; do
    # --max-time bounds each probe so an unreachable host cannot hang the
    # whole run. When no HTTP response is received curl reports 000.
    status=$(curl -s -o /dev/null --max-time 5 -w "%{http_code}" "$gw/health")
    if [[ "$status" == "200" ]]; then
        echo "✓ $gw is healthy"
    else
        echo "✗ $gw is down (HTTP $status)"
        failed=1
    fi
done

exit "$failed"

故障切换

自动切换

{
  "gateway": {
    "ha": {
      "enabled": true,
      "failover": {
        "enabled": true,
        "timeout": "30s",
        "retries": 3,
        "backoff": "exponential"
      }
    }
  }
}

手动切换

# 停止主节点
openclaw gateway stop --node primary

# 备节点自动接管

# 恢复主节点
openclaw gateway start --node primary

# 主节点重新接管(可选)
openclaw gateway promote --node primary

会话保持

粘性会话

# Nginx sticky sessions
# The two upstream blocks below share the same name and are alternatives:
# put only one of them in a given nginx configuration.

# Option 1: route by client IP address.
upstream openclaw_gateway {
    ip_hash;  # based on the client IP
    server gateway1:18789;
    server gateway2:18789;
}

# Option 2: route by cookie.
# NOTE: the "sticky cookie" directive is only available in the commercial
# NGINX Plus distribution, not in open-source nginx.
upstream openclaw_gateway {
    server gateway1:18789;
    server gateway2:18789;
    sticky cookie openclaw_route expires=1h;
}

会话共享

{
  "gateway": {
    "sessions": {
      "storage": "redis",
      "redis": {
        "url": "redis://redis:6379",
        "keyPrefix": "openclaw:sessions:",
        "ttl": "24h"
      }
    }
  }
}

最佳实践

  • 使用分布式锁避免实例冲突
  • 配置健康检查和自动故障切换
  • 使用 Redis 等共享存储同步会话
  • 监控所有实例的资源使用
  • 设置告警通知实例故障
  • 定期测试故障切换流程
  • 保持配置版本同步

故障排查

端口冲突

# 检查端口占用
lsof -i :18789

# 修改端口配置
{
  "gateway": {
    "port": 18790  // 使用不同端口
  }
}

锁竞争

# 查看锁状态
openclaw gateway lock-status

# 检查 Redis 锁
redis-cli GET openclaw:gateway:lock

# 清理陈旧锁
openclaw gateway force-unlock

配置不同步

# Compare configs
diff ~/.openclaw/openclaw.json ~/.openclaw-dev/openclaw.json

# Sync configs. rsync cannot copy when both source and destination are
# remote, so run it on gateway1 and push to gateway2 (gateway1 needs SSH
# access to gateway2).
ssh gateway1 'rsync -av /etc/openclaw/ gateway2:/etc/openclaw/'

更多信息

更多高可用部署和运维最佳实践请参考官方文档。