feat(gateway): add per-key model override and alias for transparent model routing

Admins can configure modelOverride (the actual upstream model) and modelAlias (the name shown to users) per API key. When modelOverride is set, users don't need to specify the real model — the gateway substitutes it transparently in both requests and responses (including SSE streams).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 00:31:26 -08:00 · 2026-02-26 00:31:26 -08:00 · e898e6551d
parent dd765ed7a4
commit e898e6551d
9 changed files with 184 additions and 34 deletions
--- a/packages/admin-client/src/features/llm-gateway/infrastructure/llm-gateway.api.ts
+++ b/packages/admin-client/src/features/llm-gateway/infrastructure/llm-gateway.api.ts
@ -22,6 +22,8 @@ export interface GatewayApiKey {
  enabled: boolean;
  expiresAt: string | null;
  lastUsedAt: string | null;
  modelOverride: string | null;
  modelAlias: string | null;
  createdBy: string | null;
  createdAt: string;
  rawKey?: string; // only on creation
@ -103,7 +105,7 @@ export const gatewayApi = {
    const res = await api.get(`${BASE}/keys`);
    return res.data.data;
  },
-  createKey: async (dto: { name: string; owner?: string; rateLimitRpm?: number; rateLimitTpd?: number; monthlyBudget?: number; expiresAt?: string }) => {
+  createKey: async (dto: { name: string; owner?: string; rateLimitRpm?: number; rateLimitTpd?: number; monthlyBudget?: number; expiresAt?: string; modelOverride?: string; modelAlias?: string }) => {
    const res = await api.post(`${BASE}/keys`, dto);
    return res.data.data;
  },
--- a/packages/admin-client/src/features/llm-gateway/presentation/components/ApiKeysTab.tsx
+++ b/packages/admin-client/src/features/llm-gateway/presentation/components/ApiKeysTab.tsx
@ -42,6 +42,14 @@ export function ApiKeysTab() {
      dataIndex: 'owner',
      key: 'owner',
    },
    {
      title: '模型覆盖',
      dataIndex: 'modelOverride',
      key: 'modelOverride',
      render: (v: string | null, record: GatewayApiKey) => v
        ? <Text code>{record.modelAlias || v}</Text>
        : <Text type="secondary">透传</Text>,
    },
    {
      title: '限速 (RPM)',
      dataIndex: 'rateLimitRpm',
@ -114,6 +122,12 @@ export function ApiKeysTab() {
          <Form.Item name="rateLimitTpd" label="每日 Token 限制" initialValue={1000000}>
            <InputNumber min={1000} max={100000000} style={{ width: '100%' }} />
          </Form.Item>
          <Form.Item name="modelOverride" label="模型覆盖" tooltip="设置后用户无需指定模型，网关自动替换为此模型发往上游">
            <Input placeholder="例如: claude-sonnet-4-5-20250929" allowClear />
          </Form.Item>
          <Form.Item name="modelAlias" label="响应模型名称" tooltip="返回给用户的模型名，留空则显示实际模型名">
            <Input placeholder="例如: szai-model-v1" allowClear />
          </Form.Item>
        </Form>
      </Modal>
--- a/packages/services/conversation-service/src/adapters/inbound/admin-gateway.controller.ts
+++ b/packages/services/conversation-service/src/adapters/inbound/admin-gateway.controller.ts
@ -92,6 +92,8 @@ export class AdminGatewayKeysController {
      rateLimitTpd?: number;
      monthlyBudget?: number;
      expiresAt?: string;
      modelOverride?: string;
      modelAlias?: string;
    },
  ) {
    const admin = this.verifyAdmin(auth);
@ -123,6 +125,8 @@ export class AdminGatewayKeysController {
      monthlyBudget: dto.monthlyBudget ?? null,
      enabled: true,
      expiresAt: dto.expiresAt ? new Date(dto.expiresAt) : null,
      modelOverride: dto.modelOverride?.trim() || null,
      modelAlias: dto.modelAlias?.trim() || null,
      createdBy: admin.id,
    });
@ -151,6 +155,8 @@ export class AdminGatewayKeysController {
      monthlyBudget?: number | null;
      expiresAt?: string | null;
      enabled?: boolean;
      modelOverride?: string | null;
      modelAlias?: string | null;
    },
  ) {
    this.verifyAdmin(auth);
@ -166,6 +172,8 @@ export class AdminGatewayKeysController {
    if (dto.monthlyBudget !== undefined) key.monthlyBudget = dto.monthlyBudget;
    if (dto.expiresAt !== undefined) key.expiresAt = dto.expiresAt ? new Date(dto.expiresAt) : null;
    if (dto.enabled !== undefined) key.enabled = dto.enabled;
    if (dto.modelOverride !== undefined) key.modelOverride = dto.modelOverride?.trim() || null;
    if (dto.modelAlias !== undefined) key.modelAlias = dto.modelAlias?.trim() || null;
    const updated = await this.repo.save(key);
    return { success: true, data: updated };
--- a/packages/services/conversation-service/src/infrastructure/database/postgres/entities/gateway-api-key.orm.ts
+++ b/packages/services/conversation-service/src/infrastructure/database/postgres/entities/gateway-api-key.orm.ts
@ -52,6 +52,12 @@ export class GatewayApiKeyORM {
  @Column({ name: 'last_used_at', type: 'timestamp', nullable: true })
  lastUsedAt: Date | null;
  @Column({ name: 'model_override', type: 'varchar', length: 100, nullable: true })
  modelOverride: string | null;
  @Column({ name: 'model_alias', type: 'varchar', length: 100, nullable: true })
  modelAlias: string | null;
  @Column({ name: 'created_by', type: 'uuid', nullable: true })
  createdBy: string | null;
--- a/packages/services/llm-gateway/src/middleware/auth.ts
+++ b/packages/services/llm-gateway/src/middleware/auth.ts
@ -50,7 +50,7 @@ async function lookupApiKey(apiKey: string): Promise<ApiKeyRecord | null> {
  const row = await queryOne<any>(
    `SELECT id, tenant_id, key_hash, key_prefix, name, owner,
            permissions, rate_limit_rpm, rate_limit_tpd, monthly_budget,
-            enabled, expires_at, last_used_at
+            enabled, expires_at, last_used_at, model_override, model_alias
     FROM gateway_api_keys
     WHERE key_hash = $1`,
    [hash],
@ -72,6 +72,8 @@ async function lookupApiKey(apiKey: string): Promise<ApiKeyRecord | null> {
    enabled: row.enabled,
    expiresAt: row.expires_at ? new Date(row.expires_at) : null,
    lastUsedAt: row.last_used_at ? new Date(row.last_used_at) : null,
    modelOverride: row.model_override || null,
    modelAlias: row.model_alias || null,
  };
  keyCache.set(hash, { record, cachedAt: Date.now() });
--- a/packages/services/llm-gateway/src/proxy/anthropic-proxy.ts
+++ b/packages/services/llm-gateway/src/proxy/anthropic-proxy.ts
@ -25,19 +25,28 @@ export function createAnthropicProxy(config: GatewayConfig) {
      return;
    }
-    const model = body.model || 'unknown';
+    const requestedModel = body.model || 'unknown';
-    // 2. Check model permission
+    // 2. Resolve effective model (override takes priority)
-    if (!isModelAllowed(apiKeyRecord, model)) {
+    const effectiveModel = apiKeyRecord.modelOverride || requestedModel;
    const aliasModel = apiKeyRecord.modelOverride
      ? (apiKeyRecord.modelAlias || apiKeyRecord.modelOverride)
      : requestedModel;
    // 3. Check model permission (skip when override is set — admin controls the model)
    if (!apiKeyRecord.modelOverride && !isModelAllowed(apiKeyRecord, requestedModel)) {
      reply.status(403).send({
        error: {
          type: 'permission_error',
-          message: `Model "${model}" is not allowed for this API key.`,
+          message: `Model "${requestedModel}" is not allowed for this API key.`,
        },
      });
      return;
    }
    // Replace model in body with effective model for upstream
    body.model = effectiveModel;
    // 3. Check streaming permission
    if (body.stream && apiKeyRecord.permissions?.allowStreaming === false) {
      reply.status(403).send({
@ -58,7 +67,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
          apiKeyId: apiKeyRecord.id,
          requestMethod: 'POST',
          requestPath: '/v1/messages',
-          requestModel: model,
+          requestModel: effectiveModel,
          requestIp: clientIp,
          contentFiltered: true,
          filterRuleId: filterResult.ruleId || null,
@ -78,7 +87,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
    }
    // 5. Inject regulatory content into system prompt
-    const injection = await injectSystemPrompt(body.system, model, apiKeyRecord.id);
+    const injection = await injectSystemPrompt(body.system, effectiveModel, apiKeyRecord.id);
    body.system = injection.system;
    // 6. Build upstream request headers
@ -106,7 +115,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
        apiKeyId: apiKeyRecord.id,
        requestMethod: 'POST',
        requestPath: '/v1/messages',
-        requestModel: model,
+        requestModel: effectiveModel,
        requestIp: clientIp,
        contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
        filterRuleId: filterResult.ruleId || null,
@ -138,13 +147,18 @@ export function createAnthropicProxy(config: GatewayConfig) {
      const usageTracker = createStreamUsageTracker();
-      await pipeSSEStream(upstreamResponse.body, reply.raw, usageTracker.onDataLine);
+      // Build transform to replace real model name with alias in SSE chunks
      const streamTransform = apiKeyRecord.modelOverride
        ? (chunk: string) => chunk.replaceAll(effectiveModel, aliasModel)
        : undefined;
-      // Record usage from stream (async)
+      await pipeSSEStream(upstreamResponse.body, reply.raw, usageTracker.onDataLine, streamTransform);
      // Record usage from stream — log real model for billing
      const streamUsage = usageTracker.getUsage();
      recordFromAnthropicResponse(
        apiKeyRecord.id,
-        model,
+        effectiveModel,
        { input_tokens: streamUsage.inputTokens, output_tokens: streamUsage.outputTokens },
        upstreamResponse.status,
        Date.now() - startTime,
@ -154,7 +168,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
        apiKeyId: apiKeyRecord.id,
        requestMethod: 'POST',
        requestPath: '/v1/messages',
-        requestModel: model,
+        requestModel: effectiveModel,
        requestIp: clientIp,
        contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
        filterRuleId: filterResult.ruleId || null,
@ -166,18 +180,43 @@ export function createAnthropicProxy(config: GatewayConfig) {
      // Non-streaming response — buffer and forward
      const responseText = await upstreamResponse.text();
-      // Try to extract usage for logging
+      // Try to extract usage for logging (use real model for billing)
      try {
        const responseJson = JSON.parse(responseText);
        if (responseJson.usage) {
          recordFromAnthropicResponse(
            apiKeyRecord.id,
-            model,
+            effectiveModel,
            responseJson.usage,
            upstreamResponse.status,
            durationMs,
          );
        }
        // Mask model name in response if override is active
        if (apiKeyRecord.modelOverride && responseJson.model) {
          responseJson.model = aliasModel;
          const maskedText = JSON.stringify(responseJson);
          recordAudit({
            apiKeyId: apiKeyRecord.id,
            requestMethod: 'POST',
            requestPath: '/v1/messages',
            requestModel: effectiveModel,
            requestIp: clientIp,
            contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
            filterRuleId: filterResult.ruleId || null,
            injectionApplied: injection.applied,
            responseStatus: upstreamResponse.status,
            durationMs,
          });
          reply.raw.writeHead(upstreamResponse.status, {
            'Content-Type': 'application/json',
          });
          reply.raw.end(maskedText);
          return;
        }
      } catch {
        // Not JSON — still forward
      }
@ -186,7 +225,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
        apiKeyId: apiKeyRecord.id,
        requestMethod: 'POST',
        requestPath: '/v1/messages',
-        requestModel: model,
+        requestModel: effectiveModel,
        requestIp: clientIp,
        contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
        filterRuleId: filterResult.ruleId || null,
--- a/packages/services/llm-gateway/src/proxy/openai-proxy.ts
+++ b/packages/services/llm-gateway/src/proxy/openai-proxy.ts
@ -4,7 +4,7 @@ import { ApiKeyRecord } from '../types';
 import { isModelAllowed } from '../middleware/auth';
 import { recordFromOpenAIResponse } from '../logging/usage-tracker';
 import { recordAudit } from '../logging/audit-logger';
-import { pipeSSEStream, createStreamUsageTracker } from './stream-pipe';
+import { pipeSSEStream } from './stream-pipe';
 export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
  return async function handleEmbeddings(request: FastifyRequest, reply: FastifyReply): Promise<void> {
@ -22,18 +22,25 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
      return;
    }
-    const model = body.model || 'unknown';
+    const requestedModel = body.model || 'unknown';
    const effectiveModel = apiKeyRecord.modelOverride || requestedModel;
    const aliasModel = apiKeyRecord.modelOverride
      ? (apiKeyRecord.modelAlias || apiKeyRecord.modelOverride)
      : requestedModel;
-    if (!isModelAllowed(apiKeyRecord, model)) {
+    if (!apiKeyRecord.modelOverride && !isModelAllowed(apiKeyRecord, requestedModel)) {
      reply.status(403).send({
        error: {
          type: 'permission_error',
-          message: `Model "${model}" is not allowed for this API key.`,
+          message: `Model "${requestedModel}" is not allowed for this API key.`,
        },
      });
      return;
    }
    // Replace model for upstream
    body.model = effectiveModel;
    let upstreamResponse: Response;
    try {
      // openaiUpstreamUrl may already include /v1 (e.g., "https://host:8443/v1")
@ -51,7 +58,7 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
        apiKeyId: apiKeyRecord.id,
        requestMethod: 'POST',
        requestPath: '/v1/embeddings',
-        requestModel: model,
+        requestModel: effectiveModel,
        requestIp: clientIp,
        contentFiltered: false,
        filterRuleId: null,
@ -77,12 +84,37 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
      if (responseJson.usage) {
        recordFromOpenAIResponse(
          apiKeyRecord.id,
-          model,
+          effectiveModel,
          responseJson.usage,
          upstreamResponse.status,
          durationMs,
        );
      }
      // Mask model name in response if override is active
      if (apiKeyRecord.modelOverride && responseJson.model) {
        responseJson.model = aliasModel;
        const maskedText = JSON.stringify(responseJson);
        recordAudit({
          apiKeyId: apiKeyRecord.id,
          requestMethod: 'POST',
          requestPath: '/v1/embeddings',
          requestModel: effectiveModel,
          requestIp: clientIp,
          contentFiltered: false,
          filterRuleId: null,
          injectionApplied: false,
          responseStatus: upstreamResponse.status,
          durationMs,
        });
        reply.raw.writeHead(upstreamResponse.status, {
          'Content-Type': 'application/json',
        });
        reply.raw.end(maskedText);
        return;
      }
    } catch {
      // Not JSON
    }
@ -91,7 +123,7 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
      apiKeyId: apiKeyRecord.id,
      requestMethod: 'POST',
      requestPath: '/v1/embeddings',
-      requestModel: model,
+      requestModel: effectiveModel,
      requestIp: clientIp,
      contentFiltered: false,
      filterRuleId: null,
@ -123,18 +155,25 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
      return;
    }
-    const model = body.model || 'unknown';
+    const requestedModel = body.model || 'unknown';
    const effectiveModel = apiKeyRecord.modelOverride || requestedModel;
    const aliasModel = apiKeyRecord.modelOverride
      ? (apiKeyRecord.modelAlias || apiKeyRecord.modelOverride)
      : requestedModel;
-    if (!isModelAllowed(apiKeyRecord, model)) {
+    if (!apiKeyRecord.modelOverride && !isModelAllowed(apiKeyRecord, requestedModel)) {
      reply.status(403).send({
        error: {
          type: 'permission_error',
-          message: `Model "${model}" is not allowed for this API key.`,
+          message: `Model "${requestedModel}" is not allowed for this API key.`,
        },
      });
      return;
    }
    // Replace model for upstream
    body.model = effectiveModel;
    let upstreamResponse: Response;
    try {
      // openaiUpstreamUrl may already include /v1 (e.g., "https://host:8443/v1")
@ -152,7 +191,7 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
        apiKeyId: apiKeyRecord.id,
        requestMethod: 'POST',
        requestPath: '/v1/chat/completions',
-        requestModel: model,
+        requestModel: effectiveModel,
        requestIp: clientIp,
        contentFiltered: false,
        filterRuleId: null,
@ -181,13 +220,18 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
        'X-Accel-Buffering': 'no',
      });
-      await pipeSSEStream(upstreamResponse.body, reply.raw);
+      // Build transform to replace real model name with alias in SSE chunks
      const streamTransform = apiKeyRecord.modelOverride
        ? (chunk: string) => chunk.replaceAll(effectiveModel, aliasModel)
        : undefined;
      await pipeSSEStream(upstreamResponse.body, reply.raw, undefined, streamTransform);
      recordAudit({
        apiKeyId: apiKeyRecord.id,
        requestMethod: 'POST',
        requestPath: '/v1/chat/completions',
-        requestModel: model,
+        requestModel: effectiveModel,
        requestIp: clientIp,
        contentFiltered: false,
        filterRuleId: null,
@ -198,11 +242,42 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
    } else {
      const responseText = await upstreamResponse.text();
      // Try to mask model name in non-streaming response
      try {
        const responseJson = JSON.parse(responseText);
        if (apiKeyRecord.modelOverride && responseJson.model) {
          responseJson.model = aliasModel;
          const maskedText = JSON.stringify(responseJson);
          recordAudit({
            apiKeyId: apiKeyRecord.id,
            requestMethod: 'POST',
            requestPath: '/v1/chat/completions',
            requestModel: effectiveModel,
            requestIp: clientIp,
            contentFiltered: false,
            filterRuleId: null,
            injectionApplied: false,
            responseStatus: upstreamResponse.status,
            durationMs,
          });
          reply.raw.writeHead(upstreamResponse.status, {
            'Content-Type': 'application/json',
          });
          reply.raw.end(maskedText);
          return;
        }
      } catch {
        // Not JSON — forward as-is
      }
      recordAudit({
        apiKeyId: apiKeyRecord.id,
        requestMethod: 'POST',
        requestPath: '/v1/chat/completions',
-        requestModel: model,
+        requestModel: effectiveModel,
        requestIp: clientIp,
        contentFiltered: false,
        filterRuleId: null,
--- a/packages/services/llm-gateway/src/proxy/stream-pipe.ts
+++ b/packages/services/llm-gateway/src/proxy/stream-pipe.ts
@ -12,6 +12,7 @@ export async function pipeSSEStream(
  upstreamBody: ReadableStream<Uint8Array>,
  clientResponse: ServerResponse,
  onDataLine?: (line: string) => void,
  transformChunk?: (chunk: string) => string,
 ): Promise<void> {
  const reader = upstreamBody.getReader();
  const decoder = new TextDecoder();
@ -23,10 +24,7 @@ export async function pipeSSEStream(
      const chunk = decoder.decode(value, { stream: true });
-      // Forward chunk immediately
+      // Parse SSE data lines for usage extraction (before transform, so we get real model data)
      const canContinue = clientResponse.write(chunk);
      // Parse SSE data lines for usage extraction
      if (onDataLine) {
        const lines = chunk.split('\n');
        for (const line of lines) {
@ -36,6 +34,10 @@ export async function pipeSSEStream(
        }
      }
      // Apply transform (e.g., model name replacement) then forward
      const outputChunk = transformChunk ? transformChunk(chunk) : chunk;
      const canContinue = clientResponse.write(outputChunk);
      // Handle backpressure
      if (!canContinue) {
        await new Promise<void>((resolve) => clientResponse.once('drain', resolve));
--- a/packages/services/llm-gateway/src/types.ts
+++ b/packages/services/llm-gateway/src/types.ts
@ -14,6 +14,8 @@ export interface ApiKeyRecord {
  enabled: boolean;
  expiresAt: Date | null;
  lastUsedAt: Date | null;
  modelOverride: string | null;   // If set, replaces user's model with this
  modelAlias: string | null;      // Model name shown in responses (masks real model)
 }
 export interface ApiKeyPermissions {