feat(gateway): add per-key model override and alias for transparent model routing
An admin can configure modelOverride (the actual upstream model) and modelAlias (the model name shown to users) per API key. When an override is set, users don't need to specify the real model — the gateway transparently substitutes the override in upstream requests and replaces the real model name with the alias (or the override name when no alias is set) in responses, including SSE streams. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
dd765ed7a4
commit
e898e6551d
|
|
@ -22,6 +22,8 @@ export interface GatewayApiKey {
|
||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
expiresAt: string | null;
|
expiresAt: string | null;
|
||||||
lastUsedAt: string | null;
|
lastUsedAt: string | null;
|
||||||
|
modelOverride: string | null;
|
||||||
|
modelAlias: string | null;
|
||||||
createdBy: string | null;
|
createdBy: string | null;
|
||||||
createdAt: string;
|
createdAt: string;
|
||||||
rawKey?: string; // only on creation
|
rawKey?: string; // only on creation
|
||||||
|
|
@ -103,7 +105,7 @@ export const gatewayApi = {
|
||||||
const res = await api.get(`${BASE}/keys`);
|
const res = await api.get(`${BASE}/keys`);
|
||||||
return res.data.data;
|
return res.data.data;
|
||||||
},
|
},
|
||||||
createKey: async (dto: { name: string; owner?: string; rateLimitRpm?: number; rateLimitTpd?: number; monthlyBudget?: number; expiresAt?: string }) => {
|
createKey: async (dto: { name: string; owner?: string; rateLimitRpm?: number; rateLimitTpd?: number; monthlyBudget?: number; expiresAt?: string; modelOverride?: string; modelAlias?: string }) => {
|
||||||
const res = await api.post(`${BASE}/keys`, dto);
|
const res = await api.post(`${BASE}/keys`, dto);
|
||||||
return res.data.data;
|
return res.data.data;
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,14 @@ export function ApiKeysTab() {
|
||||||
dataIndex: 'owner',
|
dataIndex: 'owner',
|
||||||
key: 'owner',
|
key: 'owner',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
title: '模型覆盖',
|
||||||
|
dataIndex: 'modelOverride',
|
||||||
|
key: 'modelOverride',
|
||||||
|
render: (v: string | null, record: GatewayApiKey) => v
|
||||||
|
? <Text code>{record.modelAlias || v}</Text>
|
||||||
|
: <Text type="secondary">透传</Text>,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
title: '限速 (RPM)',
|
title: '限速 (RPM)',
|
||||||
dataIndex: 'rateLimitRpm',
|
dataIndex: 'rateLimitRpm',
|
||||||
|
|
@ -114,6 +122,12 @@ export function ApiKeysTab() {
|
||||||
<Form.Item name="rateLimitTpd" label="每日 Token 限制" initialValue={1000000}>
|
<Form.Item name="rateLimitTpd" label="每日 Token 限制" initialValue={1000000}>
|
||||||
<InputNumber min={1000} max={100000000} style={{ width: '100%' }} />
|
<InputNumber min={1000} max={100000000} style={{ width: '100%' }} />
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
<Form.Item name="modelOverride" label="模型覆盖" tooltip="设置后用户无需指定模型,网关自动替换为此模型发往上游">
|
||||||
|
<Input placeholder="例如: claude-sonnet-4-5-20250929" allowClear />
|
||||||
|
</Form.Item>
|
||||||
|
<Form.Item name="modelAlias" label="响应模型名称" tooltip="返回给用户的模型名,留空则显示实际模型名">
|
||||||
|
<Input placeholder="例如: szai-model-v1" allowClear />
|
||||||
|
</Form.Item>
|
||||||
</Form>
|
</Form>
|
||||||
</Modal>
|
</Modal>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,8 @@ export class AdminGatewayKeysController {
|
||||||
rateLimitTpd?: number;
|
rateLimitTpd?: number;
|
||||||
monthlyBudget?: number;
|
monthlyBudget?: number;
|
||||||
expiresAt?: string;
|
expiresAt?: string;
|
||||||
|
modelOverride?: string;
|
||||||
|
modelAlias?: string;
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
const admin = this.verifyAdmin(auth);
|
const admin = this.verifyAdmin(auth);
|
||||||
|
|
@ -123,6 +125,8 @@ export class AdminGatewayKeysController {
|
||||||
monthlyBudget: dto.monthlyBudget ?? null,
|
monthlyBudget: dto.monthlyBudget ?? null,
|
||||||
enabled: true,
|
enabled: true,
|
||||||
expiresAt: dto.expiresAt ? new Date(dto.expiresAt) : null,
|
expiresAt: dto.expiresAt ? new Date(dto.expiresAt) : null,
|
||||||
|
modelOverride: dto.modelOverride?.trim() || null,
|
||||||
|
modelAlias: dto.modelAlias?.trim() || null,
|
||||||
createdBy: admin.id,
|
createdBy: admin.id,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -151,6 +155,8 @@ export class AdminGatewayKeysController {
|
||||||
monthlyBudget?: number | null;
|
monthlyBudget?: number | null;
|
||||||
expiresAt?: string | null;
|
expiresAt?: string | null;
|
||||||
enabled?: boolean;
|
enabled?: boolean;
|
||||||
|
modelOverride?: string | null;
|
||||||
|
modelAlias?: string | null;
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
this.verifyAdmin(auth);
|
this.verifyAdmin(auth);
|
||||||
|
|
@ -166,6 +172,8 @@ export class AdminGatewayKeysController {
|
||||||
if (dto.monthlyBudget !== undefined) key.monthlyBudget = dto.monthlyBudget;
|
if (dto.monthlyBudget !== undefined) key.monthlyBudget = dto.monthlyBudget;
|
||||||
if (dto.expiresAt !== undefined) key.expiresAt = dto.expiresAt ? new Date(dto.expiresAt) : null;
|
if (dto.expiresAt !== undefined) key.expiresAt = dto.expiresAt ? new Date(dto.expiresAt) : null;
|
||||||
if (dto.enabled !== undefined) key.enabled = dto.enabled;
|
if (dto.enabled !== undefined) key.enabled = dto.enabled;
|
||||||
|
if (dto.modelOverride !== undefined) key.modelOverride = dto.modelOverride?.trim() || null;
|
||||||
|
if (dto.modelAlias !== undefined) key.modelAlias = dto.modelAlias?.trim() || null;
|
||||||
|
|
||||||
const updated = await this.repo.save(key);
|
const updated = await this.repo.save(key);
|
||||||
return { success: true, data: updated };
|
return { success: true, data: updated };
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,12 @@ export class GatewayApiKeyORM {
|
||||||
@Column({ name: 'last_used_at', type: 'timestamp', nullable: true })
|
@Column({ name: 'last_used_at', type: 'timestamp', nullable: true })
|
||||||
lastUsedAt: Date | null;
|
lastUsedAt: Date | null;
|
||||||
|
|
||||||
|
@Column({ name: 'model_override', type: 'varchar', length: 100, nullable: true })
|
||||||
|
modelOverride: string | null;
|
||||||
|
|
||||||
|
@Column({ name: 'model_alias', type: 'varchar', length: 100, nullable: true })
|
||||||
|
modelAlias: string | null;
|
||||||
|
|
||||||
@Column({ name: 'created_by', type: 'uuid', nullable: true })
|
@Column({ name: 'created_by', type: 'uuid', nullable: true })
|
||||||
createdBy: string | null;
|
createdBy: string | null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ async function lookupApiKey(apiKey: string): Promise<ApiKeyRecord | null> {
|
||||||
const row = await queryOne<any>(
|
const row = await queryOne<any>(
|
||||||
`SELECT id, tenant_id, key_hash, key_prefix, name, owner,
|
`SELECT id, tenant_id, key_hash, key_prefix, name, owner,
|
||||||
permissions, rate_limit_rpm, rate_limit_tpd, monthly_budget,
|
permissions, rate_limit_rpm, rate_limit_tpd, monthly_budget,
|
||||||
enabled, expires_at, last_used_at
|
enabled, expires_at, last_used_at, model_override, model_alias
|
||||||
FROM gateway_api_keys
|
FROM gateway_api_keys
|
||||||
WHERE key_hash = $1`,
|
WHERE key_hash = $1`,
|
||||||
[hash],
|
[hash],
|
||||||
|
|
@ -72,6 +72,8 @@ async function lookupApiKey(apiKey: string): Promise<ApiKeyRecord | null> {
|
||||||
enabled: row.enabled,
|
enabled: row.enabled,
|
||||||
expiresAt: row.expires_at ? new Date(row.expires_at) : null,
|
expiresAt: row.expires_at ? new Date(row.expires_at) : null,
|
||||||
lastUsedAt: row.last_used_at ? new Date(row.last_used_at) : null,
|
lastUsedAt: row.last_used_at ? new Date(row.last_used_at) : null,
|
||||||
|
modelOverride: row.model_override || null,
|
||||||
|
modelAlias: row.model_alias || null,
|
||||||
};
|
};
|
||||||
|
|
||||||
keyCache.set(hash, { record, cachedAt: Date.now() });
|
keyCache.set(hash, { record, cachedAt: Date.now() });
|
||||||
|
|
|
||||||
|
|
@ -25,19 +25,28 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const model = body.model || 'unknown';
|
const requestedModel = body.model || 'unknown';
|
||||||
|
|
||||||
// 2. Check model permission
|
// 2. Resolve effective model (override takes priority)
|
||||||
if (!isModelAllowed(apiKeyRecord, model)) {
|
const effectiveModel = apiKeyRecord.modelOverride || requestedModel;
|
||||||
|
const aliasModel = apiKeyRecord.modelOverride
|
||||||
|
? (apiKeyRecord.modelAlias || apiKeyRecord.modelOverride)
|
||||||
|
: requestedModel;
|
||||||
|
|
||||||
|
// 3. Check model permission (skip when override is set — admin controls the model)
|
||||||
|
if (!apiKeyRecord.modelOverride && !isModelAllowed(apiKeyRecord, requestedModel)) {
|
||||||
reply.status(403).send({
|
reply.status(403).send({
|
||||||
error: {
|
error: {
|
||||||
type: 'permission_error',
|
type: 'permission_error',
|
||||||
message: `Model "${model}" is not allowed for this API key.`,
|
message: `Model "${requestedModel}" is not allowed for this API key.`,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Replace model in body with effective model for upstream
|
||||||
|
body.model = effectiveModel;
|
||||||
|
|
||||||
// 3. Check streaming permission
|
// 3. Check streaming permission
|
||||||
if (body.stream && apiKeyRecord.permissions?.allowStreaming === false) {
|
if (body.stream && apiKeyRecord.permissions?.allowStreaming === false) {
|
||||||
reply.status(403).send({
|
reply.status(403).send({
|
||||||
|
|
@ -58,7 +67,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/messages',
|
requestPath: '/v1/messages',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: true,
|
contentFiltered: true,
|
||||||
filterRuleId: filterResult.ruleId || null,
|
filterRuleId: filterResult.ruleId || null,
|
||||||
|
|
@ -78,7 +87,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5. Inject regulatory content into system prompt
|
// 5. Inject regulatory content into system prompt
|
||||||
const injection = await injectSystemPrompt(body.system, model, apiKeyRecord.id);
|
const injection = await injectSystemPrompt(body.system, effectiveModel, apiKeyRecord.id);
|
||||||
body.system = injection.system;
|
body.system = injection.system;
|
||||||
|
|
||||||
// 6. Build upstream request headers
|
// 6. Build upstream request headers
|
||||||
|
|
@ -106,7 +115,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/messages',
|
requestPath: '/v1/messages',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
|
contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
|
||||||
filterRuleId: filterResult.ruleId || null,
|
filterRuleId: filterResult.ruleId || null,
|
||||||
|
|
@ -138,13 +147,18 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
|
|
||||||
const usageTracker = createStreamUsageTracker();
|
const usageTracker = createStreamUsageTracker();
|
||||||
|
|
||||||
await pipeSSEStream(upstreamResponse.body, reply.raw, usageTracker.onDataLine);
|
// Build transform to replace real model name with alias in SSE chunks
|
||||||
|
const streamTransform = apiKeyRecord.modelOverride
|
||||||
|
? (chunk: string) => chunk.replaceAll(effectiveModel, aliasModel)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
// Record usage from stream (async)
|
await pipeSSEStream(upstreamResponse.body, reply.raw, usageTracker.onDataLine, streamTransform);
|
||||||
|
|
||||||
|
// Record usage from stream — log real model for billing
|
||||||
const streamUsage = usageTracker.getUsage();
|
const streamUsage = usageTracker.getUsage();
|
||||||
recordFromAnthropicResponse(
|
recordFromAnthropicResponse(
|
||||||
apiKeyRecord.id,
|
apiKeyRecord.id,
|
||||||
model,
|
effectiveModel,
|
||||||
{ input_tokens: streamUsage.inputTokens, output_tokens: streamUsage.outputTokens },
|
{ input_tokens: streamUsage.inputTokens, output_tokens: streamUsage.outputTokens },
|
||||||
upstreamResponse.status,
|
upstreamResponse.status,
|
||||||
Date.now() - startTime,
|
Date.now() - startTime,
|
||||||
|
|
@ -154,7 +168,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/messages',
|
requestPath: '/v1/messages',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
|
contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
|
||||||
filterRuleId: filterResult.ruleId || null,
|
filterRuleId: filterResult.ruleId || null,
|
||||||
|
|
@ -166,18 +180,43 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
// Non-streaming response — buffer and forward
|
// Non-streaming response — buffer and forward
|
||||||
const responseText = await upstreamResponse.text();
|
const responseText = await upstreamResponse.text();
|
||||||
|
|
||||||
// Try to extract usage for logging
|
// Try to extract usage for logging (use real model for billing)
|
||||||
try {
|
try {
|
||||||
const responseJson = JSON.parse(responseText);
|
const responseJson = JSON.parse(responseText);
|
||||||
if (responseJson.usage) {
|
if (responseJson.usage) {
|
||||||
recordFromAnthropicResponse(
|
recordFromAnthropicResponse(
|
||||||
apiKeyRecord.id,
|
apiKeyRecord.id,
|
||||||
model,
|
effectiveModel,
|
||||||
responseJson.usage,
|
responseJson.usage,
|
||||||
upstreamResponse.status,
|
upstreamResponse.status,
|
||||||
durationMs,
|
durationMs,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mask model name in response if override is active
|
||||||
|
if (apiKeyRecord.modelOverride && responseJson.model) {
|
||||||
|
responseJson.model = aliasModel;
|
||||||
|
const maskedText = JSON.stringify(responseJson);
|
||||||
|
|
||||||
|
recordAudit({
|
||||||
|
apiKeyId: apiKeyRecord.id,
|
||||||
|
requestMethod: 'POST',
|
||||||
|
requestPath: '/v1/messages',
|
||||||
|
requestModel: effectiveModel,
|
||||||
|
requestIp: clientIp,
|
||||||
|
contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
|
||||||
|
filterRuleId: filterResult.ruleId || null,
|
||||||
|
injectionApplied: injection.applied,
|
||||||
|
responseStatus: upstreamResponse.status,
|
||||||
|
durationMs,
|
||||||
|
});
|
||||||
|
|
||||||
|
reply.raw.writeHead(upstreamResponse.status, {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
});
|
||||||
|
reply.raw.end(maskedText);
|
||||||
|
return;
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Not JSON — still forward
|
// Not JSON — still forward
|
||||||
}
|
}
|
||||||
|
|
@ -186,7 +225,7 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/messages',
|
requestPath: '/v1/messages',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
|
contentFiltered: filterResult.action === 'warn' || filterResult.action === 'log',
|
||||||
filterRuleId: filterResult.ruleId || null,
|
filterRuleId: filterResult.ruleId || null,
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import { ApiKeyRecord } from '../types';
|
||||||
import { isModelAllowed } from '../middleware/auth';
|
import { isModelAllowed } from '../middleware/auth';
|
||||||
import { recordFromOpenAIResponse } from '../logging/usage-tracker';
|
import { recordFromOpenAIResponse } from '../logging/usage-tracker';
|
||||||
import { recordAudit } from '../logging/audit-logger';
|
import { recordAudit } from '../logging/audit-logger';
|
||||||
import { pipeSSEStream, createStreamUsageTracker } from './stream-pipe';
|
import { pipeSSEStream } from './stream-pipe';
|
||||||
|
|
||||||
export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
||||||
return async function handleEmbeddings(request: FastifyRequest, reply: FastifyReply): Promise<void> {
|
return async function handleEmbeddings(request: FastifyRequest, reply: FastifyReply): Promise<void> {
|
||||||
|
|
@ -22,18 +22,25 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const model = body.model || 'unknown';
|
const requestedModel = body.model || 'unknown';
|
||||||
|
const effectiveModel = apiKeyRecord.modelOverride || requestedModel;
|
||||||
|
const aliasModel = apiKeyRecord.modelOverride
|
||||||
|
? (apiKeyRecord.modelAlias || apiKeyRecord.modelOverride)
|
||||||
|
: requestedModel;
|
||||||
|
|
||||||
if (!isModelAllowed(apiKeyRecord, model)) {
|
if (!apiKeyRecord.modelOverride && !isModelAllowed(apiKeyRecord, requestedModel)) {
|
||||||
reply.status(403).send({
|
reply.status(403).send({
|
||||||
error: {
|
error: {
|
||||||
type: 'permission_error',
|
type: 'permission_error',
|
||||||
message: `Model "${model}" is not allowed for this API key.`,
|
message: `Model "${requestedModel}" is not allowed for this API key.`,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Replace model for upstream
|
||||||
|
body.model = effectiveModel;
|
||||||
|
|
||||||
let upstreamResponse: Response;
|
let upstreamResponse: Response;
|
||||||
try {
|
try {
|
||||||
// openaiUpstreamUrl may already include /v1 (e.g., "https://host:8443/v1")
|
// openaiUpstreamUrl may already include /v1 (e.g., "https://host:8443/v1")
|
||||||
|
|
@ -51,7 +58,7 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/embeddings',
|
requestPath: '/v1/embeddings',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: false,
|
contentFiltered: false,
|
||||||
filterRuleId: null,
|
filterRuleId: null,
|
||||||
|
|
@ -77,12 +84,37 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
||||||
if (responseJson.usage) {
|
if (responseJson.usage) {
|
||||||
recordFromOpenAIResponse(
|
recordFromOpenAIResponse(
|
||||||
apiKeyRecord.id,
|
apiKeyRecord.id,
|
||||||
model,
|
effectiveModel,
|
||||||
responseJson.usage,
|
responseJson.usage,
|
||||||
upstreamResponse.status,
|
upstreamResponse.status,
|
||||||
durationMs,
|
durationMs,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mask model name in response if override is active
|
||||||
|
if (apiKeyRecord.modelOverride && responseJson.model) {
|
||||||
|
responseJson.model = aliasModel;
|
||||||
|
const maskedText = JSON.stringify(responseJson);
|
||||||
|
|
||||||
|
recordAudit({
|
||||||
|
apiKeyId: apiKeyRecord.id,
|
||||||
|
requestMethod: 'POST',
|
||||||
|
requestPath: '/v1/embeddings',
|
||||||
|
requestModel: effectiveModel,
|
||||||
|
requestIp: clientIp,
|
||||||
|
contentFiltered: false,
|
||||||
|
filterRuleId: null,
|
||||||
|
injectionApplied: false,
|
||||||
|
responseStatus: upstreamResponse.status,
|
||||||
|
durationMs,
|
||||||
|
});
|
||||||
|
|
||||||
|
reply.raw.writeHead(upstreamResponse.status, {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
});
|
||||||
|
reply.raw.end(maskedText);
|
||||||
|
return;
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Not JSON
|
// Not JSON
|
||||||
}
|
}
|
||||||
|
|
@ -91,7 +123,7 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/embeddings',
|
requestPath: '/v1/embeddings',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: false,
|
contentFiltered: false,
|
||||||
filterRuleId: null,
|
filterRuleId: null,
|
||||||
|
|
@ -123,18 +155,25 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const model = body.model || 'unknown';
|
const requestedModel = body.model || 'unknown';
|
||||||
|
const effectiveModel = apiKeyRecord.modelOverride || requestedModel;
|
||||||
|
const aliasModel = apiKeyRecord.modelOverride
|
||||||
|
? (apiKeyRecord.modelAlias || apiKeyRecord.modelOverride)
|
||||||
|
: requestedModel;
|
||||||
|
|
||||||
if (!isModelAllowed(apiKeyRecord, model)) {
|
if (!apiKeyRecord.modelOverride && !isModelAllowed(apiKeyRecord, requestedModel)) {
|
||||||
reply.status(403).send({
|
reply.status(403).send({
|
||||||
error: {
|
error: {
|
||||||
type: 'permission_error',
|
type: 'permission_error',
|
||||||
message: `Model "${model}" is not allowed for this API key.`,
|
message: `Model "${requestedModel}" is not allowed for this API key.`,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Replace model for upstream
|
||||||
|
body.model = effectiveModel;
|
||||||
|
|
||||||
let upstreamResponse: Response;
|
let upstreamResponse: Response;
|
||||||
try {
|
try {
|
||||||
// openaiUpstreamUrl may already include /v1 (e.g., "https://host:8443/v1")
|
// openaiUpstreamUrl may already include /v1 (e.g., "https://host:8443/v1")
|
||||||
|
|
@ -152,7 +191,7 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/chat/completions',
|
requestPath: '/v1/chat/completions',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: false,
|
contentFiltered: false,
|
||||||
filterRuleId: null,
|
filterRuleId: null,
|
||||||
|
|
@ -181,13 +220,18 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
|
||||||
'X-Accel-Buffering': 'no',
|
'X-Accel-Buffering': 'no',
|
||||||
});
|
});
|
||||||
|
|
||||||
await pipeSSEStream(upstreamResponse.body, reply.raw);
|
// Build transform to replace real model name with alias in SSE chunks
|
||||||
|
const streamTransform = apiKeyRecord.modelOverride
|
||||||
|
? (chunk: string) => chunk.replaceAll(effectiveModel, aliasModel)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
|
await pipeSSEStream(upstreamResponse.body, reply.raw, undefined, streamTransform);
|
||||||
|
|
||||||
recordAudit({
|
recordAudit({
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/chat/completions',
|
requestPath: '/v1/chat/completions',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: false,
|
contentFiltered: false,
|
||||||
filterRuleId: null,
|
filterRuleId: null,
|
||||||
|
|
@ -198,11 +242,42 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
|
||||||
} else {
|
} else {
|
||||||
const responseText = await upstreamResponse.text();
|
const responseText = await upstreamResponse.text();
|
||||||
|
|
||||||
|
// Try to mask model name in non-streaming response
|
||||||
|
try {
|
||||||
|
const responseJson = JSON.parse(responseText);
|
||||||
|
|
||||||
|
if (apiKeyRecord.modelOverride && responseJson.model) {
|
||||||
|
responseJson.model = aliasModel;
|
||||||
|
const maskedText = JSON.stringify(responseJson);
|
||||||
|
|
||||||
|
recordAudit({
|
||||||
|
apiKeyId: apiKeyRecord.id,
|
||||||
|
requestMethod: 'POST',
|
||||||
|
requestPath: '/v1/chat/completions',
|
||||||
|
requestModel: effectiveModel,
|
||||||
|
requestIp: clientIp,
|
||||||
|
contentFiltered: false,
|
||||||
|
filterRuleId: null,
|
||||||
|
injectionApplied: false,
|
||||||
|
responseStatus: upstreamResponse.status,
|
||||||
|
durationMs,
|
||||||
|
});
|
||||||
|
|
||||||
|
reply.raw.writeHead(upstreamResponse.status, {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
});
|
||||||
|
reply.raw.end(maskedText);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Not JSON — forward as-is
|
||||||
|
}
|
||||||
|
|
||||||
recordAudit({
|
recordAudit({
|
||||||
apiKeyId: apiKeyRecord.id,
|
apiKeyId: apiKeyRecord.id,
|
||||||
requestMethod: 'POST',
|
requestMethod: 'POST',
|
||||||
requestPath: '/v1/chat/completions',
|
requestPath: '/v1/chat/completions',
|
||||||
requestModel: model,
|
requestModel: effectiveModel,
|
||||||
requestIp: clientIp,
|
requestIp: clientIp,
|
||||||
contentFiltered: false,
|
contentFiltered: false,
|
||||||
filterRuleId: null,
|
filterRuleId: null,
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ export async function pipeSSEStream(
|
||||||
upstreamBody: ReadableStream<Uint8Array>,
|
upstreamBody: ReadableStream<Uint8Array>,
|
||||||
clientResponse: ServerResponse,
|
clientResponse: ServerResponse,
|
||||||
onDataLine?: (line: string) => void,
|
onDataLine?: (line: string) => void,
|
||||||
|
transformChunk?: (chunk: string) => string,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const reader = upstreamBody.getReader();
|
const reader = upstreamBody.getReader();
|
||||||
const decoder = new TextDecoder();
|
const decoder = new TextDecoder();
|
||||||
|
|
@ -23,10 +24,7 @@ export async function pipeSSEStream(
|
||||||
|
|
||||||
const chunk = decoder.decode(value, { stream: true });
|
const chunk = decoder.decode(value, { stream: true });
|
||||||
|
|
||||||
// Forward chunk immediately
|
// Parse SSE data lines for usage extraction (before transform, so we get real model data)
|
||||||
const canContinue = clientResponse.write(chunk);
|
|
||||||
|
|
||||||
// Parse SSE data lines for usage extraction
|
|
||||||
if (onDataLine) {
|
if (onDataLine) {
|
||||||
const lines = chunk.split('\n');
|
const lines = chunk.split('\n');
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
|
|
@ -36,6 +34,10 @@ export async function pipeSSEStream(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply transform (e.g., model name replacement) then forward
|
||||||
|
const outputChunk = transformChunk ? transformChunk(chunk) : chunk;
|
||||||
|
const canContinue = clientResponse.write(outputChunk);
|
||||||
|
|
||||||
// Handle backpressure
|
// Handle backpressure
|
||||||
if (!canContinue) {
|
if (!canContinue) {
|
||||||
await new Promise<void>((resolve) => clientResponse.once('drain', resolve));
|
await new Promise<void>((resolve) => clientResponse.once('drain', resolve));
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,8 @@ export interface ApiKeyRecord {
|
||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
expiresAt: Date | null;
|
expiresAt: Date | null;
|
||||||
lastUsedAt: Date | null;
|
lastUsedAt: Date | null;
|
||||||
|
modelOverride: string | null; // If set, replaces user's model with this
|
||||||
|
modelAlias: string | null; // Model name shown in responses (masks real model)
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ApiKeyPermissions {
|
export interface ApiKeyPermissions {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue