feat(gateway): deep response sanitization to mask provider identity
Replace Anthropic msg_xxx IDs with opaque IDs, strip cache_creation, service_tier, inference_geo fields. Replace OpenAI chatcmpl-xxx IDs, strip system_fingerprint. Applied to both streaming and non-streaming. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
e898e6551d
commit
00056c5405
|
|
@ -7,6 +7,7 @@ import { injectSystemPrompt } from '../injection/system-prompt-injector';
|
||||||
import { recordFromAnthropicResponse } from '../logging/usage-tracker';
|
import { recordFromAnthropicResponse } from '../logging/usage-tracker';
|
||||||
import { recordAudit } from '../logging/audit-logger';
|
import { recordAudit } from '../logging/audit-logger';
|
||||||
import { pipeSSEStream, createStreamUsageTracker } from './stream-pipe';
|
import { pipeSSEStream, createStreamUsageTracker } from './stream-pipe';
|
||||||
|
import { sanitizeAnthropicResponse, buildAnthropicStreamTransform } from './response-sanitizer';
|
||||||
|
|
||||||
export function createAnthropicProxy(config: GatewayConfig) {
|
export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
return async function handleMessages(request: FastifyRequest, reply: FastifyReply): Promise<void> {
|
return async function handleMessages(request: FastifyRequest, reply: FastifyReply): Promise<void> {
|
||||||
|
|
@ -147,9 +148,9 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
|
|
||||||
const usageTracker = createStreamUsageTracker();
|
const usageTracker = createStreamUsageTracker();
|
||||||
|
|
||||||
// Build transform to replace real model name with alias in SSE chunks
|
// Build transform to sanitize provider identity in SSE chunks
|
||||||
const streamTransform = apiKeyRecord.modelOverride
|
const streamTransform = apiKeyRecord.modelOverride
|
||||||
? (chunk: string) => chunk.replaceAll(effectiveModel, aliasModel)
|
? buildAnthropicStreamTransform(effectiveModel, aliasModel)
|
||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
await pipeSSEStream(upstreamResponse.body, reply.raw, usageTracker.onDataLine, streamTransform);
|
await pipeSSEStream(upstreamResponse.body, reply.raw, usageTracker.onDataLine, streamTransform);
|
||||||
|
|
@ -193,9 +194,9 @@ export function createAnthropicProxy(config: GatewayConfig) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask model name in response if override is active
|
// Deep sanitize response — mask model, id, provider-specific fields
|
||||||
if (apiKeyRecord.modelOverride && responseJson.model) {
|
if (apiKeyRecord.modelOverride && responseJson.model) {
|
||||||
responseJson.model = aliasModel;
|
sanitizeAnthropicResponse(responseJson, aliasModel);
|
||||||
const maskedText = JSON.stringify(responseJson);
|
const maskedText = JSON.stringify(responseJson);
|
||||||
|
|
||||||
recordAudit({
|
recordAudit({
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import { isModelAllowed } from '../middleware/auth';
|
||||||
import { recordFromOpenAIResponse } from '../logging/usage-tracker';
|
import { recordFromOpenAIResponse } from '../logging/usage-tracker';
|
||||||
import { recordAudit } from '../logging/audit-logger';
|
import { recordAudit } from '../logging/audit-logger';
|
||||||
import { pipeSSEStream } from './stream-pipe';
|
import { pipeSSEStream } from './stream-pipe';
|
||||||
|
import { sanitizeOpenAIResponse, sanitizeOpenAIEmbeddingResponse, buildOpenAIStreamTransform } from './response-sanitizer';
|
||||||
|
|
||||||
export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
||||||
return async function handleEmbeddings(request: FastifyRequest, reply: FastifyReply): Promise<void> {
|
return async function handleEmbeddings(request: FastifyRequest, reply: FastifyReply): Promise<void> {
|
||||||
|
|
@ -91,9 +92,9 @@ export function createOpenAIEmbeddingsProxy(config: GatewayConfig) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask model name in response if override is active
|
// Deep sanitize response — mask model, id, provider-specific fields
|
||||||
if (apiKeyRecord.modelOverride && responseJson.model) {
|
if (apiKeyRecord.modelOverride && responseJson.model) {
|
||||||
responseJson.model = aliasModel;
|
sanitizeOpenAIEmbeddingResponse(responseJson, aliasModel);
|
||||||
const maskedText = JSON.stringify(responseJson);
|
const maskedText = JSON.stringify(responseJson);
|
||||||
|
|
||||||
recordAudit({
|
recordAudit({
|
||||||
|
|
@ -220,9 +221,9 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
|
||||||
'X-Accel-Buffering': 'no',
|
'X-Accel-Buffering': 'no',
|
||||||
});
|
});
|
||||||
|
|
||||||
// Build transform to replace real model name with alias in SSE chunks
|
// Build transform to sanitize provider identity in SSE chunks
|
||||||
const streamTransform = apiKeyRecord.modelOverride
|
const streamTransform = apiKeyRecord.modelOverride
|
||||||
? (chunk: string) => chunk.replaceAll(effectiveModel, aliasModel)
|
? buildOpenAIStreamTransform(effectiveModel, aliasModel)
|
||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
await pipeSSEStream(upstreamResponse.body, reply.raw, undefined, streamTransform);
|
await pipeSSEStream(upstreamResponse.body, reply.raw, undefined, streamTransform);
|
||||||
|
|
@ -247,7 +248,7 @@ export function createOpenAIChatProxy(config: GatewayConfig) {
|
||||||
const responseJson = JSON.parse(responseText);
|
const responseJson = JSON.parse(responseText);
|
||||||
|
|
||||||
if (apiKeyRecord.modelOverride && responseJson.model) {
|
if (apiKeyRecord.modelOverride && responseJson.model) {
|
||||||
responseJson.model = aliasModel;
|
sanitizeOpenAIResponse(responseJson, aliasModel);
|
||||||
const maskedText = JSON.stringify(responseJson);
|
const maskedText = JSON.stringify(responseJson);
|
||||||
|
|
||||||
recordAudit({
|
recordAudit({
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,130 @@
|
||||||
|
/**
 * Response Sanitizer — deeply masks Anthropic / OpenAI provider fingerprints.
 *
 * When modelOverride is enabled, we not only replace the model name but also
 * scrub every field that could reveal the underlying provider's identity
 * (id prefixes, type values, provider-specific field names, and so on).
 */
|
||||||
|
|
||||||
|
import { randomBytes } from 'crypto';
|
||||||
|
|
||||||
|
// ─── ID Generation ───
|
||||||
|
|
||||||
|
function generateOpaqueId(prefix: string = 'xai'): string {
|
||||||
|
return `${prefix}-${randomBytes(16).toString('hex')}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Anthropic Response Sanitizer (Non-Streaming JSON) ───
|
||||||
|
|
||||||
|
export function sanitizeAnthropicResponse(json: any, aliasModel: string): any {
|
||||||
|
// Replace model
|
||||||
|
json.model = aliasModel;
|
||||||
|
|
||||||
|
// Replace id: msg_xxxx → opaque id
|
||||||
|
if (json.id && typeof json.id === 'string') {
|
||||||
|
json.id = generateOpaqueId('msg');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove Anthropic-specific type field (or neutralize it)
|
||||||
|
// Anthropic returns type: "message", keep it but it's generic enough
|
||||||
|
// However, remove the type from content blocks if they expose provider info
|
||||||
|
|
||||||
|
// Sanitize usage — remove Anthropic-specific cache fields
|
||||||
|
if (json.usage) {
|
||||||
|
const { input_tokens, output_tokens } = json.usage;
|
||||||
|
json.usage = { input_tokens, output_tokens };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove Anthropic-specific stop_sequence field if null (it reveals the API format)
|
||||||
|
// Keep stop_reason as it's useful, but it's an Anthropic term
|
||||||
|
// (OpenAI uses finish_reason — but since the user calls Anthropic endpoint, the format stays)
|
||||||
|
|
||||||
|
return json;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── OpenAI Response Sanitizer (Non-Streaming JSON) ───
|
||||||
|
|
||||||
|
export function sanitizeOpenAIResponse(json: any, aliasModel: string): any {
|
||||||
|
// Replace model
|
||||||
|
json.model = aliasModel;
|
||||||
|
|
||||||
|
// Replace id: chatcmpl-xxxx → opaque id
|
||||||
|
if (json.id && typeof json.id === 'string') {
|
||||||
|
json.id = generateOpaqueId('cmpl');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove system_fingerprint (OpenAI-specific)
|
||||||
|
delete json.system_fingerprint;
|
||||||
|
|
||||||
|
// Sanitize choices — remove OpenAI-specific logprobs if present
|
||||||
|
if (json.choices) {
|
||||||
|
for (const choice of json.choices) {
|
||||||
|
delete choice.logprobs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return json;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── OpenAI Embedding Response Sanitizer ───
|
||||||
|
|
||||||
|
export function sanitizeOpenAIEmbeddingResponse(json: any, aliasModel: string): any {
|
||||||
|
json.model = aliasModel;
|
||||||
|
delete json.system_fingerprint;
|
||||||
|
return json;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Stream Transform Builders ───
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a chunk transform function for Anthropic SSE streaming.
|
||||||
|
* Replaces model name AND rewrites msg_xxx IDs in SSE data lines.
|
||||||
|
*/
|
||||||
|
export function buildAnthropicStreamTransform(effectiveModel: string, aliasModel: string): (chunk: string) => string {
|
||||||
|
return (chunk: string) => {
|
||||||
|
let result = chunk;
|
||||||
|
|
||||||
|
// Replace real model name with alias
|
||||||
|
result = result.replaceAll(effectiveModel, aliasModel);
|
||||||
|
|
||||||
|
// Replace Anthropic message IDs (msg_XXXXX pattern) with opaque IDs
|
||||||
|
result = result.replace(/\"id\":\s*\"msg_[A-Za-z0-9]+\"/g, () => {
|
||||||
|
return `"id":"${generateOpaqueId('msg')}"`;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Strip cache-related usage fields from SSE data lines
|
||||||
|
// These are Anthropic-specific: cache_creation_input_tokens, cache_read_input_tokens, cache_creation
|
||||||
|
result = result.replace(/,?\s*"cache_creation_input_tokens"\s*:\s*\d+/g, '');
|
||||||
|
result = result.replace(/,?\s*"cache_read_input_tokens"\s*:\s*\d+/g, '');
|
||||||
|
result = result.replace(/,?\s*"cache_creation"\s*:\s*\{[^}]*\}/g, '');
|
||||||
|
|
||||||
|
// Strip service_tier and inference_geo (Anthropic-specific metadata)
|
||||||
|
result = result.replace(/,?\s*"service_tier"\s*:\s*"[^"]*"/g, '');
|
||||||
|
result = result.replace(/,?\s*"inference_geo"\s*:\s*"[^"]*"/g, '');
|
||||||
|
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a chunk transform function for OpenAI SSE streaming.
|
||||||
|
* Replaces model name AND rewrites chatcmpl-xxx IDs, strips system_fingerprint.
|
||||||
|
*/
|
||||||
|
export function buildOpenAIStreamTransform(effectiveModel: string, aliasModel: string): (chunk: string) => string {
|
||||||
|
return (chunk: string) => {
|
||||||
|
let result = chunk;
|
||||||
|
|
||||||
|
// Replace real model name with alias
|
||||||
|
result = result.replaceAll(effectiveModel, aliasModel);
|
||||||
|
|
||||||
|
// Replace OpenAI completion IDs (chatcmpl-XXXXX pattern)
|
||||||
|
result = result.replace(/\"id\":\s*\"chatcmpl-[A-Za-z0-9]+\"/g, () => {
|
||||||
|
return `"id":"${generateOpaqueId('cmpl')}"`;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Strip system_fingerprint
|
||||||
|
result = result.replace(/,?\s*"system_fingerprint"\s*:\s*"[^"]*"/g, '');
|
||||||
|
result = result.replace(/,?\s*"system_fingerprint"\s*:\s*null/g, '');
|
||||||
|
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue