From 93050b688949d01e55bb8a6f23b79ee86a0ea93e Mon Sep 17 00:00:00 2001 From: hailin Date: Sat, 10 Jan 2026 01:42:33 -0800 Subject: [PATCH] perf(claude): enable Prompt Caching for ~90% cost savings on system prompt --- .../claude/claude-agent.service.ts | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/packages/services/conversation-service/src/infrastructure/claude/claude-agent.service.ts b/packages/services/conversation-service/src/infrastructure/claude/claude-agent.service.ts index 798cfd5..dfbee50 100644 --- a/packages/services/conversation-service/src/infrastructure/claude/claude-agent.service.ts +++ b/packages/services/conversation-service/src/infrastructure/claude/claude-agent.service.ts @@ -70,6 +70,7 @@ export class ClaudeAgentService implements OnModuleInit { /** * Send a message and get streaming response with tool loop support + * Uses Prompt Caching to reduce costs (~90% savings on cached system prompt) */ async *sendMessage( message: string, @@ -101,15 +102,25 @@ export class ClaudeAgentService implements OnModuleInit { const maxIterations = 10; // Safety limit let iterations = 0; + // System prompt with cache_control for Prompt Caching + // Default cache TTL is 5 minutes (refreshed on every cache hit); cache reads cost ~10% of the normal input price, while the initial cache write costs ~25% more than normal input + const systemWithCache: Anthropic.TextBlockParam[] = [ + { + type: 'text', + text: systemPrompt, + cache_control: { type: 'ephemeral' }, + }, + ]; + while (iterations < maxIterations) { iterations++; try { - // Create streaming message + // Create streaming message with cached system prompt const stream = await this.client.messages.stream({ model: 'claude-sonnet-4-20250514', max_tokens: 4096, - system: systemPrompt, + system: systemWithCache, messages, tools: tools as Anthropic.Tool[], }); @@ -243,6 +254,7 @@ export class ClaudeAgentService implements OnModuleInit { /** * Non-streaming message for simple queries + * Uses Prompt Caching for cost optimization */ async sendMessageSync( message: string, @@ 
-267,10 +279,19 @@ export class ClaudeAgentService implements OnModuleInit { content: message, }); + // System prompt with cache_control for Prompt Caching + const systemWithCache: Anthropic.TextBlockParam[] = [ + { + type: 'text', + text: systemPrompt, + cache_control: { type: 'ephemeral' }, + }, + ]; + const response = await this.client.messages.create({ model: 'claude-sonnet-4-20250514', max_tokens: 4096, - system: systemPrompt, + system: systemWithCache, messages, tools: tools as Anthropic.Tool[], });