perf(claude): enable Prompt Caching for ~90% cost savings on system prompt

hailin 2026-01-10 01:42:33 -08:00
parent c914693f94
commit 93050b6889
1 changed file with 24 additions and 3 deletions


@@ -70,6 +70,7 @@ export class ClaudeAgentService implements OnModuleInit {
   /**
    * Send a message and get streaming response with tool loop support
+   * Uses Prompt Caching to reduce costs (~90% savings on cached system prompt)
    */
   async *sendMessage(
     message: string,
@@ -101,15 +102,25 @@ export class ClaudeAgentService implements OnModuleInit {
     const maxIterations = 10; // Safety limit
     let iterations = 0;
 
+    // System prompt with cache_control for Prompt Caching
+    // Cache TTL is 5 minutes, cache hits cost only 10% of normal input price
+    const systemWithCache: Anthropic.TextBlockParam[] = [
+      {
+        type: 'text',
+        text: systemPrompt,
+        cache_control: { type: 'ephemeral' },
+      },
+    ];
+
     while (iterations < maxIterations) {
       iterations++;
 
       try {
-        // Create streaming message
+        // Create streaming message with cached system prompt
         const stream = await this.client.messages.stream({
           model: 'claude-sonnet-4-20250514',
           max_tokens: 4096,
-          system: systemPrompt,
+          system: systemWithCache,
           messages,
           tools: tools as Anthropic.Tool[],
         });
@@ -243,6 +254,7 @@
   /**
    * Non-streaming message for simple queries
+   * Uses Prompt Caching for cost optimization
    */
   async sendMessageSync(
     message: string,
@@ -267,10 +279,19 @@
       content: message,
     });
 
+    // System prompt with cache_control for Prompt Caching
+    const systemWithCache: Anthropic.TextBlockParam[] = [
+      {
+        type: 'text',
+        text: systemPrompt,
+        cache_control: { type: 'ephemeral' },
+      },
+    ];
+
     const response = await this.client.messages.create({
       model: 'claude-sonnet-4-20250514',
       max_tokens: 4096,
-      system: systemPrompt,
+      system: systemWithCache,
       messages,
       tools: tools as Anthropic.Tool[],
     });
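
One design note on this change: per Anthropic's Prompt Caching docs, the cached prefix is built in the order tools, then system, then messages, so the cache_control breakpoint on the system block also covers the tools definitions that precede it. Any change to systemPrompt or the tools array (or letting the 5-minute TTL lapse) invalidates the prefix; sendMessage and sendMessageSync share the same cache entry as long as that prefix is identical. A small illustrative helper (not in this diff) for inspecting the non-streaming response; the Anthropic.Usage type name and the cache_* fields assume a recent @anthropic-ai/sdk:

import Anthropic from '@anthropic-ai/sdk';

// Illustrative helper: summarize Prompt Caching activity from a Messages API response.
// Cache reads are billed at roughly 10% of the base input price; cache writes at
// roughly 125%, amortized by subsequent hits within the 5-minute TTL.
function describeCacheUsage(usage: Anthropic.Usage): string {
  const cacheRead = usage.cache_read_input_tokens ?? 0;
  const cacheWrite = usage.cache_creation_input_tokens ?? 0;
  if (cacheRead > 0) return `cache hit: ${cacheRead} input tokens read from cache`;
  if (cacheWrite > 0) return `cache write: ${cacheWrite} input tokens written to cache`;
  return 'no cached prefix used';
}

// e.g. after the call above: this.logger?.debug(describeCacheUsage(response.usage));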