perf(claude): enable Prompt Caching for ~90% cost savings on system prompt
This commit is contained in:
parent
c914693f94
commit
93050b6889
|
|
@ -70,6 +70,7 @@ export class ClaudeAgentService implements OnModuleInit {
|
|||
|
||||
/**
|
||||
* Send a message and get streaming response with tool loop support
|
||||
* Uses Prompt Caching to reduce costs (~90% savings on system prompt tokens served from the cache)
|
||||
*/
|
||||
async *sendMessage(
|
||||
message: string,
|
||||
|
|
@ -101,15 +102,25 @@ export class ClaudeAgentService implements OnModuleInit {
|
|||
const maxIterations = 10; // Safety limit
|
||||
let iterations = 0;
|
||||
|
||||
// System prompt with cache_control for Prompt Caching
|
||||
// Cache TTL is 5 minutes (refreshed on each cache hit); cache reads cost only 10% of the normal input price
|
||||
const systemWithCache: Anthropic.TextBlockParam[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: systemPrompt,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
];
|
||||
|
||||
while (iterations < maxIterations) {
|
||||
iterations++;
|
||||
|
||||
try {
|
||||
// Create streaming message
|
||||
// Create streaming message with cached system prompt
|
||||
const stream = await this.client.messages.stream({
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
max_tokens: 4096,
|
||||
system: systemPrompt,
|
||||
system: systemWithCache,
|
||||
messages,
|
||||
tools: tools as Anthropic.Tool[],
|
||||
});
|
||||
|
|
@ -243,6 +254,7 @@ export class ClaudeAgentService implements OnModuleInit {
|
|||
|
||||
/**
|
||||
* Non-streaming message for simple queries
|
||||
* Uses Prompt Caching for cost optimization
|
||||
*/
|
||||
async sendMessageSync(
|
||||
message: string,
|
||||
|
|
@ -267,10 +279,19 @@ export class ClaudeAgentService implements OnModuleInit {
|
|||
content: message,
|
||||
});
|
||||
|
||||
// System prompt with cache_control for Prompt Caching
|
||||
const systemWithCache: Anthropic.TextBlockParam[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: systemPrompt,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
];
|
||||
|
||||
const response = await this.client.messages.create({
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
max_tokens: 4096,
|
||||
system: systemPrompt,
|
||||
system: systemWithCache,
|
||||
messages,
|
||||
tools: tools as Anthropic.Tool[],
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in New Issue