perf(claude): enable Prompt Caching for ~90% cost savings on system prompt
This commit is contained in:
parent
c914693f94
commit
93050b6889
|
|
@ -70,6 +70,7 @@ export class ClaudeAgentService implements OnModuleInit {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send a message and get streaming response with tool loop support
|
* Send a message and get streaming response with tool loop support
|
||||||
|
* Uses Prompt Caching to reduce costs (~90% savings on the system prompt tokens whenever a request hits the cache)
|
||||||
*/
|
*/
|
||||||
async *sendMessage(
|
async *sendMessage(
|
||||||
message: string,
|
message: string,
|
||||||
|
|
@ -101,15 +102,25 @@ export class ClaudeAgentService implements OnModuleInit {
|
||||||
const maxIterations = 10; // Safety limit
|
const maxIterations = 10; // Safety limit
|
||||||
let iterations = 0;
|
let iterations = 0;
|
||||||
|
|
||||||
|
// System prompt with cache_control for Prompt Caching
|
||||||
|
// Cache TTL is 5 minutes (refreshed on every hit); cache reads cost ~10% of the normal input price, while the initial cache write costs ~25% more
|
||||||
|
const systemWithCache: Anthropic.TextBlockParam[] = [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: systemPrompt,
|
||||||
|
cache_control: { type: 'ephemeral' },
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
while (iterations < maxIterations) {
|
while (iterations < maxIterations) {
|
||||||
iterations++;
|
iterations++;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Create streaming message
|
// Create streaming message with cached system prompt
|
||||||
const stream = await this.client.messages.stream({
|
const stream = await this.client.messages.stream({
|
||||||
model: 'claude-sonnet-4-20250514',
|
model: 'claude-sonnet-4-20250514',
|
||||||
max_tokens: 4096,
|
max_tokens: 4096,
|
||||||
system: systemPrompt,
|
system: systemWithCache,
|
||||||
messages,
|
messages,
|
||||||
tools: tools as Anthropic.Tool[],
|
tools: tools as Anthropic.Tool[],
|
||||||
});
|
});
|
||||||
|
|
@ -243,6 +254,7 @@ export class ClaudeAgentService implements OnModuleInit {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Non-streaming message for simple queries
|
* Non-streaming message for simple queries
|
||||||
|
* Uses Prompt Caching for cost optimization
|
||||||
*/
|
*/
|
||||||
async sendMessageSync(
|
async sendMessageSync(
|
||||||
message: string,
|
message: string,
|
||||||
|
|
@ -267,10 +279,19 @@ export class ClaudeAgentService implements OnModuleInit {
|
||||||
content: message,
|
content: message,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// System prompt with cache_control for Prompt Caching
|
||||||
|
const systemWithCache: Anthropic.TextBlockParam[] = [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: systemPrompt,
|
||||||
|
cache_control: { type: 'ephemeral' },
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
const response = await this.client.messages.create({
|
const response = await this.client.messages.create({
|
||||||
model: 'claude-sonnet-4-20250514',
|
model: 'claude-sonnet-4-20250514',
|
||||||
max_tokens: 4096,
|
max_tokens: 4096,
|
||||||
system: systemPrompt,
|
system: systemWithCache,
|
||||||
messages,
|
messages,
|
||||||
tools: tools as Anthropic.Tool[],
|
tools: tools as Anthropic.Tool[],
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue