fix(conversation): improve token tracking accuracy

- Add 'error' chunk type to StreamChunk for partial token capture
- Record partial tokens to token_usage table even on API errors
- Capture error chunk tokens in conversation.service.ts
- Save partial response and tokens before re-throwing errors
- Add token aggregation from token_usage table for accurate stats
- Display detailed token info in admin (cache tokens, cost, API calls)
- Export TokenDetails type for frontend consumption

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-01-25 17:23:25 -08:00
parent ae99b78579
commit 7acdf78e0c
6 changed files with 203 additions and 47 deletions

View File

@ -63,4 +63,5 @@ export type {
PaginatedConversations, PaginatedConversations,
ConversationStatistics, ConversationStatistics,
ConversationQueryParams, ConversationQueryParams,
TokenDetails,
} from '../infrastructure/conversations.api'; } from '../infrastructure/conversations.api';

View File

@ -11,6 +11,14 @@ export interface DeviceInfo {
region?: string; region?: string;
} }
/**
 * Additional token-usage figures for a single conversation, exposed on
 * `ConversationDto` through the optional `tokenDetails` field.
 */
export interface TokenDetails {
/** Number of cache-creation tokens attributed to the conversation. */
cacheCreationTokens: number;
/** Number of cache-read tokens attributed to the conversation. */
cacheReadTokens: number;
/** Overall token total for the conversation. */
totalTokens: number;
/** Estimated cost of the conversation; NOTE(review): presumably USD — confirm with the backend. */
estimatedCost: number;
/** Number of API calls counted for the conversation. */
apiCallCount: number;
}
export interface ConversationDto { export interface ConversationDto {
id: string; id: string;
userId: string; userId: string;
@ -23,6 +31,7 @@ export interface ConversationDto {
assistantMessageCount: number; assistantMessageCount: number;
totalInputTokens: number; totalInputTokens: number;
totalOutputTokens: number; totalOutputTokens: number;
tokenDetails?: TokenDetails;
rating: number | null; rating: number | null;
feedback: string | null; feedback: string | null;
hasConverted: boolean; hasConverted: boolean;

View File

@ -419,25 +419,67 @@ export function ConversationsPage() {
Token 使 Token 使
</Space> </Space>
</Title> </Title>
<Row gutter={16} className="mb-4"> <Row gutter={[8, 8]} className="mb-4">
<Col span={12}> <Col span={8}>
<Card size="small"> <Card size="small">
<Statistic <Statistic
title="输入 Tokens" title="输入 Tokens"
value={conversationDetail.totalInputTokens} value={conversationDetail.totalInputTokens}
valueStyle={{ fontSize: 18 }} valueStyle={{ fontSize: 16 }}
/> />
</Card> </Card>
</Col> </Col>
<Col span={12}> <Col span={8}>
<Card size="small"> <Card size="small">
<Statistic <Statistic
title="输出 Tokens" title="输出 Tokens"
value={conversationDetail.totalOutputTokens} value={conversationDetail.totalOutputTokens}
valueStyle={{ fontSize: 18 }} valueStyle={{ fontSize: 16 }}
/> />
</Card> </Card>
</Col> </Col>
<Col span={8}>
<Card size="small">
<Statistic
title="API 调用"
value={conversationDetail.tokenDetails?.apiCallCount || 0}
suffix="次"
valueStyle={{ fontSize: 16 }}
/>
</Card>
</Col>
{conversationDetail.tokenDetails && (
<>
<Col span={8}>
<Card size="small">
<Statistic
title="Cache 创建"
value={conversationDetail.tokenDetails.cacheCreationTokens}
valueStyle={{ fontSize: 14, color: '#faad14' }}
/>
</Card>
</Col>
<Col span={8}>
<Card size="small">
<Statistic
title="Cache 读取"
value={conversationDetail.tokenDetails.cacheReadTokens}
valueStyle={{ fontSize: 14, color: '#52c41a' }}
/>
</Card>
</Col>
<Col span={8}>
<Card size="small">
<Statistic
title="预估成本"
value={conversationDetail.tokenDetails.estimatedCost.toFixed(4)}
prefix="$"
valueStyle={{ fontSize: 14, color: '#1890ff' }}
/>
</Card>
</Col>
</>
)}
</Row> </Row>
{/* Messages */} {/* Messages */}

View File

@ -11,6 +11,7 @@ import { Repository } from 'typeorm';
import * as jwt from 'jsonwebtoken'; import * as jwt from 'jsonwebtoken';
import { ConversationORM } from '../../infrastructure/database/postgres/entities/conversation.orm'; import { ConversationORM } from '../../infrastructure/database/postgres/entities/conversation.orm';
import { MessageORM } from '../../infrastructure/database/postgres/entities/message.orm'; import { MessageORM } from '../../infrastructure/database/postgres/entities/message.orm';
import { TokenUsageORM } from '../../infrastructure/database/postgres/entities/token-usage.orm';
interface AdminPayload { interface AdminPayload {
id: string; id: string;
@ -18,6 +19,16 @@ interface AdminPayload {
role: string; role: string;
} }
/**
 * Per-conversation totals computed from the token_usage table by
 * `aggregateTokenUsage`.
 */
interface TokenAggregation {
/** Sum of `input_tokens` over the conversation's usage rows. */
totalInputTokens: number;
/** Sum of `output_tokens` over the conversation's usage rows. */
totalOutputTokens: number;
/** Sum of `cache_creation_tokens` over the conversation's usage rows. */
totalCacheCreationTokens: number;
/** Sum of `cache_read_tokens` over the conversation's usage rows. */
totalCacheReadTokens: number;
/** Sum of `total_tokens` over the conversation's usage rows. */
totalTokens: number;
/** Sum of `estimated_cost` over the conversation's usage rows. */
totalEstimatedCost: number;
/** Number of usage rows recorded for the conversation (`COUNT(*)`). */
apiCallCount: number;
}
/** /**
* API - admin-client 使 * API - admin-client 使
* JWT * JWT
@ -29,6 +40,8 @@ export class AdminConversationController {
private conversationRepo: Repository<ConversationORM>, private conversationRepo: Repository<ConversationORM>,
@InjectRepository(MessageORM) @InjectRepository(MessageORM)
private messageRepo: Repository<MessageORM>, private messageRepo: Repository<MessageORM>,
@InjectRepository(TokenUsageORM)
private tokenUsageRepo: Repository<TokenUsageORM>,
) {} ) {}
/** /**
@ -49,6 +62,35 @@ export class AdminConversationController {
} }
} }
/**
 * Aggregates the token_usage rows that belong to one conversation.
 *
 * Runs a single query that sums the token and cost columns and counts the
 * matching rows, then converts the raw column values into plain numbers.
 *
 * @param conversationId - Value matched against `token_usage.conversation_id`.
 * @returns Totals for the conversation; every field is 0 when the raw value
 *          is missing or cannot be parsed as a number.
 */
private async aggregateTokenUsage(conversationId: string): Promise<TokenAggregation> {
  const row = await this.tokenUsageRepo
    .createQueryBuilder('t')
    .select([
      'SUM(t.input_tokens) as "totalInputTokens"',
      'SUM(t.output_tokens) as "totalOutputTokens"',
      'SUM(t.cache_creation_tokens) as "totalCacheCreationTokens"',
      'SUM(t.cache_read_tokens) as "totalCacheReadTokens"',
      'SUM(t.total_tokens) as "totalTokens"',
      'SUM(t.estimated_cost) as "totalEstimatedCost"',
      'COUNT(*) as "apiCallCount"',
    ])
    .where('t.conversation_id = :conversationId', { conversationId })
    .getRawOne();

  // Raw aggregate values may be null/undefined (no matching rows) or strings.
  // Parse with an explicit radix and fall back to 0 so NaN never reaches the
  // API response.
  const toInt = (value: unknown): number => {
    const parsed = parseInt(String(value ?? '0'), 10);
    return Number.isFinite(parsed) ? parsed : 0;
  };
  const toFloat = (value: unknown): number => {
    const parsed = parseFloat(String(value ?? '0'));
    return Number.isFinite(parsed) ? parsed : 0;
  };

  return {
    totalInputTokens: toInt(row?.totalInputTokens),
    totalOutputTokens: toInt(row?.totalOutputTokens),
    totalCacheCreationTokens: toInt(row?.totalCacheCreationTokens),
    totalCacheReadTokens: toInt(row?.totalCacheReadTokens),
    totalTokens: toInt(row?.totalTokens),
    totalEstimatedCost: toFloat(row?.totalEstimatedCost),
    apiCallCount: toInt(row?.apiCallCount),
  };
}
/** /**
* *
*/ */
@ -189,6 +231,7 @@ export class AdminConversationController {
/** /**
* *
* token_usage token
*/ */
@Get(':id') @Get(':id')
async getConversation( async getConversation(
@ -208,6 +251,9 @@ export class AdminConversationController {
}; };
} }
// 从 token_usage 表获取准确的 token 统计(而不是仅依赖 conversation 实体)
const tokenStats = await this.aggregateTokenUsage(id);
return { return {
success: true, success: true,
data: { data: {
@ -220,8 +266,17 @@ export class AdminConversationController {
messageCount: conversation.messageCount, messageCount: conversation.messageCount,
userMessageCount: conversation.userMessageCount, userMessageCount: conversation.userMessageCount,
assistantMessageCount: conversation.assistantMessageCount, assistantMessageCount: conversation.assistantMessageCount,
totalInputTokens: conversation.totalInputTokens, // 使用聚合的准确 token 数据
totalOutputTokens: conversation.totalOutputTokens, totalInputTokens: tokenStats.totalInputTokens,
totalOutputTokens: tokenStats.totalOutputTokens,
// 额外的 token 详情
tokenDetails: {
cacheCreationTokens: tokenStats.totalCacheCreationTokens,
cacheReadTokens: tokenStats.totalCacheReadTokens,
totalTokens: tokenStats.totalTokens,
estimatedCost: tokenStats.totalEstimatedCost,
apiCallCount: tokenStats.apiCallCount,
},
rating: conversation.rating, rating: conversation.rating,
feedback: conversation.feedback, feedback: conversation.feedback,
hasConverted: conversation.hasConverted, hasConverted: conversation.hasConverted,

View File

@ -160,36 +160,48 @@ export class ConversationService {
let updatedState: ConversationContext['consultingState'] | undefined; let updatedState: ConversationContext['consultingState'] | undefined;
let inputTokens = 0; let inputTokens = 0;
let outputTokens = 0; let outputTokens = 0;
let streamError: Error | null = null;
// Stream response from Claude (with attachments for multimodal support) // Stream response from Claude (with attachments for multimodal support)
for await (const chunk of this.claudeAgentService.sendMessage( try {
params.content, for await (const chunk of this.claudeAgentService.sendMessage(
context, params.content,
params.attachments, context,
)) { params.attachments,
if (chunk.type === 'text' && chunk.content) { )) {
fullResponse += chunk.content; if (chunk.type === 'text' && chunk.content) {
} else if (chunk.type === 'tool_use') { fullResponse += chunk.content;
toolCalls.push({ } else if (chunk.type === 'tool_use') {
name: chunk.toolName!, toolCalls.push({
input: chunk.toolInput!, name: chunk.toolName!,
result: null, input: chunk.toolInput!,
}); result: null,
} else if (chunk.type === 'tool_result') { });
const lastToolCall = toolCalls[toolCalls.length - 1]; } else if (chunk.type === 'tool_result') {
if (lastToolCall) { const lastToolCall = toolCalls[toolCalls.length - 1];
lastToolCall.result = chunk.toolResult; if (lastToolCall) {
lastToolCall.result = chunk.toolResult;
}
} else if (chunk.type === 'state_update' && chunk.newState) {
// V2: Capture updated consulting state
updatedState = chunk.newState;
} else if (chunk.type === 'end') {
// Capture token usage from end chunk
inputTokens = chunk.inputTokens || 0;
outputTokens = chunk.outputTokens || 0;
} else if (chunk.type === 'error') {
// Capture partial token usage from error chunk
inputTokens = chunk.inputTokens || 0;
outputTokens = chunk.outputTokens || 0;
console.warn(`[ConversationService] Stream error, captured partial tokens: in=${inputTokens}, out=${outputTokens}`);
} }
} else if (chunk.type === 'state_update' && chunk.newState) {
// V2: Capture updated consulting state
updatedState = chunk.newState;
} else if (chunk.type === 'end') {
// Capture token usage from end chunk
inputTokens = chunk.inputTokens || 0;
outputTokens = chunk.outputTokens || 0;
}
yield chunk; yield chunk;
}
} catch (error) {
// Capture the error but continue to save partial data
streamError = error instanceof Error ? error : new Error(String(error));
console.error('[ConversationService] Stream error:', streamError.message);
} }
// V2: Save updated consulting state to conversation // V2: Save updated consulting state to conversation
@ -199,20 +211,24 @@ export class ConversationService {
conversation.updateConsultingState(stateForDb); conversation.updateConsultingState(stateForDb);
} }
// Save assistant response // Save assistant response (even partial response on error)
const assistantMessage = MessageEntity.create({ if (fullResponse) {
id: uuidv4(), const assistantMessage = MessageEntity.create({
conversationId: params.conversationId, id: uuidv4(),
role: MessageRole.ASSISTANT, conversationId: params.conversationId,
type: MessageType.TEXT, role: MessageRole.ASSISTANT,
content: fullResponse, type: MessageType.TEXT,
metadata: toolCalls.length > 0 ? { toolCalls } : undefined, content: fullResponse,
}); metadata: toolCalls.length > 0 ? { toolCalls } : undefined,
await this.messageRepo.save(assistantMessage); });
await this.messageRepo.save(assistantMessage);
}
// Update conversation statistics // Update conversation statistics (always update tokens, even on error)
conversation.incrementMessageCount('user'); conversation.incrementMessageCount('user');
conversation.incrementMessageCount('assistant'); if (fullResponse) {
conversation.incrementMessageCount('assistant');
}
conversation.addTokens(inputTokens, outputTokens); conversation.addTokens(inputTokens, outputTokens);
// Update conversation title if first message // Update conversation title if first message
@ -224,6 +240,11 @@ export class ConversationService {
// Save all updates to conversation // Save all updates to conversation
await this.conversationRepo.update(conversation); await this.conversationRepo.update(conversation);
// Re-throw error after saving partial data
if (streamError) {
throw streamError;
}
} }
/** /**

View File

@ -58,7 +58,7 @@ export interface ConversationContext {
} }
export interface StreamChunk { export interface StreamChunk {
type: 'text' | 'tool_use' | 'tool_result' | 'end' | 'stage_change' | 'state_update'; type: 'text' | 'tool_use' | 'tool_result' | 'end' | 'stage_change' | 'state_update' | 'error';
content?: string; content?: string;
toolName?: string; toolName?: string;
toolInput?: Record<string, unknown>; toolInput?: Record<string, unknown>;
@ -66,9 +66,11 @@ export interface StreamChunk {
// V2新增 // V2新增
stageName?: string; stageName?: string;
newState?: ConsultingState; newState?: ConsultingState;
// Token usage (returned with 'end' chunk) // Token usage (returned with 'end' or 'error' chunk)
inputTokens?: number; inputTokens?: number;
outputTokens?: number; outputTokens?: number;
// Error info (only for 'error' type)
errorMessage?: string;
} }
@Injectable() @Injectable()
@ -401,6 +403,32 @@ export class ClaudeAgentServiceV2 implements OnModuleInit {
} catch (error) { } catch (error) {
console.error('[ClaudeAgentV2] Claude API error:', error); console.error('[ClaudeAgentV2] Claude API error:', error);
// Record partial token usage even on error (for cost tracking)
if (totalInputTokens > 0 || totalOutputTokens > 0) {
const latencyMs = Date.now() - startTime;
this.tokenUsageService.recordUsage({
userId: context.userId,
conversationId: context.conversationId,
model: 'claude-sonnet-4-20250514',
inputTokens: totalInputTokens,
outputTokens: totalOutputTokens,
cacheCreationTokens: totalCacheCreationTokens,
cacheReadTokens: totalCacheReadTokens,
intentType: intent?.type || 'UNKNOWN',
toolCalls: toolCallCount,
responseLength: fullResponseText.length,
latencyMs,
}).catch(err => console.error('[ClaudeAgentV2] Failed to record partial token usage:', err));
}
// Yield error chunk with partial tokens before throwing
yield {
type: 'error',
inputTokens: totalInputTokens,
outputTokens: totalOutputTokens,
errorMessage: error instanceof Error ? error.message : 'Unknown error',
};
throw error; throw error;
} }
} }