refactor(agents): remove Structured Output (Layer 2) to enable true streaming
背景: 在 commit bb1a113 中引入了 4 层回复质量控制体系: - Layer 1: System Prompt (1095 行详细指导) - Layer 2: Structured Output (Zod schema → output_config) - Layer 3: LLM-as-Judge (Haiku 4.5 评分) - Layer 4: Per-intent hard truncation (已在 db8617d 移除) Layer 2 (Structured Output) 的问题: 1. 阻塞流式输出 — output_config 强制模型输出 JSON,JSON 片段无法展示给用户,导致整个响应缓冲后才一次性输出 2. Zod 验证频繁崩溃 — intent 枚举值不匹配时 SDK 抛错,已出现 4 次 hotfix (b55cd4b、db8617d、7af8c4d 及本次) 3. followUp 字段导致内容丢失 — 模型将回答内容分到 followUp 后被过滤 4. intent 分类仅用于日志,对用户体验无价值 5. z.string() 无 .max() 约束 — 实际不控制回答长度 移除后,回答质量由以下机制保证(全部保留): - Layer 1: System Prompt — 意图分类表、回答风格、长度指导 - Layer 3: LLM-Judge — 相关性/简洁性/噪音评分,不合格则自动重试 - API max_tokens: 2048 — 硬限制输出上限 改动: - coordinator-agent.service.ts: 移除 zodOutputFormat/CoordinatorResponseSchema import 和 outputConfig 参数 - agent-loop.ts: 移除 text_delta 中的 outputConfig 守卫(文本现在直接流式输出)、移除 output_config API 参数、移除两个 Structured Output 验证失败恢复 catch 块、移除 JSON 解析 + safety net 块 - agent.types.ts: 从 AgentLoopParams 接口移除 outputConfig 字段 - coordinator-response.schema.ts: 清空 Zod schema/工具函数,保留历史备注 效果: - 用户现在能看到逐字流式输出(token-by-token streaming) - 消除了 Structured Output 相关的所有崩溃风险 - 代码净减 ~130 行 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
913a3fd375
commit
6767215f83
|
|
@ -31,10 +31,6 @@ import {
|
|||
isAgentInvocationTool,
|
||||
getToolsForClaudeAPI,
|
||||
} from '../tools/coordinator-tools';
|
||||
import {
|
||||
MAX_FOLLOWUP_LENGTH,
|
||||
smartTruncate,
|
||||
} from '../schemas/coordinator-response.schema';
|
||||
|
||||
const logger = new Logger('AgentLoop');
|
||||
|
||||
|
|
@ -180,7 +176,6 @@ export async function* agentLoop(
|
|||
messages: messages as any,
|
||||
tools: getToolsForClaudeAPI(additionalTools) as any,
|
||||
max_tokens: 2048,
|
||||
...(params.outputConfig ? { output_config: params.outputConfig } : {}),
|
||||
} as any);
|
||||
break; // success
|
||||
} catch (error: any) {
|
||||
|
|
@ -235,14 +230,11 @@ export async function* agentLoop(
|
|||
|
||||
if (delta.type === 'text_delta') {
|
||||
currentTextContent += delta.text;
|
||||
// Structured Output 模式下不直接 yield text(JSON 片段不能展示给用户)
|
||||
if (!params.outputConfig) {
|
||||
yield {
|
||||
type: 'text',
|
||||
content: delta.text,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
yield {
|
||||
type: 'text',
|
||||
content: delta.text,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
} else if (delta.type === 'input_json_delta') {
|
||||
// Tool input being streamed — accumulate silently
|
||||
}
|
||||
|
|
@ -288,43 +280,6 @@ export async function* agentLoop(
|
|||
}
|
||||
} catch (error) {
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
|
||||
// ---- Structured Output 验证失败恢复 ----
|
||||
// SDK 的 Zod 验证可能失败(如 intent 枚举值不匹配),
|
||||
// 但模型的 answer 文本通常是正确的。从已累积的 blocks 中提取内容。
|
||||
if (errMsg.includes('Failed to parse structured output') && assistantBlocks.length > 0) {
|
||||
logger.warn(`Structured output validation failed, recovering from accumulated text`);
|
||||
const accumulatedText = assistantBlocks
|
||||
.filter(b => b.type === 'text' && 'text' in b)
|
||||
.map(b => (b as any).text)
|
||||
.join('');
|
||||
|
||||
if (accumulatedText) {
|
||||
try {
|
||||
// 手动 JSON.parse 跳过 Zod 验证 — intent 值不影响用户看到的内容
|
||||
const parsed = JSON.parse(accumulatedText);
|
||||
if (parsed.answer) {
|
||||
yield { type: 'text', content: parsed.answer, timestamp: Date.now() };
|
||||
if (parsed.followUp) {
|
||||
if (/?|\?/.test(parsed.followUp)) {
|
||||
yield { type: 'text', content: '\n\n' + parsed.followUp, timestamp: Date.now() };
|
||||
} else {
|
||||
yield { type: 'text', content: parsed.followUp, timestamp: Date.now() };
|
||||
}
|
||||
}
|
||||
logger.debug(`[Turn ${currentTurn + 1}] Recovered intent: ${parsed.intent} (validation bypassed)`);
|
||||
} else {
|
||||
yield { type: 'text', content: accumulatedText, timestamp: Date.now() };
|
||||
}
|
||||
} catch {
|
||||
// JSON 也解析不了 → 原始文本兜底
|
||||
yield { type: 'text', content: accumulatedText, timestamp: Date.now() };
|
||||
}
|
||||
}
|
||||
return; // 流已结束,无法继续正常流程
|
||||
}
|
||||
|
||||
// 其他流错误 → 向用户报错
|
||||
logger.error(`Stream processing error: ${errMsg}`);
|
||||
yield {
|
||||
type: 'error',
|
||||
|
|
@ -341,37 +296,6 @@ export async function* agentLoop(
|
|||
finalMessage = await stream.finalMessage();
|
||||
} catch (error) {
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
|
||||
// finalMessage 也可能因 structured output 验证失败而抛错
|
||||
if (errMsg.includes('Failed to parse structured output') && assistantBlocks.length > 0) {
|
||||
logger.warn(`finalMessage structured output validation failed, recovering`);
|
||||
const accumulatedText = assistantBlocks
|
||||
.filter(b => b.type === 'text' && 'text' in b)
|
||||
.map(b => (b as any).text)
|
||||
.join('');
|
||||
|
||||
if (accumulatedText) {
|
||||
try {
|
||||
const parsed = JSON.parse(accumulatedText);
|
||||
if (parsed.answer) {
|
||||
yield { type: 'text', content: parsed.answer, timestamp: Date.now() };
|
||||
if (parsed.followUp) {
|
||||
if (/?|\?/.test(parsed.followUp)) {
|
||||
yield { type: 'text', content: '\n\n' + parsed.followUp, timestamp: Date.now() };
|
||||
} else {
|
||||
yield { type: 'text', content: parsed.followUp, timestamp: Date.now() };
|
||||
}
|
||||
}
|
||||
} else {
|
||||
yield { type: 'text', content: accumulatedText, timestamp: Date.now() };
|
||||
}
|
||||
} catch {
|
||||
yield { type: 'text', content: accumulatedText, timestamp: Date.now() };
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
logger.error(`Failed to get final message: ${errMsg}`);
|
||||
yield {
|
||||
type: 'error',
|
||||
|
|
@ -418,44 +342,6 @@ export async function* agentLoop(
|
|||
.map(b => b.text)
|
||||
.join('');
|
||||
|
||||
// ---- Structured Output 解析:从 JSON 中提取 answer + followUp ----
|
||||
// 长度控制由 提示词 + Schema描述 + LLM-Judge 三层负责
|
||||
// 这里只做 JSON 解析 + 安全网(2000字极端情况保护)
|
||||
if (params.outputConfig && responseText) {
|
||||
try {
|
||||
const parsed = JSON.parse(responseText);
|
||||
if (parsed.answer) {
|
||||
const SAFETY_NET = 2000; // 极端情况安全网,正常不会触发
|
||||
let answer = parsed.answer;
|
||||
if (answer.length > SAFETY_NET) {
|
||||
answer = smartTruncate(answer, SAFETY_NET);
|
||||
logger.warn(
|
||||
`[Turn ${currentTurn + 1}] Answer hit safety net: ${parsed.answer.length} → ${answer.length} chars (intent=${parsed.intent})`,
|
||||
);
|
||||
}
|
||||
|
||||
yield { type: 'text', content: answer, timestamp: Date.now() };
|
||||
|
||||
// followUp:含?→ 跟进问题;否则直接追加(模型可能将内容分到 followUp)
|
||||
if (parsed.followUp) {
|
||||
if (/?|\?/.test(parsed.followUp)) {
|
||||
const followUp = smartTruncate(parsed.followUp, MAX_FOLLOWUP_LENGTH);
|
||||
yield { type: 'text', content: '\n\n' + followUp, timestamp: Date.now() };
|
||||
} else {
|
||||
// 非问题的 followUp 直接追加(防止内容丢失)
|
||||
yield { type: 'text', content: parsed.followUp, timestamp: Date.now() };
|
||||
}
|
||||
}
|
||||
} else {
|
||||
yield { type: 'text', content: responseText, timestamp: Date.now() };
|
||||
}
|
||||
logger.debug(`[Turn ${currentTurn + 1}] Structured output intent: ${parsed.intent}`);
|
||||
} catch {
|
||||
logger.warn(`[Turn ${currentTurn + 1}] Structured output parse failed, falling back to raw text`);
|
||||
yield { type: 'text', content: responseText, timestamp: Date.now() };
|
||||
}
|
||||
}
|
||||
|
||||
// --- Evaluation Gate (optional, zero-config safe) ---
|
||||
if (params.evaluationGate) {
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -21,10 +21,6 @@ import {
|
|||
CoordinatorPromptConfig,
|
||||
} from '../prompts/coordinator-system-prompt';
|
||||
|
||||
// Structured Output
|
||||
import { zodOutputFormat } from '@anthropic-ai/sdk/helpers/zod';
|
||||
import { CoordinatorResponseSchema } from '../schemas/coordinator-response.schema';
|
||||
|
||||
// Specialist Services
|
||||
import { PolicyExpertService } from '../specialists/policy-expert.service';
|
||||
import { AssessmentExpertService } from '../specialists/assessment-expert.service';
|
||||
|
|
@ -290,7 +286,6 @@ export class CoordinatorAgentService implements OnModuleInit {
|
|||
currentTurnCount: 0,
|
||||
currentCostUsd: 0,
|
||||
evaluationGate: evaluationGateCallback,
|
||||
outputConfig: { format: zodOutputFormat(CoordinatorResponseSchema) as any },
|
||||
};
|
||||
|
||||
// 6. Create tool executor
|
||||
|
|
|
|||
|
|
@ -1,52 +1,17 @@
|
|||
/**
|
||||
* Coordinator Response Schema — Structured Output
|
||||
* Coordinator Response Schema — 已弃用
|
||||
*
|
||||
* 强制 Coordinator 输出结构化 JSON,包含意图分类和简洁回答。
|
||||
* 通过 Anthropic API 的 output_config 实现硬约束。
|
||||
* 历史说明:
|
||||
* 此文件曾定义 Zod schema 用于 Anthropic API 的 output_config (Structured Output),
|
||||
* 强制模型输出 JSON { intent, answer, followUp }。
|
||||
*
|
||||
* 移除原因(2026-02):
|
||||
* 1. Structured Output 阻塞了流式输出(JSON 片段无法展示给用户)
|
||||
* 2. Zod 验证 intent 枚举频繁崩溃,已出现 4 次 hotfix
|
||||
* 3. followUp 字段导致模型将内容分割丢失
|
||||
* 4. intent 分类仅用于日志,对用户无价值
|
||||
* 5. 回答精准度完全由 System Prompt (Layer 1) + LLM-Judge (Layer 3) 控制
|
||||
*
|
||||
* 保留此文件(空)以避免 git blame 丢失历史上下文。
|
||||
* 如果未来需要重新引入结构化输出,可参考 git 历史。
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
|
||||
/**
 * Zod object schema for the coordinator's structured reply.
 *
 * Fields, as declared below:
 * - `intent`: one of seven string labels classifying the user's message.
 * - `answer`: required string carrying the reply text.
 * - `followUp`: optional string carrying a follow-up question.
 *
 * Neither string field has a `.max()` constraint, so the length hints in the
 * `describe()` texts are advisory only and are not enforced by this schema.
 */
export const CoordinatorResponseSchema = z.object({
|
||||
intent: z.enum([
|
||||
'factual_question', // Direct factual question, e.g. "What are the requirements for X?"
|
||||
'yes_no_question', // Yes/no judgement, e.g. "Can I apply for X?"
|
||||
'comparison_question', // Comparison or choice, e.g. "Which is better, A or B?"
|
||||
'assessment_request', // Request for an assessment, e.g. "Please evaluate my case"
|
||||
'objection_expression', // Emotion or hesitation, e.g. "Too expensive" / "Afraid of being rejected"
|
||||
'detailed_consultation', // Complex policy consultation where the user explicitly asks for detail
|
||||
'casual_chat', // Small talk or greeting, e.g. "Hello"
|
||||
]),
|
||||
answer: z.string().describe('直接回答用户的文本,简洁精准,默认100字以内'),
|
||||
followUp: z.string().optional().describe('直接对用户提出的跟进问题(必须以?结尾,必须是用户能看到的自然语言问题,禁止填写内部策略备注)'),
|
||||
});
|
||||
|
||||
export type CoordinatorResponse = z.infer<typeof CoordinatorResponseSchema>;
|
||||
|
||||
/** followUp 问题最大长度 */
|
||||
export const MAX_FOLLOWUP_LENGTH = 80;
|
||||
|
||||
/**
 * Truncates `text` to roughly `maxLen` UTF-16 code units, preferring to cut at
 * a sentence boundary rather than mid-sentence.
 *
 * Behaviour:
 * - If `text` already fits (`text.length <= maxLen`) it is returned unchanged.
 * - Otherwise the first `maxLen` code units are searched for the last sentence
 *   ender. If that ender lies in the second half of the window
 *   (`index > maxLen * 0.5`), the text is cut right after the ender's first
 *   character and returned without an ellipsis.
 * - Otherwise the text is hard-cut at `maxLen` and `'...'` is appended, so the
 *   result can be up to three code units longer than `maxLen`.
 *
 * NOTE(review): the sentence enders are written as `\u` escapes for the CJK
 * full stop and the full-width `!`, `?` and `;`. The bare ASCII `'!'`, `'?'`
 * and `';'` seen in the rendered copy of this function are presumed to be
 * compatibility-normalized full-width marks (the list separately contains
 * `'! '` and `'? '`) — confirm against the repository history.
 *
 * @param text   The string to shorten.
 * @param maxLen Maximum number of UTF-16 code units to keep before the cut.
 * @returns The original text, a sentence-aligned prefix, or a hard-cut prefix
 *          followed by `'...'`.
 */
export function smartTruncate(text: string, maxLen: number): string {
  if (text.length <= maxLen) return text;

  let truncated = text.substring(0, maxLen);

  // CJK full stop, full-width ! ? ;, then ASCII enders that must be followed
  // by a space so that "3.14" or "a?b=c" are not treated as sentence ends.
  const sentenceEnders = ['\u3002', '\uFF01', '\uFF1F', '\uFF1B', '. ', '! ', '? '];

  // Position of the right-most sentence ender inside the window (-1 if none).
  let lastEnd = -1;
  for (const ender of sentenceEnders) {
    lastEnd = Math.max(lastEnd, truncated.lastIndexOf(ender));
  }

  // Only honour a boundary in the second half of the window; an earlier one
  // would discard too much of the allowed length.
  if (lastEnd > maxLen * 0.5) {
    return text.substring(0, lastEnd + 1);
  }

  // No usable boundary: hard cut. Drop a dangling high surrogate so the
  // result never ends in half of an astral character (e.g. an emoji).
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }

  return truncated + '...';
}
|
||||
|
|
|
|||
|
|
@ -280,8 +280,6 @@ export interface AgentLoopParams {
|
|||
turnCount: number,
|
||||
agentsUsed: string[],
|
||||
) => Promise<import('../coordinator/evaluation-gate.service').GateResult>;
|
||||
/** Structured Output — 传入 Claude API 的 output_config */
|
||||
outputConfig?: { format: Record<string, unknown> };
|
||||
}
|
||||
|
||||
/** Claude API 消息格式 */
|
||||
|
|
|
|||
Loading…
Reference in New Issue