/**
 * Intent classifier - first layer.
 * Quickly analyses the user's intent and decides the reply strategy.
 */

/**
 * Intent categories produced by the classifier.
 *
 * Every member is a string enum whose value equals its name, so the value can
 * be logged or serialised as-is.
 */
export enum IntentType {
  /** Simple query - answered directly, no tools required. */
  SIMPLE_QUERY = 'SIMPLE_QUERY',
  /** In-depth consultation - requires a knowledge-base lookup. */
  DEEP_CONSULTATION = 'DEEP_CONSULTATION',
  /** Action needed - a tool has to be invoked to carry out an operation. */
  ACTION_NEEDED = 'ACTION_NEEDED',
  /** Small talk - short, friendly reply. */
  CHAT = 'CHAT',
  /** Clarification - the intent is unclear and a follow-up question is needed. */
  CLARIFICATION = 'CLARIFICATION',
  /** Confirmation - the user confirms or rejects the previous answer. */
  CONFIRMATION = 'CONFIRMATION',
}
/** Outcome of classifying a single user message. */
export interface IntentResult {
  /** The intent category assigned to the message. */
  type: IntentType;
  /** Numeric confidence score attached to the chosen category. */
  confidence: number;
  /** Suggested maximum reply length, in characters. */
  maxResponseLength: number;
  /** Whether answering requires calling a tool. */
  needsTools: boolean;
  /** Names of the tools suggested for this message, when any were identified. */
  suggestedTools?: string[];
  /** Key entities detected in the message, keyed by entity name. */
  entities?: Record<string, string>;
  /** Whether the message is a follow-up question (judged from the conversation context). */
  isFollowUp: boolean;
}
/** One turn of the conversation history passed to the classifier. */
interface Message {
  /** Who produced the turn. */
  role: 'user' | 'assistant';
  /** Raw text of the turn. */
  content: string;
}
/**
 * Rule-based intent classifier.
 *
 * Classifies a message using only regular expressions and keyword tables, so
 * no API call is involved.
 */
export class IntentClassifier {
  /** Phrases that mark a short, factual question. */
  private readonly simpleQueryPatterns: RegExp[] = [
    /^(什么是|是什么|哪个|哪些|多少|几|怎么样|好不好)/,
    /^(请问|想问|问一下)/,
    /(是多少|要多久|需要什么|有哪些)/,
    /^(可以吗|能不能|行不行)/,
  ];

  /** Phrases that mark a request for an in-depth explanation. */
  private readonly deepConsultationPatterns: RegExp[] = [
    /(怎么办|如何|怎样|怎么做|该怎么)/,
    /(详细|具体|解释|说明|介绍)/,
    /(比较|对比|区别|差异)/,
    /(流程|步骤|过程|程序)/,
    /(条件|要求|资格|标准)/,
    /(材料|文件|清单|准备)/,
    /(评估|分析|建议|推荐)/,
  ];

  /** Phrases that ask the assistant to carry out an operation. */
  private readonly actionPatterns: RegExp[] = [
    /(帮我|帮忙|请帮|麻烦)/,
    /(查询|查一下|搜索|找一下|查找)/,
    /(计算|算一下|估算)/,
    /(预约|申请|提交|办理)/,
    /(发送|转发|通知)/,
  ];

  /**
   * Greetings, thanks, farewells, acknowledgements and laughter.
   *
   * The ASCII tokens require a trailing word boundary so that ordinary words
   * sharing the prefix ("history", "high", "hire") are not taken for a
   * greeting; a repeated last letter or syllable ("hiii", "byebye") is still
   * accepted.
   */
  private readonly chatPatterns: RegExp[] = [
    /^(你好|您好|嗨|早|晚|(?:hi+|hello+)\b)/i,
    /^(谢谢|感谢|多谢|thanks+\b)/i,
    /^(再见|拜拜|(?:bye)+\b|88)/i,
    /^(好的|明白|知道了|了解|收到)/,
    /(哈哈|呵呵|嘿嘿|😀|👍|🙏)/,
  ];

  /** Openers that confirm, reject, or extend the previous answer. */
  private readonly confirmationPatterns: RegExp[] = [
    /^(是的|对|对的|没错|正确|是啊|嗯)/,
    /^(不是|不对|错了|不|否)/,
    /^(还有|另外|还想问|补充)/,
  ];

  /** Tool name -> keywords whose presence in the message suggests that tool. */
  private readonly toolKeywords: Record<string, string[]> = {
    'knowledge-search': ['知识', '文章', '政策', '规定', '信息', '资料'],
    'search-web': ['最新', '新闻', '近期', '实时', '当前', '现在'],
    'get-weather': ['天气', '温度', '下雨', '晴天'],
    'experience-recall': ['之前', '上次', '经验', '类似'],
    'user-memory-recall': ['我之前', '我上次', '我的', '记得我'],
    'immigration-assessment': ['评估', '打分', '分数', '资格'],
    'calculate': ['计算', '算', '多少钱', '费用'],
  };

  /** Immigration category -> keywords (including visa subclass numbers). */
  private readonly immigrationCategories: Record<string, string[]> = {
    'skilled-migration': ['技术移民', '189', '190', '491', '打分', '职业评估', 'EOI'],
    'employer-sponsored': ['雇主担保', '482', '494', '186', '雇主', '工签'],
    'business-investment': ['商业移民', '投资移民', '188', '132', '商业', '投资'],
    'family-reunion': ['家庭团聚', '配偶', '父母', '子女', '820', '801', '143'],
    'student-visa': ['学生签证', '500', '485', '留学', '毕业生'],
    'visitor-visa': ['旅游签', '600', '访客', '探亲'],
  };

  /**
   * Classify the intent of a user message.
   *
   * Rules are evaluated in order and the first match wins:
   * blank message -> chat -> confirmation (follow-ups only) -> action ->
   * in-depth consultation -> simple query -> too short to understand ->
   * immigration keywords -> default simple query.
   *
   * @param userMessage Raw message text; surrounding whitespace is ignored.
   * @param conversationHistory Earlier turns, used only to decide whether the
   *   message is a follow-up. Defaults to an empty history.
   * @returns The chosen intent together with reply-length and tool hints.
   */
  classify(
    userMessage: string,
    conversationHistory: Message[] = [],
  ): IntentResult {
    const text = userMessage.trim();
    const isFollowUp = this.detectFollowUp(text, conversationHistory);

    // 0. A blank message carries no intent at all. Without this guard a blank
    //    message sent mid-conversation counts as a follow-up, skips rule 6 and
    //    ends up as a default simple query.
    if (text.length === 0) {
      return {
        type: IntentType.CLARIFICATION,
        confidence: 0.7,
        maxResponseLength: 150,
        needsTools: false,
        isFollowUp,
      };
    }

    // 1. Small talk.
    // NOTE(review): a greeting followed by a real question (e.g. "你好,请问...")
    // also ends up here because the patterns only look at the prefix.
    if (this.matchPatterns(text, this.chatPatterns)) {
      return {
        type: IntentType.CHAT,
        confidence: 0.9,
        maxResponseLength: 100,
        needsTools: false,
        isFollowUp,
      };
    }

    // 2. Confirmation / rejection - only meaningful as a reply to something.
    if (this.matchPatterns(text, this.confirmationPatterns) && isFollowUp) {
      return {
        type: IntentType.CONFIRMATION,
        confidence: 0.85,
        maxResponseLength: 200,
        needsTools: false,
        isFollowUp: true,
      };
    }

    // 3. Action needed - an action phrase or any tool keyword is enough.
    const actionTools = this.detectActionTools(text);
    if (this.matchPatterns(text, this.actionPatterns) || actionTools.length > 0) {
      return {
        type: IntentType.ACTION_NEEDED,
        confidence: 0.8,
        maxResponseLength: 500,
        needsTools: true,
        suggestedTools: actionTools.length > 0 ? actionTools : undefined,
        entities: this.extractEntities(text),
        isFollowUp,
      };
    }

    // 4. In-depth consultation.
    if (this.matchPatterns(text, this.deepConsultationPatterns)) {
      return {
        type: IntentType.DEEP_CONSULTATION,
        confidence: 0.85,
        maxResponseLength: 800,
        needsTools: true,
        suggestedTools: ['knowledge-search'],
        entities: this.extractEntities(text),
        isFollowUp,
      };
    }

    // 5. Simple query.
    if (this.matchPatterns(text, this.simpleQueryPatterns)) {
      return {
        type: IntentType.SIMPLE_QUERY,
        confidence: 0.8,
        maxResponseLength: 300,
        needsTools: false,
        entities: this.extractEntities(text),
        isFollowUp,
      };
    }

    // 6. Too short to interpret and nothing earlier to attach it to.
    if (text.length < 5 && !isFollowUp) {
      return {
        type: IntentType.CLARIFICATION,
        confidence: 0.7,
        maxResponseLength: 150,
        needsTools: false,
        isFollowUp,
      };
    }

    // 7. No phrase matched: an immigration keyword alone is treated as a
    //    (lower-confidence) consultation.
    if (this.detectImmigrationCategory(text) !== null) {
      return {
        type: IntentType.DEEP_CONSULTATION,
        confidence: 0.7,
        maxResponseLength: 600,
        needsTools: true,
        suggestedTools: ['knowledge-search'],
        entities: this.extractEntities(text),
        isFollowUp,
      };
    }

    // Fallback: simple query.
    return {
      type: IntentType.SIMPLE_QUERY,
      confidence: 0.6,
      maxResponseLength: 400,
      needsTools: false,
      entities: this.extractEntities(text),
      isFollowUp,
    };
  }

  /**
   * Decide whether the message continues the existing conversation.
   *
   * @param text Trimmed message text.
   * @param history Earlier turns; with no history nothing can be a follow-up.
   * @returns `true` when the message refers back to earlier context.
   */
  private detectFollowUp(text: string, history: Message[]): boolean {
    if (history.length === 0) return false;

    // Demonstratives / back-references anywhere in the message.
    const pronouns = ['这个', '那个', '它', '这', '那', '上面', '刚才', '前面'];
    if (pronouns.some(p => text.includes(p))) return true;

    // Short message that does not mention "我": treated as a question with
    // the subject omitted.
    if (text.length < 20 && !text.includes('我')) return true;

    // Message that opens with a connective.
    const connectors = ['那', '然后', '接着', '还有', '另外', '所以'];
    if (connectors.some(c => text.startsWith(c))) return true;

    return false;
  }

  /**
   * Test a message against a list of patterns.
   *
   * @returns `true` when at least one pattern matches.
   */
  private matchPatterns(text: string, patterns: RegExp[]): boolean {
    return patterns.some(p => p.test(text));
  }

  /**
   * Collect the tools whose keywords occur in the message.
   *
   * @returns Tool names in the declaration order of `toolKeywords`; empty when
   *   no keyword occurs.
   */
  private detectActionTools(text: string): string[] {
    const tools: string[] = [];
    for (const [tool, keywords] of Object.entries(this.toolKeywords)) {
      if (keywords.some(kw => text.includes(kw))) {
        tools.push(tool);
      }
    }
    return tools;
  }

  /**
   * Find the first immigration category with a keyword in the message.
   *
   * @returns The category name, or `null` when no keyword occurs.
   */
  private detectImmigrationCategory(text: string): string | null {
    for (const [category, keywords] of Object.entries(this.immigrationCategories)) {
      if (keywords.some(kw => text.includes(kw))) {
        return category;
      }
    }
    return null;
  }

  /**
   * Extract the entities the classifier knows about.
   *
   * @returns An object holding any of `visaSubclass`, `category` and
   *   `occupation` that were found; empty when none were.
   */
  private extractEntities(text: string): Record<string, string> {
    const entities: Record<string, string> = {};

    // Visa subclass number, delimited by word boundaries so that it is not
    // picked out of a longer run of ASCII digits or letters.
    const visaSubclass =
      /\b(189|190|491|482|494|186|188|132|820|801|143|500|485|600)\b/.exec(text)?.[1];
    if (visaSubclass !== undefined) {
      entities.visaSubclass = visaSubclass;
    }

    // Immigration category.
    const category = this.detectImmigrationCategory(text);
    if (category) {
      entities.category = category;
    }

    // Occupation.
    const occupation = /(会计|工程师|IT|程序员|护士|厨师|电工|木工|焊工)/.exec(text)?.[1];
    if (occupation !== undefined) {
      entities.occupation = occupation;
    }

    return entities;
  }
}
// Shared singleton instance.
export const intentClassifier = new IntentClassifier();