// iconsulting/packages/services/conversation-service/src/infrastructure/claude/claude-agent.service.ts

import { Injectable, OnModuleInit } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import Anthropic from '@anthropic-ai/sdk';
import { ImmigrationToolsService } from './tools/immigration-tools.service';
import { TokenUsageService } from './token-usage.service';
import { buildSystemPrompt, SystemPromptConfig } from './prompts/system-prompt';
import { KnowledgeClientService } from '../knowledge/knowledge-client.service';
import { intentClassifier, IntentResult, IntentType } from './intent-classifier';
import { responseGate } from './response-gate';
/**
 * Metadata for a file attached to a chat message.
 */
export interface FileAttachment {
  id: string;
  // Original client-side filename; used verbatim in text placeholders sent to the model.
  originalName: string;
  mimeType: string;
  // Coarse category; only 'image' and 'document' get special handling in this file.
  type: 'image' | 'document' | 'audio' | 'video' | 'other';
  // File size — presumably bytes; not read anywhere in this file, TODO confirm.
  size: number;
  // URL the service fetches to inline image bytes for the Vision API.
  downloadUrl?: string;
  thumbnailUrl?: string;
}
/**
 * Per-request conversation state passed into the agent.
 */
export interface ConversationContext {
  userId: string;
  conversationId: string;
  // Long-term memory snippets — not read within this file; presumably consumed
  // by the prompt builder or tools. TODO confirm with callers.
  userMemory?: string[];
  // Prior turns of the conversation — assumed oldest-first, TODO confirm ordering.
  // Attachments on past user turns are re-inlined as multimodal content.
  previousMessages?: Array<{
    role: 'user' | 'assistant';
    content: string;
    attachments?: FileAttachment[];
  }>;
}
/**
 * One event emitted by the sendMessage streaming generator.
 * 'text' carries `content`; 'tool_use'/'tool_result' carry the tool fields;
 * 'end' is the terminal event.
 */
export interface StreamChunk {
  type: 'text' | 'tool_use' | 'tool_result' | 'end';
  // Present when type === 'text': one streamed text delta.
  content?: string;
  // Present for tool events.
  toolName?: string;
  toolInput?: Record<string, unknown>;
  toolResult?: unknown;
}
@Injectable()
export class ClaudeAgentService implements OnModuleInit {
  /** Single model id used for every Anthropic call so usage records stay consistent. */
  private static readonly MODEL = 'claude-sonnet-4-20250514';

  /** Image MIME types the Claude Vision API accepts as base64 sources. */
  private static readonly SUPPORTED_IMAGE_TYPES = new Set([
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
  ]);

  private client: Anthropic;
  private systemPromptConfig: SystemPromptConfig;

  constructor(
    private configService: ConfigService,
    private immigrationToolsService: ImmigrationToolsService,
    private knowledgeClient: KnowledgeClientService,
    private tokenUsageService: TokenUsageService,
  ) {}

  /**
   * Initialize the Anthropic client and the default system-prompt configuration.
   * Runs once when the Nest module is bootstrapped.
   */
  onModuleInit() {
    const baseUrl = this.configService.get<string>('ANTHROPIC_BASE_URL');
    const isProxyUrl =
      baseUrl && (baseUrl.includes('67.223.119.33') || baseUrl.match(/^\d+\.\d+\.\d+\.\d+/));
    // If using an IP-based proxy, disable TLS certificate verification.
    // SECURITY NOTE(review): NODE_TLS_REJECT_UNAUTHORIZED='0' disables TLS verification
    // for the ENTIRE process, not just Anthropic traffic — consider a per-client
    // HTTPS agent scoped to this SDK instance instead.
    if (isProxyUrl) {
      console.log(`Using Anthropic proxy (TLS verification disabled): ${baseUrl}`);
      process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
    }
    this.client = new Anthropic({
      apiKey: this.configService.get<string>('ANTHROPIC_API_KEY'),
      baseURL: baseUrl || undefined,
    });
    if (baseUrl && !isProxyUrl) {
      console.log(`Using Anthropic API base URL: ${baseUrl}`);
    }
    // Initialize with default config (Chinese persona strings are runtime values).
    this.systemPromptConfig = {
      identity: '专业、友善、耐心的香港移民顾问',
      conversationStyle: '专业但不生硬,用简洁明了的语言解答',
    };
  }

  /**
   * Update system prompt configuration (for evolution).
   * Merges the partial config over the current one; unspecified keys are preserved.
   */
  updateSystemPromptConfig(config: Partial<SystemPromptConfig>) {
    this.systemPromptConfig = {
      ...this.systemPromptConfig,
      ...config,
    };
  }

  /**
   * Calculate max_tokens for a reply based on the classified intent.
   * Chinese text runs roughly 1.5 tokens/character; 1.8 is used as a safety margin.
   */
  private calculateMaxTokens(intent: IntentResult): number {
    const tokensPerChar = 1.8;
    const baseTokens = Math.round(intent.maxResponseLength * tokensPerChar);
    // Per-intent hard caps keep small talk short while allowing longer consultations.
    switch (intent.type) {
      case IntentType.CHAT:
        return Math.min(200, baseTokens); // small talk: hard cap at 200 tokens
      case IntentType.SIMPLE_QUERY:
        return Math.min(600, baseTokens); // simple query: cap 600 tokens (~300 chars)
      case IntentType.CLARIFICATION:
        return Math.min(300, baseTokens); // clarification: cap 300 tokens
      case IntentType.CONFIRMATION:
        return Math.min(400, baseTokens); // confirmation: cap 400 tokens
      case IntentType.DEEP_CONSULTATION:
        return Math.min(1600, Math.max(800, baseTokens)); // deep consultation: 800-1600
      case IntentType.ACTION_NEEDED:
        return Math.min(1000, Math.max(500, baseTokens)); // action needed: 500-1000
      default:
        return 1024; // fallback for unclassified intents
    }
  }

  /**
   * Fetch and format approved system experiences for prompt injection.
   * Returns '暂无' ("none yet") when nothing is found or the lookup fails —
   * experience injection is best-effort and must never break a conversation.
   */
  private async getAccumulatedExperience(query: string): Promise<string> {
    try {
      const experiences = await this.knowledgeClient.searchExperiences({
        query,
        activeOnly: true,
        limit: 5,
      });
      if (experiences.length === 0) {
        return '暂无';
      }
      return experiences
        .map((exp, index) => `${index + 1}. [${exp.experienceType}] ${exp.content}`)
        .join('\n');
    } catch (error) {
      console.error('[ClaudeAgent] Failed to fetch experiences:', error);
      return '暂无';
    }
  }

  /**
   * Build multimodal content blocks for the Claude Vision API.
   * Images are inlined as base64 first (Claude processes images before text),
   * documents become text references, and the user's text comes last.
   */
  private async buildMultimodalContent(
    text: string,
    attachments?: FileAttachment[],
  ): Promise<Anthropic.ContentBlockParam[]> {
    const content: Anthropic.ContentBlockParam[] = [];
    if (attachments && attachments.length > 0) {
      for (const attachment of attachments) {
        if (attachment.type === 'image' && attachment.downloadUrl) {
          if (!ClaudeAgentService.SUPPORTED_IMAGE_TYPES.has(attachment.mimeType)) {
            // Fix: previously any MIME type was cast unchecked and sent as media_type,
            // which the API rejects for unsupported formats. Fall back to a text
            // reference so the rest of the message still goes through.
            content.push({
              type: 'text',
              text: `[Attached image: ${attachment.originalName}]`,
            });
            continue;
          }
          try {
            // Fetch the image and convert to base64. Failures are logged and the
            // attachment skipped so one broken image doesn't abort the message.
            const response = await fetch(attachment.downloadUrl);
            if (response.ok) {
              const buffer = await response.arrayBuffer();
              const base64Data = Buffer.from(buffer).toString('base64');
              content.push({
                type: 'image',
                source: {
                  type: 'base64',
                  // Safe narrowing: membership in SUPPORTED_IMAGE_TYPES was checked above.
                  media_type: attachment.mimeType as
                    | 'image/jpeg'
                    | 'image/png'
                    | 'image/gif'
                    | 'image/webp',
                  data: base64Data,
                },
              });
            }
          } catch (error) {
            console.error(`Failed to fetch image ${attachment.originalName}:`, error);
          }
        } else if (attachment.type === 'document') {
          // Documents are not inlined; the model only sees a named reference.
          content.push({
            type: 'text',
            text: `[Attached document: ${attachment.originalName}]`,
          });
        }
      }
    }
    // Add the text message after all attachment blocks.
    if (text) {
      content.push({
        type: 'text',
        text,
      });
    }
    return content;
  }

  /**
   * Fire-and-forget token-usage accounting (errors are logged, never thrown).
   * Extracted so both exit paths of the tool loop record usage consistently.
   */
  private recordTokenUsage(
    context: ConversationContext,
    intent: IntentResult,
    stats: {
      inputTokens: number;
      outputTokens: number;
      cacheCreationTokens: number;
      cacheReadTokens: number;
      toolCalls: number;
      responseLength: number;
      latencyMs: number;
    },
  ): void {
    this.tokenUsageService
      .recordUsage({
        userId: context.userId,
        conversationId: context.conversationId,
        model: ClaudeAgentService.MODEL,
        intentType: intent.type,
        ...stats,
      })
      .catch(err => console.error('[ClaudeAgent] Failed to record token usage:', err));
  }

  /**
   * Send a message and get a streaming response with tool-loop support.
   * Uses Prompt Caching to reduce costs (~90% savings on the cached system prompt).
   * Supports multimodal messages with image attachments.
   * Implements the 3-layer architecture: Intent Classification -> ReAct Agent -> Response Gate.
   */
  async *sendMessage(
    message: string,
    context: ConversationContext,
    attachments?: FileAttachment[],
  ): AsyncGenerator<StreamChunk> {
    // ===== Layer 1: intent classification =====
    const conversationHistory =
      context.previousMessages?.map(msg => ({
        role: msg.role,
        content: msg.content,
      })) || [];
    const intent = intentClassifier.classify(message, conversationHistory);
    console.log(`[ClaudeAgent] Intent classified: ${intent.type}, maxLength: ${intent.maxResponseLength}, needsTools: ${intent.needsTools}`);
    // ===== Layer 2: ReAct agent =====
    const tools = this.immigrationToolsService.getTools();
    // Fetch relevant system experiences and inject them into the prompt.
    const accumulatedExperience = await this.getAccumulatedExperience(message);
    const dynamicConfig: SystemPromptConfig = {
      ...this.systemPromptConfig,
      accumulatedExperience,
      intentHint: intent, // pass the classification result into the prompt
    };
    const systemPrompt = buildSystemPrompt(dynamicConfig);
    // Build the messages array, re-inlining attachments on prior user turns.
    const messages: Anthropic.MessageParam[] = [];
    if (context.previousMessages) {
      for (const msg of context.previousMessages) {
        if (msg.attachments && msg.attachments.length > 0 && msg.role === 'user') {
          const multimodalContent = await this.buildMultimodalContent(msg.content, msg.attachments);
          messages.push({
            role: msg.role,
            content: multimodalContent,
          });
        } else {
          messages.push({
            role: msg.role,
            content: msg.content,
          });
        }
      }
    }
    // Add the current message (with multimodal support).
    if (attachments && attachments.length > 0) {
      const multimodalContent = await this.buildMultimodalContent(message, attachments);
      messages.push({
        role: 'user',
        content: multimodalContent,
      });
    } else {
      messages.push({
        role: 'user',
        content: message,
      });
    }
    // Tool loop — continue until we get a final response (no tool use).
    const maxIterations = 10; // safety limit
    let iterations = 0;
    // max_tokens is adjusted per the intent classification.
    const maxTokens = this.calculateMaxTokens(intent);
    // System prompt with cache_control for Prompt Caching.
    // Cache TTL is 5 minutes; cache hits cost only 10% of the normal input price.
    const systemWithCache: Anthropic.TextBlockParam[] = [
      {
        type: 'text',
        text: systemPrompt,
        cache_control: { type: 'ephemeral' },
      },
    ];
    // Full response text, collected for the response-quality gate.
    let fullResponseText = '';
    // Token usage accumulated across all loop iterations.
    const startTime = Date.now();
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalCacheCreationTokens = 0;
    let totalCacheReadTokens = 0;
    let toolCallCount = 0;
    while (iterations < maxIterations) {
      iterations++;
      try {
        // Create a streaming message with the cached system prompt.
        const stream = await this.client.messages.stream({
          model: ClaudeAgentService.MODEL,
          max_tokens: maxTokens,
          system: systemWithCache,
          messages,
          tools: tools as Anthropic.Tool[],
        });
        let currentToolUse: {
          id: string;
          name: string;
          inputJson: string;
          input: Record<string, unknown>;
        } | null = null;
        // All tool uses and content blocks produced by this response.
        const toolUses: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
        const assistantContent: Anthropic.ContentBlockParam[] = [];
        for await (const event of stream) {
          if (event.type === 'content_block_start') {
            if (event.content_block.type === 'tool_use') {
              currentToolUse = {
                id: event.content_block.id,
                name: event.content_block.name,
                inputJson: '',
                input: {},
              };
            }
          } else if (event.type === 'content_block_delta') {
            if (event.delta.type === 'text_delta') {
              fullResponseText += event.delta.text; // collect for the gate check
              yield {
                type: 'text',
                content: event.delta.text,
              };
            } else if (event.delta.type === 'input_json_delta' && currentToolUse) {
              // Tool input streams as partial JSON fragments; accumulate, parse at block end.
              currentToolUse.inputJson += event.delta.partial_json || '';
            }
          } else if (event.type === 'content_block_stop') {
            if (currentToolUse) {
              // Parse the complete accumulated JSON.
              try {
                currentToolUse.input = JSON.parse(currentToolUse.inputJson || '{}');
              } catch (e) {
                console.error('Failed to parse tool input JSON:', currentToolUse.inputJson, e);
                currentToolUse.input = {};
              }
              toolUses.push({
                id: currentToolUse.id,
                name: currentToolUse.name,
                input: currentToolUse.input,
              });
              yield {
                type: 'tool_use',
                toolName: currentToolUse.name,
                toolInput: currentToolUse.input,
              };
              currentToolUse = null;
            }
          }
        }
        // Await the final assembled message to extract usage info.
        const finalMsg = await stream.finalMessage();
        if (finalMsg.usage) {
          totalInputTokens += finalMsg.usage.input_tokens || 0;
          totalOutputTokens += finalMsg.usage.output_tokens || 0;
          // Prompt Caching token counts (if the API returns them).
          // NOTE(review): the double cast papers over an SDK typing gap — remove it
          // once the installed SDK exposes cache_*_input_tokens on Usage directly.
          const usage = finalMsg.usage as unknown as Record<string, number>;
          totalCacheCreationTokens += usage.cache_creation_input_tokens || 0;
          totalCacheReadTokens += usage.cache_read_input_tokens || 0;
        }
        // No tool uses means this is the final answer — gate, record, and finish.
        if (toolUses.length === 0) {
          // ===== Layer 3: response-quality gate (log-only) =====
          if (fullResponseText) {
            const gateResult = responseGate.check(fullResponseText, intent, message);
            console.log(`[ClaudeAgent] Response gate: passed=${gateResult.passed}, length=${fullResponseText.length}/${intent.maxResponseLength}`);
            if (!gateResult.passed && gateResult.suggestions) {
              console.log(`[ClaudeAgent] Gate suggestions: ${gateResult.suggestions.join(', ')}`);
            }
          }
          // ===== Record token usage =====
          this.recordTokenUsage(context, intent, {
            inputTokens: totalInputTokens,
            outputTokens: totalOutputTokens,
            cacheCreationTokens: totalCacheCreationTokens,
            cacheReadTokens: totalCacheReadTokens,
            toolCalls: toolCallCount,
            responseLength: fullResponseText.length,
            latencyMs: Date.now() - startTime,
          });
          yield { type: 'end' };
          return;
        }
        toolCallCount += toolUses.length;
        // Rebuild the assistant message (text + tool_use blocks) for the next turn.
        for (const block of finalMsg.content) {
          if (block.type === 'text') {
            assistantContent.push({ type: 'text', text: block.text });
          } else if (block.type === 'tool_use') {
            assistantContent.push({
              type: 'tool_use',
              id: block.id,
              name: block.name,
              input: block.input as Record<string, unknown>,
            });
          }
        }
        messages.push({
          role: 'assistant',
          content: assistantContent,
        });
        // Execute all requested tools and collect their results.
        const toolResults: Anthropic.ToolResultBlockParam[] = [];
        for (const toolUse of toolUses) {
          const result = await this.immigrationToolsService.executeTool(
            toolUse.name,
            toolUse.input,
            context,
          );
          yield {
            type: 'tool_result',
            toolName: toolUse.name,
            toolResult: result,
          };
          toolResults.push({
            type: 'tool_result',
            tool_use_id: toolUse.id,
            content: JSON.stringify(result),
          });
        }
        // Tool results go back as a user message; loop for Claude's follow-up.
        messages.push({
          role: 'user',
          content: toolResults,
        });
      } catch (error) {
        console.error('Claude API error:', error);
        throw error;
      }
    }
    console.error('Tool loop exceeded maximum iterations');
    // Fix: usage accumulated across completed iterations was previously dropped
    // on this exit path — record it so billing still reflects the work done.
    this.recordTokenUsage(context, intent, {
      inputTokens: totalInputTokens,
      outputTokens: totalOutputTokens,
      cacheCreationTokens: totalCacheCreationTokens,
      cacheReadTokens: totalCacheReadTokens,
      toolCalls: toolCallCount,
      responseLength: fullResponseText.length,
      latencyMs: Date.now() - startTime,
    });
    yield { type: 'end' };
  }

  /**
   * Non-streaming message for simple queries.
   * Uses Prompt Caching for cost optimization.
   * NOTE(review): tools are advertised to the model but tool_use blocks are never
   * executed here — if the model decides to call a tool, the extracted text may
   * be an incomplete answer. Confirm this is intentional for "simple" queries.
   */
  async sendMessageSync(
    message: string,
    context: ConversationContext,
  ): Promise<string> {
    const tools = this.immigrationToolsService.getTools();
    // Fetch relevant system experiences and inject them into the prompt.
    const accumulatedExperience = await this.getAccumulatedExperience(message);
    const dynamicConfig: SystemPromptConfig = {
      ...this.systemPromptConfig,
      accumulatedExperience,
    };
    const systemPrompt = buildSystemPrompt(dynamicConfig);
    const messages: Anthropic.MessageParam[] = [];
    if (context.previousMessages) {
      for (const msg of context.previousMessages) {
        messages.push({
          role: msg.role,
          content: msg.content,
        });
      }
    }
    messages.push({
      role: 'user',
      content: message,
    });
    // System prompt with cache_control for Prompt Caching.
    const systemWithCache: Anthropic.TextBlockParam[] = [
      {
        type: 'text',
        text: systemPrompt,
        cache_control: { type: 'ephemeral' },
      },
    ];
    const response = await this.client.messages.create({
      model: ClaudeAgentService.MODEL,
      max_tokens: 4096,
      system: systemWithCache,
      messages,
      tools: tools as Anthropic.Tool[],
    });
    // Concatenate only the text blocks of the response.
    let result = '';
    for (const block of response.content) {
      if (block.type === 'text') {
        result += block.text;
      }
    }
    return result;
  }

  /**
   * One-shot analysis call with no system prompt or tools (for the evolution service).
   * Returns the concatenated text blocks of the model's reply.
   */
  async analyze(prompt: string): Promise<string> {
    const response = await this.client.messages.create({
      model: ClaudeAgentService.MODEL,
      max_tokens: 8192,
      messages: [
        {
          role: 'user',
          content: prompt,
        },
      ],
    });
    let result = '';
    for (const block of response.content) {
      if (block.type === 'text') {
        result += block.text;
      }
    }
    return result;
  }
}