diff --git a/packages/services/agent-service/src/infrastructure/dingtalk/dingtalk-router.service.ts b/packages/services/agent-service/src/infrastructure/dingtalk/dingtalk-router.service.ts index d23b680..868145a 100644 --- a/packages/services/agent-service/src/infrastructure/dingtalk/dingtalk-router.service.ts +++ b/packages/services/agent-service/src/infrastructure/dingtalk/dingtalk-router.service.ts @@ -29,6 +29,8 @@ * - DingTalk API response capped at 256 KB (prevents memory spike on bad response) * - Bridge (OpenClaw) response also capped at 256 KB * - Dual routing: senderStaffId (OAuth binding) + senderId (code binding) both handled + * - Bridge task timeout explicitly set to 55s (bridge default 25s is too short for LLM) + * - sessionWebhook expiry fallback: if webhook expires before LLM replies, uses batchSend * - Periodic cleanup for all in-memory maps (5 min interval) */ @@ -78,7 +80,7 @@ const OAUTH_STATE_TTL_MS = 10 * 60 * 1000; // 10 min const TOKEN_REFRESH_BUFFER = 300; // seconds before expiry to proactively refresh const WS_RECONNECT_BASE_MS = 2_000; const WS_RECONNECT_MAX_MS = 60_000; -const TASK_TIMEOUT_S = 30; +const TASK_TIMEOUT_S = 55; // seconds — bridge default is 25s; must pass explicitly const DEDUP_TTL_MS = 10 * 60 * 1000; const RATE_LIMIT_PER_MIN = 10; const QUEUE_MAX_DEPTH = 5; @@ -622,8 +624,10 @@ export class DingTalkRouterService implements OnModuleInit, OnModuleDestroy { prompt: text, sessionKey: `agent:main:dt-${userId}`, idempotencyKey: msg.msgId, + // Pass explicit timeout to bridge — default is 25s which is too short for LLM calls. + timeoutSeconds: TASK_TIMEOUT_S, }, - (TASK_TIMEOUT_S + 5) * 1000, + (TASK_TIMEOUT_S + 10) * 1000, ); if (result.ok && result.result !== undefined) { @@ -638,7 +642,41 @@ export class DingTalkRouterService implements OnModuleInit, OnModuleDestroy { reply = '与小龙虾通信时出现错误,请稍后重试。'; } - this.reply(msg, reply); + // Try sessionWebhook first; if it has expired by the time we have a reply (LLM took + // longer than ~30s), fall back to proactive batchSend so the reply still reaches the user. + const webhookExpiry = msg.sessionWebhookExpiredTime > 1e11 + ? msg.sessionWebhookExpiredTime + : msg.sessionWebhookExpiredTime * 1000; + + if (Date.now() <= webhookExpiry) { + this.reply(msg, reply); + } else { + this.logger.warn( + `sessionWebhook expired for msgId=${msg.msgId} — falling back to batchSend for userId=${userId}`, + ); + const staffId = msg.senderStaffId?.trim(); + if (staffId) { + this.getToken() + .then((token) => + this.httpsPost( + 'api.dingtalk.com', + '/v1.0/robot/oToMessages/batchSend', + { + robotCode: this.clientId, + userIds: [staffId], + msgKey: 'sampleText', + msgParam: JSON.stringify({ content: reply }), + }, + { 'x-acs-dingtalk-access-token': token }, + ), + ) + .catch((e: Error) => + this.logger.error(`batchSend fallback failed for msgId=${msg.msgId}:`, e.message), + ); + } else { + this.logger.warn(`No staffId for batchSend fallback, reply lost for msgId=${msg.msgId}`); + } + } } // ── Reply (chunked) ────────────────────────────────────────────────────────