fix(dingtalk): 55s bridge timeout + batchSend fallback for expired webhooks

Root cause of "Bridge call failed" errors: the bridge's /task endpoint defaults
to a 25s agent-reply timeout, but LLM calls through the iConsulting gateway
can take 30-60s. Fix: pass timeoutSeconds=55 explicitly in the POST body.

Also add a batchSend fallback in routeToAgent: if the sessionWebhook has
expired by the time the LLM replies (e.g. the user sent a message, the LLM
took >30s, and the webhook window closed), the reply is now sent via a
proactive batchSend using senderStaffId instead of being silently dropped.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-03-08 23:33:56 -07:00
parent 5874907300
commit 440819add8
1 changed file with 41 additions and 3 deletions

View File

@ -29,6 +29,8 @@
* - DingTalk API response capped at 256 KB (prevents memory spike on bad response) * - DingTalk API response capped at 256 KB (prevents memory spike on bad response)
* - Bridge (OpenClaw) response also capped at 256 KB * - Bridge (OpenClaw) response also capped at 256 KB
* - Dual routing: senderStaffId (OAuth binding) + senderId (code binding) both handled * - Dual routing: senderStaffId (OAuth binding) + senderId (code binding) both handled
* - Bridge task timeout explicitly set to 55s (bridge default 25s is too short for LLM)
* - sessionWebhook expiry fallback: if webhook expires before LLM replies, uses batchSend
* - Periodic cleanup for all in-memory maps (5 min interval) * - Periodic cleanup for all in-memory maps (5 min interval)
*/ */
@ -78,7 +80,7 @@ const OAUTH_STATE_TTL_MS = 10 * 60 * 1000; // 10 min
const TOKEN_REFRESH_BUFFER = 300; // seconds before expiry to proactively refresh const TOKEN_REFRESH_BUFFER = 300; // seconds before expiry to proactively refresh
const WS_RECONNECT_BASE_MS = 2_000; const WS_RECONNECT_BASE_MS = 2_000;
const WS_RECONNECT_MAX_MS = 60_000; const WS_RECONNECT_MAX_MS = 60_000;
const TASK_TIMEOUT_S = 30; const TASK_TIMEOUT_S = 55; // seconds — bridge default is 25s; must pass explicitly
const DEDUP_TTL_MS = 10 * 60 * 1000; const DEDUP_TTL_MS = 10 * 60 * 1000;
const RATE_LIMIT_PER_MIN = 10; const RATE_LIMIT_PER_MIN = 10;
const QUEUE_MAX_DEPTH = 5; const QUEUE_MAX_DEPTH = 5;
@ -622,8 +624,10 @@ export class DingTalkRouterService implements OnModuleInit, OnModuleDestroy {
prompt: text, prompt: text,
sessionKey: `agent:main:dt-${userId}`, sessionKey: `agent:main:dt-${userId}`,
idempotencyKey: msg.msgId, idempotencyKey: msg.msgId,
// Pass explicit timeout to bridge — default is 25s which is too short for LLM calls.
timeoutSeconds: TASK_TIMEOUT_S,
}, },
(TASK_TIMEOUT_S + 5) * 1000, (TASK_TIMEOUT_S + 10) * 1000,
); );
if (result.ok && result.result !== undefined) { if (result.ok && result.result !== undefined) {
@ -638,7 +642,41 @@ export class DingTalkRouterService implements OnModuleInit, OnModuleDestroy {
reply = '与小龙虾通信时出现错误,请稍后重试。'; reply = '与小龙虾通信时出现错误,请稍后重试。';
} }
// Try sessionWebhook first; if it has expired by the time we have a reply (LLM took
// longer than ~30s), fall back to proactive batchSend so the reply still reaches the user.
const webhookExpiry = msg.sessionWebhookExpiredTime > 1e11
? msg.sessionWebhookExpiredTime
: msg.sessionWebhookExpiredTime * 1000;
if (Date.now() <= webhookExpiry) {
this.reply(msg, reply); this.reply(msg, reply);
} else {
this.logger.warn(
`sessionWebhook expired for msgId=${msg.msgId} — falling back to batchSend for userId=${userId}`,
);
const staffId = msg.senderStaffId?.trim();
if (staffId) {
this.getToken()
.then((token) =>
this.httpsPost<unknown>(
'api.dingtalk.com',
'/v1.0/robot/oToMessages/batchSend',
{
robotCode: this.clientId,
userIds: [staffId],
msgKey: 'sampleText',
msgParam: JSON.stringify({ content: reply }),
},
{ 'x-acs-dingtalk-access-token': token },
),
)
.catch((e: Error) =>
this.logger.error(`batchSend fallback failed for msgId=${msg.msgId}:`, e.message),
);
} else {
this.logger.warn(`No staffId for batchSend fallback, reply lost for msgId=${msg.msgId}`);
}
}
} }
// ── Reply (chunked) ──────────────────────────────────────────────────────── // ── Reply (chunked) ────────────────────────────────────────────────────────