fix(server-party): heartbeat during waitForAllParticipants
Problem:
- In co_managed_keygen sessions, the server-party joins and then waits for the external party to join.
- No heartbeat is sent during this wait period (up to 5 minutes).
- The session-coordinator has a 120-second inactivity timeout.
- As a result, the server-party is marked as timed_out/failed while it is still waiting.

Fix:
- Send a heartbeat in the waitForAllParticipants polling loop.
- Add a Heartbeat method to the MessageRouterClient interface.
- The heartbeat is sent every 2 seconds, once per poll interval.
- A heartbeat failure only logs a warning; it does not block the wait.

Generated with Claude Code
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
c0e292535d
commit
576679ae30
|
|
@ -56,6 +56,7 @@ type SessionStatusInfo struct {
|
||||||
type MessageRouterClient interface {
|
type MessageRouterClient interface {
|
||||||
RouteMessage(ctx context.Context, sessionID uuid.UUID, fromParty string, toParties []string, roundNumber int, payload []byte) error
|
RouteMessage(ctx context.Context, sessionID uuid.UUID, fromParty string, toParties []string, roundNumber int, payload []byte) error
|
||||||
SubscribeMessages(ctx context.Context, sessionID uuid.UUID, partyID string) (<-chan *MPCMessage, error)
|
SubscribeMessages(ctx context.Context, sessionID uuid.UUID, partyID string) (<-chan *MPCMessage, error)
|
||||||
|
Heartbeat(ctx context.Context, partyID string) (int32, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SessionInfo contains session information from coordinator
|
// SessionInfo contains session information from coordinator
|
||||||
|
|
@ -127,7 +128,7 @@ func (uc *ParticipateKeygenUseCase) Execute(
|
||||||
// For co_managed_keygen: wait for all N participants to join before proceeding
|
// For co_managed_keygen: wait for all N participants to join before proceeding
|
||||||
// This is necessary because server parties join immediately but external party joins later
|
// This is necessary because server parties join immediately but external party joins later
|
||||||
if sessionInfo.SessionType == "co_managed_keygen" {
|
if sessionInfo.SessionType == "co_managed_keygen" {
|
||||||
sessionInfo, err = uc.waitForAllParticipants(ctx, input.SessionID, sessionInfo)
|
sessionInfo, err = uc.waitForAllParticipants(ctx, input.SessionID, sessionInfo, input.PartyID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
@ -395,6 +396,7 @@ func (uc *ParticipateKeygenUseCase) waitForAllParticipants(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
sessionID uuid.UUID,
|
sessionID uuid.UUID,
|
||||||
initialSessionInfo *SessionInfo,
|
initialSessionInfo *SessionInfo,
|
||||||
|
partyID string,
|
||||||
) (*SessionInfo, error) {
|
) (*SessionInfo, error) {
|
||||||
logger.Info("Waiting for all participants to join co_managed_keygen session",
|
logger.Info("Waiting for all participants to join co_managed_keygen session",
|
||||||
zap.String("session_id", sessionID.String()),
|
zap.String("session_id", sessionID.String()),
|
||||||
|
|
@ -418,6 +420,17 @@ func (uc *ParticipateKeygenUseCase) waitForAllParticipants(
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return nil, ctx.Err()
|
return nil, ctx.Err()
|
||||||
case <-time.After(pollInterval):
|
case <-time.After(pollInterval):
|
||||||
|
// Send heartbeat to keep the party alive during wait
|
||||||
|
// This prevents the session-coordinator from timing out this party
|
||||||
|
_, heartbeatErr := uc.messageRouter.Heartbeat(ctx, partyID)
|
||||||
|
if heartbeatErr != nil {
|
||||||
|
logger.Warn("Failed to send heartbeat during wait",
|
||||||
|
zap.String("session_id", sessionID.String()),
|
||||||
|
zap.String("party_id", partyID),
|
||||||
|
zap.Error(heartbeatErr))
|
||||||
|
// Continue anyway - heartbeat failure is not fatal
|
||||||
|
}
|
||||||
|
|
||||||
// Get full session status including participants
|
// Get full session status including participants
|
||||||
statusInfo, err := uc.sessionClient.GetSessionStatusFull(ctx, sessionID)
|
statusInfo, err := uc.sessionClient.GetSessionStatusFull(ctx, sessionID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue