// Package retry provides context-aware retry helpers with exponential backoff
// and gRPC-status-based classification of retryable errors.
package retry

import (
	"context"
	"time"

	"github.com/rwadurian/mpc-system/pkg/logger"
	"go.uber.org/zap"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// Config defines retry configuration.
//
// The "DefaultConfig" values noted on each field are the ones returned by
// DefaultConfig. Do substitutes them for MaxAttempts, MaxBackoff and
// BackoffMultiple when those fields hold unusable (non-positive) values.
// InitialBackoff is always used as given.
type Config struct {
	MaxAttempts     int           // Maximum number of calls to fn, including the first one (DefaultConfig: 3)
	InitialBackoff  time.Duration // Wait before the second attempt; <= 0 means retry without waiting (DefaultConfig: 100ms)
	MaxBackoff      time.Duration // Upper bound for the wait between attempts (DefaultConfig: 5s)
	BackoffMultiple float64       // Factor applied to the wait after every failed attempt (DefaultConfig: 2.0)
}

// DefaultConfig returns default retry configuration.
func DefaultConfig() Config {
	return Config{
		MaxAttempts:     3,
		InitialBackoff:  100 * time.Millisecond,
		MaxBackoff:      5 * time.Second,
		BackoffMultiple: 2.0,
	}
}

// normalized returns a copy of c in which fields that would make the retry
// loop degenerate are replaced by their DefaultConfig values.
func (c Config) normalized() Config {
	def := DefaultConfig()
	if c.MaxAttempts < 1 {
		// Without this, the loop in Do would never call fn and would report success.
		c.MaxAttempts = def.MaxAttempts
	}
	if c.MaxBackoff <= 0 {
		c.MaxBackoff = def.MaxBackoff
	}
	// Written as !(x > 0) so that NaN is rejected as well.
	if !(c.BackoffMultiple > 0) {
		c.BackoffMultiple = def.BackoffMultiple
	}
	return c
}

// nextBackoff returns the wait to use after cur, capped at c.MaxBackoff.
// The comparison is done in floating point before converting back to
// time.Duration so that a very large product cannot overflow the conversion.
func (c Config) nextBackoff(cur time.Duration) time.Duration {
	next := float64(cur) * c.BackoffMultiple
	if next > float64(c.MaxBackoff) {
		return c.MaxBackoff
	}
	return time.Duration(next)
}

// Do executes fn with retry logic.
//
// fn is called at least once and at most cfg.MaxAttempts times. Retrying stops
// early when fn succeeds, when IsRetryable reports false for the returned
// error, or when ctx is done. Between attempts Do waits for the current
// backoff, which starts at cfg.InitialBackoff, is multiplied by
// cfg.BackoffMultiple after each wait and is capped at cfg.MaxBackoff.
//
// operation is only used to label log entries.
//
// It returns the result of the first successful call, or the value and error
// of the last call once retries are exhausted. If ctx is done, ctx.Err() is
// returned instead of fn's error.
func Do[T any](ctx context.Context, cfg Config, operation string, fn func() (T, error)) (T, error) {
	cfg = cfg.normalized()

	var result T
	var lastErr error
	backoff := cfg.InitialBackoff

	for attempt := 1; attempt <= cfg.MaxAttempts; attempt++ {
		result, lastErr = fn()
		if lastErr == nil {
			return result, nil
		}

		// Check if error is retryable
		if !IsRetryable(lastErr) {
			logger.Warn("Non-retryable error, not retrying",
				zap.String("operation", operation),
				zap.Int("attempt", attempt),
				zap.Error(lastErr))
			return result, lastErr
		}

		// Check if context is cancelled
		if ctx.Err() != nil {
			logger.Warn("Context cancelled, stopping retry",
				zap.String("operation", operation),
				zap.Int("attempt", attempt),
				zap.Error(ctx.Err()))
			return result, ctx.Err()
		}

		// Don't wait after the last attempt
		if attempt == cfg.MaxAttempts {
			break
		}

		logger.Warn("Operation failed, retrying",
			zap.String("operation", operation),
			zap.Int("attempt", attempt),
			zap.Int("max_attempts", cfg.MaxAttempts),
			zap.Duration("backoff", backoff),
			zap.Error(lastErr))

		// An explicit timer (rather than time.After) lets us release it
		// immediately when the context wins the race.
		timer := time.NewTimer(backoff)
		select {
		case <-ctx.Done():
			timer.Stop()
			return result, ctx.Err()
		case <-timer.C:
		}

		backoff = cfg.nextBackoff(backoff)
	}

	logger.Error("Operation failed after all retries",
		zap.String("operation", operation),
		zap.Int("attempts", cfg.MaxAttempts),
		zap.Error(lastErr))
	return result, lastErr
}

// DoVoid executes a function that returns only an error with retry logic.
// It is a thin adapter around Do and follows the same retry rules.
func DoVoid(ctx context.Context, cfg Config, operation string, fn func() error) error {
	_, err := Do(ctx, cfg, operation, func() (struct{}, error) {
		return struct{}{}, fn()
	})
	return err
}

// IsRetryable determines if an error is retryable.
//
// A nil error is not retryable. An error from which no gRPC status can be
// extracted is assumed to be a transient (e.g. network) failure and is
// retryable. For gRPC status errors the decision is made by status code; any
// code not listed as retryable below is treated as non-retryable.
func IsRetryable(err error) bool {
	if err == nil {
		return false
	}

	// Check gRPC status codes
	st, ok := status.FromError(err)
	if !ok {
		// Not a gRPC error, assume retryable for network errors
		return true
	}

	switch st.Code() {
	case codes.Unavailable,
		codes.ResourceExhausted,
		codes.Aborted,
		codes.Internal,
		codes.Unknown,
		codes.DeadlineExceeded:
		return true
	case codes.InvalidArgument,
		codes.NotFound,
		codes.AlreadyExists,
		codes.PermissionDenied,
		codes.FailedPrecondition,
		codes.OutOfRange,
		codes.Unimplemented,
		codes.Canceled,
		codes.Unauthenticated:
		// Listed explicitly to document that retrying cannot help here.
		return false
	default:
		return false
	}
}