137 lines
3.3 KiB
Go
137 lines
3.3 KiB
Go
package retry
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/rwadurian/mpc-system/pkg/logger"
|
|
"go.uber.org/zap"
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/status"
|
|
)
|
|
|
|
// Config holds the tunable parameters of the retry loop implemented by Do.
// The defaults mentioned below are the values produced by DefaultConfig.
type Config struct {
	// MaxAttempts is the maximum number of retry attempts (default: 3).
	MaxAttempts int

	// InitialBackoff is the initial backoff duration (default: 100ms).
	InitialBackoff time.Duration

	// MaxBackoff is the maximum backoff duration (default: 5s).
	MaxBackoff time.Duration

	// BackoffMultiple is the backoff multiplier (default: 2.0).
	BackoffMultiple float64
}
|
|
|
|
// DefaultConfig returns default retry configuration
|
|
func DefaultConfig() Config {
|
|
return Config{
|
|
MaxAttempts: 3,
|
|
InitialBackoff: 100 * time.Millisecond,
|
|
MaxBackoff: 5 * time.Second,
|
|
BackoffMultiple: 2.0,
|
|
}
|
|
}
|
|
|
|
// Do executes a function with retry logic
|
|
// Returns the result of the function or the last error after all retries are exhausted
|
|
func Do[T any](ctx context.Context, cfg Config, operation string, fn func() (T, error)) (T, error) {
|
|
var result T
|
|
var lastErr error
|
|
|
|
backoff := cfg.InitialBackoff
|
|
|
|
for attempt := 1; attempt <= cfg.MaxAttempts; attempt++ {
|
|
result, lastErr = fn()
|
|
if lastErr == nil {
|
|
return result, nil
|
|
}
|
|
|
|
// Check if error is retryable
|
|
if !IsRetryable(lastErr) {
|
|
logger.Warn("Non-retryable error, not retrying",
|
|
zap.String("operation", operation),
|
|
zap.Int("attempt", attempt),
|
|
zap.Error(lastErr))
|
|
return result, lastErr
|
|
}
|
|
|
|
// Check if context is cancelled
|
|
if ctx.Err() != nil {
|
|
logger.Warn("Context cancelled, stopping retry",
|
|
zap.String("operation", operation),
|
|
zap.Int("attempt", attempt),
|
|
zap.Error(ctx.Err()))
|
|
return result, ctx.Err()
|
|
}
|
|
|
|
// Don't wait after the last attempt
|
|
if attempt < cfg.MaxAttempts {
|
|
logger.Warn("Operation failed, retrying",
|
|
zap.String("operation", operation),
|
|
zap.Int("attempt", attempt),
|
|
zap.Int("max_attempts", cfg.MaxAttempts),
|
|
zap.Duration("backoff", backoff),
|
|
zap.Error(lastErr))
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return result, ctx.Err()
|
|
case <-time.After(backoff):
|
|
}
|
|
|
|
// Calculate next backoff
|
|
backoff = time.Duration(float64(backoff) * cfg.BackoffMultiple)
|
|
if backoff > cfg.MaxBackoff {
|
|
backoff = cfg.MaxBackoff
|
|
}
|
|
}
|
|
}
|
|
|
|
logger.Error("Operation failed after all retries",
|
|
zap.String("operation", operation),
|
|
zap.Int("attempts", cfg.MaxAttempts),
|
|
zap.Error(lastErr))
|
|
|
|
return result, lastErr
|
|
}
|
|
|
|
// DoVoid executes a function that returns only error with retry logic
|
|
func DoVoid(ctx context.Context, cfg Config, operation string, fn func() error) error {
|
|
_, err := Do(ctx, cfg, operation, func() (struct{}, error) {
|
|
return struct{}{}, fn()
|
|
})
|
|
return err
|
|
}
|
|
|
|
// IsRetryable determines if an error is retryable
|
|
func IsRetryable(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
|
|
// Check gRPC status codes
|
|
st, ok := status.FromError(err)
|
|
if !ok {
|
|
// Not a gRPC error, assume retryable for network errors
|
|
return true
|
|
}
|
|
|
|
switch st.Code() {
|
|
case codes.Unavailable,
|
|
codes.ResourceExhausted,
|
|
codes.Aborted,
|
|
codes.Internal,
|
|
codes.Unknown,
|
|
codes.DeadlineExceeded:
|
|
return true
|
|
case codes.InvalidArgument,
|
|
codes.NotFound,
|
|
codes.AlreadyExists,
|
|
codes.PermissionDenied,
|
|
codes.FailedPrecondition,
|
|
codes.OutOfRange,
|
|
codes.Unimplemented,
|
|
codes.Canceled,
|
|
codes.Unauthenticated:
|
|
return false
|
|
default:
|
|
return false
|
|
}
|
|
}
|