package main

import (
	"context"
	"database/sql"
	"flag"
	"fmt"
	"net"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"

	"github.com/gin-gonic/gin"
	_ "github.com/lib/pq"
	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/reflection"

	pb "github.com/rwadurian/mpc-system/api/grpc/router/v1"
	"github.com/rwadurian/mpc-system/pkg/config"
	"github.com/rwadurian/mpc-system/pkg/logger"
	grpcadapter "github.com/rwadurian/mpc-system/services/message-router/adapters/input/grpc"
	"github.com/rwadurian/mpc-system/services/message-router/adapters/output/memory"
	"github.com/rwadurian/mpc-system/services/message-router/adapters/output/postgres"
	"github.com/rwadurian/mpc-system/services/message-router/application/use_cases"
	"github.com/rwadurian/mpc-system/services/message-router/domain"
)

func main() {
	// Parse flags
	configPath := flag.String("config", "", "Path to config file")
	flag.Parse()

	// Load configuration
	cfg, err := config.Load(*configPath)
	if err != nil {
		fmt.Printf("Failed to load config: %v\n", err)
		os.Exit(1)
	}

	// Initialize logger
	if err := logger.Init(&logger.Config{
		Level:    cfg.Logger.Level,
		Encoding: cfg.Logger.Encoding,
	}); err != nil {
		fmt.Printf("Failed to initialize logger: %v\n", err)
		os.Exit(1)
	}
	defer logger.Sync()

	logger.Info("Starting Message Router Service",
		zap.String("environment", cfg.Server.Environment),
		zap.Int("grpc_port", cfg.Server.GRPCPort),
		zap.Int("http_port", cfg.Server.HTTPPort))

	// Initialize database connection
	db, err := initDatabase(cfg.Database)
	if err != nil {
		logger.Fatal("Failed to connect to database", zap.Error(err))
	}
	defer db.Close()

	// Initialize repositories and adapters
	messageRepo := postgres.NewMessagePostgresRepo(db)

	// Initialize in-memory message broker (replaces RabbitMQ)
	messageBroker := memory.NewMessageBrokerAdapter()
	defer messageBroker.Close()

	// Initialize party registry and event broadcaster for party-driven architecture
	partyRegistry := domain.NewPartyRegistry()
	eventBroadcaster := domain.NewSessionEventBroadcaster()

	// Initialize use cases
	routeMessageUC := use_cases.NewRouteMessageUseCase(messageRepo, messageBroker)
	getPendingMessagesUC := use_cases.NewGetPendingMessagesUseCase(messageRepo)

	// Connect to Session Coordinator for proxying session operations.
	// This allows server-parties to connect only to the Message Router.
	coordinatorAddr := os.Getenv("SESSION_COORDINATOR_ADDR")
	if coordinatorAddr == "" {
		coordinatorAddr = "session-coordinator:50051" // Default in docker-compose
	}

	// Note: grpc.NewClient does not dial eagerly; an error here indicates an
	// invalid target or option, and the connection is established lazily on
	// the first RPC.
	var coordinatorConn *grpc.ClientConn
	coordinatorConn, err = grpc.NewClient(coordinatorAddr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	)
	if err != nil {
		logger.Warn("Failed to connect to Session Coordinator for proxying (session operations will be unavailable)",
			zap.String("address", coordinatorAddr),
			zap.Error(err))
	} else {
		defer coordinatorConn.Close()
		logger.Info("Connected to Session Coordinator for proxying session operations",
			zap.String("address", coordinatorAddr))
	}

	// Start message cleanup background job
	go runMessageCleanup(messageRepo)

	// Start stale party detection background job
	go runStalePartyDetection(partyRegistry)

	// Create shutdown context
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Start servers
	errChan := make(chan error, 2)

	// Start gRPC server
	go func() {
		if err := startGRPCServer(cfg, routeMessageUC, getPendingMessagesUC, messageBroker, partyRegistry, eventBroadcaster, messageRepo, coordinatorConn); err != nil {
			errChan <- fmt.Errorf("gRPC server error: %w", err)
		}
	}()
error: %w", err) } }() // Start HTTP server go func() { if err := startHTTPServer(cfg, routeMessageUC, getPendingMessagesUC); err != nil { errChan <- fmt.Errorf("HTTP server error: %w", err) } }() // Wait for shutdown signal sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) select { case sig := <-sigChan: logger.Info("Received shutdown signal", zap.String("signal", sig.String())) case err := <-errChan: logger.Error("Server error", zap.Error(err)) } // Graceful shutdown logger.Info("Shutting down...") cancel() time.Sleep(5 * time.Second) logger.Info("Shutdown complete") _ = ctx } func initDatabase(cfg config.DatabaseConfig) (*sql.DB, error) { const maxRetries = 10 const retryDelay = 2 * time.Second var db *sql.DB var err error for i := 0; i < maxRetries; i++ { db, err = sql.Open("postgres", cfg.DSN()) if err != nil { logger.Warn("Failed to open database connection, retrying...", zap.Int("attempt", i+1), zap.Int("max_retries", maxRetries), zap.Error(err)) time.Sleep(retryDelay * time.Duration(i+1)) continue } db.SetMaxOpenConns(cfg.MaxOpenConns) db.SetMaxIdleConns(cfg.MaxIdleConns) db.SetConnMaxLifetime(cfg.ConnMaxLife) // Test connection with Ping if err = db.Ping(); err != nil { logger.Warn("Failed to ping database, retrying...", zap.Int("attempt", i+1), zap.Int("max_retries", maxRetries), zap.Error(err)) db.Close() time.Sleep(retryDelay * time.Duration(i+1)) continue } // Verify database is actually usable with a simple query ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) var result int err = db.QueryRowContext(ctx, "SELECT 1").Scan(&result) cancel() if err != nil { logger.Warn("Database ping succeeded but query failed, retrying...", zap.Int("attempt", i+1), zap.Int("max_retries", maxRetries), zap.Error(err)) db.Close() time.Sleep(retryDelay * time.Duration(i+1)) continue } logger.Info("Connected to PostgreSQL and verified connectivity", zap.Int("attempt", i+1)) return db, nil } return nil, fmt.Errorf("failed to connect to database after %d retries: %w", maxRetries, err) } func startGRPCServer( cfg *config.Config, routeMessageUC *use_cases.RouteMessageUseCase, getPendingMessagesUC *use_cases.GetPendingMessagesUseCase, messageBroker *memory.MessageBrokerAdapter, partyRegistry *domain.PartyRegistry, eventBroadcaster *domain.SessionEventBroadcaster, messageRepo *postgres.MessagePostgresRepo, coordinatorConn *grpc.ClientConn, ) error { listener, err := net.Listen("tcp", fmt.Sprintf(":%d", cfg.Server.GRPCPort)) if err != nil { return err } grpcServer := grpc.NewServer() // Create and register the message router gRPC handler with party registry and event broadcaster messageRouterServer := grpcadapter.NewMessageRouterServer( routeMessageUC, getPendingMessagesUC, messageBroker, partyRegistry, eventBroadcaster, messageRepo, ) // Set coordinator connection for proxying session operations // This allows server-parties to only connect to Message Router if coordinatorConn != nil { messageRouterServer.SetCoordinatorConnection(coordinatorConn) } pb.RegisterMessageRouterServer(grpcServer, messageRouterServer) // Enable reflection for debugging reflection.Register(grpcServer) logger.Info("Starting gRPC server", zap.Int("port", cfg.Server.GRPCPort)) return grpcServer.Serve(listener) } func startHTTPServer( cfg *config.Config, routeMessageUC *use_cases.RouteMessageUseCase, getPendingMessagesUC *use_cases.GetPendingMessagesUseCase, ) error { if cfg.Server.Environment == "production" { gin.SetMode(gin.ReleaseMode) } router := gin.New() 
	router.Use(gin.Recovery())
	router.Use(gin.Logger())

	// Health check
	router.GET("/health", func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{
			"status":  "healthy",
			"service": "message-router",
		})
	})

	// API routes
	api := router.Group("/api/v1")
	{
		api.POST("/messages/route", func(c *gin.Context) {
			var req struct {
				SessionID   string   `json:"session_id" binding:"required"`
				FromParty   string   `json:"from_party" binding:"required"`
				ToParties   []string `json:"to_parties"`
				RoundNumber int      `json:"round_number"`
				MessageType string   `json:"message_type"`
				Payload     []byte   `json:"payload" binding:"required"`
			}
			if err := c.ShouldBindJSON(&req); err != nil {
				c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
				return
			}

			input := use_cases.RouteMessageInput{
				SessionID:   req.SessionID,
				FromParty:   req.FromParty,
				ToParties:   req.ToParties,
				RoundNumber: req.RoundNumber,
				MessageType: req.MessageType,
				Payload:     req.Payload,
			}

			output, err := routeMessageUC.Execute(c.Request.Context(), input)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			c.JSON(http.StatusOK, gin.H{
				"success":    output.Success,
				"message_id": output.MessageID,
			})
		})

		api.GET("/messages/pending", func(c *gin.Context) {
			input := use_cases.GetPendingMessagesInput{
				SessionID:      c.Query("session_id"),
				PartyID:        c.Query("party_id"),
				AfterTimestamp: 0,
			}

			messages, err := getPendingMessagesUC.Execute(c.Request.Context(), input)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			c.JSON(http.StatusOK, gin.H{"messages": messages})
		})
	}

	logger.Info("Starting HTTP server", zap.Int("port", cfg.Server.HTTPPort))
	return router.Run(fmt.Sprintf(":%d", cfg.Server.HTTPPort))
}

func runMessageCleanup(messageRepo *postgres.MessagePostgresRepo) {
	ticker := time.NewTicker(1 * time.Hour)
	defer ticker.Stop()

	for range ticker.C {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)

		// Delete messages older than 24 hours
		cutoff := time.Now().Add(-24 * time.Hour)
		count, err := messageRepo.DeleteOlderThan(ctx, cutoff)
		cancel()

		if err != nil {
			logger.Error("Failed to cleanup old messages", zap.Error(err))
		} else if count > 0 {
			logger.Info("Cleaned up old messages", zap.Int64("count", count))
		}
	}
}

// runStalePartyDetection periodically checks for stale parties and marks them as offline.
// Parties that haven't sent a heartbeat within the timeout are considered offline.
func runStalePartyDetection(partyRegistry *domain.PartyRegistry) {
	// Check every 30 seconds for stale parties
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()

	// Parties are considered stale if no heartbeat for 2 minutes
	staleTimeout := 2 * time.Minute

	logger.Info("Started stale party detection",
		zap.Duration("check_interval", 30*time.Second),
		zap.Duration("stale_timeout", staleTimeout))

	for range ticker.C {
		staleParties := partyRegistry.MarkStalePartiesOffline(staleTimeout)
		if len(staleParties) > 0 {
			for _, party := range staleParties {
				logger.Warn("Party marked as offline (no heartbeat)",
					zap.String("party_id", party.PartyID),
					zap.String("role", party.Role),
					zap.Time("last_seen", party.LastSeen),
					zap.Bool("has_notification", party.IsOfflineMode()))
			}
		}
	}
}
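
// The shutdown path in main currently relies on a fixed 5-second sleep rather
// than draining the servers. The function below is a hedged sketch of how a
// bounded graceful shutdown could look; it is not called anywhere yet, and it
// assumes main is refactored to construct the *grpc.Server here and to wrap
// the gin router in an *http.Server (gin.Engine implements http.Handler),
// neither of which the current code does.
func gracefulShutdown(ctx context.Context, grpcServer *grpc.Server, httpServer *http.Server, timeout time.Duration) {
	// Drain gRPC: stop accepting new RPCs and wait for in-flight ones to finish.
	grpcDone := make(chan struct{})
	go func() {
		grpcServer.GracefulStop()
		close(grpcDone)
	}()

	// Shut down the HTTP server with a deadline derived from the shutdown context.
	shutdownCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	if err := httpServer.Shutdown(shutdownCtx); err != nil {
		logger.Warn("HTTP server shutdown error", zap.Error(err))
	}

	// Bound the gRPC drain as well; force-stop if it exceeds the deadline.
	select {
	case <-grpcDone:
	case <-time.After(timeout):
		grpcServer.Stop()
	}
}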