From 9a1ecf10ec6f94fa76066210657a015b26172576 Mon Sep 17 00:00:00 2001 From: hailin Date: Mon, 23 Feb 2026 05:30:34 -0800 Subject: [PATCH] fix: add restart policy, global error handlers, and fix tenant schema bug - Add restart: unless-stopped to all 12 Docker services - Add process.on(unhandledRejection/uncaughtException) to all 7 service main.ts - Fix handleEventTrigger using tenantId UUID as schema name instead of slug lookup - Wrap Redis event subscription callbacks in try/catch Co-Authored-By: Claude Opus 4.6 --- deploy/docker/docker-compose.yml | 12 ++ packages/services/agent-service/src/main.ts | 18 ++- packages/services/audit-service/src/main.ts | 18 ++- packages/services/auth-service/src/main.ts | 18 ++- packages/services/comm-service/src/main.ts | 18 ++- .../services/inventory-service/src/main.ts | 18 ++- packages/services/monitor-service/src/main.ts | 18 ++- .../standing-order-executor.service.ts | 127 +++++++++++------- packages/services/ops-service/src/main.ts | 18 ++- 9 files changed, 205 insertions(+), 60 deletions(-) diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml index 96bd93f..f140708 100644 --- a/deploy/docker/docker-compose.yml +++ b/deploy/docker/docker-compose.yml @@ -5,6 +5,7 @@ services: postgres: image: postgres:16-alpine container_name: it0-postgres + restart: unless-stopped environment: POSTGRES_USER: ${POSTGRES_USER:-it0} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-it0_dev} @@ -25,6 +26,7 @@ services: redis: image: redis:7-alpine container_name: it0-redis + restart: unless-stopped ports: - "16379:6379" healthcheck: @@ -40,6 +42,7 @@ services: build: context: ../../packages/gateway container_name: it0-api-gateway + restart: unless-stopped environment: - JWT_SECRET=${JWT_SECRET:-dev-jwt-secret} ports: @@ -77,6 +80,7 @@ services: SERVICE_NAME: auth-service SERVICE_PORT: 3001 container_name: it0-auth-service + restart: unless-stopped ports: - "13001:3001" environment: @@ -111,6 +115,7 @@ services: SERVICE_NAME: agent-service SERVICE_PORT: 3002 container_name: it0-agent-service + restart: unless-stopped ports: - "13002:3002" environment: @@ -145,6 +150,7 @@ services: SERVICE_NAME: ops-service SERVICE_PORT: 3003 container_name: it0-ops-service + restart: unless-stopped ports: - "13003:3003" environment: @@ -177,6 +183,7 @@ services: SERVICE_NAME: inventory-service SERVICE_PORT: 3004 container_name: it0-inventory-service + restart: unless-stopped ports: - "13004:3004" environment: @@ -208,6 +215,7 @@ services: SERVICE_NAME: monitor-service SERVICE_PORT: 3005 container_name: it0-monitor-service + restart: unless-stopped ports: - "13005:3005" environment: @@ -238,6 +246,7 @@ services: SERVICE_NAME: comm-service SERVICE_PORT: 3006 container_name: it0-comm-service + restart: unless-stopped ports: - "13006:3006" environment: @@ -273,6 +282,7 @@ services: SERVICE_NAME: audit-service SERVICE_PORT: 3007 container_name: it0-audit-service + restart: unless-stopped ports: - "13007:3007" environment: @@ -299,6 +309,7 @@ services: build: context: ../../packages/services/voice-service container_name: it0-voice-service + restart: unless-stopped ports: - "13008:3008" environment: @@ -323,6 +334,7 @@ services: build: context: ../../it0-web-admin container_name: it0-web-admin + restart: unless-stopped ports: - "13000:3000" environment: diff --git a/packages/services/agent-service/src/main.ts b/packages/services/agent-service/src/main.ts index e9560c8..5da7689 100644 --- a/packages/services/agent-service/src/main.ts +++ b/packages/services/agent-service/src/main.ts @@ -1,12 +1,26 @@ import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { AgentModule } from './agent.module'; +const logger = new Logger('AgentService'); + +// Prevent process crash from unhandled errors +process.on('unhandledRejection', (reason) => { + logger.error(`Unhandled Rejection: ${reason}`); +}); +process.on('uncaughtException', (error) => { + logger.error(`Uncaught Exception: ${error.message}`, error.stack); +}); + async function bootstrap() { const app = await NestFactory.create(AgentModule); const config = app.get(ConfigService); const port = config.get('AGENT_SERVICE_PORT', 3002); await app.listen(port); - console.log(`agent-service running on port ${port}`); + logger.log(`agent-service running on port ${port}`); } -bootstrap(); +bootstrap().catch((err) => { + logger.error(`Failed to start agent-service: ${err.message}`, err.stack); + process.exit(1); +}); diff --git a/packages/services/audit-service/src/main.ts b/packages/services/audit-service/src/main.ts index 8af064e..25d65e9 100644 --- a/packages/services/audit-service/src/main.ts +++ b/packages/services/audit-service/src/main.ts @@ -1,12 +1,26 @@ import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { AuditModule } from './audit.module'; +const logger = new Logger('AuditService'); + +// Prevent process crash from unhandled errors +process.on('unhandledRejection', (reason) => { + logger.error(`Unhandled Rejection: ${reason}`); +}); +process.on('uncaughtException', (error) => { + logger.error(`Uncaught Exception: ${error.message}`, error.stack); +}); + async function bootstrap() { const app = await NestFactory.create(AuditModule); const config = app.get(ConfigService); const port = config.get('AUDIT_SERVICE_PORT', 3007); await app.listen(port); - console.log(`audit-service running on port ${port}`); + logger.log(`audit-service running on port ${port}`); } -bootstrap(); +bootstrap().catch((err) => { + logger.error(`Failed to start audit-service: ${err.message}`, err.stack); + process.exit(1); +}); diff --git a/packages/services/auth-service/src/main.ts b/packages/services/auth-service/src/main.ts index e32aba0..cc9d217 100644 --- a/packages/services/auth-service/src/main.ts +++ b/packages/services/auth-service/src/main.ts @@ -1,12 +1,26 @@ import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { AuthModule } from './auth.module'; +const logger = new Logger('AuthService'); + +// Prevent process crash from unhandled errors +process.on('unhandledRejection', (reason) => { + logger.error(`Unhandled Rejection: ${reason}`); +}); +process.on('uncaughtException', (error) => { + logger.error(`Uncaught Exception: ${error.message}`, error.stack); +}); + async function bootstrap() { const app = await NestFactory.create(AuthModule); const config = app.get(ConfigService); const port = config.get('AUTH_SERVICE_PORT', 3001); await app.listen(port); - console.log(`auth-service running on port ${port}`); + logger.log(`auth-service running on port ${port}`); } -bootstrap(); +bootstrap().catch((err) => { + logger.error(`Failed to start auth-service: ${err.message}`, err.stack); + process.exit(1); +}); diff --git a/packages/services/comm-service/src/main.ts b/packages/services/comm-service/src/main.ts index 2169ee7..16a01ba 100644 --- a/packages/services/comm-service/src/main.ts +++ b/packages/services/comm-service/src/main.ts @@ -1,12 +1,26 @@ import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { CommModule } from './comm.module'; +const logger = new Logger('CommService'); + +// Prevent process crash from unhandled errors +process.on('unhandledRejection', (reason) => { + logger.error(`Unhandled Rejection: ${reason}`); +}); +process.on('uncaughtException', (error) => { + logger.error(`Uncaught Exception: ${error.message}`, error.stack); +}); + async function bootstrap() { const app = await NestFactory.create(CommModule); const config = app.get(ConfigService); const port = config.get('COMM_SERVICE_PORT', 3006); await app.listen(port); - console.log(`comm-service running on port ${port}`); + logger.log(`comm-service running on port ${port}`); } -bootstrap(); +bootstrap().catch((err) => { + logger.error(`Failed to start comm-service: ${err.message}`, err.stack); + process.exit(1); +}); diff --git a/packages/services/inventory-service/src/main.ts b/packages/services/inventory-service/src/main.ts index cb388c0..7ac3863 100644 --- a/packages/services/inventory-service/src/main.ts +++ b/packages/services/inventory-service/src/main.ts @@ -1,12 +1,26 @@ import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { InventoryModule } from './inventory.module'; +const logger = new Logger('InventoryService'); + +// Prevent process crash from unhandled errors +process.on('unhandledRejection', (reason) => { + logger.error(`Unhandled Rejection: ${reason}`); +}); +process.on('uncaughtException', (error) => { + logger.error(`Uncaught Exception: ${error.message}`, error.stack); +}); + async function bootstrap() { const app = await NestFactory.create(InventoryModule); const config = app.get(ConfigService); const port = config.get('INVENTORY_SERVICE_PORT', 3004); await app.listen(port); - console.log(`inventory-service running on port ${port}`); + logger.log(`inventory-service running on port ${port}`); } -bootstrap(); +bootstrap().catch((err) => { + logger.error(`Failed to start inventory-service: ${err.message}`, err.stack); + process.exit(1); +}); diff --git a/packages/services/monitor-service/src/main.ts b/packages/services/monitor-service/src/main.ts index dafe2d8..4425211 100644 --- a/packages/services/monitor-service/src/main.ts +++ b/packages/services/monitor-service/src/main.ts @@ -1,12 +1,26 @@ import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { MonitorModule } from './monitor.module'; +const logger = new Logger('MonitorService'); + +// Prevent process crash from unhandled errors +process.on('unhandledRejection', (reason) => { + logger.error(`Unhandled Rejection: ${reason}`); +}); +process.on('uncaughtException', (error) => { + logger.error(`Uncaught Exception: ${error.message}`, error.stack); +}); + async function bootstrap() { const app = await NestFactory.create(MonitorModule); const config = app.get(ConfigService); const port = config.get('MONITOR_SERVICE_PORT', 3005); await app.listen(port); - console.log(`monitor-service running on port ${port}`); + logger.log(`monitor-service running on port ${port}`); } -bootstrap(); +bootstrap().catch((err) => { + logger.error(`Failed to start monitor-service: ${err.message}`, err.stack); + process.exit(1); +}); diff --git a/packages/services/ops-service/src/domain/services/standing-order-executor.service.ts b/packages/services/ops-service/src/domain/services/standing-order-executor.service.ts index 2c10fb5..6611ee5 100644 --- a/packages/services/ops-service/src/domain/services/standing-order-executor.service.ts +++ b/packages/services/ops-service/src/domain/services/standing-order-executor.service.ts @@ -33,25 +33,37 @@ export class StandingOrderExecutorService implements OnModuleInit { async onModuleInit(): Promise { this.logger.log('Subscribing to event-driven standing order triggers...'); - await this.eventBus.subscribe( - EventPatterns.ALERT_FIRED, - 'standing-order-executor', - `executor-${crypto.randomUUID().slice(0, 8)}`, - async (event) => { - this.logger.debug(`Received ALERT_FIRED event: ${event.id}`); - await this.handleEventTrigger(EventPatterns.ALERT_FIRED, event); - }, - ); + try { + await this.eventBus.subscribe( + EventPatterns.ALERT_FIRED, + 'standing-order-executor', + `executor-${crypto.randomUUID().slice(0, 8)}`, + async (event) => { + try { + this.logger.debug(`Received ALERT_FIRED event: ${event.id}`); + await this.handleEventTrigger(EventPatterns.ALERT_FIRED, event); + } catch (err) { + this.logger.error(`ALERT_FIRED handler error: ${err}`); + } + }, + ); - await this.eventBus.subscribe( - EventPatterns.STANDING_ORDER_TRIGGERED, - 'standing-order-executor', - `executor-${crypto.randomUUID().slice(0, 8)}`, - async (event) => { - this.logger.debug(`Received STANDING_ORDER_TRIGGERED event: ${event.id}`); - await this.handleEventTrigger(EventPatterns.STANDING_ORDER_TRIGGERED, event); - }, - ); + await this.eventBus.subscribe( + EventPatterns.STANDING_ORDER_TRIGGERED, + 'standing-order-executor', + `executor-${crypto.randomUUID().slice(0, 8)}`, + async (event) => { + try { + this.logger.debug(`Received STANDING_ORDER_TRIGGERED event: ${event.id}`); + await this.handleEventTrigger(EventPatterns.STANDING_ORDER_TRIGGERED, event); + } catch (err) { + this.logger.error(`STANDING_ORDER_TRIGGERED handler error: ${err}`); + } + }, + ); + } catch (err) { + this.logger.error(`Failed to subscribe to events: ${err}`); + } } /** @@ -120,7 +132,7 @@ export class StandingOrderExecutorService implements OnModuleInit { /** * Handles an incoming event and matches it against event-triggered standing orders. - * Events from Redis carry tenantId — wrap in tenant context. + * Events from Redis carry tenantId — look up slug for correct schema name. */ private async handleEventTrigger( eventType: string, @@ -132,35 +144,58 @@ export class StandingOrderExecutorService implements OnModuleInit { return; } - await TenantContextService.run( - { - tenantId, - tenantName: tenantId, - plan: 'pro', - schemaName: `it0_t_${tenantId}`, - }, - async () => { - const activeOrders = - await this.standingOrderRepo.findByStatus('active'); - const matchingOrders = activeOrders.filter( - (order) => - order.trigger.type === 'event' && - order.trigger.eventType === eventType, - ); + let tenant: { id: string; name: string; slug: string } | undefined; + try { + const rows = await this.dataSource.query( + `SELECT id, name, slug FROM public.tenants WHERE id = $1 AND status = 'active'`, + [tenantId], + ); + tenant = rows[0]; + } catch (err) { + this.logger.error(`Failed to look up tenant ${tenantId}: ${err}`); + return; + } - for (const order of matchingOrders) { - this.logger.log( - `Event match for standing order "${order.name}" (${order.id}) on event ${eventType}`, + if (!tenant) { + this.logger.warn(`Tenant ${tenantId} not found or inactive, skipping event`); + return; + } + + try { + await TenantContextService.run( + { + tenantId: tenant.id, + tenantName: tenant.name, + plan: 'pro', + schemaName: `it0_t_${tenant.slug}`, + }, + async () => { + const activeOrders = + await this.standingOrderRepo.findByStatus('active'); + const matchingOrders = activeOrders.filter( + (order) => + order.trigger.type === 'event' && + order.trigger.eventType === eventType, ); - await this.executeOrder(order, { - triggerType: 'event', - eventType, - eventId: event.id, - eventPayload: event.payload, - }); - } - }, - ); + + for (const order of matchingOrders) { + this.logger.log( + `Event match for standing order "${order.name}" (${order.id}) on event ${eventType}`, + ); + await this.executeOrder(order, { + triggerType: 'event', + eventType, + eventId: event.id, + eventPayload: event.payload, + }); + } + }, + ); + } catch (err) { + this.logger.error( + `Event trigger error for tenant ${tenant.id}: ${err}`, + ); + } } /** diff --git a/packages/services/ops-service/src/main.ts b/packages/services/ops-service/src/main.ts index f561535..0016beb 100644 --- a/packages/services/ops-service/src/main.ts +++ b/packages/services/ops-service/src/main.ts @@ -1,12 +1,26 @@ import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { OpsModule } from './ops.module'; +const logger = new Logger('OpsService'); + +// Prevent process crash from unhandled errors +process.on('unhandledRejection', (reason) => { + logger.error(`Unhandled Rejection: ${reason}`); +}); +process.on('uncaughtException', (error) => { + logger.error(`Uncaught Exception: ${error.message}`, error.stack); +}); + async function bootstrap() { const app = await NestFactory.create(OpsModule); const config = app.get(ConfigService); const port = config.get('OPS_SERVICE_PORT', 3003); await app.listen(port); - console.log(`ops-service running on port ${port}`); + logger.log(`ops-service running on port ${port}`); } -bootstrap(); +bootstrap().catch((err) => { + logger.error(`Failed to start ops-service: ${err.message}`, err.stack); + process.exit(1); +});