feat: add STT provider switching (OpenAI ↔ Speechmatics) in settings
- Add VoiceConfig entity/repo/service/controller in agent-service for per-tenant STT provider persistence (default: speechmatics)
- Add Speechmatics STT plugin in voice-agent with livekit-plugins-speechmatics
- Modify voice-agent entrypoint for 3-way STT selection: metadata > agent-service config > env var fallback
- Add "Voice" section in web-admin settings page with STT provider dropdown
- Add i18n translations (en/zh) for voice settings
- Add SPEECHMATICS_API_KEY env var in docker-compose

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7cb185e0cd
commit
f9c47de04b
|
|
@ -354,6 +354,7 @@ services:
|
||||||
- OPENAI_STT_MODEL=${OPENAI_STT_MODEL:-gpt-4o-transcribe}
|
- OPENAI_STT_MODEL=${OPENAI_STT_MODEL:-gpt-4o-transcribe}
|
||||||
- OPENAI_TTS_MODEL=${OPENAI_TTS_MODEL:-gpt-4o-mini-tts}
|
- OPENAI_TTS_MODEL=${OPENAI_TTS_MODEL:-gpt-4o-mini-tts}
|
||||||
- OPENAI_TTS_VOICE=${OPENAI_TTS_VOICE:-coral}
|
- OPENAI_TTS_VOICE=${OPENAI_TTS_VOICE:-coral}
|
||||||
|
- SPEECHMATICS_API_KEY=${SPEECHMATICS_API_KEY:-}
|
||||||
depends_on:
|
depends_on:
|
||||||
livekit-server:
|
livekit-server:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
|
||||||
|
|
@ -48,9 +48,9 @@ interface AccountInfo {
|
||||||
/* Constants */
|
/* Constants */
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
type SectionId = 'general' | 'notifications' | 'apikeys' | 'theme' | 'account';
|
type SectionId = 'general' | 'notifications' | 'apikeys' | 'theme' | 'account' | 'voice';
|
||||||
|
|
||||||
const SECTION_IDS: SectionId[] = ['general', 'notifications', 'apikeys', 'theme', 'account'];
|
const SECTION_IDS: SectionId[] = ['general', 'notifications', 'apikeys', 'theme', 'account', 'voice'];
|
||||||
|
|
||||||
const TIMEZONES = [
|
const TIMEZONES = [
|
||||||
'UTC', 'America/New_York', 'America/Chicago', 'America/Denver',
|
'UTC', 'America/New_York', 'America/Chicago', 'America/Denver',
|
||||||
|
|
@ -116,6 +116,7 @@ export default function SettingsPage() {
|
||||||
{activeSection === 'apikeys' && <ApiKeysSection />}
|
{activeSection === 'apikeys' && <ApiKeysSection />}
|
||||||
{activeSection === 'theme' && <ThemeSection />}
|
{activeSection === 'theme' && <ThemeSection />}
|
||||||
{activeSection === 'account' && <AccountSection />}
|
{activeSection === 'account' && <AccountSection />}
|
||||||
|
{activeSection === 'voice' && <VoiceSection />}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -785,3 +786,88 @@ function AccountSection() {
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Voice Section */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** Payload exchanged with `/api/v1/agent/voice-config` for the current tenant. */
interface VoiceSettings {
  /** Identifier of the speech-to-text engine, e.g. `'speechmatics'` or `'openai'`. */
  stt_provider: string;
}

/**
 * Choices offered in the STT provider dropdown.
 *
 * `value` is the identifier sent to the backend; `labelKey` is resolved
 * through the `settings` i18n namespace. Declared `as const` so the table
 * cannot be mutated at runtime and the keys keep their literal types.
 */
const STT_PROVIDERS = [
  { value: 'speechmatics', labelKey: 'voice.providers.speechmatics' },
  { value: 'openai', labelKey: 'voice.providers.openai' },
] as const;
|
||||||
|
|
||||||
|
/**
 * Settings panel that lets the current tenant pick the speech-to-text provider.
 *
 * Loads the stored configuration from `/api/v1/agent/voice-config`, shows it in
 * a dropdown, and persists the selection with a PUT to the same endpoint.
 */
function VoiceSection() {
  const { t } = useTranslation('settings');
  const { t: tc } = useTranslation('common');
  const queryClient = useQueryClient();

  const { data, isLoading } = useQuery<VoiceSettings>({
    queryKey: queryKeys.settings.voice(),
    queryFn: () => apiClient<VoiceSettings>('/api/v1/agent/voice-config'),
  });

  // `null` until the user touches the dropdown. The displayed value is derived
  // from the server response instead of being copied into state during render,
  // so no "initialized" flag or render-phase setState is needed.
  const [selectedProvider, setSelectedProvider] = useState<string | null>(null);
  const sttProvider = selectedProvider ?? (data?.stt_provider || 'speechmatics');

  const mutation = useMutation({
    mutationFn: (body: VoiceSettings) =>
      apiClient('/api/v1/agent/voice-config', { method: 'PUT', body }),
    // Refresh every cached settings query once the server accepted the change.
    onSuccess: () => queryClient.invalidateQueries({ queryKey: queryKeys.settings.all }),
  });

  return (
    <div className="bg-card border rounded-lg p-6">
      <h2 className="text-lg font-semibold mb-4">{t('voice.title')}</h2>

      {isLoading ? (
        <p className="text-muted-foreground text-sm">{tc('loading')}</p>
      ) : (
        <div className="space-y-4 max-w-lg">
          <div>
            {/* htmlFor/id tie the label to the control for assistive technology. */}
            <label htmlFor="voice-stt-provider" className="block text-sm font-medium mb-1">
              {t('voice.sttProvider')}
            </label>
            <select
              id="voice-stt-provider"
              className="w-full border rounded-md px-3 py-2 bg-background text-sm"
              value={sttProvider}
              onChange={(e) => setSelectedProvider(e.target.value)}
            >
              {STT_PROVIDERS.map((p) => (
                <option key={p.value} value={p.value}>
                  {t(p.labelKey)}
                </option>
              ))}
            </select>
            <p className="text-xs text-muted-foreground mt-1">
              {t('voice.sttProviderHint')}
            </p>
          </div>

          <button
            onClick={() => mutation.mutate({ stt_provider: sttProvider })}
            disabled={mutation.isPending}
            className="px-4 py-2 bg-primary text-primary-foreground rounded-md text-sm font-medium hover:opacity-90 disabled:opacity-50"
          >
            {mutation.isPending ? tc('saving') : tc('save')}
          </button>

          {mutation.isError && (
            <p className="text-sm text-red-500">
              {/* Narrow at runtime rather than asserting the error type. */}
              {mutation.error instanceof Error ? mutation.error.message : String(mutation.error)}
            </p>
          )}
          {mutation.isSuccess && (
            <p className="text-sm text-green-600">{t('voice.saved')}</p>
          )}
        </div>
      )}
    </div>
  );
}
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,8 @@
|
||||||
"notifications": "Notifications",
|
"notifications": "Notifications",
|
||||||
"apikeys": "API Keys",
|
"apikeys": "API Keys",
|
||||||
"theme": "Theme",
|
"theme": "Theme",
|
||||||
"account": "Account"
|
"account": "Account",
|
||||||
|
"voice": "Voice"
|
||||||
},
|
},
|
||||||
"general": {
|
"general": {
|
||||||
"title": "General Settings",
|
"title": "General Settings",
|
||||||
|
|
@ -68,6 +69,16 @@
|
||||||
"passwordChanged": "Password changed successfully.",
|
"passwordChanged": "Password changed successfully.",
|
||||||
"changing": "Changing..."
|
"changing": "Changing..."
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"title": "Voice Settings",
|
||||||
|
"sttProvider": "Speech-to-Text Provider",
|
||||||
|
"sttProviderHint": "Choose the speech recognition engine for voice sessions.",
|
||||||
|
"providers": {
|
||||||
|
"speechmatics": "Speechmatics (Default)",
|
||||||
|
"openai": "OpenAI (gpt-4o-transcribe)"
|
||||||
|
},
|
||||||
|
"saved": "Voice settings saved."
|
||||||
|
},
|
||||||
"languages": {
|
"languages": {
|
||||||
"en": "English",
|
"en": "English",
|
||||||
"zh": "中文",
|
"zh": "中文",
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,8 @@
|
||||||
"notifications": "通知",
|
"notifications": "通知",
|
||||||
"apikeys": "API 密钥",
|
"apikeys": "API 密钥",
|
||||||
"theme": "主题",
|
"theme": "主题",
|
||||||
"account": "账户"
|
"account": "账户",
|
||||||
|
"voice": "语音"
|
||||||
},
|
},
|
||||||
"general": {
|
"general": {
|
||||||
"title": "通用设置",
|
"title": "通用设置",
|
||||||
|
|
@ -68,6 +69,16 @@
|
||||||
"passwordChanged": "密码修改成功。",
|
"passwordChanged": "密码修改成功。",
|
||||||
"changing": "修改中..."
|
"changing": "修改中..."
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"title": "语音设置",
|
||||||
|
"sttProvider": "语音转文字引擎",
|
||||||
|
"sttProviderHint": "选择语音通话时使用的语音识别引擎。",
|
||||||
|
"providers": {
|
||||||
|
"speechmatics": "Speechmatics(默认)",
|
||||||
|
"openai": "OpenAI (gpt-4o-transcribe)"
|
||||||
|
},
|
||||||
|
"saved": "语音设置已保存。"
|
||||||
|
},
|
||||||
"languages": {
|
"languages": {
|
||||||
"en": "English",
|
"en": "English",
|
||||||
"zh": "中文",
|
"zh": "中文",
|
||||||
|
|
|
||||||
|
|
@ -130,5 +130,6 @@ export const queryKeys = {
|
||||||
apiKeys: () => [...queryKeys.settings.all, 'api-keys'] as const,
|
apiKeys: () => [...queryKeys.settings.all, 'api-keys'] as const,
|
||||||
theme: () => [...queryKeys.settings.all, 'theme'] as const,
|
theme: () => [...queryKeys.settings.all, 'theme'] as const,
|
||||||
account: () => [...queryKeys.settings.all, 'account'] as const,
|
account: () => [...queryKeys.settings.all, 'account'] as const,
|
||||||
|
voice: () => [...queryKeys.settings.all, 'voice'] as const,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -35,8 +35,12 @@ import { StandingOrderRef } from './domain/entities/standing-order.entity';
|
||||||
import { TenantAgentConfig } from './domain/entities/tenant-agent-config.entity';
|
import { TenantAgentConfig } from './domain/entities/tenant-agent-config.entity';
|
||||||
import { AgentConfig } from './domain/entities/agent-config.entity';
|
import { AgentConfig } from './domain/entities/agent-config.entity';
|
||||||
import { HookScript } from './domain/entities/hook-script.entity';
|
import { HookScript } from './domain/entities/hook-script.entity';
|
||||||
|
import { VoiceConfig } from './domain/entities/voice-config.entity';
|
||||||
import { ConversationMessage } from './domain/entities/conversation-message.entity';
|
import { ConversationMessage } from './domain/entities/conversation-message.entity';
|
||||||
import { MessageRepository } from './infrastructure/repositories/message.repository';
|
import { MessageRepository } from './infrastructure/repositories/message.repository';
|
||||||
|
import { VoiceConfigRepository } from './infrastructure/repositories/voice-config.repository';
|
||||||
|
import { VoiceConfigService } from './infrastructure/services/voice-config.service';
|
||||||
|
import { VoiceConfigController } from './interfaces/rest/controllers/voice-config.controller';
|
||||||
import { ConversationContextService } from './domain/services/conversation-context.service';
|
import { ConversationContextService } from './domain/services/conversation-context.service';
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
|
|
@ -45,13 +49,13 @@ import { ConversationContextService } from './domain/services/conversation-conte
|
||||||
DatabaseModule.forRoot(),
|
DatabaseModule.forRoot(),
|
||||||
TypeOrmModule.forFeature([
|
TypeOrmModule.forFeature([
|
||||||
AgentSession, AgentTask, CommandRecord, StandingOrderRef,
|
AgentSession, AgentTask, CommandRecord, StandingOrderRef,
|
||||||
TenantAgentConfig, AgentConfig, HookScript,
|
TenantAgentConfig, AgentConfig, HookScript, VoiceConfig,
|
||||||
ConversationMessage,
|
ConversationMessage,
|
||||||
]),
|
]),
|
||||||
],
|
],
|
||||||
controllers: [
|
controllers: [
|
||||||
AgentController, SessionController, RiskRulesController,
|
AgentController, SessionController, RiskRulesController,
|
||||||
TenantAgentConfigController, AgentConfigController, SkillsController, HooksController,
|
TenantAgentConfigController, AgentConfigController, VoiceConfigController, SkillsController, HooksController,
|
||||||
],
|
],
|
||||||
providers: [
|
providers: [
|
||||||
AgentStreamGateway,
|
AgentStreamGateway,
|
||||||
|
|
@ -70,9 +74,11 @@ import { ConversationContextService } from './domain/services/conversation-conte
|
||||||
MessageRepository,
|
MessageRepository,
|
||||||
TenantAgentConfigRepository,
|
TenantAgentConfigRepository,
|
||||||
AgentConfigRepository,
|
AgentConfigRepository,
|
||||||
|
VoiceConfigRepository,
|
||||||
HookScriptRepository,
|
HookScriptRepository,
|
||||||
TenantAgentConfigService,
|
TenantAgentConfigService,
|
||||||
AgentConfigService,
|
AgentConfigService,
|
||||||
|
VoiceConfigService,
|
||||||
AgentSkillService,
|
AgentSkillService,
|
||||||
HookScriptService,
|
HookScriptService,
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
/**
|
||||||
|
* Per-tenant voice configuration entity.
|
||||||
|
*
|
||||||
|
* Stores STT provider preference per tenant (e.g. 'speechmatics' or 'openai').
|
||||||
|
* Queried by voice-agent at session start to select the appropriate STT engine.
|
||||||
|
*/
|
||||||
|
import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn, UpdateDateColumn } from 'typeorm';
|
||||||
|
|
||||||
|
@Entity('voice_configs')
export class VoiceConfig {
  /** Surrogate primary key, generated as a UUID. */
  @PrimaryGeneratedColumn('uuid')
  id!: string;

  /**
   * Tenant this row belongs to; the unique constraint allows one row per tenant.
   * NOTE(review): the column is limited to 20 characters — confirm that tenant
   * identifiers never exceed that length.
   */
  @Column({
    type: 'varchar',
    length: 20,
    unique: true,
  })
  tenantId!: string;

  /** Selected speech-to-text provider; the column defaults to 'speechmatics'. */
  @Column({
    type: 'varchar',
    length: 30,
    default: 'speechmatics',
  })
  sttProvider!: string;

  /** Creation timestamp (timestamptz column). */
  @CreateDateColumn({ type: 'timestamptz' })
  createdAt!: Date;

  /** Last-update timestamp (timestamptz column). */
  @UpdateDateColumn({ type: 'timestamptz' })
  updatedAt!: Date;
}
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
/**
|
||||||
|
* Repository for VoiceConfig.
|
||||||
|
* Uses standard TypeORM repository (no schema-per-tenant — uses tenantId column filter).
|
||||||
|
*/
|
||||||
|
import { Injectable } from '@nestjs/common';
|
||||||
|
import { InjectRepository } from '@nestjs/typeorm';
|
||||||
|
import { Repository } from 'typeorm';
|
||||||
|
import { VoiceConfig } from '../../domain/entities/voice-config.entity';
|
||||||
|
|
||||||
|
@Injectable()
export class VoiceConfigRepository {
  constructor(
    @InjectRepository(VoiceConfig)
    private readonly voiceConfigs: Repository<VoiceConfig>,
  ) {}

  /**
   * Looks up the voice configuration row for a tenant.
   *
   * @param tenantId Tenant identifier to match against the `tenantId` column.
   * @returns The matching entity, or `null` when none exists.
   */
  async findByTenantId(tenantId: string): Promise<VoiceConfig | null> {
    const match = await this.voiceConfigs.findOneBy({ tenantId });
    return match;
  }

  /**
   * Persists the given entity through the underlying TypeORM repository.
   *
   * @param entity Voice configuration to store.
   * @returns The entity as returned by the repository's `save`.
   */
  async save(entity: VoiceConfig): Promise<VoiceConfig> {
    const stored = await this.voiceConfigs.save(entity);
    return stored;
  }
}
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
/**
|
||||||
|
* Service for managing per-tenant voice configuration (STT provider selection).
|
||||||
|
*/
|
||||||
|
import { Injectable } from '@nestjs/common';
|
||||||
|
import { VoiceConfigRepository } from '../repositories/voice-config.repository';
|
||||||
|
import { VoiceConfig } from '../../domain/entities/voice-config.entity';
|
||||||
|
|
||||||
|
/** Request body accepted when updating a tenant's voice configuration. */
export interface UpdateVoiceConfigDto {
  /** New speech-to-text provider; leave undefined to keep the current value. */
  stt_provider?: string;
}

@Injectable()
export class VoiceConfigService {
  constructor(private readonly repo: VoiceConfigRepository) {}

  /**
   * Fetches the voice configuration stored for a tenant.
   *
   * @param tenantId Tenant identifier.
   * @returns The stored configuration, or `null` if the tenant has none.
   */
  async findByTenantId(tenantId: string): Promise<VoiceConfig | null> {
    const found = await this.repo.findByTenantId(tenantId);
    return found;
  }

  /**
   * Creates the tenant's voice configuration if it is missing, applies the
   * fields present in `dto`, and saves the result.
   *
   * @param tenantId Tenant identifier.
   * @param dto Fields to apply; undefined fields are left untouched.
   * @returns The saved configuration.
   */
  async upsert(tenantId: string, dto: UpdateVoiceConfigDto): Promise<VoiceConfig> {
    const existing = await this.repo.findByTenantId(tenantId);

    let target: VoiceConfig;
    if (existing) {
      target = existing;
    } else {
      // First write for this tenant: start from a blank entity.
      target = new VoiceConfig();
      target.tenantId = tenantId;
    }

    const { stt_provider: requestedProvider } = dto;
    if (requestedProvider !== undefined) {
      target.sttProvider = requestedProvider;
    }

    return this.repo.save(target);
  }
}
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
/**
|
||||||
|
* REST controller for per-tenant voice configuration (STT provider selection).
|
||||||
|
*
|
||||||
|
* Endpoints (JWT validated by Kong gateway):
|
||||||
|
* GET /api/v1/agent/voice-config → Get current tenant's voice config
|
||||||
|
* PUT /api/v1/agent/voice-config → Upsert voice config
|
||||||
|
*/
|
||||||
|
import { BadRequestException, Body, Controller, Get, Headers, Put } from '@nestjs/common';
|
||||||
|
import { VoiceConfigService, UpdateVoiceConfigDto } from '../../../infrastructure/services/voice-config.service';
|
||||||
|
|
||||||
|
/** Response returned when no voice configuration has been stored yet. */
const DEFAULT_CONFIG = {
  stt_provider: 'speechmatics',
};

/** Tenant key used when the request carries no `x-tenant-id` header. */
const FALLBACK_TENANT_ID = 'default';

/** Longest accepted `stt_provider`; matches the varchar(30) column on the VoiceConfig entity. */
const MAX_STT_PROVIDER_LENGTH = 30;

@Controller('api/v1/agent/voice-config')
export class VoiceConfigController {
  constructor(private readonly voiceConfigService: VoiceConfigService) {}

  /**
   * Returns the voice configuration for the calling tenant.
   *
   * Requests without an `x-tenant-id` header read the same fallback row that
   * `upsertConfig` writes for such requests, so a value saved without a tenant
   * header is visible on the next read.
   *
   * @param tenantId Value of the `x-tenant-id` header; may be empty.
   * @returns The stored configuration, or the defaults when none is stored.
   */
  @Get()
  async getConfig(@Headers('x-tenant-id') tenantId: string) {
    const config = await this.voiceConfigService.findByTenantId(tenantId || FALLBACK_TENANT_ID);
    if (config) {
      return {
        tenantId: config.tenantId,
        stt_provider: config.sttProvider,
      };
    }
    // Nothing stored: keep the original response shapes for both cases.
    return tenantId ? { ...DEFAULT_CONFIG, tenantId } : DEFAULT_CONFIG;
  }

  /**
   * Creates or updates the voice configuration for the calling tenant.
   *
   * @param tenantId Value of the `x-tenant-id` header; falls back to `'default'`.
   * @param dto Fields to update.
   * @returns The configuration after the update.
   * @throws BadRequestException when `stt_provider` is present but is not a
   *   string or is longer than the database column allows.
   */
  @Put()
  async upsertConfig(
    @Headers('x-tenant-id') tenantId: string,
    @Body() dto: UpdateVoiceConfigDto,
  ) {
    // The body is untrusted input: it may be missing or carry a non-string value.
    const requested: unknown = dto?.stt_provider;
    if (
      requested !== undefined &&
      (typeof requested !== 'string' || requested.length > MAX_STT_PROVIDER_LENGTH)
    ) {
      throw new BadRequestException(
        `stt_provider must be a string of at most ${MAX_STT_PROVIDER_LENGTH} characters`,
      );
    }

    const config = await this.voiceConfigService.upsert(tenantId || FALLBACK_TENANT_ID, dto ?? {});
    return {
      tenantId: config.tenantId,
      stt_provider: config.sttProvider,
    };
  }
}
|
||||||
|
|
@ -2,6 +2,7 @@ livekit>=1.0.0
|
||||||
livekit-agents>=1.0.0
|
livekit-agents>=1.0.0
|
||||||
livekit-plugins-silero>=1.0.0
|
livekit-plugins-silero>=1.0.0
|
||||||
livekit-plugins-openai>=1.0.0
|
livekit-plugins-openai>=1.0.0
|
||||||
|
livekit-plugins-speechmatics>=1.0.0
|
||||||
faster-whisper==1.2.1
|
faster-whisper==1.2.1
|
||||||
kokoro==0.3.5
|
kokoro==0.3.5
|
||||||
misaki[zh]==0.7.17
|
misaki[zh]==0.7.17
|
||||||
|
|
|
||||||
|
|
@ -199,6 +199,7 @@ async def entrypoint(ctx: JobContext) -> None:
|
||||||
tts_voice = settings.openai_tts_voice
|
tts_voice = settings.openai_tts_voice
|
||||||
tts_style = ""
|
tts_style = ""
|
||||||
engine_type = "claude_agent_sdk"
|
engine_type = "claude_agent_sdk"
|
||||||
|
meta = {}
|
||||||
try:
|
try:
|
||||||
meta_str = ctx.job.metadata or "{}"
|
meta_str = ctx.job.metadata or "{}"
|
||||||
meta = json.loads(meta_str)
|
meta = json.loads(meta_str)
|
||||||
|
|
@ -212,8 +213,27 @@ async def entrypoint(ctx: JobContext) -> None:
|
||||||
logger.info("Auth header present: %s, TTS: voice=%s, style=%s, engine=%s",
|
logger.info("Auth header present: %s, TTS: voice=%s, style=%s, engine=%s",
|
||||||
bool(auth_header), tts_voice, tts_style[:50] if tts_style else "(default)", engine_type)
|
bool(auth_header), tts_voice, tts_style[:50] if tts_style else "(default)", engine_type)
|
||||||
|
|
||||||
# Build STT
|
# ── Resolve STT provider (metadata > agent-service config > env default) ──
|
||||||
if settings.stt_provider == "openai":
|
stt_provider = meta.get("stt_provider", "")
|
||||||
|
if not stt_provider and auth_header:
|
||||||
|
try:
|
||||||
|
import httpx as _httpx_cfg
|
||||||
|
async with _httpx_cfg.AsyncClient(timeout=_httpx_cfg.Timeout(5)) as _cfg_client:
|
||||||
|
_cfg_resp = await _cfg_client.get(
|
||||||
|
f"{settings.agent_service_url}/api/v1/agent/voice-config",
|
||||||
|
headers={"Authorization": auth_header},
|
||||||
|
)
|
||||||
|
if _cfg_resp.status_code == 200:
|
||||||
|
_voice_cfg = _cfg_resp.json()
|
||||||
|
stt_provider = _voice_cfg.get("stt_provider", "")
|
||||||
|
logger.info("Voice config from agent-service: stt_provider=%s", stt_provider)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to fetch voice config from agent-service: %s", e)
|
||||||
|
if not stt_provider:
|
||||||
|
stt_provider = settings.stt_provider # env var fallback
|
||||||
|
|
||||||
|
# ── Build STT ──
|
||||||
|
if stt_provider == "openai":
|
||||||
from livekit.plugins import openai as openai_plugin
|
from livekit.plugins import openai as openai_plugin
|
||||||
import httpx as _httpx
|
import httpx as _httpx
|
||||||
import openai as _openai
|
import openai as _openai
|
||||||
|
|
@ -237,11 +257,15 @@ async def entrypoint(ctx: JobContext) -> None:
|
||||||
"silence_duration_ms": 800,
|
"silence_duration_ms": 800,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
elif stt_provider == "speechmatics":
|
||||||
|
from .plugins.speechmatics_stt import create_speechmatics_stt
|
||||||
|
stt = create_speechmatics_stt(language=settings.whisper_language)
|
||||||
else:
|
else:
|
||||||
stt = LocalWhisperSTT(
|
stt = LocalWhisperSTT(
|
||||||
model=ctx.proc.userdata.get("whisper_model"),
|
model=ctx.proc.userdata.get("whisper_model"),
|
||||||
language=settings.whisper_language,
|
language=settings.whisper_language,
|
||||||
)
|
)
|
||||||
|
logger.info("STT provider selected: %s", stt_provider)
|
||||||
|
|
||||||
# Build TTS
|
# Build TTS
|
||||||
if settings.tts_provider == "openai":
|
if settings.tts_provider == "openai":
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,32 @@
|
||||||
|
"""
|
||||||
|
Speechmatics STT factory for voice-agent.
|
||||||
|
|
||||||
|
Creates a livekit-plugins-speechmatics STT instance configured for
|
||||||
|
the requested language (Mandarin 'cmn' by default) with partial transcripts enabled.
|
||||||
|
|
||||||
|
The SPEECHMATICS_API_KEY environment variable is read automatically
|
||||||
|
by the livekit-plugins-speechmatics package.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from livekit.plugins import speechmatics
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def create_speechmatics_stt(language: str = "cmn") -> speechmatics.STT:
    """Build the Speechmatics speech-to-text engine used by the voice pipeline.

    The engine is created with partial transcripts enabled.

    Args:
        language: Speechmatics language code. Defaults to 'cmn' (Mandarin
            Chinese). Use 'cmn_en' for Mandarin-English bilingual, 'en' for
            English.

    Returns:
        The configured speechmatics.STT instance.
    """
    options = {
        "language": language,
        "enable_partials": True,
    }
    engine = speechmatics.STT(**options)
    logger.info("Speechmatics STT created: language=%s", language)
    return engine
|
||||||
Loading…
Reference in New Issue