fix: refactor voice-agent to official LiveKit v1.x AgentServer pattern

Replace deprecated WorkerOptions(entrypoint_fnc=...) with AgentServer() +
@server.rtc_session() decorator. Use server.setup_fnc for prewarm. Remove
manual ctx.connect() and ctx.wait_for_participant() calls that prevented
the pipeline from properly wiring up VAD→STT→LLM→TTS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-28 12:31:31 -08:00
parent cf60b8733f
commit 00be878a95
1 changed file with 38 additions and 30 deletions

View File

@ -1,9 +1,8 @@
"""
IT0 Voice Agent — LiveKit Agents entry point.
IT0 Voice Agent — LiveKit Agents v1.x entry point.
This is a LiveKit Agent Worker that handles real-time voice conversations.
It connects to the LiveKit server, waits for users to join a room, and runs
the voice pipeline: VAD → STT → LLM (via agent-service) → TTS.
Uses the official AgentServer + @server.rtc_session() pattern.
Pipeline: VAD → STT → LLM (via agent-service) → TTS.
Usage:
python -m src.agent start
@ -12,8 +11,15 @@ Usage:
import json
import logging
from livekit import agents, rtc
from livekit.agents import AgentSession, Agent, RoomInputOptions, JobContext, JobProcess, cli, WorkerOptions, room_io
from livekit.agents import (
Agent,
AgentServer,
AgentSession,
JobContext,
JobProcess,
cli,
room_io,
)
from livekit.plugins import silero
from .config import settings
@ -36,6 +42,19 @@ class IT0VoiceAgent(Agent):
),
)
async def on_enter(self):
"""Greet the user when the agent becomes active.

Asks the session to generate a reply from a fixed Chinese instruction,
which requests a one-sentence greeting that introduces the agent as the
IT0 ops assistant and says what it can help with.
"""
# NOTE(review): the value returned by generate_reply() is neither awaited
# nor stored here — confirm that fire-and-forget is intended.
self.session.generate_reply(
instructions="用一句简短的话打招呼告诉用户你是IT0运维助手可以帮助什么。"
)
# ---------------------------------------------------------------------------
# Server setup
# ---------------------------------------------------------------------------
server = AgentServer()
def prewarm(proc: JobProcess) -> None:
"""Pre-load ML models into shared process memory.
@ -90,17 +109,17 @@ def prewarm(proc: JobProcess) -> None:
logger.info("Prewarm complete.")
async def entrypoint(ctx: JobContext) -> None:
"""Main entrypoint — called for each voice session (room join)."""
await ctx.connect(auto_subscribe=agents.AutoSubscribe.AUDIO_ONLY)
server.setup_fnc = prewarm
# Wait for a participant (the Flutter user) to join
participant = await ctx.wait_for_participant()
logger.info(
"Participant joined: identity=%s, name=%s",
participant.identity,
participant.name,
)
# ---------------------------------------------------------------------------
# Session entrypoint — called for each voice session (room join)
# ---------------------------------------------------------------------------
@server.rtc_session(agent_name="voice-agent")
async def entrypoint(ctx: JobContext) -> None:
"""Main entrypoint — called for each voice session."""
logger.info("New voice session: room=%s", ctx.room.name)
# Extract auth header from job metadata
# The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata,
@ -149,7 +168,7 @@ async def entrypoint(ctx: JobContext) -> None:
auth_header=auth_header,
)
# Create and start AgentSession
# Create and start AgentSession with the full pipeline
session = AgentSession(
vad=ctx.proc.userdata["vad"],
stt=stt,
@ -160,21 +179,10 @@ async def entrypoint(ctx: JobContext) -> None:
await session.start(
agent=IT0VoiceAgent(),
room=ctx.room,
room_options=room_io.RoomOptions(
audio_input=True,
audio_output=True,
participant_identity=participant.identity,
),
)
logger.info("Voice session started for participant %s", participant.identity)
logger.info("Voice session started for room %s", ctx.room.name)
if __name__ == "__main__":
cli.run_app(
WorkerOptions(
entrypoint_fnc=entrypoint,
prewarm_fnc=prewarm,
agent_name="voice-agent",
)
)
cli.run_app(server)