fix: refactor voice-agent to official LiveKit v1.x AgentServer pattern

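Replace the deprecated WorkerOptions(entrypoint_fnc=...) with AgentServer() and the @server.rtc_session() decorator, and register prewarm via server.setup_fnc. Remove the manual ctx.connect() and ctx.wait_for_participant() calls, which prevented the pipeline from properly wiring up VAD→STT→LLM→TTS; as a result, session.start() no longer passes explicit room_options (participant_identity). Also add an on_enter() greeting to IT0VoiceAgent.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>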
2026-02-28 12:31:31 -08:00 · 2026-02-28 12:31:31 -08:00 · 00be878a95
parent cf60b8733f
commit 00be878a95
1 changed file with 38 additions and 30 deletions
--- a/packages/services/voice-agent/src/agent.py
+++ b/packages/services/voice-agent/src/agent.py
@ -1,9 +1,8 @@
 """
-IT0 Voice Agent — LiveKit Agents entry point.
+IT0 Voice Agent — LiveKit Agents v1.x entry point.

-This is a LiveKit Agent Worker that handles real-time voice conversations.
-It connects to the LiveKit server, waits for users to join a room, and runs
-the voice pipeline: VAD → STT → LLM (via agent-service) → TTS.
+Uses the official AgentServer + @server.rtc_session() pattern.
+Pipeline: VAD → STT → LLM (via agent-service) → TTS.

 Usage:
    python -m src.agent start
@ -12,8 +11,15 @@ Usage:
 import json
 import logging

-from livekit import agents, rtc
-from livekit.agents import AgentSession, Agent, RoomInputOptions, JobContext, JobProcess, cli, WorkerOptions, room_io
+from livekit.agents import (
+    Agent,
+    AgentServer,
+    AgentSession,
+    JobContext,
+    JobProcess,
+    cli,
+    room_io,
+)
 from livekit.plugins import silero

 from .config import settings
@ -36,6 +42,19 @@ class IT0VoiceAgent(Agent):
            ),
        )

+    async def on_enter(self):
+        """Called when the agent becomes active — greet the user."""
+        self.session.generate_reply(
+            instructions="用一句简短的话打招呼，告诉用户你是IT0运维助手，可以帮助什么。"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Server setup
+# ---------------------------------------------------------------------------
+
+server = AgentServer()
+

 def prewarm(proc: JobProcess) -> None:
    """Pre-load ML models into shared process memory.
@ -90,17 +109,17 @@ def prewarm(proc: JobProcess) -> None:
    logger.info("Prewarm complete.")


-async def entrypoint(ctx: JobContext) -> None:
-    """Main entrypoint — called for each voice session (room join)."""
-    await ctx.connect(auto_subscribe=agents.AutoSubscribe.AUDIO_ONLY)
+server.setup_fnc = prewarm

-    # Wait for a participant (the Flutter user) to join
-    participant = await ctx.wait_for_participant()
-    logger.info(
-        "Participant joined: identity=%s, name=%s",
-        participant.identity,
-        participant.name,
-    )
+
+# ---------------------------------------------------------------------------
+# Session entrypoint — called for each voice session (room join)
+# ---------------------------------------------------------------------------
+
+@server.rtc_session(agent_name="voice-agent")
+async def entrypoint(ctx: JobContext) -> None:
+    """Main entrypoint — called for each voice session."""
+    logger.info("New voice session: room=%s", ctx.room.name)

    # Extract auth header from job metadata
    # The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata,
@ -149,7 +168,7 @@ async def entrypoint(ctx: JobContext) -> None:
        auth_header=auth_header,
    )

-    # Create and start AgentSession
+    # Create and start AgentSession with the full pipeline
    session = AgentSession(
        vad=ctx.proc.userdata["vad"],
        stt=stt,
@ -160,21 +179,10 @@ async def entrypoint(ctx: JobContext) -> None:
    await session.start(
        agent=IT0VoiceAgent(),
        room=ctx.room,
-        room_options=room_io.RoomOptions(
-            audio_input=True,
-            audio_output=True,
-            participant_identity=participant.identity,
-        ),
    )

-    logger.info("Voice session started for participant %s", participant.identity)
+    logger.info("Voice session started for room %s", ctx.room.name)


 if __name__ == "__main__":
-    cli.run_app(
-        WorkerOptions(
-            entrypoint_fnc=entrypoint,
-            prewarm_fnc=prewarm,
-            agent_name="voice-agent",
-        )
-    )
+    cli.run_app(server)