fix: refactor voice-agent to official LiveKit v1.x AgentServer pattern
Replace the deprecated WorkerOptions(entrypoint_fnc=...) setup with AgentServer() and the @server.rtc_session() decorator, and register prewarm via server.setup_fnc. Remove the manual ctx.connect() and ctx.wait_for_participant() calls, which prevented the VAD→STT→LLM→TTS pipeline from being wired up properly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
cf60b8733f
commit
00be878a95
|
|
@ -1,9 +1,8 @@
|
||||||
"""
|
"""
|
||||||
IT0 Voice Agent — LiveKit Agents entry point.
|
IT0 Voice Agent — LiveKit Agents v1.x entry point.
|
||||||
|
|
||||||
This is a LiveKit Agent Worker that handles real-time voice conversations.
|
Uses the official AgentServer + @server.rtc_session() pattern.
|
||||||
It connects to the LiveKit server, waits for users to join a room, and runs
|
Pipeline: VAD → STT → LLM (via agent-service) → TTS.
|
||||||
the voice pipeline: VAD → STT → LLM (via agent-service) → TTS.
|
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python -m src.agent start
|
python -m src.agent start
|
||||||
|
|
@ -12,8 +11,15 @@ Usage:
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from livekit import agents, rtc
|
from livekit.agents import (
|
||||||
from livekit.agents import AgentSession, Agent, RoomInputOptions, JobContext, JobProcess, cli, WorkerOptions, room_io
|
Agent,
|
||||||
|
AgentServer,
|
||||||
|
AgentSession,
|
||||||
|
JobContext,
|
||||||
|
JobProcess,
|
||||||
|
cli,
|
||||||
|
room_io,
|
||||||
|
)
|
||||||
from livekit.plugins import silero
|
from livekit.plugins import silero
|
||||||
|
|
||||||
from .config import settings
|
from .config import settings
|
||||||
|
|
@ -36,6 +42,19 @@ class IT0VoiceAgent(Agent):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def on_enter(self):
|
||||||
|
"""Called when the agent becomes active — greet the user."""
|
||||||
|
self.session.generate_reply(
|
||||||
|
instructions="用一句简短的话打招呼,告诉用户你是IT0运维助手,可以帮助什么。"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Server setup
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
server = AgentServer()
|
||||||
|
|
||||||
|
|
||||||
def prewarm(proc: JobProcess) -> None:
|
def prewarm(proc: JobProcess) -> None:
|
||||||
"""Pre-load ML models into shared process memory.
|
"""Pre-load ML models into shared process memory.
|
||||||
|
|
@ -90,17 +109,17 @@ def prewarm(proc: JobProcess) -> None:
|
||||||
logger.info("Prewarm complete.")
|
logger.info("Prewarm complete.")
|
||||||
|
|
||||||
|
|
||||||
async def entrypoint(ctx: JobContext) -> None:
|
server.setup_fnc = prewarm
|
||||||
"""Main entrypoint — called for each voice session (room join)."""
|
|
||||||
await ctx.connect(auto_subscribe=agents.AutoSubscribe.AUDIO_ONLY)
|
|
||||||
|
|
||||||
# Wait for a participant (the Flutter user) to join
|
|
||||||
participant = await ctx.wait_for_participant()
|
# ---------------------------------------------------------------------------
|
||||||
logger.info(
|
# Session entrypoint — called for each voice session (room join)
|
||||||
"Participant joined: identity=%s, name=%s",
|
# ---------------------------------------------------------------------------
|
||||||
participant.identity,
|
|
||||||
participant.name,
|
@server.rtc_session(agent_name="voice-agent")
|
||||||
)
|
async def entrypoint(ctx: JobContext) -> None:
|
||||||
|
"""Main entrypoint — called for each voice session."""
|
||||||
|
logger.info("New voice session: room=%s", ctx.room.name)
|
||||||
|
|
||||||
# Extract auth header from job metadata
|
# Extract auth header from job metadata
|
||||||
# The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata,
|
# The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata,
|
||||||
|
|
@ -149,7 +168,7 @@ async def entrypoint(ctx: JobContext) -> None:
|
||||||
auth_header=auth_header,
|
auth_header=auth_header,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create and start AgentSession
|
# Create and start AgentSession with the full pipeline
|
||||||
session = AgentSession(
|
session = AgentSession(
|
||||||
vad=ctx.proc.userdata["vad"],
|
vad=ctx.proc.userdata["vad"],
|
||||||
stt=stt,
|
stt=stt,
|
||||||
|
|
@ -160,21 +179,10 @@ async def entrypoint(ctx: JobContext) -> None:
|
||||||
await session.start(
|
await session.start(
|
||||||
agent=IT0VoiceAgent(),
|
agent=IT0VoiceAgent(),
|
||||||
room=ctx.room,
|
room=ctx.room,
|
||||||
room_options=room_io.RoomOptions(
|
|
||||||
audio_input=True,
|
|
||||||
audio_output=True,
|
|
||||||
participant_identity=participant.identity,
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Voice session started for participant %s", participant.identity)
|
logger.info("Voice session started for room %s", ctx.room.name)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cli.run_app(
|
cli.run_app(server)
|
||||||
WorkerOptions(
|
|
||||||
entrypoint_fnc=entrypoint,
|
|
||||||
prewarm_fnc=prewarm,
|
|
||||||
agent_name="voice-agent",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue