diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py index 2e90fcd..cfa897d 100644 --- a/packages/services/voice-agent/src/agent.py +++ b/packages/services/voice-agent/src/agent.py @@ -1,9 +1,8 @@ """ -IT0 Voice Agent — LiveKit Agents entry point. +IT0 Voice Agent — LiveKit Agents v1.x entry point. -This is a LiveKit Agent Worker that handles real-time voice conversations. -It connects to the LiveKit server, waits for users to join a room, and runs -the voice pipeline: VAD → STT → LLM (via agent-service) → TTS. +Uses the official AgentServer + @server.rtc_session() pattern. +Pipeline: VAD → STT → LLM (via agent-service) → TTS. Usage: python -m src.agent start @@ -12,8 +11,15 @@ Usage: import json import logging -from livekit import agents, rtc -from livekit.agents import AgentSession, Agent, RoomInputOptions, JobContext, JobProcess, cli, WorkerOptions, room_io +from livekit.agents import ( + Agent, + AgentServer, + AgentSession, + JobContext, + JobProcess, + cli, + room_io, +) from livekit.plugins import silero from .config import settings @@ -36,6 +42,19 @@ class IT0VoiceAgent(Agent): ), ) + async def on_enter(self): + """Called when the agent becomes active — greet the user.""" + self.session.generate_reply( + instructions="用一句简短的话打招呼,告诉用户你是IT0运维助手,可以帮助什么。" + ) + + +# --------------------------------------------------------------------------- +# Server setup +# --------------------------------------------------------------------------- + +server = AgentServer() + def prewarm(proc: JobProcess) -> None: """Pre-load ML models into shared process memory. @@ -90,17 +109,17 @@ def prewarm(proc: JobProcess) -> None: logger.info("Prewarm complete.") -async def entrypoint(ctx: JobContext) -> None: - """Main entrypoint — called for each voice session (room join).""" - await ctx.connect(auto_subscribe=agents.AutoSubscribe.AUDIO_ONLY) +server.setup_fnc = prewarm - # Wait for a participant (the Flutter user) to join - participant = await ctx.wait_for_participant() - logger.info( - "Participant joined: identity=%s, name=%s", - participant.identity, - participant.name, - ) + +# --------------------------------------------------------------------------- +# Session entrypoint — called for each voice session (room join) +# --------------------------------------------------------------------------- + +@server.rtc_session(agent_name="voice-agent") +async def entrypoint(ctx: JobContext) -> None: + """Main entrypoint — called for each voice session.""" + logger.info("New voice session: room=%s", ctx.room.name) # Extract auth header from job metadata # The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata, @@ -149,7 +168,7 @@ async def entrypoint(ctx: JobContext) -> None: auth_header=auth_header, ) - # Create and start AgentSession + # Create and start AgentSession with the full pipeline session = AgentSession( vad=ctx.proc.userdata["vad"], stt=stt, @@ -160,21 +179,10 @@ async def entrypoint(ctx: JobContext) -> None: await session.start( agent=IT0VoiceAgent(), room=ctx.room, - room_options=room_io.RoomOptions( - audio_input=True, - audio_output=True, - participant_identity=participant.identity, - ), ) - logger.info("Voice session started for participant %s", participant.identity) + logger.info("Voice session started for room %s", ctx.room.name) if __name__ == "__main__": - cli.run_app( - WorkerOptions( - entrypoint_fnc=entrypoint, - prewarm_fnc=prewarm, - agent_name="voice-agent", - ) - ) + cli.run_app(server)