From afc1ae6fbea2eea0e72d210706024bf5150e760e Mon Sep 17 00:00:00 2001
From: hailin <hailin.zhao@gdzx.xyz>
Date: Mon, 9 Mar 2026 06:56:31 -0700
Subject: [PATCH] feat(voice): randomly pick thinking sound from all 7 built-in
 clips per session

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 packages/services/voice-agent/src/agent.py | 23 +++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py
index 60bcfe6..0f03da0 100644
--- a/packages/services/voice-agent/src/agent.py
+++ b/packages/services/voice-agent/src/agent.py
@@ -387,16 +387,21 @@ async def entrypoint(ctx: JobContext) -> None:
         _cleanup_ref.append(_on_room_disconnect)
 
         # --- Thinking state audio feedback ---
-        # BackgroundAudioPlayer listens for AgentStateChangedEvent from the
-        # session. When state transitions to "thinking" (STT done, waiting for
-        # LLM response), it plays the built-in keyboard typing sound through
-        # the LiveKit audio track. The sound stops automatically when the agent
-        # enters "speaking" state (TTS begins). This gives the user audible
-        # feedback that the AI is processing their request.
-        # Available built-in clips: KEYBOARD_TYPING, KEYBOARD_TYPING2,
-        # OFFICE_AMBIENCE, CITY_AMBIENCE, FOREST_AMBIENCE, CROWDED_ROOM, HOLD_MUSIC
+        # Pick this session's thinking sound at random from the built-in clips.
+        _all_clips = [
+            BuiltinAudioClip.KEYBOARD_TYPING,
+            BuiltinAudioClip.KEYBOARD_TYPING2,
+            BuiltinAudioClip.OFFICE_AMBIENCE,
+            BuiltinAudioClip.CITY_AMBIENCE,
+            BuiltinAudioClip.FOREST_AMBIENCE,
+            BuiltinAudioClip.CROWDED_ROOM,
+            BuiltinAudioClip.HOLD_MUSIC,
+        ]
+        import random as _random
+        _chosen_clip = _random.choice(_all_clips)
+        logger.info("Thinking sound this session: %s", _chosen_clip)
         bg_audio = BackgroundAudioPlayer(
-            thinking_sound=BuiltinAudioClip.KEYBOARD_TYPING,
+            thinking_sound=_chosen_clip,
         )
         await bg_audio.start(room=ctx.room, agent_session=session)