diff --git a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart
index 4fe75f7..7345fb1 100644
--- a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart
+++ b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart
@@ -43,7 +43,13 @@ class _AgentCallPageState extends ConsumerState<AgentCallPage>
   final List<double> _waveHeights = List.generate(20, (_) => 0.3);
   Timer? _waveTimer;
 
-  // Agent state (from lk.agent.state participant attribute)
+  // Agent state — read from the "lk.agent.state" participant attribute that
+  // LiveKit AgentSession publishes automatically. Values:
+  //   "initializing" → agent starting up
+  //   "listening"    → waiting for user speech
+  //   "thinking"     → STT done, LLM processing (show thinking animation)
+  //   "speaking"     → TTS playing response
+  // See ParticipantAttributesChanged listener in _acceptCall().
   String _agentState = '';
   late AnimationController _thinkingController;
 
@@ -149,6 +155,12 @@ class _AgentCallPageState extends ConsumerState<AgentCallPage>
             _onCallEnded();
           }
         })
+        // Agent state monitoring: LiveKit AgentSession on the server publishes
+        // "lk.agent.state" as a participant attribute. When it changes, we
+        // update _agentState to drive UI changes:
+        //   thinking → pulsing dots + "思考中..." + orange avatar glow
+        //   speaking → waveform animation + "语音通话中"
+        //   listening → default call UI
         ..on<ParticipantAttributesChanged>((event) {
           final state = event.attributes['lk.agent.state'];
           if (state != null && state != _agentState && mounted) {
diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py
index b1df99c..3ac2f73 100644
--- a/packages/services/voice-agent/src/agent.py
+++ b/packages/services/voice-agent/src/agent.py
@@ -4,6 +4,27 @@ IT0 Voice Agent — LiveKit Agents v1.x entry point.
 Uses the official AgentServer + @server.rtc_session() pattern.
 Pipeline: VAD → STT → LLM (via agent-service) → TTS.
 
+Agent State & Thinking Indicator
+---------------------------------
+LiveKit AgentSession (v1.4.3+) automatically publishes the participant
+attribute ``lk.agent.state`` with these values:
+
+    initializing → listening → thinking → speaking → listening → ...
+
+The attribute is published inside the framework on each state change:
+  - RoomIO._on_agent_state_changed() calls
+    local_participant.set_attributes({"lk.agent.state": state})
+
+On the Flutter side (livekit_client v2.6.4), the app listens for
+ParticipantAttributesChanged events and reads the ``lk.agent.state``
+attribute to drive UI changes:
+  - "thinking" → pulsing dots animation + "思考中..." text + orange avatar
+  - "speaking" → waveform animation driven by audio level
+  - "listening" → default call UI
+
+BackgroundAudioPlayer is configured below to play a keyboard typing
+sound effect during the "thinking" state as auditory feedback.
+
 Usage:
     python -m src.agent start
 """
@@ -310,7 +331,15 @@ async def entrypoint(ctx: JobContext) -> None:
             room_output_options=room_io.RoomOutputOptions(),
         )
 
-        # Play keyboard typing sound while agent is thinking (waiting for LLM)
+        # --- Thinking state audio feedback ---
+        # BackgroundAudioPlayer listens for AgentStateChangedEvent from the
+        # session. When state transitions to "thinking" (STT done, waiting for
+        # LLM response), it plays the built-in keyboard typing sound through
+        # the LiveKit audio track. The sound stops automatically once the agent
+        # leaves "thinking" (typically on entering "speaking", when TTS begins).
+        # This gives the user audible feedback that the AI is processing.
+        # Available built-in clips: KEYBOARD_TYPING, KEYBOARD_TYPING2,
+        # OFFICE_AMBIENCE, CITY_AMBIENCE, FOREST_AMBIENCE, CROWDED_ROOM, HOLD_MUSIC
         bg_audio = BackgroundAudioPlayer(
             thinking_sound=BuiltinAudioClip.KEYBOARD_TYPING,
         )