import asyncio
import base64
import json
import logging
import uuid
from xml.sax.saxutils import quoteattr

from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Request
from fastapi.responses import Response

from ..pipeline.twilio_transport import TwilioPhoneTransport
from ..pipeline.base_pipeline import create_voice_pipeline

logger = logging.getLogger(__name__)

router = APIRouter()


def _build_stream_twiml(stream_url: str) -> str:
    """Return a TwiML document that connects the call to *stream_url*.

    The URL is attribute-escaped so characters such as ``&`` or quotes
    cannot break out of the ``url`` attribute.
    """
    return (
        '<?xml version="1.0" encoding="UTF-8"?>'
        "<Response><Connect>"
        f"<Stream url={quoteattr(stream_url)} />"
        "</Connect></Response>"
    )


@router.post("/voice/incoming")
async def twilio_incoming_call(request: Request):
    """Handle incoming Twilio voice calls.

    Responds with TwiML that tells Twilio to open a Media Stream to the
    ``twilio_media_stream`` WebSocket endpoint defined in this module.

    Args:
        request: The incoming webhook request; used to resolve the absolute
            URL of the Media Stream endpoint (including any router prefix).

    Returns:
        An ``application/xml`` response containing the TwiML document.
    """
    # NOTE(review): the original TwiML body was blank, which would leave the
    # call unconnected. The document below is reconstructed from the comment
    # "Return TwiML to connect to Media Streams" -- confirm it matches the
    # intended call flow.
    stream_url = str(request.url_for("twilio_media_stream"))

    # Twilio only accepts secure WebSocket URLs. When TLS is terminated by an
    # upstream proxy the app sees plain http and would emit ws://, so upgrade.
    if stream_url.startswith("ws://"):
        stream_url = "wss://" + stream_url[len("ws://"):]

    twiml = _build_stream_twiml(stream_url)
    return Response(content=twiml, media_type="application/xml")


async def _start_pipeline(websocket, app, voice_session_id, message):
    """Create the transport and launch the voice pipeline for a ``start`` event.

    Returns:
        A ``(transport, pipeline_task)`` tuple, where ``pipeline_task`` is the
        background asyncio task running the pipeline.
    """
    stream_sid = message.get("streamSid", "")

    # Create the Twilio transport
    transport = TwilioPhoneTransport(websocket, voice_session_id)
    transport._stream_sid = stream_sid

    # Build session context for the pipeline
    session_context = {
        "session_id": voice_session_id,
        "stream_sid": stream_sid,
        "call_sid": message.get("start", {}).get("callSid", ""),
    }

    # Create the Pipecat voice pipeline; services are optional app state
    task = await create_voice_pipeline(
        transport,
        session_context,
        stt=getattr(app.state, "stt", None),
        tts=getattr(app.state, "tts", None),
        vad=getattr(app.state, "vad", None),
    )

    # Run pipeline in background so the receive loop keeps draining frames
    pipeline_task = asyncio.create_task(task.run())
    return transport, pipeline_task


async def _relay_media(transport, message, voice_session_id):
    """Decode one ``media`` event and pass its audio through the transport."""
    media_payload = message.get("media", {}).get("payload", "")
    try:
        # Decode base64 mu-law audio payload
        mulaw_bytes = base64.b64decode(media_payload)
    except ValueError:
        # binascii.Error is a ValueError; one corrupt frame should not end
        # the whole call, so drop it and keep streaming.
        logger.warning(
            "Dropping media frame with invalid base64 payload (session %s)",
            voice_session_id,
        )
        return

    # Convert the mu-law audio to PCM via the input processor
    input_processor = transport.input()
    pcm_audio = input_processor.process_audio(mulaw_bytes)

    # In a full Pipecat integration the transport handles feeding the
    # pipeline internally. For now, the converted audio is sent back
    # through the output processor to complete the loop.
    output_processor = transport.output()
    await output_processor.send_audio(pcm_audio)


async def _shutdown_pipeline(pipeline_task, voice_session_id):
    """Cancel a still-running pipeline task, or report how a finished one ended."""
    if pipeline_task is None:
        return

    if pipeline_task.done():
        # Retrieve the outcome so a crashed pipeline is logged instead of
        # surfacing later as "Task exception was never retrieved".
        if not pipeline_task.cancelled():
            error = pipeline_task.exception()
            if error is not None:
                logger.error(
                    "Voice pipeline failed (session %s)",
                    voice_session_id,
                    exc_info=error,
                )
        return

    pipeline_task.cancel()
    try:
        await pipeline_task
    except asyncio.CancelledError:
        pass  # expected result of the cancel() above
    except Exception:
        logger.exception(
            "Voice pipeline raised during shutdown (session %s)",
            voice_session_id,
        )


@router.websocket("/media-stream")
async def twilio_media_stream(websocket: WebSocket):
    """
    Twilio Media Streams WebSocket endpoint.

    Handles the bidirectional audio stream between Twilio and the
    Pipecat pipeline.

    Twilio sends JSON messages with events: connected, start, media, stop.
    Audio is mu-law 8kHz base64-encoded.

    Event handling:
        connected: ignored.
        start: creates the transport and launches the pipeline in a
            background task (a repeated ``start`` is ignored).
        media: decoded and relayed through the transport; ignored until a
            ``start`` event has created the transport.
        stop: ends the receive loop.

    On any exit path the pipeline task is cancelled if still running and the
    WebSocket is closed. Errors are logged rather than propagated.
    """
    await websocket.accept()

    app = websocket.app
    voice_session_id = f"tw_{uuid.uuid4().hex[:12]}"
    transport = None
    pipeline_task = None

    try:
        while True:
            data = await websocket.receive_text()
            message = json.loads(data)
            event = message.get("event")

            if event == "connected":
                # Twilio has connected the Media Stream WebSocket
                continue

            if event == "start":
                if pipeline_task is not None:
                    # A second pipeline would orphan the first task.
                    logger.warning(
                        "Ignoring duplicate start event (session %s)",
                        voice_session_id,
                    )
                    continue
                transport, pipeline_task = await _start_pipeline(
                    websocket, app, voice_session_id, message
                )
            elif event == "media":
                # Incoming audio from the phone call
                if transport is not None:
                    await _relay_media(transport, message, voice_session_id)
            elif event == "stop":
                # Stream is ending -- clean up
                break

    except WebSocketDisconnect:
        logger.info("Media stream disconnected (session %s)", voice_session_id)
    except json.JSONDecodeError:
        logger.warning(
            "Closing media stream after malformed JSON message (session %s)",
            voice_session_id,
        )
    except Exception:
        logger.exception(
            "Unexpected error in media stream (session %s)", voice_session_id
        )
    finally:
        await _shutdown_pipeline(pipeline_task, voice_session_id)

        # Ensure websocket is closed; closing an already-closed socket
        # raises, so this stays best-effort.
        try:
            await websocket.close()
        except Exception:
            logger.debug(
                "WebSocket already closed (session %s)", voice_session_id
            )