"""Twilio voice webhook and Media Streams WebSocket routes."""
import asyncio
import base64
import json
import logging
import uuid

from fastapi import APIRouter, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import Response

from ..pipeline.base_pipeline import create_voice_pipeline
from ..pipeline.twilio_transport import TwilioPhoneTransport
router = APIRouter()
|
|
|
|
|
|
@router.post("/voice/incoming")
|
|
async def twilio_incoming_call(request: Request):
|
|
"""Handle incoming Twilio voice calls."""
|
|
# Return TwiML to connect to Media Streams
|
|
twiml = """<?xml version="1.0" encoding="UTF-8"?>
|
|
<Response>
|
|
<Connect>
|
|
<Stream url="wss://{host}/api/v1/twilio/media-stream" />
|
|
</Connect>
|
|
</Response>"""
|
|
return Response(content=twiml, media_type="application/xml")


@router.websocket("/media-stream")
|
|
async def twilio_media_stream(websocket: WebSocket):
|
|
"""
|
|
Twilio Media Streams WebSocket endpoint.
|
|
|
|
Handles the bidirectional audio stream between Twilio and the Pipecat pipeline.
|
|
Twilio sends JSON messages with events: connected, start, media, stop.
|
|
Audio is mu-law 8kHz base64-encoded.
|
|
"""
|
|
await websocket.accept()
|
|
|
|
app = websocket.app
|
|
voice_session_id = f"tw_{uuid.uuid4().hex[:12]}"
|
|
transport = None
|
|
pipeline_task = None
|
|
|
|
try:
|
|
while True:
|
|
data = await websocket.receive_text()
|
|
message = json.loads(data)
|
|
event = message.get("event")
|
|
|
|
if event == "connected":
|
|
# Twilio has connected the Media Stream WebSocket
|
|
pass
|
|
|
|
elif event == "start":
|
|
# Stream is starting -- create transport and pipeline
|
|
stream_sid = message.get("streamSid", "")
|
|
|
|
# Create the Twilio transport
|
|
transport = TwilioPhoneTransport(websocket, voice_session_id)
|
|
transport._stream_sid = stream_sid
|
|
|
|
# Build session context for the pipeline
|
|
session_context = {
|
|
"session_id": voice_session_id,
|
|
"stream_sid": stream_sid,
|
|
"call_sid": message.get("start", {}).get("callSid", ""),
|
|
}
|
|
|
|
# Create the Pipecat voice pipeline
|
|
task = await create_voice_pipeline(
|
|
transport,
|
|
session_context,
|
|
stt=getattr(app.state, "stt", None),
|
|
tts=getattr(app.state, "tts", None),
|
|
vad=getattr(app.state, "vad", None),
|
|
)
|
|
|
|
# Run pipeline in background
|
|
pipeline_task = asyncio.create_task(task.run())
|
|
|
|
elif event == "media":
|
|
# Incoming audio from the phone call
|
|
if transport is not None:
|
|
# Decode base64 mu-law audio payload
|
|
media_payload = message.get("media", {}).get("payload", "")
|
|
mulaw_bytes = base64.b64decode(media_payload)
|
|
|
|
# Convert mu-law 8kHz to PCM 16kHz via the input processor
|
|
input_processor = transport.input()
|
|
pcm_audio = input_processor.process_audio(mulaw_bytes)
|
|
|
|
# Feed PCM audio into the pipeline (via transport websocket)
|
|
# The pipeline reads from the transport; here we make the
|
|
# decoded audio available for downstream processing.
|
|
# In a full Pipecat integration the transport handles this
|
|
# internally. For now, we send the converted audio back
|
|
# through the output processor to complete the loop.
|
|
output_processor = transport.output()
|
|
await output_processor.send_audio(pcm_audio)
|
|
|
|
elif event == "stop":
|
|
# Stream is ending -- clean up
|
|
break
|
|
|
|
except WebSocketDisconnect:
|
|
pass
|
|
except json.JSONDecodeError:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
finally:
|
|
# Cancel pipeline task if still running
|
|
if pipeline_task is not None and not pipeline_task.done():
|
|
pipeline_task.cancel()
|
|
try:
|
|
await pipeline_task
|
|
except (asyncio.CancelledError, Exception):
|
|
pass
|
|
|
|
# Ensure websocket is closed
|
|
try:
|
|
await websocket.close()
|
|
except Exception:
|
|
pass
|