""" Twilio Media Streams audio transport. Used when Pipecat initiates outbound calls via Twilio. Audio codec: mu-law 8kHz (phone standard) <-> PCM 16kHz (Whisper input) """ import audioop import base64 import json from twilio.rest import Client as TwilioClient from ..config.settings import settings class TwilioPhoneTransport: """Twilio Media Streams transport for phone calls.""" def __init__(self, websocket, voice_session_id: str): self.websocket = websocket self.voice_session_id = voice_session_id self._stream_sid = None async def initiate_call(self, phone_number: str): """ Initiate outbound call via Twilio REST API. Creates a call that connects to the Media Streams WebSocket endpoint, allowing bidirectional audio streaming. """ client = TwilioClient( settings.twilio_account_sid, settings.twilio_auth_token, ) # TwiML instructs Twilio to open a Media Stream back to our server twiml = ( '' "" "" f'' "" "" ) call = client.calls.create( to=phone_number, from_=settings.twilio_phone_number, twiml=twiml, ) return call.sid def input(self): """ Audio input from phone (mu-law 8kHz -> PCM 16kHz conversion). Returns a TwilioInputProcessor that decodes mu-law encoded audio from the Twilio Media Stream and converts it to PCM 16kHz 16bit mono for the pipeline (Whisper STT expects 16kHz PCM). """ return TwilioInputProcessor(self) def output(self): """ Audio output to phone (PCM 16kHz -> mu-law 8kHz conversion). Returns a TwilioOutputProcessor that converts PCM 16kHz audio from the pipeline (TTS output) to mu-law 8kHz for Twilio phone playback. """ return TwilioOutputProcessor(self) class TwilioInputProcessor: """Converts mu-law 8kHz audio from Twilio to PCM 16kHz for the pipeline.""" def __init__(self, transport: TwilioPhoneTransport): self.transport = transport def process_audio(self, mulaw_bytes: bytes) -> bytes: """ Convert mu-law 8kHz audio to PCM 16kHz 16bit mono. Steps: 1. Decode mu-law to PCM 16bit (8kHz) 2. Resample from 8kHz to 16kHz (2x upsampling) """ # mu-law to linear PCM 16bit at 8kHz pcm_8khz = audioop.ulaw2lin(mulaw_bytes, 2) # Resample from 8kHz to 16kHz pcm_16khz, _ = audioop.ratecv(pcm_8khz, 2, 1, 8000, 16000, None) return pcm_16khz class TwilioOutputProcessor: """Converts PCM 16kHz audio from the pipeline to mu-law 8kHz for Twilio.""" def __init__(self, transport: TwilioPhoneTransport): self.transport = transport def process_audio(self, pcm_bytes: bytes) -> bytes: """ Convert PCM 16kHz 16bit mono to mu-law 8kHz. Steps: 1. Resample from 16kHz to 8kHz (2x downsampling) 2. Encode linear PCM to mu-law """ # Resample from 16kHz to 8kHz pcm_8khz, _ = audioop.ratecv(pcm_bytes, 2, 1, 16000, 8000, None) # Linear PCM to mu-law mulaw_bytes = audioop.lin2ulaw(pcm_8khz, 2) return mulaw_bytes async def send_audio(self, pcm_bytes: bytes): """Convert PCM audio to mu-law and send via Twilio Media Stream.""" mulaw_bytes = self.process_audio(pcm_bytes) # Twilio expects base64-encoded mu-law audio in a JSON media message payload = json.dumps({ "event": "media", "streamSid": self.transport._stream_sid, "media": { "payload": base64.b64encode(mulaw_bytes).decode("ascii"), }, }) await self.transport.websocket.send_text(payload)