135 lines
4.0 KiB
Python
135 lines
4.0 KiB
Python
"""
|
|
Twilio Media Streams audio transport.
|
|
|
|
Used when Pipecat initiates outbound calls via Twilio.
|
|
Audio codec: mu-law 8kHz (phone standard) <-> PCM 16kHz (Whisper input)
|
|
"""
|
|
|
|
import audioop
|
|
import base64
|
|
import json
|
|
|
|
from twilio.rest import Client as TwilioClient
|
|
|
|
from ..config.settings import settings
|
|
|
|
|
|
class TwilioPhoneTransport:
    """Twilio Media Streams transport for phone calls.

    Holds the Media Streams WebSocket and the voice session identifier,
    places outbound calls through the Twilio REST API, and hands out the
    processors that convert audio between Twilio's mu-law 8kHz format and
    the pipeline's PCM 16kHz format.
    """

    def __init__(self, websocket, voice_session_id: str):
        """Store the connection handles for one phone call.

        Args:
            websocket: WebSocket carrying the Twilio Media Stream. The output
                processor calls ``send_text`` on it.
            voice_session_id: Identifier of the voice session this transport
                belongs to.
        """
        self.websocket = websocket
        self.voice_session_id = voice_session_id
        # Read by TwilioOutputProcessor.send_audio. Nothing in this class
        # assigns it; presumably the WebSocket handler sets it when Twilio
        # reports the stream as started -- TODO confirm against the caller.
        self._stream_sid = None

    async def initiate_call(self, phone_number: str):
        """Initiate an outbound call via the Twilio REST API.

        Creates a call whose TwiML connects it to the Media Streams WebSocket
        endpoint, allowing bidirectional audio streaming.

        The Twilio REST client is synchronous, so the request is executed in
        the event loop's default executor. Calling it directly from this
        coroutine would block the loop (and any audio streaming running on
        it) for the duration of the HTTP round trip.

        Args:
            phone_number: Destination number, passed to Twilio as ``to``.

        Returns:
            The ``sid`` attribute of the call object returned by Twilio.

        Raises:
            Any exception raised by the Twilio client propagates unchanged
            through the awaited executor future.
        """
        # Function-scope import keeps this block self-contained.
        import asyncio

        # TwiML instructs Twilio to open a Media Stream back to our server
        twiml = (
            '<?xml version="1.0" encoding="UTF-8"?>'
            "<Response>"
            "<Connect>"
            f'<Stream url="wss://{settings.host}:{settings.port}'
            f'/api/v1/twilio/media-stream" />'
            "</Connect>"
            "</Response>"
        )

        def _place_call():
            """Perform the blocking REST request; runs in a worker thread."""
            client = TwilioClient(
                settings.twilio_account_sid,
                settings.twilio_auth_token,
            )
            call = client.calls.create(
                to=phone_number,
                from_=settings.twilio_phone_number,
                twiml=twiml,
            )
            return call.sid

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _place_call)

    def input(self):
        """Audio input from phone (mu-law 8kHz -> PCM 16kHz conversion).

        Returns a new TwilioInputProcessor bound to this transport. It
        decodes mu-law audio from the Twilio Media Stream and converts it to
        PCM 16kHz 16bit mono for the pipeline (Whisper STT expects 16kHz PCM).
        """
        return TwilioInputProcessor(self)

    def output(self):
        """Audio output to phone (PCM 16kHz -> mu-law 8kHz conversion).

        Returns a new TwilioOutputProcessor bound to this transport. It
        converts PCM 16kHz audio from the pipeline (TTS output) to mu-law
        8kHz for Twilio phone playback.
        """
        return TwilioOutputProcessor(self)
|
|
|
|
|
|
class TwilioInputProcessor:
    """Converts mu-law 8kHz audio from Twilio to PCM 16kHz for the pipeline.

    The processor is stateful: the resampler state returned by
    ``audioop.ratecv`` is kept between calls so that consecutive chunks of
    one stream are resampled as a single continuous signal. Use one
    processor per stream, or call ``reset()`` at stream boundaries.
    """

    # Resampler state from the previous ratecv call; None before the first
    # chunk. The class-level default keeps bare instances usable.
    _ratecv_state = None

    def __init__(self, transport: "TwilioPhoneTransport"):
        """Bind the processor to its transport.

        Args:
            transport: The TwilioPhoneTransport this processor belongs to.
        """
        self.transport = transport
        self._ratecv_state = None

    def reset(self) -> None:
        """Forget the resampler state so the next chunk starts a new stream."""
        self._ratecv_state = None

    def process_audio(self, mulaw_bytes: bytes) -> bytes:
        """Convert mu-law 8kHz audio to PCM 16kHz 16bit mono.

        Steps:
        1. Decode mu-law to PCM 16bit (8kHz)
        2. Resample from 8kHz to 16kHz (2x upsampling)

        Args:
            mulaw_bytes: One chunk of mu-law encoded audio, one byte per
                sample.

        Returns:
            The chunk as 16-bit linear PCM at 16kHz. For the first chunk
            after construction or ``reset()`` the result equals a stateless
            conversion; later chunks continue from the previous one.
        """
        # mu-law to linear PCM 16bit at 8kHz
        pcm_8khz = audioop.ulaw2lin(mulaw_bytes, 2)

        # Resample from 8kHz to 16kHz. The returned state is fed back into
        # the next call; passing None every time would restart the
        # interpolation at each chunk boundary.
        pcm_16khz, self._ratecv_state = audioop.ratecv(
            pcm_8khz, 2, 1, 8000, 16000, self._ratecv_state
        )

        return pcm_16khz
|
|
|
|
|
|
class TwilioOutputProcessor:
    """Converts PCM 16kHz audio from the pipeline to mu-law 8kHz for Twilio.

    The processor is stateful: the resampler state returned by
    ``audioop.ratecv`` is kept between calls so that consecutive chunks of
    one stream are resampled as a single continuous signal. Use one
    processor per stream, or call ``reset()`` at stream boundaries.
    """

    # Resampler state from the previous ratecv call; None before the first
    # chunk. The class-level default keeps bare instances usable.
    _ratecv_state = None

    def __init__(self, transport: "TwilioPhoneTransport"):
        """Bind the processor to its transport.

        Args:
            transport: The TwilioPhoneTransport whose ``websocket`` and
                ``_stream_sid`` are used by ``send_audio``.
        """
        self.transport = transport
        self._ratecv_state = None

    def reset(self) -> None:
        """Forget the resampler state so the next chunk starts a new stream."""
        self._ratecv_state = None

    def process_audio(self, pcm_bytes: bytes) -> bytes:
        """Convert PCM 16kHz 16bit mono to mu-law 8kHz.

        Steps:
        1. Resample from 16kHz to 8kHz (2x downsampling)
        2. Encode linear PCM to mu-law

        Args:
            pcm_bytes: One chunk of 16-bit linear PCM audio at 16kHz; its
                length must be a whole number of 2-byte samples.

        Returns:
            The chunk as mu-law bytes at 8kHz. For the first chunk after
            construction or ``reset()`` the result equals a stateless
            conversion; later chunks continue from the previous one.
        """
        # Resample from 16kHz to 8kHz. The returned state is fed back into
        # the next call so a chunk with an odd number of samples does not
        # shift which samples are kept in the following chunk.
        pcm_8khz, self._ratecv_state = audioop.ratecv(
            pcm_bytes, 2, 1, 16000, 8000, self._ratecv_state
        )

        # Linear PCM to mu-law
        mulaw_bytes = audioop.lin2ulaw(pcm_8khz, 2)

        return mulaw_bytes

    async def send_audio(self, pcm_bytes: bytes):
        """Convert PCM audio to mu-law and send via Twilio Media Stream.

        Args:
            pcm_bytes: One chunk of 16-bit linear PCM audio at 16kHz.
        """
        mulaw_bytes = self.process_audio(pcm_bytes)

        # Twilio expects base64-encoded mu-law audio in a JSON media message.
        # NOTE(review): ``_stream_sid`` is None until something outside this
        # class assigns it; in that case "streamSid" is serialized as null.
        # Confirm callers only send after the stream has started.
        payload = json.dumps({
            "event": "media",
            "streamSid": self.transport._stream_sid,
            "media": {
                "payload": base64.b64encode(mulaw_bytes).decode("ascii"),
            },
        })

        await self.transport.websocket.send_text(payload)
|