it0/packages/services/voice-service/src/api/twilio_webhook.py

125 lines
4.3 KiB
Python

import asyncio
import base64
import json
import logging
import uuid
from xml.sax.saxutils import quoteattr

from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Request
from fastapi.responses import Response

from ..pipeline.twilio_transport import TwilioPhoneTransport
from ..pipeline.base_pipeline import create_voice_pipeline
# Router on which this module registers its Twilio HTTP webhook and
# Media Streams WebSocket endpoints.
router = APIRouter()
@router.post("/voice/incoming")
async def twilio_incoming_call(request: Request):
    """Handle an incoming Twilio voice call.

    Responds with TwiML that tells Twilio to connect the call to the
    Media Streams WebSocket served by this application.

    Args:
        request: The incoming webhook request. Its ``Host`` header (falling
            back to the network location of the request URL) is used to
            build the ``wss://`` stream URL so that Twilio connects back to
            the same host it reached for this webhook.

    Returns:
        An ``application/xml`` response containing the TwiML document.
    """
    # The stream URL must contain the real host name; a literal "{host}"
    # placeholder would be sent to Twilio verbatim and the stream would
    # never connect.
    host = request.headers.get("host") or request.url.netloc
    stream_url = f"wss://{host}/api/v1/twilio/media-stream"

    # quoteattr() escapes the value for use as an XML attribute and adds the
    # surrounding quotes, so a hostile Host header cannot break the document.
    twiml = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        "<Response>\n"
        "    <Connect>\n"
        f"        <Stream url={quoteattr(stream_url)} />\n"
        "    </Connect>\n"
        "</Response>"
    )
    return Response(content=twiml, media_type="application/xml")
@router.websocket("/media-stream")
async def twilio_media_stream(websocket: WebSocket):
    """Serve the Twilio Media Streams WebSocket for a single call.

    Twilio sends JSON text frames whose ``event`` field is one of
    ``connected``, ``start``, ``media`` or ``stop``:

    * ``start`` creates the ``TwilioPhoneTransport`` and the voice pipeline,
      then runs the pipeline as a background task.
    * ``media`` carries a base64-encoded audio payload (mu-law 8 kHz per the
      Twilio stream format). It is passed through the transport's input
      processor and the converted audio is sent back through the output
      processor.
    * ``stop`` ends the receive loop.
    * ``connected`` and unknown events require no action.

    Frames that are not valid JSON objects, media payloads that are not
    valid base64, and repeated ``start`` events are logged and skipped so a
    single bad frame does not drop the call. Any other error is logged and
    ends the stream. On exit the pipeline task is cancelled (if still
    running) and awaited, and the WebSocket is closed on a best-effort basis.

    Args:
        websocket: The WebSocket connection opened by Twilio.
    """
    logger = logging.getLogger(__name__)

    await websocket.accept()
    app = websocket.app
    voice_session_id = f"tw_{uuid.uuid4().hex[:12]}"
    transport = None
    pipeline_task = None

    try:
        while True:
            data = await websocket.receive_text()

            try:
                message = json.loads(data)
            except json.JSONDecodeError:
                logger.warning(
                    "[%s] Ignoring non-JSON frame from Twilio", voice_session_id
                )
                continue
            if not isinstance(message, dict):
                logger.warning(
                    "[%s] Ignoring JSON frame that is not an object",
                    voice_session_id,
                )
                continue

            event = message.get("event")

            if event == "start":
                if transport is not None:
                    # A second pipeline would orphan the first background
                    # task, so only the first "start" is honoured.
                    logger.warning(
                        "[%s] Ignoring duplicate 'start' event", voice_session_id
                    )
                    continue

                stream_sid = message.get("streamSid", "")

                # Create the Twilio transport for this stream.
                transport = TwilioPhoneTransport(websocket, voice_session_id)
                transport._stream_sid = stream_sid

                # Session context handed to the pipeline factory.
                start_info = message.get("start") or {}
                session_context = {
                    "session_id": voice_session_id,
                    "stream_sid": stream_sid,
                    "call_sid": start_info.get("callSid", ""),
                }

                # Shared STT/TTS/VAD services are optional on app.state.
                task = await create_voice_pipeline(
                    transport,
                    session_context,
                    stt=getattr(app.state, "stt", None),
                    tts=getattr(app.state, "tts", None),
                    vad=getattr(app.state, "vad", None),
                )

                # Run the pipeline in the background while this coroutine
                # keeps reading frames from Twilio.
                pipeline_task = asyncio.create_task(task.run())

            elif event == "media":
                if transport is None:
                    # Audio before "start": nothing to feed it into yet.
                    continue

                media_info = message.get("media") or {}
                media_payload = media_info.get("payload", "")
                try:
                    mulaw_bytes = base64.b64decode(media_payload)
                except (ValueError, TypeError):
                    # binascii.Error is a ValueError subclass.
                    logger.warning(
                        "[%s] Ignoring media frame with invalid base64 payload",
                        voice_session_id,
                    )
                    continue

                # Convert the incoming audio via the input processor.
                input_processor = transport.input()
                pcm_audio = input_processor.process_audio(mulaw_bytes)

                # In a full Pipecat integration the transport feeds the
                # pipeline internally. For now the converted audio is sent
                # back through the output processor to complete the loop.
                output_processor = transport.output()
                await output_processor.send_audio(pcm_audio)

            elif event == "stop":
                # Stream is ending; cleanup happens in ``finally``.
                break

    except WebSocketDisconnect:
        logger.debug("[%s] Twilio media stream disconnected", voice_session_id)
    except Exception:
        # Top-level boundary: record the failure instead of hiding it, then
        # fall through to cleanup so the call is torn down cleanly.
        logger.exception(
            "[%s] Unhandled error in Twilio media stream", voice_session_id
        )
    finally:
        if pipeline_task is not None:
            if not pipeline_task.done():
                pipeline_task.cancel()
            # Always await the task so that an exception raised by the
            # pipeline is retrieved and logged rather than silently lost.
            try:
                await pipeline_task
            except asyncio.CancelledError:
                pass
            except Exception:
                logger.exception(
                    "[%s] Voice pipeline task failed", voice_session_id
                )

        # Best-effort close: the socket may already be closed by the peer.
        try:
            await websocket.close()
        except Exception:
            logger.debug(
                "[%s] WebSocket close failed (likely already closed)",
                voice_session_id,
                exc_info=True,
            )