Однако, когда я использовал их API для получения звука, открыл его и передал в twilio, он воспроизводился на телефоне правильно. .
Ниже приведен мой код, в котором я пытаюсь сгенерировать звук из одиннадцати лабораторий, а затем отправить его в свой сокет twilio.
Код: Выделить всё
async def text_to_speech_stream(text: str):
voice_id = "pNInz6obpgDQGcFmaJgB"
model_id = "eleven_multilingual_v2"
# Construct the WebSocket URL
url = f"wss://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream-input?model_id={model_id}"
# Set up headers
headers = {
'xi-api-key': ""
}
# Payload to send
payload = {
"text": " ",
"voice_settings": {
"stability": 0.0,
"similarity_boost": 1.0,
"style": 0.0,
"use_speaker_boost": True
},
"model_id": model_id,
"voice_id": voice_id,
"xi-api-key": "",
"output_format": "ulaw_8000",
"flush":True
}
async with websockets.connect(url) as ws:
print("websocket connected: 11labs")
# Send the payload as JSON
await ws.send(json.dumps(payload))
await ws.send(json.dumps({"text":text}))
await ws.send(json.dumps({"text": ""}))
# Receive audio chunks
start_time = time.time()
print("waiting for revecivng")
print()
audio_buffer = bytearray()
while True:
try:
message = await ws.recv()
data = json.loads(message)
if data.get("audio"):
# Audio chunk received
print("yielded in: ",time.time()-start_time, flush=True)
yield data["audio"]#base64.b64decode(data["audio"])
elif data.get('isFinal'):
break
elif isinstance(message, str):
# Text message received (e.g., errors or status updates)
data = json.loads(message)
if data.get("warning"):
print(f"Warning: {data['warning']}")
elif data.get("error"):
print(f"Error: {data['error']}")
break
else:
print(f"Message from ElevenLabs: ")
else:
print("Unknown message type received")
except websockets.exceptions.ConnectionClosedOK:
# Connection closed gracefully
break
except Exception as e:
print(f"Error in ElevenLabs WebSocket: {e}")
break
async def send_to_twilio_eleven(websocket, message, stream_sid, interruption_event):
print("Starting TTS streaming from ElevenLabs")
try:
start_time = time.time()
print("message is: ", message)
audio_payload = ""
count = 0
# audio_payload = text_to_speech_stream(message) # for API
async for audio_chunk in text_to_speech_stream(message):
print("Chunk yielded in : ",time.time()-start_time)
# Trying to buffer. Facing same issue even when streamed directly without buffering
audio_payload += audio_chunk
# Construct the message to send to Twilio
audio_delta = {
"event": "media",
"streamSid": stream_sid,
"media": {
"payload": audio_payload
}
}
# ideal for sending chunks
if interruption_event.is_set():
print("Interruption detected")
return
# Send the message to Twilio
await websocket.send_json(audio_delta)
print("received audio sent to twilio")
# Yield control to the event loop
await asyncio.sleep(0)
except Exception as e:
print(f"Error in send_to_twilio_eleven: {e}")
Подробнее здесь: https://stackoverflow.com/questions/790 ... ream-audio