Как отправить аудиофайлы (медиа) в телефонный звонок Twilio Stream

Как отправить аудиофайлы (медиа) в телефонный звонок Twilio Stream ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Как отправить аудиофайлы (медиа) в телефонный звонок Twilio Stream

Цитата

Сообщение Anonymous » 03 июн 2025, 11:36

По сути, я делаю простого GPT-ассистента для телефонных звонков на базе Twilio; речь пользователя я расшифровываю с помощью Google Cloud Speech. Сейчас у меня есть тестовый аудиофайл tts_output4, который я хочу воспроизвести в звонке после завершения транскрипции. Я изучил разные источники, но так и не нашёл способа воспроизвести звук.

Код: Выделить всё

import base64
import json
import threading
from io import BytesIO
import time
from pydub import AudioSegment
# Transcript text accumulated so far; read and reset by on_transcription_response.
current_sentence = ""
# time.time() value stored by on_transcription_response (0 until it first runs).
last_timestamp = 0
# Seconds that must have elapsed since last_timestamp before a non-final
# result with stability above 0.8 is treated as a finished sentence.
sentence_timeout = 1.5

from flask import Flask, render_template
from flask_sockets import Sockets
from google.cloud.speech import RecognitionConfig, StreamingRecognitionConfig
from gtts import gTTS
import os
import openai  # GPT-4 integration

from SpeechClientBridge import SpeechClientBridge

# GPT-4 API setup
# os.getenv returns None when the variable is unset.  (The previous
# ``os.getenviron`` is not an attribute of ``os`` and raised AttributeError
# as soon as the module was imported.)
openai.api_key = os.getenv("OpenAI_API_key")

# Port the WSGI server in the ``__main__`` section binds to.
HTTP_SERVER_PORT = 8080

# Recognition settings: mu-law encoded audio sampled at 8 kHz, US English.
config = RecognitionConfig(
    encoding=RecognitionConfig.AudioEncoding.MULAW,
    sample_rate_hertz=8000,
    language_code="en-US",
)
# Streaming wrapper around ``config`` with interim results enabled.
streaming_config = StreamingRecognitionConfig(config=config, interim_results=True)

# Flask application plus the flask_sockets extension used for the WebSocket route.
app = Flask(__name__)
sockets = Sockets(app)

@app.route("/twiml", methods=["POST"])
def return_twiml():
    """Handle ``POST /twiml`` by rendering the ``streams.xml`` template."""
    print("POST TwiML")
    twiml_document = render_template("streams.xml")
    return twiml_document

def on_transcription_response(response, ws):
    """Accumulate streaming transcripts and react once a sentence is complete.

    Only the first alternative of the first result in *response* is used;
    responses without results or alternatives are ignored.

    A sentence counts as complete when the result is final, or when more than
    ``sentence_timeout`` seconds have passed since ``last_timestamp`` and the
    result's stability is above 0.8.  A complete sentence is passed to
    ``get_gpt_response``, ``send_static_audio(ws)`` is called, and the
    accumulated sentence is cleared.  Otherwise the transcript is stored in
    ``current_sentence`` if it is longer than what is already there.

    Args:
        response: Streaming recognition response exposing ``results``.
        ws: WebSocket handed on to ``send_static_audio``.
    """
    global current_sentence, last_timestamp

    if not response.results:
        return
    top_result = response.results[0]
    if not top_result.alternatives:
        return

    heard_text = top_result.alternatives[0].transcript
    stability_score = top_result.stability
    final_flag = top_result.is_final
    now = time.time()

    sentence_finished = final_flag or (
        now - last_timestamp > sentence_timeout and stability_score > 0.8
    )

    if not sentence_finished:
        # Interim result: remember the hypothesis only if it grew.
        if len(heard_text) > len(current_sentence):
            current_sentence = heard_text
        # NOTE(review): indentation was lost in the source; the timestamp
        # refresh is assumed to run for every interim result - confirm.
        last_timestamp = now
        return

    # Append only the part of the transcript that has not been stored yet.
    already_stored = len(current_sentence)
    if len(heard_text) > already_stored:
        current_sentence = current_sentence + heard_text[already_stored:]

    print("Complete Sentence:", current_sentence)

    gpt_response = get_gpt_response(current_sentence)
    print("GPT Response:", gpt_response)
    send_static_audio(ws)

    # send_gpt_response_as_audio(gpt_response, ws)

    current_sentence = ""
    last_timestamp = now

def get_gpt_response(prompt):
    """Ask the ``gpt-4o`` chat model to answer *prompt* and return its reply.

    The request carries a fixed system message plus *prompt* as the user
    message and limits the completion to 150 tokens.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The content of the first choice's message with surrounding
        whitespace stripped.
    """
    # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 client interface;
    # confirm the installed openai package still provides it.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=conversation,
        max_tokens=150,
    )
    first_choice = completion['choices'][0]
    reply_text = first_choice['message']['content']
    return reply_text.strip()

def _wav_data_chunk(wav_bytes):
    """Return the body of the ``data`` chunk of a RIFF/WAVE byte string.

    Input that does not carry a RIFF/WAVE signature, or has no ``data``
    chunk, is returned unchanged.
    """
    if len(wav_bytes) < 12 or wav_bytes[:4] != b"RIFF" or wav_bytes[8:12] != b"WAVE":
        return wav_bytes
    offset = 12
    while offset + 8 <= len(wav_bytes):
        chunk_id = wav_bytes[offset:offset + 4]
        chunk_size = int.from_bytes(wav_bytes[offset + 4:offset + 8], "little")
        body_start = offset + 8
        if chunk_id == b"data":
            # Slicing tolerates a declared size larger than the bytes present.
            return wav_bytes[body_start:body_start + chunk_size]
        # RIFF chunk bodies are padded to an even number of bytes.
        offset = body_start + chunk_size + (chunk_size & 1)
    return wav_bytes


def send_gpt_response_as_audio(text, ws, stream_sid=None):
    """Synthesize *text* with gTTS and send it over *ws* as media messages.

    The speech is converted to 8 kHz mono mu-law, the WAV container header is
    removed, and the remaining samples are sent in 1024-byte chunks, each as
    one JSON ``media`` message with a base64 payload.

    Twilio's Media Streams message reference describes the payload as raw
    mulaw/8000 audio without file header bytes and lists ``streamSid`` as part
    of the message; previously the header was sent as audio and no stream SID
    could be supplied.

    Args:
        text: Text to speak.
        ws: WebSocket-like object exposing ``send``.
        stream_sid: Optional stream SID; added to every message as
            ``streamSid`` when given.

    Errors are printed rather than raised (best-effort send).
    """
    try:
        # Convert the GPT response to speech (TTS)
        tts = gTTS(text, lang='en')
        audio_fp = BytesIO()
        tts.write_to_fp(audio_fp)
        audio_fp.seek(0)

        # Convert the TTS audio to the required format for Twilio
        audio_segment = AudioSegment.from_mp3(audio_fp)
        audio_segment = audio_segment.set_frame_rate(8000).set_channels(1).set_sample_width(1)
        wav_buffer = BytesIO()
        audio_segment.export(wav_buffer, format="wav", codec="pcm_mulaw")
        # Keep only the samples; the container header is not audio.
        audio_data = _wav_data_chunk(wav_buffer.getvalue())

        # Split and send the audio data in chunks
        CHUNK_SIZE = 1024  # Adjust chunk size if necessary
        for start in range(0, len(audio_data), CHUNK_SIZE):
            chunk = audio_data[start:start + CHUNK_SIZE]
            message = {
                "event": "media",
                "media": {
                    "payload": base64.b64encode(chunk).decode('utf-8')
                },
            }
            if stream_sid is not None:
                message["streamSid"] = stream_sid
            ws.send(json.dumps(message))

        print("Audio sent successfully")

    except Exception as e:
        print(f"Error sending audio: {e}")

def _strip_wav_header(audio_bytes):
    """Return the body of the ``data`` chunk of a RIFF/WAVE byte string.

    Input that does not carry a RIFF/WAVE signature, or has no ``data``
    chunk, is returned unchanged so headerless audio passes straight through.
    """
    if len(audio_bytes) < 12 or audio_bytes[:4] != b"RIFF" or audio_bytes[8:12] != b"WAVE":
        return audio_bytes
    offset = 12
    while offset + 8 <= len(audio_bytes):
        chunk_id = audio_bytes[offset:offset + 4]
        chunk_size = int.from_bytes(audio_bytes[offset + 4:offset + 8], "little")
        body_start = offset + 8
        if chunk_id == b"data":
            # Slicing tolerates a declared size larger than the bytes present.
            return audio_bytes[body_start:body_start + chunk_size]
        # RIFF chunk bodies are padded to an even number of bytes.
        offset = body_start + chunk_size + (chunk_size & 1)
    return audio_bytes


def send_static_audio(ws, stream_sid=None):
    """Send the audio in ``tts_output4.wav`` over *ws* as one media message.

    The file is read from the current working directory, any RIFF/WAVE header
    is removed, and the samples are base64-encoded into a JSON ``media``
    message.

    Twilio's Media Streams message reference describes the payload as raw
    mulaw/8000 audio without file header bytes and lists ``streamSid`` as part
    of the message; previously the whole file, header included, was sent and
    no stream SID could be supplied.

    Args:
        ws: WebSocket-like object exposing ``send``.
        stream_sid: Optional stream SID; added to the message as ``streamSid``
            when given.

    Errors (missing file, send failure) are printed rather than raised.
    """
    try:
        # Load a static audio file (in PCM mu-law format)
        with open("tts_output4.wav", "rb") as f:
            audio_data = _strip_wav_header(f.read())

        message = {
            "event": "media",
            "media": {
                "payload": base64.b64encode(audio_data).decode('utf-8')
            },
        }
        if stream_sid is not None:
            message["streamSid"] = stream_sid
        ws.send(json.dumps(message))

        print("Static audio sent successfully")
    except Exception as e:
        print(f"Error sending static audio: {e}")

def convert_text_to_speech(text, save_locally=True):
    """Synthesize *text* with gTTS and return the resulting audio bytes.

    Args:
        text: Text to speak (English voice).
        save_locally: When true, also write the audio to ``tts_output.mp3``
            in the current working directory.

    Returns:
        The synthesized audio as bytes, or ``None`` if any step raised; the
        error is printed rather than propagated.
    """
    print(f"Converting text to speech: {text}")
    try:
        speech_buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(speech_buffer)
        speech_bytes = speech_buffer.getvalue()
        print(f"TTS conversion complete, audio data length: {len(speech_bytes)}")

        if save_locally:
            with open("tts_output.mp3", "wb") as out_file:
                out_file.write(speech_bytes)
            print("TTS output saved locally as tts_output.mp3")
    except Exception as e:
        print(f"Error in TTS conversion: {e}")
        return None
    else:
        return speech_bytes

def convert_audio_for_twilio(audio_data):
    """Re-encode MP3 bytes as 8 kHz mono mu-law audio and return WAV bytes.

    Args:
        audio_data: MP3-encoded audio bytes.

    Returns:
        Bytes produced by exporting with ``format="wav"`` and the
        ``pcm_mulaw`` codec, i.e. the audio wrapped in a WAV container.
    """
    # NOTE(review): the result still carries the WAV header; callers that need
    # bare samples must remove it - confirm against the consumer.
    source_clip = AudioSegment.from_mp3(BytesIO(audio_data))
    telephony_clip = source_clip.set_frame_rate(8000)
    telephony_clip = telephony_clip.set_channels(1)
    wav_sink = BytesIO()
    telephony_clip.export(wav_sink, format="wav", codec="pcm_mulaw")
    return wav_sink.getvalue()

def send_audio_to_twilio(audio_data, ws, stream_sid=None):
    """Send the audio data to Twilio via WebSocket.

    *audio_data* is base64-encoded as-is and wrapped in a single JSON
    ``media`` message.  Nothing is sent when ``ws.closed`` is true.

    Twilio's Media Streams message reference lists ``streamSid`` as part of
    the media message; it can now be supplied through *stream_sid*.  Existing
    two-argument calls keep producing the same message as before.

    Args:
        audio_data: Audio bytes to send.
        ws: WebSocket-like object exposing ``closed`` and ``send``.
        stream_sid: Optional stream SID; added to the message as ``streamSid``
            when given.

    Send errors are printed rather than raised.
    """
    if ws.closed:
        print("WebSocket is closed.  Cannot send audio.")
        return
    audio_b64 = base64.b64encode(audio_data).decode('utf-8')
    try:
        envelope = {
            "event": "media",
            "media": {
                "payload": audio_b64
            },
        }
        if stream_sid is not None:
            envelope["streamSid"] = stream_sid
        message = json.dumps(envelope)
        print(f"Sending audio message of length: {len(message)}")
        ws.send(message)
        print("Audio sent successfully")
    except Exception as e:
        print(f"Error sending audio: {e}")

@sockets.route("/")
def transcript(ws):
    """Feed audio from the media WebSocket into the speech bridge.

    A ``SpeechClientBridge`` is started on a separate thread; its responses
    are forwarded to ``on_transcription_response`` together with *ws*.
    Incoming JSON messages are then handled by their ``event`` field until the
    socket closes, ``receive`` returns ``None``, or a ``stop`` event arrives:
    ``connected``/``start`` are logged, ``media`` payloads are base64-decoded
    and queued on the bridge.
    """
    print("WS connection opened")

    def _forward_response(response):
        return on_transcription_response(response, ws)

    bridge = SpeechClientBridge(streaming_config, _forward_response)
    worker = threading.Thread(target=bridge.start)
    worker.start()

    while not ws.closed:
        message = ws.receive()
        if message is None:
            # No message was returned: queue None on the bridge and stop it.
            bridge.add_request(None)
            bridge.terminate()
            break

        data = json.loads(message)
        event = data["event"]
        if event in ("connected", "start"):
            print(f"Media WS: Received event '{event}': {message}")
        elif event == "media":
            audio_chunk = base64.b64decode(data["media"]["payload"])
            bridge.add_request(audio_chunk)
        elif event == "stop":
            print(f"Media WS: Received event 'stop': {message}")
            print("Stopping...")
            break

    bridge.terminate()
    print("WS connection closed")

if __name__ == "__main__":
    # Script entry point: serve the Flask app through gevent's WSGI server
    # with the WebSocket-capable handler.
    from gevent import pywsgi
    from geventwebsocket.handler import WebSocketHandler

    listen_address = ("", HTTP_SERVER_PORT)
    server = pywsgi.WSGIServer(listen_address, app, handler_class=WebSocketHandler)
    print(f"Server listening on: http://localhost:{HTTP_SERVER_PORT}")
    server.serve_forever()
И просто для справки, вот мой файл streams.xml:

Буду благодарен за любую помощь с кодом.

Подробнее здесь: https://stackoverflow.com/questions/788 ... phone-call

1748939779

Anonymous

По сути, я делаю простого GPT-ассистента для телефонных звонков на базе Twilio; речь пользователя я расшифровываю с помощью Google Cloud Speech. Сейчас у меня есть тестовый аудиофайл tts_output4, который я хочу воспроизвести в звонке после завершения транскрипции. Я изучил разные источники, но так и не нашёл способа воспроизвести звук.[code]
import base64
import json
import threading
from io import BytesIO
import time
from pydub import AudioSegment
# Transcript text accumulated so far; read and reset by on_transcription_response.
current_sentence = ""
# time.time() value stored by on_transcription_response (0 until it first runs).
last_timestamp = 0
# Seconds that must have elapsed since last_timestamp before a non-final
# result with stability above 0.8 is treated as a finished sentence.
sentence_timeout = 1.5

from flask import Flask, render_template
from flask_sockets import Sockets
from google.cloud.speech import RecognitionConfig, StreamingRecognitionConfig
from gtts import gTTS
import os
import openai  # GPT-4 integration

from SpeechClientBridge import SpeechClientBridge

# GPT-4 API setup
# os.getenv returns None when the variable is unset.  (The previous
# ``os.getenviron`` is not an attribute of ``os`` and raised AttributeError
# as soon as the module was imported.)
openai.api_key = os.getenv("OpenAI_API_key")

# Port the WSGI server in the ``__main__`` section binds to.
HTTP_SERVER_PORT = 8080

# Recognition settings: mu-law encoded audio sampled at 8 kHz, US English.
config = RecognitionConfig(
    encoding=RecognitionConfig.AudioEncoding.MULAW,
    sample_rate_hertz=8000,
    language_code="en-US",
)
# Streaming wrapper around ``config`` with interim results enabled.
streaming_config = StreamingRecognitionConfig(config=config, interim_results=True)

# Flask application plus the flask_sockets extension used for the WebSocket route.
app = Flask(__name__)
sockets = Sockets(app)

@app.route("/twiml", methods=["POST"])
def return_twiml():
    """Handle ``POST /twiml`` by rendering the ``streams.xml`` template."""
    print("POST TwiML")
    twiml_document = render_template("streams.xml")
    return twiml_document

def on_transcription_response(response, ws):
    """Accumulate streaming transcripts and react once a sentence is complete.

    Only the first alternative of the first result in *response* is used;
    responses without results or alternatives are ignored.

    A sentence counts as complete when the result is final, or when more than
    ``sentence_timeout`` seconds have passed since ``last_timestamp`` and the
    result's stability is above 0.8.  A complete sentence is passed to
    ``get_gpt_response``, ``send_static_audio(ws)`` is called, and the
    accumulated sentence is cleared.  Otherwise the transcript is stored in
    ``current_sentence`` if it is longer than what is already there.

    Args:
        response: Streaming recognition response exposing ``results``.
        ws: WebSocket handed on to ``send_static_audio``.
    """
    global current_sentence, last_timestamp

    if not response.results:
        return
    top_result = response.results[0]
    if not top_result.alternatives:
        return

    heard_text = top_result.alternatives[0].transcript
    stability_score = top_result.stability
    final_flag = top_result.is_final
    now = time.time()

    sentence_finished = final_flag or (
        now - last_timestamp > sentence_timeout and stability_score > 0.8
    )

    if not sentence_finished:
        # Interim result: remember the hypothesis only if it grew.
        if len(heard_text) > len(current_sentence):
            current_sentence = heard_text
        # NOTE(review): indentation was lost in the source; the timestamp
        # refresh is assumed to run for every interim result - confirm.
        last_timestamp = now
        return

    # Append only the part of the transcript that has not been stored yet.
    already_stored = len(current_sentence)
    if len(heard_text) > already_stored:
        current_sentence = current_sentence + heard_text[already_stored:]

    print("Complete Sentence:", current_sentence)

    gpt_response = get_gpt_response(current_sentence)
    print("GPT Response:", gpt_response)
    send_static_audio(ws)

    # send_gpt_response_as_audio(gpt_response, ws)

    current_sentence = ""
    last_timestamp = now

def get_gpt_response(prompt):
    """Ask the ``gpt-4o`` chat model to answer *prompt* and return its reply.

    The request carries a fixed system message plus *prompt* as the user
    message and limits the completion to 150 tokens.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The content of the first choice's message with surrounding
        whitespace stripped.
    """
    # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 client interface;
    # confirm the installed openai package still provides it.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=conversation,
        max_tokens=150,
    )
    first_choice = completion['choices'][0]
    reply_text = first_choice['message']['content']
    return reply_text.strip()

def _wav_data_chunk(wav_bytes):
    """Return the body of the ``data`` chunk of a RIFF/WAVE byte string.

    Input that does not carry a RIFF/WAVE signature, or has no ``data``
    chunk, is returned unchanged.
    """
    if len(wav_bytes) < 12 or wav_bytes[:4] != b"RIFF" or wav_bytes[8:12] != b"WAVE":
        return wav_bytes
    offset = 12
    while offset + 8 <= len(wav_bytes):
        chunk_id = wav_bytes[offset:offset + 4]
        chunk_size = int.from_bytes(wav_bytes[offset + 4:offset + 8], "little")
        body_start = offset + 8
        if chunk_id == b"data":
            # Slicing tolerates a declared size larger than the bytes present.
            return wav_bytes[body_start:body_start + chunk_size]
        # RIFF chunk bodies are padded to an even number of bytes.
        offset = body_start + chunk_size + (chunk_size & 1)
    return wav_bytes


def send_gpt_response_as_audio(text, ws, stream_sid=None):
    """Synthesize *text* with gTTS and send it over *ws* as media messages.

    The speech is converted to 8 kHz mono mu-law, the WAV container header is
    removed, and the remaining samples are sent in 1024-byte chunks, each as
    one JSON ``media`` message with a base64 payload.

    Twilio's Media Streams message reference describes the payload as raw
    mulaw/8000 audio without file header bytes and lists ``streamSid`` as part
    of the message; previously the header was sent as audio and no stream SID
    could be supplied.

    Args:
        text: Text to speak.
        ws: WebSocket-like object exposing ``send``.
        stream_sid: Optional stream SID; added to every message as
            ``streamSid`` when given.

    Errors are printed rather than raised (best-effort send).
    """
    try:
        # Convert the GPT response to speech (TTS)
        tts = gTTS(text, lang='en')
        audio_fp = BytesIO()
        tts.write_to_fp(audio_fp)
        audio_fp.seek(0)

        # Convert the TTS audio to the required format for Twilio
        audio_segment = AudioSegment.from_mp3(audio_fp)
        audio_segment = audio_segment.set_frame_rate(8000).set_channels(1).set_sample_width(1)
        wav_buffer = BytesIO()
        audio_segment.export(wav_buffer, format="wav", codec="pcm_mulaw")
        # Keep only the samples; the container header is not audio.
        audio_data = _wav_data_chunk(wav_buffer.getvalue())

        # Split and send the audio data in chunks
        CHUNK_SIZE = 1024  # Adjust chunk size if necessary
        for start in range(0, len(audio_data), CHUNK_SIZE):
            chunk = audio_data[start:start + CHUNK_SIZE]
            message = {
                "event": "media",
                "media": {
                    "payload": base64.b64encode(chunk).decode('utf-8')
                },
            }
            if stream_sid is not None:
                message["streamSid"] = stream_sid
            ws.send(json.dumps(message))

        print("Audio sent successfully")

    except Exception as e:
        print(f"Error sending audio: {e}")

def _strip_wav_header(audio_bytes):
    """Return the body of the ``data`` chunk of a RIFF/WAVE byte string.

    Input that does not carry a RIFF/WAVE signature, or has no ``data``
    chunk, is returned unchanged so headerless audio passes straight through.
    """
    if len(audio_bytes) < 12 or audio_bytes[:4] != b"RIFF" or audio_bytes[8:12] != b"WAVE":
        return audio_bytes
    offset = 12
    while offset + 8 <= len(audio_bytes):
        chunk_id = audio_bytes[offset:offset + 4]
        chunk_size = int.from_bytes(audio_bytes[offset + 4:offset + 8], "little")
        body_start = offset + 8
        if chunk_id == b"data":
            # Slicing tolerates a declared size larger than the bytes present.
            return audio_bytes[body_start:body_start + chunk_size]
        # RIFF chunk bodies are padded to an even number of bytes.
        offset = body_start + chunk_size + (chunk_size & 1)
    return audio_bytes


def send_static_audio(ws, stream_sid=None):
    """Send the audio in ``tts_output4.wav`` over *ws* as one media message.

    The file is read from the current working directory, any RIFF/WAVE header
    is removed, and the samples are base64-encoded into a JSON ``media``
    message.

    Twilio's Media Streams message reference describes the payload as raw
    mulaw/8000 audio without file header bytes and lists ``streamSid`` as part
    of the message; previously the whole file, header included, was sent and
    no stream SID could be supplied.

    Args:
        ws: WebSocket-like object exposing ``send``.
        stream_sid: Optional stream SID; added to the message as ``streamSid``
            when given.

    Errors (missing file, send failure) are printed rather than raised.
    """
    try:
        # Load a static audio file (in PCM mu-law format)
        with open("tts_output4.wav", "rb") as f:
            audio_data = _strip_wav_header(f.read())

        message = {
            "event": "media",
            "media": {
                "payload": base64.b64encode(audio_data).decode('utf-8')
            },
        }
        if stream_sid is not None:
            message["streamSid"] = stream_sid
        ws.send(json.dumps(message))

        print("Static audio sent successfully")
    except Exception as e:
        print(f"Error sending static audio: {e}")

def convert_text_to_speech(text, save_locally=True):
    """Synthesize *text* with gTTS and return the resulting audio bytes.

    Args:
        text: Text to speak (English voice).
        save_locally: When true, also write the audio to ``tts_output.mp3``
            in the current working directory.

    Returns:
        The synthesized audio as bytes, or ``None`` if any step raised; the
        error is printed rather than propagated.
    """
    print(f"Converting text to speech: {text}")
    try:
        speech_buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(speech_buffer)
        speech_bytes = speech_buffer.getvalue()
        print(f"TTS conversion complete, audio data length: {len(speech_bytes)}")

        if save_locally:
            with open("tts_output.mp3", "wb") as out_file:
                out_file.write(speech_bytes)
            print("TTS output saved locally as tts_output.mp3")
    except Exception as e:
        print(f"Error in TTS conversion: {e}")
        return None
    else:
        return speech_bytes

def convert_audio_for_twilio(audio_data):
    """Re-encode MP3 bytes as 8 kHz mono mu-law audio and return WAV bytes.

    Args:
        audio_data: MP3-encoded audio bytes.

    Returns:
        Bytes produced by exporting with ``format="wav"`` and the
        ``pcm_mulaw`` codec, i.e. the audio wrapped in a WAV container.
    """
    # NOTE(review): the result still carries the WAV header; callers that need
    # bare samples must remove it - confirm against the consumer.
    source_clip = AudioSegment.from_mp3(BytesIO(audio_data))
    telephony_clip = source_clip.set_frame_rate(8000)
    telephony_clip = telephony_clip.set_channels(1)
    wav_sink = BytesIO()
    telephony_clip.export(wav_sink, format="wav", codec="pcm_mulaw")
    return wav_sink.getvalue()

def send_audio_to_twilio(audio_data, ws, stream_sid=None):
    """Send the audio data to Twilio via WebSocket.

    *audio_data* is base64-encoded as-is and wrapped in a single JSON
    ``media`` message.  Nothing is sent when ``ws.closed`` is true.

    Twilio's Media Streams message reference lists ``streamSid`` as part of
    the media message; it can now be supplied through *stream_sid*.  Existing
    two-argument calls keep producing the same message as before.

    Args:
        audio_data: Audio bytes to send.
        ws: WebSocket-like object exposing ``closed`` and ``send``.
        stream_sid: Optional stream SID; added to the message as ``streamSid``
            when given.

    Send errors are printed rather than raised.
    """
    if ws.closed:
        print("WebSocket is closed.  Cannot send audio.")
        return
    audio_b64 = base64.b64encode(audio_data).decode('utf-8')
    try:
        envelope = {
            "event": "media",
            "media": {
                "payload": audio_b64
            },
        }
        if stream_sid is not None:
            envelope["streamSid"] = stream_sid
        message = json.dumps(envelope)
        print(f"Sending audio message of length: {len(message)}")
        ws.send(message)
        print("Audio sent successfully")
    except Exception as e:
        print(f"Error sending audio: {e}")

@sockets.route("/")
def transcript(ws):
    """Feed audio from the media WebSocket into the speech bridge.

    A ``SpeechClientBridge`` is started on a separate thread; its responses
    are forwarded to ``on_transcription_response`` together with *ws*.
    Incoming JSON messages are then handled by their ``event`` field until the
    socket closes, ``receive`` returns ``None``, or a ``stop`` event arrives:
    ``connected``/``start`` are logged, ``media`` payloads are base64-decoded
    and queued on the bridge.
    """
    print("WS connection opened")

    def _forward_response(response):
        return on_transcription_response(response, ws)

    bridge = SpeechClientBridge(streaming_config, _forward_response)
    worker = threading.Thread(target=bridge.start)
    worker.start()

    while not ws.closed:
        message = ws.receive()
        if message is None:
            # No message was returned: queue None on the bridge and stop it.
            bridge.add_request(None)
            bridge.terminate()
            break

        data = json.loads(message)
        event = data["event"]
        if event in ("connected", "start"):
            print(f"Media WS: Received event '{event}': {message}")
        elif event == "media":
            audio_chunk = base64.b64decode(data["media"]["payload"])
            bridge.add_request(audio_chunk)
        elif event == "stop":
            print(f"Media WS: Received event 'stop': {message}")
            print("Stopping...")
            break

    bridge.terminate()
    print("WS connection closed")

if __name__ == "__main__":
    # Script entry point: serve the Flask app through gevent's WSGI server
    # with the WebSocket-capable handler.
    from gevent import pywsgi
    from geventwebsocket.handler import WebSocketHandler

    listen_address = ("", HTTP_SERVER_PORT)
    server = pywsgi.WSGIServer(listen_address, app, handler_class=WebSocketHandler)
    print(f"Server listening on: http://localhost:{HTTP_SERVER_PORT}")
    server.serve_forever()
[/code]
И просто для справки, вот мой файл streams.xml:

Буду благодарен за любую помощь с кодом :) 
 

Подробнее здесь: [url]https://stackoverflow.com/questions/78884546/how-to-send-audio-files-media-to-twilio-stream-phone-call[/url]

Ответить Пред. тема След. тема

1 сообщение • Страница 1 из 1

Быстрый ответ

Заголовок:

Имя пользователя:

Изменение регистра текста:

Смайлики

Ещё смайлики…

К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми. Можно прикреплять файлы, перетаскивая их в окно сообщения.

Максимально разрешённый размер вложения: 15 МБ.

Имя файла:

Комментарий к файлу:

Имя файла	Комментарий к файлу	Размер	Статус

Похожие темы

Ответы

Просмотры

Последнее сообщение

Разрешение «Разрешить [имя приложения] совершать телефонные звонки и управлять ими» и READ_PHONE_STATE

Последнее сообщение Anonymous « 02 июн 2024, 09:03
Добавлено в форуме Android

Anonymous » 02 июн 2024, 09:03 » в форуме Android

У меня есть приложение Unity (v5.2.4), созданное для Android, которое запрашивает разрешение на «совершение телефонных звонков и управление ими» на Android (v6.0.1). Есть ли исследования, похоже, что это может быть связано с READ_PHONE_STATE....

0 Ответы

75 Просмотры

Последнее сообщение Anonymous
02 июн 2024, 09:03
Разрешение «Разрешить [имя приложения] совершать телефонные звонки и управлять ими» и READ_PHONE_STATE

Последнее сообщение Anonymous « 30 сен 2024, 15:05
Добавлено в форуме Android

Anonymous » 30 сен 2024, 15:05 » в форуме Android

У меня есть приложение Unity (v5.2.4), созданное для Android, которое запрашивает разрешение «совершать телефонные звонки и управлять ими» на Android (v6.0.1). Есть ли исследования, похоже, что это может быть связано с READ_PHONE_STATE.

Поэтому я...

0 Ответы

17 Просмотры

Последнее сообщение Anonymous
30 сен 2024, 15:05
«Разрешить [имя приложения] делать и управлять разрешением на телефонные звонки и read_phone_state

Последнее сообщение Anonymous « 28 янв 2025, 21:03
Добавлено в форуме Android

Anonymous » 28 янв 2025, 21:03 » в форуме Android

У меня есть приложение Unity (v5.2.4), созданное для Android, которое запрашивает разрешение «делать и управлять телефонными звонками» на Android (v6.0.1). Похоже, что некоторые исследования могут быть связаны с read_phone_state.

Таким образом, я...

0 Ответы

11 Просмотры

Последнее сообщение Anonymous
28 янв 2025, 21:03
Как обнаружить телефонные звонки, когда приложение убито без использования службы переднего плана в Android?

Последнее сообщение Anonymous « 16 фев 2025, 18:20
Добавлено в форуме Android

Anonymous » 16 фев 2025, 18:20 » в форуме Android

Задача задачи
Я разрабатываю приложение для Android, которое необходимо обнаружить входящие и исходящие телефонные звонки, даже когда приложение убито. Я внедрил Broadcastreceiver ( CallReceiver ) прослушать...

0 Ответы

16 Просмотры

Последнее сообщение Anonymous
16 фев 2025, 18:20
Как слушать телефонные звонки в приложении Flutter?

Последнее сообщение Anonymous « 31 мар 2025, 22:43
Добавлено в форуме Android

Anonymous » 31 мар 2025, 22:43 » в форуме Android

Я постараюсь спросить его настолько простым способом.// My App is open at any random time call received
(if call_Received) { // How do I do this bit via dart code?
sample()
}

void sample() { // Any random function
// Some code to perform
}...

0 Ответы

6 Просмотры

Последнее сообщение Anonymous
31 мар 2025, 22:43

Вернуться в «Python»