Я работаю над приложением, использующим Azure, Gemini, Python и Dart, и хочу добиться правильного произношения слов на разных языках. Например, при переводе между немецким и испанским языками нужно, чтобы слова в парах вида «привет» -> «хола» произносились правильно на обоих языках. Azure отлично справляется с целыми предложениями, но с дословным переводом у него проблемы.
Вот мой код. Не могли бы вы помочь мне решить проблему?
class TranslationService:
    """Translate text with Gemini and hand the generated text to the TTS service.

    The constructor configures the Gemini client from ``GEMINI_API_KEY`` and
    opens one chat session that is seeded with formatting instructions asking
    for ``word -> translation`` lines.
    """

    # Optional spell checker used by ``_auto_fix_spelling``. It must expose
    # ``unknown(words)`` and ``correction(word)``. Nothing in this class
    # creates one, so it defaults to "no spell checking".
    spell = None

    # Instructions sent as the first chat message.
    # NOTE(review): the ``{input_text}`` placeholder is never substituted; the
    # text to translate is sent later as a separate chat message.
    _TRANSLATION_INSTRUCTIONS = """
Translate the following text providing word-by-word translations with arrows:
Text: "{input_text}"
Required format for each language:
1. GERMAN TRANSLATION:
a) Complete sentence:
"Full sentence in German"
b) Word-by-word:
Ich -> Yo
suche -> busco
einen -> un
Job -> trabajo
damit -> para que
ich -> yo
finanziell -> económicamente
unabhängig -> independiente
sein -> ser
kann -> pueda
c) Formal complete sentence:
"Full formal sentence in German"
d) Formal word-by-word:
Ich -> Yo
suche -> busco
nach -> por
einem -> un
Job -> trabajo
um -> para
finanziell -> económicamente
unabhängig -> independiente
zu -> ser
sein -> ser
2. ENGLISH TRANSLATION:
[Same format as German]
3. FRENCH TRANSLATION:
[Same format as German]
4. ITALIAN TRANSLATION:
[Same format as German]
IMPORTANT FORMATTING RULES:
- Use -> for word translations (NOT = or parentheses)
- Put each word-translation pair on a new line
- Keep one space before and after the arrow
- Include articles, prepositions, and auxiliary verbs as separate entries
- Maintain consistent spacing throughout
"""

    def __init__(self):
        """Configure Gemini, create the TTS service and start the chat session.

        Raises:
            ValueError: if ``GEMINI_API_KEY`` is not set in the environment.
        """
        load_dotenv()
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables")
        genai.configure(api_key=api_key)
        self.generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }
        self.model = GenerativeModel(
            model_name="gemini-2.0-flash-exp",
            generation_config=self.generation_config,
        )
        self.tts_service = EnhancedTTSService()
        # Initialize chat session with translation instructions
        self.chat_session = self.model.start_chat(
            history=[
                {
                    "role": "user",
                    "parts": [self._TRANSLATION_INSTRUCTIONS],
                }
            ]
        )

    def _restore_accents(self, text: str) -> str:
        """Replace ASCII accent work-arounds with the proper characters.

        A vowel followed by ``´`` becomes the accented vowel, and ``n~`` /
        ``N~`` become ``ñ`` / ``Ñ``.
        """
        accented = {
            "a": "á", "e": "é", "i": "í", "o": "ó", "u": "ú",
            "A": "Á", "E": "É", "I": "Í", "O": "Ó", "U": "Ú",
        }
        text = re.sub(r"([aeiouAEIOU])´", lambda m: accented[m.group(1)], text)
        return text.replace("n~", "ñ").replace("N~", "Ñ")

    async def process_prompt(self, text: str, source_lang: str, target_lang: str) -> "Translation":
        """Send ``text`` to the chat session, synthesize the reply and wrap it up.

        Args:
            text: Text forwarded to the Gemini chat session.
            source_lang: Stored unchanged in the returned ``Translation``.
            target_lang: Stored unchanged in the returned ``Translation``.

        Returns:
            A ``Translation`` whose ``audio_path`` is whatever the TTS service
            returned (``None`` when synthesis failed).

        Raises:
            Exception: wraps any failure, with the original error chained.
        """
        try:
            # NOTE(review): this call is synchronous and blocks the event loop
            # while Gemini answers.
            response = self.chat_session.send_message(text)
            generated_text = response.text
            print(f"Generated text from Gemini: {generated_text[:100]}...")
            audio_filename = await self.tts_service.text_to_speech(
                text=generated_text
            )
            if audio_filename:
                print(f"Successfully generated audio: {audio_filename}")
            else:
                print("Audio generation failed")
            # NOTE(review): ``_generate_grammar_explanations`` is not defined in
            # the visible part of this class - confirm it exists elsewhere.
            return Translation(
                original_text=text,
                translated_text=generated_text,
                source_language=source_lang,
                target_language=target_lang,
                audio_path=audio_filename,
                translations={"main": generated_text},
                word_by_word=self._generate_word_by_word(text, generated_text),
                grammar_explanations=self._generate_grammar_explanations(generated_text),
            )
        except Exception as e:
            print(f"Error in process_prompt: {str(e)}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Translation processing failed: {str(e)}") from e

    def _generate_word_by_word(self, original: str, translated: str) -> dict[str, dict[str, str]]:
        """Build a word-by-word mapping from the generated text.

        Lines of the form ``word -> translation`` (the format requested in the
        chat instructions) are parsed first; the left side becomes the key and
        the right side the translation. The first occurrence of a word wins.
        If no such line exists, the words of ``original`` and ``translated``
        are paired by position, stopping at the shorter of the two.

        Returns:
            ``{word: {"translation": ..., "pos": "unknown"}}``.
        """
        arrow_pairs: dict[str, dict[str, str]] = {}
        for line in translated.splitlines():
            match = re.match(r"^\s*(\S.*?)\s+->\s+(\S.*?)\s*$", line)
            if match:
                arrow_pairs.setdefault(
                    match.group(1),
                    {"translation": match.group(2), "pos": "unknown"},
                )
        if arrow_pairs:
            return arrow_pairs

        # Fallback: positional pairing for plain text without arrows.
        return {
            source_word: {"translation": target_word, "pos": "unknown"}
            for source_word, target_word in zip(original.split(), translated.split())
        }

    def _auto_fix_spelling(self, text: str) -> str:
        """Spell-correct every word in ``text`` while keeping its layout.

        Only runs of word characters are passed to the spell checker;
        whitespace and punctuation are left exactly as they were. The casing
        of the original word (all caps or capitalised) is re-applied to the
        correction. When no spell checker is configured (``self.spell`` is
        ``None``) the text is returned unchanged.
        """
        checker = self.spell
        if checker is None:
            return text

        def fix_word(match):
            word = match.group(0)
            if not checker.unknown([word]):
                return word
            correction = checker.correction(word)
            if not correction:
                return word
            if word.isupper():
                return correction.upper()
            if word[0].isupper():
                return correction.capitalize()
            return correction

        return re.sub(r"\w+", fix_word, text)
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import os
from typing import Optional
from datetime import datetime
import asyncio
import re
class EnhancedTTSService:
    """Synthesize speech to MP3 files through the Azure Speech SDK."""

    def __init__(self):
        """Read Azure credentials from the environment and build the speech config.

        Raises:
            ValueError: if ``AZURE_SPEECH_KEY`` or ``AZURE_SPEECH_REGION`` is
                missing.
        """
        # Initialize Speech Config
        self.subscription_key = os.getenv("AZURE_SPEECH_KEY")
        self.region = os.getenv("AZURE_SPEECH_REGION")
        if not self.subscription_key or not self.region:
            raise ValueError("Azure Speech credentials not found in environment variables")
        # Create speech config
        self.speech_config = SpeechConfig(
            subscription=self.subscription_key,
            region=self.region
        )
        self.speech_config.set_speech_synthesis_output_format(
            SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
        )
        # Language code -> voice used when that language is detected
        self.voice_mapping = {
            'en': 'en-US-JennyMultilingualNeural',
            'es': 'es-ES-ArabellaMultilingualNeural',
            'de': 'de-DE-SeraphinaMultilingualNeural'
        }

    def _get_temp_directory(self) -> str:
        """Create (if needed) and return the directory used for audio files."""
        if os.name == 'nt':  # Windows
            import tempfile

            # Fall back to the system temp dir instead of a path relative to
            # the current working directory when TEMP is not set.
            base_dir = os.environ.get('TEMP') or tempfile.gettempdir()
            temp_dir = os.path.join(base_dir, 'tts_audio')
        else:  # Unix/Linux
            temp_dir = '/tmp/tts_audio'
        os.makedirs(temp_dir, exist_ok=True)
        return temp_dir

    def _detect_language(self, text: str) -> str:
        """Guess the language of ``text`` from language-specific characters.

        Returns ``'de'`` if a German umlaut or ``ß`` is present, otherwise
        ``'es'`` if a Spanish accented letter or inverted punctuation mark is
        present (either case), otherwise ``'en'``.
        """
        if re.search(r'[äöüßÄÖÜ]', text):
            return 'de'
        if re.search(r'[áéíóúñÁÉÍÓÚÑ¿¡]', text):
            return 'es'
        return 'en'

    def _generate_ssml(self, text: str) -> str:
        """Wrap ``text`` in an SSML document for the voice of its detected language.

        The text is XML-escaped (``&``, ``<``, ``>``) so it cannot break the
        markup. The ``xml:lang`` of the document is taken from the locale
        prefix of the selected voice name (for example ``en-US``).
        """
        from xml.sax.saxutils import escape

        # Detect on the raw text; escaping only introduces ASCII characters.
        primary_lang = self._detect_language(text)
        voice_name = self.voice_mapping.get(primary_lang, self.voice_mapping['en'])
        locale = "-".join(voice_name.split("-")[:2])
        escaped_text = escape(text)
        ssml = (
            f"<speak version='1.0' "
            f"xmlns='http://www.w3.org/2001/10/synthesis' "
            f"xml:lang='{locale}'>\n"
            f"<voice name='{voice_name}'>\n"
            f"{escaped_text}\n"
            f"</voice>\n"
            f"</speak>"
        )
        return ssml

    async def text_to_speech(self, text: str, output_path: Optional[str] = None) -> Optional[str]:
        """Synthesize ``text`` into an MP3 file.

        Args:
            text: Text to speak.
            output_path: Target file. When omitted, a timestamped file inside
                the directory from ``_get_temp_directory`` is used.

        Returns:
            The base name of the written file, or ``None`` when the input is
            empty, synthesis is canceled, or any error occurs.
        """
        if not text or not text.strip():
            print("Skipping TTS: empty text")
            return None

        synthesizer = None
        try:
            print(f"\nStarting TTS process for text: {text[:100]}...")  # First 100 chars
            # Generate output path if not provided
            if not output_path:
                temp_dir = self._get_temp_directory()
                # Microseconds keep two requests in the same second from
                # writing to the same file.
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
                output_path = os.path.join(temp_dir, f"speech_{timestamp}.mp3")
            # Configure audio output
            audio_config = AudioOutputConfig(filename=output_path)
            # Create synthesizer for this request
            synthesizer = SpeechSynthesizer(
                speech_config=self.speech_config,
                audio_config=audio_config
            )
            # Generate SSML
            ssml = self._generate_ssml(text)
            print(f"Generated SSML length: {len(ssml)} characters")
            # Run the blocking SDK call in the default executor
            print("Starting speech synthesis...")
            result = await asyncio.get_running_loop().run_in_executor(
                None,
                lambda: synthesizer.speak_ssml_async(ssml).get()
            )
            # Handle result
            if result.reason == ResultReason.SynthesizingAudioCompleted:
                print("Speech synthesis completed successfully")
                return os.path.basename(output_path)
            if result.reason == ResultReason.Canceled:
                print(f"Speech synthesis canceled: {result.cancellation_details.reason}")
                print(f"Error details: {result.cancellation_details.error_details}")
            return None
        except Exception as e:
            print(f"Exception in text_to_speech: {str(e)}")
            return None
        finally:
            # Best-effort cleanup: report a failure instead of hiding it, but
            # never let it replace the result of the synthesis.
            if synthesizer:
                try:
                    synthesizer.stop_speaking_async()
                except Exception as cleanup_error:
                    print(f"Failed to stop synthesizer: {cleanup_error}")
Это служба, которую я использую в Azure:
[img]https://i.sstatic.net/XI2UDAIc.png[/img]
Я пробовал библиотеку «langid», но, похоже, в моем случае она не работает. Моя цель — услышать правильное произношение англо-испанских и немецко-испанских пар слов во время дословного перевода.
Я работаю над приложением, использующим Azure, Gemini, Python и Dart, и хочу обеспечить правильное произношение между языками. Например, я хочу перевести между немецким и испанским языками: цель состоит в том, чтобы слова «привет» -> «хола» произносились правильно на обоих языках. Azure отлично справляется с предложениями, но с дословным переводом у него проблемы. Вот мой код. Не могли бы вы помочь мне решить проблему? [list] [*]translation_service.py [/list] [code] class TranslationService: def __init__(self): load_dotenv() api_key = os.getenv("GEMINI_API_KEY") if not api_key: raise ValueError("GEMINI_API_KEY not found in environment variables")
# Initialize chat session with translation instructions self.chat_session = self.model.start_chat( history=[ { "role": "user", "parts": [ """ Translate the following text providing word-by-word translations with arrows:
Text: "{input_text}"
Required format for each language:
1. GERMAN TRANSLATION: a) Complete sentence: "Full sentence in German"
b) Word-by-word: Ich -> Yo suche -> busco einen -> un Job -> trabajo damit -> para que ich -> yo finanziell -> económicamente unabhängig -> independiente sein -> ser kann -> pueda
c) Formal complete sentence: "Full formal sentence in German"
d) Formal word-by-word: Ich -> Yo suche -> busco nach -> por einem -> un Job -> trabajo um -> para finanziell -> económicamente unabhängig -> independiente zu -> ser sein -> ser
2. ENGLISH TRANSLATION: [Same format as German]
3. FRENCH TRANSLATION: [Same format as German]
4. ITALIAN TRANSLATION: [Same format as German]
IMPORTANT FORMATTING RULES: - Use -> for word translations (NOT = or parentheses) - Put each word-translation pair on a new line - Keep one space before and after the arrow - Include articles, prepositions, and auxiliary verbs as separate entries - Maintain consistent spacing throughout """ ] } ] )
for pattern, replacement in patterns.items(): if callable(replacement): text = re.sub(pattern, replacement, text) else: text = re.sub(pattern, replacement, text)
for i, word in enumerate(original_words): if i < len(translated_words): result[word] = { "translation": translated_words[i], "pos": "unknown", } return result
def _auto_fix_spelling(self, text: str) -> str: """Fix spelling in the given text.""" words = re.findall(r"\b\w+\b|[^\w\s]", text) corrected_words = []
for word in words: if not re.match(r"\w+", word): corrected_words.append(word) continue
if self.spell.unknown([word]): correction = self.spell.correction(word) if correction: if word.isupper(): correction = correction.upper() elif word[0].isupper(): correction = correction.capitalize() word = correction
corrected_words.append(word)
return " ".join(corrected_words) [/code] [list] [*]tts_service.py [/list] [code] from azure.cognitiveservices.speech.audio import AudioOutputConfig import os from typing import Optional from datetime import datetime import asyncio import re
# Voice mapping with specific styles and roles self.voice_mapping = { 'en': 'en-US-JennyMultilingualNeural', 'es': 'es-ES-ArabellaMultilingualNeural', 'de': 'de-DE-SeraphinaMultilingualNeural' }
def _get_temp_directory(self) -> str: """Create and return the temporary directory path""" if os.name == 'nt': # Windows temp_dir = os.path.join(os.environ.get('TEMP', ''), 'tts_audio') else: # Unix/Linux temp_dir = '/tmp/tts_audio' os.makedirs(temp_dir, exist_ok=True) return temp_dir
def _detect_language(self, text: str) -> str: """Detect the primary language of the text""" # Simple language detection based on character patterns if re.search(r'[äöüßÄÖÜ]', text): return 'de' elif re.search(r'[áéíóúñ¿¡]', text): return 'es' return 'en'
def _generate_ssml(self, text: str) -> str: """Generate valid SSML with proper escaping and language tags""" # Clean the text text = text.replace('&', '&').replace('', '>')
async def text_to_speech(self, text: str, output_path: Optional[str] = None) -> Optional[str]: """Convert text to speech with robust error handling""" synthesizer = None try: print(f"\nStarting TTS process for text: {text[:100]}...") # First 100 chars
# Generate output path if not provided if not output_path: temp_dir = self._get_temp_directory() timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') output_path = os.path.join(temp_dir, f"speech_{timestamp}.mp3")
except Exception as e: print(f"Exception in text_to_speech: {str(e)}") return None
finally: # Proper cleanup if synthesizer: try: synthesizer.stop_speaking_async() except: pass
[/code] Это служба, которую я использую в Azure: [img]https://i.sstatic.net/XI2UDAIc.png[/img]
Я пробовал библиотеку «langid», но, похоже, в моем случае она не работает. Моя цель — услышать правильное произношение англо-испанских и немецко-испанских пар слов во время дословного перевода.