# Listing metadata (from the code viewer this file was copied from): 54 lines, 1.5 KiB, Python.
from typing import Optional
import logging

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# edge-tts is optional: HAS_EDGE_TTS records whether the import succeeded.
# A failed import is not fatal; it is only reported through a warning.
try:
    import edge_tts
except ImportError:
    HAS_EDGE_TTS = False
    logger.warning("edge-tts not installed, TTS disabled")
else:
    HAS_EDGE_TTS = True
|
|
|
|
# Voice name used for each supported language code.
# Insertion order matters: SUPPORTED_LANGS below mirrors it.
VOICE_MAP = {
    "zh": "zh-CN-XiaoxiaoNeural",
    "en": "en-US-AriaNeural",
    "ja": "ja-JP-NanamiNeural",
    "ko": "ko-KR-SunHiNeural",
    "fr": "fr-FR-DeniseNeural",
    "de": "de-DE-KatjaNeural",
    "es": "es-ES-ElviraNeural",
    "pt": "pt-BR-FranciscaNeural",
    "ru": "ru-RU-SvetlanaNeural",
    "ar": "ar-SA-ZariyahNeural",
}

# Language codes that have a voice, in VOICE_MAP order.
SUPPORTED_LANGS = list(VOICE_MAP)
|
|
|
|
|
|
class TextToSpeechService:
    """Text-to-speech synthesis backed by the optional ``edge-tts`` package."""

    @staticmethod
    async def synthesize(
        text: str, lang: str = "en", rate: str = "", pitch: str = ""
    ) -> Optional[bytes]:
        """Convert *text* to speech and return the collected audio bytes.

        Args:
            text: Text to speak. Empty, whitespace-only or non-string input
                returns ``None`` without calling edge-tts.
            lang: Language code used to pick a voice from ``VOICE_MAP``.
                Matching ignores case and surrounding whitespace, and a
                regional tag such as ``"pt-BR"`` or ``"zh_CN"`` falls back to
                its primary subtag. Unknown codes use the English voice.
            rate: Speaking-rate option forwarded to ``edge_tts.Communicate``;
                omitted from the call when empty.
            pitch: Pitch option forwarded to ``edge_tts.Communicate``;
                omitted from the call when empty.

        Returns:
            The concatenated ``"audio"`` chunks of the stream, or ``None``
            when there is nothing to speak, edge-tts is not installed, the
            stream produced no audio, or synthesis raised an exception.
        """
        # Nothing to speak: skip the edge-tts call entirely.
        if not isinstance(text, str) or not text.strip():
            return None

        if not HAS_EDGE_TTS:
            logger.warning("edge-tts not available")
            return None

        # Normalise the tag so "EN", " en " and "en_US" resolve like "en";
        # try the full tag first, then its primary subtag, then English.
        tag = str(lang or "").strip().lower().replace("_", "-")
        voice = (
            VOICE_MAP.get(tag)
            or VOICE_MAP.get(tag.partition("-")[0])
            or VOICE_MAP["en"]
        )

        # Only pass the options the caller actually set.
        options = {"voice": voice}
        if rate:
            options["rate"] = rate
        if pitch:
            options["pitch"] = pitch

        try:
            communicate = edge_tts.Communicate(text, **options)
            # Collect chunks and join once; repeated ``bytes +=`` re-copies
            # the whole buffer on every chunk.
            audio_chunks = [
                chunk["data"]
                async for chunk in communicate.stream()
                if chunk["type"] == "audio"
            ]
        except Exception as exc:
            # Best-effort API: report the failure (with traceback) and
            # signal it to the caller as None rather than raising.
            logger.exception("TTS failed: %s", exc)
            return None

        return b"".join(audio_chunks) or None
|
|
|
|
|
|
# Shared module-level instance of the service.
tts_service = TextToSpeechService()
|