"""Text-to-speech helper built on the optional ``edge-tts`` package."""

import logging
from typing import Optional

logger = logging.getLogger(__name__)

# edge-tts is an optional dependency: when it is missing the module still
# imports, and synthesize() simply returns None.
try:
    import edge_tts

    HAS_EDGE_TTS = True
except ImportError:
    HAS_EDGE_TTS = False
    logger.warning("edge-tts not installed, TTS disabled")

# Short language code -> voice name passed to edge_tts.Communicate.
VOICE_MAP = {
    "zh": "zh-CN-XiaoxiaoNeural",
    "en": "en-US-AriaNeural",
    "ja": "ja-JP-NanamiNeural",
    "ko": "ko-KR-SunHiNeural",
    "fr": "fr-FR-DeniseNeural",
    "de": "de-DE-KatjaNeural",
    "es": "es-ES-ElviraNeural",
    "pt": "pt-BR-FranciscaNeural",
    "ru": "ru-RU-SvetlanaNeural",
    "ar": "ar-SA-ZariyahNeural",
}

SUPPORTED_LANGS = list(VOICE_MAP.keys())


class TextToSpeechService:
    """Stateless wrapper that turns text into audio bytes via edge-tts."""

    @staticmethod
    def resolve_voice(lang: str) -> str:
        """Return the voice name to use for *lang*.

        The lookup ignores case and surrounding whitespace, and accepts
        region-tagged codes such as ``"zh-CN"`` or ``"pt_BR"`` by falling
        back to the primary subtag. Anything that still does not match a
        key of ``VOICE_MAP`` (including non-string input) resolves to the
        English voice.
        """
        code = lang.strip().lower() if isinstance(lang, str) else ""
        if code in VOICE_MAP:
            return VOICE_MAP[code]
        # "zh-cn" / "pt_br" -> "zh" / "pt"
        primary = code.replace("_", "-").split("-", 1)[0]
        return VOICE_MAP.get(primary, VOICE_MAP["en"])

    @staticmethod
    async def synthesize(
        text: str, lang: str = "en", rate: str = "", pitch: str = ""
    ) -> Optional[bytes]:
        """Synthesize *text* to speech and return the audio bytes.

        Args:
            text: The text to speak.
            lang: Language code used to pick a voice (see ``resolve_voice``).
            rate: Optional rate string forwarded to ``edge_tts.Communicate``;
                omitted from the call when empty.
            pitch: Optional pitch string forwarded to
                ``edge_tts.Communicate``; omitted from the call when empty.

        Returns:
            The concatenated audio chunks, or ``None`` when edge-tts is not
            installed, *text* is empty/whitespace-only, no audio was
            produced, or synthesis raised an exception (which is logged).
        """
        if not HAS_EDGE_TTS:
            logger.warning("edge-tts not available")
            return None

        # Nothing to speak: skip the request entirely.
        if not text or not text.strip():
            return None

        voice = TextToSpeechService.resolve_voice(lang)

        # Only forward rate/pitch when the caller supplied them so the
        # library's own defaults apply otherwise.
        kwargs = {"voice": voice}
        if rate:
            kwargs["rate"] = rate
        if pitch:
            kwargs["pitch"] = pitch

        try:
            communicate = edge_tts.Communicate(text, **kwargs)
            # Collect chunks and join once; repeated ``bytes +=`` copies the
            # whole buffer on every chunk.
            parts = []
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    parts.append(chunk["data"])
        except Exception as exc:
            # Best-effort service: report the failure (with traceback) and
            # signal "no audio" to the caller instead of propagating.
            logger.exception("TTS failed: %s", exc)
            return None

        audio_data = b"".join(parts)
        return audio_data if audio_data else None


tts_service = TextToSpeechService()