import * as crypto from 'crypto'
import * as fs from 'fs'
import * as path from 'path'
import { execFileSync, execSync } from 'child_process'
import { Injectable, Logger } from '@nestjs/common'

/** Directory holding generated audio (`<hash>.mp3`) and amplitude envelopes (`<hash>.amp`). */
const CACHE_DIR = '/tmp/tts-cache'

/** Shape of a cache key: the lowercase hex MD5 digest produced by `TtsService.synthesize`. */
const HASH_PATTERN = /^[0-9a-f]{32}$/

/** Sample rate (Hz) that ffmpeg is asked to decode to before amplitude analysis. */
const PCM_SAMPLE_RATE = 16000

/** Length, in seconds, of each amplitude analysis window (50 ms). */
const AMPLITUDE_WINDOW_SECONDS = 0.05

/** RMS value that maps to amplitude 1.0; larger values are clamped to 1. */
const AMPLITUDE_FULL_SCALE = 16000

/** Result of a synthesis request. */
export interface TtsResult {
  /** Cache key: MD5 hex digest of `text + voice`. */
  hash: string
  /** Path of the generated (or cached) MP3 file. */
  filePath: string
  /** Duration estimated from the MP3 file size, in milliseconds. */
  durationMs: number
  /** One RMS-based amplitude value in [0, 1] per 50 ms window; empty if extraction failed. */
  amplitudeData: number[]
}

const VALID_VOICES = new Set([
  'zh-CN-XiaoxiaoNeural',
  'zh-CN-XiaoyiNeural',
  'zh-CN-YunjianNeural',
  'zh-CN-YunxiNeural',
  'zh-CN-YunxiaNeural',
  'zh-CN-YunyangNeural',
  'zh-CN-liaoning-XiaobeiNeural',
  'zh-CN-shaanxi-XiaoniNeural',
])

/**
 * Rejects any voice name that is not in the allow-list.
 *
 * @throws Error when `voice` is not one of `VALID_VOICES`.
 */
function validateVoice(voice: string): void {
  if (!VALID_VOICES.has(voice)) {
    throw new Error(`不支持的语音: ${voice}`)
  }
}

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e)
}

/** Type guard: `value` is an array whose elements are all numbers. */
function isNumberArray(value: unknown): value is number[] {
  return Array.isArray(value) && value.every((item) => typeof item === 'number')
}

/**
 * Computes a per-window RMS amplitude envelope from raw signed 16-bit
 * little-endian mono PCM.
 *
 * @param pcm Raw PCM bytes; a trailing odd byte is ignored.
 * @returns One value per 50 ms window, normalised by `AMPLITUDE_FULL_SCALE`,
 *          clamped to 1 and rounded to 4 decimal places.
 */
function computeAmplitudeEnvelope(pcm: Buffer): number[] {
  const sampleCount = Math.floor(pcm.length / 2)
  const windowSize = Math.floor(PCM_SAMPLE_RATE * AMPLITUDE_WINDOW_SECONDS)
  const amplitudes: number[] = []

  for (let start = 0; start < sampleCount; start += windowSize) {
    const end = Math.min(start + windowSize, sampleCount)
    let sumSq = 0
    for (let i = start; i < end; i++) {
      // Read explicitly as little-endian to match the `s16le` format requested
      // from ffmpeg, independent of host endianness and buffer alignment.
      const sample = pcm.readInt16LE(i * 2)
      sumSq += sample * sample
    }
    const rms = Math.sqrt(sumSq / (end - start))
    amplitudes.push(Number(Math.min(1, rms / AMPLITUDE_FULL_SCALE).toFixed(4)))
  }

  return amplitudes
}

/**
 * Text-to-speech service backed by the `edge-tts` and `ffmpeg` command-line
 * tools, with an on-disk cache keyed by the MD5 of `text + voice`.
 *
 * NOTE(review): both tools are invoked synchronously, so a synthesis call
 * blocks the Node event loop for up to the configured timeouts.
 */
@Injectable()
export class TtsService {
  private readonly logger = new Logger(TtsService.name)

  constructor() {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(CACHE_DIR, { recursive: true })
  }

  /**
   * Synthesises `text` with the given voice, reusing the cached audio when
   * both the MP3 and its amplitude file are present.
   *
   * @param text Text to speak. Line feeds become spaces and carriage returns are dropped.
   * @param voice Voice name; must be one of `VALID_VOICES`.
   * @returns Cache key, MP3 path, estimated duration and amplitude envelope.
   * @throws Error if the voice is not supported or `edge-tts` fails or times out (30 s).
   */
  async synthesize(text: string, voice: string = 'zh-CN-XiaoxiaoNeural'): Promise<TtsResult> {
    validateVoice(voice)

    const hash = crypto.createHash('md5').update(text + voice).digest('hex')
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)

    if (fs.existsSync(filePath)) {
      const amplitudeData = this.loadAmplitudeData(hash)
      if (amplitudeData) {
        const durationMs = await this.getDuration(filePath)
        return { hash, filePath, durationMs, amplitudeData }
      }
      // MP3 present but amplitude data missing or invalid: fall through and regenerate.
    }

    try {
      // Arguments are passed as an array with no shell involved, so
      // user-supplied text cannot be interpreted as shell syntax.
      // The text is sent as a single `--text=<value>` token so that text
      // beginning with `-` is not mistaken for an option; this assumes the
      // edge-tts CLI accepts the `--opt=value` form — TODO confirm.
      execFileSync(
        'edge-tts',
        ['--voice', voice, `--text=${this.normalizeText(text)}`, '--write-media', filePath],
        { timeout: 30000 },
      )
      const durationMs = await this.getDuration(filePath)
      const amplitudeData = this.extractAmplitude(filePath, hash)
      this.logger.log(`TTS generated: hash=${hash} text="${text.slice(0, 40)}..." duration=${durationMs}ms`)
      return { hash, filePath, durationMs, amplitudeData }
    } catch (e) {
      this.logger.error(`TTS failed: ${errorMessage(e)}`)
      throw e
    }
  }

  /**
   * Resolves a cache key to its MP3 path.
   *
   * @param hash Cache key as returned by `synthesize`.
   * @returns The MP3 path, or `null` if the key is malformed or no file exists.
   */
  getCachedPath(hash: string): string | null {
    // Reject anything that is not a plain MD5 hex digest so a caller-supplied
    // value such as `../x` cannot resolve to a path outside the cache directory.
    if (!HASH_PATTERN.test(hash)) {
      return null
    }
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)
    return fs.existsSync(filePath) ? filePath : null
  }

  /**
   * Decodes the MP3 to 16 kHz mono PCM with ffmpeg, computes the amplitude
   * envelope and stores it as JSON in `<hash>.amp`.
   *
   * Best-effort: any failure is logged as a warning and yields an empty array.
   * The temporary `.pcm` file is removed whether or not extraction succeeds.
   */
  private extractAmplitude(mp3Path: string, hash: string): number[] {
    const pcmPath = path.join(CACHE_DIR, `${hash}.pcm`)
    try {
      execFileSync(
        'ffmpeg',
        [
          '-y',
          '-i', mp3Path,
          '-f', 's16le',
          '-acodec', 'pcm_s16le',
          '-ar', String(PCM_SAMPLE_RATE),
          '-ac', '1',
          pcmPath,
        ],
        // Discard ffmpeg's console output (the shell version used `2>/dev/null`).
        { timeout: 10000, stdio: 'ignore' },
      )

      const amplitudes = computeAmplitudeEnvelope(fs.readFileSync(pcmPath))
      fs.writeFileSync(path.join(CACHE_DIR, `${hash}.amp`), JSON.stringify(amplitudes))
      return amplitudes
    } catch (e) {
      this.logger.warn(`振幅提取失败: ${errorMessage(e)}`)
      return []
    } finally {
      try {
        fs.unlinkSync(pcmPath)
      } catch {
        // Best-effort cleanup: the file may never have been created.
      }
    }
  }

  /**
   * Loads the cached amplitude envelope for `hash`.
   *
   * @returns The stored array, or `null` if the file is missing, unreadable,
   *          not valid JSON, or not an array of numbers.
   */
  private loadAmplitudeData(hash: string): number[] | null {
    try {
      const ampPath = path.join(CACHE_DIR, `${hash}.amp`)
      if (!fs.existsSync(ampPath)) return null
      const parsed: unknown = JSON.parse(fs.readFileSync(ampPath, 'utf8'))
      return isNumberArray(parsed) ? parsed : null
    } catch {
      return null
    }
  }

  /**
   * Flattens text to a single line: line feeds become spaces, carriage
   * returns are removed. No quoting is applied because the text is passed
   * to the child process as a plain argument, not through a shell.
   */
  private normalizeText(text: string): string {
    return text.replace(/\n/g, ' ').replace(/\r/g, '')
  }

  /**
   * Estimates audio duration in milliseconds from the file size.
   *
   * NOTE(review): the estimate assumes a constant ~16 kbps stream (computed as
   * 16 * 1024 bits per second); confirm this matches the bitrate edge-tts
   * actually emits, otherwise durations are off by a constant factor.
   *
   * @returns The estimate, or 3000 if the file cannot be stat'ed.
   */
  private async getDuration(filePath: string): Promise<number> {
    try {
      const stat = fs.statSync(filePath)
      const bytesPerMs = 16 * 1024 / 8 / 1000 // 16kbps → bytes per ms
      return Math.round(stat.size / bytesPerMs)
    } catch {
      return 3000
    }
  }
}