Files
zhiyin/backend/src/modules/tts/tts.service.ts
T

126 lines
4.0 KiB
TypeScript

import * as crypto from 'crypto'
import * as fs from 'fs'
import * as path from 'path'
import { execFile, execSync } from 'child_process'
import { promisify } from 'util'
import { Injectable, Logger } from '@nestjs/common'
/** Directory in which synthesized audio and its derived amplitude files are cached. */
const CACHE_DIR = '/tmp/tts-cache'

/** Outcome of one synthesis request. */
interface TtsResult {
  /** Cache key identifying the synthesized clip. */
  hash: string
  /** Location of the clip's audio file on disk. */
  filePath: string
  /** Length of the clip in milliseconds. */
  durationMs: number
  /** Loudness values sampled along the clip. */
  amplitudeData: number[]
}
/** Allow-list of voice identifiers that callers may request. */
const VALID_VOICES = new Set<string>([
  'zh-CN-XiaoxiaoNeural',
  'zh-CN-XiaoyiNeural',
  'zh-CN-YunjianNeural',
  'zh-CN-YunxiNeural',
  'zh-CN-YunxiaNeural',
  'zh-CN-YunyangNeural',
  'zh-CN-liaoning-XiaobeiNeural',
  'zh-CN-shaanxi-XiaoniNeural',
])

/**
 * Rejects any voice identifier that is not on the allow-list.
 *
 * @param voice Voice identifier to check (matched exactly, case-sensitive).
 * @throws Error when `voice` is not a member of `VALID_VOICES`.
 */
function validateVoice(voice: string): void {
  const supported = VALID_VOICES.has(voice)
  if (supported) {
    return
  }
  throw new Error(`不支持的语音: ${voice}`)
}
/**
 * Promise-based `execFile`. Arguments are passed as an argv array and no shell
 * is involved, so user-supplied text cannot be interpreted as shell syntax.
 */
const execFileAsync = promisify(execFile)

/**
 * Text-to-speech service backed by the `edge-tts` command-line tool.
 *
 * Synthesized clips are cached in `CACHE_DIR` as `<hash>.mp3`, with a sibling
 * `<hash>.amp` JSON file holding the clip's amplitude envelope (computed by
 * decoding the clip with `ffmpeg`).
 */
@Injectable()
export class TtsService {
  /** Sample rate (Hz) the audio is decoded to before amplitude analysis. */
  private static readonly PCM_SAMPLE_RATE = 16000
  /** Length of one amplitude window in milliseconds. */
  private static readonly CHUNK_MS = 50
  /** RMS level that maps to amplitude 1.0; louder windows are clamped to 1. */
  private static readonly RMS_FULL_SCALE = 16000
  /** Shape of the hashes produced by `synthesize` (lowercase MD5 hex digest). */
  private static readonly HASH_PATTERN = /^[0-9a-f]{32}$/
  /** Upper bound for buffered child-process output (stdout/stderr are unused but still collected). */
  private static readonly MAX_CHILD_OUTPUT = 16 * 1024 * 1024

  private readonly logger = new Logger(TtsService.name)

  /** Makes sure the cache directory exists before the first request. */
  constructor() {
    if (!fs.existsSync(CACHE_DIR)) {
      fs.mkdirSync(CACHE_DIR, { recursive: true })
    }
  }

  /**
   * Synthesizes `text` with the given voice, reusing the cached clip when one exists.
   *
   * `durationMs` is derived from the number of 50 ms amplitude windows when
   * amplitude data is available (accurate to within one window); otherwise it
   * falls back to a rough file-size estimate.
   *
   * @param text Text to speak.
   * @param voice Voice identifier; must pass `validateVoice`.
   * @returns Cache hash, audio file path, duration and amplitude envelope.
   * @throws Error when the voice is not supported or `edge-tts` fails or times out.
   */
  async synthesize(text: string, voice: string = 'zh-CN-XiaoxiaoNeural'): Promise<TtsResult> {
    validateVoice(voice)
    const hash = crypto.createHash('md5').update(text + voice).digest('hex')
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)
    try {
      const cached = fs.existsSync(filePath)
      if (!cached) {
        await this.generateAudio(text, voice, filePath)
      }
      // A cached clip whose amplitude file is missing or unreadable only needs
      // re-analysis, not a second synthesis. Fresh clips are always re-analysed.
      const cachedAmplitude = cached ? await this.loadAmplitudeData(hash) : null
      const amplitudeData = cachedAmplitude ?? (await this.extractAmplitude(filePath, hash))
      const durationMs =
        amplitudeData.length > 0
          ? amplitudeData.length * TtsService.CHUNK_MS
          : await this.getDuration(filePath)
      if (!cached) {
        this.logger.log(`TTS generated: hash=${hash} text="${text.slice(0, 40)}..." duration=${durationMs}ms`)
      }
      return { hash, filePath, durationMs, amplitudeData }
    } catch (e) {
      this.logger.error(`TTS failed: ${TtsService.describeError(e)}`)
      throw e
    }
  }

  /**
   * Looks up a previously synthesized clip by its hash.
   *
   * @param hash Hash returned by `synthesize`.
   * @returns Path of the cached audio file, or `null` when the hash is
   *   malformed or no such clip is cached.
   */
  getCachedPath(hash: string): string | null {
    // Only accept the digest shape this service produces, so a crafted value
    // such as "../../x" can never resolve outside the cache directory.
    if (!TtsService.HASH_PATTERN.test(hash)) {
      return null
    }
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)
    return fs.existsSync(filePath) ? filePath : null
  }

  /**
   * Runs `edge-tts` and publishes its output at `filePath`.
   *
   * The tool writes to a uniquely named temporary file that is renamed into
   * place on success, so a failed or timed-out run never leaves a truncated
   * clip at the cache path.
   */
  private async generateAudio(text: string, voice: string, filePath: string): Promise<void> {
    const tmpPath = `${filePath}.${crypto.randomBytes(6).toString('hex')}.tmp`
    try {
      await execFileAsync(
        'edge-tts',
        // `--text=<value>` keeps text that starts with '-' from being parsed as an option.
        ['--voice', voice, `--text=${this.normalizeText(text)}`, '--write-media', tmpPath],
        { timeout: 30000, maxBuffer: TtsService.MAX_CHILD_OUTPUT },
      )
      await fs.promises.rename(tmpPath, filePath)
    } catch (e) {
      await fs.promises.unlink(tmpPath).catch(() => undefined)
      throw e
    }
  }

  /**
   * Decodes the clip to 16 kHz mono signed 16-bit little-endian PCM with
   * `ffmpeg`, computes its amplitude envelope and stores it as `<hash>.amp`.
   *
   * Best effort: any failure is logged as a warning and yields an empty array.
   */
  private async extractAmplitude(mp3Path: string, hash: string): Promise<number[]> {
    // Unique name so concurrent requests for the same clip do not share a temp file.
    const pcmPath = path.join(CACHE_DIR, `${hash}.${crypto.randomBytes(6).toString('hex')}.pcm`)
    try {
      await execFileAsync(
        'ffmpeg',
        [
          '-y', '-loglevel', 'error',
          '-i', mp3Path,
          '-f', 's16le', '-acodec', 'pcm_s16le',
          '-ar', String(TtsService.PCM_SAMPLE_RATE), '-ac', '1',
          pcmPath,
        ],
        { timeout: 10000, maxBuffer: TtsService.MAX_CHILD_OUTPUT },
      )
      const pcm = await fs.promises.readFile(pcmPath)
      const amplitudes = this.computeAmplitudes(pcm)
      await fs.promises.writeFile(path.join(CACHE_DIR, `${hash}.amp`), JSON.stringify(amplitudes))
      return amplitudes
    } catch (e) {
      this.logger.warn(`振幅提取失败: ${TtsService.describeError(e)}`)
      return []
    } finally {
      // Remove the intermediate PCM file on success and on failure alike.
      await fs.promises.unlink(pcmPath).catch(() => undefined)
    }
  }

  /**
   * Computes one RMS amplitude per 50 ms window of signed 16-bit little-endian
   * PCM, normalised by `RMS_FULL_SCALE`, clamped to 1 and rounded to 4 decimals.
   * The final window may be shorter than 50 ms; a trailing odd byte is ignored.
   */
  private computeAmplitudes(pcm: Buffer): number[] {
    const sampleCount = Math.floor(pcm.byteLength / 2)
    const chunkSize = Math.floor((TtsService.PCM_SAMPLE_RATE * TtsService.CHUNK_MS) / 1000)
    const amplitudes: number[] = []
    for (let start = 0; start < sampleCount; start += chunkSize) {
      const end = Math.min(start + chunkSize, sampleCount)
      let sumSq = 0
      for (let j = start; j < end; j++) {
        // Explicit little-endian read: independent of host byte order and buffer alignment.
        const sample = pcm.readInt16LE(j * 2)
        sumSq += sample * sample
      }
      const rms = Math.sqrt(sumSq / (end - start))
      amplitudes.push(Number(Math.min(1, rms / TtsService.RMS_FULL_SCALE).toFixed(4)))
    }
    return amplitudes
  }

  /**
   * Reads the cached amplitude envelope for `hash`.
   *
   * @returns The stored values, or `null` when the file is missing, unreadable
   *   or does not contain a JSON array of finite numbers.
   */
  private async loadAmplitudeData(hash: string): Promise<number[] | null> {
    try {
      const raw = await fs.promises.readFile(path.join(CACHE_DIR, `${hash}.amp`), 'utf8')
      const parsed: unknown = JSON.parse(raw)
      if (!Array.isArray(parsed)) {
        return null
      }
      const values: number[] = []
      for (const value of parsed) {
        if (typeof value !== 'number' || !Number.isFinite(value)) {
          return null
        }
        values.push(value)
      }
      return values
    } catch {
      return null
    }
  }

  /**
   * Flattens line breaks so the text reaches the synthesizer as a single line:
   * each `\n` becomes a space and `\r` is dropped. No quoting is needed because
   * the text is passed as a process argument, not through a shell.
   */
  private normalizeText(text: string): string {
    return text.replace(/\n/g, ' ').replace(/\r/g, '')
  }

  /**
   * Fallback duration estimate based on file size, assuming a constant
   * 16 kbit/s stream. Only a rough guess; used when no amplitude data exists.
   *
   * @returns Estimated duration in milliseconds, or 3000 when the file cannot be inspected.
   */
  private async getDuration(filePath: string): Promise<number> {
    try {
      const { size } = await fs.promises.stat(filePath)
      const bytesPerMs = (16 * 1024) / 8 / 1000 // 16kbps → bytes per ms
      return Math.round(size / bytesPerMs)
    } catch {
      return 3000
    }
  }

  /** Extracts a printable message from a caught value of unknown type. */
  private static describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e)
  }
}