126 lines
4.0 KiB
TypeScript
126 lines
4.0 KiB
TypeScript
import { execFileSync, execSync } from 'child_process'
import * as crypto from 'crypto'
import * as fs from 'fs'
import * as path from 'path'

import { Injectable, Logger } from '@nestjs/common'
|
|
|
|
// Directory holding the cached synthesis artifacts: `<hash>.mp3` audio files
// and the `<hash>.amp` amplitude files derived from them.
const CACHE_DIR = '/tmp/tts-cache'
|
|
|
|
/** Result of a synthesis request: the cached audio file plus metadata about it. */
interface TtsResult {
  /** Hex digest used as the cache key and as the base name of the cached files. */
  hash: string
  /** Path of the generated (or cached) mp3 file. */
  filePath: string
  /** Audio duration in milliseconds. */
  durationMs: number
  /** Amplitude values, one per analysed slice of audio; may be empty. */
  amplitudeData: number[]
}
|
|
|
|
/** Allow-list of voice identifiers that callers may request. */
const VALID_VOICES = new Set<string>([
  'zh-CN-XiaoxiaoNeural',
  'zh-CN-XiaoyiNeural',
  'zh-CN-YunjianNeural',
  'zh-CN-YunxiNeural',
  'zh-CN-YunxiaNeural',
  'zh-CN-YunyangNeural',
  'zh-CN-liaoning-XiaobeiNeural',
  'zh-CN-shaanxi-XiaoniNeural',
])
|
|
|
|
/**
 * Rejects any voice identifier that is not a member of VALID_VOICES.
 *
 * @param voice - voice identifier requested by the caller
 * @throws Error when `voice` is not in the allow-list
 */
function validateVoice(voice: string): void {
  const isSupported = VALID_VOICES.has(voice)
  if (isSupported) {
    return
  }
  throw new Error(`不支持的语音: ${voice}`)
}
|
|
|
|
/**
 * Text-to-speech service that invokes the `edge-tts` command-line tool and
 * caches its output on disk under CACHE_DIR.
 *
 * For each request it stores `<hash>.mp3` (the audio) and `<hash>.amp` (a JSON
 * array of amplitude values computed from the audio via `ffmpeg`), where
 * `hash` is the MD5 hex digest of `text + voice`.
 */
@Injectable()
export class TtsService {
  /** Shape of the hashes produced by `synthesize` (MD5 hex digest). */
  private static readonly HASH_PATTERN = /^[0-9a-f]{32}$/
  /** Sample rate, in Hz, that ffmpeg is asked to decode to. */
  private static readonly PCM_SAMPLE_RATE = 16000
  /** Length, in seconds, of the slice summarised by one amplitude value. */
  private static readonly CHUNK_SECONDS = 0.05
  /** RMS value that maps to an amplitude of 1; larger values are clamped. */
  private static readonly AMPLITUDE_REFERENCE = 16000

  private readonly logger = new Logger(TtsService.name)

  constructor() {
    // `recursive: true` does not fail when the directory already exists, so a
    // separate existence check is unnecessary.
    fs.mkdirSync(CACHE_DIR, { recursive: true })
  }

  /**
   * Synthesizes `text` with the given voice, reusing the cached result when
   * both the audio file and its amplitude data are already on disk.
   *
   * @param text - text to speak; line feeds become spaces and carriage
   *   returns are dropped before it is handed to `edge-tts`
   * @param voice - voice identifier; must be one accepted by `validateVoice`
   * @returns the cache hash, audio path, estimated duration and amplitudes
   * @throws Error when the voice is rejected, or when `edge-tts` fails or
   *   exceeds its 30 second timeout (the failure is logged and rethrown)
   */
  async synthesize(text: string, voice: string = 'zh-CN-XiaoxiaoNeural'): Promise<TtsResult> {
    validateVoice(voice)
    const hash = crypto.createHash('md5').update(text + voice).digest('hex')
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)

    if (fs.existsSync(filePath)) {
      const cachedAmplitudes = this.loadAmplitudeData(hash)
      if (cachedAmplitudes) {
        const durationMs = await this.getDuration(filePath)
        return { hash, filePath, durationMs, amplitudeData: cachedAmplitudes }
      }
      // Audio without usable amplitude data falls through and is regenerated.
    }

    // Generate into a temporary name and rename on success, so an interrupted
    // or timed-out run never leaves a truncated file at the cache path.
    const tmpPath = `${filePath}.${process.pid}.${Date.now()}.tmp`
    try {
      // An argument vector (no shell) means quotes, `$`, backticks and
      // backslashes in `text` are passed through literally instead of being
      // interpreted. The `--text=<value>` form keeps text that starts with `-`
      // from being parsed as a separate option.
      execFileSync(
        'edge-tts',
        ['--voice', voice, `--text=${this.flattenLineBreaks(text)}`, '--write-media', tmpPath],
        { timeout: 30000 },
      )
      fs.renameSync(tmpPath, filePath)

      const durationMs = await this.getDuration(filePath)
      const amplitudeData = this.extractAmplitude(filePath, hash)
      this.logger.log(`TTS generated: hash=${hash} text="${text.slice(0, 40)}..." duration=${durationMs}ms`)
      return { hash, filePath, durationMs, amplitudeData }
    } catch (e: unknown) {
      TtsService.removeQuietly(tmpPath)
      this.logger.error(`TTS failed: ${TtsService.describeError(e)}`)
      throw e
    }
  }

  /**
   * Looks up the cached audio file for a hash returned by `synthesize`.
   *
   * @param hash - 32-character lowercase hex digest
   * @returns the mp3 path when it exists; null when it does not, or when
   *   `hash` is not a well-formed digest
   */
  getCachedPath(hash: string): string | null {
    // Anything that is not a plain hex digest (e.g. containing `../`) could
    // resolve outside the cache directory, so it is refused outright.
    if (!TtsService.HASH_PATTERN.test(hash)) {
      return null
    }
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)
    return fs.existsSync(filePath) ? filePath : null
  }

  /**
   * Decodes the mp3 to 16 kHz mono 16-bit PCM with `ffmpeg` and reduces it to
   * one normalised RMS value per 50 ms slice. On success the values are also
   * written to `<hash>.amp` so later cache hits can reuse them.
   *
   * @param mp3Path - audio file to analyse
   * @param hash - cache key used to name the temporary and output files
   * @returns amplitudes in [0, 1] rounded to 4 decimals; an empty array when
   *   decoding or writing fails (the failure is logged as a warning)
   */
  private extractAmplitude(mp3Path: string, hash: string): number[] {
    const pcmPath = path.join(CACHE_DIR, `${hash}.pcm`)
    try {
      execFileSync(
        'ffmpeg',
        [
          '-y',
          '-i', mp3Path,
          '-f', 's16le',
          '-acodec', 'pcm_s16le',
          '-ar', String(TtsService.PCM_SAMPLE_RATE),
          '-ac', '1',
          pcmPath,
        ],
        // ffmpeg's console output is not needed; the result is read from pcmPath.
        { timeout: 10000, stdio: 'ignore' },
      )

      const pcm = fs.readFileSync(pcmPath)
      // Two bytes per sample; a trailing odd byte, if any, is ignored.
      const sampleCount = Math.floor(pcm.byteLength / 2)
      const chunkSize = Math.floor(TtsService.PCM_SAMPLE_RATE * TtsService.CHUNK_SECONDS)
      const amplitudes: number[] = []

      for (let start = 0; start < sampleCount; start += chunkSize) {
        const end = Math.min(start + chunkSize, sampleCount)
        let sumSq = 0
        for (let j = start; j < end; j++) {
          // Explicit little-endian read matches the `s16le` format requested
          // above regardless of host byte order or buffer alignment.
          const sample = pcm.readInt16LE(j * 2)
          sumSq += sample * sample
        }
        const rms = Math.sqrt(sumSq / (end - start))
        amplitudes.push(Number(Math.min(1, rms / TtsService.AMPLITUDE_REFERENCE).toFixed(4)))
      }

      fs.writeFileSync(path.join(CACHE_DIR, `${hash}.amp`), JSON.stringify(amplitudes))
      return amplitudes
    } catch (e: unknown) {
      this.logger.warn(`振幅提取失败: ${TtsService.describeError(e)}`)
      return []
    } finally {
      // The PCM dump is only an intermediate; remove it on every exit path.
      TtsService.removeQuietly(pcmPath)
    }
  }

  /**
   * Reads previously stored amplitude data for a hash.
   *
   * @param hash - cache key naming the `.amp` file
   * @returns the stored values, or null when the file is missing, unreadable,
   *   not valid JSON, or not an array of numbers
   */
  private loadAmplitudeData(hash: string): number[] | null {
    try {
      const ampPath = path.join(CACHE_DIR, `${hash}.amp`)
      if (!fs.existsSync(ampPath)) return null
      const parsed: unknown = JSON.parse(fs.readFileSync(ampPath, 'utf8'))
      if (!Array.isArray(parsed) || !parsed.every((value) => typeof value === 'number')) {
        return null
      }
      return parsed
    } catch {
      return null
    }
  }

  /**
   * Collapses line breaks so the text is passed as a single line: each line
   * feed becomes a space and carriage returns are removed.
   */
  private flattenLineBreaks(text: string): string {
    return text.replace(/\n/g, ' ').replace(/\r/g, '')
  }

  /**
   * Estimates the audio duration from the file size, assuming ~16 kbps mp3.
   *
   * NOTE(review): the estimate is only as good as the assumed bitrate —
   * confirm it matches the format `edge-tts` actually emits.
   *
   * @param filePath - audio file to measure
   * @returns estimated duration in milliseconds, or 3000 when the file cannot
   *   be stat'ed
   */
  private async getDuration(filePath: string): Promise<number> {
    try {
      const stat = fs.statSync(filePath)
      const bytesPerMs = 16 * 1024 / 8 / 1000 // 16kbps → bytes per ms
      return Math.round(stat.size / bytesPerMs)
    } catch {
      return 3000
    }
  }

  /** Extracts a printable message from a caught value of unknown type. */
  private static describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e)
  }

  /** Deletes a file if present; failures are deliberately ignored. */
  private static removeQuietly(target: string): void {
    try {
      fs.unlinkSync(target)
    } catch {
      // Best-effort cleanup: the file may never have been created.
    }
  }
}
|