feat: TTS服务 + 数字人面试组件 (P1)

This commit is contained in:
yuzhiran
2026-06-12 09:42:06 +08:00
parent 065fe7a186
commit a55cb56be2
11 changed files with 553 additions and 32 deletions
+33
View File
@@ -0,0 +1,33 @@
import { Controller, Get, Post, Body, Param, Res, HttpException, HttpStatus } from '@nestjs/common'
import { Response } from 'express'
import * as fs from 'fs'
import { TtsService } from './tts.service'
import { Public } from '../../common/decorators/public.decorator'
/**
 * HTTP endpoints for text-to-speech synthesis and retrieval of cached audio.
 *
 * Both routes are marked `@Public()`, so every request value is treated as
 * untrusted input and validated before it reaches TtsService or the
 * filesystem.
 */
@Controller('tts')
export class TtsController {
  constructor(private ttsService: TtsService) {}

  /**
   * Synthesizes speech for `text` and returns the cache hash plus the
   * duration reported by the service.
   *
   * @param text  Text to speak; must be a non-blank string of at most 500 characters.
   * @param voice Optional voice name (letters, digits and hyphens only).
   * @returns `{ hash, durationMs }`; the hash is the key for `GET tts/audio/:hash`.
   * @throws HttpException 400 when `text` or `voice` is invalid.
   */
  @Public()
  @Post('synthesize')
  async synthesize(@Body('text') text: string, @Body('voice') voice?: string) {
    // The body is arbitrary JSON at runtime, so check the type as well as the length.
    if (typeof text !== 'string' || text.trim().length === 0 || text.length > 500) {
      throw new HttpException('文本不能为空且不超过500字', HttpStatus.BAD_REQUEST)
    }
    // The voice name ends up on a command line; only allow a conservative
    // character set so it can never carry shell syntax or option flags.
    if (voice != null && (typeof voice !== 'string' || !/^[A-Za-z0-9-]{1,64}$/.test(voice))) {
      throw new HttpException('音色参数不合法', HttpStatus.BAD_REQUEST)
    }
    // A JSON `null` would bypass the service's default parameter; map it to undefined.
    const result = await this.ttsService.synthesize(text, voice ?? undefined)
    return { hash: result.hash, durationMs: result.durationMs }
  }

  /**
   * Streams a previously synthesized MP3 identified by `hash`.
   *
   * @param hash 32-character lowercase hex digest returned by `synthesize`.
   * @param res  Express response the audio is piped into.
   * @throws HttpException 404 when the hash is malformed or no audio is cached for it.
   */
  @Public()
  @Get('audio/:hash')
  async getAudio(@Param('hash') hash: string, @Res() res: Response) {
    // Reject anything that is not an MD5 hex digest before it is joined into
    // a filesystem path; this blocks `../` traversal through encoded params.
    const filePath = /^[a-f0-9]{32}$/.test(hash) ? this.ttsService.getCachedPath(hash) : null
    if (!filePath) {
      throw new HttpException('音频不存在', HttpStatus.NOT_FOUND)
    }

    const stream = fs.createReadStream(filePath)

    // Without an 'error' listener a failed open/read (e.g. the cache file was
    // removed after the existence check) would surface as an unhandled error event.
    stream.on('error', (err: NodeJS.ErrnoException) => {
      if (res.headersSent) {
        // Part of the body is already on the wire; the only option is to abort.
        res.destroy(err)
        return
      }
      const missing = err.code === 'ENOENT'
      const status = missing ? HttpStatus.NOT_FOUND : HttpStatus.INTERNAL_SERVER_ERROR
      res.status(status).json({ statusCode: status, message: missing ? '音频不存在' : '音频读取失败' })
    })

    // Only advertise the long-lived cache headers once the file is known to be
    // readable, so error responses are never cached for a year.
    stream.once('open', () => {
      res.setHeader('Content-Type', 'audio/mpeg')
      res.setHeader('Cache-Control', 'public, max-age=31536000')
      stream.pipe(res)
    })

    // `pipe` does not close the source when the client disconnects early.
    res.once('close', () => stream.destroy())
  }
}
+10
View File
@@ -0,0 +1,10 @@
import { Module } from '@nestjs/common'
import { TtsController } from './tts.controller'
import { TtsService } from './tts.service'
/**
 * Nest module for the text-to-speech feature.
 *
 * Registers TtsController for the HTTP routes and provides TtsService;
 * the service is also exported so that modules importing TtsModule can
 * inject it.
 */
@Module({
controllers: [TtsController],
providers: [TtsService],
exports: [TtsService],
})
export class TtsModule {}
+67
View File
@@ -0,0 +1,67 @@
import * as crypto from 'crypto'
import * as fs from 'fs'
import * as path from 'path'
import { execFile, execSync } from 'child_process'
import { Injectable, Logger } from '@nestjs/common'
/** Directory where TtsService stores synthesized MP3 files, one `<hash>.mp3` per entry. */
const CACHE_DIR = '/tmp/tts-cache'
/** Outcome of a synthesis request, whether freshly generated or served from cache. */
interface TtsResult {
/** Hex digest that identifies the audio and names its cache file. */
hash: string
/** Path of the MP3 file inside the cache directory. */
filePath: string
/** Audio length in milliseconds; an estimate derived from the file size, not a decoded value. */
durationMs: number
}
/**
 * Text-to-speech service backed by the `edge-tts` command-line tool.
 *
 * Synthesized audio is cached on disk under CACHE_DIR as `<hash>.mp3`, where
 * the hash is the MD5 hex digest of the text concatenated with the voice
 * name (used purely as a cache key, not for security).
 */
@Injectable()
export class TtsService {
  /** Shape of a cache key: a 32-character lowercase hex MD5 digest. */
  private static readonly HASH_PATTERN = /^[a-f0-9]{32}$/
  /** Upper bound for a single `edge-tts` run, in milliseconds. */
  private static readonly SYNTH_TIMEOUT_MS = 30000

  private readonly logger = new Logger(TtsService.name)

  constructor() {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(CACHE_DIR, { recursive: true })
  }

  /**
   * Returns cached audio for `text`/`voice`, synthesizing it first if needed.
   *
   * @param text  Text to speak.
   * @param voice Voice name passed to `edge-tts`.
   * @returns Hash, cache file path and estimated duration of the audio.
   * @throws Whatever error the `edge-tts` process or the filesystem raised;
   *         it is logged and then rethrown unchanged.
   */
  async synthesize(text: string, voice: string = 'zh-CN-XiaoxiaoNeural'): Promise<TtsResult> {
    const hash = crypto.createHash('md5').update(text + voice).digest('hex')
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)
    if (fs.existsSync(filePath)) {
      const durationMs = await this.getDuration(filePath)
      return { hash, filePath, durationMs }
    }

    // Write to a unique temporary name and rename on success, so a failed or
    // timed-out run can never leave a truncated file that later looks like a
    // valid cache entry.
    const tmpPath = path.join(CACHE_DIR, `${hash}.${crypto.randomBytes(6).toString('hex')}.tmp.mp3`)
    try {
      // The cache lives under /tmp and may have been cleaned since start-up.
      await fs.promises.mkdir(CACHE_DIR, { recursive: true })
      // `--opt=value` keeps a value that starts with '-' from being parsed as a flag.
      await this.runEdgeTts([
        `--voice=${voice}`,
        `--text=${this.normalizeText(text)}`,
        `--write-media=${tmpPath}`,
      ])
      await fs.promises.rename(tmpPath, filePath)
      const durationMs = await this.getDuration(filePath)
      this.logger.log(`TTS generated: hash=${hash} text="${text.slice(0, 40)}..." duration=${durationMs}ms`)
      return { hash, filePath, durationMs }
    } catch (e) {
      // Best-effort cleanup; the temp file may not exist if the process never started.
      await fs.promises.unlink(tmpPath).catch(() => undefined)
      this.logger.error(`TTS failed: ${e instanceof Error ? e.message : String(e)}`)
      throw e
    }
  }

  /**
   * Looks up the cache file for `hash`.
   *
   * @param hash Cache key previously returned by `synthesize`.
   * @returns The file path, or `null` when the hash is malformed or nothing is cached.
   */
  getCachedPath(hash: string): string | null {
    // Anything that is not a plain hex digest could escape CACHE_DIR via `../`.
    if (typeof hash !== 'string' || !TtsService.HASH_PATTERN.test(hash)) {
      return null
    }
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)
    return fs.existsSync(filePath) ? filePath : null
  }

  /**
   * Runs `edge-tts` with the given arguments, without a shell.
   *
   * Arguments are handed to the process as an argv array, so quotes, `$()`,
   * backticks and similar characters in user text are never interpreted.
   * Running asynchronously also keeps the event loop free while the tool works.
   */
  private runEdgeTts(args: string[]): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      execFile('edge-tts', args, { timeout: TtsService.SYNTH_TIMEOUT_MS }, (err) => {
        if (err) {
          reject(err)
        } else {
          resolve()
        }
      })
    })
  }

  /** Flattens line breaks so the text reaches the tool as a single line. */
  private normalizeText(text: string): string {
    return text.replace(/\n/g, ' ').replace(/\r/g, '')
  }

  /**
   * Estimates audio duration in milliseconds from the file size.
   *
   * Returns 3000 when the file cannot be inspected.
   */
  private async getDuration(filePath: string): Promise<number> {
    try {
      // Estimate duration from file size assuming a 16 kbit/s MP3 stream.
      // NOTE(review): edge-tts is believed to emit 48 kbit/s MP3 by default;
      // confirm the real bitrate before relying on this value for timing.
      const stat = await fs.promises.stat(filePath)
      const bytesPerMs = 16 * 1024 / 8 / 1000 // 16kbps → bytes per ms
      return Math.round(stat.size / bytesPerMs)
    } catch {
      return 3000
    }
  }
}