feat: TTS服务 + 数字人面试组件 (P1)

This commit is contained in:
yuzhiran
2026-06-12 09:42:06 +08:00
parent 065fe7a186
commit a55cb56be2
11 changed files with 553 additions and 32 deletions
@@ -28,7 +28,9 @@ export class InterviewController {
@Param('id') id: string,
@CurrentUser('userId') userId: string,
@Body('answer') answer: string,
@Body('avatar') avatar?: boolean,
) {
if (avatar) return this.interviewService.answerWithAvatar(id, userId, answer)
return this.interviewService.answer(id, userId, answer)
}
@@ -5,6 +5,7 @@ import { InterviewService } from './interview.service'
import { Interview, InterviewSchema } from './interview.schema'
import { Progress, ProgressSchema } from '../schemas/progress.schema'
import { UserModule } from '../user/user.module'
import { TtsModule } from '../tts/tts.module'
@Module({
imports: [
@@ -13,6 +14,7 @@ import { UserModule } from '../user/user.module'
{ name: Progress.name, schema: ProgressSchema },
]),
UserModule,
TtsModule,
],
controllers: [InterviewController],
providers: [InterviewService],
@@ -6,6 +6,7 @@ import { Progress, ProgressDocument } from '../schemas/progress.schema'
import { AiService } from '../ai/ai.service'
import { UserService } from '../user/user.service'
import { QuotaService } from '../user/quota.service'
import { TtsService } from '../tts/tts.service'
import { analyzeSpeech } from '../../common/utils/filler-words'
@Injectable()
@@ -16,6 +17,7 @@ export class InterviewService {
private aiService: AiService,
private userService: UserService,
private quotaService: QuotaService,
private ttsService: TtsService,
) {}
async create(userId: string, position: string) {
@@ -99,6 +101,20 @@ ${conversationHistory}
}
}
/**
 * Submits an answer via `answer()` and, when the result contains an AI message
 * with non-empty content, also synthesizes speech for that message.
 *
 * On success the result is extended with `ttsHash` and `ttsDurationMs`.
 * Synthesis is best-effort: if it throws, the plain `answer()` result is returned.
 *
 * @param interviewId Interview the answer belongs to.
 * @param userId Owner of the interview.
 * @param answer The candidate's answer text.
 * @returns The `answer()` result, optionally augmented with TTS metadata.
 */
async answerWithAvatar(interviewId: string, userId: string, answer: string) {
  const result = await this.answer(interviewId, userId, answer)

  // The first message with role 'ai' is the one that gets spoken.
  const spokenText = result.messages?.find(message => message.role === 'ai')?.content
  if (!spokenText) {
    return result
  }

  try {
    const { hash, durationMs } = await this.ttsService.synthesize(spokenText)
    return { ...result, ttsHash: hash, ttsDurationMs: durationMs }
  } catch {
    // Audio is optional; fall back to the text-only result when synthesis fails.
    return result
  }
}
async complete(interviewId: string, userId: string) {
const interview = await this.interviewModel.findOne({ _id: interviewId, userId }).exec()
if (!interview) throw new HttpException('面试不存在', HttpStatus.NOT_FOUND)
+33
View File
@@ -0,0 +1,33 @@
import { Controller, Get, Post, Body, Param, Res, HttpException, HttpStatus } from '@nestjs/common'
import { Response } from 'express'
import * as fs from 'fs'
import { TtsService } from './tts.service'
import { Public } from '../../common/decorators/public.decorator'
/**
 * HTTP endpoints for text-to-speech.
 *
 * - `POST /tts/synthesize` generates (or reuses) audio for a text and returns its hash.
 * - `GET /tts/audio/:hash` streams the cached MP3 identified by that hash.
 *
 * Both routes are marked `@Public()`, so every input here is untrusted and is
 * validated before it reaches the service or the filesystem.
 * NOTE(review): `synthesize` may spawn a process per uncached request while being
 * public — consider rate limiting; confirm what `@Public()` exempts.
 */
@Controller('tts')
export class TtsController {
  /** Maximum number of characters accepted for synthesis. */
  private static readonly MAX_TEXT_LENGTH = 500
  /** Voice identifiers such as `zh-CN-XiaoxiaoNeural`: letters, digits and dashes only. */
  private static readonly VOICE_PATTERN = /^[A-Za-z0-9-]{1,64}$/
  /** Hashes handed out by the service are 32 lowercase hex characters (MD5). */
  private static readonly HASH_PATTERN = /^[a-f0-9]{32}$/

  constructor(private ttsService: TtsService) {}

  /**
   * Synthesizes speech for `text`.
   *
   * @param text Text to speak; must be a non-blank string of at most 500 characters.
   * @param voice Optional voice identifier; the service default is used when omitted.
   * @returns The audio hash (usable with `GET /tts/audio/:hash`) and its estimated duration.
   * @throws HttpException 400 when `text` or `voice` is invalid.
   */
  @Public()
  @Post('synthesize')
  async synthesize(@Body('text') text: string, @Body('voice') voice?: string) {
    // The body is arbitrary JSON: a non-string `text` (array, object, number) would
    // otherwise slip past a bare length check.
    if (
      typeof text !== 'string' ||
      text.trim().length === 0 ||
      text.length > TtsController.MAX_TEXT_LENGTH
    ) {
      throw new HttpException('文本不能为空且不超过500字', HttpStatus.BAD_REQUEST)
    }
    // `voice` ends up on a command line, so only allow identifier-like values.
    if (voice != null && (typeof voice !== 'string' || !TtsController.VOICE_PATTERN.test(voice))) {
      throw new HttpException('语音参数无效', HttpStatus.BAD_REQUEST)
    }
    // A JSON `null` voice is treated like an omitted one so the service default applies.
    const result = await this.ttsService.synthesize(text, voice ?? undefined)
    return { hash: result.hash, durationMs: result.durationMs }
  }

  /**
   * Streams the cached MP3 for `hash`.
   *
   * @param hash Hash previously returned by `synthesize`.
   * @param res Express response the audio is piped into.
   * @throws HttpException 404 when the hash is malformed or no audio is cached for it.
   */
  @Public()
  @Get('audio/:hash')
  async getAudio(@Param('hash') hash: string, @Res() res: Response) {
    // Reject anything that is not a plain hash before it is used to build a file path
    // (a URL-encoded "../" would otherwise escape the cache directory).
    const filePath =
      typeof hash === 'string' && TtsController.HASH_PATTERN.test(hash)
        ? this.ttsService.getCachedPath(hash)
        : null
    if (!filePath) {
      throw new HttpException('音频不存在', HttpStatus.NOT_FOUND)
    }

    const stream = fs.createReadStream(filePath)
    // Without a listener, a read failure (e.g. the file vanished after the existence
    // check) is emitted as an unhandled 'error' event.
    stream.on('error', () => {
      if (res.headersSent) {
        // Part of the body is already out; all we can do is abort the response.
        res.destroy()
        return
      }
      // Do not let clients cache the failure for a year.
      res.removeHeader('Cache-Control')
      res
        .status(HttpStatus.NOT_FOUND)
        .json({ statusCode: HttpStatus.NOT_FOUND, message: '音频不存在' })
    })

    res.setHeader('Content-Type', 'audio/mpeg')
    // Content is addressed by hash, so it can be cached for a long time.
    res.setHeader('Cache-Control', 'public, max-age=31536000')
    stream.pipe(res)
  }
}
+10
View File
@@ -0,0 +1,10 @@
import { Module } from '@nestjs/common'
import { TtsController } from './tts.controller'
import { TtsService } from './tts.service'
/**
 * Nest module for text-to-speech: registers `TtsController` and provides
 * `TtsService`, which is also exported so importing modules can inject it.
 */
@Module({
  providers: [TtsService],
  controllers: [TtsController],
  exports: [TtsService],
})
export class TtsModule {}
+67
View File
@@ -0,0 +1,67 @@
import * as crypto from 'crypto'
import * as fs from 'fs'
import * as path from 'path'
import { execFile, execSync } from 'child_process'
import { Injectable, Logger } from '@nestjs/common'
/** Directory holding the synthesized MP3 files, one `<hash>.mp3` per text/voice pair. */
const CACHE_DIR = '/tmp/tts-cache'
/** Outcome of a synthesis request, whether freshly generated or served from the cache. */
interface TtsResult {
  /** MD5 hex digest of text + voice; also the base name of the cached file. */
  hash: string
  /** Path of the cached MP3 file inside the cache directory. */
  filePath: string
  /** Approximate audio length in milliseconds, estimated from the file size. */
  durationMs: number
}
/**
 * Text-to-speech service backed by the `edge-tts` command-line tool, with an
 * on-disk cache.
 *
 * Audio is stored as `<hash>.mp3` in the cache directory, where the hash is the
 * MD5 of text + voice, so repeated requests for the same text and voice reuse
 * the existing file instead of running the tool again.
 */
@Injectable()
export class TtsService {
  /** Shape of the hashes produced by `synthesize`: 32 lowercase hex characters. */
  private static readonly HASH_PATTERN = /^[a-f0-9]{32}$/
  /** Upper bound for a single `edge-tts` run, in milliseconds. */
  private static readonly SYNTH_TIMEOUT_MS = 30000

  private readonly logger = new Logger(TtsService.name)

  constructor() {
    if (!fs.existsSync(CACHE_DIR)) {
      fs.mkdirSync(CACHE_DIR, { recursive: true })
    }
  }

  /**
   * Returns audio for `text` spoken by `voice`, generating it when not cached.
   *
   * @param text Text to speak. Line breaks are flattened before synthesis.
   * @param voice `edge-tts` voice identifier.
   * @returns Hash, cache file path and estimated duration of the audio.
   * @throws Whatever the `edge-tts` run or the filesystem reports on failure
   *   (the error is logged and rethrown).
   */
  async synthesize(text: string, voice: string = 'zh-CN-XiaoxiaoNeural'): Promise<TtsResult> {
    const hash = crypto.createHash('md5').update(text + voice).digest('hex')
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)

    if (fs.existsSync(filePath)) {
      const durationMs = await this.getDuration(filePath)
      return { hash, filePath, durationMs }
    }

    // Generate into a private temp file and rename it into place afterwards, so a
    // failed or timed-out run can never leave a truncated file that later calls
    // would mistake for a valid cache entry.
    const tmpPath = `${filePath}.${crypto.randomBytes(6).toString('hex')}.tmp`
    try {
      await this.runEdgeTts(voice, this.normalizeText(text), tmpPath)
      await fs.promises.rename(tmpPath, filePath)
      const durationMs = await this.getDuration(filePath)
      this.logger.log(`TTS generated: hash=${hash} text="${text.slice(0, 40)}..." duration=${durationMs}ms`)
      return { hash, filePath, durationMs }
    } catch (e: unknown) {
      // Best-effort cleanup; the temp file may not exist if the tool never started.
      await fs.promises.unlink(tmpPath).catch(() => undefined)
      const message = e instanceof Error ? e.message : String(e)
      this.logger.error(`TTS failed: ${message}`)
      throw e
    }
  }

  /**
   * Looks up the cached audio file for a hash.
   *
   * @param hash Hash previously returned by `synthesize`.
   * @returns The file path, or `null` when the hash is malformed or nothing is cached.
   */
  getCachedPath(hash: string): string | null {
    // Only well-formed hashes may be turned into a path; anything else (for example
    // a value containing "../") must not reach the filesystem.
    if (typeof hash !== 'string' || !TtsService.HASH_PATTERN.test(hash)) {
      return null
    }
    const filePath = path.join(CACHE_DIR, `${hash}.mp3`)
    return fs.existsSync(filePath) ? filePath : null
  }

  /**
   * Runs `edge-tts` without a shell and without blocking the event loop.
   *
   * Arguments are passed as an argv array, so quotes, `$(...)`, backticks and other
   * shell metacharacters in `text` or `voice` are never interpreted. The
   * `--option=value` form keeps values that start with `-` from being parsed as options.
   */
  private runEdgeTts(voice: string, text: string, outputPath: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      execFile(
        'edge-tts',
        [`--voice=${voice}`, `--text=${text}`, `--write-media=${outputPath}`],
        { timeout: TtsService.SYNTH_TIMEOUT_MS },
        (error) => {
          if (error) {
            reject(error)
          } else {
            resolve()
          }
        },
      )
    })
  }

  /** Flattens line breaks: newlines become spaces, carriage returns are dropped. */
  private normalizeText(text: string): string {
    return text.replace(/\n/g, ' ').replace(/\r/g, '')
  }

  /**
   * Estimates the audio duration from the file size; falls back to 3000 ms when
   * the file cannot be inspected.
   */
  private async getDuration(filePath: string): Promise<number> {
    try {
      // Estimate duration from file size (~16kbps for mp3 at 22050Hz)
      // NOTE(review): the 16 kbps figure is an assumption that is not established
      // anywhere in this file — confirm the actual bitrate of the edge-tts output,
      // since the estimate scales inversely with it.
      const stat = await fs.promises.stat(filePath)
      const bytesPerMs = 16 * 1024 / 8 / 1000 // 16kbps → bytes per ms
      return Math.round(stat.size / bytesPerMs)
    } catch {
      return 3000
    }
  }
}