import {
  Controller,
  Get,
  Post,
  Body,
  Param,
  Res,
  HttpException,
  HttpStatus,
  Logger,
  UseGuards,
  UploadedFile,
  UseInterceptors,
} from '@nestjs/common'
import { FileInterceptor } from '@nestjs/platform-express'
import { Response } from 'express'
import * as fs from 'fs'
import * as path from 'path'
import { execFile, execSync } from 'child_process'
import { promisify } from 'util'
import { TtsService } from './tts.service'
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard'
import { Public } from '../../common/decorators/public.decorator'

/** Directory used both as the multer upload target and as the working directory for ASR input files. */
const ASR_UPLOAD_DIR = '/tmp/asr_uploads'

/** Maximum accepted length (in UTF-16 code units) of the text sent to `synthesize`. */
const MAX_TTS_TEXT_LENGTH = 500

/**
 * Extensions that are safe to append to the stored upload name. Restricting to
 * short alphanumeric suffixes keeps characters that are special to curl's
 * `-F` parser (`;`, `,`, `"`) out of the path.
 */
const SAFE_EXTENSION = /^\.[A-Za-z0-9]{1,8}$/

/** Inline Python program executed for the local fallback; the audio path is passed as `sys.argv[1]`. */
const WHISPER_SCRIPT =
  'import sys, whisper; model = whisper.load_model("tiny"); print(model.transcribe(sys.argv[1], language="zh")["text"].strip())'

/** Promise-returning `execFile`: runs a binary with an argument array and no intermediate shell. */
const execFileAsync = promisify(execFile)

/** The subset of the uploaded-file object that this controller reads. */
interface UploadedAudioFile {
  originalname: string
  filename: string
  path: string
}

/**
 * Summarises a failure without echoing `message`.
 *
 * `execFile` rejections embed the full command line in `message`, which for
 * the curl call would include the bearer token, so only the error name, exit
 * code and signal are reported.
 */
function describeFailure(err: unknown): string {
  if (typeof err !== 'object' || err === null) return 'unknown error'
  const { name, code, signal } = err as { name?: unknown; code?: unknown; signal?: unknown }
  return `${String(name ?? 'Error')} (code=${String(code ?? 'n/a')}, signal=${String(signal ?? 'n/a')})`
}

/**
 * HTTP endpoints under `/tts`: text-to-speech synthesis, retrieval of cached
 * audio by hash, and speech recognition for an uploaded audio file.
 */
@Controller('tts')
export class TtsController {
  private readonly logger = new Logger(TtsController.name)

  constructor(private ttsService: TtsService) {}

  /**
   * Synthesises speech for `text` through `TtsService.synthesize`.
   *
   * @param text  Text to speak; must be a non-empty string of at most 500 characters.
   * @param voice Optional voice identifier, forwarded to the service unchanged.
   * @returns The `hash`, `durationMs` and `amplitudeData` fields of the service result.
   * @throws HttpException (400) when `text` is missing, not a string, empty or too long.
   */
  @UseGuards(JwtAuthGuard)
  @Post('synthesize')
  async synthesize(@Body('text') text: string, @Body('voice') voice?: string) {
    // The body is untrusted JSON: a number or object would otherwise slip past
    // the length comparison because `undefined > 500` is false.
    if (typeof text !== 'string' || !text || text.length > MAX_TTS_TEXT_LENGTH) {
      throw new HttpException('文本不能为空且不超过500字', HttpStatus.BAD_REQUEST)
    }
    const result = await this.ttsService.synthesize(text, voice)
    return { hash: result.hash, durationMs: result.durationMs, amplitudeData: result.amplitudeData }
  }

  /**
   * Streams the cached audio file that `TtsService.getCachedPath` resolves for `hash`.
   *
   * @param hash Cache key taken from the URL.
   * @param res  Express response that the file is piped into.
   * @throws HttpException (404) when the service returns no path for the hash.
   */
  @Public()
  @Get('audio/:hash')
  async getAudio(@Param('hash') hash: string, @Res() res: Response) {
    const filePath = this.ttsService.getCachedPath(hash)
    if (!filePath) {
      throw new HttpException('音频不存在', HttpStatus.NOT_FOUND)
    }

    const stream = fs.createReadStream(filePath)

    // Without a listener a failed open/read is an unhandled 'error' event,
    // which terminates the Node process.
    stream.once('error', (err: NodeJS.ErrnoException) => {
      this.logger.warn(`Failed to stream cached audio: ${describeFailure(err)}`)
      if (res.headersSent) {
        // Part of the body is already on the wire; all we can do is abort.
        res.destroy(err)
        return
      }
      const missing = err.code === 'ENOENT'
      const status = missing ? HttpStatus.NOT_FOUND : HttpStatus.INTERNAL_SERVER_ERROR
      res.status(status).json({ statusCode: status, message: missing ? '音频不存在' : 'Internal server error' })
    })

    // Send the long-lived cache headers only once the file is known to open,
    // so an error response is never served with a one-year max-age.
    stream.once('open', () => {
      res.setHeader('Content-Type', 'audio/mpeg')
      res.setHeader('Cache-Control', 'public, max-age=31536000')
      stream.pipe(res)
    })

    // Release the file descriptor if the client goes away mid-transfer.
    res.once('close', () => stream.destroy())
  }

  /**
   * Transcribes an uploaded audio file (multipart field `audio`).
   *
   * Tries the OpenAI transcription API when `OPENAI_API_KEY` is set, then falls
   * back to a local `whisper` model run through `python3`. Transcription
   * failures are logged and reported as an empty result rather than an error.
   *
   * @param file Uploaded file descriptor injected by `FileInterceptor`.
   * @returns `{ text }` with the trimmed transcription, or `{ text: '' }` when no back-end produced one.
   * @throws HttpException (400) when no file was uploaded.
   */
  @UseGuards(JwtAuthGuard)
  @Post('asr')
  @UseInterceptors(FileInterceptor('audio', { dest: ASR_UPLOAD_DIR }))
  async recognize(@UploadedFile() file: UploadedAudioFile | undefined) {
    if (!file) throw new HttpException('请上传音频文件', HttpStatus.BAD_REQUEST)

    // `originalname` is chosen by the client, so only a conservative extension
    // is carried over; anything else falls back to the default.
    const requestedExt = path.extname(file.originalname)
    const ext = SAFE_EXTENSION.test(requestedExt) ? requestedExt : '.mp3'
    // NOTE(review): assumes `file.filename` is a server-generated name — confirm against the multer config.
    const dest = path.join(ASR_UPLOAD_DIR, file.filename + ext)

    // Tracks whichever path currently holds the upload so `finally` always removes it.
    let audioPath = file.path
    try {
      await fs.promises.mkdir(ASR_UPLOAD_DIR, { recursive: true })
      await fs.promises.rename(file.path, dest)
      audioPath = dest

      const remoteText = await this.transcribeWithOpenAi(dest)
      if (remoteText !== undefined) return { text: remoteText }

      const localText = await this.transcribeWithLocalWhisper(dest)
      return { text: localText ?? '' }
    } finally {
      await this.removeQuietly(audioPath)
    }
  }

  /**
   * Posts the audio file to the OpenAI transcription endpoint via `curl`.
   *
   * @returns The trimmed `text` field of the JSON response, or `undefined` when
   *          no API key is configured, the call fails, or the response has no text.
   */
  private async transcribeWithOpenAi(audioPath: string): Promise<string | undefined> {
    const apiKey = process.env.OPENAI_API_KEY
    if (!apiKey) return undefined

    try {
      // NOTE(review): the bearer token is still visible in the process list
      // while curl runs; consider feeding it through `-H @-` on stdin.
      const { stdout } = await execFileAsync(
        'curl',
        [
          '-s',
          '-X',
          'POST',
          'https://api.openai.com/v1/audio/transcriptions',
          '-H',
          `Authorization: Bearer ${apiKey}`,
          '-H',
          'Content-Type: multipart/form-data',
          '-F',
          `file=@${audioPath}`,
          '-F',
          'model=whisper-1',
          '-F',
          'language=zh',
        ],
        { encoding: 'utf8', timeout: 30000 },
      )
      const parsed: unknown = JSON.parse(stdout)
      const text = (parsed as { text?: unknown } | null)?.text
      if (typeof text === 'string' && text) return text.trim()
    } catch (err) {
      // A remote failure must not prevent the local fallback from running.
      this.logger.warn(`OpenAI transcription failed: ${describeFailure(err)}`)
    }
    return undefined
  }

  /**
   * Runs the inline Whisper script with `python3` on the given file.
   *
   * @returns The trimmed standard output, or `undefined` when it is empty or the process fails.
   */
  private async transcribeWithLocalWhisper(audioPath: string): Promise<string | undefined> {
    try {
      const { stdout } = await execFileAsync('python3', ['-c', WHISPER_SCRIPT, audioPath], {
        encoding: 'utf8',
        timeout: 60000,
      })
      const text = stdout.trim()
      return text ? text : undefined
    } catch (err) {
      this.logger.warn(`Local whisper transcription failed: ${describeFailure(err)}`)
      return undefined
    }
  }

  /** Deletes `filePath`, logging (not throwing) on any failure other than the file already being gone. */
  private async removeQuietly(filePath: string): Promise<void> {
    try {
      await fs.promises.unlink(filePath)
    } catch (err) {
      if ((err as NodeJS.ErrnoException | null)?.code !== 'ENOENT') {
        this.logger.warn(`Failed to remove temporary upload: ${describeFailure(err)}`)
      }
    }
  }
}