zhiyin/backend/src/modules/tts/tts.controller.ts

import { Controller, Get, Post, Body, Param, Res, HttpException, HttpStatus, UseGuards, UploadedFile, UseInterceptors } from '@nestjs/common'
import { FileInterceptor } from '@nestjs/platform-express'
import { Response } from 'express'
import * as fs from 'fs'
import * as path from 'path'
import { execFile, execSync } from 'child_process'
import { promisify } from 'util'
import { TtsService } from './tts.service'
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard'
import { Public } from '../../common/decorators/public.decorator'

/**
 * Promise-returning `execFile`. It spawns the binary directly with an argv
 * array (no shell, so arguments cannot be interpreted as shell syntax) and
 * does not block the event loop while the child runs.
 */
const execFileAsync = promisify(execFile)

/** Directory multer writes ASR uploads into; also where the renamed copy lives. */
const ASR_UPLOAD_DIR = '/tmp/asr_uploads'

/** Python one-liner that transcribes the file given as argv[1] with the local "tiny" whisper model. */
const LOCAL_WHISPER_SCRIPT =
  'import sys, whisper; model = whisper.load_model("tiny"); print(model.transcribe(sys.argv[1], language="zh")["text"].strip())'

/** The fields of the uploaded-file object that this controller reads. */
interface UploadedAudioFile {
  originalname: string
  filename: string
  path: string
}

/**
 * HTTP endpoints for text-to-speech synthesis, cached audio download and
 * speech recognition (ASR) of uploaded audio.
 */
@Controller('tts')
export class TtsController {
  constructor(private readonly ttsService: TtsService) {}

  /**
   * Synthesizes speech for `text` and returns the metadata of the result.
   *
   * @param text  Text to synthesize; must be a non-empty string of at most 500 characters.
   * @param voice Optional voice identifier, forwarded to the service unchanged.
   * @returns The `hash`, `durationMs` and `amplitudeData` reported by the service.
   * @throws HttpException 400 when `text` is missing, not a string, or too long.
   */
  @UseGuards(JwtAuthGuard)
  @Post('synthesize')
  async synthesize(@Body('text') text: string, @Body('voice') voice?: string) {
    // The request body is untrusted: at runtime `text` can be any JSON value,
    // and a number would otherwise slip past a bare `.length` comparison.
    if (typeof text !== 'string' || text.length === 0 || text.length > 500) {
      throw new HttpException('文本不能为空且不超过500字', HttpStatus.BAD_REQUEST)
    }
    const { hash, durationMs, amplitudeData } = await this.ttsService.synthesize(text, voice)
    return { hash, durationMs, amplitudeData }
  }

  /**
   * Streams a previously synthesized audio file identified by `hash`.
   *
   * @param hash Cache key taken from the URL.
   * @param res  Express response the file is piped into.
   * @throws HttpException 404 when the service has no cached path for `hash`.
   */
  @Public()
  @Get('audio/:hash')
  async getAudio(@Param('hash') hash: string, @Res() res: Response): Promise<void> {
    // Defence in depth for a public route: path separators and ".." are never
    // needed in a cache key, and getCachedPath's own validation is not visible here.
    if (typeof hash !== 'string' || /[\\/]|\.\./.test(hash)) {
      throw new HttpException('音频不存在', HttpStatus.NOT_FOUND)
    }
    const filePath = this.ttsService.getCachedPath(hash)
    if (!filePath) {
      throw new HttpException('音频不存在', HttpStatus.NOT_FOUND)
    }

    const stream = fs.createReadStream(filePath)

    // Send the long-lived caching headers only once the file has really been
    // opened, so an error response is never marked cacheable for a year.
    stream.once('open', () => {
      res.setHeader('Content-Type', 'audio/mpeg')
      res.setHeader('Cache-Control', 'public, max-age=31536000')
      stream.pipe(res)
    })

    // Without a listener, a read failure (e.g. the file was removed after the
    // lookup) is emitted as an unhandled 'error' event.
    stream.once('error', () => {
      if (res.headersSent) {
        res.destroy()
        return
      }
      res.status(HttpStatus.NOT_FOUND).json({ statusCode: HttpStatus.NOT_FOUND, message: '音频不存在' })
    })

    // pipe() does not close the source when the client disconnects early.
    res.once('close', () => stream.destroy())
  }

  /**
   * Transcribes an uploaded audio file. The OpenAI transcription API is tried
   * first when `OPENAI_API_KEY` is set; the local whisper model is used when
   * that is unavailable, fails, or yields no text.
   *
   * @param file Uploaded file from the `audio` multipart field.
   * @returns `{ text }` with the transcription, or an empty string when neither engine produced one.
   * @throws HttpException 400 when no file was uploaded.
   */
  @UseGuards(JwtAuthGuard)
  @Post('asr')
  @UseInterceptors(FileInterceptor('audio', { dest: ASR_UPLOAD_DIR }))
  async recognize(@UploadedFile() file: UploadedAudioFile): Promise<{ text: string }> {
    if (!file) throw new HttpException('请上传音频文件', HttpStatus.BAD_REQUEST)

    // `originalname` is chosen by the client. Only a short alphanumeric
    // extension is accepted, so nothing client-controlled other than
    // [A-Za-z0-9] reaches the file name handed to curl / python.
    const requestedExt = typeof file.originalname === 'string' ? path.extname(file.originalname) : ''
    const ext = /^\.[A-Za-z0-9]{1,8}$/.test(requestedExt) ? requestedExt : '.mp3'
    const dest = path.join(ASR_UPLOAD_DIR, file.filename + ext)

    // Tracks where the upload currently lives so `finally` removes the right file.
    let audioPath = file.path
    try {
      await fs.promises.mkdir(ASR_UPLOAD_DIR, { recursive: true })
      await fs.promises.rename(file.path, dest)
      audioPath = dest

      const remoteText = await this.transcribeWithOpenAi(audioPath)
      if (remoteText) return { text: remoteText }
      return { text: await this.transcribeWithLocalWhisper(audioPath) }
    } finally {
      // Uploads are single-use; remove them so the upload directory does not grow without bound.
      await fs.promises.unlink(audioPath).catch(() => undefined)
    }
  }

  /**
   * Sends the audio file to the OpenAI transcription endpoint through `curl`.
   *
   * @returns The trimmed transcription, or `''` when no API key is configured,
   *          the call fails, or the response carries no text.
   */
  private async transcribeWithOpenAi(audioPath: string): Promise<string> {
    const apiKey = process.env.OPENAI_API_KEY
    if (!apiKey) return ''
    try {
      // NOTE(review): the key is still passed in curl's argv and is therefore
      // visible in the process list while the request runs.
      const { stdout } = await execFileAsync(
        'curl',
        [
          '-s',
          '-X',
          'POST',
          'https://api.openai.com/v1/audio/transcriptions',
          '-H',
          `Authorization: Bearer ${apiKey}`,
          // No explicit Content-Type: curl generates the multipart header,
          // including the boundary, for -F on its own.
          '-F',
          `file=@${audioPath}`,
          '-F',
          'model=whisper-1',
          '-F',
          'language=zh',
        ],
        { encoding: 'utf8', timeout: 30000 },
      )
      const parsed: unknown = JSON.parse(stdout)
      const text = typeof parsed === 'object' && parsed !== null ? (parsed as { text?: unknown }).text : undefined
      return typeof text === 'string' ? text.trim() : ''
    } catch (err: unknown) {
      // The execFile error message embeds the full command line, which
      // contains the API key, so it is deliberately not logged.
      const reason = err instanceof SyntaxError ? 'response was not valid JSON' : 'curl failed or timed out'
      console.warn(`[TtsController] OpenAI transcription unavailable: ${reason}`)
      return ''
    }
  }

  /**
   * Transcribes the audio file with the local Python `whisper` package.
   *
   * @returns The trimmed transcription, or `''` when the process fails or prints nothing.
   */
  private async transcribeWithLocalWhisper(audioPath: string): Promise<string> {
    try {
      const { stdout } = await execFileAsync('python3', ['-c', LOCAL_WHISPER_SCRIPT, audioPath], {
        encoding: 'utf8',
        timeout: 60000,
      })
      return typeof stdout === 'string' ? stdout.trim() : ''
    } catch (err: unknown) {
      console.warn('[TtsController] local whisper transcription failed:', err instanceof Error ? err.message : err)
      return ''
    }
  }
}