feat: realistic face avatar + voice input + ASR endpoint
This commit is contained in:
@@ -1,14 +1,18 @@
|
||||
import { Controller, Get, Post, Body, Param, Res, HttpException, HttpStatus } from '@nestjs/common'
|
||||
import { Controller, Get, Post, Body, Param, Res, HttpException, HttpStatus, UseGuards, UploadedFile, UseInterceptors } from '@nestjs/common'
|
||||
import { FileInterceptor } from '@nestjs/platform-express'
|
||||
import { Response } from 'express'
|
||||
import * as fs from 'fs'
|
||||
import * as path from 'path'
|
||||
import { execFile, execSync } from 'child_process'
|
||||
import { TtsService } from './tts.service'
|
||||
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard'
|
||||
import { Public } from '../../common/decorators/public.decorator'
|
||||
|
||||
@Controller('tts')
|
||||
export class TtsController {
|
||||
// Takes a TtsService as a constructor argument and keeps it as a private parameter property.
constructor(private ttsService: TtsService) {}
|
||||
|
||||
@Public()
|
||||
@UseGuards(JwtAuthGuard)
|
||||
@Post('synthesize')
|
||||
async synthesize(@Body('text') text: string, @Body('voice') voice?: string) {
|
||||
if (!text || text.length > 500) {
|
||||
@@ -30,4 +34,36 @@ export class TtsController {
|
||||
res.setHeader('Cache-Control', 'public, max-age=31536000')
|
||||
stream.pipe(res)
|
||||
}
|
||||
|
||||
/**
 * Speech recognition endpoint (`POST tts/asr`, multipart field `audio`).
 *
 * Tries the OpenAI transcription API first when `OPENAI_API_KEY` is set, then
 * falls back to a locally installed `whisper` CLI. Recognition failures are
 * logged and reported to the client as an empty transcript rather than an error.
 *
 * @param file The uploaded file object produced by the `FileInterceptor`.
 * @returns `{ text }` with the trimmed transcript, or `{ text: '' }` when no
 *          recognizer produced any output.
 * @throws HttpException (400) when no file was uploaded.
 */
@UseGuards(JwtAuthGuard)
@Post('asr')
@UseInterceptors(FileInterceptor('audio', { dest: '/tmp/asr_uploads' }))
async recognize(
  @UploadedFile() file: { originalname?: string; filename: string; path: string } | undefined,
): Promise<{ text: string }> {
  if (!file) throw new HttpException('请上传音频文件', HttpStatus.BAD_REQUEST)

  const uploadDir = '/tmp/asr_uploads'

  // `originalname` is supplied by the client. Only a short alphanumeric
  // extension is accepted so nothing attacker-controlled ends up in the path
  // that is handed to the external tools below.
  const rawExt = typeof file.originalname === 'string' ? path.extname(file.originalname) : ''
  const ext = /^\.[A-Za-z0-9]{1,8}$/.test(rawExt) ? rawExt : '.mp3'
  const dest = path.join(uploadDir, file.filename + ext)

  try {
    fs.mkdirSync(uploadDir, { recursive: true })
    // The recognizers get a path that carries the audio extension.
    fs.renameSync(file.path, dest)

    const apiKey = process.env.OPENAI_API_KEY
    if (apiKey) {
      try {
        // NOTE(review): the API key is still passed in curl's argv and is
        // therefore visible to other local users via the process list.
        const stdout = await this.runCommand(
          'curl',
          [
            '-s',
            '-X', 'POST',
            'https://api.openai.com/v1/audio/transcriptions',
            '-H', `Authorization: Bearer ${apiKey}`,
            '-H', 'Content-Type: multipart/form-data',
            '-F', `file=@${dest}`,
            '-F', 'model=whisper-1',
            '-F', 'language=zh',
          ],
          30000,
        )
        const parsed: unknown = JSON.parse(stdout)
        const text =
          typeof parsed === 'object' && parsed !== null
            ? (parsed as { text?: unknown }).text
            : undefined
        if (typeof text === 'string' && text) return { text: text.trim() }
      } catch (err: unknown) {
        // A failed remote call must not prevent the local fallback from running.
        console.warn('[tts/asr] OpenAI transcription failed:', err instanceof Error ? err.message : err)
      }
    }

    try {
      // NOTE(review): the whisper CLI may also write a transcript file into the
      // process working directory — confirm, and clean it up if so.
      const stdout = await this.runCommand(
        'whisper',
        [dest, '--language', 'zh', '--output_format', 'txt'],
        60000,
      )
      const transcript = stdout.trim()
      if (transcript) return { text: transcript }
    } catch (err: unknown) {
      console.warn('[tts/asr] local whisper failed:', err instanceof Error ? err.message : err)
    }

    return { text: '' }
  } finally {
    // Best-effort cleanup so uploads do not accumulate on disk. Depending on
    // whether the rename happened, only one of the two paths exists.
    for (const leftover of [file.path, dest]) {
      await fs.promises.unlink(leftover).catch(() => undefined)
    }
  }
}

/**
 * Runs an external program without a shell and without blocking the event
 * loop, resolving with its stdout. stderr is discarded.
 *
 * Rejects with a sanitized error: the error produced by `execFile` carries the
 * full command line, which here may contain the Authorization header.
 */
private runCommand(command: string, args: string[], timeoutMs: number): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    execFile(command, args, { encoding: 'utf8', timeout: timeoutMs }, (error, stdout) => {
      if (error) {
        reject(new Error(`${command} failed (code=${String(error.code)}, signal=${String(error.signal)})`))
        return
      }
      resolve(stdout)
    })
  })
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user