feat: interview review module with whisper.cpp ASR + AI analysis + frontend page

New backend module 'interview-review' provides:
- Audio upload (50MB limit, MP3/M4A/WAV/AAC/OGG/MP4/WebM)
- Text transcript submission
- whisper.cpp local ASR integration (tiny + base models)
- AI analysis (4-dimension scoring: logic/expression/professionalism/stability)
- Speech analysis (filler words detection, pace, duration)
- Async processing pipeline with status polling
- Graceful fallback to mock ASR when whisper unavailable

New frontend page 'pages/review/review.vue' with 3 modes:
- List mode: review history with status indicators
- Upload mode: audio file upload or text paste
- Report mode: score radar, dimension bars, analysis details

Docs updated: PROJECT-STATUS.md v4.4, FEATURE-LIST.md v4.2, ROADMAP.md v4.2
This commit is contained in:
wlt
2026-06-16 18:32:25 +08:00
parent 96c367e0f8
commit 4cd889c081
16 changed files with 1771 additions and 80 deletions
+2
View File
@@ -26,6 +26,7 @@ import { ScheduleModule } from './modules/schedule/schedule.module'
import { TtsModule } from './modules/tts/tts.module'
import { PricingModule } from './modules/schemas/pricing.module'
import { ShareModule } from './modules/share/share.module'
import { InterviewReviewModule } from './modules/interview-review/interview-review.module'
const MONGODB_URI = process.env.MONGODB_URI || 'mongodb://localhost:27017/zhiyin'
@@ -60,6 +61,7 @@ const MONGODB_URI = process.env.MONGODB_URI || 'mongodb://localhost:27017/zhiyin
TtsModule,
PricingModule,
ShareModule,
InterviewReviewModule,
],
providers: [
JwtStrategy,
@@ -0,0 +1,218 @@
import { Injectable, Logger } from '@nestjs/common'
import { execFile, execSync } from 'child_process'
import * as fs from 'fs'
import * as os from 'os'
import * as path from 'path'
import { promisify } from 'util'
/** One timed slice of a transcript, attributed to a single speaker. */
export interface AsrSegment {
/** Segment start in seconds (the whisper parser converts millisecond offsets to seconds). */
startTime: number
/** Segment end in seconds. */
endTime: number
/** Who is speaking. Every producer visible in this file emits 'candidate'. */
speaker: 'interviewer' | 'candidate'
/** Transcribed text of this segment. */
text: string
}
/** Complete result of transcribing one audio file. */
export interface AsrResult {
/** Whole transcript as one string. */
fullText: string
/** Transcript broken into timed segments, in order. */
segments: AsrSegment[]
/** Length of the recording in seconds; may be an estimate or 0 when no timing is available. */
duration: number
}
/**
 * Optional ASR settings.
 * NOTE(review): nothing in the visible code reads this interface; AsrService takes
 * its configuration from environment variables instead. Confirm whether it is still needed.
 */
export interface AsrConfig {
/** Path to whisper.cpp build/bin/ directory */
whisperCppPath?: string
/** Model name: tiny | base | small | medium */
model?: string
/** Language code (zh, en, auto) */
language?: string
}
/**
 * Speech-to-text for uploaded interview recordings, backed by a local whisper.cpp build.
 *
 * `transcribe` resolves a transcript in this order:
 * 1. a companion `.txt` file next to the audio (debugging/testing aid),
 * 2. whisper.cpp (`whisper-cli`) when both the binary and the model file exist,
 * 3. a canned mock transcript.
 *
 * Configuration is read from the environment: `WHISPER_CPP_PATH`, `WHISPER_MODEL`,
 * `WHISPER_LANGUAGE` and `WHISPER_THREADS`.
 */
@Injectable()
export class AsrService {
  /**
   * Promise-based `execFile`: spawns the binary directly (no shell, so paths with
   * spaces or metacharacters are safe) and does not block the event loop while
   * the external process runs.
   */
  private static readonly runFile = promisify(execFile)

  /** Audio extensions that may have a companion `.txt` transcript. */
  private static readonly AUDIO_EXTENSION = /\.(mp3|m4a|wav|aac|ogg|mp4|webm)$/i

  private readonly logger = new Logger(AsrService.name)
  private readonly whisperCppPath: string
  private readonly modelPath: string
  private readonly language: string
  private readonly cliPath: string

  constructor() {
    // Configuration via env vars with defaults.
    // NOTE(review): the default install path points at a developer home directory;
    // deployments are expected to set WHISPER_CPP_PATH.
    this.whisperCppPath = process.env.WHISPER_CPP_PATH || '/home/wlt/whisper.cpp'
    this.language = process.env.WHISPER_LANGUAGE || 'zh'
    const modelName = process.env.WHISPER_MODEL || 'base'
    this.modelPath = path.join(this.whisperCppPath, 'models', `ggml-${modelName}.bin`)
    this.cliPath = path.join(this.whisperCppPath, 'build', 'bin', 'whisper-cli')

    // Validate the whisper.cpp installation on startup; absence is not fatal.
    if (!fs.existsSync(this.cliPath)) {
      this.logger.warn(`whisper-cli not found at ${this.cliPath}. ASR will fall back to mock.`)
    }
    if (!fs.existsSync(this.modelPath)) {
      this.logger.warn(`Whisper model not found at ${this.modelPath}. ASR will fall back to mock.`)
    }
  }

  /**
   * Transcribe an audio file.
   *
   * @param audioPath Path of the audio file on disk.
   * @param _mimeType MIME type reported for the upload (currently unused).
   * @returns The transcript; never rejects because every failure ends in the mock fallback.
   */
  async transcribe(audioPath: string, _mimeType: string): Promise<AsrResult> {
    this.logger.log(`Transcribing audio: ${audioPath}`)

    // Try companion .txt file first (for debugging/testing)
    const companion = this.readCompanionTranscript(audioPath)
    if (companion) return companion

    // Try whisper.cpp
    if (fs.existsSync(this.cliPath) && fs.existsSync(this.modelPath)) {
      try {
        return await this.transcribeWithWhisper(audioPath)
      } catch (err: unknown) {
        this.logger.error(`whisper.cpp transcription failed: ${this.describeError(err)}, falling back to mock`)
      }
    }

    // Fallback to mock.
    // NOTE(review): callers cannot tell a mock transcript from a real one.
    this.logger.warn('Using MOCK ASR — whisper.cpp not available')
    return this.mockTranscribe()
  }

  /**
   * Load `<audio>.txt` when it exists next to the audio file.
   * Duration is estimated from the text length at 3.5 characters per second, minimum 10 s.
   */
  private readCompanionTranscript(audioPath: string): AsrResult | undefined {
    const txtPath = audioPath.replace(AsrService.AUDIO_EXTENSION, '.txt')
    // Without a recognised extension the "companion" path is the audio file itself;
    // reading that as text would produce garbage.
    if (txtPath === audioPath) return undefined

    try {
      if (!fs.existsSync(txtPath)) return undefined
      const text = fs.readFileSync(txtPath, 'utf-8')
      this.logger.log(`Found companion transcript: ${txtPath}`)
      const duration = Math.max(text.length / 3.5, 10)
      return {
        fullText: text,
        segments: [{ startTime: 0, endTime: duration, speaker: 'candidate', text }],
        duration,
      }
    } catch (err: unknown) {
      this.logger.warn(`Could not read companion transcript ${txtPath}: ${this.describeError(err)}`)
      return undefined
    }
  }

  /**
   * Run whisper-cli on the audio file and collect its segments.
   *
   * @throws Error when the audio file is missing or whisper-cli fails or times out.
   */
  private async transcribeWithWhisper(audioPath: string): Promise<AsrResult> {
    // Ensure audio file exists
    if (!fs.existsSync(audioPath)) {
      throw new Error(`Audio file not found: ${audioPath}`)
    }

    // Convert to WAV if needed (whisper.cpp works best with WAV)
    const wavPath = await this.ensureWav(audioPath)

    // `-oj` makes whisper-cli write "<output base>.json" rather than printing JSON,
    // so pin the output base with `-of` and read that file back.
    const outputBase = `${wavPath}.whisper`
    const jsonPath = `${outputBase}.json`
    const args = [
      '-m', this.modelPath,
      '-f', wavPath,
      '-l', this.language,
      '-oj', // JSON output file
      '-of', outputBase, // output path without extension
      '-t', String(Math.max(1, this.getCpuThreads())), // thread count
      '--no-prints', // print nothing but the results
    ]
    this.logger.log(`Running: ${this.cliPath} -m ${this.modelPath} -f ${wavPath} -l ${this.language}`)

    try {
      const { stdout } = await AsrService.runFile(this.cliPath, args, {
        timeout: 600000, // 10 min
        encoding: 'utf-8',
      })

      let segments: AsrSegment[] = []
      if (fs.existsSync(jsonPath)) {
        segments = this.extractSegments(await fs.promises.readFile(jsonPath, 'utf-8'))
      }
      if (segments.length === 0) {
        // Older builds or unexpected output: fall back to whatever was printed.
        segments = this.parseWhisperOutput(stdout)
      }

      const fullText = segments.map(s => s.text).join(' ')
      const duration = segments.length > 0 ? segments[segments.length - 1].endTime : 0
      return { fullText, segments, duration }
    } finally {
      // Do not leave intermediate artefacts next to the upload.
      await this.removeQuietly(jsonPath)
      if (wavPath !== audioPath) await this.removeQuietly(wavPath)
    }
  }

  /**
   * Parse whisper output into segments; when nothing structured is found the
   * whole text becomes one untimed segment.
   */
  private parseWhisperOutput(stdout: string): AsrSegment[] {
    const segments = this.extractSegments(stdout)
    if (segments.length > 0) return segments

    // Fallback: treat entire output as raw text
    this.logger.warn('Could not parse structured JSON output, using raw text')
    return [{
      startTime: 0,
      endTime: 0,
      speaker: 'candidate',
      text: stdout.trim(),
    }]
  }

  /**
   * Extract segments from either a whisper JSON document (`{ "transcription": [...] }`)
   * or from output carrying one JSON object per line. Returns `[]` when neither matches.
   */
  private extractSegments(raw: string): AsrSegment[] {
    const trimmed = raw.trim()
    if (!trimmed) return []
    const isSegment = (s: AsrSegment | undefined): s is AsrSegment => s !== undefined

    const doc = this.tryParseJson(trimmed)
    if (typeof doc === 'object' && doc !== null) {
      const entries = (doc as { transcription?: unknown }).transcription
      if (Array.isArray(entries)) {
        return entries.map(entry => this.toSegment(entry)).filter(isSegment)
      }
    }

    return trimmed
      .split('\n')
      .map(line => this.toSegment(this.tryParseJson(line)))
      .filter(isSegment)
  }

  /** Convert one parsed JSON entry into a segment, or `undefined` when it has no usable shape. */
  private toSegment(entry: unknown): AsrSegment | undefined {
    if (typeof entry !== 'object' || entry === null) return undefined
    const item = entry as {
      text?: unknown
      offsets?: { from?: unknown; to?: unknown }
      start?: unknown
      end?: unknown
    }
    if (typeof item.text !== 'string' || !item.text) return undefined
    const text = item.text.trim()

    const offsets = item.offsets
    if (offsets && typeof offsets.from === 'number' && typeof offsets.to === 'number') {
      return {
        startTime: offsets.from / 1000, // ms to seconds
        endTime: offsets.to / 1000,
        speaker: 'candidate',
        text,
      }
    }

    // Alternative format: start/end already in seconds.
    if (typeof item.start === 'number') {
      const end = typeof item.end === 'number' && item.end > 0 ? item.end : item.start + 2
      return { startTime: item.start, endTime: end, speaker: 'candidate', text }
    }
    return undefined
  }

  /** `JSON.parse` that yields `undefined` instead of throwing. */
  private tryParseJson(text: string): unknown {
    try {
      return JSON.parse(text)
    } catch {
      return undefined
    }
  }

  /**
   * Convert audio to 16 kHz mono PCM WAV if needed.
   * Uses ffmpeg if available, otherwise returns the original path.
   */
  private async ensureWav(audioPath: string): Promise<string> {
    const parts = path.parse(audioPath)
    if (parts.ext.toLowerCase() === '.wav') return audioPath

    // Built from the parsed name so an extension-less input can never be its own
    // output (ffmpeg -y would overwrite it).
    const wavPath = path.join(parts.dir, `${parts.name}.wav`)
    try {
      await AsrService.runFile(
        'ffmpeg',
        ['-y', '-i', audioPath, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', wavPath],
        { timeout: 300000, encoding: 'utf-8' },
      )
      this.logger.log(`Converted ${audioPath} to WAV: ${wavPath}`)
      return wavPath
    } catch (err: unknown) {
      this.logger.warn(`ffmpeg conversion failed: ${this.describeError(err)}. Trying original format.`)
      await this.removeQuietly(wavPath) // drop any partial output
      return audioPath
    }
  }

  /** Thread count for whisper-cli: `WHISPER_THREADS` when positive, else the CPU count, else 4. */
  private getCpuThreads(): number {
    const configured = parseInt(process.env.WHISPER_THREADS || '', 10)
    if (configured > 0) return configured
    return os.cpus().length || 4
  }

  /** Best-effort delete; a missing file is not an error. */
  private async removeQuietly(filePath: string): Promise<void> {
    try {
      await fs.promises.unlink(filePath)
    } catch {
      // The file may never have been created; nothing to clean up.
    }
  }

  /** Human-readable message for a value caught in a `catch` clause. */
  private describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err)
  }

  /** Mock transcription for development/testing when whisper.cpp is not available */
  private mockTranscribe(): AsrResult {
    const paragraphs = [
      '我毕业于计算机科学与技术专业,大学期间主要学习了数据结构、算法、操作系统、计算机网络等核心课程。',
      '在项目经验方面,我参与过一个电商平台的开发,主要负责后端接口的设计和实现,使用了 Node.js 和 MongoDB 技术栈。',
      '这个项目的难点在于高并发场景下的性能优化,我通过引入 Redis 缓存和数据库索引优化,将接口响应时间从 2 秒降低到了 200 毫秒。',
      '关于这个岗位,我了解到贵公司主要使用 React 技术栈,我之前在两个项目中使用过 React,对 Hooks、状态管理、组件化开发都比较熟悉。',
    ]
    const fullText = paragraphs.join('\n')
    return {
      fullText,
      segments: [{
        startTime: 0,
        endTime: 120,
        speaker: 'candidate',
        text: fullText,
      }],
      duration: 120,
    }
  }
}
@@ -0,0 +1,100 @@
import {
Controller, Post, Get, Delete, Param, Query,
UseInterceptors, UploadedFile, Body,
HttpException, HttpStatus,
} from '@nestjs/common'
import { FileInterceptor } from '@nestjs/platform-express'
import { diskStorage } from 'multer'
import { extname, join } from 'path'
import * as fs from 'fs'
import { randomUUID } from 'crypto'
import { InterviewReviewService } from './interview-review.service'
import { CurrentUser } from '../../common/decorators/current-user.decorator'
/** Directory (relative to the process working directory) where uploaded recordings are stored. */
const UPLOAD_DIR = join(process.cwd(), 'uploads', 'reviews')

// Make sure the directory is there before the first upload arrives (runs once at module load).
if (fs.existsSync(UPLOAD_DIR) === false) {
  fs.mkdirSync(UPLOAD_DIR, { recursive: true })
}
/**
 * REST endpoints for interview reviews, mounted at `/interview-review`.
 *
 * Every handler reads the caller's id through `@CurrentUser('userId')`.
 * NOTE(review): no guard is declared here, so authentication is presumably
 * enforced globally — confirm.
 */
@Controller('interview-review')
export class InterviewReviewController {
  /** Upper bound for `limit` on the list endpoint, so one request cannot ask for an unbounded page. */
  private static readonly MAX_PAGE_SIZE = 100

  constructor(private service: InterviewReviewService) {}

  /**
   * Upload audio file + metadata.
   * The file is stored on disk under a random UUID name (50 MB limit) and the
   * review is processed asynchronously by the service.
   */
  @Post()
  @UseInterceptors(FileInterceptor('file', {
    storage: diskStorage({
      destination: (_req, _file, cb) => cb(null, UPLOAD_DIR),
      filename: (_req, file, cb) => {
        // extname('.mp3') is '' (a dotfile has no extension), so the default must
        // be applied to the extension itself rather than to the original name.
        const ext = extname(file.originalname || '').toLowerCase() || '.mp3'
        cb(null, randomUUID() + ext)
      },
    }),
    limits: { fileSize: 50 * 1024 * 1024 },
    fileFilter: (_req, file, cb) => {
      const allowed = /\.(mp3|m4a|wav|aac|ogg|mp4|webm)$/i
      if (allowed.test(extname(file.originalname || ''))) {
        cb(null, true)
      } else {
        // The message lists every extension the filter accepts.
        cb(new HttpException('仅支持 mp3/m4a/wav/aac/ogg/mp4/webm 格式', HttpStatus.BAD_REQUEST), false)
      }
    },
  }))
  async uploadFile(
    @UploadedFile() file: any,
    @Body('position') position: string,
    @Body('company') company: string,
    @CurrentUser('userId') userId: string,
  ) {
    if (!file) {
      throw new HttpException('请上传录音文件', HttpStatus.BAD_REQUEST)
    }
    if (!position || !position.trim()) {
      // The interceptor has already written the upload to disk; do not orphan it.
      await this.discardUpload(file)
      throw new HttpException('请填写面试岗位', HttpStatus.BAD_REQUEST)
    }
    return this.service.create(userId, position.trim(), company?.trim(), file)
  }

  /** Submit text transcript directly (no audio) */
  @Post('text')
  async submitText(
    @Body('position') position: string,
    @Body('company') company: string,
    @Body('text') text: string,
    @CurrentUser('userId') userId: string,
  ) {
    if (!position || !position.trim()) {
      throw new HttpException('请填写面试岗位', HttpStatus.BAD_REQUEST)
    }
    if (!text || !text.trim()) {
      throw new HttpException('请填写面试转录文本', HttpStatus.BAD_REQUEST)
    }
    return this.service.createFromText(userId, position.trim(), text.trim(), company?.trim())
  }

  /**
   * Paginated review history of the current user.
   * `page` defaults to 1 and `limit` to 20; both are clamped to sane ranges.
   * Declared before `:id` so that "list" is not captured as an id.
   */
  @Get('list')
  async list(
    @Query('page') page: string,
    @Query('limit') limit: string,
    @CurrentUser('userId') userId: string,
  ) {
    const pageNumber = Math.max(1, parseInt(page, 10) || 1)
    const pageSize = Math.min(
      InterviewReviewController.MAX_PAGE_SIZE,
      Math.max(1, parseInt(limit, 10) || 20),
    )
    return this.service.listByUser(userId, pageNumber, pageSize)
  }

  /** Fetch one review; the service checks that it belongs to the caller. */
  @Get(':id')
  async getDetail(
    @Param('id') id: string,
    @CurrentUser('userId') userId: string,
  ) {
    return this.service.getDetail(id, userId)
  }

  /** Delete one review; the service checks that it belongs to the caller. */
  @Delete(':id')
  async delete(
    @Param('id') id: string,
    @CurrentUser('userId') userId: string,
  ) {
    return this.service.delete(id, userId)
  }

  /** Best-effort removal of an upload whose request is being rejected. */
  private async discardUpload(file: { path?: string } | undefined): Promise<void> {
    if (!file?.path) return
    try {
      await fs.promises.unlink(file.path)
    } catch {
      // Already gone or not removable; the request is rejected either way.
    }
  }
}
@@ -0,0 +1,19 @@
import { Module } from '@nestjs/common'
import { MongooseModule } from '@nestjs/mongoose'
import { InterviewReviewController } from './interview-review.controller'
import { InterviewReviewService } from './interview-review.service'
import { InterviewReview, InterviewReviewSchema } from './interview-review.schema'
import { AsrService } from './asr.service'
import { AiModule } from '../ai/ai.module'
/**
 * Nest module for the interview-review feature.
 * Registers the InterviewReview Mongoose model, imports AiModule (the review
 * service injects AiService), exposes the controller and provides the review
 * and ASR services. Nothing is exported.
 */
@Module({
imports: [
MongooseModule.forFeature([
{ name: InterviewReview.name, schema: InterviewReviewSchema },
]),
AiModule,
],
controllers: [InterviewReviewController],
providers: [InterviewReviewService, AsrService],
})
export class InterviewReviewModule {}
@@ -0,0 +1,79 @@
import { Prop, Schema, SchemaFactory } from '@nestjs/mongoose'
import { Document, Types } from 'mongoose'
/** A persisted review: the schema class fields intersected with Mongoose's `Document`. */
export type InterviewReviewDocument = InterviewReview & Document
/**
 * One interview review: the submitted recording or transcript plus the
 * analysis results produced for it. `timestamps: true` adds createdAt/updatedAt.
 */
@Schema({ timestamps: true })
export class InterviewReview {
/** Owner of the review (reference to the User collection). */
@Prop({ type: Types.ObjectId, ref: 'User', required: true })
userId: Types.ObjectId
/** Position interviewed for; required. */
@Prop({ required: true })
position: string
/** Company name; empty string when not provided. */
@Prop({ default: '' })
company: string
/** Processing state; new reviews start as 'processing'. */
@Prop({ default: 'processing' })
status: 'processing' | 'completed' | 'failed'
/** Uploaded recording metadata; null for reviews without audio. */
@Prop({ type: Object, default: null })
audioFile?: {
hash: string
filePath: string
duration: number
size: number
mimeType: string
}
/** Transcript of the interview; null until one is available. */
@Prop({ type: Object, default: null })
transcript?: {
fullText: string
segments: {
startTime: number
endTime: number
speaker: 'interviewer' | 'candidate'
text: string
}[]
}
/** AI evaluation: overall score, four dimension scores and per-question breakdown. */
@Prop({ type: Object, default: null })
analysis?: {
overallScore: number
dimensions: {
logic: number
expression: number
professionalism: number
stability: number
}
strengths: string[]
weaknesses: string[]
suggestions: string[]
questionBreakdown: {
question: string
answer: string
score: number
comment: string
suggestedAnswer: string
}[]
}
/** Filler-word and pace statistics derived from the transcript text. */
@Prop({ type: Object, default: null })
speechAnalysis?: {
fillerWords: { word: string; count: number }[]
fillerScore: number
fillerDensity: number
pace: string
totalDuration: number
totalChars: number
}
/** Counter starting at 0. */
@Prop({ default: 0 })
retryCount: number
readonly createdAt?: Date
readonly updatedAt?: Date
}
/** Mongoose schema generated from the decorated InterviewReview class. */
export const InterviewReviewSchema = SchemaFactory.createForClass(InterviewReview)
// Compound index: by owner, newest first.
InterviewReviewSchema.index({ userId: 1, createdAt: -1 })
@@ -0,0 +1,302 @@
import { Injectable, Logger, HttpException, HttpStatus } from '@nestjs/common'
import { InjectModel } from '@nestjs/mongoose'
import { Model } from 'mongoose'
import { InterviewReview, InterviewReviewDocument } from './interview-review.schema'
import { AiService } from '../ai/ai.service'
import { AsrService } from './asr.service'
import { analyzeSpeech } from '../../common/utils/filler-words'
import * as fs from 'fs'
import * as path from 'path'
/**
 * Creates, processes and serves interview reviews.
 *
 * A review is stored with status 'processing' and then handled in the background:
 * ASR (when only audio was supplied) → filler-word/pace analysis → AI scoring.
 * The outcome is written back as 'completed' or 'failed'; clients poll `getDetail`.
 */
@Injectable()
export class InterviewReviewService {
  /** Largest page size `listByUser` will serve. */
  private static readonly MAX_PAGE_SIZE = 100
  /** Textual form of a MongoDB ObjectId (24 hex characters). */
  private static readonly OBJECT_ID = /^[0-9a-fA-F]{24}$/

  private readonly logger = new Logger(InterviewReviewService.name)

  constructor(
    @InjectModel(InterviewReview.name) private reviewModel: Model<InterviewReviewDocument>,
    private aiService: AiService,
    private asrService: AsrService,
  ) {}

  /**
   * Create a review from an uploaded audio file and start background processing.
   *
   * @param audioFile Multer file object; with disk storage it carries `path`, with
   *   memory storage `buffer`. Both are supported when computing the hash.
   * @returns The new review id, its status and an estimated processing time in seconds.
   */
  async create(
    userId: string,
    position: string,
    company?: string,
    audioFile?: any,
  ) {
    let audioInfo: any = undefined
    if (audioFile) {
      audioInfo = {
        hash: await this.hashAudio(audioFile),
        filePath: audioFile.path,
        duration: 0,
        size: audioFile.size,
        mimeType: audioFile.mimetype,
      }
    }

    const review = new this.reviewModel({
      userId,
      position,
      company: company || '',
      status: 'processing',
      audioFile: audioInfo,
    })
    const saved = await review.save()

    // Start async processing (non-blocking)
    void this.processReview(saved._id.toString()).catch((err) => {
      this.logger.error(`Review ${saved._id} processing failed: ${err.message}`)
    })

    return {
      id: saved._id.toString(),
      status: 'processing',
      estimatedTime: 120,
    }
  }

  /** Create from text transcript (skip ASR, go straight to analysis) */
  async createFromText(
    userId: string,
    position: string,
    text: string,
    company?: string,
  ) {
    const review = new this.reviewModel({
      userId,
      position,
      company: company || '',
      status: 'processing',
      transcript: {
        fullText: text,
        segments: [{
          startTime: 0,
          // Rough duration estimate: 3.5 characters per second, at least 10 s.
          endTime: Math.max(text.length / 3.5, 10),
          speaker: 'candidate',
          text,
        }],
      },
    })
    const saved = await review.save()

    void this.processReview(saved._id.toString()).catch((err) => {
      this.logger.error(`Review ${saved._id} processing failed: ${err.message}`)
    })

    return {
      id: saved._id.toString(),
      status: 'processing',
      estimatedTime: 60,
    }
  }

  /**
   * Run the processing pipeline for one review and persist the outcome.
   * Failures inside the pipeline mark the review 'failed' and bump `retryCount`.
   *
   * @throws Error when the review does not exist.
   */
  async processReview(reviewId: string) {
    const review = await this.reviewModel.findById(reviewId)
    if (!review) {
      throw new Error('Review not found')
    }

    try {
      // Step 1: ASR (if audio file exists and no transcript yet)
      let transcript = review.transcript
      let measuredDuration = 0
      if (!transcript && review.audioFile?.filePath) {
        const asrResult = await this.asrService.transcribe(
          review.audioFile.filePath,
          review.audioFile.mimeType,
        )
        measuredDuration = asrResult.duration
        transcript = {
          fullText: asrResult.fullText,
          segments: asrResult.segments.map(s => ({
            startTime: s.startTime,
            endTime: s.endTime,
            speaker: s.speaker as 'interviewer' | 'candidate',
            text: s.text,
          })),
        }
        await this.reviewModel.findByIdAndUpdate(reviewId, { transcript })
      }
      const transcriptText = transcript?.fullText || ''

      // Step 2: Speech analysis (filler words)
      const speechResult = analyzeSpeech(transcriptText)
      let pace = '适中'
      const rate = speechResult.speechRate
      if (rate > 5) pace = '偏快'
      else if (rate < 2.5) pace = '偏慢'
      const speechAnalysis = {
        fillerWords: speechResult.fillerWords,
        fillerScore: speechResult.fillerScore,
        fillerDensity: speechResult.fillerDensity,
        pace,
        totalDuration: speechResult.estimatedDurationSec,
        totalChars: speechResult.totalChars,
      }

      // Step 3: AI analysis
      const analysis = await this.runAiAnalysis(transcriptText, review.position, review.company)

      // Save results. `audioFile` is null for text-only reviews, and MongoDB rejects
      // setting a sub-field of a null value, so the duration is only written when
      // an audio record exists. Prefer the duration measured by ASR over the estimate.
      const audioDuration = measuredDuration > 0 ? measuredDuration : speechResult.estimatedDurationSec
      await this.reviewModel.findByIdAndUpdate(reviewId, {
        status: 'completed',
        analysis,
        speechAnalysis,
        ...(review.audioFile ? { 'audioFile.duration': audioDuration } : {}),
      })
    } catch (err: any) {
      this.logger.error(`Processing failed for review ${reviewId}: ${err.message}`)
      await this.reviewModel.findByIdAndUpdate(reviewId, {
        status: 'failed',
        $inc: { retryCount: 1 },
      })
    }
  }

  /**
   * Ask the AI service for a scored report and normalise it.
   * Returns the placeholder analysis when the transcript is empty, the call
   * fails, or the reply is not a usable JSON report.
   */
  private async runAiAnalysis(transcriptText: string, position: string, company: string) {
    if (!transcriptText.trim()) {
      return this.emptyAnalysis()
    }

    const systemPrompt = `你是一位资深的校招面试评估专家。分析以下面试转录内容,输出评估报告。
评估维度(0-100分):
1. 逻辑思维(logic):回答是否结构化、层次分明、有因果关系
2. 表达能力(expression):语言是否流畅、用词是否准确、表达是否清晰
3. 专业度(professionalism):技术栈掌握程度、行业认知深度、术语使用是否准确
4. 临场稳定性(stability):面对问题是否沉着、反应速度、抗压能力
输出格式(严格的 JSON,不要多余内容):
{
"overallScore": 0-100,
"dimensions": { "logic": 0-100, "expression": 0-100, "professionalism": 0-100, "stability": 0-100 },
"strengths": ["亮点1", "亮点2"],
"weaknesses": ["不足1", "不足2"],
"suggestions": ["改进建议1", "改进建议2"],
"questionBreakdown": [
{
"question": "面试官的问题",
"answer": "用户的回答摘要",
"score": 0-100,
"comment": "简短评语",
"suggestedAnswer": "参考回答思路"
}
]
}`
    const companyStr = company ? `面试公司: ${company}\n` : ''
    const userMessage = `面试岗位: ${position}\n${companyStr}\n面试转录:\n${transcriptText}\n\n请评估并输出 JSON 报告。`

    try {
      const result = await this.aiService.call({
        systemPrompt,
        userMessage,
        temperature: 0.5,
        maxTokens: 2048,
      })
      const parsed: any = this.extractJson(result)

      // Validate required fields. A score of 0 is legitimate, so test for a
      // numeric value rather than truthiness.
      const hasScore = parsed != null
        && parsed.overallScore != null
        && parsed.overallScore !== ''
        && Number.isFinite(Number(parsed.overallScore))
      if (!hasScore || typeof parsed.dimensions !== 'object' || parsed.dimensions === null) {
        return this.emptyAnalysis()
      }

      return {
        overallScore: this.clampScore(parsed.overallScore),
        dimensions: {
          logic: this.clampScore(parsed.dimensions.logic),
          expression: this.clampScore(parsed.dimensions.expression),
          professionalism: this.clampScore(parsed.dimensions.professionalism),
          stability: this.clampScore(parsed.dimensions.stability),
        },
        strengths: Array.isArray(parsed.strengths) ? parsed.strengths : [],
        weaknesses: Array.isArray(parsed.weaknesses) ? parsed.weaknesses : [],
        suggestions: Array.isArray(parsed.suggestions) ? parsed.suggestions : [],
        questionBreakdown: Array.isArray(parsed.questionBreakdown) ? parsed.questionBreakdown.slice(0, 10) : [],
      }
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err)
      this.logger.warn(`AI analysis unavailable, using placeholder report: ${reason}`)
      return this.emptyAnalysis()
    }
  }

  /**
   * Pull the JSON object out of an AI reply. Models frequently wrap JSON in a
   * Markdown code fence or add a sentence around it, so parse the span from the
   * first `{` to the last `}`.
   *
   * @throws SyntaxError when no parseable JSON object is present.
   */
  private extractJson(reply: unknown): unknown {
    // NOTE(review): assumes aiService.call resolves to a string; a value that is
    // already an object is passed through unchanged.
    if (typeof reply !== 'string') return reply
    const start = reply.indexOf('{')
    const end = reply.lastIndexOf('}')
    if (start === -1 || end <= start) {
      throw new SyntaxError('AI response contains no JSON object')
    }
    return JSON.parse(reply.slice(start, end + 1))
  }

  /** Round to an integer in [0, 100]; anything non-numeric becomes 0. */
  private clampScore(value: unknown): number {
    const score = Number(value)
    if (!Number.isFinite(score)) return 0
    return Math.min(100, Math.max(0, Math.round(score)))
  }

  /** Placeholder report used when no real analysis could be produced. */
  private emptyAnalysis() {
    return {
      overallScore: 60,
      dimensions: { logic: 60, expression: 60, professionalism: 60, stability: 60 },
      strengths: ['转录文本为空或 AI 分析异常'],
      weaknesses: ['请检查音频文件或重新上传'],
      suggestions: ['确保录音清晰完整后重新提交'],
      questionBreakdown: [],
    }
  }

  /**
   * Fetch one review for its owner.
   *
   * @throws HttpException 404 when the id is malformed or unknown, 403 when the
   *   review belongs to another user.
   */
  async getDetail(reviewId: string, userId: string) {
    this.assertValidId(reviewId)
    const review = await this.reviewModel.findById(reviewId).lean()
    if (!review) {
      throw new HttpException('复盘记录不存在', HttpStatus.NOT_FOUND)
    }
    if (review.userId.toString() !== userId) {
      throw new HttpException('无权访问', HttpStatus.FORBIDDEN)
    }
    return this.sanitize(review)
  }

  /**
   * List a user's reviews, newest first, without transcript bodies.
   * `page` is forced to at least 1 and `limit` into [1, MAX_PAGE_SIZE]; the
   * normalised values are echoed in the response.
   */
  async listByUser(userId: string, page = 1, limit = 20) {
    const safePage = Number.isFinite(page) && page >= 1 ? Math.floor(page) : 1
    const safeLimit = Number.isFinite(limit) && limit >= 1
      ? Math.min(Math.floor(limit), InterviewReviewService.MAX_PAGE_SIZE)
      : 20
    const skip = (safePage - 1) * safeLimit

    const [items, total] = await Promise.all([
      this.reviewModel
        .find({ userId })
        .sort({ createdAt: -1 })
        .skip(skip)
        .limit(safeLimit)
        .select('-transcript.fullText -transcript.segments')
        .lean(),
      this.reviewModel.countDocuments({ userId }),
    ])

    return {
      items: items.map(i => this.sanitize(i)),
      total,
      page: safePage,
      limit: safeLimit,
      totalPages: Math.ceil(total / safeLimit),
    }
  }

  /**
   * Delete a review and its stored audio file.
   *
   * @throws HttpException 404 when the id is malformed or unknown, 403 when the
   *   review belongs to another user.
   */
  async delete(reviewId: string, userId: string) {
    this.assertValidId(reviewId)
    const review = await this.reviewModel.findById(reviewId)
    if (!review) {
      throw new HttpException('复盘记录不存在', HttpStatus.NOT_FOUND)
    }
    if (review.userId.toString() !== userId) {
      throw new HttpException('无权删除', HttpStatus.FORBIDDEN)
    }

    // Delete audio file if exists; a failure here must not block removing the record.
    const audioPath = review.audioFile?.filePath
    if (audioPath) {
      try {
        await fs.promises.unlink(audioPath)
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err)
        this.logger.warn(`Could not remove audio file for review ${reviewId}: ${reason}`)
      }
    }

    await this.reviewModel.findByIdAndDelete(reviewId)
    return { success: true }
  }

  /** Reject ids that cannot be ObjectIds so the query does not fail with a cast error. */
  private assertValidId(reviewId: string): void {
    if (typeof reviewId !== 'string' || !InterviewReviewService.OBJECT_ID.test(reviewId)) {
      throw new HttpException('复盘记录不存在', HttpStatus.NOT_FOUND)
    }
  }

  /**
   * MD5 hex digest of an upload. Uses the in-memory buffer when present and
   * otherwise streams the file from disk (disk storage provides no buffer).
   * Returns '' when neither is available.
   */
  private async hashAudio(audioFile: { buffer?: Buffer; path?: string }): Promise<string> {
    const { createHash } = await import('crypto')
    const md5 = createHash('md5')
    if (audioFile.buffer) {
      return md5.update(audioFile.buffer).digest('hex')
    }
    if (!audioFile.path) return ''
    for await (const chunk of fs.createReadStream(audioFile.path)) {
      md5.update(chunk)
    }
    return md5.digest('hex')
  }

  /** Shallow copy of a review with the server-side file path removed. */
  private sanitize(item: any) {
    if (!item) return item
    const obj = { ...item }
    // Remove sensitive fields
    if (obj.audioFile?.filePath) {
      obj.audioFile = { ...obj.audioFile }
      delete obj.audioFile.filePath
    }
    return obj
  }
}