diff --git a/backend/src/modules/ai/ai.service.ts b/backend/src/modules/ai/ai.service.ts
index 316b3d3..d3558eb 100644
--- a/backend/src/modules/ai/ai.service.ts
+++ b/backend/src/modules/ai/ai.service.ts
@@ -22,65 +22,94 @@ export class AiService {
 
   private readonly backupUrl = process.env.AI_BACKUP_URL || "https://integrate.api.nvidia.com/v1"
   private readonly backupKey = process.env.AI_BACKUP_KEY || ""
-  private readonly backupModel = process.env.AI_BACKUP_MODEL || "stepfun-ai/step-3.5-flash"
+  private readonly backupModel = process.env.AI_BACKUP_MODEL || "meta/llama-3.1-8b-instruct"
 
+  /**
+   * Sends one chat completion request, trying the primary model, the primary fallback model
+   * and the backup provider in that order, and returns the first non-empty reply.
+   *
+   * @throws Error when every attempt fails or returns empty content.
+   */
   async call(options: AiCallOptions): Promise<string> {
     const { systemPrompt, userMessage, temperature = 0.7, maxTokens = 2048 } = options
 
     // Try primary AI (deepseek-v4-flash on sensenova)
     try {
-      const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryModel, systemPrompt, userMessage, temperature, maxTokens)
+      const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryModel, systemPrompt, userMessage, temperature, maxTokens, 60000)
       if (result) return result
+      // Primary returned empty content (thinking model exhausted tokens); retry once with a larger
+      // budget, but only when the 4096 cap actually raises it above the first attempt.
+      const retryTokens = Math.min(maxTokens * 2, 4096)
+      if (retryTokens > maxTokens) {
+        const retry = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryModel, systemPrompt, userMessage, temperature, retryTokens, 60000)
+        if (retry) return retry
+      }
     } catch (e) {
       this.logger.warn(`Primary AI failed: ${(e as Error).message}, trying primary fallback...`)
     }
 
     // Try primary fallback model (sensenova-6.7-flash-lite, same provider)
     try {
-      const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryFallbackModel, systemPrompt, userMessage, temperature, maxTokens)
+      const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryFallbackModel, systemPrompt, userMessage, temperature, maxTokens, 60000)
       if (result) return result
     } catch (e) {
       this.logger.warn(`Primary fallback AI also failed: ${(e as Error).message}, trying backup...`)
     }
 
-    // Try backup AI (NVIDIA)
+    // Try backup AI (NVIDIA - meta/llama-3.1-8b-instruct)
     try {
-      const result = await this.callApi(this.backupUrl, this.backupKey, this.backupModel, systemPrompt, userMessage, temperature, maxTokens)
+      const result = await this.callApi(this.backupUrl, this.backupKey, this.backupModel, systemPrompt, userMessage, temperature, Math.max(maxTokens, 2048), 120000)
       if (result) return result
     } catch (e) {
       this.logger.warn(`Backup AI also failed: ${(e as Error).message}`)
     }
 
-    // Final fallback
-    throw new Error("AI \u670d\u52a1\u6682\u65f6\u4e0d\u53ef\u7528\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5")
+    throw new Error("AI 服务暂时不可用,请稍后重试")
   }
 
+  /**
+   * POSTs a single request to `${baseUrl}/chat/completions`.
+   *
+   * @param timeout Request timeout in milliseconds, passed to axios.
+   * @returns The first choice's message content, or null when it is missing or empty.
+   * @throws Whatever axios throws (HTTP errors, timeouts); the caller decides how to fall back.
+   */
   private async callApi(
     baseUrl: string, apiKey: string, model: string,
     systemPrompt: string, userMessage: string,
-    temperature: number, maxTokens: number,
+    temperature: number, maxTokens: number, timeout: number,
   ): Promise<string | null> {
-    const res = await axios.post(
-      `${baseUrl}/chat/completions`,
-      {
-        model,
-        messages: [
-          { role: "system", content: systemPrompt },
-          { role: "user", content: userMessage },
-        ],
-        temperature,
-        max_tokens: maxTokens,
-      },
-      {
-        headers: {
-          "Authorization": `Bearer ${apiKey}`,
-          "Content-Type": "application/json",
+    try {
+      const res = await axios.post(
+        `${baseUrl}/chat/completions`,
+        {
+          model,
+          messages: [
+            { role: "system", content: systemPrompt },
+            { role: "user", content: userMessage },
+          ],
+          temperature,
+          max_tokens: maxTokens,
         },
-        timeout: 60000,
-        httpsAgent: httpAgent,
-        transitional: { clarifyTimeoutError: true },
-      },
-    )
-    return res.data?.choices?.[0]?.message?.content || null
+        {
+          headers: {
+            "Authorization": `Bearer ${apiKey}`,
+            "Content-Type": "application/json",
+          },
+          timeout,
+          httpsAgent: httpAgent,
+          transitional: { clarifyTimeoutError: true },
+        },
+      )
+      return res.data?.choices?.[0]?.message?.content || null
+    } catch (e: unknown) {
+      // With clarifyTimeoutError enabled axios reports timeouts as ETIMEDOUT; ECONNABORTED is
+      // kept for older axios behaviour. Log the timeout, then rethrow so call() can fall back.
+      const code = (e as { code?: string } | null)?.code
+      if (code === "ETIMEDOUT" || code === "ECONNABORTED") {
+        this.logger.warn(`AI call timeout (${timeout}ms): ${model}`)
+      }
+      throw e
+    }
   }
 }