fix: AI LLM backup model not producing content; add retry for thinking models
- Replace the backup model stepfun-ai/step-3.5-flash with meta/llama-3.1-8b-instruct. stepfun is a thinking model that spends its whole token budget on reasoning and never outputs content, which caused all 3 fallthroughs to fail.
- Retry the primary model with doubled max_tokens (capped at 4096) when it returns empty content, because deepseek-v4-flash's thinking can exhaust the token budget.
- Increase the backup timeout to 120s and raise the backup max_tokens to at least 2048.
- Change callApi to return null on error instead of throwing, with timeout logging, for cleaner fallthrough logic.
This commit is contained in:
@@ -22,44 +22,47 @@ export class AiService {
|
|||||||
|
|
||||||
private readonly backupUrl = process.env.AI_BACKUP_URL || "https://integrate.api.nvidia.com/v1"
|
private readonly backupUrl = process.env.AI_BACKUP_URL || "https://integrate.api.nvidia.com/v1"
|
||||||
private readonly backupKey = process.env.AI_BACKUP_KEY || ""
|
private readonly backupKey = process.env.AI_BACKUP_KEY || ""
|
||||||
private readonly backupModel = process.env.AI_BACKUP_MODEL || "stepfun-ai/step-3.5-flash"
|
private readonly backupModel = process.env.AI_BACKUP_MODEL || "meta/llama-3.1-8b-instruct"
|
||||||
|
|
||||||
async call(options: AiCallOptions): Promise<string> {
|
async call(options: AiCallOptions): Promise<string> {
|
||||||
const { systemPrompt, userMessage, temperature = 0.7, maxTokens = 2048 } = options
|
const { systemPrompt, userMessage, temperature = 0.7, maxTokens = 2048 } = options
|
||||||
|
|
||||||
// Try primary AI (deepseek-v4-flash on sensenova)
|
// Try primary AI (deepseek-v4-flash on sensenova)
|
||||||
try {
|
try {
|
||||||
const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryModel, systemPrompt, userMessage, temperature, maxTokens)
|
const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryModel, systemPrompt, userMessage, temperature, maxTokens, 60000)
|
||||||
if (result) return result
|
if (result) return result
|
||||||
|
// Primary returned empty content (thinking model exhausted tokens); retry with more tokens
|
||||||
|
const retry = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryModel, systemPrompt, userMessage, temperature, Math.min(maxTokens * 2, 4096), 60000)
|
||||||
|
if (retry) return retry
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
this.logger.warn(`Primary AI failed: ${(e as Error).message}, trying primary fallback...`)
|
this.logger.warn(`Primary AI failed: ${(e as Error).message}, trying primary fallback...`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try primary fallback model (sensenova-6.7-flash-lite, same provider)
|
// Try primary fallback model (sensenova-6.7-flash-lite, same provider)
|
||||||
try {
|
try {
|
||||||
const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryFallbackModel, systemPrompt, userMessage, temperature, maxTokens)
|
const result = await this.callApi(this.primaryUrl, this.primaryKey, this.primaryFallbackModel, systemPrompt, userMessage, temperature, maxTokens, 60000)
|
||||||
if (result) return result
|
if (result) return result
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
this.logger.warn(`Primary fallback AI also failed: ${(e as Error).message}, trying backup...`)
|
this.logger.warn(`Primary fallback AI also failed: ${(e as Error).message}, trying backup...`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try backup AI (NVIDIA)
|
// Try backup AI (NVIDIA - meta/llama-3.1-8b-instruct)
|
||||||
try {
|
try {
|
||||||
const result = await this.callApi(this.backupUrl, this.backupKey, this.backupModel, systemPrompt, userMessage, temperature, maxTokens)
|
const result = await this.callApi(this.backupUrl, this.backupKey, this.backupModel, systemPrompt, userMessage, temperature, Math.max(maxTokens, 2048), 120000)
|
||||||
if (result) return result
|
if (result) return result
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
this.logger.warn(`Backup AI also failed: ${(e as Error).message}`)
|
this.logger.warn(`Backup AI also failed: ${(e as Error).message}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final fallback
|
throw new Error("AI 服务暂时不可用,请稍后重试")
|
||||||
throw new Error("AI \u670d\u52a1\u6682\u65f6\u4e0d\u53ef\u7528\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private async callApi(
|
private async callApi(
|
||||||
baseUrl: string, apiKey: string, model: string,
|
baseUrl: string, apiKey: string, model: string,
|
||||||
systemPrompt: string, userMessage: string,
|
systemPrompt: string, userMessage: string,
|
||||||
temperature: number, maxTokens: number,
|
temperature: number, maxTokens: number, timeout: number,
|
||||||
): Promise<string | null> {
|
): Promise<string | null> {
|
||||||
|
try {
|
||||||
const res = await axios.post(
|
const res = await axios.post(
|
||||||
`${baseUrl}/chat/completions`,
|
`${baseUrl}/chat/completions`,
|
||||||
{
|
{
|
||||||
@@ -76,11 +79,17 @@ export class AiService {
|
|||||||
"Authorization": `Bearer ${apiKey}`,
|
"Authorization": `Bearer ${apiKey}`,
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
},
|
},
|
||||||
timeout: 60000,
|
timeout,
|
||||||
httpsAgent: httpAgent,
|
httpsAgent: httpAgent,
|
||||||
transitional: { clarifyTimeoutError: true },
|
transitional: { clarifyTimeoutError: true },
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
return res.data?.choices?.[0]?.message?.content || null
|
return res.data?.choices?.[0]?.message?.content || null
|
||||||
|
} catch (e: any) {
|
||||||
|
if (e.code === 'ECONNABORTED') {
|
||||||
|
this.logger.warn(`AI call timeout (${timeout}ms): ${model}`)
|
||||||
|
}
|
||||||
|
return null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user