feat: AI assistant phase 2 - configurable prompt, action operations, FAQ matching, NVIDIA provider
- Admin-configurable AI prompt/quick questions from system_configs DB
- GET /api/v1/ai/quick-questions endpoint for fetching quick questions
- Local FAQ matching for instant responses (avoid AI calls for common Qs)
- AI action extraction: "add customer" intent detected, structured data returned
- Frontend action confirmation card with editable fields, calls customer API on confirm
- NVIDIA provider (stepfun-ai/step-3.5-flash) for faster chat vs deepseek-v4-flash
- Fixed httpx client timeout preventing backend hangs
- Added log_usage calls for auth events (register/login/guest/wechat)
- Admin tabs (users/stats/logs/config) fully functional with real backend
- AiAssistant component added to all tabbar pages
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
from typing import Dict, Any, Optional, List
|
||||
from app.ai.providers.openai import OpenAIProvider, SYSTEM_PROMPTS
|
||||
import logging
|
||||
import time
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NvidiaProvider(OpenAIProvider):
    """Chat provider that targets NVIDIA's hosted API through ``OpenAIProvider``.

    The parent class is configured with the NVIDIA base URL, the selected
    model, and an explicit ``httpx.AsyncClient`` carrying a 60-second timeout
    so a stalled upstream request cannot hang indefinitely.
    """

    def __init__(self, api_key: str, model: str = "stepfun-ai/step-3.5-flash", base_url: str = "https://integrate.api.nvidia.com/v1"):
        """Configure the parent provider for the NVIDIA endpoint.

        Args:
            api_key: Credential forwarded to ``OpenAIProvider``.
            model: Model identifier sent with every chat request.
            base_url: Root URL of the API endpoint.
        """
        super().__init__(
            api_key=api_key,
            model=model,
            base_url=base_url,
            http_client=httpx.AsyncClient(timeout=httpx.Timeout(60.0)),
        )
        # Presumably surfaced through ``self.name`` by the parent class
        # (``chat`` reports ``self.name``) -- confirm against OpenAIProvider.
        self._name = f"nvidia-{model}"

    async def chat(self, message: str, history: Optional[list] = None, system_prompt: Optional[str] = None) -> Dict[str, Any]:
        """Send one chat turn and return the model's reply.

        Args:
            message: The user's message for this turn.
            history: Earlier messages; only the last 10 entries are sent.
            system_prompt: Overrides ``SYSTEM_PROMPTS["chat"]`` when truthy.

        Returns:
            A dict with ``reply`` (always a string, possibly empty),
            ``provider`` and ``model``.
        """
        # perf_counter is monotonic, so the logged durations cannot be skewed
        # by wall-clock adjustments.
        started = time.perf_counter()

        system = system_prompt or SYSTEM_PROMPTS["chat"]
        messages = [{"role": "system", "content": system}]
        if history:
            messages.extend(history[-10:])
        messages.append({"role": "user", "content": message})
        built = time.perf_counter()

        resp = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=300,
            temperature=0.3,
        )
        answered = time.perf_counter()

        reply_message = resp.choices[0].message
        # Fall back to the ``reasoning`` field when ``content`` is empty.
        # Either field may be None, so normalise to "" to keep ``reply`` a
        # string and keep the length computation below from raising.
        content = reply_message.content or getattr(reply_message, "reasoning", None) or ""
        processed = time.perf_counter()

        # History entries may carry ``"content": None``; count those as zero
        # characters instead of letting a log statistic fail the request.
        chars_in = sum(
            len(m.get("content") or "") for m in messages if isinstance(m, dict)
        )
        logger.info(
            "NVIDIA timing: build_msgs=%.1fs api_call=%.1fs process=%.1fs "
            "chars_in=%d chars_out=%d",
            built - started,
            answered - built,
            processed - answered,
            chars_in,
            len(content),
        )

        return {"reply": content, "provider": self.name, "model": self.model}
|
||||
Reference in New Issue
Block a user