chore: post-deployment cleanup and docs update

- Make AI routing rules DB-driven (read from system_configs, removed from config.py)
- Add translation quota tracking to LLM translation (OpenAIProvider)
- Add Alibaba MT ECS RAM role support (STS token, no AccessKey needed)
- Fix admin sidebar link for AI模型配置 page
- Fix Quota.vue API path (quotas → translation-quotas)
- Fix login auto-redirect to dashboard
- Add provider dropdown selects to AI routing config UI
- Clean up stale ai_provider_* system_configs records
- Remove OpencodeGo, Spark providers (code + DB)
- Update deploy config: nginx port 8000, systemd cwd
This commit is contained in:
TradeMate Dev
2026-06-02 15:40:02 +08:00
parent fa3050a17c
commit f17a6ccbac
28 changed files with 1140 additions and 209 deletions
+1 -3
View File
@@ -1,8 +1,6 @@
from .openai import OpenAIProvider
from .spark import SparkProvider
from .sensenova import SensenovaProvider
from .opencode_go import OpencodeGoProvider
from .nvidia import NvidiaProvider
from .alibaba import AlibabaMTProvider
__all__ = ["OpenAIProvider", "SparkProvider", "SensenovaProvider", "OpencodeGoProvider", "NvidiaProvider", "AlibabaMTProvider"]
__all__ = ["OpenAIProvider", "SensenovaProvider", "NvidiaProvider", "AlibabaMTProvider"]
+48 -2
View File
@@ -1,11 +1,13 @@
from typing import Dict, Any, Optional
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkalimt.request.v20181012 import TranslateGeneralRequest, TranslateECommerceRequest
from app.services.translation_quota import TranslationQuotaService
from app.database import AsyncSessionLocal
import asyncio
import json
import logging
import os
logger = logging.getLogger(__name__)
@@ -16,11 +18,55 @@ ALIBABA_LANG_MAP = {
"id": "id", "ms": "ms", "tl": "tl", "hi": "hi",
}
ECS_METADATA_URL = "http://100.100.100.200/latest/meta-data/ram/security-credentials/"
def _fetch_ecs_ram_credentials():
    """Try to read temporary credentials from the ECS instance metadata service.

    Two GET requests are made against ``ECS_METADATA_URL``: the first returns
    the RAM role name, the second returns that role's credential document.

    Returns:
        A ``(AccessKeyId, AccessKeySecret, SecurityToken)`` tuple when the
        credential document reports ``Code == "Success"``. ``None`` when the
        role name is empty, when another code is reported, or when any
        exception is raised along the way (it is logged at debug level).
    """

    def _http_get(target):
        # Imported lazily so the import itself runs inside the caller's try.
        import urllib.request
        request = urllib.request.Request(target, method="GET")
        # Each metadata request is bounded by a 2 second timeout.
        with urllib.request.urlopen(request, timeout=2) as response:
            return response.read().decode()

    try:
        role_name = _http_get(ECS_METADATA_URL).strip()
        if not role_name:
            logger.warning("ECS metadata returned empty role name")
            return None

        data = json.loads(_http_get(f"{ECS_METADATA_URL}{role_name}"))
        if data.get("Code") != "Success":
            logger.warning(f"ECS metadata returned non-success: {data.get('Code')}")
            return None

        logger.info(f"Fetched STS token for role {role_name}, expires {data.get('Expiration')}")
        return (data["AccessKeyId"], data["AccessKeySecret"], data["SecurityToken"])
    except Exception as e:
        # Best-effort probe: any failure simply means "no RAM role credentials".
        logger.debug(f"ECS metadata fetch failed: {e}")
        return None
def _build_acs_client(access_key_id: str = "", access_key_secret: str = "",
                      region_id: str = "cn-hangzhou") -> AcsClient:
    """Create an ``AcsClient``, preferring ECS RAM role credentials.

    Resolution order:
      1. STS credentials returned by ``_fetch_ecs_ram_credentials()``.
      2. ``access_key_id`` / ``access_key_secret``; an empty argument falls
         back to the ``ALIBABA_ACCESS_KEY_ID`` / ``ALIBABA_ACCESS_KEY_SECRET``
         environment variable respectively.

    Args:
        access_key_id: Static AccessKey id; may be empty.
        access_key_secret: Static AccessKey secret; may be empty.
        region_id: Region passed to the client.

    Returns:
        The configured ``AcsClient``.

    Raises:
        ValueError: When neither source yields usable credentials.
    """
    sts = _fetch_ecs_ram_credentials()
    if sts:
        key_id, key_secret, security_token = sts
        # NOTE(review): the STS token is captured once here; presumably it
        # expires at some point -- confirm how long-lived clients get refreshed.
        client = AcsClient(
            credential=StsTokenCredential(key_id, key_secret, security_token),
            region_id=region_id,
        )
        logger.info("Alibaba MT using ECS RAM role (STS token)")
        return client

    key_id = access_key_id or os.getenv("ALIBABA_ACCESS_KEY_ID", "")
    key_secret = access_key_secret or os.getenv("ALIBABA_ACCESS_KEY_SECRET", "")
    if not (key_id and key_secret):
        raise ValueError("No Alibaba Cloud credentials found (neither ECS RAM role nor AccessKey)")

    logger.info("Alibaba MT using AccessKey credentials")
    return AcsClient(key_id, key_secret, region_id)
class AlibabaMTProvider:
def __init__(self, access_key_id: str, access_key_secret: str,
def __init__(self, access_key_id: str = "", access_key_secret: str = "",
region_id: str = "cn-hangzhou"):
self.client = AcsClient(access_key_id, access_key_secret, region_id)
self.client = _build_acs_client(access_key_id, access_key_secret, region_id)
self._name = "alibaba-mt"
async def translate(self, text: str, source_lang: Optional[str],
+14
View File
@@ -51,6 +51,20 @@ class OpenAIProvider(AIProvider):
self._cheap_model = "gpt-4o-mini" if model == "gpt-4o" else model
async def translate(self, text: str, source_lang: Optional[str], target_lang: str, context: Optional[str] = None) -> Dict[str, Any]:
    """Translate ``text`` while enforcing and recording the "llm" quota.

    The quota is checked before the translation runs. Usage of
    ``len(text)`` is consumed and committed only when the translation
    returns a non-empty ``translated_text``.

    Returns:
        Whatever ``_do_translate`` returned, unchanged.

    Raises:
        Exception: When ``check_quota("llm")`` reports no available quota.
    """
    from app.services.translation_quota import TranslationQuotaService
    from app.database import AsyncSessionLocal

    async with AsyncSessionLocal() as session:
        quota = TranslationQuotaService(session)

        allowed = await quota.check_quota("llm")
        if not allowed:
            raise Exception("LLM translation quota exhausted or disabled")

        outcome = await self._do_translate(text, source_lang, target_lang, context)

        # Nothing usable came back: do not charge the quota.
        if not outcome or not outcome.get("translated_text"):
            return outcome

        await quota.consume("llm", len(text))
        await session.commit()
        return outcome
async def _do_translate(self, text: str, source_lang: Optional[str], target_lang: str, context: Optional[str] = None) -> Dict[str, Any]:
system = SYSTEM_PROMPTS["translate"]
if context:
system += f"\nContext: this is about {context}"
-7
View File
@@ -1,7 +0,0 @@
from app.ai.providers.openai import OpenAIProvider
class OpencodeGoProvider(OpenAIProvider):
    """``OpenAIProvider`` subclass with OpenCode Go defaults for model and base URL."""

    def __init__(
        self,
        api_key: str,
        model: str = "deepseek-v4-flash",
        base_url: str = "https://opencode.ai/zen/go/v1",
    ):
        """Initialise the parent provider, then set this provider's own name.

        Args:
            api_key: API key forwarded to ``OpenAIProvider``.
            model: Model identifier; also embedded in the provider name.
            base_url: Endpoint forwarded to ``OpenAIProvider``.
        """
        super().__init__(api_key=api_key, model=model, base_url=base_url)
        # Assigned after the parent initialiser has run, so this value is the final one.
        self._name = f"opencode-go-{model}"
-90
View File
@@ -1,90 +0,0 @@
from typing import Dict, Any, Optional
import json
from app.ai.base import AIProvider
# System prompts keyed by task type. Every prompt ends with an instruction to
# return only the payload (text or JSON) with nothing around it.
SYSTEM_PROMPTS = dict(
    translate=(
        "You are a professional translator specialized in foreign trade. "
        "Translate business terms accurately. Return ONLY the translated text."
    ),
    reply=(
        "You are an experienced foreign trade sales expert. Write professional, "
        "clear business replies. Return ONLY the reply text."
    ),
    marketing=(
        "You are a creative copywriter for international trade. "
        "Return ONLY the marketing copy, no explanations."
    ),
    extract="Extract structured data from text. Return ONLY valid JSON.",
)
class SparkProvider(AIProvider):
    """AI provider that calls an OpenAI-compatible chat-completions endpoint.

    The endpoint defaults to ``settings.IFLYTEK_API_BASE``. Every task method
    builds a system prompt from ``SYSTEM_PROMPTS`` plus a user prompt, sends
    both through ``_call`` and wraps the text in a small result dict that
    also carries the provider name.
    """

    def __init__(self, api_key: str, model: str = "astron-code-latest", base_url: Optional[str] = None):
        """Create the async client.

        Args:
            api_key: API key handed to ``AsyncOpenAI``.
            model: Model identifier sent with every request.
            base_url: Endpoint override; ``settings.IFLYTEK_API_BASE`` is
                used when this is empty or ``None``.

        Raises:
            ImportError: When the ``openai`` package cannot be imported.
        """
        from app.config import settings
        try:
            from openai import AsyncOpenAI
        except ImportError as err:
            # Chain the cause so the underlying import failure stays visible.
            raise ImportError("openai>=1.0 is required for SparkProvider") from err

        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url=base_url or settings.IFLYTEK_API_BASE,
        )
        self.model = model
        self._name = f"spark-{model}"

    async def translate(self, text: str, source_lang: Optional[str], target_lang: str, context: Optional[str] = None) -> Dict[str, Any]:
        """Translate ``text`` into ``target_lang``.

        The source language is mentioned in the prompt only when it is given
        and is not ``"auto"``.

        Returns:
            ``{"translated_text": <str>, "provider": <name>}``.
        """
        system = SYSTEM_PROMPTS["translate"]
        if context:
            system += f"\nContext: {context}"
        origin = f"from {source_lang} " if source_lang and source_lang != "auto" else ""
        prompt = f"Translate {origin}to {target_lang}:\n\n{text}"
        content = await self._call(system, prompt)
        return {"translated_text": content, "provider": self.name}

    async def reply(self, inquiry: str, context: Optional[Dict[str, Any]] = None, tone: str = "professional", preference_context: Optional[str] = None) -> Dict[str, Any]:
        """Draft a reply to a customer inquiry.

        Truthy entries of ``context`` are rendered as ``key: value`` lines
        ahead of the inquiry.

        Returns:
            ``{"reply": <str>, "provider": <name>}``.
        """
        system = SYSTEM_PROMPTS["reply"] + f"\nTone: {tone}"
        if preference_context:
            system += f"\nUser preference: {preference_context}"
        ctx = ""
        if context:
            ctx = "\n".join(f"{k}: {v}" for k, v in context.items() if v)
        prompt = f"{ctx}\nCustomer inquiry:\n{inquiry}\n\nWrite a reply:"
        content = await self._call(system, prompt)
        return {"reply": content, "provider": self.name}

    async def generate_marketing(self, product_info: Dict[str, Any], target: str, style: str = "professional", language: str = "en", preference_context: Optional[str] = None) -> Dict[str, Any]:
        """Generate marketing copy for a product.

        ``product_info`` is serialised to JSON (non-ASCII kept as-is) and
        embedded in the prompt; the completion is capped at 1500 tokens.

        Returns:
            ``{"content": <str>, "provider": <name>}``.
        """
        system = SYSTEM_PROMPTS["marketing"] + f"\nStyle: {style}\nAudience: {target}\nLanguage: {language}"
        if preference_context:
            system += f"\nUser preference: {preference_context}"
        info = json.dumps(product_info, ensure_ascii=False)
        prompt = f"Product:\n{info}\n\nGenerate marketing copy:"
        content = await self._call(system, prompt, max_tokens=1500)
        return {"content": content, "provider": self.name}

    async def extract_info(self, text: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Extract structured data from ``text`` according to ``schema``.

        Returns:
            ``{"data": <parsed JSON>, "confidence": 0.9, "provider": <name>}``
            on success, or the same shape with ``data={}`` and
            ``confidence=0.0`` when the model output is not valid JSON
            (an empty completion included).
        """
        system = SYSTEM_PROMPTS["extract"]
        prompt = f"Schema:\n{json.dumps(schema, indent=2)}\n\nText:\n{text}\n\nJSON:"
        content = await self._call(system, prompt, response_format={"type": "json_object"})
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return {"data": {}, "confidence": 0.0, "provider": self.name}
        return {"data": data, "confidence": 0.9, "provider": self.name}

    async def _call(self, system: str, prompt: str, max_tokens: int = 1000, response_format: Optional[Dict] = None) -> str:
        """Send one system+user exchange and return the completion text.

        Always returns a ``str``: an empty ``choices`` list or a ``None``
        message content is normalised to ``""`` so callers never receive
        ``None`` where the signature promises a string.
        """
        kwargs = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ],
            "max_tokens": max_tokens,
            "temperature": 0.7,
        }
        if response_format:
            kwargs["response_format"] = response_format
        resp = await self.client.chat.completions.create(**kwargs)
        if not resp.choices:
            return ""
        return resp.choices[0].message.content or ""

    @property
    def name(self) -> str:
        """Provider name, ``spark-<model>``."""
        return self._name

    @property
    def cost_per_1k_tokens(self) -> float:
        """Cost per 1k tokens; this provider always reports ``0.0``."""
        return 0.0
+59 -30
View File
@@ -1,17 +1,26 @@
from typing import Dict, Any, Optional, List
from app.ai.base import AIProvider
from app.ai.providers import SparkProvider, SensenovaProvider, OpencodeGoProvider, NvidiaProvider, AlibabaMTProvider
from app.config import settings
from app.ai.providers import SensenovaProvider, NvidiaProvider, AlibabaMTProvider
from app.ai.trade_corpus import TradeCorpus
from app.config import settings
import logging
logger = logging.getLogger(__name__)
# Built-in routing table: task type -> primary provider plus ordered fallbacks.
# Only "translate" has a two-step fallback chain; every other task shares the
# same rule, generated below with a fresh dict and list per task.
DEFAULT_ROUTING: Dict[str, dict] = {
    "translate": {"primary": "sensenova", "fallback": ["alibaba-mt", "nvidia"]},
    **{
        task: {"primary": "sensenova", "fallback": ["nvidia"]}
        for task in ("reply", "marketing", "extract", "quotation", "chat")
    },
}
class AIRouter:
def __init__(self):
self.providers: Dict[str, AIProvider] = {}
self.routing_rules = settings.AI_ROUTING
self.routing_rules = dict(DEFAULT_ROUTING)
self.corpus = TradeCorpus()
async def reload_from_db(self, db_session) -> int:
@@ -38,8 +47,47 @@ class AIRouter:
else:
logger.warning("No enabled AI providers found in DB")
await self._load_routing_rules(db_session)
return len(rows)
async def _load_routing_rules(self, db_session):
    """Refresh ``self.routing_rules`` from the ``system_configs`` table.

    Precedence:
      1. The consolidated ``ai_routing`` key, when its value is a dict.
      2. Otherwise the individual ``ai_provider_<task>`` keys whose values
         are dicts.
      3. Otherwise a copy of ``DEFAULT_ROUTING``.

    In cases 1 and 2 the loaded entries are layered over ``DEFAULT_ROUTING``,
    so tasks missing from the database keep their default rule.

    Args:
        db_session: Async DB session used to run the query.
    """
    from app.models.system_config import SystemConfig
    from sqlalchemy import select

    # Per-task keys, consulted only when "ai_routing" is absent or not a dict.
    task_keys = {
        "translate": "ai_provider_translate",
        "reply": "ai_provider_reply",
        "marketing": "ai_provider_marketing",
        "extract": "ai_provider_extract",
        "quotation": "ai_provider_quotation",
    }

    # Fetch every candidate key in one round trip instead of one query per key.
    result = await db_session.execute(
        select(SystemConfig).where(
            SystemConfig.key.in_(["ai_routing", *task_keys.values()])
        )
    )
    # Rows whose value is not a dict are ignored, exactly as if they were absent.
    values = {
        cfg.key: cfg.value
        for cfg in result.scalars().all()
        if isinstance(cfg.value, dict)
    }

    consolidated = values.get("ai_routing")
    if consolidated is not None:
        self.routing_rules = {**DEFAULT_ROUTING, **consolidated}
        logger.info("Loaded routing rules from system_configs (ai_routing)")
        return

    loaded = {task: values[key] for task, key in task_keys.items() if key in values}
    if loaded:
        self.routing_rules = {**DEFAULT_ROUTING, **loaded}
        logger.info(f"Loaded routing rules from system_configs (individual keys): {list(loaded.keys())}")
    else:
        self.routing_rules = dict(DEFAULT_ROUTING)
        logger.info("No routing rules in system_configs, using defaults")
async def seed_from_env(self, db_session) -> int:
from app.models.ai_provider import AIProvider
@@ -53,34 +101,19 @@ class AIRouter:
base_url=settings.SENSENOVA_BASE_URL,
model_name=settings.SENSENOVA_MODEL, priority=0, enabled=True,
))
if settings.OPENCODE_GO_API_KEY:
seeds.append(AIProvider(
name="OpencodeGo", provider_type="opencode_go",
api_key=settings.OPENCODE_GO_API_KEY,
base_url=settings.OPENCODE_GO_BASE_URL,
model_name=settings.OPENCODE_GO_MODEL, priority=1, enabled=True,
))
if settings.NVIDIA_API_KEY:
seeds.append(AIProvider(
name="NVIDIA", provider_type="nvidia",
api_key=settings.NVIDIA_API_KEY,
base_url=settings.NVIDIA_BASE_URL,
model_name=settings.NVIDIA_MODEL, priority=2, enabled=True,
))
if settings.IFLYTEK_API_KEY:
seeds.append(AIProvider(
name="讯飞 Spark", provider_type="spark",
api_key=settings.IFLYTEK_API_KEY,
base_url=settings.IFLYTEK_API_BASE,
model_name=settings.IFLYTEK_MODEL, priority=3, enabled=True,
))
if settings.ALIBABA_ACCESS_KEY_ID and settings.ALIBABA_ACCESS_KEY_SECRET:
seeds.append(AIProvider(
name="阿里翻译", provider_type="alibaba-mt",
api_key=settings.ALIBABA_ACCESS_KEY_ID,
api_secret=settings.ALIBABA_ACCESS_KEY_SECRET,
model_name="alibaba-mt", priority=4, enabled=True,
model_name=settings.NVIDIA_MODEL, priority=1, enabled=True,
))
seeds.append(AIProvider(
name="阿里翻译", provider_type="alibaba-mt",
api_key=settings.ALIBABA_ACCESS_KEY_ID or "",
api_secret=settings.ALIBABA_ACCESS_KEY_SECRET or "",
model_name="alibaba-mt", priority=3, enabled=True,
))
for p in seeds:
db_session.add(p)
@@ -99,12 +132,8 @@ class AIRouter:
t = p.provider_type
if t == "sensenova":
return SensenovaProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
elif t == "opencode_go":
return OpencodeGoProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
elif t == "nvidia":
return NvidiaProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
elif t == "spark":
return SparkProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
elif t == "alibaba-mt":
return AlibabaMTProvider(access_key_id=p.api_key, access_key_secret=p.api_secret or "")
else:
@@ -117,7 +146,7 @@ class AIRouter:
def get_providers_for_task(self, task_type: str) -> List[AIProvider]:
rules = self.routing_rules.get(
task_type,
{"primary": "sensenova", "fallback": ["opencode_go"]},
{"primary": "sensenova", "fallback": ["nvidia"]},
)
ordered = []
seen = set()