chore: post-deployment cleanup and docs update
- Make AI routing rules DB-driven (read from system_configs, removed from config.py) - Add translation quota tracking to LLM translation (OpenAIProvider) - Add Alibaba MT ECS RAM role support (STS token, no AccessKey needed) - Fix admin sidebar link for AI模型配置 page - Fix Quota.vue API path (quotas → translation-quotas) - Fix login auto-redirect to dashboard - Add provider dropdown selects to AI routing config UI - Clean up stale ai_provider_* system_configs records - Remove OpencodeGo, Spark providers (code + DB) - Update deploy config: nginx port 8000, systemd cwd
This commit is contained in:
@@ -1,8 +1,6 @@
|
||||
from .openai import OpenAIProvider
|
||||
from .spark import SparkProvider
|
||||
from .sensenova import SensenovaProvider
|
||||
from .opencode_go import OpencodeGoProvider
|
||||
from .nvidia import NvidiaProvider
|
||||
from .alibaba import AlibabaMTProvider
|
||||
|
||||
__all__ = ["OpenAIProvider", "SparkProvider", "SensenovaProvider", "OpencodeGoProvider", "NvidiaProvider", "AlibabaMTProvider"]
|
||||
__all__ = ["OpenAIProvider", "SensenovaProvider", "NvidiaProvider", "AlibabaMTProvider"]
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
from typing import Dict, Any, Optional
|
||||
from aliyunsdkcore.client import AcsClient
|
||||
from aliyunsdkcore.auth.credentials import StsTokenCredential
|
||||
from aliyunsdkalimt.request.v20181012 import TranslateGeneralRequest, TranslateECommerceRequest
|
||||
from app.services.translation_quota import TranslationQuotaService
|
||||
from app.database import AsyncSessionLocal
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -16,11 +18,55 @@ ALIBABA_LANG_MAP = {
|
||||
"id": "id", "ms": "ms", "tl": "tl", "hi": "hi",
|
||||
}
|
||||
|
||||
ECS_METADATA_URL = "http://100.100.100.200/latest/meta-data/ram/security-credentials/"
|
||||
|
||||
|
||||
def _fetch_ecs_ram_credentials(timeout: float = 2.0) -> Optional[tuple]:
    """Fetch temporary STS credentials for the RAM role attached to this host.

    Two GET requests are sent to the metadata service at ``ECS_METADATA_URL``:
    the first returns the role name, the second returns that role's
    credential document (JSON).

    Args:
        timeout: Per-request timeout in seconds (applied to both requests).

    Returns:
        ``(AccessKeyId, AccessKeySecret, SecurityToken)`` when the credential
        document reports ``Code == "Success"``; ``None`` in every other case.
        Network and parsing failures are logged and reported as ``None`` so
        the caller can fall back to another credential source.
    """
    # Imported locally, as in the original, to keep this probe self-contained.
    import http.client
    import urllib.request

    try:
        req = urllib.request.Request(ECS_METADATA_URL, method="GET")
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            role_name = resp.read().decode().strip()
        if not role_name:
            logger.warning("ECS metadata returned empty role name")
            return None

        req = urllib.request.Request(f"{ECS_METADATA_URL}{role_name}", method="GET")
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())

        if not isinstance(data, dict):
            # Valid JSON, but not the expected credential object.
            logger.warning(
                "ECS metadata returned unexpected payload type: %s",
                type(data).__name__,
            )
            return None
        if data.get("Code") != "Success":
            logger.warning("ECS metadata returned non-success: %s", data.get("Code"))
            return None

        credentials = (
            data["AccessKeyId"],
            data["AccessKeySecret"],
            data["SecurityToken"],
        )
    except (OSError, http.client.HTTPException) as e:
        # Unreachable endpoint, timeout or HTTP error: kept at debug level
        # because this function is used as a best-effort probe.
        logger.debug("ECS metadata fetch failed: %s", e)
        return None
    except (ValueError, KeyError) as e:
        # The service answered but the payload is unusable (bad encoding,
        # invalid JSON, missing credential field): worth a visible warning.
        logger.warning("ECS metadata response could not be parsed: %r", e)
        return None

    logger.info(
        "Fetched STS token for role %s, expires %s",
        role_name,
        data.get("Expiration"),
    )
    return credentials
|
||||
|
||||
|
||||
def _build_acs_client(access_key_id: str = "", access_key_secret: str = "",
                      region_id: str = "cn-hangzhou") -> AcsClient:
    """Create an ``AcsClient`` from the first credential source that works.

    Sources are tried in this order:

    1. STS credentials returned by ``_fetch_ecs_ram_credentials()``.
    2. The ``access_key_id`` / ``access_key_secret`` arguments, each falling
       back to the ``ALIBABA_ACCESS_KEY_ID`` / ``ALIBABA_ACCESS_KEY_SECRET``
       environment variable when empty.

    Args:
        access_key_id: Explicit AccessKey id (only used when step 1 fails).
        access_key_secret: Explicit AccessKey secret (only used when step 1 fails).
        region_id: Region passed to the client.

    Returns:
        A configured ``AcsClient``.

    Raises:
        ValueError: If neither source yields usable credentials.
    """
    sts_triple = _fetch_ecs_ram_credentials()
    if sts_triple:
        key_id, key_secret, security_token = sts_triple
        # NOTE(review): the STS token is captured once here; nothing in this
        # function refreshes it after expiry - confirm client lifetime.
        acs = AcsClient(
            credential=StsTokenCredential(key_id, key_secret, security_token),
            region_id=region_id,
        )
        logger.info("Alibaba MT using ECS RAM role (STS token)")
        return acs

    key_id = access_key_id or os.getenv("ALIBABA_ACCESS_KEY_ID", "")
    key_secret = access_key_secret or os.getenv("ALIBABA_ACCESS_KEY_SECRET", "")
    if not (key_id and key_secret):
        raise ValueError("No Alibaba Cloud credentials found (neither ECS RAM role nor AccessKey)")

    logger.info("Alibaba MT using AccessKey credentials")
    return AcsClient(key_id, key_secret, region_id)
|
||||
|
||||
|
||||
class AlibabaMTProvider:
|
||||
def __init__(self, access_key_id: str, access_key_secret: str,
|
||||
def __init__(self, access_key_id: str = "", access_key_secret: str = "",
|
||||
region_id: str = "cn-hangzhou"):
|
||||
self.client = AcsClient(access_key_id, access_key_secret, region_id)
|
||||
self.client = _build_acs_client(access_key_id, access_key_secret, region_id)
|
||||
self._name = "alibaba-mt"
|
||||
|
||||
async def translate(self, text: str, source_lang: Optional[str],
|
||||
|
||||
@@ -51,6 +51,20 @@ class OpenAIProvider(AIProvider):
|
||||
self._cheap_model = "gpt-4o-mini" if model == "gpt-4o" else model
|
||||
|
||||
async def translate(self, text: str, source_lang: Optional[str], target_lang: str, context: Optional[str] = None) -> Dict[str, Any]:
    """Translate ``text`` while enforcing and recording the "llm" quota.

    The quota is checked before the provider call. It is consumed, and the
    session committed, only when the result carries a non-empty
    ``translated_text``.

    Args:
        text: Text to translate; its character count is what gets consumed.
        source_lang: Source language, or ``None``.
        target_lang: Target language.
        context: Optional context forwarded to ``_do_translate``.

    Returns:
        Whatever ``_do_translate`` returns.

    Raises:
        RuntimeError: If ``check_quota("llm")`` reports no available quota.
    """
    # Imported at call time, as in the original.
    from app.services.translation_quota import TranslationQuotaService
    from app.database import AsyncSessionLocal

    # NOTE(review): the session stays open while the provider call is
    # awaited - confirm this does not pin a pooled connection under load.
    async with AsyncSessionLocal() as db:
        quota_svc = TranslationQuotaService(db)
        if not await quota_svc.check_quota("llm"):
            # RuntimeError (still an Exception subclass) instead of a bare
            # Exception, so callers can tell this failure apart.
            raise RuntimeError("LLM translation quota exhausted or disabled")

        result = await self._do_translate(text, source_lang, target_lang, context)

        # Only successful translations count against the quota.
        if result and result.get("translated_text"):
            await quota_svc.consume("llm", len(text))
            await db.commit()
        return result
|
||||
|
||||
async def _do_translate(self, text: str, source_lang: Optional[str], target_lang: str, context: Optional[str] = None) -> Dict[str, Any]:
|
||||
system = SYSTEM_PROMPTS["translate"]
|
||||
if context:
|
||||
system += f"\nContext: this is about {context}"
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
from app.ai.providers.openai import OpenAIProvider
|
||||
|
||||
|
||||
class OpencodeGoProvider(OpenAIProvider):
    """``OpenAIProvider`` subclass preconfigured for the OpenCode Go endpoint."""

    def __init__(
        self,
        api_key: str,
        model: str = "deepseek-v4-flash",
        base_url: str = "https://opencode.ai/zen/go/v1",
    ):
        """Initialise the parent provider, then override the provider name.

        Args:
            api_key: API key forwarded to ``OpenAIProvider``.
            model: Model identifier; also embedded in the provider name.
            base_url: Base URL forwarded to ``OpenAIProvider``.
        """
        parent_kwargs = {"api_key": api_key, "model": model, "base_url": base_url}
        super().__init__(**parent_kwargs)
        # Set after the parent initialiser so this name is the one that sticks.
        self._name = "opencode-go-{}".format(model)
|
||||
@@ -1,90 +0,0 @@
|
||||
from typing import Dict, Any, Optional
|
||||
import json
|
||||
from app.ai.base import AIProvider
|
||||
|
||||
|
||||
# System prompt for each task type, looked up by key ("translate", "reply",
# "marketing", "extract") when building a chat request.
SYSTEM_PROMPTS = {
    "translate": (
        "You are a professional translator specialized in foreign trade. Translate "
        "business terms accurately. Return ONLY the translated text."
    ),
    "reply": (
        "You are an experienced foreign trade sales expert. "
        "Write professional, clear business replies. Return ONLY the reply text."
    ),
    "marketing": (
        "You are a creative copywriter for international trade. Return ONLY "
        "the marketing copy, no explanations."
    ),
    "extract": "Extract structured data from text. Return ONLY valid JSON.",
}
|
||||
|
||||
|
||||
class SparkProvider(AIProvider):
    """AI provider that talks to a chat-completions endpoint via ``AsyncOpenAI``.

    Every task method builds a system prompt from ``SYSTEM_PROMPTS`` plus a
    user prompt and delegates the request to ``_call``.
    """

    def __init__(self, api_key: str, model: str = "astron-code-latest", base_url: Optional[str] = None):
        """Create the async client.

        Args:
            api_key: API key passed to ``AsyncOpenAI``.
            model: Model identifier sent with every request.
            base_url: Endpoint override; ``settings.IFLYTEK_API_BASE`` is used
                when this is empty.

        Raises:
            ImportError: If the ``openai`` package cannot be imported.
        """
        from app.config import settings
        try:
            from openai import AsyncOpenAI
        except ImportError as exc:
            # Chain the cause so the real import failure stays visible.
            raise ImportError("openai>=1.0 is required for SparkProvider") from exc
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url=base_url or settings.IFLYTEK_API_BASE,
        )
        self.model = model
        self._name = f"spark-{model}"

    async def translate(self, text: str, source_lang: Optional[str], target_lang: str, context: Optional[str] = None) -> Dict[str, Any]:
        """Translate ``text`` into ``target_lang``.

        Returns:
            ``{"translated_text": <model output>, "provider": <name>}``.
        """
        system = SYSTEM_PROMPTS["translate"]
        if context:
            system += f"\nContext: {context}"
        # The source language is mentioned only when it is known and not "auto".
        origin = f"from {source_lang} " if source_lang and source_lang != "auto" else ""
        prompt = f"Translate {origin}to {target_lang}:\n\n{text}"
        content = await self._call(system, prompt)
        return {"translated_text": content, "provider": self.name}

    async def reply(self, inquiry: str, context: Optional[Dict[str, Any]] = None, tone: str = "professional", preference_context: Optional[str] = None) -> Dict[str, Any]:
        """Draft a reply to a customer inquiry.

        Returns:
            ``{"reply": <model output>, "provider": <name>}``.
        """
        system = SYSTEM_PROMPTS["reply"] + f"\nTone: {tone}"
        if preference_context:
            system += f"\nUser preference: {preference_context}"
        ctx = ""
        if context:
            # Entries with falsy values are left out of the prompt.
            ctx = "\n".join(f"{k}: {v}" for k, v in context.items() if v)
        prompt = f"{ctx}\nCustomer inquiry:\n{inquiry}\n\nWrite a reply:"
        content = await self._call(system, prompt)
        return {"reply": content, "provider": self.name}

    async def generate_marketing(self, product_info: Dict[str, Any], target: str, style: str = "professional", language: str = "en", preference_context: Optional[str] = None) -> Dict[str, Any]:
        """Generate marketing copy for a product.

        Returns:
            ``{"content": <model output>, "provider": <name>}``.
        """
        system = SYSTEM_PROMPTS["marketing"] + f"\nStyle: {style}\nAudience: {target}\nLanguage: {language}"
        if preference_context:
            system += f"\nUser preference: {preference_context}"
        info = json.dumps(product_info, ensure_ascii=False)
        prompt = f"Product:\n{info}\n\nGenerate marketing copy:"
        content = await self._call(system, prompt, max_tokens=1500)
        return {"content": content, "provider": self.name}

    async def extract_info(self, text: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Extract structured data from ``text`` according to ``schema``.

        Returns:
            ``{"data": <parsed JSON>, "confidence": 0.9, "provider": <name>}``
            on success, or the same shape with empty ``data`` and confidence
            ``0.0`` when the model output is not valid JSON.
        """
        system = SYSTEM_PROMPTS["extract"]
        prompt = f"Schema:\n{json.dumps(schema, indent=2)}\n\nText:\n{text}\n\nJSON:"
        content = await self._call(system, prompt, response_format={"type": "json_object"})
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return {"data": {}, "confidence": 0.0, "provider": self.name}
        return {"data": data, "confidence": 0.9, "provider": self.name}

    async def _call(self, system: str, prompt: str, max_tokens: int = 1000, response_format: Optional[Dict] = None) -> str:
        """Send one system+user exchange and return the first choice's text.

        Returns an empty string when the response carries no content, so the
        ``-> str`` contract holds and ``extract_info`` takes its invalid-JSON
        path instead of raising ``TypeError`` from ``json.loads(None)``.
        """
        kwargs = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ],
            "max_tokens": max_tokens,
            "temperature": 0.7,
        }
        if response_format:
            kwargs["response_format"] = response_format
        resp = await self.client.chat.completions.create(**kwargs)
        return resp.choices[0].message.content or ""

    @property
    def name(self) -> str:
        """Provider name in the form ``spark-<model>``."""
        return self._name

    @property
    def cost_per_1k_tokens(self) -> float:
        """Cost per 1k tokens; this provider always reports ``0.0``."""
        return 0.0
|
||||
+59
-30
@@ -1,17 +1,26 @@
|
||||
from typing import Dict, Any, Optional, List
|
||||
from app.ai.base import AIProvider
|
||||
from app.ai.providers import SparkProvider, SensenovaProvider, OpencodeGoProvider, NvidiaProvider, AlibabaMTProvider
|
||||
from app.config import settings
|
||||
from app.ai.providers import SensenovaProvider, NvidiaProvider, AlibabaMTProvider
|
||||
from app.ai.trade_corpus import TradeCorpus
|
||||
from app.config import settings
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Built-in routing table: for each task type, the primary provider and the
# ordered fallback providers. "translate" additionally falls back to
# "alibaba-mt"; every other task shares the same rule shape.
DEFAULT_ROUTING: Dict[str, dict] = {
    "translate": {"primary": "sensenova", "fallback": ["alibaba-mt", "nvidia"]},
    **{
        task: {"primary": "sensenova", "fallback": ["nvidia"]}
        for task in ("reply", "marketing", "extract", "quotation", "chat")
    },
}
|
||||
|
||||
|
||||
class AIRouter:
|
||||
def __init__(self):
|
||||
self.providers: Dict[str, AIProvider] = {}
|
||||
self.routing_rules = settings.AI_ROUTING
|
||||
self.routing_rules = dict(DEFAULT_ROUTING)
|
||||
self.corpus = TradeCorpus()
|
||||
|
||||
async def reload_from_db(self, db_session) -> int:
|
||||
@@ -38,8 +47,47 @@ class AIRouter:
|
||||
else:
|
||||
logger.warning("No enabled AI providers found in DB")
|
||||
|
||||
await self._load_routing_rules(db_session)
|
||||
return len(rows)
|
||||
|
||||
async def _load_routing_rules(self, db_session):
    """Refresh ``self.routing_rules`` from the ``SystemConfig`` table.

    Lookup order:

    1. The consolidated ``ai_routing`` key, a dict mapping task name to rule.
    2. The per-task ``ai_provider_<task>`` keys, fetched in a single query.
    3. ``DEFAULT_ROUTING`` alone when neither yields anything.

    Values loaded from the database are layered over ``DEFAULT_ROUTING``, so
    tasks without a stored rule keep their default.

    Args:
        db_session: Async session used to query ``SystemConfig``.
    """
    from app.models.system_config import SystemConfig
    from sqlalchemy import select

    # Try consolidated key first
    result = await db_session.execute(
        select(SystemConfig).where(SystemConfig.key == "ai_routing")
    )
    cfg = result.scalar_one_or_none()
    if cfg and isinstance(cfg.value, dict):
        # Keep only dict-shaped rules, matching the per-task path below.
        rules = {task: rule for task, rule in cfg.value.items() if isinstance(rule, dict)}
        ignored = [task for task in cfg.value if task not in rules]
        if ignored:
            logger.warning("Ignoring malformed ai_routing entries: %s", ignored)
        self.routing_rules = {**DEFAULT_ROUTING, **rules}
        logger.info("Loaded routing rules from system_configs (ai_routing)")
        return

    # Fallback: load individual per-task keys
    task_keys = {
        "translate": "ai_provider_translate",
        "reply": "ai_provider_reply",
        "marketing": "ai_provider_marketing",
        "extract": "ai_provider_extract",
        "quotation": "ai_provider_quotation",
    }
    # One IN query instead of one round-trip per task.
    result = await db_session.execute(
        select(SystemConfig).where(SystemConfig.key.in_(list(task_keys.values())))
    )
    stored = {
        row.key: row.value
        for row in result.scalars().all()
        if isinstance(row.value, dict)
    }
    # Rebuild in task_keys order so the log output stays deterministic.
    loaded = {task: stored[key] for task, key in task_keys.items() if key in stored}

    if loaded:
        self.routing_rules = {**DEFAULT_ROUTING, **loaded}
        logger.info(f"Loaded routing rules from system_configs (individual keys): {list(loaded.keys())}")
    else:
        self.routing_rules = dict(DEFAULT_ROUTING)
        logger.info("No routing rules in system_configs, using defaults")
|
||||
|
||||
async def seed_from_env(self, db_session) -> int:
|
||||
from app.models.ai_provider import AIProvider
|
||||
|
||||
@@ -53,34 +101,19 @@ class AIRouter:
|
||||
base_url=settings.SENSENOVA_BASE_URL,
|
||||
model_name=settings.SENSENOVA_MODEL, priority=0, enabled=True,
|
||||
))
|
||||
if settings.OPENCODE_GO_API_KEY:
|
||||
seeds.append(AIProvider(
|
||||
name="OpencodeGo", provider_type="opencode_go",
|
||||
api_key=settings.OPENCODE_GO_API_KEY,
|
||||
base_url=settings.OPENCODE_GO_BASE_URL,
|
||||
model_name=settings.OPENCODE_GO_MODEL, priority=1, enabled=True,
|
||||
))
|
||||
if settings.NVIDIA_API_KEY:
|
||||
seeds.append(AIProvider(
|
||||
name="NVIDIA", provider_type="nvidia",
|
||||
api_key=settings.NVIDIA_API_KEY,
|
||||
base_url=settings.NVIDIA_BASE_URL,
|
||||
model_name=settings.NVIDIA_MODEL, priority=2, enabled=True,
|
||||
))
|
||||
if settings.IFLYTEK_API_KEY:
|
||||
seeds.append(AIProvider(
|
||||
name="讯飞 Spark", provider_type="spark",
|
||||
api_key=settings.IFLYTEK_API_KEY,
|
||||
base_url=settings.IFLYTEK_API_BASE,
|
||||
model_name=settings.IFLYTEK_MODEL, priority=3, enabled=True,
|
||||
))
|
||||
if settings.ALIBABA_ACCESS_KEY_ID and settings.ALIBABA_ACCESS_KEY_SECRET:
|
||||
seeds.append(AIProvider(
|
||||
name="阿里翻译", provider_type="alibaba-mt",
|
||||
api_key=settings.ALIBABA_ACCESS_KEY_ID,
|
||||
api_secret=settings.ALIBABA_ACCESS_KEY_SECRET,
|
||||
model_name="alibaba-mt", priority=4, enabled=True,
|
||||
model_name=settings.NVIDIA_MODEL, priority=1, enabled=True,
|
||||
))
|
||||
seeds.append(AIProvider(
|
||||
name="阿里翻译", provider_type="alibaba-mt",
|
||||
api_key=settings.ALIBABA_ACCESS_KEY_ID or "",
|
||||
api_secret=settings.ALIBABA_ACCESS_KEY_SECRET or "",
|
||||
model_name="alibaba-mt", priority=3, enabled=True,
|
||||
))
|
||||
|
||||
for p in seeds:
|
||||
db_session.add(p)
|
||||
@@ -99,12 +132,8 @@ class AIRouter:
|
||||
t = p.provider_type
|
||||
if t == "sensenova":
|
||||
return SensenovaProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
|
||||
elif t == "opencode_go":
|
||||
return OpencodeGoProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
|
||||
elif t == "nvidia":
|
||||
return NvidiaProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
|
||||
elif t == "spark":
|
||||
return SparkProvider(api_key=p.api_key, model=p.model_name, base_url=p.base_url)
|
||||
elif t == "alibaba-mt":
|
||||
return AlibabaMTProvider(access_key_id=p.api_key, access_key_secret=p.api_secret or "")
|
||||
else:
|
||||
@@ -117,7 +146,7 @@ class AIRouter:
|
||||
def get_providers_for_task(self, task_type: str) -> List[AIProvider]:
|
||||
rules = self.routing_rules.get(
|
||||
task_type,
|
||||
{"primary": "sensenova", "fallback": ["opencode_go"]},
|
||||
{"primary": "sensenova", "fallback": ["nvidia"]},
|
||||
)
|
||||
ordered = []
|
||||
seen = set()
|
||||
|
||||
Reference in New Issue
Block a user