docs: update project docs and clean up redundant files

- PROGRESS.md: update to 2026-05-29 with security hardening (T-005),
  4-frontend architecture, AI provider refactoring, discovery features,
  landing page/referral/quota, desktop layout, admin AI management
- AGENTS.md: add AI provider list (adds Alibaba/NVIDIA; removes Claude/DeepL/Local),
  DB-driven config, CSRF/rate-limit/CORS notes, admin_ai reload quirk
- .env.example: sync with actual config, replace deprecated providers
  with current Sensenova/OpencodeGo/NVIDIA/Spark/Alibaba
- docs/PROJECT_STATUS.md: archive (fully superseded by PROGRESS.md)
- Remove generated JS files (_bing_search.js, _batch_search.js)
- Remove empty directories (data/corpus, data/models)
- Remove backend/.coverage (test artifact)
- Fix services/.gitignore to cover _bing_search.js
- Include pending AI provider DB admin feature (admin_ai, AIProvider model,
  AIProviders.vue, migration) and T-008 test report
This commit is contained in:
TradeMate Dev
2026-05-29 11:15:33 +08:00
parent c04fa2c19f
commit 5d2bced39f
31 changed files with 1933 additions and 816 deletions
+1
View File
@@ -1 +1,2 @@
_batch_search.js
_bing_search.js
+5 -5
View File
@@ -288,11 +288,11 @@ class AdminService:
async def _seed_default_configs(self):
defaults = [
SystemConfig(key="ai_provider_translate", value={"primary": "sensenova", "fallback": ["openai", "local"]}, description="翻译任务 AI 模型选择"),
SystemConfig(key="ai_provider_reply", value={"primary": "sensenova", "fallback": ["anthropic", "local"]}, description="回复建议 AI 模型选择"),
SystemConfig(key="ai_provider_marketing", value={"primary": "sensenova", "fallback": ["openai", "local"]}, description="营销文案 AI 模型选择"),
SystemConfig(key="ai_provider_extract", value={"primary": "sensenova", "fallback": ["openai"]}, description="信息提取 AI 模型选择"),
SystemConfig(key="ai_provider_quotation", value={"primary": "sensenova", "fallback": ["openai"]}, description="报价单 AI 模型选择"),
SystemConfig(key="ai_provider_translate", value={"primary": "sensenova", "fallback": ["alibaba-mt", "opencode_go"]}, description="翻译任务 AI 模型选择"),
SystemConfig(key="ai_provider_reply", value={"primary": "sensenova", "fallback": ["opencode_go"]}, description="回复建议 AI 模型选择"),
SystemConfig(key="ai_provider_marketing", value={"primary": "sensenova", "fallback": ["opencode_go"]}, description="营销文案 AI 模型选择"),
SystemConfig(key="ai_provider_extract", value={"primary": "sensenova", "fallback": ["opencode_go"]}, description="信息提取 AI 模型选择"),
SystemConfig(key="ai_provider_quotation", value={"primary": "sensenova", "fallback": ["opencode_go"]}, description="报价单 AI 模型选择"),
SystemConfig(key="feature_guest_mode", value={"enabled": True}, description="游客模式开关"),
SystemConfig(key="feature_wechat_login", value={"enabled": False}, description="微信登录开关"),
SystemConfig(key="feature_registration", value={"enabled": True}, description="新用户注册开关"),
+114 -106
View File
@@ -1,122 +1,130 @@
import asyncio
import json
import logging
import os
import subprocess
import re
from typing import List, Dict
import functools
from mcp.server.fastmcp import FastMCP
import requests
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)

# Directory three levels above the one containing this file.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
# Absolute path of the Node.js interpreter.
NODE_BIN = "/usr/bin/node"

# Browser-like headers for outgoing search-engine HTTP requests.
# This literal must live at module level: when it is embedded in the
# BATCH_SCRIPT string below, HEADERS is never bound and the JavaScript
# source is corrupted.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
}

# Node.js/Puppeteer script source.  It reads a JSON array of queries and a
# per-query maximum from its last two argv entries, loads a Bing results page
# for each query, drops URLs matching its own skip list, and prints one JSON
# array of {title, url, snippet} objects to stdout ('[]' on launch failure).
BATCH_SCRIPT = r"""
const p = require('puppeteer');
(async () => {
  const queries = JSON.parse(process.argv[process.argv.length - 2]);
  const max = parseInt(process.argv[process.argv.length - 1] || '6', 10);
  const sk = ['bing.com','google.com','facebook.com','twitter.com','instagram.com','youtube.com','reddit.com','amazon.com','walmart.com','w3.org','whatsapp.com','wechat.com','qq.com','taobao.com','tmall.com','alipay.com','zhihu.com','baike.baidu.com','sogou.com','163.com','sohu.com','sina.com','iciba.com','cambridge','britannica','sciencedirect','mdpi.com','springer','wiley.com','acm.org','ieee.org','researchgate','semanticscholar','ncbi.nlm.nih','nature.com','oup.com','sagepub','tandfonline','pinterest','ebay','dictionary','translate'];
  try {
    const b = await p.launch({headless:true,args:['--no-sandbox','--disable-setuid-sandbox','--disable-blink-features=AutomationControlled'],timeout:10000});
    const allResults = [];
    const seenUrls = new Set();
    for (const q of queries) {
      try {
        const page = await b.newPage();
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        await page.setExtraHTTPHeaders({'Accept-Language':'en-US,en;q=0.9'});
        const url = 'https://www.bing.com/search?q=' + encodeURIComponent(q) + '&setlang=en-US&cc=US';
        await page.goto(url, {waitUntil:'domcontentloaded',timeout:8000});
        await page.waitForSelector('.b_algo', {timeout:4000}).catch(()=>{});
        const results = await page.evaluate((m, sk) => {
          const found = []; const seen = new Set();
          document.querySelectorAll('li.b_algo').forEach(li => {
            const a = li.querySelector('h2 a'); if (!a) return;
            let url = (a.href || '').replace(/\/$/,'');
            if (!url.startsWith('http') || seen.has(url)) return;
            seen.add(url);
            if (sk.some(d => url.includes(d))) return;
            const hostname = url.replace(/^https?:\/\//,'').split('/')[0];
            if (hostname.endsWith('.edu') || hostname.endsWith('.ac') || hostname.endsWith('.gov')) return;
            const title = (a.textContent||'').trim().substring(0,100);
            const s = li.querySelector('.b_caption p, .b_lineclamp2');
            found.push({title, url, snippet:s?s.textContent.trim().substring(0,200):''});
          });
          return found.slice(0,m);
        }, max, sk);
        for (const r of results) {
          if (!seenUrls.has(r.url)) {
            seenUrls.add(r.url);
            allResults.push(r);
          }
        }
        await page.close();
      } catch(e) { /* skip failed query */ }
    }
    console.log(JSON.stringify(allResults.slice(0, max * queries.length)));
    await b.close();
  } catch(e) { console.log('[]'); }
})();
"""

# Domain fragments whose results are treated as junk (dictionaries,
# encyclopedias, social networks, retail sites, and the search engines
# themselves).
SKIP_DOMAINS = {
    "iciba.com", "baike.baidu.com", "cambridge.org", "dictionary.cambridge.org",
    "collinsdictionary.com", "dictionary.com", "merriam-webster.com",
    "thesaurus.com", "britannica.com", "wikipedia.org", "wikihow.com",
    "facebook.com", "twitter.com", "instagram.com", "youtube.com",
    "reddit.com", "pinterest.com", "amazon.com", "ebay.com",
    "walmart.com", "target.com", "bestbuy.com", "homedepot.com",
    "linkedin.com", "bing.com", "google.com",
}

# Regular expressions applied to result titles; a match marks the result as
# a dictionary / translation / encyclopedia page.
SKIP_TITLE_PATTERNS = [
    r'^是什么意思$', r'^翻译$', r'^词典$', r'^字典$',
    r'翻译$', r'^百度百科', r'^维基百科',
]

# Location of the on-disk copy of BATCH_SCRIPT, next to this module.
BATCH_SCRIPT_FILE = os.path.join(os.path.dirname(__file__), "_batch_search.js")
# node_modules directory under PROJECT_ROOT.
NODE_MODULES = os.path.join(PROJECT_ROOT, "node_modules")
def _is_junk(item: Dict[str, str]) -> bool:
    """Decide whether a search result should be discarded.

    Args:
        item: Result mapping; only the "url" and "title" keys are read, and
            each defaults to an empty string when missing.

    Returns:
        True when any SKIP_DOMAINS entry occurs as a substring of the host
        or of the full URL, when any SKIP_TITLE_PATTERNS regex matches the
        title, or when the host ends with ".edu", ".ac" or ".gov".
        False otherwise.
    """
    link = item.get("url", "")
    heading = item.get("title", "")
    # Host is whatever precedes the first "/" once the scheme text is removed.
    host = link.replace("https://", "").replace("http://", "").split("/")[0]

    for domain in SKIP_DOMAINS:
        if domain in host or domain in link:
            return True

    if any(re.search(pattern, heading) for pattern in SKIP_TITLE_PATTERNS):
        return True

    return host.endswith((".edu", ".ac", ".gov"))
def _search_bing(query: str, count: int = 6) -> List[Dict[str, str]]:
    """Fetch a Bing HTML results page and extract the organic results.

    Args:
        query: Search text.  When it contains no character in the
            U+4E00-U+9FFF range, the request also pins setlang=en-US and cc=US.
        count: Sent to Bing as the ``count`` parameter and used as the cap
            on how many results are collected.

    Returns:
        A list of dicts with "title", "url", "snippet" and "engine" ("bing")
        keys, excluding entries rejected by ``_is_junk``.  Any exception is
        logged as a warning and yields an empty list.
    """
    try:
        request_params = {"q": query, "count": count}
        if re.search(r'[\u4e00-\u9fff]', query) is None:
            request_params["setlang"] = "en-US"
            request_params["cc"] = "US"

        response = requests.get(
            "https://www.bing.com/search",
            params=request_params,
            headers=HEADERS,
            timeout=10,
        )
        response.raise_for_status()
        document = BeautifulSoup(response.text, "html.parser")

        hits = []
        visited = set()
        for node in document.select("li.b_algo"):
            anchor = node.select_one("h2 a")
            if not anchor:
                continue
            link = anchor.get("href", "")
            if link in visited or not link.startswith("http"):
                continue
            visited.add(link)

            heading = anchor.get_text(strip=True)[:120]
            desc = node.select_one(".b_caption p, .b_lineclamp2")
            candidate = {
                "title": heading,
                "url": link,
                "snippet": desc.get_text(strip=True)[:300] if desc else "",
                "engine": "bing",
            }
            if not _is_junk(candidate):
                hits.append(candidate)
            if len(hits) >= count:
                break
        return hits
    except Exception as e:
        # Best-effort lookup: a failed search degrades to "no results".
        logger.warning(f"Bing search failed: {e}")
        return []
def _search_360(query: str, count: int = 6) -> List[Dict[str, str]]:
    """Fetch a so.com HTML results page and extract the listed results.

    Args:
        query: Search text, sent as the ``q`` parameter.
        count: Cap on how many results are collected.

    Returns:
        A list of dicts with "title", "url", "snippet" and "engine" ("360")
        keys, excluding entries rejected by ``_is_junk``.  Any exception is
        logged as a warning and yields an empty list.
    """
    try:
        response = requests.get(
            "https://www.so.com/s",
            params={"q": query},
            headers=HEADERS,
            timeout=10,
        )
        response.raise_for_status()
        document = BeautifulSoup(response.text, "html.parser")

        hits = []
        visited = set()
        for node in document.select(".result-list li, .result"):
            anchor = node.select_one("h3 a")
            if not anchor:
                continue
            link = anchor.get("href", "")
            if not link or link in visited:
                continue
            visited.add(link)

            heading = anchor.get_text(strip=True)[:120]
            desc = node.select_one(".masonry-text, .res-desc")
            candidate = {
                "title": heading,
                "url": link,
                "snippet": desc.get_text(strip=True)[:300] if desc else "",
                "engine": "360",
            }
            if not _is_junk(candidate):
                hits.append(candidate)
            if len(hits) >= count:
                break
        return hits
    except Exception as e:
        # Best-effort lookup: a failed search degrades to "no results".
        logger.warning(f"360 search failed: {e}")
        return []
async def search_bing_batch(queries: List[str], max_per_query: int = 6) -> List[Dict[str, str]]:
    """Run every query against Bing and 360 search and merge the results.

    For each query, ``_search_bing`` and ``_search_360`` are executed in the
    event loop's default executor (both are blocking HTTP calls) and awaited
    together.  Results are appended in query order, Bing results before 360
    results, and de-duplicated across the whole batch by URL with any
    trailing "/" characters stripped.

    Args:
        queries: Search strings to run; an empty list returns an empty list.
        max_per_query: Forwarded to each engine helper as its result cap.

    Returns:
        The merged list of result dicts as produced by the engine helpers.
    """
    # One lookup is enough; the running loop cannot change inside this coroutine.
    loop = asyncio.get_running_loop()
    all_results: List[Dict[str, str]] = []
    seen_urls = set()
    for query in queries:
        bing_results, so_results = await asyncio.gather(
            loop.run_in_executor(None, _search_bing, query, max_per_query),
            loop.run_in_executor(None, _search_360, query, max_per_query),
        )
        for entry in bing_results + so_results:
            # Treat "https://x.com" and "https://x.com/" as the same page.
            url = entry["url"].rstrip("/")
            if url not in seen_urls:
                seen_urls.add(url)
                all_results.append(entry)
    return all_results
async def search_bing(query: str, max_results: int = 10) -> List[Dict[str, str]]:
    """Search for a single query by delegating to ``search_bing_batch``.

    Args:
        query: Search text; wrapped in a one-element list for the batch call.
        max_results: Passed to the batch call as ``max_per_query``.

    Returns:
        Whatever ``search_bing_batch`` returns for the one-query batch.
    """
    single_query_batch = [query]
    merged = await search_bing_batch(single_query_batch, max_per_query=max_results)
    return merged
mcp = FastMCP("trade-search", log_level="WARNING")


@mcp.tool(
    name="web_search",
    description="Search the web for companies, buyers, or business information. Returns title, URL, and snippet for each result. Useful for finding potential customers, researching companies, or gathering market intelligence.",
)
async def web_search(query: str, max_results: int = 10) -> str:
    """MCP tool entry point: search the web and return the results as JSON.

    Args:
        query: Search text forwarded to ``search_bing``.
        max_results: Result cap forwarded to ``search_bing``.

    Returns:
        A JSON object string with a "results" list (empty when nothing was
        found) and an "error" field that is always null.
    """
    found = await search_bing(query, max_results)
    # A falsy result is reported as an empty list rather than passed through.
    payload = {"results": found or [], "error": None}
    return json.dumps(payload)
def main():
    """Run the MCP server's stdio coroutine to completion on a new event loop."""
    server_coro = mcp.run_stdio_async()
    asyncio.run(server_coro)


if __name__ == "__main__":
    main()
+2 -1
View File
@@ -3,6 +3,7 @@ from sqlalchemy import select, func
from fastapi import HTTPException, Depends
from datetime import datetime, date
from sqlalchemy import Date
from typing import Tuple
import logging
from app.models import UsageLog, SystemConfig, User, Customer, Product
@@ -75,7 +76,7 @@ class UsageService:
result = await self.db.execute(stmt)
return result.scalar() or 0
async def check_quota(self, user_id: str, action: str, chars: int = 0) -> tuple[bool, str]:
async def check_quota(self, user_id: str, action: str, chars: int = 0) -> Tuple[bool, str]:
tier = await self.get_tier(user_id)
limits = await self.get_limits(tier)
limit_key = ACTION_MAP.get(action)