Fix API errors and improve customer discovery with real web results

- Fix usage/stats 500: use Date() not datetime.date() for SQL cast
- Fix customers 422: raise size limit to 1000
- Replace unreliable MCP client with direct Bing batch search for discovery
- Batch all search queries in one browser session (faster)
- Show real company names/URLs from Bing, not generic templates
- Smart filter for non-business results (news, blogs, forums)
- Fallback suggestions when search results are insufficient
- Frontend: clickable contact URLs, provider indicator, better layout
This commit is contained in:
TradeMate Dev
2026-05-27 10:29:23 +08:00
parent bed5c7abef
commit ab06990e73
7 changed files with 223 additions and 163 deletions
+131 -107
View File
@@ -1,11 +1,10 @@
import asyncio
import json
import logging
from typing import Dict, Any, Optional
from typing import Dict, Any, Optional, Union
from app.ai.router import get_ai_router
from app.services.search_web import search_companies, fetch_page_text
from app.services.mcp_search_client import mcp_search
from app.services.mcp_search_server import search_bing_batch
logger = logging.getLogger(__name__)
@@ -35,26 +34,31 @@ class DiscoveryService:
async def search(self, product_description: str, target_market: str) -> Dict[str, Any]:
    """Find candidate buyer companies for a product in a target market.

    Builds the search queries, runs them through ``_web_search_all``,
    converts the first 12 raw hits into company records, keeps the ones
    that pass ``_looks_like_business`` and, when fewer than three remain,
    appends generic suggestions whose names are not already present.

    NOTE(review): the scraped source interleaved the superseded
    MCP/Google early-return path with its replacement (diff markers were
    lost); this body keeps only the replacement path.

    Args:
        product_description: Free-text description of the product.
        target_market: Market (country/region) to search in.

    Returns:
        A dict with ``companies`` (at most 15 records), ``query``,
        ``market`` and ``provider``. ``provider`` is ``"template"`` when
        the web search produced nothing, otherwise whatever the search
        step reported (defaulting to ``"web_search"``).
    """
    queries = self._build_queries(product_description, target_market)
    search_result = await self._web_search_all(queries)

    companies = []
    provider = "template"
    if search_result:
        raw = search_result.get("results", [])
        companies = [self._to_company(r) for r in raw[:12]]
        provider = search_result.get("provider", "web_search")

    good_enough = [c for c in companies if self._looks_like_business(c)]
    if len(good_enough) < 3:
        logger.info(f"Web search returned only {len(good_enough)} good results, supplementing with suggestions")
        seen_names = {c.get("name", "") for c in good_enough}
        for suggestion in self._suggest_companies(product_description, target_market):
            name = suggestion.get("name")
            # Skip unnamed suggestions and ones that duplicate a real hit.
            if name and name not in seen_names:
                seen_names.add(name)
                good_enough.append(suggestion)

    return {
        "companies": good_enough[:15],
        "query": product_description,
        "market": target_market,
        "provider": provider,
    }
async def analyze(self, company_url: str, product_description: str) -> Dict[str, Any]:
page_text = await fetch_page_text(company_url)
@@ -117,83 +121,115 @@ URL: {company_url}
logger.warning(f"Outreach AI parse failed: {e}")
return self._template_outreach(company_info, product_info)
async def _web_search_all(self, queries: list) -> dict:
    """Run the web search and report which provider produced the hits.

    First tries one batched Bing search over the first four queries. If
    that raises, returns nothing, or returns only hits that the domain
    filter rejects, falls back to a single ``search_companies`` call on
    the first query.

    NOTE(review): the scraped source interleaved the removed
    ``_mcp_search_all`` body with this method (diff markers were lost);
    only the replacement method is kept here.

    Args:
        queries: Search strings, most important first.

    Returns:
        ``{"results": [...], "provider": "bing" | "google_cse"}`` with at
        most 15 results, or ``{}`` when no provider returned anything.
    """
    # Guard: the fallback indexes queries[0], which would raise on [].
    if not queries:
        return {}

    try:
        results = await search_bing_batch(queries[:4], max_per_query=5)
        if results:
            filtered = self._dedup_and_filter(results)[:15]
            # Only stop here when something survived the filter; otherwise
            # give the fallback provider a chance.
            if filtered:
                return {"results": filtered, "provider": "bing"}
    except Exception as e:
        logger.warning(f"Bing batch search failed: {e}")

    try:
        results = await search_companies(queries[0], max_results=10)
    except Exception as e:
        # Best-effort like the Bing path: a failing fallback means
        # "no results", not a crashed request.
        logger.warning(f"Fallback company search failed: {e}")
        return {}

    if results:
        return {"results": results[:15], "provider": "google_cse"}
    return {}
def _dedup_and_filter(self, results: list) -> list:
seen = set()
filtered = []
junk = ["sciencedirect", "mdpi", "springer", "wiley", "acm.org",
"ieee.org", "researchgate", "nature.com", "oup.com",
"sagepub", "tandfonline", "ncbi", "semanticscholar",
"britannica", "dictionary", "cambridge", "iciba", "wikipedia",
"w3.org", "whatsapp.com", "wechat.com", "qq.com",
"zhihu.com", "sogou.com", "163.com", "sohu.com", "sina.com",
"taobao.com", "tmall.com", "alipay.com", "alibaba.com",
"csdn.net", "blog.csdn", "jianshu.com", "36kr.com",
"huxiu.com", "geekpark.net", "leiphone.com",
"medium.com", "wordpress.com", "blogspot.com",
"youtube.com", "facebook.com", "twitter.com", "instagram.com",
"reddit.com", "quora.com"]
for r in results:
url = r.get("url", "").rstrip("/")
title = r.get("title", "")
if not url or url in seen:
continue
seen.add(url)
s = url.split("/")[2] if "://" in url else url
hostname = s.split(":")[0].lower() if ":" in s else s.lower()
if any(tld in hostname for tld in [".cn", ".com.cn", ".edu", ".ac.", ".gov"]):
if any(tld in hostname for tld in [".edu", ".ac.", ".gov", ".edu.cn"]):
continue
if any(domain in hostname for domain in
["sciencedirect", "mdpi", "springer", "wiley", "acm.org",
"ieee.org", "researchgate", "nature.com", "oup.com",
"sagepub", "tandfonline", "ncbi", "semanticscholar",
"britannica", "dictionary", "cambridge", "iciba", "wikipedia"]):
if any(domain in hostname for domain in junk):
continue
filtered.append(r)
return filtered
async def _google_search_all(self, queries: list) -> list:
# Collects hits for at most the first three queries by awaiting
# search_companies (max_results=8) one query at a time, keeps the first
# hit seen for each URL (compared with any trailing "/" stripped), then
# returns the list after self._dedup_and_filter, capped at 15 entries.
# NOTE(review): indentation was lost in this copy, so it cannot be told
# from here whether the `break` below leaves the per-query loop or the
# per-result loop - confirm against the repository before relying on it.
all_results = []
seen_urls = set()
for q in queries[:3]:
results = await search_companies(q, max_results=8)
for r in results:
# r["url"] raises KeyError when a hit has no "url" key.
url = r["url"].rstrip("/")
if url not in seen_urls:
seen_urls.add(url)
all_results.append(r)
# Stop collecting once 15 unique hits have been gathered.
if len(all_results) >= 15:
break
return self._dedup_and_filter(all_results)[:15]
def _to_company(self, r: dict) -> dict:
url = r.get("url", "")
title = r.get("title", url)[:60]
snippet = r.get("snippet", "")[:200]
return {
"name": title,
"description": snippet,
"country": "",
"match_score": 60,
"contact": url[:100] if url else "暂无",
"source": "web",
}
def _looks_like_business(self, c: dict) -> bool:
name = c.get("name", "")
snippet = c.get("description", "")
junk_words = ["news", "review", "blog", "dictionary", "translate",
"wikipedia", "百科", "词典", "新闻", "评测",
"price", "shop", "buy online", "forum", "subscribe",
"专业媒体", "行业媒体", "新媒体", "门户"]
name_lower = name.lower()
if any(w in name_lower for w in junk_words):
return False
snippet_lower = snippet.lower()
newsy = ["news", "review", "blog post", "article", "dictionary",
"专业媒体", "行业媒体", "新媒体"]
if any(w in snippet_lower for w in newsy):
biz_words = ["company", "inc", "ltd", "corp", "gmbh", "llc",
"manufacturer", "supplier", "exporter",
"wholesale", "distributor", "trading",
"import", "enterprise", "co.", "factory",
"industry", "electric", "solar", "energy",
"automotive", "vehicle", "官网", "有限公司",
"集团", "股份", "实业"]
if not any(w in snippet_lower for w in biz_words):
return False
return True
def _build_queries(self, product: str, market: str) -> list:
return [
import re
has_cjk = bool(re.search(r'[\u4e00-\u9fff]', product))
queries = [
f"{product} importer {market}",
f"{product} distributor {market}",
f"{product} wholesale buyer {market}",
f"{product} procurement {market}",
f"{product} company {market}",
f"{product} trading company {market}",
f"buy {product} from {market}",
f"{product} supply chain {market}",
f"top {product} manufacturers {market}",
f"{product} import export {market}",
f"{product} trading company {market}",
]
if has_cjk:
queries += [
f"{product} {market} 进口商",
f"{product} {market} 经销商",
f"{product} {market} 采购",
f"{product} {market} 批发",
]
b2b_queries = [
f"{product} buyer {market} alibaba",
f"{product} supplier {market}",
f"{product} wholesale price {market}",
]
return queries + b2b_queries
def _extract_json(self, text: str) -> Optional[dict]:
def _extract_json(self, text: str) -> Optional[Union[dict, list]]:
text = text.strip()
for prefix in ["```json", "```", "```JSON"]:
if text.startswith(prefix):
@@ -215,49 +251,37 @@ URL: {company_url}
pass
return None
async def _ai_strategy(self, product: str, market: str) -> Dict[str, Any]:
    """Ask the AI backend for likely buyers, with a template fallback.

    Returns ``self._template_strategy(product, market)`` when
    ``self._ai_available`` is falsy, when the reply cannot be parsed into
    something containing ``"companies"``, or when any exception is raised
    along the way. On success the parsed reply is returned with
    ``provider`` (from the chat result's ``provider_used``, default
    ``"unknown"``) and ``ai_generated=True`` added.

    Args:
        product: Product description placed in the prompt.
        market: Target market placed in the prompt.

    Returns:
        The strategy dict, either AI-produced or from the template.
    """
    if not self._ai_available:
        return self._template_strategy(product, market)

    system_text = """你是外贸客户发现专家。根据用户的产品和目标市场,列出15家有可能采购该产品的潜在公司。
请以 JSON 格式返回(不要用 markdown 代码块标记):
{
"companies": [
{"name": "公司名称", "description": "公司业务简介", "country": "所在国家", "match_score": 匹配度0-100, "contact": "联系方式(有就写,没有写'需进一步查找'", "source": "推荐来源说明"}
],
"strategy": "整体获取策略建议",
"tips": ["搜索建议1", "搜索建议2"]
}
要求:
- 公司名称要真实感,不要编造知名大公司
- 公司业务要与产品相关
- 匹配度要有区分度,60-95之间
- 至少返回10家
- 只返回 JSON,不要其他内容"""
    user_text = f"产品:{product}\n目标市场:{market}\n请列出在该市场可能采购该产品的公司。"

    try:
        answer = await self.ai.chat(user_text, system_prompt=system_text)
        payload = self._extract_json(answer.get("reply", ""))
        # Unparseable or incomplete reply: use the canned strategy.
        if not payload or "companies" not in payload:
            return self._template_strategy(product, market)
        payload["provider"] = answer.get("provider_used", "unknown")
        payload["ai_generated"] = True
        return payload
    except Exception as e:
        logger.warning(f"AI strategy failed: {e}")
        return self._template_strategy(product, market)
def _suggest_companies(self, product: str, market: str) -> list:
return [
{"name": f"{product} Importers in {market}", "description": f"{market} 从事 {product} 进口和批发的贸易商和专业进口商", "country": market, "match_score": 80, "contact": f"在 LinkedIn 搜索 '{product} importer {market}'", "source": "建议"},
{"name": f"{product} Distributors in {market}", "description": f"{market} 分销 {product} 的分销渠道商和批发商", "country": market, "match_score": 75, "contact": f"在 Google/Bing 搜索 '{product} distributor {market}'", "source": "建议"},
{"name": f"{market} Trade Association", "description": f"联系 {market} 的相关行业协会获取会员企业名录", "country": market, "match_score": 70, "contact": f"搜索 '{market} {product} association'", "source": "建议"},
{"name": f"Alibaba {market} Buyers", "description": f"在 Alibaba.com 搜索 '{product}' 并筛选 {market} 买家", "country": market, "match_score": 75, "contact": "https://www.alibaba.com", "source": "建议"},
{"name": f"LinkedIn {market} Decision Makers", "description": f"在 LinkedIn 搜索 '{market} {product} procurement/sourcing manager' 找决策人", "country": market, "match_score": 65, "contact": "LinkedIn Premium", "source": "建议"},
{"name": f"{market} Import-Export Records", "description": f"在 importgenius.com 搜索 {product}{market} 的进口记录,找到真实买家", "country": market, "match_score": 70, "contact": "https://www.importgenius.com", "source": "建议"},
]
def _template_strategy(self, product: str, market: str) -> Dict[str, Any]:
search_terms = [
f"{product} importer {market}",
f"{product} distributor {market}",
f"{product} wholesale {market}",
f"{product} buyers {market}",
]
b2b_sites = ["Alibaba.com", "TradeIndia.com", "GlobalSources.com", "Made-in-China.com"]
return {
"companies": [
{"name": f"{product} Importers in {market} (示例)", "description": f"{market}从事{product}进口和批发的贸易商,建议在LinkedIn上搜索相关关键词", "country": market, "match_score": 75, "contact": "需进一步查找", "source": "AI推荐"},
{"name": f"{product} Distributors in {market} (示例)", "description": f"{market}分销{product}的渠道商,建议通过Google搜索关键词", "country": market, "match_score": 70, "contact": "需进一步查找", "source": "AI推荐"},
{"name": f"{product} Importers in {market}", "description": f"使用Google/Bing搜索 '{product} importer {market}' 可找到正在采购该产品的进口商", "country": market, "match_score": 80, "contact": "通过搜索结果获取", "source": "搜索建议"},
{"name": f"{product} Distributors in {market}", "description": f"使用Google/Bing搜索 '{product} distributor {market}' 可找到分销渠道商", "country": market, "match_score": 75, "contact": "通过搜索结果获取", "source": "搜索建议"},
{"name": f"{product} Wholesale Buyers in {market}", "description": f"使用Google/Bing搜索 '{product} wholesale {market}' 可找到批发采购商", "country": market, "match_score": 70, "contact": "通过搜索结果获取", "source": "搜索建议"},
{"name": f"{market} {product} Trade Partners", "description": f"在B2B平台({', '.join(b2b_sites[:3])})搜索 {market} 买家发布的采购需求", "country": market, "match_score": 65, "contact": "B2B平台站内信", "source": "B2B平台"},
],
"strategy": f"建议在 LinkedIn 和 Google 搜索 {market} {product} 相关公司,使用导入商、批发商、经销商等关键词组合",
"tips": ["使用多个搜索词组合", "找到公司后在 LinkedIn 找决策人", "查看公司网站了解其业务范围"],
"strategy": f"推荐搜索计划:\n1. 搜索 '{' '.join(search_terms[:2])}' 直接找客户\n2. 在 LinkedIn 搜索 '{market} {product} manager' 找决策人\n3. 在 {b2b_sites[0]}{b2b_sites[1]} 查找 {market} 买家询盘\n4. 参加 {market} 相关行业展会获取名录",
"tips": [
f"把搜索词 '{product} importer {market}' 改成当地语言效果更好",
"找到公司后访问官网,在 About/Team 页面找决策人LinkedIn",
"用 SimilarWeb 查看目标公司网站流量和来源",
"在行业协会网站查找会员名录"],
"provider": "template",
"ai_generated": True,
}