"""Customer discovery service.

Searches for candidate companies (MCP search first, then web search, then an
AI-generated strategy), scores a company page against a product description,
and drafts outreach copy. Every AI-backed path has a static template fallback.
"""

import asyncio
import json
import logging
from typing import Any, Dict, Optional
from urllib.parse import urlsplit

from app.ai.router import get_ai_router
from app.services.search_web import search_companies, fetch_page_text
from app.services.mcp_search_client import mcp_search

logger = logging.getLogger(__name__)

# NOTE(review): the prompt texts below are reproduced character-for-character
# from the source as found (single spaces between sections). If the prompts
# were originally multi-line, restore the line breaks deliberately.
ANALYZE_MATCH_PROMPT = (
    "你是外贸客户分析专家。分析目标公司的业务描述,判断其与用户产品的匹配度。 "
    "请以 JSON 格式返回(不要用 markdown 代码块标记): "
    '{ "match_score": 0-100, '
    '"match_reason": "为什么匹配/不匹配", '
    '"company_summary": "这家公司的主要业务", '
    '"product_fit": "产品匹配度说明", '
    '"contact_info": { '
    '"emails": ["找到的邮箱"], '
    '"phones": ["找到的电话"], '
    '"social": ["LinkedIn等社媒链接"] '
    "} } "
    "只返回 JSON,不要其他内容。"
)

_OUTREACH_SYSTEM_PROMPT = (
    "你是外贸开发信专家。根据目标公司信息和你的产品,生成个性化触达文案。 "
    "请以 JSON 格式返回(不要用 markdown 代码块标记): "
    '{ "subject": "邮件标题(如适用)", '
    '"linkedin_message": "LinkedIn 私信文案(150字以内)", '
    '"whatsapp_message": "WhatsApp 消息文案(100字以内)", '
    '"email_body": "邮件正文(含开头问候、自我介绍、价值主张、行动号召、签名)", '
    '"key_points": ["客户关注的3个要点"], '
    '"tips": ["发送时的建议"] }'
)

_STRATEGY_SYSTEM_PROMPT = (
    "你是外贸客户发现专家。根据用户的产品和目标市场,分析出潜在买家画像和获取策略。 "
    "请以 JSON 格式返回(不要用 markdown 代码块标记): "
    '{ "buyer_personas": [{"type": "", "description": "", "channels": [], '
    '"search_queries": []}], "strategy": "", "tips": [] }'
)

# Upper bound on the number of companies returned by any search path.
_MAX_COMPANIES = 15
# MCP search fan-out: number of queries run concurrently and their shared deadline.
_MCP_QUERY_LIMIT = 2
_MCP_TIMEOUT_SECONDS = 8
# Web search: number of queries tried sequentially.
_WEB_QUERY_LIMIT = 3

# Hosts ending in one of these are dropped from search results.
_BLOCKED_HOST_SUFFIXES = (".cn", ".edu", ".gov")
# Hosts containing one of these dot-delimited labels are dropped (e.g. "ox.ac.uk").
_BLOCKED_HOST_LABELS = (".edu.", ".gov.", ".ac.")
# Hosts containing any of these substrings are dropped.
_BLOCKED_HOST_KEYWORDS = (
    "sciencedirect", "mdpi", "springer", "wiley", "acm.org", "ieee.org",
    "researchgate", "nature.com", "oup.com", "sagepub", "tandfonline",
    "ncbi", "semanticscholar", "britannica", "dictionary", "cambridge",
    "iciba", "wikipedia",
)


class DiscoveryService:
    """Find, analyze and reach out to potential customer companies."""

    def __init__(self):
        """Bind the shared AI router and record whether it has any provider."""
        ai_router = get_ai_router()
        self.ai = ai_router
        self._ai_available = len(ai_router.providers) > 0

    async def search(self, product_description: str, target_market: str) -> Dict[str, Any]:
        """Search for candidate companies for a product in a target market.

        Tries MCP search, then web search; the first one that yields results
        wins. If neither returns anything, falls back to ``_ai_strategy``.

        Returns:
            On a search hit, a dict with ``companies`` (at most 15), ``query``,
            ``market`` and ``provider``; otherwise the ``_ai_strategy`` result.
        """
        queries = self._build_queries(product_description, target_market)
        search_paths = (
            ("mcp_search", self._mcp_search_all),
            ("web_search", self._google_search_all),
        )
        for provider, run_search in search_paths:
            companies = await run_search(queries)
            if companies:
                return {
                    "companies": companies[:_MAX_COMPANIES],
                    "query": product_description,
                    "market": target_market,
                    "provider": provider,
                }

        logger.info("No real search results, using AI strategy")
        return await self._ai_strategy(product_description, target_market)

    async def analyze(self, company_url: str, product_description: str) -> Dict[str, Any]:
        """Score how well the company at ``company_url`` matches the product.

        Fetches the page text, sends at most its first 2500 characters to the
        AI together with the product description, and returns the parsed JSON
        reply tagged with ``url`` and ``provider``. Returns the template
        analysis when no AI provider is available or the reply is unusable.
        """
        page_text = await fetch_page_text(company_url)
        if not self._ai_available:
            return self._template_analysis(company_url)

        page_excerpt = page_text[:2500] if page_text else "无法获取网页内容"
        prompt = (
            f"用户的产品:{product_description} "
            f"目标公司信息: URL: {company_url} "
            f"网页内容:{page_excerpt} "
            "请分析该公司的业务与用户产品的匹配度。"
        )
        parsed = await self._chat_json(
            prompt,
            ANALYZE_MATCH_PROMPT,
            "Analysis AI parse failed",
            extra_fields={"url": company_url},
        )
        return parsed if parsed else self._template_analysis(company_url)

    async def outreach(self, company_info: Dict[str, Any], product_info: Dict[str, Any]) -> Dict[str, Any]:
        """Generate outreach copy for a company.

        Both dicts are serialized with ``json.dumps`` into the prompt. Returns
        the parsed AI reply tagged with ``provider``, or the template outreach
        when no AI provider is available or the reply is unusable.
        """
        if not self._ai_available:
            return self._template_outreach(company_info, product_info)

        prompt = (
            f"目标公司信息: {json.dumps(company_info, ensure_ascii=False)} "
            f"我的产品信息: {json.dumps(product_info, ensure_ascii=False)} "
            "请生成个性化触达文案。"
        )
        parsed = await self._chat_json(prompt, _OUTREACH_SYSTEM_PROMPT, "Outreach AI parse failed")
        return parsed if parsed else self._template_outreach(company_info, product_info)

    async def _chat_json(
        self,
        prompt: str,
        system_prompt: str,
        failure_label: str,
        extra_fields: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict[str, Any]]:
        """Ask the AI for a JSON object and tag it with the provider used.

        Returns ``None`` when the call raises, or when the reply contains no
        non-empty JSON object; failures are logged as ``"<failure_label>: <error>"``.
        ``extra_fields`` are written into the result before ``provider``.
        """
        try:
            result = await self.ai.chat(prompt, system_prompt=system_prompt)
            parsed = self._extract_json(result.get("reply", ""))
            if not parsed:
                return None
            if extra_fields:
                parsed.update(extra_fields)
            parsed["provider"] = result.get("provider_used", "unknown")
            return parsed
        except Exception as exc:  # best-effort boundary: callers fall back to templates
            logger.warning("%s: %s", failure_label, exc)
            return None

    async def _mcp_search_all(self, queries: list) -> list:
        """Run the first two queries through MCP search concurrently.

        All queries share one 8-second deadline; queries still running at the
        deadline are cancelled and a warning is logged. Failed queries are
        logged at debug level and skipped. Results are merged in query order,
        then de-duplicated, filtered and capped at 15.
        """
        tasks = [
            asyncio.create_task(mcp_search(query, max_results=6))
            for query in queries[:_MCP_QUERY_LIMIT]
        ]
        if not tasks:
            # asyncio.wait() rejects an empty collection.
            return []

        try:
            _, pending = await asyncio.wait(tasks, timeout=_MCP_TIMEOUT_SECONDS)
            if pending:
                logger.warning("MCP search overall timeout")
        finally:
            # Also runs if this coroutine itself is cancelled, so no task leaks.
            for task in tasks:
                if not task.done():
                    task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)

        collected = []
        for task in tasks:
            if task.cancelled():
                continue
            error = task.exception()
            if error is not None:
                logger.debug("MCP search query failed: %s", error)
                continue
            collected.extend(r for r in (task.result() or []) if isinstance(r, dict))

        return self._dedup_and_filter(collected)[:_MAX_COMPANIES]

    @staticmethod
    def _hostname_of(url: str) -> str:
        """Return the lower-cased host of ``url`` ('' if it cannot be parsed).

        Scheme-less inputs such as ``example.com/path`` are handled by
        re-parsing them as network-path references.
        """
        try:
            parts = urlsplit(url)
            if not parts.netloc:
                parts = urlsplit(f"//{url}")
            host = parts.hostname
        except ValueError:
            return ""
        return (host or "").rstrip(".")

    @staticmethod
    def _is_blocked_host(hostname: str) -> bool:
        """Tell whether ``hostname`` is on the block lists defined at module level."""
        if hostname.endswith(_BLOCKED_HOST_SUFFIXES):
            return True
        if any(label in hostname for label in _BLOCKED_HOST_LABELS):
            return True
        return any(keyword in hostname for keyword in _BLOCKED_HOST_KEYWORDS)

    def _dedup_and_filter(self, results: list) -> list:
        """Drop duplicate URLs and results hosted on blocked domains.

        URLs are compared after stripping trailing slashes. Results without a
        usable ``url`` string are dropped. Blocked suffixes are matched against
        the end of the host (or as dot-delimited labels), so ``www.cnn.com`` is
        not mistaken for a ``.cn`` host.
        """
        seen = set()
        kept = []
        for result in results:
            raw_url = result.get("url")
            url = raw_url.rstrip("/") if isinstance(raw_url, str) else ""
            if not url or url in seen:
                continue
            seen.add(url)
            if self._is_blocked_host(self._hostname_of(url)):
                continue
            kept.append(result)
        return kept

    async def _google_search_all(self, queries: list) -> list:
        """Run up to three queries through web search, one after another.

        Stops as soon as 15 results survive de-duplication and filtering. A
        query that raises is logged and skipped so the remaining queries (and
        the caller's AI fallback) still get a chance.
        """
        collected = []
        for query in queries[:_WEB_QUERY_LIMIT]:
            try:
                results = await search_companies(query, max_results=8)
            except Exception as exc:  # best-effort, mirrors the MCP search path
                logger.warning("Web search query failed: %s", exc)
                continue
            collected.extend(r for r in (results or []) if isinstance(r, dict))
            # Filter before checking the cap so blocked hosts do not count toward it.
            collected = self._dedup_and_filter(collected)
            if len(collected) >= _MAX_COMPANIES:
                break
        return collected[:_MAX_COMPANIES]

    def _build_queries(self, product: str, market: str) -> list:
        """Return the ten search queries for ``product`` in ``market``, most specific first."""
        return [
            f"{product} importer {market}",
            f"{product} distributor {market}",
            f"{product} wholesale buyer {market}",
            f"{product} procurement {market}",
            f"{product} company {market}",
            f"buy {product} from {market}",
            f"{product} supply chain {market}",
            f"top {product} manufacturers {market}",
            f"{product} import export {market}",
            f"{product} trading company {market}",
        ]

    def _extract_json(self, text: str) -> Optional[dict]:
        """Parse a JSON object out of an AI reply.

        Strips one surrounding markdown code fence (with an optional
        ``json`` tag in any letter case), then tries the whole text and, failing
        that, the span from the first ``{`` to the last ``}``.

        Returns:
            The first candidate that decodes to a ``dict``, else ``None``
            (including for non-string input and non-object JSON values).
        """
        if not isinstance(text, str):
            return None

        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()

        candidates = [cleaned]
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if 0 <= start < end:
            candidates.append(cleaned[start:end + 1])

        for candidate in candidates:
            try:
                value = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(value, dict):
                return value
        return None

    async def _ai_strategy(self, product: str, market: str) -> Dict[str, Any]:
        """Ask the AI for buyer personas and an acquisition strategy.

        Returns the parsed reply tagged with ``provider``, or the template
        strategy when no AI provider is available or the reply is unusable.
        """
        if not self._ai_available:
            return self._template_strategy(product, market)

        prompt = f"产品:{product}\n目标市场:{market}\n请分析潜在买家画像和获取策略。"
        parsed = await self._chat_json(prompt, _STRATEGY_SYSTEM_PROMPT, "AI strategy failed")
        return parsed if parsed else self._template_strategy(product, market)

    def _template_strategy(self, product: str, market: str) -> Dict[str, Any]:
        """Return the static strategy used when the AI cannot provide one."""
        return {
            "buyer_personas": [
                {
                    "type": "进口商/批发商",
                    "description": f"从中国进口{product}并在{market}批发的贸易商",
                    "channels": ["LinkedIn", "Google"],
                    "search_queries": [f"{product} importer {market}"],
                },
                {
                    "type": "品牌商/OEM买家",
                    "description": f"在{market}销售自有品牌{product}的公司",
                    "channels": ["LinkedIn", "行业展会"],
                    "search_queries": [f"{product} manufacturer {market}"],
                },
            ],
            "strategy": f"建议在 LinkedIn 和 Google 搜索 {market} 的 {product} 相关公司",
            "tips": ["使用多个搜索词", "找到公司后在 LinkedIn 找决策人"],
            "provider": "template",
        }

    def _template_analysis(self, url: str) -> Dict[str, Any]:
        """Return the static, neutral-score analysis used when the AI cannot provide one."""
        return {
            "match_score": 50,
            "match_reason": "无法获取网页内容进行分析,建议手动查看",
            "url": url,
            "provider": "template",
        }

    def _template_outreach(self, company: Dict[str, Any], product: Dict[str, Any]) -> Dict[str, Any]:
        """Return static outreach copy built from the ``name`` of the company and product."""
        company_name = company.get("name", "")
        product_name = product.get("name", "")
        return {
            "subject": f"关于{product_name}的合作机会",
            "linkedin_message": f"您好!了解到贵司{company_name}在经营相关业务,我们专业生产{product_name},品质稳定,价格有竞争力。如有兴趣,我可以发详细资料供参考。",
            "whatsapp_message": f"Hello! We are a professional {product_name} manufacturer. Interested in exploring cooperation? Happy to share details.",
            "email_body": f"Dear {company_name} team,\n\nWe are a professional {product_name} manufacturer with competitive pricing and consistent quality. Would you be open to a quick chat to explore potential cooperation?\n\nBest regards,\n[Your Name]",
            "key_points": ["产品质量有保障", "价格有竞争力", "可定制"],
            "tips": ["发送前先了解对方背景", "LinkedIn 消息要简短"],
            "provider": "template",
        }