d2736d1ef6
- AI routing rules now stored in system_configs DB table instead of hardcoded config - Multi-model support via name|model composite key for same-provider routing - UnifiedPayService with HMAC-SHA256 gateway integration (alipay/wechat) - Admin payment panel: list, stats, search, filter, refund - WeChat mini-program CI/CD via miniprogram-ci (v1.0.9) - Translation quota extended to LLM provider tier - SearchService with DB-driven provider config (bing/google_cse/searxng) - Footer cleanup across admin/workspace/uni-app - Private key excluded from git tracking
137 lines
5.2 KiB
Python
137 lines
5.2 KiB
Python
import logging
|
|
from typing import List, Dict, Optional
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select
|
|
from app.models.search_provider import SearchProvider
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Domains whose URLs are dropped from the results returned by the provider
# search functions in this module.
IGNORE_DOMAINS = [
    # Search engines
    "google.com",
    # Social networks
    "facebook.com",
    "twitter.com",
    "instagram.com",
    "youtube.com",
    "reddit.com",
    # Marketplaces
    "amazon.com",
    "ebay.com",
    # Reference / professional / misc.
    "wikipedia.org",
    "linkedin.com",
    "pinterest.com",
    # More search engines
    "baidu.com",
    "bing.com",
]
|
|
|
|
|
|
class SearchService:
    """Run web searches through the providers configured in the database.

    Enabled ``SearchProvider`` rows are loaded on every ``search`` call and
    tried one after another, ordered by their ``priority`` column.
    """

    def __init__(self, db: AsyncSession):
        # Async session used to read the provider configuration rows.
        self.db = db

    async def search(self, query: str, limit: int = 10) -> List[Dict[str, str]]:
        """Return the results of the first enabled provider that does not raise.

        A provider that raises is logged at warning level and the next one
        is tried. If no provider is enabled, or all of them fail, an empty
        list is returned.
        """
        for provider in await self._get_enabled_providers():
            try:
                hits = await self._search_provider(provider, query, limit)
            except Exception as e:
                # Best-effort fallback: record the failure and move on.
                logger.warning(f"Search provider {provider.provider_type} failed: {e}")
            else:
                return hits
        return []

    async def _get_enabled_providers(self) -> List[SearchProvider]:
        """Load all enabled providers, ordered by ``priority``."""
        stmt = (
            select(SearchProvider)
            # ``== True`` builds a SQL expression here, not a Python bool test.
            .where(SearchProvider.enabled == True)
            .order_by(SearchProvider.priority)
        )
        rows = await self.db.execute(stmt)
        return list(rows.scalars().all())

    async def _search_provider(self, provider: SearchProvider, query: str, limit: int) -> List[Dict[str, str]]:
        """Dispatch the query to the backend named by ``provider.provider_type``.

        Raises:
            ValueError: If the provider type is not one of ``searxng``,
                ``bing`` or ``google_cse``.
        """
        pt = provider.provider_type
        if pt == "searxng":
            return await searxng_search(provider.api_endpoint, query, limit)
        if pt == "bing":
            return await bing_search(provider.api_key, query, limit)
        if pt != "google_cse":
            raise ValueError(f"Unknown provider type: {pt}")
        # The Google CSE engine id ("cx") is read from the provider's
        # extra_config; a missing config or key falls back to "".
        return await google_cse_search(
            api_key=provider.api_key,
            cx=(provider.extra_config or {}).get("cx", ""),
            query=query,
            limit=limit,
        )
|
|
|
|
|
|
async def searxng_search(endpoint: Optional[str], query: str, limit: int) -> List[Dict[str, str]]:
    """Query a SearXNG instance and return up to ``limit`` normalized results.

    Args:
        endpoint: Base URL of the SearXNG instance; ``/search`` is appended.
        query: Search terms, sent as the ``q`` parameter.
        limit: Maximum number of results to return. A value ``<= 0`` returns
            an empty list without issuing a request.

    Returns:
        A list of dicts with ``title`` (at most 100 chars, falling back to
        the URL), ``url`` (trailing slashes stripped) and ``snippet`` (at
        most 200 chars). Results hosted on ``IGNORE_DOMAINS`` are omitted.

    Raises:
        ValueError: If ``endpoint`` is empty or the response status is not 200.
    """
    if not endpoint:
        raise ValueError("SearXNG endpoint not configured")
    if limit <= 0:
        # Nothing was asked for; previously this still returned one result.
        return []

    from urllib.parse import urlsplit

    import httpx

    def _is_ignored(url: str) -> bool:
        # Match on the hostname only, at label boundaries: "bing.com" hits
        # "www.bing.com" and "google.com" hits "www.google.com.hk", but
        # "notbing.com" or a URL that merely mentions an ignored domain in
        # its path or query string is kept.
        try:
            host = urlsplit(url if "//" in url else "//" + url).hostname or ""
        except ValueError:
            # Unparseable URL: cannot attribute it to an ignored domain.
            return False
        padded = f".{host}."
        return any(f".{domain}." in padded for domain in IGNORE_DOMAINS)

    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.get(
            endpoint.rstrip("/") + "/search",
            params={"q": query, "format": "json", "language": "zh-CN,en", "categories": "general"},
            headers={"User-Agent": "TradeMate/1.0"},
        )
        if resp.status_code != 200:
            raise ValueError(f"SearXNG returned {resp.status_code}")
        data = resp.json()

    # The payload is either an object with a "results" list or a bare list.
    items = data.get("results", []) if isinstance(data, dict) else data

    results: List[Dict[str, str]] = []
    for item in items:
        # ``or ""`` also covers an explicit null URL in the payload.
        url = item.get("url") or ""
        if _is_ignored(url):
            continue
        results.append({
            "title": (item.get("title") or url)[:100],
            "url": url.rstrip("/"),
            "snippet": (item.get("content") or item.get("snippet") or "")[:200],
        })
        if len(results) >= limit:
            break
    return results
|
|
|
|
|
|
async def bing_search(api_key: Optional[str], query: str, limit: int) -> List[Dict[str, str]]:
    """Query the Bing Web Search v7 API and return up to ``limit`` results.

    Args:
        api_key: Subscription key sent in ``Ocp-Apim-Subscription-Key``.
        query: Search terms, sent as the ``q`` parameter.
        limit: Maximum number of results to return. The requested ``count``
            is capped at 50. A value ``<= 0`` returns an empty list without
            issuing a request.

    Returns:
        A list of dicts with ``title`` (at most 100 chars, falling back to
        the URL), ``url`` (trailing slashes stripped) and ``snippet`` (at
        most 200 chars). Results hosted on ``IGNORE_DOMAINS`` are omitted.

    Raises:
        ValueError: If ``api_key`` is empty or the response status is not 200.
    """
    if not api_key:
        raise ValueError("Bing API key not configured")
    if limit <= 0:
        # Avoid sending a non-positive ``count`` and returning a stray result.
        return []

    from urllib.parse import urlsplit

    import httpx

    def _is_ignored(url: str) -> bool:
        # Match on the hostname only, at label boundaries: "bing.com" hits
        # "www.bing.com" and "google.com" hits "www.google.com.hk", but
        # "notbing.com" or a URL that merely mentions an ignored domain in
        # its path or query string is kept.
        try:
            host = urlsplit(url if "//" in url else "//" + url).hostname or ""
        except ValueError:
            # Unparseable URL: cannot attribute it to an ignored domain.
            return False
        padded = f".{host}."
        return any(f".{domain}." in padded for domain in IGNORE_DOMAINS)

    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.get(
            "https://api.bing.microsoft.com/v7.0/search",
            params={"q": query, "count": min(limit, 50), "mkt": "en-US", "textFormat": "Raw"},
            headers={"Ocp-Apim-Subscription-Key": api_key},
        )
        if resp.status_code != 200:
            raise ValueError(f"Bing returned {resp.status_code}")
        data = resp.json()

    results: List[Dict[str, str]] = []
    for item in data.get("webPages", {}).get("value", []):
        # ``or ""`` also covers an explicit null URL in the payload.
        url = item.get("url") or ""
        if _is_ignored(url):
            continue
        results.append({
            "title": (item.get("name") or url)[:100],
            "url": url.rstrip("/"),
            "snippet": (item.get("snippet") or "")[:200],
        })
        if len(results) >= limit:
            break
    return results
|
|
|
|
|
|
async def google_cse_search(api_key: Optional[str], cx: Optional[str], query: str, limit: int) -> List[Dict[str, str]]:
    """Query the Google Custom Search JSON API and return up to ``limit`` results.

    Args:
        api_key: API key sent as the ``key`` parameter.
        cx: Search engine id sent as the ``cx`` parameter.
        query: Search terms, sent as the ``q`` parameter.
        limit: Maximum number of results to return. The requested ``num`` is
            capped at 10. A value ``<= 0`` returns an empty list without
            issuing a request.

    Returns:
        A list of dicts with ``title`` (at most 100 chars, falling back to
        the URL), ``url`` (trailing slashes stripped) and ``snippet`` (at
        most 200 chars). Results hosted on ``IGNORE_DOMAINS`` are omitted.

    Raises:
        ValueError: If ``api_key`` or ``cx`` is empty, or the response
            status is not 200.
    """
    if not api_key or not cx:
        raise ValueError("Google CSE API key or CX not configured")
    if limit <= 0:
        # Avoid sending a non-positive ``num`` and returning a stray result.
        return []

    from urllib.parse import urlsplit

    import httpx

    def _is_ignored(url: str) -> bool:
        # Match on the hostname only, at label boundaries: "bing.com" hits
        # "www.bing.com" and "google.com" hits "www.google.com.hk", but
        # "notbing.com" or a URL that merely mentions an ignored domain in
        # its path or query string is kept.
        try:
            host = urlsplit(url if "//" in url else "//" + url).hostname or ""
        except ValueError:
            # Unparseable URL: cannot attribute it to an ignored domain.
            return False
        padded = f".{host}."
        return any(f".{domain}." in padded for domain in IGNORE_DOMAINS)

    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.get(
            "https://www.googleapis.com/customsearch/v1",
            params={"key": api_key, "cx": cx, "q": query, "num": min(limit, 10), "lr": "lang_en"},
        )
        if resp.status_code != 200:
            raise ValueError(f"Google CSE returned {resp.status_code}")
        data = resp.json()

    results: List[Dict[str, str]] = []
    for item in data.get("items", []):
        # ``or ""`` also covers an explicit null link in the payload.
        url = item.get("link") or ""
        if _is_ignored(url):
            continue
        results.append({
            "title": (item.get("title") or url)[:100],
            "url": url.rstrip("/"),
            "snippet": (item.get("snippet") or "")[:200],
        })
        if len(results) >= limit:
            break
    return results
|
|
|