Add admin-frontend and user-frontend standalone projects, certification/invoice/discovery features, fix auth header and theme consistency
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
from typing import List, Dict, Optional
|
||||
import httpx
|
||||
import json
|
||||
import logging
|
||||
from app.config import settings
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Endpoint of the Google Custom Search JSON API.
GOOGLE_CSE_URL = "https://www.googleapis.com/customsearch/v1"

# Domains whose search hits are discarded by the search helper below:
# search engines, social networks, marketplaces and reference sites.
IGNORE_DOMAINS = [
    "google.com",
    "facebook.com",
    "twitter.com",
    "instagram.com",
    "youtube.com",
    "reddit.com",
    "amazon.com",
    "ebay.com",
    "wikipedia.org",
    "linkedin.com",
    "pinterest.com",
    "baidu.com",
    "bing.com",
    "duckduckgo.com",
]
|
||||
|
||||
|
||||
async def search_companies(query: str, max_results: int = 10) -> List[Dict[str, str]]:
    """Search for companies matching *query* via Google Custom Search.

    Reads the API key and search-engine id from ``settings``. When either
    one is missing or empty, no request is made: an info message is logged
    and an empty list is returned.

    Args:
        query: Free-text search query.
        max_results: Upper bound on the number of results requested.

    Returns:
        The list produced by ``_google_cse``, or ``[]`` when Google CSE is
        not configured.
    """
    key = settings.GOOGLE_API_KEY or ""
    engine_id = settings.GOOGLE_CSE_ID or ""

    # Guard clause: both credentials are required to call the API.
    if not (key and engine_id):
        logger.info("Google CSE not configured, using template results")
        return []

    return await _google_cse(query, max_results, key, engine_id)
|
||||
|
||||
|
||||
def _is_ignored_url(url: str) -> bool:
    """Return True when *url*'s host is an ignored domain or a subdomain of one."""
    # Function-scope import, matching the local-import style used in this file.
    from urllib.parse import urlsplit

    try:
        host = (urlsplit(url).hostname or "").rstrip(".").lower()
    except ValueError:
        # Malformed netloc (e.g. unbalanced IPv6 brackets): nothing to compare.
        return False
    # Compare against the hostname only, so that paths or query strings that
    # merely mention an ignored domain, and lookalike hosts such as
    # "notebay.com", are not discarded.
    return any(host == domain or host.endswith("." + domain) for domain in IGNORE_DOMAINS)


async def _google_cse(query: str, max_results: int, api_key: str, cse_id: str) -> List[Dict[str, str]]:
    """Query the Google Custom Search JSON API and return filtered hits.

    This is a best-effort helper: any non-200 response or exception is
    logged as a warning and results in an empty list instead of an error.

    Args:
        query: Free-text search query, sent as the ``q`` parameter.
        max_results: Maximum number of results wanted. At most 10 are
            requested per call; values below 1 return ``[]`` without
            contacting the API.
        api_key: Google API key, sent as ``key``.
        cse_id: Custom search engine id, sent as ``cx``.

    Returns:
        A list of dicts with keys ``"title"`` (at most 100 characters,
        falling back to the URL), ``"url"`` (trailing slashes removed) and
        ``"snippet"`` (at most 200 characters). Items without a link, or
        whose host is in ``IGNORE_DOMAINS``, are skipped.
    """
    if max_results < 1:
        # Nothing can be returned; avoid spending an API call on it.
        return []

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            resp = await client.get(GOOGLE_CSE_URL, params={
                "key": api_key,
                "cx": cse_id,
                "q": query,
                "num": min(max_results, 10),
                "lr": "lang_en",
            })

        if resp.status_code != 200:
            logger.warning(f"Google CSE returned {resp.status_code}")
            return []

        results = []
        # "items" may be absent (no hits); `or []` also tolerates an explicit null.
        for item in resp.json().get("items") or []:
            url = item.get("link") or ""
            if not url or _is_ignored_url(url):
                continue
            results.append({
                # `or` fallbacks keep a null title/snippet from raising and
                # thereby discarding every result of this query.
                "title": (item.get("title") or url)[:100],
                "url": url.rstrip("/"),
                "snippet": (item.get("snippet") or "")[:200],
            })
        return results[:max_results]
    except Exception as e:
        # Deliberate catch-all boundary: search failures must not propagate.
        logger.warning(f"Google CSE failed: {e}")
        return []
|
||||
|
||||
|
||||
async def fetch_page_text(url: str) -> Optional[str]:
    """Download *url* and return a plain-text excerpt of the page.

    The page is fetched with redirects followed and a 10 second timeout.
    ``script``, ``style``, ``nav``, ``footer`` and ``header`` elements are
    removed before the text is extracted, runs of whitespace are collapsed
    to single spaces, and the result is cut to 3000 characters.

    Args:
        url: Address of the page to fetch.

    Returns:
        The excerpt, or ``None`` when the response status is not 200, the
        excerpt is 100 characters or shorter, or any exception occurs (the
        exception is logged at debug level).
    """
    try:
        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            resp = await client.get(url, headers={"User-Agent": "Mozilla/5.0"})
            if resp.status_code != 200:
                return None

            # Imported lazily, only once there is a page to parse; a failed
            # import is handled by the except clause below.
            from bs4 import BeautifulSoup
            import re

            soup = BeautifulSoup(resp.text, "html.parser")
            # Drop elements that do not carry the page's main content.
            for boilerplate in soup.find_all(["script", "style", "nav", "footer", "header"]):
                boilerplate.decompose()

            raw_text = soup.get_text(separator=" ", strip=True)
            excerpt = re.sub(r"\s+", " ", raw_text)[:3000]
            if len(excerpt) <= 100:
                # Too little text to be useful.
                return None
            return excerpt
    except Exception as e:
        logger.debug(f"fetch {url} failed: {e}")
    return None
|
||||
Reference in New Issue
Block a user