13e3992d4c
Security fixes: - Add file upload size limits (10MB) for customer and product imports - Add XLSX file validation with row limits and magic byte checking - Implement password validation (min 6 chars) in registration - Add rate limiting for guest login (5 per IP per 15 minutes) - Sanitize error messages to prevent information leakage - Fix XSS vulnerability by removing unsafe v-html usage - Enforce WhatsApp webhook signature verification - Add SSRF protection with URL validation and IP blocking - Fix marketing endpoints to use proper authentication Code quality improvements: - Create shared utility functions for UUID validation and string sanitization - Remove duplicate UUID validation code from admin modules - Remove dead code (pass statement in translation.py) - Fix aliyun SDK import compatibility
114 lines
4.1 KiB
Python
114 lines
4.1 KiB
Python
from typing import Dict, Any, Optional, List
|
|
from app.ai.router import get_ai_router
|
|
from app.ai.trade_corpus import TradeCorpus
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TranslationService:
    """Translation, reply drafting and field extraction on top of the AI router.

    The service holds two collaborators created in ``__init__``: the AI
    router (``self.ai``) that performs the actual model calls, and a
    ``TradeCorpus`` (``self.corpus``) that is consulted for similar past
    translations and updated with new ones.
    """

    # Tones offered when drafting several reply variants, in fallback order.
    _DEFAULT_TONES = ("professional", "friendly", "formal")

    def __init__(self):
        self.ai = get_ai_router()
        self.corpus = TradeCorpus()

    async def translate(
        self, text: str, target_lang: str, source_lang: Optional[str] = None,
        context: Optional[str] = None, user_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Translate ``text`` into ``target_lang``, reusing the corpus when possible.

        The best corpus match is returned directly when its source is longer
        than 20 characters, its token overlap with ``text`` exceeds 0.85 and
        it carries a non-empty translation. Otherwise the AI router is called
        and a non-empty result is recorded in the corpus.

        Args:
            text: Text to translate.
            target_lang: Language to translate into.
            source_lang: Language of ``text``; reported as ``"auto"`` when
                omitted and the router does not report a detected language.
            context: Optional extra context forwarded to the AI router.
            user_id: Optional user identifier stored as corpus metadata.

        Returns:
            A dict with at least ``translated_text``, ``source_lang``,
            ``provider_used`` (when the provider reports it) and
            ``from_cache``.
        """
        # NOTE(review): the lookup is not visibly scoped by target_lang, so a
        # cached translation into another language could match here. Confirm
        # that TradeCorpus.find_similar filters by language.
        similar = await self.corpus.find_similar(text, "translate")
        if similar:
            best = similar[0]
            if (
                len(best["source"]) > 20
                and self._similarity_ratio(text, best["source"]) > 0.85
                # An empty cached target is not a usable translation; fall
                # through to the AI router instead of returning "".
                and best["target"]
            ):
                return {
                    "translated_text": best["target"],
                    "source_lang": source_lang or "auto",
                    "provider_used": "corpus_cache",
                    "from_cache": True,
                }

        result = await self.ai.translate(text, target_lang, source_lang, context)
        translated = result.get("translated_text", "")
        provider = result.get("provider_used", "unknown")

        # Only learn from real output: recording an empty translation would
        # let a later similar request be served "" as a cache hit.
        if translated:
            await self.corpus.record(
                source_text=text,
                target_text=translated,
                task_type="translate",
                provider=provider,
                source_lang=source_lang,
                target_lang=target_lang,
                metadata={"user_id": user_id} if user_id else None,
            )

        result["source_lang"] = result.get("detected_source_lang", source_lang or "auto")
        result["from_cache"] = False
        return result

    async def generate_reply(
        self, inquiry: str, context: Optional[Dict[str, Any]] = None,
        tone: str = "professional", count: int = 3,
        preference_context: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Draft up to ``count`` replies to ``inquiry``, one per tone.

        The requested ``tone`` is tried first, followed by the remaining
        default tones. Tones are processed sequentially; a failure for one
        tone is logged and reported in that tone's entry without aborting
        the others.

        Args:
            inquiry: The customer message to answer.
            context: Optional context forwarded to the AI router.
            tone: Preferred tone, used for the first variant.
            count: Maximum number of variants to produce.
            preference_context: Optional preference text forwarded to the
                AI router.

        Returns:
            A list of dicts. Successful entries contain ``reply``, ``tone``
            and ``provider``; failed entries contain an empty ``reply``,
            ``tone`` and ``error``.
        """
        results: List[Dict[str, Any]] = []

        for t in self._get_tones(tone, count):
            try:
                result = await self.ai.reply(inquiry, context, t, preference_context)
                results.append({
                    "reply": result.get("reply", ""),
                    "tone": t,
                    "provider": result.get("provider_used", "unknown"),
                })
            except Exception as e:
                # Best effort per tone: keep the remaining variants coming.
                logger.warning("Reply generation failed for tone '%s': %s", t, e)
                results.append({"reply": "", "tone": t, "error": str(e)})

        return results

    async def extract_info(self, text: str, extract_type: str = "auto") -> Dict[str, Any]:
        """Extract structured fields from ``text`` using the AI router.

        Args:
            text: Free text to analyse.
            extract_type: ``"product"`` or ``"inquiry"``. Any other value,
                including the default ``"auto"``, uses the inquiry schema.

        Returns:
            The ``data`` entry of the router's result, or ``{}`` when the
            result has no such entry.
        """
        # Built per call so the router always receives a fresh schema object.
        schemas = {
            "product": {
                "type": "object",
                "properties": {
                    "product_name": {"type": "string"},
                    "quantity": {"type": "string"},
                    "price": {"type": "string"},
                    "currency": {"type": "string"},
                    "delivery_terms": {"type": "string"},
                    "target_country": {"type": "string"},
                },
            },
            "inquiry": {
                "type": "object",
                "properties": {
                    "intent": {"type": "string"},
                    "product_interest": {"type": "string"},
                    "quantity": {"type": "string"},
                    "budget": {"type": "string"},
                    "urgency": {"type": "string"},
                    "contact_info": {"type": "string"},
                },
            },
        }

        schema = schemas.get(extract_type, schemas["inquiry"])
        result = await self.ai.extract(text, schema)
        return result.get("data", {})

    def _get_tones(self, base: str, count: int) -> List[str]:
        """Return up to ``count`` tones with ``base`` first.

        ``base`` is placed first even when it is not one of the default
        tones. A negative ``count`` yields an empty list; ``None`` yields
        every tone.
        """
        tones = [base] + [t for t in self._DEFAULT_TONES if t != base]
        # A negative slice bound would drop tones from the end instead of
        # returning none, so clamp it to zero.
        limit = None if count is None else max(count, 0)
        return tones[:limit]

    def _similarity_ratio(self, a: str, b: str) -> float:
        """Return the word-set overlap of ``a`` and ``b`` as a value in [0, 1].

        Both strings are lower-cased and split on whitespace; the result is
        the size of the shared word set divided by the size of the combined
        word set. Empty or whitespace-only input gives ``0.0``.
        """
        if not a or not b:
            return 0.0
        set_a, set_b = set(a.lower().split()), set(b.lower().split())
        # Whitespace-only strings pass the first check but yield no tokens.
        if not set_a or not set_b:
            return 0.0
        return len(set_a & set_b) / len(set_a | set_b)
|