"""HTTP routes that expose ``CorpusTrainer`` operations."""

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession

from app.database import get_db
from app.services.corpus_trainer import CorpusTrainer
# NOTE(review): get_current_user_id is imported but no route in this module
# depends on it. Confirm whether authentication is applied elsewhere (for
# example where this router is included) or is missing from these endpoints.
from app.api.v1.deps import get_current_user_id

router = APIRouter()


@router.post("/corpus/run")
async def run_corpus_training(
    db: AsyncSession = Depends(get_db),
):
    """Call ``CorpusTrainer.run_pipeline`` and return its result.

    Args:
        db: Async database session supplied by the ``get_db`` dependency.

    Returns:
        Whatever ``CorpusTrainer.run_pipeline`` returns, unmodified.
    """
    return await CorpusTrainer(db).run_pipeline()


@router.post("/corpus/embeddings")
async def compute_embeddings(
    batch_size: int = 50,
    db: AsyncSession = Depends(get_db),
):
    """Call ``CorpusTrainer.compute_embeddings`` with the given batch size.

    Args:
        batch_size: Query parameter forwarded to the trainer; must be at
            least 1. Defaults to 50.
        db: Async database session supplied by the ``get_db`` dependency.

    Returns:
        Whatever ``CorpusTrainer.compute_embeddings`` returns, unmodified.

    Raises:
        HTTPException: 422 when ``batch_size`` is smaller than 1.
    """
    # Reject non-positive sizes at the boundary so a client-supplied 0 or
    # negative value never reaches the trainer.
    if batch_size < 1:
        raise HTTPException(
            status_code=422,
            detail="batch_size must be a positive integer",
        )
    return await CorpusTrainer(db).compute_embeddings(batch_size)


@router.get("/corpus/stats")
async def corpus_stats(
    db: AsyncSession = Depends(get_db),
):
    """Call ``CorpusTrainer.get_stats`` and return its result.

    Args:
        db: Async database session supplied by the ``get_db`` dependency.

    Returns:
        Whatever ``CorpusTrainer.get_stats`` returns, unmodified.
    """
    return await CorpusTrainer(db).get_stats()


@router.post("/corpus/deduplicate")
async def deduplicate_corpus(
    db: AsyncSession = Depends(get_db),
):
    """Call ``CorpusTrainer.deduplicate`` and return its result.

    Args:
        db: Async database session supplied by the ``get_db`` dependency.

    Returns:
        Whatever ``CorpusTrainer.deduplicate`` returns, unmodified.
    """
    return await CorpusTrainer(db).deduplicate()