init

fahmiaziz98 · fahmiaziz98 · commit 9e5acab3f4d4 · 2025-11-02T23:19:44.000+07:00
diff --git a/src/api/routers/rerank.py b/src/api/routers/rerank.py
@@ -62,11 +62,8 @@ async def rerank_documents(
         )
 
     try:
-        # Extract kwargs but exclude rerank-specific fields
         kwargs = extract_embedding_kwargs(request)
 
-        # Remove fields that are already passed as positional arguments
-        # to avoid "got multiple values for argument" error
         kwargs.pop("query", None)
         kwargs.pop("documents", None)
         kwargs.pop("top_k", None)
@@ -80,19 +77,10 @@ async def rerank_documents(
                 detail=f"Model '{request.model_id}' is not a rerank model. Type: {config.type}",
             )
 
-        # Debug logs BEFORE calling rank_document
-        logger.debug(f"Rerank request - Query: '{request.query}'")
-        logger.debug(f"Documents to rank: {len(valid_docs)}")
-        if valid_docs:
-            logger.debug(f"First document: {valid_docs[0][1][:100]}...")
-        logger.debug(f"Top K: {request.top_k}")
-
         start = time.time()
 
-        # Extract documents for ranking
         documents_list = [doc for _, doc in valid_docs]
-        
-        # Call rank_document - returns only top_k results
+
         ranking_results = model.rank_document(
             query=request.query,
             documents=documents_list,
@@ -102,18 +90,10 @@ async def rerank_documents(
 
         processing_time = time.time() - start
 
-        # Debug logs AFTER rank_document
-        logger.debug(f"Ranking returned {len(ranking_results)} results")
-        if ranking_results:
-            logger.debug(f"Top result score: {ranking_results[0]}")
-
-        # Build results from ranking_results
-        # ranking_results already contains top_k items with scores
         results = []
         
         for rank_result in ranking_results:
-            # Get original index from valid_docs
-            doc_idx = rank_result.get('corpus_id', 0)  # Index in filtered list
+            doc_idx = rank_result.get('corpus_id', 0)  
             if doc_idx < len(valid_docs):
                 original_idx = valid_docs[doc_idx][0]  # Original index
                 doc_text = documents_list[doc_idx]
diff --git a/src/models/embeddings/rank.py b/src/models/embeddings/rank.py
@@ -115,8 +115,6 @@ def rank_document(
             self.load()
         
         try:
-            # model.rank returns List[Dict] with 'corpus_id' and 'score'
-            # Already sorted by score (highest first) and limited to top_k
             ranking_results = self.model.rank(
                 query, 
                 documents, 
@@ -157,14 +155,11 @@ def _normalize_rerank_scores(
         if not rankings:
             return []
         
-        # Extract raw scores
         raw_scores = [ranking["score"] for ranking in rankings]
         
-        # Min-Max normalization
         min_score = min(raw_scores)
         max_score = max(raw_scores)
         
-        # If all scores are the same, return max target value
         if max_score == min_score:
             return [
                 {
@@ -174,7 +169,6 @@ def _normalize_rerank_scores(
                 for r in rankings
             ]
         
-        # Normalize to target range
         target_min, target_max = target_range
         normalized_rankings = []