feat(knowledge): integrate reranking functionality into RetrieverResource

- Add reranking support to improve the quality of retrieved chunks
- Implement RerankEmbeddingFactory and RerankEmbeddingsRanker
- Update RetrieverResource to use reranking when configured
- Rerank the chunks returned by retrieve when reranking is configured and more than one chunk is returned
This commit is contained in:
tam 2025-04-19 17:38:09 +08:00
parent cc287702bc
commit fbfc972908

View File

@ -5,8 +5,11 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type
import cachetools
from dbgpt._private.config import Config
from dbgpt.core import Chunk
from dbgpt.util.cache_utils import cached
from dbgpt.rag.retriever.rerank import RerankEmbeddingsRanker
from dbgpt.rag.embedding.embedding_factory import RerankEmbeddingFactory
from .base import Resource, ResourceParameters, ResourceType
@ -14,6 +17,7 @@ if TYPE_CHECKING:
from dbgpt.rag.retriever.base import BaseRetriever
from dbgpt.storage.vector_store.filters import MetadataFilters
CFG = Config()
@dataclasses.dataclass
class RetrieverResourceParameters(ResourceParameters):
@ -32,6 +36,12 @@ class RetrieverResource(Resource[ResourceParameters]):
"""Create a new RetrieverResource."""
self._name = name
self._retriever = retriever
app_config = CFG.SYSTEM_APP.config.configs.get("app_config")
rerank_embeddings = RerankEmbeddingFactory.get_instance(
CFG.SYSTEM_APP
).create()
self.need_rerank = bool(app_config.models.rerankers)
self.reranker = RerankEmbeddingsRanker(rerank_embeddings, topk=app_config.rag.rerank_top_k)
@property
def name(self) -> str:
@ -77,6 +87,9 @@ class RetrieverResource(Resource[ResourceParameters]):
if not question:
raise ValueError("Question is required for knowledge resource.")
chunks = await self.retrieve(question)
if self.need_rerank and len(chunks) > 1:
chunks = self.reranker.rank(candidates_with_scores=chunks, query=question)
content = "\n".join(
[f"--{i}--:" + chunk.content for i, chunk in enumerate(chunks)]
)