mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-07-30 15:21:02 +00:00
feat(knowledge): integrate reranking functionality into RetrieverResource
- Add reranking support to improve the quality of retrieved chunks
- Implement RerankEmbeddingFactory and RerankEmbeddingsRanker
- Update RetrieverResource to use reranking when configured
- Modify retrieve method to apply reranking if needed
This commit is contained in:
parent
cc287702bc
commit
fbfc972908
@ -5,8 +5,11 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type
|
||||
|
||||
import cachetools
|
||||
|
||||
from dbgpt._private.config import Config
|
||||
from dbgpt.core import Chunk
|
||||
from dbgpt.util.cache_utils import cached
|
||||
from dbgpt.rag.retriever.rerank import RerankEmbeddingsRanker
|
||||
from dbgpt.rag.embedding.embedding_factory import RerankEmbeddingFactory
|
||||
|
||||
from .base import Resource, ResourceParameters, ResourceType
|
||||
|
||||
@ -14,6 +17,7 @@ if TYPE_CHECKING:
|
||||
from dbgpt.rag.retriever.base import BaseRetriever
|
||||
from dbgpt.storage.vector_store.filters import MetadataFilters
|
||||
|
||||
CFG = Config()
|
||||
|
||||
@dataclasses.dataclass
|
||||
class RetrieverResourceParameters(ResourceParameters):
|
||||
@ -32,6 +36,12 @@ class RetrieverResource(Resource[ResourceParameters]):
|
||||
"""Create a new RetrieverResource."""
|
||||
self._name = name
|
||||
self._retriever = retriever
|
||||
app_config = CFG.SYSTEM_APP.config.configs.get("app_config")
|
||||
rerank_embeddings = RerankEmbeddingFactory.get_instance(
|
||||
CFG.SYSTEM_APP
|
||||
).create()
|
||||
self.need_rerank = bool(app_config.models.rerankers)
|
||||
self.reranker = RerankEmbeddingsRanker(rerank_embeddings, topk=app_config.rag.rerank_top_k)
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
@ -77,6 +87,9 @@ class RetrieverResource(Resource[ResourceParameters]):
|
||||
if not question:
|
||||
raise ValueError("Question is required for knowledge resource.")
|
||||
chunks = await self.retrieve(question)
|
||||
if self.need_rerank and len(chunks) > 1:
|
||||
chunks = self.reranker.rank(candidates_with_scores=chunks, query=question)
|
||||
|
||||
content = "\n".join(
|
||||
[f"--{i}--:" + chunk.content for i, chunk in enumerate(chunks)]
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user