From fbfc97290812765e57a60c747f43671cf792e8a3 Mon Sep 17 00:00:00 2001 From: tam Date: Sat, 19 Apr 2025 17:38:09 +0800 Subject: [PATCH] feat(knowledge): integrate reranking functionality into RetrieverResource - Add reranking support to improve the quality of retrieved chunks - Use RerankEmbeddingFactory and RerankEmbeddingsRanker to build the reranker - Update RetrieverResource to use reranking when configured - Rerank the chunks returned by retrieve when reranking is enabled and more than one chunk is returned --- .../src/dbgpt/agent/resource/knowledge.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/packages/dbgpt-core/src/dbgpt/agent/resource/knowledge.py b/packages/dbgpt-core/src/dbgpt/agent/resource/knowledge.py index aa18d0b53..16f7ff622 100644 --- a/packages/dbgpt-core/src/dbgpt/agent/resource/knowledge.py +++ b/packages/dbgpt-core/src/dbgpt/agent/resource/knowledge.py @@ -5,8 +5,11 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type import cachetools +from dbgpt._private.config import Config from dbgpt.core import Chunk from dbgpt.util.cache_utils import cached +from dbgpt.rag.retriever.rerank import RerankEmbeddingsRanker +from dbgpt.rag.embedding.embedding_factory import RerankEmbeddingFactory from .base import Resource, ResourceParameters, ResourceType @@ -14,6 +17,7 @@ if TYPE_CHECKING: from dbgpt.rag.retriever.base import BaseRetriever from dbgpt.storage.vector_store.filters import MetadataFilters +CFG = Config() @dataclasses.dataclass class RetrieverResourceParameters(ResourceParameters): @@ -32,6 +36,12 @@ class RetrieverResource(Resource[ResourceParameters]): """Create a new RetrieverResource.""" self._name = name self._retriever = retriever + app_config = CFG.SYSTEM_APP.config.configs.get("app_config") + rerank_embeddings = RerankEmbeddingFactory.get_instance( + CFG.SYSTEM_APP + ).create() + self.need_rerank = bool(app_config.models.rerankers) + self.reranker = RerankEmbeddingsRanker(rerank_embeddings, topk=app_config.rag.rerank_top_k) @property def name(self) -> str: 
@@ -77,6 +87,9 @@ class RetrieverResource(Resource[ResourceParameters]): if not question: raise ValueError("Question is required for knowledge resource.") chunks = await self.retrieve(question) + if self.need_rerank and len(chunks) > 1: + chunks = self.reranker.rank(candidates_with_scores=chunks, query=question) + content = "\n".join( [f"--{i}--:" + chunk.content for i, chunk in enumerate(chunks)] )