From af8fadce093da9ab9fb27494bf8dded7e519ffec Mon Sep 17 00:00:00 2001
From: yhjun1026 <460342015@qq.com>
Date: Thu, 15 Aug 2024 15:37:10 +0800
Subject: [PATCH] feat(feedback): feedback upgrade

---
 .../upgrade/v0_6_0/upgrade_to_v0.6.0.sql      |  6 --
 dbgpt/app/knowledge/service.py                |  1 -
 dbgpt/app/scene/chat_knowledge/v1/chat.py     |  4 +-
 dbgpt/serve/conversation/api/schemas.py       |  8 ++
 dbgpt/serve/conversation/service/service.py   | 15 ++++
 dbgpt/serve/feedback/service/service.py       |  2 +-
 dbgpt/serve/rag/retriever/knowledge_space.py  | 30 ++++----
 dbgpt/serve/rag/retriever/qa_retriever.py     | 76 +++++++++----------
 dbgpt/serve/rag/retriever/retriever_chain.py  | 63 +++++++++------
 9 files changed, 114 insertions(+), 91 deletions(-)

diff --git a/assets/schema/upgrade/v0_6_0/upgrade_to_v0.6.0.sql b/assets/schema/upgrade/v0_6_0/upgrade_to_v0.6.0.sql
index ea30cff61..d38bf23c3 100644
--- a/assets/schema/upgrade/v0_6_0/upgrade_to_v0.6.0.sql
+++ b/assets/schema/upgrade/v0_6_0/upgrade_to_v0.6.0.sql
@@ -12,12 +12,6 @@ ALTER TABLE gpts_app ADD COLUMN `admins` text DEFAULT NULL COMMENT 'administrat
 
 ALTER TABLE connect_config ADD COLUMN `user_name` varchar(255) DEFAULT NULL COMMENT 'user name';
 ALTER TABLE connect_config ADD COLUMN `user_id` varchar(255) DEFAULT NULL COMMENT 'user id';
-
---knowledge_space
-ALTER TABLE knowledge_space ADD COLUMN `user_id` varchar(255) DEFAULT NULL COMMENT 'knowledge space owner';
-ALTER TABLE knowledge_space ADD COLUMN `user_ids` text DEFAULT NULL COMMENT 'knowledge space members';
-
-
 -- document_chunk
 ALTER TABLE document_chunk ADD COLUMN `questions` text DEFAULT NULL COMMENT 'chunk related questions';
 
diff --git a/dbgpt/app/knowledge/service.py b/dbgpt/app/knowledge/service.py
index 46b9a8086..f5a07209a 100644
--- a/dbgpt/app/knowledge/service.py
+++ b/dbgpt/app/knowledge/service.py
@@ -172,7 +172,6 @@ class KnowledgeService:
             ks.context = argument_request.argument
         return knowledge_space_dao.update_knowledge_space(ks)
 
-
     def get_knowledge_documents(self, space, request: DocumentQueryRequest):
         """get knowledge documents
         Args:
diff --git a/dbgpt/app/scene/chat_knowledge/v1/chat.py b/dbgpt/app/scene/chat_knowledge/v1/chat.py
index 59f9e5048..01cf41db7 100644
--- a/dbgpt/app/scene/chat_knowledge/v1/chat.py
+++ b/dbgpt/app/scene/chat_knowledge/v1/chat.py
@@ -76,9 +76,7 @@ class ChatKnowledge(BaseChat):
         embedding_fn = embedding_factory.create(
             model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL]
         )
-        from dbgpt.serve.rag.models.models import (
-            KnowledgeSpaceDao,
-        )
+        from dbgpt.serve.rag.models.models import KnowledgeSpaceDao
         from dbgpt.storage.vector_store.base import VectorStoreConfig
 
         spaces = KnowledgeSpaceDao().get_knowledge_space_by_ids([self.knowledge_space])
diff --git a/dbgpt/serve/conversation/api/schemas.py b/dbgpt/serve/conversation/api/schemas.py
index aacfdddac..038368219 100644
--- a/dbgpt/serve/conversation/api/schemas.py
+++ b/dbgpt/serve/conversation/api/schemas.py
@@ -156,6 +156,14 @@ class MessageVo(BaseModel):
         ],
     )
 
+    feedback: Optional[Dict] = Field(
+        default={},
+        description="feedback info",
+        examples=[
+            "{}",
+        ],
+    )
+
     def to_dict(self, **kwargs) -> Dict[str, Any]:
         """Convert the model to a dictionary"""
         return model_to_dict(self, **kwargs)
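Note on the MessageVo change above: the new feedback field defaults to an empty dict, so messages without recorded feedback keep their current shape. A minimal, self-contained sketch of the enriched message schema follows; it uses plain pydantic and reduces MessageVo to a few representative fields, so the class name and field list here are illustrative, not the project's actual base classes.

# Sketch only: mirrors the shape of the new field, not the real MessageVo.
from typing import Dict, Optional

from pydantic import BaseModel, Field


class MessageVoSketch(BaseModel):
    role: str
    context: str
    order: int
    model_name: Optional[str] = None
    # New in this patch: per-message feedback payload, empty dict when absent.
    feedback: Optional[Dict] = Field(default={}, description="feedback info")


msg = MessageVoSketch(role="ai", context="hello", order=1)
print(msg.model_dump())  # 'feedback': {} rides along with every message
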
diff --git a/dbgpt/serve/conversation/service/service.py b/dbgpt/serve/conversation/service/service.py
index 435623a8d..c38ac3d10 100644
--- a/dbgpt/serve/conversation/service/service.py
+++ b/dbgpt/serve/conversation/service/service.py
@@ -14,6 +14,7 @@ from dbgpt.storage.metadata import BaseDao
 from dbgpt.storage.metadata._base_dao import REQ, RES
 from dbgpt.util.pagination_utils import PaginationResult
 
+from ...feedback.api.endpoints import get_service
 from ..api.schemas import MessageVo, ServeRequest, ServerResponse
 from ..config import SERVE_CONFIG_KEY_PREFIX, SERVE_SERVICE_COMPONENT_NAME, ServeConfig
 from ..models.models import ServeDao, ServeEntity
@@ -201,13 +202,27 @@ class Service(BaseService[ServeEntity, ServeRequest, ServerResponse]):
         conv: StorageConversation = self.create_storage_conv(request)
         result = []
         messages = _append_view_messages(conv.messages)
+
+        feedback_service = get_service()
+
+        feedbacks = feedback_service.list_conv_feedbacks(conv_uid=request.conv_uid)
+        fb_map = {fb.message_id: fb.to_dict() for fb in feedbacks}
+
         for msg in messages:
+            feedback = {}
+            if (
+                msg.round_index is not None
+                and fb_map.get(str(msg.round_index)) is not None
+            ):
+                feedback = fb_map.get(str(msg.round_index))
+
             result.append(
                 MessageVo(
                     role=msg.type,
                     context=msg.content,
                     order=msg.round_index,
                     model_name=self.config.default_model,
+                    feedback=feedback,
                 )
             )
         return result
diff --git a/dbgpt/serve/feedback/service/service.py b/dbgpt/serve/feedback/service/service.py
index d34fbd0da..7392a3b3f 100644
--- a/dbgpt/serve/feedback/service/service.py
+++ b/dbgpt/serve/feedback/service/service.py
@@ -105,7 +105,7 @@ class Service(BaseService[ServeEntity, ServeRequest, ServerResponse]):
         feedbacks = self.dao.get_list(
             ServeRequest(conv_uid=conv_uid, feedback_type=feedback_type)
         )
-        return [ServerResponse.from_entity(item) for item in feedbacks]
+        return feedbacks
 
     def create_or_update(self, request: ServeRequest) -> ServerResponse:
         """
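The conversation service change above is the core of this patch: get_history_messages() now asks the feedback service for all feedback rows of the conversation, builds a message_id -> feedback map, and attaches the matching entry to each MessageVo by its round index. A standalone sketch of that join is shown below, using plain dicts instead of the ORM entities; attach_feedback and the dict keys are illustrative names, not project APIs.

# Isolated sketch of the join: feedback records are keyed by message_id,
# which stores the message's round index as a string.
from typing import Any, Dict, List, Optional


def attach_feedback(
    messages: List[Dict[str, Any]],
    feedbacks: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    # Build a lookup table once, then enrich each message with its feedback
    # (or an empty dict when none was recorded).
    fb_map = {fb["message_id"]: fb for fb in feedbacks}
    enriched = []
    for msg in messages:
        round_index: Optional[int] = msg.get("round_index")
        feedback = fb_map.get(str(round_index), {}) if round_index is not None else {}
        enriched.append({**msg, "feedback": feedback})
    return enriched
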
diff --git a/dbgpt/serve/rag/retriever/knowledge_space.py b/dbgpt/serve/rag/retriever/knowledge_space.py
index 61f818318..92fd8cb41 100644
--- a/dbgpt/serve/rag/retriever/knowledge_space.py
+++ b/dbgpt/serve/rag/retriever/knowledge_space.py
@@ -5,7 +5,7 @@ from dbgpt.component import ComponentType
 from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
 from dbgpt.core import Chunk
 from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
-from dbgpt.rag.retriever import EmbeddingRetriever, Ranker, QueryRewrite
+from dbgpt.rag.retriever import EmbeddingRetriever, QueryRewrite, Ranker
 from dbgpt.rag.retriever.base import BaseRetriever
 from dbgpt.serve.rag.connector import VectorStoreConnector
 from dbgpt.serve.rag.models.models import KnowledgeSpaceDao
@@ -47,6 +47,7 @@ class KnowledgeSpaceRetriever(BaseRetriever):
             model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL]
         )
         from dbgpt.storage.vector_store.base import VectorStoreConfig
+
         space_dao = KnowledgeSpaceDao()
         space = space_dao.get_one({"id": space_id})
         config = VectorStoreConfig(name=space.name, embedding_fn=embedding_fn)
@@ -58,18 +59,17 @@ class KnowledgeSpaceRetriever(BaseRetriever):
             ComponentType.EXECUTOR_DEFAULT, ExecutorFactory
         ).create()
 
-        self._retriever_chain = RetrieverChain(retrievers=[
-            QARetriever(space_id=space_id,
-                        top_k=top_k,
-                        embedding_fn=embedding_fn
-                        ),
-            EmbeddingRetriever(
-                index_store=self._vector_store_connector.index_client,
-                top_k=top_k,
-                query_rewrite=self._query_rewrite,
-                rerank=self._rerank
-            )
-        ], executor=self._executor
+        self._retriever_chain = RetrieverChain(
+            retrievers=[
+                QARetriever(space_id=space_id, top_k=top_k, embedding_fn=embedding_fn),
+                EmbeddingRetriever(
+                    index_store=self._vector_store_connector.index_client,
+                    top_k=top_k,
+                    query_rewrite=self._query_rewrite,
+                    rerank=self._rerank,
+                ),
+            ],
+            executor=self._executor,
         )
 
     def _retrieve(
@@ -84,9 +84,7 @@ class KnowledgeSpaceRetriever(BaseRetriever):
         Return:
             List[Chunk]: list of chunks
         """
-        candidates = self._retriever_chain.retrieve(
-            query=query, filters=filters
-        )
+        candidates = self._retriever_chain.retrieve(query=query, filters=filters)
         return candidates
 
     def _retrieve_with_score(
diff --git a/dbgpt/serve/rag/retriever/qa_retriever.py b/dbgpt/serve/rag/retriever/qa_retriever.py
index 21ba2f704..4abe68ea2 100644
--- a/dbgpt/serve/rag/retriever/qa_retriever.py
+++ b/dbgpt/serve/rag/retriever/qa_retriever.py
@@ -1,12 +1,11 @@
 import ast
 import json
 import logging
-from typing import List, Optional, Any
+from typing import Any, List, Optional
 
 from dbgpt._private.config import Config
 from dbgpt.app.knowledge.chunk_db import DocumentChunkDao, DocumentChunkEntity
 from dbgpt.app.knowledge.document_db import KnowledgeDocumentDao
-
 from dbgpt.component import ComponentType
 from dbgpt.core import Chunk
 from dbgpt.rag.retriever.base import BaseRetriever
@@ -45,9 +44,7 @@ class QARetriever(BaseRetriever):
         self._chunk_dao = DocumentChunkDao()
         self._embedding_fn = embedding_fn
 
-        space = self._space_dao.get_one(
-            {"id": space_id}
-        )
+        space = self._space_dao.get_one({"id": space_id})
         if not space:
             raise ValueError("space not found")
         self.documents = self._document_dao.get_list({"space": space.name})
@@ -72,16 +69,16 @@ class QARetriever(BaseRetriever):
                 questions = json.loads(doc.questions)
                 if query in questions:
                     chunks = self._chunk_dao.get_document_chunks(
-                        DocumentChunkEntity(
-                            document_id=doc.id
-                        ),
-                        page_size=CHUNK_PAGE_SIZE
+                        DocumentChunkEntity(document_id=doc.id),
+                        page_size=CHUNK_PAGE_SIZE,
                     )
                     candidates = [
-                        Chunk(content=chunk.content,
-                              metadata=ast.literal_eval(chunk.meta_info),
-                              retriever=self.name(),
-                              score=0.0)
+                        Chunk(
+                            content=chunk.content,
+                            metadata=ast.literal_eval(chunk.meta_info),
+                            retriever=self.name(),
+                            score=0.0,
+                        )
                         for chunk in chunks
                     ]
                     candidate_results.extend(
@@ -109,8 +106,7 @@ class QARetriever(BaseRetriever):
         doc_ids = [doc.id for doc in self.documents]
         query_param = DocumentChunkEntity()
         chunks = self._chunk_dao.get_chunks_with_questions(
-            query=query_param,
-            document_ids=doc_ids
+            query=query_param, document_ids=doc_ids
         )
         for chunk in chunks:
             if chunk.questions:
@@ -118,14 +114,13 @@ class QARetriever(BaseRetriever):
                 if query in questions:
                     logger.info(f"qa chunk hit:{chunk}, question:{query}")
                     candidate_results.append(
-                        Chunk(content=chunk.content,
-                              chunk_id=str(chunk.id),
-                              metadata={
-                                  "prop_field": ast.literal_eval(chunk.meta_info)
-                              },
-                              retriever=self.name(),
-                              score=1.0
-                              )
+                        Chunk(
+                            content=chunk.content,
+                            chunk_id=str(chunk.id),
+                            metadata={"prop_field": ast.literal_eval(chunk.meta_info)},
+                            retriever=self.name(),
+                            score=1.0,
+                        )
                     )
         if len(candidate_results) > 0:
             return self._cosine_similarity_rerank(candidate_results, query)
@@ -137,16 +132,16 @@ class QARetriever(BaseRetriever):
                 logger.info(f"qa document hit:{doc}, question:{query}")
                 chunks = self._chunk_dao.get_document_chunks(
                     DocumentChunkEntity(document_id=doc.id),
-                    page_size=CHUNK_PAGE_SIZE
+                    page_size=CHUNK_PAGE_SIZE,
                 )
                 candidates_with_scores = [
-                    Chunk(content=chunk.content,
-                          chunk_id=str(chunk.id),
-                          metadata={
-                              "prop_field": ast.literal_eval(chunk.meta_info)
-                          },
-                          retriever=self.name(),
-                          score=1.0)
+                    Chunk(
+                        content=chunk.content,
+                        chunk_id=str(chunk.id),
+                        metadata={"prop_field": ast.literal_eval(chunk.meta_info)},
+                        retriever=self.name(),
+                        score=1.0,
+                    )
                     for chunk in chunks
                 ]
                 candidate_results.extend(
@@ -188,26 +183,29 @@ class QARetriever(BaseRetriever):
         )
         return candidates_with_score
 
-    def _cosine_similarity_rerank(self, candidates_with_scores: List[Chunk]
-                                  , query: str) -> List[Chunk]:
+    def _cosine_similarity_rerank(
+        self, candidates_with_scores: List[Chunk], query: str
+    ) -> List[Chunk]:
         """Rerank candidates using cosine similarity."""
         if len(candidates_with_scores) > self._top_k:
             for candidate in candidates_with_scores:
                 similarity = calculate_cosine_similarity(
                     embeddings=self._embedding_fn,
                     prediction=query,
-                    contexts=[candidate.content]
+                    contexts=[candidate.content],
                 )
                 score = float(similarity.mean())
                 candidate.score = score
             candidates_with_scores.sort(key=lambda x: x.score, reverse=True)
             candidates_with_scores = candidates_with_scores[: self._top_k]
             candidates_with_scores = [
-                Chunk(content=candidate.content,
-                      chunk_id=candidate.chunk_id,
-                      metadata=candidate.metadata,
-                      retriever=self.name(),
-                      score=1.0)
+                Chunk(
+                    content=candidate.content,
+                    chunk_id=candidate.chunk_id,
+                    metadata=candidate.metadata,
+                    retriever=self.name(),
+                    score=1.0,
+                )
                 for candidate in candidates_with_scores
             ]
         return candidates_with_scores
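For context on the qa_retriever.py changes above: QARetriever matches the query against the questions stored on documents and chunks, and _cosine_similarity_rerank trims an oversized candidate set down to top_k by cosine similarity between the query and each chunk. Below is a generic, self-contained illustration of that rerank step; embed() stands in for the configured embedding function, and this is not the project's calculate_cosine_similarity helper.

# Generic cosine rerank: score every candidate against the query embedding
# and keep the best top_k.
from typing import Callable, List, Tuple

import numpy as np


def cosine_rerank(
    query: str,
    chunks: List[str],
    embed: Callable[[str], np.ndarray],
    top_k: int,
) -> List[Tuple[str, float]]:
    q = embed(query)
    scored = []
    for chunk in chunks:
        c = embed(chunk)
        # Cosine similarity; the small epsilon guards against zero-norm vectors.
        score = float(np.dot(q, c) / (np.linalg.norm(q) * np.linalg.norm(c) + 1e-12))
        scored.append((chunk, score))
    scored.sort(key=lambda x: x[1], reverse=True)
    return scored[:top_k]
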
diff --git a/dbgpt/serve/rag/retriever/retriever_chain.py b/dbgpt/serve/rag/retriever/retriever_chain.py
index 18918198a..6ef435594 100644
--- a/dbgpt/serve/rag/retriever/retriever_chain.py
+++ b/dbgpt/serve/rag/retriever/retriever_chain.py
@@ -1,5 +1,5 @@
-from concurrent.futures import ThreadPoolExecutor, Executor
-from typing import Optional, List
+from concurrent.futures import Executor, ThreadPoolExecutor
+from typing import List, Optional
 
 from dbgpt.core import Chunk
 from dbgpt.rag.retriever.base import BaseRetriever
@@ -10,14 +10,18 @@ from dbgpt.util.executor_utils import blocking_func_to_async
 class RetrieverChain(BaseRetriever):
     """Retriever chain class."""
 
-    def __init__(self, retrievers: Optional[List[BaseRetriever]] = None,
-                 executor: Optional[Executor] = None):
+    def __init__(
+        self,
+        retrievers: Optional[List[BaseRetriever]] = None,
+        executor: Optional[Executor] = None,
+    ):
         """Create retriever chain instance."""
         self._retrievers = retrievers or []
         self._executor = executor or ThreadPoolExecutor()
 
-    def _retrieve(self, query: str, filters: Optional[MetadataFilters] = None) -> List[
-        Chunk]:
+    def _retrieve(
+        self, query: str, filters: Optional[MetadataFilters] = None
+    ) -> List[Chunk]:
         """Retrieve knowledge chunks.
         Args:
             query (str): query text
@@ -26,15 +30,14 @@ class RetrieverChain(BaseRetriever):
             List[Chunk]: list of chunks
         """
         for retriever in self._retrievers:
-            candidates = retriever.retrieve(
-                query, filters
-            )
+            candidates = retriever.retrieve(query, filters)
             if candidates:
                 return candidates
         return []
 
-    async def _aretrieve(self, query: str, filters: Optional[MetadataFilters] = None) -> \
-        List[Chunk]:
+    async def _aretrieve(
+        self, query: str, filters: Optional[MetadataFilters] = None
+    ) -> List[Chunk]:
         """Retrieve knowledge chunks.
         Args:
             query (str): query text
@@ -47,13 +50,18 @@ class RetrieverChain(BaseRetriever):
         )
         return candidates
 
-    def _retrieve_with_score(self, query: str, score_threshold: float, filters: Optional[MetadataFilters] = None) -> List[Chunk]:
+    def _retrieve_with_score(
+        self,
+        query: str,
+        score_threshold: float,
+        filters: Optional[MetadataFilters] = None,
+    ) -> List[Chunk]:
         """Retrieve knowledge chunks.
-            Args:
-                query (str): query text
-                filters: (Optional[MetadataFilters]) metadata filters.
-            Return:
-                List[Chunk]: list of chunks
+        Args:
+            query (str): query text
+            filters: (Optional[MetadataFilters]) metadata filters.
+        Return:
+            List[Chunk]: list of chunks
         """
         for retriever in self._retrievers:
             candidates_with_scores = retriever.retrieve_with_scores(
@@ -63,16 +71,21 @@ class RetrieverChain(BaseRetriever):
             )
             if candidates_with_scores:
                 return candidates_with_scores
         return []
 
-    async def _aretrieve_with_score(self, query: str, score_threshold: float, filters: Optional[MetadataFilters] = None) -> List[Chunk]:
+    async def _aretrieve_with_score(
+        self,
+        query: str,
+        score_threshold: float,
+        filters: Optional[MetadataFilters] = None,
+    ) -> List[Chunk]:
         """Retrieve knowledge chunks with score.
-            Args:
-                query (str): query text
-                score_threshold (float): score threshold
-                filters: (Optional[MetadataFilters]) metadata filters.
-            Return:
-                List[Chunk]: list of chunks with score
+        Args:
+            query (str): query text
+            score_threshold (float): score threshold
+            filters: (Optional[MetadataFilters]) metadata filters.
+        Return:
+            List[Chunk]: list of chunks with score
         """
         candidates_with_score = await blocking_func_to_async(
             self._executor, self._retrieve_with_score, query, score_threshold, filters
         )
-        return candidates_with_score
\ No newline at end of file
+        return candidates_with_score
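
Taken together, the retriever changes wire KnowledgeSpaceRetriever to a RetrieverChain that tries QARetriever first and falls back to EmbeddingRetriever only when no stored question matches the query. A toy sketch of that first-non-empty-result behavior follows; the two lookup functions are made-up placeholders, not project code.

# Chain-of-responsibility sketch: try retrievers in order, return the first
# non-empty result so the cheap exact-match lookup short-circuits the
# embedding search.
from typing import Callable, List

Retriever = Callable[[str], List[str]]


def retrieve_first_hit(retrievers: List[Retriever], query: str) -> List[str]:
    for retrieve in retrievers:
        candidates = retrieve(query)
        if candidates:
            return candidates
    return []


def qa_lookup(query: str) -> List[str]:
    # Stand-in for QARetriever: exact match against stored questions.
    faq = {"What is DB-GPT?": ["DB-GPT is an AI-native data app framework."]}
    return faq.get(query, [])


def embedding_search(query: str) -> List[str]:
    # Stand-in for EmbeddingRetriever / the vector store search.
    return [f"vector-store result for: {query}"]


print(retrieve_first_hit([qa_lookup, embedding_search], "What is DB-GPT?"))
print(retrieve_first_hit([qa_lookup, embedding_search], "How to deploy?"))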