feat(feedback): feedback upgrade

This commit is contained in:
yhjun1026
2024-08-15 15:37:10 +08:00
parent 260735e9cc
commit af8fadce09
9 changed files with 114 additions and 91 deletions

View File

@@ -12,12 +12,6 @@ ALTER TABLE gpts_app ADD COLUMN `admins` text DEFAULT NULL COMMENT 'administrat
ALTER TABLE connect_config ADD COLUMN `user_name` varchar(255) DEFAULT NULL COMMENT 'user name'; ALTER TABLE connect_config ADD COLUMN `user_name` varchar(255) DEFAULT NULL COMMENT 'user name';
ALTER TABLE connect_config ADD COLUMN `user_id` varchar(255) DEFAULT NULL COMMENT 'user id'; ALTER TABLE connect_config ADD COLUMN `user_id` varchar(255) DEFAULT NULL COMMENT 'user id';
-- knowledge_space
ALTER TABLE knowledge_space ADD COLUMN `user_id` varchar(255) DEFAULT NULL COMMENT 'knowledge space owner';
ALTER TABLE knowledge_space ADD COLUMN `user_ids` text DEFAULT NULL COMMENT 'knowledge space members';
-- document_chunk -- document_chunk
ALTER TABLE document_chunk ADD COLUMN `questions` text DEFAULT NULL COMMENT 'chunk related questions'; ALTER TABLE document_chunk ADD COLUMN `questions` text DEFAULT NULL COMMENT 'chunk related questions';

View File

@@ -172,7 +172,6 @@ class KnowledgeService:
ks.context = argument_request.argument ks.context = argument_request.argument
return knowledge_space_dao.update_knowledge_space(ks) return knowledge_space_dao.update_knowledge_space(ks)
def get_knowledge_documents(self, space, request: DocumentQueryRequest): def get_knowledge_documents(self, space, request: DocumentQueryRequest):
"""get knowledge documents """get knowledge documents
Args: Args:

View File

@@ -76,9 +76,7 @@ class ChatKnowledge(BaseChat):
embedding_fn = embedding_factory.create( embedding_fn = embedding_factory.create(
model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL] model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL]
) )
from dbgpt.serve.rag.models.models import ( from dbgpt.serve.rag.models.models import KnowledgeSpaceDao
KnowledgeSpaceDao,
)
from dbgpt.storage.vector_store.base import VectorStoreConfig from dbgpt.storage.vector_store.base import VectorStoreConfig
spaces = KnowledgeSpaceDao().get_knowledge_space_by_ids([self.knowledge_space]) spaces = KnowledgeSpaceDao().get_knowledge_space_by_ids([self.knowledge_space])

View File

@@ -156,6 +156,14 @@ class MessageVo(BaseModel):
], ],
) )
feedback: Optional[Dict] = Field(
default={},
description="feedback info",
examples=[
"{}",
],
)
def to_dict(self, **kwargs) -> Dict[str, Any]: def to_dict(self, **kwargs) -> Dict[str, Any]:
"""Convert the model to a dictionary""" """Convert the model to a dictionary"""
return model_to_dict(self, **kwargs) return model_to_dict(self, **kwargs)

View File

@@ -14,6 +14,7 @@ from dbgpt.storage.metadata import BaseDao
from dbgpt.storage.metadata._base_dao import REQ, RES from dbgpt.storage.metadata._base_dao import REQ, RES
from dbgpt.util.pagination_utils import PaginationResult from dbgpt.util.pagination_utils import PaginationResult
from ...feedback.api.endpoints import get_service
from ..api.schemas import MessageVo, ServeRequest, ServerResponse from ..api.schemas import MessageVo, ServeRequest, ServerResponse
from ..config import SERVE_CONFIG_KEY_PREFIX, SERVE_SERVICE_COMPONENT_NAME, ServeConfig from ..config import SERVE_CONFIG_KEY_PREFIX, SERVE_SERVICE_COMPONENT_NAME, ServeConfig
from ..models.models import ServeDao, ServeEntity from ..models.models import ServeDao, ServeEntity
@@ -201,13 +202,27 @@ class Service(BaseService[ServeEntity, ServeRequest, ServerResponse]):
conv: StorageConversation = self.create_storage_conv(request) conv: StorageConversation = self.create_storage_conv(request)
result = [] result = []
messages = _append_view_messages(conv.messages) messages = _append_view_messages(conv.messages)
feedback_service = get_service()
feedbacks = feedback_service.list_conv_feedbacks(conv_uid=request.conv_uid)
fb_map = {fb.message_id: fb.to_dict() for fb in feedbacks}
for msg in messages: for msg in messages:
feedback = {}
if (
msg.round_index is not None
and fb_map.get(str(msg.round_index)) is not None
):
feedback = fb_map.get(str(msg.round_index))
result.append( result.append(
MessageVo( MessageVo(
role=msg.type, role=msg.type,
context=msg.content, context=msg.content,
order=msg.round_index, order=msg.round_index,
model_name=self.config.default_model, model_name=self.config.default_model,
feedback=feedback,
) )
) )
return result return result

View File

@@ -105,7 +105,7 @@ class Service(BaseService[ServeEntity, ServeRequest, ServerResponse]):
feedbacks = self.dao.get_list( feedbacks = self.dao.get_list(
ServeRequest(conv_uid=conv_uid, feedback_type=feedback_type) ServeRequest(conv_uid=conv_uid, feedback_type=feedback_type)
) )
return [ServerResponse.from_entity(item) for item in feedbacks] return feedbacks
def create_or_update(self, request: ServeRequest) -> ServerResponse: def create_or_update(self, request: ServeRequest) -> ServerResponse:
""" """

View File

@@ -5,7 +5,7 @@ from dbgpt.component import ComponentType
from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
from dbgpt.core import Chunk from dbgpt.core import Chunk
from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
from dbgpt.rag.retriever import EmbeddingRetriever, Ranker, QueryRewrite from dbgpt.rag.retriever import EmbeddingRetriever, QueryRewrite, Ranker
from dbgpt.rag.retriever.base import BaseRetriever from dbgpt.rag.retriever.base import BaseRetriever
from dbgpt.serve.rag.connector import VectorStoreConnector from dbgpt.serve.rag.connector import VectorStoreConnector
from dbgpt.serve.rag.models.models import KnowledgeSpaceDao from dbgpt.serve.rag.models.models import KnowledgeSpaceDao
@@ -47,6 +47,7 @@ class KnowledgeSpaceRetriever(BaseRetriever):
model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL] model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL]
) )
from dbgpt.storage.vector_store.base import VectorStoreConfig from dbgpt.storage.vector_store.base import VectorStoreConfig
space_dao = KnowledgeSpaceDao() space_dao = KnowledgeSpaceDao()
space = space_dao.get_one({"id": space_id}) space = space_dao.get_one({"id": space_id})
config = VectorStoreConfig(name=space.name, embedding_fn=embedding_fn) config = VectorStoreConfig(name=space.name, embedding_fn=embedding_fn)
@@ -58,18 +59,17 @@ class KnowledgeSpaceRetriever(BaseRetriever):
ComponentType.EXECUTOR_DEFAULT, ExecutorFactory ComponentType.EXECUTOR_DEFAULT, ExecutorFactory
).create() ).create()
self._retriever_chain = RetrieverChain(retrievers=[ self._retriever_chain = RetrieverChain(
QARetriever(space_id=space_id, retrievers=[
top_k=top_k, QARetriever(space_id=space_id, top_k=top_k, embedding_fn=embedding_fn),
embedding_fn=embedding_fn EmbeddingRetriever(
), index_store=self._vector_store_connector.index_client,
EmbeddingRetriever( top_k=top_k,
index_store=self._vector_store_connector.index_client, query_rewrite=self._query_rewrite,
top_k=top_k, rerank=self._rerank,
query_rewrite=self._query_rewrite, ),
rerank=self._rerank ],
) executor=self._executor,
], executor=self._executor
) )
def _retrieve( def _retrieve(
@@ -84,9 +84,7 @@ class KnowledgeSpaceRetriever(BaseRetriever):
Return: Return:
List[Chunk]: list of chunks List[Chunk]: list of chunks
""" """
candidates = self._retriever_chain.retrieve( candidates = self._retriever_chain.retrieve(query=query, filters=filters)
query=query, filters=filters
)
return candidates return candidates
def _retrieve_with_score( def _retrieve_with_score(

View File

@@ -1,12 +1,11 @@
import ast import ast
import json import json
import logging import logging
from typing import List, Optional, Any from typing import Any, List, Optional
from dbgpt._private.config import Config from dbgpt._private.config import Config
from dbgpt.app.knowledge.chunk_db import DocumentChunkDao, DocumentChunkEntity from dbgpt.app.knowledge.chunk_db import DocumentChunkDao, DocumentChunkEntity
from dbgpt.app.knowledge.document_db import KnowledgeDocumentDao from dbgpt.app.knowledge.document_db import KnowledgeDocumentDao
from dbgpt.component import ComponentType from dbgpt.component import ComponentType
from dbgpt.core import Chunk from dbgpt.core import Chunk
from dbgpt.rag.retriever.base import BaseRetriever from dbgpt.rag.retriever.base import BaseRetriever
@@ -45,9 +44,7 @@ class QARetriever(BaseRetriever):
self._chunk_dao = DocumentChunkDao() self._chunk_dao = DocumentChunkDao()
self._embedding_fn = embedding_fn self._embedding_fn = embedding_fn
space = self._space_dao.get_one( space = self._space_dao.get_one({"id": space_id})
{"id": space_id}
)
if not space: if not space:
raise ValueError("space not found") raise ValueError("space not found")
self.documents = self._document_dao.get_list({"space": space.name}) self.documents = self._document_dao.get_list({"space": space.name})
@@ -72,16 +69,16 @@ class QARetriever(BaseRetriever):
questions = json.loads(doc.questions) questions = json.loads(doc.questions)
if query in questions: if query in questions:
chunks = self._chunk_dao.get_document_chunks( chunks = self._chunk_dao.get_document_chunks(
DocumentChunkEntity( DocumentChunkEntity(document_id=doc.id),
document_id=doc.id page_size=CHUNK_PAGE_SIZE,
),
page_size=CHUNK_PAGE_SIZE
) )
candidates = [ candidates = [
Chunk(content=chunk.content, Chunk(
metadata=ast.literal_eval(chunk.meta_info), content=chunk.content,
retriever=self.name(), metadata=ast.literal_eval(chunk.meta_info),
score=0.0) retriever=self.name(),
score=0.0,
)
for chunk in chunks for chunk in chunks
] ]
candidate_results.extend( candidate_results.extend(
@@ -109,8 +106,7 @@ class QARetriever(BaseRetriever):
doc_ids = [doc.id for doc in self.documents] doc_ids = [doc.id for doc in self.documents]
query_param = DocumentChunkEntity() query_param = DocumentChunkEntity()
chunks = self._chunk_dao.get_chunks_with_questions( chunks = self._chunk_dao.get_chunks_with_questions(
query=query_param, query=query_param, document_ids=doc_ids
document_ids=doc_ids
) )
for chunk in chunks: for chunk in chunks:
if chunk.questions: if chunk.questions:
@@ -118,14 +114,13 @@ class QARetriever(BaseRetriever):
if query in questions: if query in questions:
logger.info(f"qa chunk hit:{chunk}, question:{query}") logger.info(f"qa chunk hit:{chunk}, question:{query}")
candidate_results.append( candidate_results.append(
Chunk(content=chunk.content, Chunk(
chunk_id=str(chunk.id), content=chunk.content,
metadata={ chunk_id=str(chunk.id),
"prop_field": ast.literal_eval(chunk.meta_info) metadata={"prop_field": ast.literal_eval(chunk.meta_info)},
}, retriever=self.name(),
retriever=self.name(), score=1.0,
score=1.0 )
)
) )
if len(candidate_results) > 0: if len(candidate_results) > 0:
return self._cosine_similarity_rerank(candidate_results, query) return self._cosine_similarity_rerank(candidate_results, query)
@@ -137,16 +132,16 @@ class QARetriever(BaseRetriever):
logger.info(f"qa document hit:{doc}, question:{query}") logger.info(f"qa document hit:{doc}, question:{query}")
chunks = self._chunk_dao.get_document_chunks( chunks = self._chunk_dao.get_document_chunks(
DocumentChunkEntity(document_id=doc.id), DocumentChunkEntity(document_id=doc.id),
page_size=CHUNK_PAGE_SIZE page_size=CHUNK_PAGE_SIZE,
) )
candidates_with_scores = [ candidates_with_scores = [
Chunk(content=chunk.content, Chunk(
chunk_id=str(chunk.id), content=chunk.content,
metadata={ chunk_id=str(chunk.id),
"prop_field": ast.literal_eval(chunk.meta_info) metadata={"prop_field": ast.literal_eval(chunk.meta_info)},
}, retriever=self.name(),
retriever=self.name(), score=1.0,
score=1.0) )
for chunk in chunks for chunk in chunks
] ]
candidate_results.extend( candidate_results.extend(
@@ -188,26 +183,29 @@ class QARetriever(BaseRetriever):
) )
return candidates_with_score return candidates_with_score
def _cosine_similarity_rerank(self, candidates_with_scores: List[Chunk] def _cosine_similarity_rerank(
, query: str) -> List[Chunk]: self, candidates_with_scores: List[Chunk], query: str
) -> List[Chunk]:
"""Rerank candidates using cosine similarity.""" """Rerank candidates using cosine similarity."""
if len(candidates_with_scores) > self._top_k: if len(candidates_with_scores) > self._top_k:
for candidate in candidates_with_scores: for candidate in candidates_with_scores:
similarity = calculate_cosine_similarity( similarity = calculate_cosine_similarity(
embeddings=self._embedding_fn, embeddings=self._embedding_fn,
prediction=query, prediction=query,
contexts=[candidate.content] contexts=[candidate.content],
) )
score = float(similarity.mean()) score = float(similarity.mean())
candidate.score = score candidate.score = score
candidates_with_scores.sort(key=lambda x: x.score, reverse=True) candidates_with_scores.sort(key=lambda x: x.score, reverse=True)
candidates_with_scores = candidates_with_scores[: self._top_k] candidates_with_scores = candidates_with_scores[: self._top_k]
candidates_with_scores = [ candidates_with_scores = [
Chunk(content=candidate.content, Chunk(
chunk_id=candidate.chunk_id, content=candidate.content,
metadata=candidate.metadata, chunk_id=candidate.chunk_id,
retriever=self.name(), metadata=candidate.metadata,
score=1.0) retriever=self.name(),
score=1.0,
)
for candidate in candidates_with_scores for candidate in candidates_with_scores
] ]
return candidates_with_scores return candidates_with_scores

View File

@@ -1,5 +1,5 @@
from concurrent.futures import ThreadPoolExecutor, Executor from concurrent.futures import Executor, ThreadPoolExecutor
from typing import Optional, List from typing import List, Optional
from dbgpt.core import Chunk from dbgpt.core import Chunk
from dbgpt.rag.retriever.base import BaseRetriever from dbgpt.rag.retriever.base import BaseRetriever
@@ -10,14 +10,18 @@ from dbgpt.util.executor_utils import blocking_func_to_async
class RetrieverChain(BaseRetriever): class RetrieverChain(BaseRetriever):
"""Retriever chain class.""" """Retriever chain class."""
def __init__(self, retrievers: Optional[List[BaseRetriever]] = None, def __init__(
executor: Optional[Executor] = None): self,
retrievers: Optional[List[BaseRetriever]] = None,
executor: Optional[Executor] = None,
):
"""Create retriever chain instance.""" """Create retriever chain instance."""
self._retrievers = retrievers or [] self._retrievers = retrievers or []
self._executor = executor or ThreadPoolExecutor() self._executor = executor or ThreadPoolExecutor()
def _retrieve(self, query: str, filters: Optional[MetadataFilters] = None) -> List[ def _retrieve(
Chunk]: self, query: str, filters: Optional[MetadataFilters] = None
) -> List[Chunk]:
"""Retrieve knowledge chunks. """Retrieve knowledge chunks.
Args: Args:
query (str): query text query (str): query text
@@ -26,15 +30,14 @@ class RetrieverChain(BaseRetriever):
List[Chunk]: list of chunks List[Chunk]: list of chunks
""" """
for retriever in self._retrievers: for retriever in self._retrievers:
candidates = retriever.retrieve( candidates = retriever.retrieve(query, filters)
query, filters
)
if candidates: if candidates:
return candidates return candidates
return [] return []
async def _aretrieve(self, query: str, filters: Optional[MetadataFilters] = None) -> \ async def _aretrieve(
List[Chunk]: self, query: str, filters: Optional[MetadataFilters] = None
) -> List[Chunk]:
"""Retrieve knowledge chunks. """Retrieve knowledge chunks.
Args: Args:
query (str): query text query (str): query text
@@ -47,13 +50,18 @@ class RetrieverChain(BaseRetriever):
) )
return candidates return candidates
def _retrieve_with_score(self, query: str, score_threshold: float, filters: Optional[MetadataFilters] = None) -> List[Chunk]: def _retrieve_with_score(
self,
query: str,
score_threshold: float,
filters: Optional[MetadataFilters] = None,
) -> List[Chunk]:
"""Retrieve knowledge chunks. """Retrieve knowledge chunks.
Args: Args:
query (str): query text query (str): query text
filters: (Optional[MetadataFilters]) metadata filters. filters: (Optional[MetadataFilters]) metadata filters.
Return: Return:
List[Chunk]: list of chunks List[Chunk]: list of chunks
""" """
for retriever in self._retrievers: for retriever in self._retrievers:
candidates_with_scores = retriever.retrieve_with_scores( candidates_with_scores = retriever.retrieve_with_scores(
@@ -63,14 +71,19 @@ class RetrieverChain(BaseRetriever):
return candidates_with_scores return candidates_with_scores
return [] return []
async def _aretrieve_with_score(self, query: str, score_threshold: float, filters: Optional[MetadataFilters] = None) -> List[Chunk]: async def _aretrieve_with_score(
self,
query: str,
score_threshold: float,
filters: Optional[MetadataFilters] = None,
) -> List[Chunk]:
"""Retrieve knowledge chunks with score. """Retrieve knowledge chunks with score.
Args: Args:
query (str): query text query (str): query text
score_threshold (float): score threshold score_threshold (float): score threshold
filters: (Optional[MetadataFilters]) metadata filters. filters: (Optional[MetadataFilters]) metadata filters.
Return: Return:
List[Chunk]: list of chunks with score List[Chunk]: list of chunks with score
""" """
candidates_with_score = await blocking_func_to_async( candidates_with_score = await blocking_func_to_async(
self._executor, self._retrieve_with_score, query, score_threshold, filters self._executor, self._retrieve_with_score, query, score_threshold, filters