feat(feedback): feedback upgrade

This commit is contained in:
yhjun1026
2024-08-15 15:37:10 +08:00
parent 260735e9cc
commit af8fadce09
9 changed files with 114 additions and 91 deletions

View File

@@ -12,12 +12,6 @@ ALTER TABLE gpts_app ADD COLUMN `admins` text DEFAULT NULL COMMENT 'administrat
ALTER TABLE connect_config ADD COLUMN `user_name` varchar(255) DEFAULT NULL COMMENT 'user name';
ALTER TABLE connect_config ADD COLUMN `user_id` varchar(255) DEFAULT NULL COMMENT 'user id';
-- knowledge_space
ALTER TABLE knowledge_space ADD COLUMN `user_id` varchar(255) DEFAULT NULL COMMENT 'knowledge space owner';
ALTER TABLE knowledge_space ADD COLUMN `user_ids` text DEFAULT NULL COMMENT 'knowledge space members';
-- document_chunk
ALTER TABLE document_chunk ADD COLUMN `questions` text DEFAULT NULL COMMENT 'chunk related questions';

View File

@@ -172,7 +172,6 @@ class KnowledgeService:
ks.context = argument_request.argument
return knowledge_space_dao.update_knowledge_space(ks)
def get_knowledge_documents(self, space, request: DocumentQueryRequest):
"""get knowledge documents
Args:

View File

@@ -76,9 +76,7 @@ class ChatKnowledge(BaseChat):
embedding_fn = embedding_factory.create(
model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL]
)
from dbgpt.serve.rag.models.models import (
KnowledgeSpaceDao,
)
from dbgpt.serve.rag.models.models import KnowledgeSpaceDao
from dbgpt.storage.vector_store.base import VectorStoreConfig
spaces = KnowledgeSpaceDao().get_knowledge_space_by_ids([self.knowledge_space])

View File

@@ -156,6 +156,14 @@ class MessageVo(BaseModel):
],
)
feedback: Optional[Dict] = Field(
default={},
description="feedback info",
examples=[
"{}",
],
)
def to_dict(self, **kwargs) -> Dict[str, Any]:
"""Convert the model to a dictionary"""
return model_to_dict(self, **kwargs)

View File

@@ -14,6 +14,7 @@ from dbgpt.storage.metadata import BaseDao
from dbgpt.storage.metadata._base_dao import REQ, RES
from dbgpt.util.pagination_utils import PaginationResult
from ...feedback.api.endpoints import get_service
from ..api.schemas import MessageVo, ServeRequest, ServerResponse
from ..config import SERVE_CONFIG_KEY_PREFIX, SERVE_SERVICE_COMPONENT_NAME, ServeConfig
from ..models.models import ServeDao, ServeEntity
@@ -201,13 +202,27 @@ class Service(BaseService[ServeEntity, ServeRequest, ServerResponse]):
conv: StorageConversation = self.create_storage_conv(request)
result = []
messages = _append_view_messages(conv.messages)
feedback_service = get_service()
feedbacks = feedback_service.list_conv_feedbacks(conv_uid=request.conv_uid)
fb_map = {fb.message_id: fb.to_dict() for fb in feedbacks}
for msg in messages:
feedback = {}
if (
msg.round_index is not None
and fb_map.get(str(msg.round_index)) is not None
):
feedback = fb_map.get(str(msg.round_index))
result.append(
MessageVo(
role=msg.type,
context=msg.content,
order=msg.round_index,
model_name=self.config.default_model,
feedback=feedback,
)
)
return result

View File

@@ -105,7 +105,7 @@ class Service(BaseService[ServeEntity, ServeRequest, ServerResponse]):
feedbacks = self.dao.get_list(
ServeRequest(conv_uid=conv_uid, feedback_type=feedback_type)
)
return [ServerResponse.from_entity(item) for item in feedbacks]
return feedbacks
def create_or_update(self, request: ServeRequest) -> ServerResponse:
"""

View File

@@ -5,7 +5,7 @@ from dbgpt.component import ComponentType
from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
from dbgpt.core import Chunk
from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
from dbgpt.rag.retriever import EmbeddingRetriever, Ranker, QueryRewrite
from dbgpt.rag.retriever import EmbeddingRetriever, QueryRewrite, Ranker
from dbgpt.rag.retriever.base import BaseRetriever
from dbgpt.serve.rag.connector import VectorStoreConnector
from dbgpt.serve.rag.models.models import KnowledgeSpaceDao
@@ -47,6 +47,7 @@ class KnowledgeSpaceRetriever(BaseRetriever):
model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL]
)
from dbgpt.storage.vector_store.base import VectorStoreConfig
space_dao = KnowledgeSpaceDao()
space = space_dao.get_one({"id": space_id})
config = VectorStoreConfig(name=space.name, embedding_fn=embedding_fn)
@@ -58,18 +59,17 @@ class KnowledgeSpaceRetriever(BaseRetriever):
ComponentType.EXECUTOR_DEFAULT, ExecutorFactory
).create()
self._retriever_chain = RetrieverChain(retrievers=[
QARetriever(space_id=space_id,
top_k=top_k,
embedding_fn=embedding_fn
),
EmbeddingRetriever(
index_store=self._vector_store_connector.index_client,
top_k=top_k,
query_rewrite=self._query_rewrite,
rerank=self._rerank
)
], executor=self._executor
self._retriever_chain = RetrieverChain(
retrievers=[
QARetriever(space_id=space_id, top_k=top_k, embedding_fn=embedding_fn),
EmbeddingRetriever(
index_store=self._vector_store_connector.index_client,
top_k=top_k,
query_rewrite=self._query_rewrite,
rerank=self._rerank,
),
],
executor=self._executor,
)
def _retrieve(
@@ -84,9 +84,7 @@ class KnowledgeSpaceRetriever(BaseRetriever):
Return:
List[Chunk]: list of chunks
"""
candidates = self._retriever_chain.retrieve(
query=query, filters=filters
)
candidates = self._retriever_chain.retrieve(query=query, filters=filters)
return candidates
def _retrieve_with_score(

View File

@@ -1,12 +1,11 @@
import ast
import json
import logging
from typing import List, Optional, Any
from typing import Any, List, Optional
from dbgpt._private.config import Config
from dbgpt.app.knowledge.chunk_db import DocumentChunkDao, DocumentChunkEntity
from dbgpt.app.knowledge.document_db import KnowledgeDocumentDao
from dbgpt.component import ComponentType
from dbgpt.core import Chunk
from dbgpt.rag.retriever.base import BaseRetriever
@@ -45,9 +44,7 @@ class QARetriever(BaseRetriever):
self._chunk_dao = DocumentChunkDao()
self._embedding_fn = embedding_fn
space = self._space_dao.get_one(
{"id": space_id}
)
space = self._space_dao.get_one({"id": space_id})
if not space:
raise ValueError("space not found")
self.documents = self._document_dao.get_list({"space": space.name})
@@ -72,16 +69,16 @@ class QARetriever(BaseRetriever):
questions = json.loads(doc.questions)
if query in questions:
chunks = self._chunk_dao.get_document_chunks(
DocumentChunkEntity(
document_id=doc.id
),
page_size=CHUNK_PAGE_SIZE
DocumentChunkEntity(document_id=doc.id),
page_size=CHUNK_PAGE_SIZE,
)
candidates = [
Chunk(content=chunk.content,
metadata=ast.literal_eval(chunk.meta_info),
retriever=self.name(),
score=0.0)
Chunk(
content=chunk.content,
metadata=ast.literal_eval(chunk.meta_info),
retriever=self.name(),
score=0.0,
)
for chunk in chunks
]
candidate_results.extend(
@@ -109,8 +106,7 @@ class QARetriever(BaseRetriever):
doc_ids = [doc.id for doc in self.documents]
query_param = DocumentChunkEntity()
chunks = self._chunk_dao.get_chunks_with_questions(
query=query_param,
document_ids=doc_ids
query=query_param, document_ids=doc_ids
)
for chunk in chunks:
if chunk.questions:
@@ -118,14 +114,13 @@ class QARetriever(BaseRetriever):
if query in questions:
logger.info(f"qa chunk hit:{chunk}, question:{query}")
candidate_results.append(
Chunk(content=chunk.content,
chunk_id=str(chunk.id),
metadata={
"prop_field": ast.literal_eval(chunk.meta_info)
},
retriever=self.name(),
score=1.0
)
Chunk(
content=chunk.content,
chunk_id=str(chunk.id),
metadata={"prop_field": ast.literal_eval(chunk.meta_info)},
retriever=self.name(),
score=1.0,
)
)
if len(candidate_results) > 0:
return self._cosine_similarity_rerank(candidate_results, query)
@@ -137,16 +132,16 @@ class QARetriever(BaseRetriever):
logger.info(f"qa document hit:{doc}, question:{query}")
chunks = self._chunk_dao.get_document_chunks(
DocumentChunkEntity(document_id=doc.id),
page_size=CHUNK_PAGE_SIZE
page_size=CHUNK_PAGE_SIZE,
)
candidates_with_scores = [
Chunk(content=chunk.content,
chunk_id=str(chunk.id),
metadata={
"prop_field": ast.literal_eval(chunk.meta_info)
},
retriever=self.name(),
score=1.0)
Chunk(
content=chunk.content,
chunk_id=str(chunk.id),
metadata={"prop_field": ast.literal_eval(chunk.meta_info)},
retriever=self.name(),
score=1.0,
)
for chunk in chunks
]
candidate_results.extend(
@@ -188,26 +183,29 @@ class QARetriever(BaseRetriever):
)
return candidates_with_score
def _cosine_similarity_rerank(self, candidates_with_scores: List[Chunk]
, query: str) -> List[Chunk]:
def _cosine_similarity_rerank(
self, candidates_with_scores: List[Chunk], query: str
) -> List[Chunk]:
"""Rerank candidates using cosine similarity."""
if len(candidates_with_scores) > self._top_k:
for candidate in candidates_with_scores:
similarity = calculate_cosine_similarity(
embeddings=self._embedding_fn,
prediction=query,
contexts=[candidate.content]
contexts=[candidate.content],
)
score = float(similarity.mean())
candidate.score = score
candidates_with_scores.sort(key=lambda x: x.score, reverse=True)
candidates_with_scores = candidates_with_scores[: self._top_k]
candidates_with_scores = [
Chunk(content=candidate.content,
chunk_id=candidate.chunk_id,
metadata=candidate.metadata,
retriever=self.name(),
score=1.0)
Chunk(
content=candidate.content,
chunk_id=candidate.chunk_id,
metadata=candidate.metadata,
retriever=self.name(),
score=1.0,
)
for candidate in candidates_with_scores
]
return candidates_with_scores

View File

@@ -1,5 +1,5 @@
from concurrent.futures import ThreadPoolExecutor, Executor
from typing import Optional, List
from concurrent.futures import Executor, ThreadPoolExecutor
from typing import List, Optional
from dbgpt.core import Chunk
from dbgpt.rag.retriever.base import BaseRetriever
@@ -10,14 +10,18 @@ from dbgpt.util.executor_utils import blocking_func_to_async
class RetrieverChain(BaseRetriever):
"""Retriever chain class."""
def __init__(self, retrievers: Optional[List[BaseRetriever]] = None,
executor: Optional[Executor] = None):
def __init__(
self,
retrievers: Optional[List[BaseRetriever]] = None,
executor: Optional[Executor] = None,
):
"""Create retriever chain instance."""
self._retrievers = retrievers or []
self._executor = executor or ThreadPoolExecutor()
def _retrieve(self, query: str, filters: Optional[MetadataFilters] = None) -> List[
Chunk]:
def _retrieve(
self, query: str, filters: Optional[MetadataFilters] = None
) -> List[Chunk]:
"""Retrieve knowledge chunks.
Args:
query (str): query text
@@ -26,15 +30,14 @@ class RetrieverChain(BaseRetriever):
List[Chunk]: list of chunks
"""
for retriever in self._retrievers:
candidates = retriever.retrieve(
query, filters
)
candidates = retriever.retrieve(query, filters)
if candidates:
return candidates
return []
async def _aretrieve(self, query: str, filters: Optional[MetadataFilters] = None) -> \
List[Chunk]:
async def _aretrieve(
self, query: str, filters: Optional[MetadataFilters] = None
) -> List[Chunk]:
"""Retrieve knowledge chunks.
Args:
query (str): query text
@@ -47,13 +50,18 @@ class RetrieverChain(BaseRetriever):
)
return candidates
def _retrieve_with_score(self, query: str, score_threshold: float, filters: Optional[MetadataFilters] = None) -> List[Chunk]:
def _retrieve_with_score(
self,
query: str,
score_threshold: float,
filters: Optional[MetadataFilters] = None,
) -> List[Chunk]:
"""Retrieve knowledge chunks.
Args:
query (str): query text
filters: (Optional[MetadataFilters]) metadata filters.
Return:
List[Chunk]: list of chunks
Args:
query (str): query text
filters: (Optional[MetadataFilters]) metadata filters.
Return:
List[Chunk]: list of chunks
"""
for retriever in self._retrievers:
candidates_with_scores = retriever.retrieve_with_scores(
@@ -63,16 +71,21 @@ class RetrieverChain(BaseRetriever):
return candidates_with_scores
return []
async def _aretrieve_with_score(self, query: str, score_threshold: float, filters: Optional[MetadataFilters] = None) -> List[Chunk]:
async def _aretrieve_with_score(
self,
query: str,
score_threshold: float,
filters: Optional[MetadataFilters] = None,
) -> List[Chunk]:
"""Retrieve knowledge chunks with score.
Args:
query (str): query text
score_threshold (float): score threshold
filters: (Optional[MetadataFilters]) metadata filters.
Return:
List[Chunk]: list of chunks with score
Args:
query (str): query text
score_threshold (float): score threshold
filters: (Optional[MetadataFilters]) metadata filters.
Return:
List[Chunk]: list of chunks with score
"""
candidates_with_score = await blocking_func_to_async(
self._executor, self._retrieve_with_score, query, score_threshold, filters
)
return candidates_with_score
return candidates_with_score