mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-10 21:39:33 +00:00
feat: (0.6)New UI (#1855)
Co-authored-by: 夏姜 <wenfengjiang.jwf@digital-engine.com> Co-authored-by: aries_ckt <916701291@qq.com> Co-authored-by: wb-lh513319 <wb-lh513319@alibaba-inc.com> Co-authored-by: csunny <cfqsunny@163.com>
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import timeit
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
|
||||
from dbgpt._private.config import Config
|
||||
from dbgpt.app.knowledge.chunk_db import DocumentChunkDao, DocumentChunkEntity
|
||||
@@ -10,8 +12,10 @@ from dbgpt.app.knowledge.document_db import (
|
||||
KnowledgeDocumentEntity,
|
||||
)
|
||||
from dbgpt.app.knowledge.request.request import (
|
||||
ChunkEditRequest,
|
||||
ChunkQueryRequest,
|
||||
DocumentQueryRequest,
|
||||
DocumentRecallTestRequest,
|
||||
DocumentSummaryRequest,
|
||||
KnowledgeDocumentRequest,
|
||||
KnowledgeSpaceRequest,
|
||||
@@ -20,6 +24,7 @@ from dbgpt.app.knowledge.request.request import (
|
||||
from dbgpt.app.knowledge.request.response import (
|
||||
ChunkQueryResponse,
|
||||
DocumentQueryResponse,
|
||||
DocumentResponse,
|
||||
SpaceQueryResponse,
|
||||
)
|
||||
from dbgpt.component import ComponentType
|
||||
@@ -33,8 +38,10 @@ from dbgpt.rag.chunk_manager import ChunkParameters
|
||||
from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
|
||||
from dbgpt.rag.knowledge.base import KnowledgeType
|
||||
from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
from dbgpt.rag.retriever.rerank import RerankEmbeddingsRanker
|
||||
from dbgpt.serve.rag.connector import VectorStoreConnector
|
||||
from dbgpt.serve.rag.models.models import KnowledgeSpaceDao, KnowledgeSpaceEntity
|
||||
from dbgpt.serve.rag.retriever.knowledge_space import KnowledgeSpaceRetriever
|
||||
from dbgpt.serve.rag.service.service import SyncStatus
|
||||
from dbgpt.storage.vector_store.base import VectorStoreConfig
|
||||
from dbgpt.util.executor_utils import ExecutorFactory, blocking_func_to_async
|
||||
@@ -121,7 +128,10 @@ class KnowledgeService:
|
||||
- request: KnowledgeSpaceRequest
|
||||
"""
|
||||
query = KnowledgeSpaceEntity(
|
||||
name=request.name, vector_type=request.vector_type, owner=request.owner
|
||||
id=request.id,
|
||||
name=request.name,
|
||||
vector_type=request.vector_type,
|
||||
owner=request.owner,
|
||||
)
|
||||
spaces = knowledge_space_dao.get_knowledge_space(query)
|
||||
space_names = [space.name for space in spaces]
|
||||
@@ -144,15 +154,15 @@ class KnowledgeService:
|
||||
responses.append(res)
|
||||
return responses
|
||||
|
||||
def arguments(self, space_name):
|
||||
def arguments(self, space):
|
||||
"""show knowledge space arguments
|
||||
Args:
|
||||
- space_name: Knowledge Space Name
|
||||
"""
|
||||
query = KnowledgeSpaceEntity(name=space_name)
|
||||
query = KnowledgeSpaceEntity(name=space)
|
||||
spaces = knowledge_space_dao.get_knowledge_space(query)
|
||||
if len(spaces) != 1:
|
||||
raise Exception(f"there are no or more than one space called {space_name}")
|
||||
raise Exception(f"there are no or more than one space called {space}")
|
||||
space = spaces[0]
|
||||
if space.context is None:
|
||||
context = self._build_default_context()
|
||||
@@ -160,16 +170,16 @@ class KnowledgeService:
|
||||
context = space.context
|
||||
return json.loads(context)
|
||||
|
||||
def argument_save(self, space, argument_request: SpaceArgumentRequest):
    """Save the arguments (context) of a knowledge space.

    Args:
        - space: Knowledge Space Name
        - argument_request: SpaceArgumentRequest whose ``argument`` value is
          stored as the space's ``context``

    Returns:
        The result of ``knowledge_space_dao.update_knowledge_space``.

    Raises:
        Exception: if zero or more than one space matches the given name.
    """
    query = KnowledgeSpaceEntity(name=space)
    spaces = knowledge_space_dao.get_knowledge_space(query)
    if len(spaces) != 1:
        raise Exception(f"there are no or more than one space called {space}")
    # Keep the entity under its own name so the ``space`` argument (the
    # name used for the lookup and the error message) is not shadowed.
    space_entity = spaces[0]
    space_entity.context = argument_request.argument
    return knowledge_space_dao.update_knowledge_space(space_entity)
|
||||
@@ -182,23 +192,42 @@ class KnowledgeService:
|
||||
Returns:
|
||||
- res DocumentQueryResponse
|
||||
"""
|
||||
|
||||
total = None
|
||||
page = request.page
|
||||
if request.page_size <= 0:
|
||||
request.page_size = 20
|
||||
ks = knowledge_space_dao.get_one({"name": space})
|
||||
if ks is None:
|
||||
raise Exception(f"there is no space id called {space}")
|
||||
res = DocumentQueryResponse()
|
||||
if request.doc_ids and len(request.doc_ids) > 0:
|
||||
data = knowledge_document_dao.documents_by_ids(request.doc_ids)
|
||||
documents: List[
|
||||
KnowledgeDocumentEntity
|
||||
] = knowledge_document_dao.documents_by_ids(request.doc_ids)
|
||||
res.data = [item.to_dict() for item in documents]
|
||||
else:
|
||||
query = KnowledgeDocumentEntity(
|
||||
doc_name=request.doc_name,
|
||||
doc_type=request.doc_type,
|
||||
space=space,
|
||||
status=request.status,
|
||||
)
|
||||
data = knowledge_document_dao.get_knowledge_documents(
|
||||
query, page=request.page, page_size=request.page_size
|
||||
)
|
||||
total = knowledge_document_dao.get_knowledge_documents_count(query)
|
||||
return DocumentQueryResponse(data=data, total=total, page=page)
|
||||
space_name = ks.name
|
||||
query = {
|
||||
"doc_type": request.doc_type,
|
||||
"space": space_name,
|
||||
"status": request.status,
|
||||
}
|
||||
if request.doc_name:
|
||||
docs = knowledge_document_dao.get_list({"space": space_name})
|
||||
docs = [DocumentResponse.serve_to_response(doc) for doc in docs]
|
||||
res.data = [
|
||||
doc
|
||||
for doc in docs
|
||||
if doc.doc_name and request.doc_name in doc.doc_name
|
||||
]
|
||||
else:
|
||||
result = knowledge_document_dao.get_list_page(
|
||||
query, page=request.page, page_size=request.page_size
|
||||
)
|
||||
docs = result.items
|
||||
docs = [DocumentResponse.serve_to_response(doc) for doc in docs]
|
||||
res.data = docs
|
||||
res.total = result.total_count
|
||||
res.page = result.page
|
||||
return res
|
||||
|
||||
async def document_summary(self, request: DocumentSummaryRequest):
|
||||
"""get document summary
|
||||
@@ -259,6 +288,96 @@ class KnowledgeService:
|
||||
summary, request.conv_uid, request.model_name
|
||||
)
|
||||
|
||||
def get_space_context_by_space_id(self, space_id):
    """Return the parsed context of the knowledge space with the given id.

    Args:
        - space_id: id of the knowledge space to look up

    Returns:
        The space's ``context`` decoded with ``json.loads``, or ``None``
        when the space has no context.

    Raises:
        Exception: if the lookup does not yield exactly one space.
    """
    spaces = self.get_knowledge_space_by_ids([space_id])
    if len(spaces) != 1:
        raise Exception(
            f"have not found {space_id} space or found more than one space called {space_id}"
        )
    space = spaces[0]
    if space.context is None:
        return None
    return json.loads(space.context)
|
||||
|
||||
def get_knowledge_space_by_ids(self, ids):
    """Look up knowledge spaces by their ids.

    Delegates directly to ``knowledge_space_dao.get_knowledge_space_by_ids``
    and returns its result unchanged.
    """
    matched_spaces = knowledge_space_dao.get_knowledge_space_by_ids(ids)
    return matched_spaces
|
||||
|
||||
def recall_test(
    self, space_name, doc_recall_test_request: DocumentRecallTestRequest
):
    """Run a retrieval (recall) test against a knowledge space.

    Retrieves chunks for the request's question from the named space,
    reranks them when ``CFG.RERANK_MODEL`` is set, and then filters them by
    the request's ``recall_score_threshold`` if one is given.

    Args:
        - space_name: Knowledge Space Name
        - doc_recall_test_request: DocumentRecallTestRequest

    Returns:
        The list of recalled chunks. This is a best-effort call: any error
        is logged with its traceback and an empty list is returned.
    """
    logger.info(f"recall_test {space_name}, {doc_recall_test_request}")
    from dbgpt.rag.embedding.embedding_factory import RerankEmbeddingFactory

    try:
        start_time = timeit.default_timer()
        question = doc_recall_test_request.question
        space_context = self.get_space_context(space_name)
        logger.info(f"space_context is {space_context}")
        space = knowledge_space_dao.get_one({"name": space_name})
        if space is None:
            # Fail with a clear message instead of an opaque AttributeError
            # on ``space.id`` further down.
            raise Exception(f"there is no space called {space_name}")

        recall_top_k = int(doc_recall_test_request.recall_top_k)
        score_threshold = (
            float(space_context["embedding"].get("recall_score", 0.3))
            if (space_context and "embedding" in space_context)
            else 0.3
        )

        use_rerank = CFG.RERANK_MODEL is not None
        top_k = recall_top_k
        if use_rerank:
            # When reranking, retrieve at least max(RERANK_TOP_K, 20)
            # candidates; the reranker trims back to ``recall_top_k``.
            top_k = max(recall_top_k, int(CFG.RERANK_TOP_K), 20)

        knowledge_space_retriever = KnowledgeSpaceRetriever(
            space_id=space.id, top_k=top_k
        )
        chunks = knowledge_space_retriever.retrieve_with_scores(
            question, score_threshold
        )
        retrievers_end_time = timeit.default_timer()
        retrievers_cost_time = retrievers_end_time - start_time
        logger.info(
            f"retrieve chunks size is {len(chunks)}, "
            f"retrievers_cost_time is {retrievers_cost_time} seconds"
        )

        if use_rerank:
            rerank_embeddings = RerankEmbeddingFactory.get_instance(
                CFG.SYSTEM_APP
            ).create()
            reranker = RerankEmbeddingsRanker(rerank_embeddings, topk=recall_top_k)
            chunks = reranker.rank(candidates_with_scores=chunks, query=question)

        recall_score_threshold = doc_recall_test_request.recall_score_threshold
        if recall_score_threshold is not None:
            chunks = [
                chunk for chunk in chunks if chunk.score >= recall_score_threshold
            ]
        recall_end_time = timeit.default_timer()
        recall_cost_time = recall_end_time - start_time
        cost_time_map = {
            "retrievers_cost_time": retrievers_cost_time,
            "recall_cost_time": recall_cost_time,
        }
        logger.info(
            f"recall chunks size is {len(chunks)}, "
            f"recall_cost_time is {recall_cost_time} seconds, {cost_time_map}"
        )

        return chunks
    except Exception as e:
        # Keep the best-effort contract (empty result on failure) but record
        # the traceback so failures can be diagnosed.
        logger.exception(f" recall_test error: {str(e)}")
        return []
|
||||
|
||||
def update_knowledge_space(
|
||||
self, space_id: int, space_request: KnowledgeSpaceRequest
|
||||
):
|
||||
@@ -371,18 +490,13 @@ class KnowledgeService:
|
||||
doc_name=request.doc_name,
|
||||
doc_type=request.doc_type,
|
||||
)
|
||||
document_query = KnowledgeDocumentEntity(id=request.document_id)
|
||||
documents = knowledge_document_dao.get_documents(document_query)
|
||||
|
||||
data = document_chunk_dao.get_document_chunks(
|
||||
query, page=request.page, page_size=request.page_size
|
||||
)
|
||||
res = ChunkQueryResponse(
|
||||
data=data,
|
||||
summary=documents[0].summary,
|
||||
total=document_chunk_dao.get_document_chunks_count(query),
|
||||
page=request.page,
|
||||
)
|
||||
res = ChunkQueryResponse()
|
||||
res.data = [
|
||||
chunk.to_dict()
|
||||
for chunk in document_chunk_dao.get_document_chunks(
|
||||
query, page=request.page, page_size=request.page_size
|
||||
)
|
||||
]
|
||||
return res
|
||||
|
||||
@trace("async_doc_embedding")
|
||||
|
Reference in New Issue
Block a user