Repository: https://github.com/csunny/DB-GPT.git
Commit: d11ec46ee5 ("style:fmt")
Parent: 01eae42554
@@ -21,7 +21,13 @@ class CSVEmbedding(SourceEmbedding):
         source_reader: Optional = None,
         text_splitter: Optional[TextSplitter] = None,
     ):
-        """Initialize with csv path."""
+        """Initialize with csv path.
+        Args:
+           - file_path: data source path
+           - vector_store_config: vector store config params.
+           - source_reader: Optional[BaseLoader]
+           - text_splitter: Optional[TextSplitter]
+        """
         super().__init__(
             file_path, vector_store_config, source_reader=None, text_splitter=None
         )
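For context, a minimal usage sketch of the constructor documented above; the import path, file name, and vector_store_config contents are illustrative assumptions, not taken from this commit.

# Hypothetical sketch of CSVEmbedding usage; only the parameter names come from
# the docstring added in this diff.
from pilot.embedding_engine.csv_embedding import CSVEmbedding  # assumed module path

csv_embedding = CSVEmbedding(
    file_path="./datasets/example.csv",                           # data source path
    vector_store_config={"vector_store_name": "example_space"},   # vector store config params (keys assumed)
    source_reader=None,                                           # Optional[BaseLoader]
    text_splitter=None,                                           # Optional[TextSplitter]
)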
@@ -28,7 +28,16 @@ class EmbeddingEngine:
         text_splitter: Optional[TextSplitter] = None,
         embedding_factory: EmbeddingFactory = None,
     ):
-        """Initialize with knowledge embedding client, model_name, vector_store_config, knowledge_type, knowledge_source"""
+        """Initialize with knowledge embedding client, model_name, vector_store_config, knowledge_type, knowledge_source
+        Args:
+           - model_name: model_name
+           - vector_store_config: vector store config: Dict
+           - knowledge_type: Optional[KnowledgeType]
+           - knowledge_source: Optional[str]
+           - source_reader: Optional[BaseLoader]
+           - text_splitter: Optional[TextSplitter]
+           - embedding_factory: EmbeddingFactory
+        """
         self.knowledge_source = knowledge_source
         self.model_name = model_name
         self.vector_store_config = vector_store_config
@@ -65,6 +74,11 @@ class EmbeddingEngine:
         )

     def similar_search(self, text, topk):
+        """vector db similar search
+        Args:
+           - text: query text
+           - topk: top k
+        """
         vector_client = VectorStoreConnector(
             self.vector_store_config["vector_store_type"], self.vector_store_config
         )
@@ -75,12 +89,17 @@ class EmbeddingEngine:
         return ans

     def vector_exist(self):
+        """vector db is exist"""
         vector_client = VectorStoreConnector(
             self.vector_store_config["vector_store_type"], self.vector_store_config
         )
         return vector_client.vector_name_exists()

     def delete_by_ids(self, ids):
+        """delete vector db by ids
+        Args:
+           - ids: vector ids
+        """
         vector_client = VectorStoreConnector(
             self.vector_store_config["vector_store_type"], self.vector_store_config
         )
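A minimal sketch of how a caller might drive the engine methods documented above. The keyword names follow the new __init__ docstring; the concrete values, the "Chroma" store type, and the ids format are assumptions.

# Hypothetical sketch; only the parameter names and the "vector_store_type" key
# are taken from this diff, everything else is an illustrative assumption.
engine = EmbeddingEngine(
    model_name="text2vec",                            # model_name
    vector_store_config={
        "vector_store_name": "example_space",         # assumed key
        "vector_store_type": "Chroma",                # key used by VectorStoreConnector above
    },
    knowledge_type="DOCUMENT",                        # Optional[KnowledgeType], assumed value
    knowledge_source="./datasets/example.csv",        # Optional[str]
)

if engine.vector_exist():                             # does the vector store already exist?
    answers = engine.similar_search("what is DB-GPT", topk=5)   # top-5 similar chunks
    # engine.delete_by_ids(ids)                       # delete vectors by ids (id format not shown in this diff)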
@@ -23,7 +23,13 @@ class PDFEmbedding(SourceEmbedding):
         source_reader: Optional = None,
         text_splitter: Optional[TextSplitter] = None,
     ):
-        """Initialize pdf word path."""
+        """Initialize pdf word path.
+        Args:
+           - file_path: data source path
+           - vector_store_config: vector store config params.
+           - source_reader: Optional[BaseLoader]
+           - text_splitter: Optional[TextSplitter]
+        """
         super().__init__(
             file_path, vector_store_config, source_reader=None, text_splitter=None
         )
@@ -23,7 +23,13 @@ class PPTEmbedding(SourceEmbedding):
         source_reader: Optional = None,
         text_splitter: Optional[TextSplitter] = None,
     ):
-        """Initialize ppt word path."""
+        """Initialize ppt word path.
+        Args:
+           - file_path: data source path
+           - vector_store_config: vector store config params.
+           - source_reader: Optional[BaseLoader]
+           - text_splitter: Optional[TextSplitter]
+        """
         super().__init__(
             file_path, vector_store_config, source_reader=None, text_splitter=None
         )
@@ -33,7 +33,7 @@ class SourceEmbedding(ABC):
         Args:
            - file_path: data source path
            - vector_store_config: vector store config params.
-           - source_reader: Optional[]
+           - source_reader: Optional[BaseLoader]
            - text_splitter: Optional[TextSplitter]
            - embedding_args: Optional
         """
@@ -52,8 +52,8 @@ class SourceEmbedding(ABC):
     @register
     def data_process(self, text):
         """pre process data.
         Args:
            - text: raw text
         """

     @register
@@ -20,7 +20,13 @@ class StringEmbedding(SourceEmbedding):
         source_reader: Optional = None,
         text_splitter: Optional[TextSplitter] = None,
     ):
-        """Initialize raw text word path."""
+        """Initialize raw text word path.
+        Args:
+           - file_path: data source path
+           - vector_store_config: vector store config params.
+           - source_reader: Optional[BaseLoader]
+           - text_splitter: Optional[TextSplitter]
+        """
         super().__init__(
             file_path=file_path,
             vector_store_config=vector_store_config,
@@ -22,7 +22,13 @@ class URLEmbedding(SourceEmbedding):
         source_reader: Optional = None,
         text_splitter: Optional[TextSplitter] = None,
     ):
-        """Initialize url word path."""
+        """Initialize url word path.
+        Args:
+           - file_path: data source path
+           - vector_store_config: vector store config params.
+           - source_reader: Optional[BaseLoader]
+           - text_splitter: Optional[TextSplitter]
+        """
         super().__init__(
             file_path, vector_store_config, source_reader=None, text_splitter=None
         )
@@ -23,7 +23,13 @@ class WordEmbedding(SourceEmbedding):
         source_reader: Optional = None,
         text_splitter: Optional[TextSplitter] = None,
     ):
-        """Initialize with word path."""
+        """Initialize with word path.
+        Args:
+           - file_path: data source path
+           - vector_store_config: vector store config params.
+           - source_reader: Optional[BaseLoader]
+           - text_splitter: Optional[TextSplitter]
+        """
         super().__init__(
             file_path, vector_store_config, source_reader=None, text_splitter=None
         )
@@ -57,12 +57,21 @@ class SyncStatus(Enum):

 # @singleton
 class KnowledgeService:
+    """KnowledgeService
+    Knowledge Management Service:
+        -knowledge_space management
+        -knowledge_document management
+        -embedding management
+    """
+
     def __init__(self):
         pass

-    """create knowledge space"""
-
     def create_knowledge_space(self, request: KnowledgeSpaceRequest):
+        """create knowledge space
+        Args:
+           - request: KnowledgeSpaceRequest
+        """
         query = KnowledgeSpaceEntity(
             name=request.name,
         )
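A short sketch of the create_knowledge_space call documented above. Only the method signature and the request.name attribute appear in this diff; the vector_type and owner fields are inferred from the get_knowledge_space query further down, and all values are placeholders.

# Hypothetical sketch; values are placeholders.
service = KnowledgeService()

request = KnowledgeSpaceRequest()
request.name = "example_space"
request.vector_type = "Chroma"   # field inferred from get_knowledge_space below, value assumed
request.owner = "dbgpt"          # field inferred from get_knowledge_space below, value assumed

service.create_knowledge_space(request)   # returns True once the space record is created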
@@ -72,9 +81,11 @@ class KnowledgeService:
         knowledge_space_dao.create_knowledge_space(request)
         return True

-    """create knowledge document"""
-
     def create_knowledge_document(self, space, request: KnowledgeDocumentRequest):
+        """create knowledge document
+        Args:
+           - request: KnowledgeDocumentRequest
+        """
         query = KnowledgeDocumentEntity(doc_name=request.doc_name, space=space)
         documents = knowledge_document_dao.get_knowledge_documents(query)
         if len(documents) > 0:
@@ -91,9 +102,11 @@ class KnowledgeService:
         )
         return knowledge_document_dao.create_knowledge_document(document)

-    """get knowledge space"""
-
     def get_knowledge_space(self, request: KnowledgeSpaceRequest):
+        """get knowledge space
+        Args:
+           - request: KnowledgeSpaceRequest
+        """
         query = KnowledgeSpaceEntity(
             name=request.name, vector_type=request.vector_type, owner=request.owner
         )
@@ -116,6 +129,10 @@ class KnowledgeService:
         return responses

     def arguments(self, space_name):
+        """show knowledge space arguments
+        Args:
+           - space_name: Knowledge Space Name
+        """
         query = KnowledgeSpaceEntity(name=space_name)
         spaces = knowledge_space_dao.get_knowledge_space(query)
         if len(spaces) != 1:
@@ -128,6 +145,11 @@ class KnowledgeService:
         return json.loads(context)

     def argument_save(self, space_name, argument_request: SpaceArgumentRequest):
+        """save argument
+        Args:
+           - space_name: Knowledge Space Name
+           - argument_request: SpaceArgumentRequest
+        """
         query = KnowledgeSpaceEntity(name=space_name)
         spaces = knowledge_space_dao.get_knowledge_space(query)
         if len(spaces) != 1:
@@ -136,9 +158,12 @@ class KnowledgeService:
         space.context = argument_request.argument
         return knowledge_space_dao.update_knowledge_space(space)

-    """get knowledge get_knowledge_documents"""
-
     def get_knowledge_documents(self, space, request: DocumentQueryRequest):
+        """get knowledge documents
+        Args:
+           - space: Knowledge Space Name
+           - request: DocumentQueryRequest
+        """
         query = KnowledgeDocumentEntity(
             doc_name=request.doc_name,
             doc_type=request.doc_type,
@@ -153,9 +178,12 @@ class KnowledgeService:
         res.page = request.page
         return res

-    """sync knowledge document chunk into vector store"""
-
     def sync_knowledge_document(self, space_name, sync_request: DocumentSyncRequest):
+        """sync knowledge document chunk into vector store
+        Args:
+           - space: Knowledge Space Name
+           - sync_request: DocumentSyncRequest
+        """
         from pilot.embedding_engine.embedding_engine import EmbeddingEngine
         from pilot.embedding_engine.embedding_factory import EmbeddingFactory
         from pilot.embedding_engine.pre_text_splitter import PreTextSplitter
@@ -249,11 +277,6 @@ class KnowledgeService:
         doc.chunk_size = len(chunk_docs)
         doc.gmt_modified = datetime.now()
         knowledge_document_dao.update_knowledge_document(doc)
-        # async doc embeddings
-        # thread = threading.Thread(
-        #     target=self.async_doc_embedding, args=(client, chunk_docs, doc)
-        # )
-        # thread.start()
         executor = CFG.SYSTEM_APP.get_component(
             ComponentType.EXECUTOR_DEFAULT, ExecutorFactory
         ).create()
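The commented-out threading.Thread code is dropped in favour of the default executor component. The submit call itself sits outside this hunk; a plausible continuation, assuming a concurrent.futures-style executor from ExecutorFactory.create(), would be:

# Hypothetical continuation (not shown in this hunk): schedule the embedding job
# on the shared executor instead of spawning a raw thread.
executor.submit(self.async_doc_embedding, client, chunk_docs, doc)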
@@ -277,16 +300,21 @@ class KnowledgeService:

         return True

-    """update knowledge space"""
-
     def update_knowledge_space(
         self, space_id: int, space_request: KnowledgeSpaceRequest
     ):
+        """update knowledge space
+        Args:
+           - space_id: space id
+           - space_request: KnowledgeSpaceRequest
+        """
         knowledge_space_dao.update_knowledge_space(space_id, space_request)

-    """delete knowledge space"""
-
     def delete_space(self, space_name: str):
+        """delete knowledge space
+        Args:
+           - space_name: knowledge space name
+        """
         query = KnowledgeSpaceEntity(name=space_name)
         spaces = knowledge_space_dao.get_knowledge_space(query)
         if len(spaces) == 0:
@@ -312,6 +340,11 @@ class KnowledgeService:
         return knowledge_space_dao.delete_knowledge_space(space)

     def delete_document(self, space_name: str, doc_name: str):
+        """delete document
+        Args:
+           - space_name: knowledge space name
+           - doc_name: document name
+        """
         document_query = KnowledgeDocumentEntity(doc_name=doc_name, space=space_name)
         documents = knowledge_document_dao.get_documents(document_query)
         if len(documents) != 1:
@@ -332,9 +365,11 @@ class KnowledgeService:
         # delete document
         return knowledge_document_dao.delete(document_query)

-    """get document chunks"""
-
     def get_document_chunks(self, request: ChunkQueryRequest):
+        """get document chunks
+        Args:
+           - request: ChunkQueryRequest
+        """
         query = DocumentChunkEntity(
             id=request.id,
             document_id=request.document_id,
@@ -350,6 +385,12 @@ class KnowledgeService:
         return res

     def async_doc_embedding(self, client, chunk_docs, doc):
+        """async document embedding into vector db
+        Args:
+           - client: EmbeddingEngine Client
+           - chunk_docs: List[Document]
+           - doc: doc
+        """
         logger.info(
             f"async_doc_embedding, doc:{doc.doc_name}, chunk_size:{len(chunk_docs)}, begin embedding to vector store-{CFG.VECTOR_STORE_TYPE}"
         )
@@ -391,6 +432,10 @@ class KnowledgeService:
         return context_template_string

     def get_space_context(self, space_name):
+        """get space context
+        Args:
+           - space_name: space name
+        """
         request = KnowledgeSpaceRequest()
         request.name = space_name
         spaces = self.get_knowledge_space(request)
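Together with delete_space and delete_document above, the space-level helpers can be exercised as in the sketch below; the space and document names are placeholders and only the method signatures come from this diff.

# Hypothetical sketch; space and document names are placeholders.
service = KnowledgeService()

space_context = service.get_space_context("example_space")   # fetch stored space context
service.delete_document(space_name="example_space", doc_name="example.pdf")
service.delete_space("example_space")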