From 7186309f83d902f55803e97f98125c8f857d57f0 Mon Sep 17 00:00:00 2001 From: aries_ckt <916701291@qq.com> Date: Mon, 31 Jul 2023 16:47:48 +0800 Subject: [PATCH 1/9] feat:knowledge document delete 1.space delete 2.document delete --- ...odeTextLoader.py => encode_text_loader.py} | 0 pilot/embedding_engine/markdown_embedding.py | 2 +- pilot/server/knowledge/api.py | 20 ++++++ pilot/server/knowledge/chunk_db.py | 21 ++++-- pilot/server/knowledge/document_db.py | 58 ++++++++++++++-- pilot/server/knowledge/request/response.py | 5 +- pilot/server/knowledge/service.py | 66 ++++++++++++++++++- pilot/server/knowledge/space_db.py | 16 ++--- pilot/server/static/404.html | 2 +- pilot/server/static/404/index.html | 2 +- .../SANF9WwUqTlR_CIVwY23O/_buildManifest.js | 1 + .../SANF9WwUqTlR_CIVwY23O/_ssgManifest.js | 1 + .../_LoIuTcFbFlI-HxTFdO0M/_buildManifest.js | 1 + .../_LoIuTcFbFlI-HxTFdO0M/_ssgManifest.js | 1 + .../static/chunks/872-4a145d8028102d89.js | 16 +++++ .../chunks/app/chat/page-03fc16d89ff0beaa.js | 1 + .../chunklist/page-b9df21afecc6f900.js | 1 + .../documents/page-8049f9b1a73f5400.js | 1 + .../documents/page-9e372b2f1794c185.js | 1 + .../documents/page-ad0b9d2c68d4c2ba.js | 1 + .../documents/page-e071dad7ec9c5721.js | 1 + .../app/datastores/page-02fa00c4a6fbe114.js | 1 + .../app/datastores/page-19b9e9e53f8b0137.js | 1 + .../chunks/app/layout-11d4d8e3d2a1bc5a.js | 1 + .../chunks/app/page-1a66758966b07f9b.js | 1 + .../fMX6_gTKSU5kv3yMf8rpy/_buildManifest.js | 1 + .../fMX6_gTKSU5kv3yMf8rpy/_ssgManifest.js | 1 + .../qD2POSHFrMfMY5jgX4NDk/_buildManifest.js | 1 + .../qD2POSHFrMfMY5jgX4NDk/_ssgManifest.js | 1 + pilot/server/static/chat/index.html | 2 +- pilot/server/static/chat/index.txt | 6 +- .../datastores/documents/chunklist/index.html | 2 +- .../datastores/documents/chunklist/index.txt | 6 +- .../static/datastores/documents/index.html | 2 +- .../static/datastores/documents/index.txt | 6 +- pilot/server/static/datastores/index.html | 2 +- pilot/server/static/datastores/index.txt | 6 +- pilot/server/static/index.html | 2 +- pilot/server/static/index.txt | 6 +- pilot/vector_store/chroma_store.py | 27 +++++++- pilot/vector_store/connector.py | 7 +- pilot/vector_store/milvus_store.py | 6 ++ 42 files changed, 256 insertions(+), 50 deletions(-) rename pilot/embedding_engine/{EncodeTextLoader.py => encode_text_loader.py} (100%) create mode 100644 pilot/server/static/_next/static/SANF9WwUqTlR_CIVwY23O/_buildManifest.js create mode 100644 pilot/server/static/_next/static/SANF9WwUqTlR_CIVwY23O/_ssgManifest.js create mode 100644 pilot/server/static/_next/static/_LoIuTcFbFlI-HxTFdO0M/_buildManifest.js create mode 100644 pilot/server/static/_next/static/_LoIuTcFbFlI-HxTFdO0M/_ssgManifest.js create mode 100644 pilot/server/static/_next/static/chunks/872-4a145d8028102d89.js create mode 100644 pilot/server/static/_next/static/chunks/app/chat/page-03fc16d89ff0beaa.js create mode 100644 pilot/server/static/_next/static/chunks/app/datastores/documents/chunklist/page-b9df21afecc6f900.js create mode 100644 pilot/server/static/_next/static/chunks/app/datastores/documents/page-8049f9b1a73f5400.js create mode 100644 pilot/server/static/_next/static/chunks/app/datastores/documents/page-9e372b2f1794c185.js create mode 100644 pilot/server/static/_next/static/chunks/app/datastores/documents/page-ad0b9d2c68d4c2ba.js create mode 100644 pilot/server/static/_next/static/chunks/app/datastores/documents/page-e071dad7ec9c5721.js create mode 100644 pilot/server/static/_next/static/chunks/app/datastores/page-02fa00c4a6fbe114.js create mode 100644 pilot/server/static/_next/static/chunks/app/datastores/page-19b9e9e53f8b0137.js create mode 100644 pilot/server/static/_next/static/chunks/app/layout-11d4d8e3d2a1bc5a.js create mode 100644 pilot/server/static/_next/static/chunks/app/page-1a66758966b07f9b.js create mode 100644 pilot/server/static/_next/static/fMX6_gTKSU5kv3yMf8rpy/_buildManifest.js create mode 100644 pilot/server/static/_next/static/fMX6_gTKSU5kv3yMf8rpy/_ssgManifest.js create mode 100644 pilot/server/static/_next/static/qD2POSHFrMfMY5jgX4NDk/_buildManifest.js create mode 100644 pilot/server/static/_next/static/qD2POSHFrMfMY5jgX4NDk/_ssgManifest.js diff --git a/pilot/embedding_engine/EncodeTextLoader.py b/pilot/embedding_engine/encode_text_loader.py similarity index 100% rename from pilot/embedding_engine/EncodeTextLoader.py rename to pilot/embedding_engine/encode_text_loader.py diff --git a/pilot/embedding_engine/markdown_embedding.py b/pilot/embedding_engine/markdown_embedding.py index c73a271f8..946b13a89 100644 --- a/pilot/embedding_engine/markdown_embedding.py +++ b/pilot/embedding_engine/markdown_embedding.py @@ -14,7 +14,7 @@ from langchain.text_splitter import ( ) from pilot.embedding_engine import SourceEmbedding, register -from pilot.embedding_engine.EncodeTextLoader import EncodeTextLoader +from pilot.embedding_engine.encode_text_loader import EncodeTextLoader class MarkdownEmbedding(SourceEmbedding): diff --git a/pilot/server/knowledge/api.py b/pilot/server/knowledge/api.py index f7960f80c..737b779bc 100644 --- a/pilot/server/knowledge/api.py +++ b/pilot/server/knowledge/api.py @@ -52,6 +52,15 @@ def space_list(request: KnowledgeSpaceRequest): return Result.faild(code="E000X", msg=f"space list error {e}") +@router.post("/knowledge/space/delete") +def space_delete(request: KnowledgeSpaceRequest): + print(f"/space/list params:") + try: + return Result.succ(knowledge_space_service.delete_space(request.name)) + except Exception as e: + return Result.faild(code="E000X", msg=f"space list error {e}") + + @router.post("/knowledge/{space_name}/document/add") def document_add(space_name: str, request: KnowledgeDocumentRequest): print(f"/document/add params: {space_name}, {request}") @@ -77,6 +86,17 @@ def document_list(space_name: str, query_request: DocumentQueryRequest): return Result.faild(code="E000X", msg=f"document list error {e}") +@router.post("/knowledge/{space_name}/document/delete") +def document_delete(space_name: str, query_request: DocumentQueryRequest): + print(f"/document/list params: {space_name}, {query_request}") + try: + return Result.succ( + knowledge_space_service.delete_document(space_name, query_request.doc_name) + ) + except Exception as e: + return Result.faild(code="E000X", msg=f"document list error {e}") + + @router.post("/knowledge/{space_name}/document/upload") async def document_upload( space_name: str, diff --git a/pilot/server/knowledge/chunk_db.py b/pilot/server/knowledge/chunk_db.py index d67f4a01a..d3b58fc91 100644 --- a/pilot/server/knowledge/chunk_db.py +++ b/pilot/server/knowledge/chunk_db.py @@ -81,6 +81,7 @@ class DocumentChunkDao: page_size ) result = document_chunks.all() + session.close() return result def get_document_chunks_count(self, query: DocumentChunkEntity): @@ -105,6 +106,7 @@ class DocumentChunkDao: DocumentChunkEntity.meta_info == query.meta_info ) count = document_chunks.scalar() + session.close() return count # def update_knowledge_document(self, document:KnowledgeDocumentEntity): @@ -113,9 +115,16 @@ class DocumentChunkDao: # session.commit() # return updated_space.id - # def delete_knowledge_document(self, document_id:int): - # cursor = self.conn.cursor() - # query = "DELETE FROM knowledge_document WHERE id = %s" - # cursor.execute(query, (document_id,)) - # self.conn.commit() - # cursor.close() + def delete(self, document_id: int): + session = self.Session() + if document_id is None: + raise Exception("document_id is None") + query = DocumentChunkEntity(document_id=document_id) + knowledge_documents = session.query(DocumentChunkEntity) + if query.document_id is not None: + chunks = knowledge_documents.filter( + DocumentChunkEntity.document_id == query.document_id + ) + chunks.delete() + session.commit() + session.close() diff --git a/pilot/server/knowledge/document_db.py b/pilot/server/knowledge/document_db.py index a4f3ba978..3ac923c03 100644 --- a/pilot/server/knowledge/document_db.py +++ b/pilot/server/knowledge/document_db.py @@ -91,6 +91,38 @@ class KnowledgeDocumentDao: page_size ) result = knowledge_documents.all() + session.close() + return result + + def get_documents(self, query): + session = self.Session() + knowledge_documents = session.query(KnowledgeDocumentEntity) + if query.id is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.id == query.id + ) + if query.doc_name is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.doc_name == query.doc_name + ) + if query.doc_type is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.doc_type == query.doc_type + ) + if query.space is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.space == query.space + ) + if query.status is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.status == query.status + ) + + knowledge_documents = knowledge_documents.order_by( + KnowledgeDocumentEntity.id.desc() + ) + result = knowledge_documents.all() + session.close() return result def get_knowledge_documents_count(self, query): @@ -117,18 +149,32 @@ class KnowledgeDocumentDao: KnowledgeDocumentEntity.status == query.status ) count = knowledge_documents.scalar() + session.close() return count def update_knowledge_document(self, document: KnowledgeDocumentEntity): session = self.Session() updated_space = session.merge(document) session.commit() + session.close() return updated_space.id # - # def delete_knowledge_document(self, document_id: int): - # cursor = self.conn.cursor() - # query = "DELETE FROM knowledge_document WHERE id = %s" - # cursor.execute(query, (document_id,)) - # self.conn.commit() - # cursor.close() + def delete(self, query: KnowledgeDocumentEntity): + session = self.Session() + knowledge_documents = session.query(KnowledgeDocumentEntity) + if query.id is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.id == query.id + ) + if query.doc_name is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.doc_name == query.doc_name + ) + if query.space is not None: + knowledge_documents = knowledge_documents.filter( + KnowledgeDocumentEntity.doc_name == query.doc_name + ) + knowledge_documents.delete() + session.commit() + session.close() diff --git a/pilot/server/knowledge/request/response.py b/pilot/server/knowledge/request/response.py index 98dcbcc76..d302eb392 100644 --- a/pilot/server/knowledge/request/response.py +++ b/pilot/server/knowledge/request/response.py @@ -26,6 +26,7 @@ class DocumentQueryResponse(BaseModel): class SpaceQueryResponse(BaseModel): """data: data""" + id: int = None name: str = None """vector_type: vector type""" vector_type: str = None @@ -33,5 +34,7 @@ class SpaceQueryResponse(BaseModel): desc: str = None """owner: owner""" owner: str = None + gmt_created: str = None + gmt_modified: str = None """doc_count: doc_count""" - doc_count: int = None + docs: int = None diff --git a/pilot/server/knowledge/service.py b/pilot/server/knowledge/service.py index bb84b4789..3f863e0c4 100644 --- a/pilot/server/knowledge/service.py +++ b/pilot/server/knowledge/service.py @@ -2,6 +2,7 @@ import threading from datetime import datetime from langchain.text_splitter import RecursiveCharacterTextSplitter, SpacyTextSplitter +from pilot.vector_store.connector import VectorStoreConnector from pilot.configs.config import Config from pilot.configs.model_config import LLM_MODEL_CONFIG, KNOWLEDGE_UPLOAD_ROOT_PATH @@ -89,7 +90,23 @@ class KnowledgeService: query = KnowledgeSpaceEntity( name=request.name, vector_type=request.vector_type, owner=request.owner ) - return knowledge_space_dao.get_knowledge_space(query) + responses = [] + spaces = knowledge_space_dao.get_knowledge_space(query) + for space in spaces: + res = SpaceQueryResponse() + res.id = space.id + res.name = space.name + res.vector_type = space.vector_type + res.desc = space.desc + res.owner = space.owner + res.gmt_created = space.gmt_created + res.gmt_modified = space.gmt_modified + res.owner = space.owner + query = KnowledgeDocumentEntity(space=space.name) + doc_count = knowledge_document_dao.get_knowledge_documents_count(query) + res.docs = doc_count + responses.append(res) + return responses """get knowledge get_knowledge_documents""" @@ -191,8 +208,51 @@ class KnowledgeService: """delete knowledge space""" - def delete_knowledge_space(self, space_id: int): - return knowledge_space_dao.delete_knowledge_space(space_id) + def delete_space(self, space_name: str): + query = KnowledgeSpaceEntity(name=space_name) + spaces = knowledge_space_dao.get_knowledge_space(query) + if len(spaces) == 0: + raise Exception(f"delete error, no space name:{space_name} in database") + space = spaces[0] + vector_config = {} + vector_config["vector_store_name"] = space.name + vector_config["vector_store_type"] = CFG.VECTOR_STORE_TYPE + vector_config["chroma_persist_path"] = KNOWLEDGE_UPLOAD_ROOT_PATH + vector_client = VectorStoreConnector( + vector_store_type=CFG.VECTOR_STORE_TYPE, ctx=vector_config + ) + # delete vectors + vector_client.delete_vector_name(space.name) + document_query = KnowledgeDocumentEntity(space=space.name) + # delete chunks + documents = knowledge_document_dao.get_documents(document_query) + for document in documents: + document_chunk_dao.delete(document.id) + # delete documents + knowledge_document_dao.delete(document_query) + # delete space + return knowledge_space_dao.delete_knowledge_space(space) + + def delete_document(self, space_name: str, doc_name: str): + document_query = KnowledgeDocumentEntity(doc_name=doc_name, space=space_name) + documents = knowledge_document_dao.get_documents(document_query) + if len(documents) != 1: + raise Exception(f"there are no or more than one document called {doc_name}") + vector_ids = documents[0].vector_ids + if vector_ids is not None: + vector_config = {} + vector_config["vector_store_name"] = space_name + vector_config["vector_store_type"] = CFG.VECTOR_STORE_TYPE + vector_config["chroma_persist_path"] = KNOWLEDGE_UPLOAD_ROOT_PATH + vector_client = VectorStoreConnector( + vector_store_type=CFG.VECTOR_STORE_TYPE, ctx=vector_config + ) + # delete vector by ids + vector_client.delete_by_ids(vector_ids) + # delete chunks + document_chunk_dao.delete(documents[0].id) + # delete document + return knowledge_document_dao.delete(document_query) """get document chunks""" diff --git a/pilot/server/knowledge/space_db.py b/pilot/server/knowledge/space_db.py index d4f6fbba6..911683fdd 100644 --- a/pilot/server/knowledge/space_db.py +++ b/pilot/server/knowledge/space_db.py @@ -39,7 +39,7 @@ class KnowledgeSpaceDao: session = self.Session() knowledge_space = KnowledgeSpaceEntity( name=space.name, - vector_type=space.vector_type, + vector_type=CFG.VECTOR_STORE_TYPE, desc=space.desc, owner=space.owner, gmt_created=datetime.now(), @@ -47,7 +47,6 @@ class KnowledgeSpaceDao: ) session.add(knowledge_space) session.commit() - session.close() def get_knowledge_space(self, query: KnowledgeSpaceEntity): @@ -86,6 +85,7 @@ class KnowledgeSpaceDao: KnowledgeSpaceEntity.gmt_created.desc() ) result = knowledge_spaces.all() + session.close() return result def update_knowledge_space(self, space_id: int, space: KnowledgeSpaceEntity): @@ -97,9 +97,9 @@ class KnowledgeSpaceDao: self.conn.commit() cursor.close() - def delete_knowledge_space(self, space_id: int): - cursor = self.conn.cursor() - query = "DELETE FROM knowledge_space WHERE id = %s" - cursor.execute(query, (space_id,)) - self.conn.commit() - cursor.close() + def delete_knowledge_space(self, space: KnowledgeSpaceEntity): + session = self.Session() + if space: + session.delete(space) + session.commit() + session.close() diff --git a/pilot/server/static/404.html b/pilot/server/static/404.html index 4da41deb4..94df6c2b0 100644 --- a/pilot/server/static/404.html +++ b/pilot/server/static/404.html @@ -1 +1 @@ -