Merge branch 'main' into TY_V4.0_TESTING

This commit is contained in:
yhjun1026 2023-10-20 09:55:22 +08:00
commit cf8d107a89
36 changed files with 350 additions and 71 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 550 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 503 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 764 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

After

Width:  |  Height:  |  Size: 732 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

View File

@ -97,4 +97,10 @@ pip install langchain>=0.0.286
```commandline
pip install --use-pep517 fschat
```
##### Q9: alembic.util.exc.CommandError: Target database is not up to date.
delete files in `DB-GPT/pilot/meta_data/alembic/versions/` and reboot.
```commandline
rm -rf DB-GPT/pilot/meta_data/alembic/versions/*
```

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-09-26 17:47+0800\n"
"POT-Creation-Date: 2023-10-19 19:31+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@ -20,12 +20,12 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/faq/deploy/deploy_faq.md:1
#: ca823e9d6d1d433db7ed15c8273e1b00
#: fb640f7c38744cbf996dcf7f73f325f6
msgid "Installation FAQ"
msgstr "Installation FAQ"
#: ../../getting_started/faq/deploy/deploy_faq.md:5
#: 3803d098c534434f9f513b3a62de54a4
#: 79fd80e469d14d608554d53a0e0ed2e3
#, fuzzy
msgid ""
"Q1: execute `pip install -e .` error, found some package cannot find "
@ -35,18 +35,18 @@ msgstr ""
"cannot find correct version."
#: ../../getting_started/faq/deploy/deploy_faq.md:6
#: b785864f47e643df9a4669d8da6167d6
#: f1f6e3291d1446b5bbcf744cd4c4e89a
msgid "change the pip source."
msgstr "替换pip源."
#: ../../getting_started/faq/deploy/deploy_faq.md:13
#: ../../getting_started/faq/deploy/deploy_faq.md:20
#: c41f026fb1464c71a45d0746c224ecce f70fb69b568d4fc4ad4c4731b2032eaf
#: 68e1b39a08774a81b9061cc5205e4c1c dd34901f446749e998cd34ec5b6c44f4
msgid "or"
msgstr "或者"
#: ../../getting_started/faq/deploy/deploy_faq.md:27
#: d179e3d695764f838dc354eb0d978bb3
#: 0899f0e28dae443b8f912d96c797b79c
msgid ""
"Q2: sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to"
" open database file"
@ -55,86 +55,97 @@ msgstr ""
" open database file"
#: ../../getting_started/faq/deploy/deploy_faq.md:29
#: 55174e8d247a414e8c6c8861d4707a55
#: 3e60d8190e49436b8c40b34a67b7bfb3
msgid "make sure you pull latest code or create directory with mkdir pilot/data"
msgstr "make sure you pull latest code or create directory with mkdir pilot/data"
#: ../../getting_started/faq/deploy/deploy_faq.md:31
#: dbce9e9cae734a5083a6f0fc28bce7cd
#: baeaae20238842d3b8e4ae5b337198e5
msgid "Q3: The model keeps getting killed."
msgstr "Q3: The model keeps getting killed."
#: ../../getting_started/faq/deploy/deploy_faq.md:33
#: 2de5648d2e7546bf85f20f4162003298
#: eb3936307ad64b19b73483ff9ae126f2
msgid ""
"your GPU VRAM size is not enough, try replace your hardware or replace "
"other llms."
msgstr "GPU显存不够, 增加显存或者换一个显存小的模型"
#: ../../getting_started/faq/deploy/deploy_faq.md:35
#: 47810771cd364964b9b5b8fd85bca4ee
#: f6dba770717041699c73b4cd00d48aad
msgid "Q4: How to access website on the public network"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:37
#: e8c5bac6680648509d528ea6aaf5994e
#: 447d9e9374de44bab6d8a03f2c936676
msgid ""
"You can try to use gradio's [network](https://github.com/gradio-"
"app/gradio/blob/main/gradio/networking.py) to achieve."
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:48
#: bb75ec127f574c00a09d92d5206e9357
#: 5e34dd4dfcf34feeb1815dfa974041d0
msgid "Open `url` with your browser to see the website."
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:50
#: 5fdb87b84bd94385a1a93dab8d41ebe8
#: aaef774ce6124021a3862bc0a25d465f
msgid "Q5: (Windows) execute `pip install -e .` error"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:52
#: 31eef51e044044f29f3ad08defa9c305
#: ec3945df451c4ec2b32ebb476f45c82b
msgid "The error log like the following:"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:71
#: aaba0c3060b443e4b9877f70d78321ce
#: 1df09f6d9f9b4c1a8a32d6e271e5ee39
msgid ""
"Download and install `Microsoft C++ Build Tools` from [visual-cpp-build-"
"tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:75
#: 4c8137546e5c4240884f7ea6d9d922bf
#: 251f47bfa5694242a1c9d81a2022b7a0
msgid "Q6: `Torch not compiled with CUDA enabled`"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:82
#: 01daf14f8c494219b1d9a5af4449951e
#: bc9dfdfc47924a0e8d3ec535e23bf923
msgid "Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive)"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:83
#: c75e6371911e4d5ca6859e51501c9679
#: b5a632baa42745bdbee5d6ba516d8d8b
msgid ""
"Reinstall PyTorch [start-locally](https://pytorch.org/get-started/locally"
"/#start-locally) with CUDA support."
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:85
#: 7cfb9003e505445ebb9ed3d015e184e2
#: 0092fb91642749f5a55b629017c0de6a
msgid "Q7: ImportError: cannot import name 'PersistentClient' from 'chromadb'."
msgstr "Q7: ImportError: cannot import name 'PersistentClient' from 'chromadb'."
#: ../../getting_started/faq/deploy/deploy_faq.md:91
#: e1d5d5d85ddc480d8d81f7b550848cbf
#: 4aa87418f2a54c138bf3b7ff28a7e776
msgid ""
"Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
"HuggingFaceEmbeddings.model_kwargs extra not permitted"
msgstr "Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
msgstr ""
"Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
"HuggingFaceEmbeddings.model_kwargs extra not permitted"
#: ../../getting_started/faq/deploy/deploy_faq.md:102
#: 6b690ab272af44f6b126cfe5ce1435ef
msgid "Q9: alembic.util.exc.CommandError: Target database is not up to date."
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:103
#: 223026d3b9124363b695937922d8f8d5
msgid "delete files in `DB-GPT/pilot/meta_data/alembic/versions/` and reboot."
msgstr "删除`DB-GPT/pilot/meta_data/alembic/versions/`目录下文件"
#~ msgid ""
#~ "Q2: When use Mysql, Access denied "
#~ "for user 'root@localhost'(using password :NO)"

View File

@ -10,6 +10,8 @@ from pilot.base_modules.meta_data.meta_data import Base, engine, session
char_set_sql = DDL("ALTER TABLE plugin_hub CONVERT TO CHARACTER SET utf8mb4")
class PluginHubEntity(Base):
__tablename__ = "plugin_hub"
__table_args__ = {
@ -35,7 +37,6 @@ class PluginHubEntity(Base):
Index("idx_q_type", "type")
class PluginHubDao(BaseDao[PluginHubEntity]):
def __init__(self):
super().__init__(

View File

@ -52,7 +52,18 @@ class RDBMSDatabase(BaseConnect):
custom_table_info: Optional[dict] = None,
view_support: bool = False,
):
"""Create engine from database URI."""
"""Create engine from database URI.
Args:
- engine: Engine sqlalchemy.engine
- schema: Optional[str].
- metadata: Optional[MetaData]
- ignore_tables: Optional[List[str]]
- include_tables: Optional[List[str]]
- sample_rows_in_table_info: int default:3,
- indexes_in_table_info: bool = False,
- custom_table_info: Optional[dict] = None,
- view_support: bool = False,
"""
self._engine = engine
self._schema = schema
if include_tables and ignore_tables:
@ -92,6 +103,15 @@ class RDBMSDatabase(BaseConnect):
engine_args: Optional[dict] = None,
**kwargs: Any,
) -> RDBMSDatabase:
"""Construct a SQLAlchemy engine from uri database.
Args:
host (str): database host.
port (int): database port.
user (str): database user.
pwd (str): database password.
db_name (str): database name.
engine_args (Optional[dict]):other engine_args.
"""
db_url: str = (
cls.driver
+ "://"

View File

@ -21,7 +21,13 @@ class CSVEmbedding(SourceEmbedding):
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize with csv path."""
"""Initialize with csv path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)

View File

@ -28,7 +28,16 @@ class EmbeddingEngine:
text_splitter: Optional[TextSplitter] = None,
embedding_factory: EmbeddingFactory = None,
):
"""Initialize with knowledge embedding client, model_name, vector_store_config, knowledge_type, knowledge_source"""
"""Initialize with knowledge embedding client, model_name, vector_store_config, knowledge_type, knowledge_source
Args:
- model_name: model_name
- vector_store_config: vector store config: Dict
- knowledge_type: Optional[KnowledgeType]
- knowledge_source: Optional[str]
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
- embedding_factory: EmbeddingFactory
"""
self.knowledge_source = knowledge_source
self.model_name = model_name
self.vector_store_config = vector_store_config
@ -65,6 +74,11 @@ class EmbeddingEngine:
)
def similar_search(self, text, topk):
"""vector db similar search
Args:
- text: query text
- topk: top k
"""
vector_client = VectorStoreConnector(
self.vector_store_config["vector_store_type"], self.vector_store_config
)
@ -75,12 +89,17 @@ class EmbeddingEngine:
return ans
def vector_exist(self):
"""vector db is exist"""
vector_client = VectorStoreConnector(
self.vector_store_config["vector_store_type"], self.vector_store_config
)
return vector_client.vector_name_exists()
def delete_by_ids(self, ids):
"""delete vector db by ids
Args:
- ids: vector ids
"""
vector_client = VectorStoreConnector(
self.vector_store_config["vector_store_type"], self.vector_store_config
)

View File

@ -23,7 +23,13 @@ class PDFEmbedding(SourceEmbedding):
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize pdf word path."""
"""Initialize pdf word path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)

View File

@ -23,7 +23,13 @@ class PPTEmbedding(SourceEmbedding):
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize ppt word path."""
"""Initialize ppt word path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)

View File

@ -29,7 +29,14 @@ class SourceEmbedding(ABC):
text_splitter: Optional[TextSplitter] = None,
embedding_args: Optional[Dict] = None,
):
"""Initialize with Loader url, model_name, vector_store_config"""
"""Initialize with Loader url, model_name, vector_store_config
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
- embedding_args: Optional
"""
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None
@ -44,21 +51,33 @@ class SourceEmbedding(ABC):
@register
def data_process(self, text):
"""pre process data."""
"""pre process data.
Args:
- text: raw text
"""
@register
def text_splitter(self, text_splitter: TextSplitter):
"""add text split chunk"""
"""add text split chunk
Args:
- text_splitter: TextSplitter
"""
pass
@register
def text_to_vector(self, docs):
"""transform vector"""
"""transform vector
Args:
- docs: List[Document]
"""
pass
@register
def index_to_store(self, docs):
"""index to vector store"""
"""index to vector store
Args:
- docs: List[Document]
"""
self.vector_client = VectorStoreConnector(
self.vector_store_config["vector_store_type"], self.vector_store_config
)
@ -66,7 +85,10 @@ class SourceEmbedding(ABC):
@register
def similar_search(self, doc, topk):
"""vector store similarity_search"""
"""vector store similarity_search
Args:
- query: query
"""
self.vector_client = VectorStoreConnector(
self.vector_store_config["vector_store_type"], self.vector_store_config
)
@ -82,6 +104,7 @@ class SourceEmbedding(ABC):
return self.vector_client.vector_name_exists()
def source_embedding(self):
"""read()->data_process()->text_split()->index_to_store()"""
if "read" in registered_methods:
text = self.read()
if "data_process" in registered_methods:

View File

@ -20,7 +20,13 @@ class StringEmbedding(SourceEmbedding):
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize raw text word path."""
"""Initialize raw text word path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path=file_path,
vector_store_config=vector_store_config,

View File

@ -22,7 +22,13 @@ class URLEmbedding(SourceEmbedding):
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize url word path."""
"""Initialize url word path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)

View File

@ -23,7 +23,13 @@ class WordEmbedding(SourceEmbedding):
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize with word path."""
"""Initialize with word path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)

View File

@ -31,7 +31,6 @@ class ChatHistoryEntity(Base):
Index("idx_q_conv", "summary")
class ChatHistoryDao(BaseDao[ChatHistoryEntity]):
def __init__(self):
super().__init__(

View File

@ -9,6 +9,10 @@ from pilot.openapi.api_v1.feedback.feed_back_model import FeedBackBody
class ChatFeedBackEntity(Base):
__tablename__ = "chat_feed_back"
__table_args__ = {
"mysql_charset": "utf8mb4",
"mysql_collate": "utf8mb4_unicode_ci",
}
id = Column(Integer, primary_key=True)
conv_uid = Column(String(128))
conv_index = Column(Integer)

View File

@ -21,6 +21,12 @@ CFG = Config()
class BaseChat(ABC):
"""DB-GPT Chat Service Base Module
Include:
stream_call():scene + prompt -> stream response
nostream_call():scene + prompt -> nostream response
"""
chat_scene: str = None
llm_model: Any = None
# By default, keep the last two rounds of conversation records as the context
@ -32,6 +38,14 @@ class BaseChat(ABC):
arbitrary_types_allowed = True
def __init__(self, chat_param: Dict):
"""Chat Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) select param
"""
self.chat_session_id = chat_param["chat_session_id"]
self.chat_mode = chat_param["chat_mode"]
self.current_user_input: str = chat_param["current_user_input"]

View File

@ -18,10 +18,20 @@ logger = logging.getLogger("chat_agent")
class ChatAgent(BaseChat):
"""Chat With Agent through plugin"""
chat_scene: str = ChatScene.ChatAgent.value()
chat_retention_rounds = 0
def __init__(self, chat_param: Dict):
"""Chat Agent Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) agent plugin
"""
if not chat_param["select_param"]:
raise ValueError("Please select a Plugin!")
self.select_plugins = chat_param["select_param"].split(",")

View File

@ -19,10 +19,17 @@ CFG = Config()
class ChatDashboard(BaseChat):
chat_scene: str = ChatScene.ChatDashboard.value()
report_name: str
"""Number of results to return from the query"""
"""Chat Dashboard to generate dashboard chart"""
def __init__(self, chat_param: Dict):
""" """
"""Chat Dashboard Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) dbname
"""
self.db_name = chat_param["select_param"]
chat_param["chat_mode"] = ChatScene.ChatDashboard
super().__init__(chat_param=chat_param)

View File

@ -19,10 +19,20 @@ CFG = Config()
class ChatExcel(BaseChat):
"""a Excel analyzer to analyze Excel Data"""
chat_scene: str = ChatScene.ChatExcel.value()
chat_retention_rounds = 1
def __init__(self, chat_param: Dict):
"""Chat Excel Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) file path
"""
chat_mode = ChatScene.ChatExcel
self.select_param = chat_param["select_param"]

View File

@ -15,6 +15,14 @@ class ChatWithDbAutoExecute(BaseChat):
"""Number of results to return from the query"""
def __init__(self, chat_param: Dict):
"""Chat Data Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) dbname
"""
chat_mode = ChatScene.ChatWithDbExecute
self.db_name = chat_param["select_param"]
chat_param["chat_mode"] = chat_mode
@ -31,6 +39,9 @@ class ChatWithDbAutoExecute(BaseChat):
self.top_k: int = 200
def generate_input_values(self):
"""
generate input values
"""
try:
from pilot.summary.db_summary_client import DBSummaryClient
except ImportError:

View File

@ -12,10 +12,17 @@ CFG = Config()
class ChatWithDbQA(BaseChat):
chat_scene: str = ChatScene.ChatWithDbQA.value()
"""Number of results to return from the query"""
"""As a DBA, Chat DB Module, chat with combine DB meta schema """
def __init__(self, chat_param: Dict):
""" """
"""Chat DB Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) dbname
"""
self.db_name = chat_param["select_param"]
chat_param["chat_mode"] = ChatScene.ChatWithDbQA
super().__init__(chat_param=chat_param)

View File

@ -11,11 +11,21 @@ CFG = Config()
class ChatWithPlugin(BaseChat):
"""Chat With Plugin"""
chat_scene: str = ChatScene.ChatExecution.value()
plugins_prompt_generator: PluginPromptGenerator
select_plugin: str = None
def __init__(self, chat_param: Dict):
"""Chat Dashboard Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) plugin selector
"""
self.plugin_selector = chat_param["select_param"]
chat_param["chat_mode"] = ChatScene.ChatExecution
super().__init__(chat_param=chat_param)

View File

@ -19,10 +19,17 @@ CFG = Config()
class ChatKnowledge(BaseChat):
chat_scene: str = ChatScene.ChatKnowledge.value()
"""Number of results to return from the query"""
"""KBQA Chat Module"""
def __init__(self, chat_param: Dict):
""" """
"""Chat Knowledge Module Initialization
Args:
- chat_param: Dict
- chat_session_id: (str) chat session_id
- current_user_input: (str) current user input
- model_name:(str) llm model name
- select_param:(str) space name
"""
from pilot.embedding_engine.embedding_engine import EmbeddingEngine
from pilot.embedding_engine.embedding_factory import EmbeddingFactory

View File

@ -21,6 +21,7 @@ def signal_handler(sig, frame):
def async_db_summary(system_app: SystemApp):
"""async db schema into vector db"""
from pilot.summary.db_summary_client import DBSummaryClient
client = DBSummaryClient(system_app=system_app)

View File

@ -115,6 +115,9 @@ def _get_webserver_params(args: List[str] = None):
def initialize_app(param: WebWerverParameters = None, args: List[str] = None):
"""Initialize app
If you use gunicorn as a process manager, initialize_app can be invoked in `on_starting` hook.
Args:
param:WebWerverParameters
args:List[str]
"""
if not param:
param = _get_webserver_params(args)

View File

@ -12,6 +12,10 @@ CFG = Config()
class DocumentChunkEntity(Base):
__tablename__ = "document_chunk"
__table_args__ = {
"mysql_charset": "utf8mb4",
"mysql_collate": "utf8mb4_unicode_ci",
}
id = Column(Integer, primary_key=True)
document_id = Column(Integer)
doc_name = Column(String(100))

View File

@ -11,6 +11,10 @@ CFG = Config()
class KnowledgeDocumentEntity(Base):
__tablename__ = "knowledge_document"
__table_args__ = {
"mysql_charset": "utf8mb4",
"mysql_collate": "utf8mb4_unicode_ci",
}
id = Column(Integer, primary_key=True)
doc_name = Column(String(100))
doc_type = Column(String(100))
@ -24,6 +28,8 @@ class KnowledgeDocumentEntity(Base):
gmt_created = Column(DateTime)
gmt_modified = Column(DateTime)
__table_args__ = {"mysql_charset": "utf8mb4"}
def __repr__(self):
return f"KnowledgeDocumentEntity(id={self.id}, doc_name='{self.doc_name}', doc_type='{self.doc_type}', chunk_size='{self.chunk_size}', status='{self.status}', last_sync='{self.last_sync}', content='{self.content}', result='{self.result}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"

View File

@ -57,12 +57,21 @@ class SyncStatus(Enum):
# @singleton
class KnowledgeService:
"""KnowledgeService
Knowledge Management Service:
-knowledge_space management
-knowledge_document management
-embedding management
"""
def __init__(self):
pass
"""create knowledge space"""
def create_knowledge_space(self, request: KnowledgeSpaceRequest):
"""create knowledge space
Args:
- request: KnowledgeSpaceRequest
"""
query = KnowledgeSpaceEntity(
name=request.name,
)
@ -72,9 +81,11 @@ class KnowledgeService:
knowledge_space_dao.create_knowledge_space(request)
return True
"""create knowledge document"""
def create_knowledge_document(self, space, request: KnowledgeDocumentRequest):
"""create knowledge document
Args:
- request: KnowledgeDocumentRequest
"""
query = KnowledgeDocumentEntity(doc_name=request.doc_name, space=space)
documents = knowledge_document_dao.get_knowledge_documents(query)
if len(documents) > 0:
@ -91,9 +102,11 @@ class KnowledgeService:
)
return knowledge_document_dao.create_knowledge_document(document)
"""get knowledge space"""
def get_knowledge_space(self, request: KnowledgeSpaceRequest):
"""get knowledge space
Args:
- request: KnowledgeSpaceRequest
"""
query = KnowledgeSpaceEntity(
name=request.name, vector_type=request.vector_type, owner=request.owner
)
@ -116,6 +129,10 @@ class KnowledgeService:
return responses
def arguments(self, space_name):
"""show knowledge space arguments
Args:
- space_name: Knowledge Space Name
"""
query = KnowledgeSpaceEntity(name=space_name)
spaces = knowledge_space_dao.get_knowledge_space(query)
if len(spaces) != 1:
@ -128,6 +145,11 @@ class KnowledgeService:
return json.loads(context)
def argument_save(self, space_name, argument_request: SpaceArgumentRequest):
"""save argument
Args:
- space_name: Knowledge Space Name
- argument_request: SpaceArgumentRequest
"""
query = KnowledgeSpaceEntity(name=space_name)
spaces = knowledge_space_dao.get_knowledge_space(query)
if len(spaces) != 1:
@ -136,9 +158,12 @@ class KnowledgeService:
space.context = argument_request.argument
return knowledge_space_dao.update_knowledge_space(space)
"""get knowledge get_knowledge_documents"""
def get_knowledge_documents(self, space, request: DocumentQueryRequest):
"""get knowledge documents
Args:
- space: Knowledge Space Name
- request: DocumentQueryRequest
"""
query = KnowledgeDocumentEntity(
doc_name=request.doc_name,
doc_type=request.doc_type,
@ -153,9 +178,12 @@ class KnowledgeService:
res.page = request.page
return res
"""sync knowledge document chunk into vector store"""
def sync_knowledge_document(self, space_name, sync_request: DocumentSyncRequest):
"""sync knowledge document chunk into vector store
Args:
- space: Knowledge Space Name
- sync_request: DocumentSyncRequest
"""
from pilot.embedding_engine.embedding_engine import EmbeddingEngine
from pilot.embedding_engine.embedding_factory import EmbeddingFactory
from pilot.embedding_engine.pre_text_splitter import PreTextSplitter
@ -249,11 +277,6 @@ class KnowledgeService:
doc.chunk_size = len(chunk_docs)
doc.gmt_modified = datetime.now()
knowledge_document_dao.update_knowledge_document(doc)
# async doc embeddings
# thread = threading.Thread(
# target=self.async_doc_embedding, args=(client, chunk_docs, doc)
# )
# thread.start()
executor = CFG.SYSTEM_APP.get_component(
ComponentType.EXECUTOR_DEFAULT, ExecutorFactory
).create()
@ -277,16 +300,21 @@ class KnowledgeService:
return True
"""update knowledge space"""
def update_knowledge_space(
self, space_id: int, space_request: KnowledgeSpaceRequest
):
"""update knowledge space
Args:
- space_id: space id
- space_request: KnowledgeSpaceRequest
"""
knowledge_space_dao.update_knowledge_space(space_id, space_request)
"""delete knowledge space"""
def delete_space(self, space_name: str):
"""delete knowledge space
Args:
- space_name: knowledge space name
"""
query = KnowledgeSpaceEntity(name=space_name)
spaces = knowledge_space_dao.get_knowledge_space(query)
if len(spaces) == 0:
@ -312,6 +340,11 @@ class KnowledgeService:
return knowledge_space_dao.delete_knowledge_space(space)
def delete_document(self, space_name: str, doc_name: str):
"""delete document
Args:
- space_name: knowledge space name
- doc_name: document name
"""
document_query = KnowledgeDocumentEntity(doc_name=doc_name, space=space_name)
documents = knowledge_document_dao.get_documents(document_query)
if len(documents) != 1:
@ -332,9 +365,11 @@ class KnowledgeService:
# delete document
return knowledge_document_dao.delete(document_query)
"""get document chunks"""
def get_document_chunks(self, request: ChunkQueryRequest):
"""get document chunks
Args:
- request: ChunkQueryRequest
"""
query = DocumentChunkEntity(
id=request.id,
document_id=request.document_id,
@ -350,6 +385,12 @@ class KnowledgeService:
return res
def async_doc_embedding(self, client, chunk_docs, doc):
"""async document embedding into vector db
Args:
- client: EmbeddingEngine Client
- chunk_docs: List[Document]
- doc: doc
"""
logger.info(
f"async_doc_embedding, doc:{doc.doc_name}, chunk_size:{len(chunk_docs)}, begin embedding to vector store-{CFG.VECTOR_STORE_TYPE}"
)
@ -391,6 +432,10 @@ class KnowledgeService:
return context_template_string
def get_space_context(self, space_name):
"""get space contect
Args:
- space_name: space name
"""
request = KnowledgeSpaceRequest()
request.name = space_name
spaces = self.get_knowledge_space(request)

View File

@ -12,6 +12,10 @@ CFG = Config()
class KnowledgeSpaceEntity(Base):
__tablename__ = "knowledge_space"
__table_args__ = {
"mysql_charset": "utf8mb4",
"mysql_collate": "utf8mb4_unicode_ci",
}
id = Column(Integer, primary_key=True)
name = Column(String(100))
vector_type = Column(String(100))

View File

@ -16,6 +16,9 @@ CFG = Config()
model_path = LLM_MODEL_CONFIG.get(CFG.LLM_MODEL)
if __name__ == "__main__":
"""run llm server including controller, manager worker
If you use gunicorn as a process manager, initialize_app can be invoked in `on_starting` hook.
"""
run_worker_manager(
model_name=CFG.LLM_MODEL,
model_path=model_path,

View File

@ -13,6 +13,10 @@ CFG = Config()
class PromptManageEntity(Base):
__tablename__ = "prompt_manage"
__table_args__ = {
"mysql_charset": "utf8mb4",
"mysql_collate": "utf8mb4_unicode_ci",
}
id = Column(Integer, primary_key=True)
chat_scene = Column(String(100))
sub_chat_scene = Column(String(100))

View File

@ -14,7 +14,11 @@ class VectorStoreConnector:
"""
def __init__(self, vector_store_type, ctx: {}) -> None:
"""initialize vector store connector."""
"""initialize vector store connector.
Args:
- vector_store_type: vector store type Milvus, Chroma, Weaviate
- ctx: vector store config params.
"""
self.ctx = ctx
self._register()
@ -30,20 +34,30 @@ class VectorStoreConnector:
"""load document in vector database."""
return self.client.load_document(docs)
def similar_search(self, docs, topk):
"""similar search in vector database."""
return self.client.similar_search(docs, topk)
def similar_search(self, doc: str, topk: int):
"""similar search in vector database.
Args:
- doc: query text
- topk: topk
"""
return self.client.similar_search(doc, topk)
def vector_name_exists(self):
"""is vector store name exist."""
return self.client.vector_name_exists()
def delete_vector_name(self, vector_name):
"""vector store delete"""
"""vector store delete
Args:
- vector_name: vector store name
"""
return self.client.delete_vector_name(vector_name)
def delete_by_ids(self, ids):
"""vector store delete by ids."""
"""vector store delete by ids.
Args:
- ids: vector ids
"""
return self.client.delete_by_ids(ids=ids)
def _match(self, vector_store_type) -> bool: