From 0ff63feca1b1b84a9990bd563f7916f1a39d29aa Mon Sep 17 00:00:00 2001 From: aries_ckt <916701291@qq.com> Date: Sun, 8 Oct 2023 22:10:50 +0500 Subject: [PATCH] fix:.env customize vector store config does not work Close #655 --- .env.template | 8 ++++++++ pilot/scene/chat_knowledge/v1/chat.py | 3 +-- pilot/server/knowledge/service.py | 1 - pilot/summary/db_summary_client.py | 8 -------- pilot/vector_store/base.py | 2 +- pilot/vector_store/chroma_store.py | 9 ++++++--- pilot/vector_store/milvus_store.py | 11 ++++++----- pilot/vector_store/weaviate_store.py | 2 +- 8 files changed, 23 insertions(+), 21 deletions(-) diff --git a/.env.template b/.env.template index c09418b4b..4c44d3241 100644 --- a/.env.template +++ b/.env.template @@ -112,13 +112,21 @@ DENYLISTED_PLUGINS= #*******************************************************************# #** VECTOR STORE SETTINGS **# #*******************************************************************# +### Chroma vector db config VECTOR_STORE_TYPE=Chroma +#CHROMA_PERSIST_PATH=/root/DB-GPT/pilot/data + +### Milvus vector db config +#VECTOR_STORE_TYPE=Milvus #MILVUS_URL=127.0.0.1 #MILVUS_PORT=19530 #MILVUS_USERNAME #MILVUS_PASSWORD #MILVUS_SECURE= +### Weaviate vector db config +#VECTOR_STORE_TYPE=Weaviate +#WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network #*******************************************************************# #** WebServer Language Support **# diff --git a/pilot/scene/chat_knowledge/v1/chat.py b/pilot/scene/chat_knowledge/v1/chat.py index 332e7c080..8177a1a5a 100644 --- a/pilot/scene/chat_knowledge/v1/chat.py +++ b/pilot/scene/chat_knowledge/v1/chat.py @@ -46,7 +46,6 @@ class ChatKnowledge(BaseChat): vector_store_config = { "vector_store_name": self.knowledge_space, "vector_store_type": CFG.VECTOR_STORE_TYPE, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, } embedding_factory = CFG.SYSTEM_APP.get_component( "embedding_factory", EmbeddingFactory @@ -93,7 +92,7 @@ class ChatKnowledge(BaseChat): context = [d.page_content for d in docs] context = context[: self.max_token] relations = list( - set([os.path.basename(d.metadata.get("source")) for d in docs]) + set([os.path.basename(d.metadata.get("source", "")) for d in docs]) ) input_values = { "context": context, diff --git a/pilot/server/knowledge/service.py b/pilot/server/knowledge/service.py index bf994d22d..c11fc3b46 100644 --- a/pilot/server/knowledge/service.py +++ b/pilot/server/knowledge/service.py @@ -239,7 +239,6 @@ class KnowledgeService: vector_store_config={ "vector_store_name": space_name, "vector_store_type": CFG.VECTOR_STORE_TYPE, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, }, text_splitter=text_splitter, embedding_factory=embedding_factory, diff --git a/pilot/summary/db_summary_client.py b/pilot/summary/db_summary_client.py index 1c56db45b..23597e0f0 100644 --- a/pilot/summary/db_summary_client.py +++ b/pilot/summary/db_summary_client.py @@ -44,7 +44,6 @@ class DBSummaryClient: vector_store_config = { "vector_store_name": dbname + "_summary", "vector_store_type": CFG.VECTOR_STORE_TYPE, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, "embeddings": embeddings, } embedding = StringEmbedding( @@ -73,7 +72,6 @@ class DBSummaryClient: table_vector_store_config = { "vector_store_name": dbname + "_" + table_name + "_ts", "vector_store_type": CFG.VECTOR_STORE_TYPE, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, "embeddings": embeddings, } embedding = StringEmbedding( @@ -91,7 +89,6 @@ class DBSummaryClient: vector_store_config = { "vector_store_name": dbname + "_profile", "vector_store_type": CFG.VECTOR_STORE_TYPE, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, } embedding_factory = CFG.SYSTEM_APP.get_component( "embedding_factory", EmbeddingFactory @@ -112,9 +109,7 @@ class DBSummaryClient: vector_store_config = { "vector_store_name": dbname + "_summary", - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, "vector_store_type": CFG.VECTOR_STORE_TYPE, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, } embedding_factory = CFG.SYSTEM_APP.get_component( "embedding_factory", EmbeddingFactory @@ -142,9 +137,7 @@ class DBSummaryClient: for table in related_tables: vector_store_config = { "vector_store_name": dbname + "_" + table + "_ts", - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, "vector_store_type": CFG.VECTOR_STORE_TYPE, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, } knowledge_embedding_client = EmbeddingEngine( model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL], @@ -172,7 +165,6 @@ class DBSummaryClient: vector_store_name = dbname + "_profile" profile_store_config = { "vector_store_name": vector_store_name, - "chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH, "vector_store_type": CFG.VECTOR_STORE_TYPE, "embeddings": embeddings, } diff --git a/pilot/vector_store/base.py b/pilot/vector_store/base.py index 74cd2f98c..4c03f2989 100644 --- a/pilot/vector_store/base.py +++ b/pilot/vector_store/base.py @@ -15,7 +15,7 @@ class VectorStoreBase(ABC): pass @abstractmethod - def vector_name_exists(self, text, topk) -> None: + def vector_name_exists(self) -> None: """is vector store name exist.""" pass diff --git a/pilot/vector_store/chroma_store.py b/pilot/vector_store/chroma_store.py index 58ae88bf6..f51bc9917 100644 --- a/pilot/vector_store/chroma_store.py +++ b/pilot/vector_store/chroma_store.py @@ -16,10 +16,13 @@ class ChromaStore(VectorStoreBase): from langchain.vectorstores import Chroma self.ctx = ctx - self.embeddings = ctx.get("embeddings", None) - self.persist_dir = os.path.join( - ctx["chroma_persist_path"], ctx["vector_store_name"] + ".vectordb" + chroma_path = ctx.get( + "CHROMA_PERSIST_PATH", os.getenv("CHROMA_PERSIST_PATH", os.getcwd()) ) + self.persist_dir = os.path.join( + chroma_path, ctx["vector_store_name"] + ".vectordb" + ) + self.embeddings = ctx.get("embeddings", None) chroma_settings = Settings( # chroma_db_impl="duckdb+parquet", => deprecated configuration of Chroma persist_directory=self.persist_dir, diff --git a/pilot/vector_store/milvus_store.py b/pilot/vector_store/milvus_store.py index 104cdc5fc..5deca8b47 100644 --- a/pilot/vector_store/milvus_store.py +++ b/pilot/vector_store/milvus_store.py @@ -1,5 +1,6 @@ from __future__ import annotations import logging +import os from typing import Any, Iterable, List, Optional, Tuple from pymilvus import Collection, DataType, connections, utility @@ -21,12 +22,12 @@ class MilvusStore(VectorStoreBase): # self.configure(cfg) connect_kwargs = {} - self.uri = ctx.get("milvus_url", None) - self.port = ctx.get("milvus_port", None) - self.username = ctx.get("milvus_username", None) - self.password = ctx.get("milvus_password", None) + self.uri = ctx.get("MILVUS_URL", os.getenv("MILVUS_URL")) + self.port = ctx.get("MILVUS_PORT", os.getenv("MILVUS_PORT")) + self.username = ctx.get("MILVUS_USERNAME", os.getenv("MILVUS_USERNAME")) + self.password = ctx.get("MILVUS_PASSWORD", os.getenv("MILVUS_PASSWORD")) + self.secure = ctx.get("MILVUS_SECURE", os.getenv("MILVUS_SECURE")) self.collection_name = ctx.get("vector_store_name", None) - self.secure = ctx.get("secure", None) self.embedding = ctx.get("embeddings", None) self.fields = [] self.alias = "default" diff --git a/pilot/vector_store/weaviate_store.py b/pilot/vector_store/weaviate_store.py index 9df9b39b1..795cf21f9 100644 --- a/pilot/vector_store/weaviate_store.py +++ b/pilot/vector_store/weaviate_store.py @@ -28,7 +28,7 @@ class WeaviateStore(VectorStoreBase): ) self.ctx = ctx - self.weaviate_url = CFG.WEAVIATE_URL + self.weaviate_url = ctx.get("WEAVIATE_URL", os.getenv("WEAVIATE_URL")) self.embedding = ctx.get("embeddings", None) self.vector_name = ctx["vector_store_name"] self.persist_dir = os.path.join(