fix(ChatKnowledge): custom vector store config in .env does not work for other vector DBs (#656)

Custom vector store settings defined in .env for other vector databases do not take effect.
Close #655
This commit is contained in:
FangYin Cheng 2023-10-09 10:05:44 +08:00 committed by GitHub
commit 4aa825d708
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 23 additions and 21 deletions

View File

@ -112,13 +112,21 @@ DENYLISTED_PLUGINS=
#*******************************************************************#
#** VECTOR STORE SETTINGS **#
#*******************************************************************#
### Chroma vector db config
VECTOR_STORE_TYPE=Chroma
#CHROMA_PERSIST_PATH=/root/DB-GPT/pilot/data
### Milvus vector db config
#VECTOR_STORE_TYPE=Milvus
#MILVUS_URL=127.0.0.1
#MILVUS_PORT=19530
#MILVUS_USERNAME=
#MILVUS_PASSWORD=
#MILVUS_SECURE=
### Weaviate vector db config
#VECTOR_STORE_TYPE=Weaviate
#WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network
#*******************************************************************#
#** WebServer Language Support **#

View File

@ -46,7 +46,6 @@ class ChatKnowledge(BaseChat):
vector_store_config = {
"vector_store_name": self.knowledge_space,
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
}
embedding_factory = CFG.SYSTEM_APP.get_component(
"embedding_factory", EmbeddingFactory
@ -93,7 +92,7 @@ class ChatKnowledge(BaseChat):
context = [d.page_content for d in docs]
context = context[: self.max_token]
relations = list(
set([os.path.basename(d.metadata.get("source")) for d in docs])
set([os.path.basename(d.metadata.get("source", "")) for d in docs])
)
input_values = {
"context": context,

View File

@ -239,7 +239,6 @@ class KnowledgeService:
vector_store_config={
"vector_store_name": space_name,
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
},
text_splitter=text_splitter,
embedding_factory=embedding_factory,

View File

@ -44,7 +44,6 @@ class DBSummaryClient:
vector_store_config = {
"vector_store_name": dbname + "_summary",
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
"embeddings": embeddings,
}
embedding = StringEmbedding(
@ -73,7 +72,6 @@ class DBSummaryClient:
table_vector_store_config = {
"vector_store_name": dbname + "_" + table_name + "_ts",
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
"embeddings": embeddings,
}
embedding = StringEmbedding(
@ -91,7 +89,6 @@ class DBSummaryClient:
vector_store_config = {
"vector_store_name": dbname + "_profile",
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
}
embedding_factory = CFG.SYSTEM_APP.get_component(
"embedding_factory", EmbeddingFactory
@ -112,9 +109,7 @@ class DBSummaryClient:
vector_store_config = {
"vector_store_name": dbname + "_summary",
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
}
embedding_factory = CFG.SYSTEM_APP.get_component(
"embedding_factory", EmbeddingFactory
@ -142,9 +137,7 @@ class DBSummaryClient:
for table in related_tables:
vector_store_config = {
"vector_store_name": dbname + "_" + table + "_ts",
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
}
knowledge_embedding_client = EmbeddingEngine(
model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL],
@ -172,7 +165,6 @@ class DBSummaryClient:
vector_store_name = dbname + "_profile"
profile_store_config = {
"vector_store_name": vector_store_name,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"embeddings": embeddings,
}

View File

@ -15,7 +15,7 @@ class VectorStoreBase(ABC):
pass
@abstractmethod
def vector_name_exists(self, text, topk) -> None:
def vector_name_exists(self) -> None:
"""is vector store name exist."""
pass

View File

@ -16,10 +16,13 @@ class ChromaStore(VectorStoreBase):
from langchain.vectorstores import Chroma
self.ctx = ctx
self.embeddings = ctx.get("embeddings", None)
self.persist_dir = os.path.join(
ctx["chroma_persist_path"], ctx["vector_store_name"] + ".vectordb"
chroma_path = ctx.get(
"CHROMA_PERSIST_PATH", os.getenv("CHROMA_PERSIST_PATH", os.getcwd())
)
self.persist_dir = os.path.join(
chroma_path, ctx["vector_store_name"] + ".vectordb"
)
self.embeddings = ctx.get("embeddings", None)
chroma_settings = Settings(
# chroma_db_impl="duckdb+parquet", => deprecated configuration of Chroma
persist_directory=self.persist_dir,

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import logging
import os
from typing import Any, Iterable, List, Optional, Tuple
from pymilvus import Collection, DataType, connections, utility
@ -21,12 +22,12 @@ class MilvusStore(VectorStoreBase):
# self.configure(cfg)
connect_kwargs = {}
self.uri = ctx.get("milvus_url", None)
self.port = ctx.get("milvus_port", None)
self.username = ctx.get("milvus_username", None)
self.password = ctx.get("milvus_password", None)
self.uri = ctx.get("MILVUS_URL", os.getenv("MILVUS_URL"))
self.port = ctx.get("MILVUS_PORT", os.getenv("MILVUS_PORT"))
self.username = ctx.get("MILVUS_USERNAME", os.getenv("MILVUS_USERNAME"))
self.password = ctx.get("MILVUS_PASSWORD", os.getenv("MILVUS_PASSWORD"))
self.secure = ctx.get("MILVUS_SECURE", os.getenv("MILVUS_SECURE"))
self.collection_name = ctx.get("vector_store_name", None)
self.secure = ctx.get("secure", None)
self.embedding = ctx.get("embeddings", None)
self.fields = []
self.alias = "default"

View File

@ -28,7 +28,7 @@ class WeaviateStore(VectorStoreBase):
)
self.ctx = ctx
self.weaviate_url = CFG.WEAVIATE_URL
self.weaviate_url = ctx.get("WEAVIATE_URL", os.getenv("WEAVIATE_URL"))
self.embedding = ctx.get("embeddings", None)
self.vector_name = ctx["vector_store_name"]
self.persist_dir = os.path.join(