diff --git a/pilot/vector_store/milvus_store.py b/pilot/vector_store/milvus_store.py index a61027850..8af9240e2 100644 --- a/pilot/vector_store/milvus_store.py +++ b/pilot/vector_store/milvus_store.py @@ -139,29 +139,21 @@ class MilvusStore(VectorStoreBase): fields.append( FieldSchema(text_field, DataType.VARCHAR, max_length=max_length + 1) ) - # create the primary key field + # primary key field fields.append( FieldSchema(primary_field, DataType.INT64, is_primary=True, auto_id=True) ) - # create the vector field + # vector field fields.append(FieldSchema(vector_field, DataType.FLOAT_VECTOR, dim=dim)) - # Create the schema for the collection + # milvus the schema for the collection schema = CollectionSchema(fields) # Create the collection collection = Collection(collection_name, schema) self.col = collection - # Index parameters for the collection + # index parameters for the collection index = self.index_params - # Create the index + # milvus index collection.create_index(vector_field, index) - # Create the VectorStore - # milvus = cls( - # embedding, - # kwargs.get("connection_args", {"port": 19530}), - # collection_name, - # text_field, - # ) - # Add the texts. schema = collection.schema for x in schema.fields: self.fields.append(x.name) diff --git a/requirements.txt b/requirements.txt index aea4f00e0..685661026 100644 --- a/requirements.txt +++ b/requirements.txt @@ -69,6 +69,7 @@ colorama playsound distro pypdf +milvus-cli==0.3.2 # Testing dependencies pytest diff --git a/tools/knowlege_init.py b/tools/knowlege_init.py index 23ca33a80..e64521031 100644 --- a/tools/knowlege_init.py +++ b/tools/knowlege_init.py @@ -2,11 +2,11 @@ # -*- coding: utf-8 -*- import argparse -from pilot.configs.model_config import DATASETS_DIR, LLM_MODEL_CONFIG, VECTOR_SEARCH_TOP_K, VECTOR_STORE_CONFIG, \ - VECTOR_STORE_TYPE +from pilot.configs.config import Config +from pilot.configs.model_config import DATASETS_DIR, LLM_MODEL_CONFIG, VECTOR_SEARCH_TOP_K from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding - +CFG = Config() class LocalKnowledgeInit: embeddings: object = None model_name = LLM_MODEL_CONFIG["text2vec"] @@ -32,6 +32,7 @@ class LocalKnowledgeInit: dc, s = doc yield s, dc + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--vector_name", type=str, default="default") @@ -40,8 +41,8 @@ if __name__ == "__main__": args = parser.parse_args() vector_name = args.vector_name append_mode = args.append - store_type = VECTOR_STORE_TYPE - vector_store_config = {"url": VECTOR_STORE_CONFIG["url"], "port": VECTOR_STORE_CONFIG["port"], "vector_store_name":vector_name} + store_type = CFG.VECTOR_STORE_TYPE + vector_store_config = {"vector_store_name": vector_name} print(vector_store_config) kv = LocalKnowledgeInit(vector_store_config=vector_store_config) vector_store = kv.knowledge_persist(file_path=DATASETS_DIR, append_mode=append_mode)