diff --git a/pilot/source_embedding/search_milvus.py b/pilot/source_embedding/search_milvus.py
index a7898f183..25acff097 100644
--- a/pilot/source_embedding/search_milvus.py
+++ b/pilot/source_embedding/search_milvus.py
@@ -1,3 +1,4 @@
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import Milvus
 from pymilvus import Collection,utility
 from pymilvus import connections, DataType, FieldSchema, CollectionSchema
@@ -36,16 +37,21 @@ from pymilvus import connections, DataType, FieldSchema, CollectionSchema
 # hit = results[0][0]
 # hit.entity.get('title')
 
-milvus = connections.connect(
-    alias="default",
-    host='localhost',
-    port="19530"
-)
-data = ["aaa", "bbb"]
-# text_embeddings = Text2Vectors()
-mivuls = Milvus(collection_name='document', embedding_function= text_embeddings, connection_args={"host": "127.0.0.1", "port": "19530", "alias":"default"}, text_field="")
+# milvus = connections.connect(
+#     alias="default",
+#     host='localhost',
+#     port="19530"
+# )
+from pilot.vector_store.milvus_store import MilvusStore
 
-mivuls.from_texts(texts=data, embedding=text_embeddings)
+data = ["aaa", "bbb"]
+model_name = "/Users/chenketing/Desktop/project/all-MiniLM-L6-v2"
+embeddings = HuggingFaceEmbeddings(model_name=model_name)
+
+# text_embeddings = Text2Vectors()
+mivuls = MilvusStore(cfg={"url": "127.0.0.1", "port": "19530", "alias": "default", "table_name": "test_c"})
+
+# mivuls.from_texts(texts=data, embedding=embeddings)
 # docs,
 # embedding=embeddings,
 # connection_args={"host": "127.0.0.1", "port": "19530", "alias": "default"}
diff --git a/pilot/vector_store/milvus_store.py b/pilot/vector_store/milvus_store.py
new file mode 100644
index 000000000..1f07c969e
--- /dev/null
+++ b/pilot/vector_store/milvus_store.py
@@ -0,0 +1,85 @@
+"""Milvus-backed implementation of the vector store interface."""
+from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections
+
+from pilot.vector_store.vector_store_base import VectorStoreBase
+
+
+class MilvusStore(VectorStoreBase):
+    """Vector store backed by a Milvus collection of text embeddings."""
+
+    def __init__(self, cfg: dict) -> None:
+        """Connect to Milvus and make sure the backing collection exists.
+
+        Args:
+            cfg: Connection settings. ``table_name`` is required; ``url``,
+                ``port``, ``alias``, ``username``, ``password`` and ``secure``
+                are optional.
+
+        Raises:
+            KeyError: If ``table_name`` is missing from ``cfg``.
+            ValueError: If only one of ``username`` / ``password`` is set.
+        """
+        self.uri = cfg.get("url")
+        self.port = cfg.get("port")
+        self.alias = cfg.get("alias", "default")
+        self.username = cfg.get("username")
+        self.password = cfg.get("password")
+        # Keep ``secure`` in its own attribute: storing it in ``password``
+        # would clobber the credential and break the check below.
+        self.secure = cfg.get("secure")
+        self.collection_name = cfg["table_name"]
+
+        # use HNSW by default.
+        self.index_params = {
+            "metric_type": "IP",
+            "index_type": "HNSW",
+            "params": {"M": 8, "efConstruction": 64},
+        }
+
+        if (self.username is None) != (self.password is None):
+            raise ValueError(
+                "Both username and password must be set to use authentication for Milvus"
+            )
+
+        connect_kwargs = {}
+        if self.username:
+            connect_kwargs["user"] = self.username
+            connect_kwargs["password"] = self.password
+        if self.secure is not None:
+            connect_kwargs["secure"] = self.secure
+
+        connections.connect(
+            alias=self.alias,
+            host=self.uri or "127.0.0.1",
+            port=self.port or "19530",
+            **connect_kwargs,
+        )
+
+        self.init_schema()
+
+    def init_schema(self) -> None:
+        """Create the collection and its vector index if missing, then load it."""
+        fields = [
+            FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
+            # NOTE(review): dim must equal the embedding model's output size; confirm 384.
+            FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=384),
+            FieldSchema(name="raw_text", dtype=DataType.VARCHAR, max_length=65535),
+        ]
+
+        # create collection if not exist and load it.
+        self.schema = CollectionSchema(fields, "db-gpt memory storage")
+        self.collection = Collection(
+            self.collection_name, self.schema, using=self.alias
+        )
+        # create index if not exist.
+        if not self.collection.has_index():
+            self.collection.release()
+            self.collection.create_index(
+                "vector",
+                self.index_params,
+                index_name="vector",
+            )
+        self.collection.load()
+
+    # TODO: no insert/search methods exist yet; add them once an embedding
+    # function is wired in.
diff --git a/pilot/vector_store/vector_store_base.py b/pilot/vector_store/vector_store_base.py
new file mode 100644
index 000000000..818730f0f
--- /dev/null
+++ b/pilot/vector_store/vector_store_base.py
@@ -0,0 +1,10 @@
+"""Abstract base for vector store implementations."""
+from abc import ABC, abstractmethod
+
+
+class VectorStoreBase(ABC):
+    """Base class that concrete vector stores must extend."""
+
+    @abstractmethod
+    def init_schema(self) -> None:
+        """Initialize schema in vector database."""