feature:milvus vector store

This commit is contained in:
chenketing 2023-05-12 16:53:10 +08:00
parent d42a9f3bd1
commit 477c20514b
3 changed files with 115 additions and 9 deletions

View File

@ -1,3 +1,4 @@
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Milvus
from pymilvus import Collection,utility
from pymilvus import connections, DataType, FieldSchema, CollectionSchema
@ -36,16 +37,21 @@ from pymilvus import connections, DataType, FieldSchema, CollectionSchema
# hit = results[0][0]
# hit.entity.get('title')
# NOTE(review): this region comes from a diff hunk; statements from before and
# after the change appear interleaved (the same connect call is present both
# live and commented out, and `data` is assigned twice) — confirm against the
# actual file before relying on the order below.

# Register the "default" pymilvus connection against a Milvus server on
# localhost:19530.
milvus = connections.connect(
alias="default",
host='localhost',
port="19530"
)
# Two sample texts used as insert input further down.
data = ["aaa", "bbb"]
# text_embeddings = Text2Vectors()
# NOTE(review): `text_embeddings` is only assigned in commented-out lines in
# the visible code, so this call presumably fails with NameError unless the
# name is defined earlier in the file — confirm.
mivuls = Milvus(collection_name='document', embedding_function= text_embeddings, connection_args={"host": "127.0.0.1", "port": "19530", "alias":"default"}, text_field="")
# milvus = connections.connect(
# alias="default",
# host='localhost',
# port="19530"
# )
# Project-local Milvus wrapper; imported mid-script rather than at the top.
from pilot.vector_store.milvus_store import MilvusStore
# NOTE(review): also reads `text_embeddings`, which has no live assignment in
# the visible code — confirm.
mivuls.from_texts(texts=data, embedding=text_embeddings)
data = ["aaa", "bbb"]
# Absolute path on one developer's machine to a local copy of the
# all-MiniLM-L6-v2 model directory; not portable as written.
model_name = "/Users/chenketing/Desktop/project/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
# text_embeddings = Text2Vectors()
# Rebinds `mivuls` to the project wrapper, pointed at a local server and the
# "test_c" table.
mivuls = MilvusStore(cfg={"url": "127.0.0.1", "port": "19530", "alias": "default", "table_name": "test_c"})
# mivuls.from_texts(texts=data, embedding=embeddings)
# docs,
# embedding=embeddings,
# connection_args={"host": "127.0.0.1", "port": "19530", "alias": "default"}

View File

@ -0,0 +1,91 @@
from pymilvus import DataType, FieldSchema, CollectionSchema, connections, Collection
from pilot.vector_store.vector_store_base import VectorStoreBase
class MilvusStore(VectorStoreBase):
    """Vector store backed by a Milvus collection.

    Constructing an instance registers the ``"default"`` pymilvus connection
    and then calls :meth:`init_schema`, which binds ``self.collection`` to the
    configured collection, ensures it has an index, and loads it.
    """

    def __init__(self, cfg: dict) -> None:
        """Connect to Milvus and prepare the configured collection.

        Args:
            cfg: Connection settings. Recognised keys:

                * ``"url"``: server host; ``"127.0.0.1"`` when missing/empty.
                * ``"port"``: server port; ``"19530"`` when missing/empty.
                * ``"table_name"``: collection name (required).
                * ``"username"`` / ``"password"``: optional credentials,
                  which must be supplied together.
                * ``"secure"``: optional value forwarded as the ``secure``
                  argument of ``connections.connect`` when present.

        Raises:
            KeyError: If ``"table_name"`` is missing from ``cfg``.
            ValueError: If only one of username/password is provided.
        """
        # Host and port are optional because connect() below has fallbacks.
        self.uri = cfg.get("url")
        self.port = cfg.get("port")
        self.username = cfg.get("username")
        self.password = cfg.get("password")
        self.collection_name = cfg["table_name"]
        # Kept on its own attribute: storing this value in ``self.password``
        # would overwrite the real password and defeat the credential check.
        self.secure = cfg.get("secure")
        # use HNSW by default.
        self.index_params = self._default_index_params()

        if (self.username is None) != (self.password is None):
            raise ValueError(
                "Both username and password must be set to use authentication for Milvus"
            )

        connect_kwargs = {}
        if self.username:
            connect_kwargs["user"] = self.username
            connect_kwargs["password"] = self.password
        if self.secure is not None:
            # Only forwarded when explicitly configured, so existing configs
            # without a "secure" key connect exactly as before.
            connect_kwargs["secure"] = self.secure

        connections.connect(
            **connect_kwargs,
            host=self.uri or "127.0.0.1",
            port=self.port or "19530",
            alias="default",
        )
        self.init_schema()

    @staticmethod
    def _default_index_params() -> dict:
        """Return a fresh copy of the default HNSW index parameters."""
        return {
            "metric_type": "IP",
            "index_type": "HNSW",
            "params": {"M": 8, "efConstruction": 64},
        }

    def init_schema(self) -> None:
        """Initialize collection in milvus database.

        Builds the three-field schema (auto-generated ``pk``, ``vector``,
        ``raw_text``), binds ``self.collection`` to ``self.collection_name``,
        creates the ``"vector"`` index when the collection has none, and
        loads the collection.
        """
        fields = [
            FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
            # NOTE(review): dim=384 presumably has to match the output size of
            # the embedding model used by callers — confirm.
            FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=384),
            FieldSchema(name="raw_text", dtype=DataType.VARCHAR, max_length=65535),
        ]

        # create collection if not exist and load it.
        self.schema = CollectionSchema(fields, "db-gpt memory storage")
        self.collection = Collection(self.collection_name, self.schema)
        # Reset to the defaults on every call, as this method always has.
        self.index_params = self._default_index_params()

        # create index if not exist.
        if not self.collection.has_index():
            # Release first so the index is built on an unloaded collection.
            self.collection.release()
            self.collection.create_index(
                "vector",
                self.index_params,
                index_name="vector",
            )
        self.collection.load()
# def add(self, data) -> str:
# """Add an embedding of data into milvus.
#
# Args:
# data (str): The raw text to construct embedding index.
#
# Returns:
# str: log.
# """
# embedding = get_ada_embedding(data)
# result = self.collection.insert([[embedding], [data]])
# _text = (
# "Inserting data into memory at primary key: "
# f"{result.primary_keys[0]}:\n data: {data}"
# )
# return _text

View File

@ -0,0 +1,9 @@
from abc import ABC, abstractmethod
class VectorStoreBase(ABC):
    """Abstract base class for vector store implementations.

    Subclasses must override :meth:`init_schema` before they can be
    instantiated.
    """

    @abstractmethod
    def init_schema(self) -> None:
        """Set up the schema in the underlying vector database."""