feature: Milvus vector store

This commit is contained in:
chenketing 2023-05-12 16:53:10 +08:00
parent d42a9f3bd1
commit 477c20514b
3 changed files with 115 additions and 9 deletions

View File

@ -1,3 +1,4 @@
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Milvus from langchain.vectorstores import Milvus
from pymilvus import Collection,utility from pymilvus import Collection,utility
from pymilvus import connections, DataType, FieldSchema, CollectionSchema from pymilvus import connections, DataType, FieldSchema, CollectionSchema
@ -36,16 +37,21 @@ from pymilvus import connections, DataType, FieldSchema, CollectionSchema
# hit = results[0][0] # hit = results[0][0]
# hit.entity.get('title') # hit.entity.get('title')
milvus = connections.connect( # milvus = connections.connect(
alias="default", # alias="default",
host='localhost', # host='localhost',
port="19530" # port="19530"
) # )
data = ["aaa", "bbb"] from pilot.vector_store.milvus_store import MilvusStore
# text_embeddings = Text2Vectors()
mivuls = Milvus(collection_name='document', embedding_function= text_embeddings, connection_args={"host": "127.0.0.1", "port": "19530", "alias":"default"}, text_field="")
mivuls.from_texts(texts=data, embedding=text_embeddings) data = ["aaa", "bbb"]
model_name = "/Users/chenketing/Desktop/project/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
# text_embeddings = Text2Vectors()
mivuls = MilvusStore(cfg={"url": "127.0.0.1", "port": "19530", "alias": "default", "table_name": "test_c"})
# mivuls.from_texts(texts=data, embedding=embeddings)
# docs, # docs,
# embedding=embeddings, # embedding=embeddings,
# connection_args={"host": "127.0.0.1", "port": "19530", "alias": "default"} # connection_args={"host": "127.0.0.1", "port": "19530", "alias": "default"}

View File

@ -0,0 +1,91 @@
from pymilvus import DataType, FieldSchema, CollectionSchema, connections, Collection
from pilot.vector_store.vector_store_base import VectorStoreBase
class MilvusStore(VectorStoreBase):
    """Vector store backed by a Milvus collection.

    Constructing an instance connects to the Milvus server under the
    ``"default"`` connection alias and then calls :meth:`init_schema` to
    create, index and load the configured collection.
    """

    def __init__(self, cfg: dict) -> None:
        """Connect to Milvus and prepare the collection.

        Args:
            cfg: Connection settings. ``url``, ``port`` and ``table_name``
                are required keys; ``username``, ``password`` and ``secure``
                are optional and default to ``None``.

        Raises:
            KeyError: If ``url``, ``port`` or ``table_name`` is missing.
            ValueError: If only one of ``username`` / ``password`` is set.
        """
        self.uri = cfg["url"]
        self.port = cfg["port"]
        self.username = cfg.get("username")
        self.password = cfg.get("password")
        self.collection_name = cfg["table_name"]
        # Stored separately from the password: writing the "secure" flag into
        # ``self.password`` would discard the real password and make the
        # credential check below fail for every authenticated connection.
        # NOTE(review): the flag is not forwarded to ``connections.connect``
        # yet (it was commented out originally) -- confirm before enabling.
        self.secure = cfg.get("secure")

        # Use HNSW with inner-product distance by default.
        self.index_params = {
            "metric_type": "IP",
            "index_type": "HNSW",
            "params": {"M": 8, "efConstruction": 64},
        }

        # Credentials are all-or-nothing: exactly one of them being set is a
        # configuration mistake.
        if (self.username is None) != (self.password is None):
            raise ValueError(
                "Both username and password must be set to use authentication for Milvus"
            )

        connect_kwargs = {}
        if self.username:
            connect_kwargs["user"] = self.username
            connect_kwargs["password"] = self.password

        connections.connect(
            **connect_kwargs,
            host=self.uri or "127.0.0.1",
            port=self.port or "19530",
            alias="default",
        )

        self.init_schema()

    def init_schema(self) -> None:
        """Create the collection schema, build the vector index and load it.

        The collection has an auto-generated INT64 primary key ``pk``, a
        384-dimensional float vector field ``vector`` and a ``raw_text``
        VARCHAR field. The index described by ``self.index_params`` is
        created on ``vector`` only when the collection has no index yet.
        """
        fields = [
            FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
            FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=384),
            FieldSchema(name="raw_text", dtype=DataType.VARCHAR, max_length=65535),
        ]

        # Create the collection if it does not exist yet.
        self.schema = CollectionSchema(fields, "db-gpt memory storage")
        self.collection = Collection(self.collection_name, self.schema)

        # Create the index if it does not exist; the collection is released
        # first, before the index is built.
        if not self.collection.has_index():
            self.collection.release()
            self.collection.create_index(
                "vector",
                self.index_params,
                index_name="vector",
            )

        self.collection.load()
# def add(self, data) -> str:
# """Add an embedding of data into milvus.
#
# Args:
# data (str): The raw text to construct embedding index.
#
# Returns:
# str: log.
# """
# embedding = get_ada_embedding(data)
# result = self.collection.insert([[embedding], [data]])
# _text = (
# "Inserting data into memory at primary key: "
# f"{result.primary_keys[0]}:\n data: {data}"
# )
# return _text

View File

@ -0,0 +1,9 @@
from abc import ABC, abstractmethod
class VectorStoreBase(ABC):
    """Abstract interface that every vector store backend implements."""

    @abstractmethod
    def init_schema(self) -> None:
        """Set up the schema in the underlying vector database.

        Concrete subclasses must override this method; the base class cannot
        be instantiated until they do.
        """