mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-07-22 11:51:42 +00:00
feature:milvus vector store
This commit is contained in:
parent
d42a9f3bd1
commit
477c20514b
@ -1,3 +1,4 @@
|
||||
from langchain.embeddings import HuggingFaceEmbeddings
|
||||
from langchain.vectorstores import Milvus
|
||||
from pymilvus import Collection,utility
|
||||
from pymilvus import connections, DataType, FieldSchema, CollectionSchema
|
||||
@ -36,16 +37,21 @@ from pymilvus import connections, DataType, FieldSchema, CollectionSchema
|
||||
# hit = results[0][0]
|
||||
# hit.entity.get('title')
|
||||
|
||||
milvus = connections.connect(
|
||||
alias="default",
|
||||
host='localhost',
|
||||
port="19530"
|
||||
)
|
||||
data = ["aaa", "bbb"]
|
||||
# text_embeddings = Text2Vectors()
|
||||
mivuls = Milvus(collection_name='document', embedding_function= text_embeddings, connection_args={"host": "127.0.0.1", "port": "19530", "alias":"default"}, text_field="")
|
||||
# milvus = connections.connect(
|
||||
# alias="default",
|
||||
# host='localhost',
|
||||
# port="19530"
|
||||
# )
|
||||
from pilot.vector_store.milvus_store import MilvusStore
|
||||
|
||||
mivuls.from_texts(texts=data, embedding=text_embeddings)
|
||||
data = ["aaa", "bbb"]
|
||||
model_name = "/Users/chenketing/Desktop/project/all-MiniLM-L6-v2"
|
||||
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
||||
|
||||
# text_embeddings = Text2Vectors()
|
||||
mivuls = MilvusStore(cfg={"url": "127.0.0.1", "port": "19530", "alias": "default", "table_name": "test_c"})
|
||||
|
||||
# mivuls.from_texts(texts=data, embedding=embeddings)
|
||||
# docs,
|
||||
# embedding=embeddings,
|
||||
# connection_args={"host": "127.0.0.1", "port": "19530", "alias": "default"}
|
||||
|
91
pilot/vector_store/milvus_store.py
Normal file
91
pilot/vector_store/milvus_store.py
Normal file
@ -0,0 +1,91 @@
|
||||
|
||||
from pymilvus import DataType, FieldSchema, CollectionSchema, connections, Collection
|
||||
|
||||
from pilot.vector_store.vector_store_base import VectorStoreBase
|
||||
|
||||
|
||||
class MilvusStore(VectorStoreBase):
    """Vector store backed by a single Milvus collection.

    On construction the store opens a Milvus connection and makes sure the
    configured collection exists, is indexed and is loaded.
    """

    def __init__(self, cfg: dict) -> None:
        """Connect to Milvus and prepare the target collection.

        Args:
            cfg: Connection settings.
                Required keys: ``url`` (Milvus host), ``port`` and
                ``table_name`` (collection name).
                Optional keys: ``username`` / ``password`` (must be given
                together), ``secure``, ``alias`` (connection alias,
                defaults to ``"default"``) and ``dim`` (embedding
                dimension, defaults to 384).

        Raises:
            KeyError: If a required key is missing from ``cfg``.
            ValueError: If only one of ``username`` / ``password`` is set.
        """
        self.uri = cfg["url"]
        self.port = cfg["port"]
        self.username = cfg.get("username", None)
        self.password = cfg.get("password", None)
        self.collection_name = cfg["table_name"]
        # Stored under its own attribute; it used to be written into
        # ``self.password``, which clobbered the real password.
        self.secure = cfg.get("secure", None)
        self.alias = cfg.get("alias", "default")
        # 384 is the previously hard-coded dimension of the "vector" field.
        self.dim = cfg.get("dim", 384)

        # use HNSW by default.
        self.index_params = {
            "metric_type": "IP",
            "index_type": "HNSW",
            "params": {"M": 8, "efConstruction": 64},
        }

        if (self.username is None) != (self.password is None):
            raise ValueError(
                "Both username and password must be set to use authentication for Milvus"
            )

        connect_kwargs = {}
        if self.username:
            connect_kwargs["user"] = self.username
            connect_kwargs["password"] = self.password
        if self.secure is not None:
            # Only forward the flag when the caller set it explicitly.
            connect_kwargs["secure"] = self.secure

        connections.connect(
            **connect_kwargs,
            host=self.uri or "127.0.0.1",
            port=self.port or "19530",
            alias=self.alias,
        )

        self.init_schema()

    def init_schema(self) -> None:
        """Create (or open) the collection, ensure its index, and load it."""
        fields = [
            FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
            FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=self.dim),
            FieldSchema(name="raw_text", dtype=DataType.VARCHAR, max_length=65535),
        ]

        # create collection if not exist and load it.
        self.schema = CollectionSchema(fields, "db-gpt memory storage")
        self.collection = Collection(
            self.collection_name, self.schema, using=self.alias
        )

        # create index if not exist.
        if not self.collection.has_index():
            # Release first so the index is not built on a loaded collection.
            self.collection.release()
            self.collection.create_index(
                "vector",
                self.index_params,
                index_name="vector",
            )
        self.collection.load()
|
||||
|
||||
# def add(self, data) -> str:
|
||||
# """Add an embedding of data into milvus.
|
||||
#
|
||||
# Args:
|
||||
# data (str): The raw text to construct embedding index.
|
||||
#
|
||||
# Returns:
|
||||
# str: log.
|
||||
# """
|
||||
# embedding = get_ada_embedding(data)
|
||||
# result = self.collection.insert([[embedding], [data]])
|
||||
# _text = (
|
||||
# "Inserting data into memory at primary key: "
|
||||
# f"{result.primary_keys[0]}:\n data: {data}"
|
||||
# )
|
||||
# return _text
|
9
pilot/vector_store/vector_store_base.py
Normal file
9
pilot/vector_store/vector_store_base.py
Normal file
@ -0,0 +1,9 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class VectorStoreBase(ABC):
    """Abstract interface that every vector store backend implements."""

    @abstractmethod
    def init_schema(self) -> None:
        """Set up the schema inside the underlying vector database.

        Concrete stores must override this method; the base version does
        nothing and returns ``None``.
        """
|
Loading…
Reference in New Issue
Block a user