docs: Add chroma and milvus connector docs

Add vector docs that explain how to use the vector connector in DB-GPT.
1.chroma docs
2.milvus docs
Closes #230
This commit is contained in:
aries-ckt 2023-06-16 10:17:18 +08:00
parent 8d3f5c9702
commit 57b6418a88
2 changed files with 126 additions and 0 deletions

View File

@ -0,0 +1,50 @@
ChromaStore
==================================
ChromaStore is the VectorConnector implementation for the Chroma vector database.
It inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document().
```
class ChromaStore(VectorStoreBase):
    """Vector store connector backed by a Chroma database."""

    def __init__(self, ctx: dict) -> None:
        """Build the Chroma client for the store described by ``ctx``.

        Args:
            ctx: Connector settings. Must contain ``"embeddings"`` (handed to
                Chroma as its embedding function) and ``"vector_store_name"``
                (used to derive the persist directory).
        """
        self.ctx = ctx
        self.embeddings = ctx["embeddings"]
        # Each store persists under KNOWLEDGE_UPLOAD_ROOT_PATH/<name>.vectordb
        self.persist_dir = os.path.join(
            KNOWLEDGE_UPLOAD_ROOT_PATH, ctx["vector_store_name"] + ".vectordb"
        )
        self.vector_store_client = Chroma(
            persist_directory=self.persist_dir, embedding_function=self.embeddings
        )
```
similar_search()
```
def similar_search(self, text, topk) -> list:
    """Return the ``topk`` stored entries most similar to ``text``.

    Args:
        text: Query text forwarded to the Chroma client.
        topk: Number of results requested from the Chroma client.

    Returns:
        Whatever ``self.vector_store_client.similarity_search`` yields
        (presumably a list of documents -- confirm against the client).
    """
    logger.info("ChromaStore similar search")
    return self.vector_store_client.similarity_search(text, topk)
```
vector_name_exists()
```
def vector_name_exists(self):
    """Report whether the persist directory exists and holds at least one entry."""
    store_dir = self.persist_dir
    if not os.path.exists(store_dir):
        return False
    return len(os.listdir(store_dir)) > 0
```
load_document()
```
def load_document(self, documents):
    """Add ``documents`` to the Chroma store and persist it.

    Each document contributes its ``page_content`` as the text and its
    ``metadata`` as the matching metadata entry.
    """
    logger.info("ChromaStore load document")
    client = self.vector_store_client
    client.add_texts(
        texts=[item.page_content for item in documents],
        metadatas=[item.metadata for item in documents],
    )
    client.persist()
```

View File

@ -0,0 +1,76 @@
MilvusStore
==================================
MilvusStore is the VectorConnector implementation for the Milvus vector database.
[Tutorial on how to create a Milvus instance](https://milvus.io/docs/install_standalone-docker.md)
It inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document().
```
class MilvusStore(VectorStoreBase):
    """Vector store connector backed by a Milvus database."""

    def __init__(self, ctx: dict) -> None:
        """Record the Milvus settings used by this store.

        Server address and credentials are read from the global ``CFG``;
        per-store options are read from ``ctx``.

        Args:
            ctx: MilvusStore config. Recognised keys are
                ``"vector_store_name"`` (collection name), ``"secure"`` and
                ``"embeddings"``; a missing key yields ``None``.
        """
        self.uri = CFG.MILVUS_URL
        self.port = CFG.MILVUS_PORT
        self.username = CFG.MILVUS_USERNAME
        self.password = CFG.MILVUS_PASSWORD
        self.collection_name = ctx.get("vector_store_name")
        self.secure = ctx.get("secure")
        self.embedding = ctx.get("embeddings")
        # Filled from the collection schema when a search is performed.
        self.fields = []
        self.alias = "default"
```
similar_search()
```
def similar_search(self, text, topk) -> list:
    """Return the documents most similar to ``text``.

    Reads the collection schema to record the insertable field names, the
    primary-key field and the vector field, then delegates to
    ``self._search``.

    Args:
        text: Query text forwarded to ``self._search``.
        topk: Number of results requested from ``self._search``.

    Returns:
        The document element of each ``(doc, _, _)`` triple produced by
        ``self._search``.
    """
    self.col = Collection(self.collection_name)
    # Rebuild the field list on every call: appending to the list created in
    # __init__ would duplicate every field name on repeated searches.
    self.fields = []
    for field in self.col.schema.fields:
        # Auto-generated id fields are excluded from the field list.
        if not field.auto_id:
            self.fields.append(field.name)
        if field.is_primary:
            self.primary_field = field.name
        if field.dtype in (DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR):
            self.vector_field = field.name
    _, docs_and_scores = self._search(text, topk)
    return [doc for doc, _, _ in docs_and_scores]
```
vector_name_exists()
```
def vector_name_exists(self):
    """Return the result of ``utility.has_collection`` for this store's collection name."""
    collection_found = utility.has_collection(self.collection_name)
    return collection_found
```
load_document()
```
def load_document(self, documents) -> None:
    """Load ``documents`` into the collection in batches of at most 500.

    Each batch is handed to ``self.init_schema_and_load`` together with the
    collection name; an empty ``documents`` results in no calls.
    """
    chunk_len = 500
    for start in range(0, len(documents), chunk_len):
        self.init_schema_and_load(
            self.collection_name, documents[start : start + chunk_len]
        )
```