mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-07-30 15:21:02 +00:00
docs: Add chroma and milvus connector docs (#230)
Add vector docs that explain how to use the vector connector in DB-GPT. 1. chroma docs 2. milvus docs Closes #230
This commit is contained in:
commit
3f78c8a077
23
docs/modules/vector.rst
Normal file
23
docs/modules/vector.rst
Normal file
@ -0,0 +1,23 @@
|
||||
VectorConnector
|
||||
---------------
|
||||
|
||||
**VectorConnector Introduction**
|
||||
|
||||
A vector knowledge base is a method of mapping words in a language to a high-dimensional vector space. In the vector space, each word is represented as a vector containing many numerical features that capture the relationship between that word and other words. This mapping is a clustering technique, and the semantic relationship between words can be calculated by computing the differences between their vectors in the vector space. Vector knowledge bases can be used for natural language processing tasks such as sentiment analysis, text classification, and machine translation. Common vector knowledge bases include Word2Vec, GloVe, and FastText. Training these vector knowledge bases usually requires a large corpus and substantial computing resources.
|
||||
|
||||
VectorConnector is a vector database connection adapter that lets you connect to different vector databases while abstracting away their implementation differences and underlying details. For example, it can be used to connect to databases such as Milvus, Chroma, Elasticsearch, and Weaviate.
|
||||
|
||||
DB-GPT VectorConnector currently supports the Milvus and Chroma vector databases:
|
||||
|
||||
- `chroma <./vector/chroma/chroma.html>`_: support for the Chroma vector database.
|
||||
- `milvus <./vector/milvus/milvus.html>`_: support for the Milvus vector database.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: VectorConnector
|
||||
:name: chroma
|
||||
:hidden:
|
||||
|
||||
./vector/chroma/chroma.md
|
||||
./vector/milvus/milvus.md
|
50
docs/modules/vector/chroma/chroma.md
Normal file
50
docs/modules/vector/chroma/chroma.md
Normal file
@ -0,0 +1,50 @@
|
||||
ChromaStore
|
||||
==================================
|
||||
ChromaStore is one implementation of the Chroma vector database in VectorConnector.
|
||||
|
||||
ChromaStore inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document().
|
||||
```
|
||||
class ChromaStore(VectorStoreBase):
|
||||
"""chroma database"""
|
||||
|
||||
def __init__(self, ctx: {}) -> None:
|
||||
self.ctx = ctx
|
||||
self.embeddings = ctx["embeddings"]
|
||||
self.persist_dir = os.path.join(
|
||||
KNOWLEDGE_UPLOAD_ROOT_PATH, ctx["vector_store_name"] + ".vectordb"
|
||||
)
|
||||
self.vector_store_client = Chroma(
|
||||
persist_directory=self.persist_dir, embedding_function=self.embeddings
|
||||
)
|
||||
```
|
||||
|
||||
similar_search()
|
||||
|
||||
```
|
||||
def similar_search(self, text, topk) -> None:
|
||||
logger.info("ChromaStore similar search")
|
||||
return self.vector_store_client.similarity_search(text, topk)
|
||||
|
||||
```
|
||||
|
||||
vector_name_exists()
|
||||
|
||||
```
|
||||
def vector_name_exists(self):
|
||||
return (
|
||||
os.path.exists(self.persist_dir) and len(os.listdir(self.persist_dir)) > 0
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
load_document()
|
||||
|
||||
```
|
||||
def load_document(self, documents):
|
||||
logger.info("ChromaStore load document")
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
self.vector_store_client.add_texts(texts=texts, metadatas=metadatas)
|
||||
self.vector_store_client.persist()
|
||||
```
|
||||
|
76
docs/modules/vector/milvus/milvus.md
Normal file
76
docs/modules/vector/milvus/milvus.md
Normal file
@ -0,0 +1,76 @@
|
||||
MilvusStore
|
||||
==================================
|
||||
MilvusStore is one implementation of the Milvus vector database in VectorConnector.
|
||||
|
||||
[Tutorial on how to create a Milvus instance](https://milvus.io/docs/install_standalone-docker.md)
|
||||
|
||||
MilvusStore inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document().
|
||||
```
|
||||
class MilvusStore(VectorStoreBase):
|
||||
"""Milvus database"""
|
||||
|
||||
def __init__(self, ctx: {}) -> None:
|
||||
"""init a milvus storage connection.
|
||||
|
||||
Args:
|
||||
ctx ({}): MilvusStore global config.
|
||||
"""
|
||||
# self.configure(cfg)
|
||||
|
||||
connect_kwargs = {}
|
||||
self.uri = CFG.MILVUS_URL
|
||||
self.port = CFG.MILVUS_PORT
|
||||
self.username = CFG.MILVUS_USERNAME
|
||||
self.password = CFG.MILVUS_PASSWORD
|
||||
self.collection_name = ctx.get("vector_store_name", None)
|
||||
self.secure = ctx.get("secure", None)
|
||||
self.embedding = ctx.get("embeddings", None)
|
||||
self.fields = []
|
||||
self.alias = "default"
|
||||
)
|
||||
```
|
||||
|
||||
similar_search()
|
||||
|
||||
```
|
||||
def similar_search(self, text, topk) -> None:
|
||||
"""similar_search in vector database."""
|
||||
self.col = Collection(self.collection_name)
|
||||
schema = self.col.schema
|
||||
for x in schema.fields:
|
||||
self.fields.append(x.name)
|
||||
if x.auto_id:
|
||||
self.fields.remove(x.name)
|
||||
if x.is_primary:
|
||||
self.primary_field = x.name
|
||||
if x.dtype == DataType.FLOAT_VECTOR or x.dtype == DataType.BINARY_VECTOR:
|
||||
self.vector_field = x.name
|
||||
_, docs_and_scores = self._search(text, topk)
|
||||
return [doc for doc, _, _ in docs_and_scores]
|
||||
|
||||
```
|
||||
|
||||
vector_name_exists()
|
||||
|
||||
```
|
||||
def vector_name_exists(self):
|
||||
"""is vector store name exist."""
|
||||
return utility.has_collection(self.collection_name)
|
||||
|
||||
```
|
||||
|
||||
load_document()
|
||||
|
||||
```
|
||||
def load_document(self, documents) -> None:
|
||||
"""load document in vector database."""
|
||||
# self.init_schema_and_load(self.collection_name, documents)
|
||||
batch_size = 500
|
||||
batched_list = [
|
||||
documents[i : i + batch_size] for i in range(0, len(documents), batch_size)
|
||||
]
|
||||
# docs = []
|
||||
for doc_batch in batched_list:
|
||||
self.init_schema_and_load(self.collection_name, doc_batch)
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user