From 57b6418a8858fe668e603ab4b56d3478719f6444 Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Fri, 16 Jun 2023 10:17:18 +0800 Subject: [PATCH 1/2] docs: Add chroma and milvus connector docs Add vector docs, provide how to use vector connector in DB-GPT. 1.chroma docs 2.milvus docs Closes #230 --- docs/modules/vector/chroma/chroma.md | 50 ++++++++++++++++++ docs/modules/vector/milvus/milvus.md | 76 ++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 docs/modules/vector/chroma/chroma.md create mode 100644 docs/modules/vector/milvus/milvus.md diff --git a/docs/modules/vector/chroma/chroma.md b/docs/modules/vector/chroma/chroma.md new file mode 100644 index 000000000..7c9f41eea --- /dev/null +++ b/docs/modules/vector/chroma/chroma.md @@ -0,0 +1,50 @@ +ChromaStore +================================== +ChromaStore is the VectorConnector implementation for the Chroma vector database. + +It inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document().
+``` +class ChromaStore(VectorStoreBase): + """chroma database""" + + def __init__(self, ctx: {}) -> None: + self.ctx = ctx + self.embeddings = ctx["embeddings"] + self.persist_dir = os.path.join( + KNOWLEDGE_UPLOAD_ROOT_PATH, ctx["vector_store_name"] + ".vectordb" + ) + self.vector_store_client = Chroma( + persist_directory=self.persist_dir, embedding_function=self.embeddings + ) +``` + +similar_search() + +``` + def similar_search(self, text, topk) -> None: + logger.info("ChromaStore similar search") + return self.vector_store_client.similarity_search(text, topk) + +``` + +vector_name_exists() + +``` + def vector_name_exists(self): + return ( + os.path.exists(self.persist_dir) and len(os.listdir(self.persist_dir)) > 0 + ) + +``` + +load_document() + +``` + def load_document(self, documents): + logger.info("ChromaStore load document") + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + self.vector_store_client.add_texts(texts=texts, metadatas=metadatas) + self.vector_store_client.persist() +``` + diff --git a/docs/modules/vector/milvus/milvus.md b/docs/modules/vector/milvus/milvus.md new file mode 100644 index 000000000..9cb89314c --- /dev/null +++ b/docs/modules/vector/milvus/milvus.md @@ -0,0 +1,76 @@ +MilvusStore +================================== +MilvusStore is the VectorConnector implementation for the Milvus vector database. + +[Tutorial on how to create a Milvus instance](https://milvus.io/docs/install_standalone-docker.md) + +It inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document(). +``` +class MilvusStore(VectorStoreBase): + """Milvus database""" + + def __init__(self, ctx: {}) -> None: + """init a milvus storage connection. + + Args: + ctx ({}): MilvusStore global config.
+ """ + # self.configure(cfg) + + connect_kwargs = {} + self.uri = CFG.MILVUS_URL + self.port = CFG.MILVUS_PORT + self.username = CFG.MILVUS_USERNAME + self.password = CFG.MILVUS_PASSWORD + self.collection_name = ctx.get("vector_store_name", None) + self.secure = ctx.get("secure", None) + self.embedding = ctx.get("embeddings", None) + self.fields = [] + self.alias = "default" + ) +``` + +similar_search() + +``` + def similar_search(self, text, topk) -> None: + """similar_search in vector database.""" + self.col = Collection(self.collection_name) + schema = self.col.schema + for x in schema.fields: + self.fields.append(x.name) + if x.auto_id: + self.fields.remove(x.name) + if x.is_primary: + self.primary_field = x.name + if x.dtype == DataType.FLOAT_VECTOR or x.dtype == DataType.BINARY_VECTOR: + self.vector_field = x.name + _, docs_and_scores = self._search(text, topk) + return [doc for doc, _, _ in docs_and_scores] + +``` + +vector_name_exists() + +``` + def vector_name_exists(self): + """is vector store name exist.""" + return utility.has_collection(self.collection_name) + +``` + +load_document() + +``` + def load_document(self, documents) -> None: + """load document in vector database.""" + # self.init_schema_and_load(self.collection_name, documents) + batch_size = 500 + batched_list = [ + documents[i : i + batch_size] for i in range(0, len(documents), batch_size) + ] + # docs = [] + for doc_batch in batched_list: + self.init_schema_and_load(self.collection_name, doc_batch) +``` + From a2dac8926dfa3a91b4fc7e69915bf6a17e69bc70 Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Fri, 16 Jun 2023 10:18:50 +0800 Subject: [PATCH 2/2] docs: Add vector docs, provide how to use vector connector in DB-GPT. 
1.chroma docs 2.milvus docs Closes #230 --- docs/modules/vector.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 docs/modules/vector.rst diff --git a/docs/modules/vector.rst b/docs/modules/vector.rst new file mode 100644 index 000000000..d425a027d --- /dev/null +++ b/docs/modules/vector.rst @@ -0,0 +1,23 @@ +VectorConnector +--------------- + +**VectorConnector Introduction** + +A vector knowledge base is a method of mapping words in language to a high-dimensional vector space. In the vector space, each word is represented as a vector that contains many numerical features, which represent the relationship between the word and other words. This mapping is a clustering technique, and the semantic relationship between words can be calculated by computing the differences between their vectors in the vector space. Vector knowledge bases can be used for natural language processing tasks such as sentiment analysis, text classification, and machine translation. Common vector knowledge bases include Word2Vec, GloVe, and FastText. The training of these vector knowledge bases usually requires a large corpus and computing resources to complete. + +VectorConnector is a vector database connection adapter that allows you to connect to different vector databases and abstracts away the implementation differences and underlying details of those databases. For example, it can be used to connect to databases such as Milvus, Chroma, Elasticsearch, and Weaviate. + +DB-GPT VectorConnector currently supports the Milvus and Chroma vector databases: + +- `chroma <./vector/chroma/chroma.html>`_: support for the Chroma vector database. +- `milvus <./vector/milvus/milvus.html>`_: support for the Milvus vector database. + + +.. toctree:: + :maxdepth: 2 + :caption: VectorConnector + :name: chroma + :hidden: + + ./vector/chroma/chroma.md + ./vector/milvus/milvus.md \ No newline at end of file