diff --git a/docs/modules/vector/chroma/chroma.md b/docs/modules/vector/chroma/chroma.md new file mode 100644 index 000000000..7c9f41eea --- /dev/null +++ b/docs/modules/vector/chroma/chroma.md @@ -0,0 +1,50 @@ +ChromaStore +================================== +ChromaStore is one implementation of the Chroma vector database in VectorConnector. + +It inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document(). +``` +class ChromaStore(VectorStoreBase): + """chroma database""" + + def __init__(self, ctx: {}) -> None: + self.ctx = ctx + self.embeddings = ctx["embeddings"] + self.persist_dir = os.path.join( + KNOWLEDGE_UPLOAD_ROOT_PATH, ctx["vector_store_name"] + ".vectordb" + ) + self.vector_store_client = Chroma( + persist_directory=self.persist_dir, embedding_function=self.embeddings + ) +``` + +similar_search() + +``` + def similar_search(self, text, topk) -> None: + logger.info("ChromaStore similar search") + return self.vector_store_client.similarity_search(text, topk) + +``` + +vector_name_exists() + +``` + def vector_name_exists(self): + return ( + os.path.exists(self.persist_dir) and len(os.listdir(self.persist_dir)) > 0 + ) + +``` + +load_document() + +``` + def load_document(self, documents): + logger.info("ChromaStore load document") + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + self.vector_store_client.add_texts(texts=texts, metadatas=metadatas) + self.vector_store_client.persist() +``` + diff --git a/docs/modules/vector/milvus/milvus.md b/docs/modules/vector/milvus/milvus.md new file mode 100644 index 000000000..9cb89314c --- /dev/null +++ b/docs/modules/vector/milvus/milvus.md @@ -0,0 +1,76 @@ +MilvusStore +================================== +MilvusStore is one implementation of the Milvus vector database in VectorConnector. 
+ +[Tutorial on how to create a Milvus instance](https://milvus.io/docs/install_standalone-docker.md) + +It inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document(). +``` +class MilvusStore(VectorStoreBase): + """Milvus database""" + + def __init__(self, ctx: {}) -> None: + """init a milvus storage connection. + + Args: + ctx ({}): MilvusStore global config. + """ + # self.configure(cfg) + + connect_kwargs = {} + self.uri = CFG.MILVUS_URL + self.port = CFG.MILVUS_PORT + self.username = CFG.MILVUS_USERNAME + self.password = CFG.MILVUS_PASSWORD + self.collection_name = ctx.get("vector_store_name", None) + self.secure = ctx.get("secure", None) + self.embedding = ctx.get("embeddings", None) + self.fields = [] + self.alias = "default" + ) +``` + +similar_search() + +``` + def similar_search(self, text, topk) -> None: + """similar_search in vector database.""" + self.col = Collection(self.collection_name) + schema = self.col.schema + for x in schema.fields: + self.fields.append(x.name) + if x.auto_id: + self.fields.remove(x.name) + if x.is_primary: + self.primary_field = x.name + if x.dtype == DataType.FLOAT_VECTOR or x.dtype == DataType.BINARY_VECTOR: + self.vector_field = x.name + _, docs_and_scores = self._search(text, topk) + return [doc for doc, _, _ in docs_and_scores] + +``` + +vector_name_exists() + +``` + def vector_name_exists(self): + """is vector store name exist.""" + return utility.has_collection(self.collection_name) + +``` + +load_document() + +``` + def load_document(self, documents) -> None: + """load document in vector database.""" + # self.init_schema_and_load(self.collection_name, documents) + batch_size = 500 + batched_list = [ + documents[i : i + batch_size] for i in range(0, len(documents), batch_size) + ] + # docs = [] + for doc_batch in batched_list: + self.init_schema_and_load(self.collection_name, doc_batch) +``` +