From a5d6518d8757feb39bbe74acf74e67ebb6ce7efe Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Mon, 19 Jun 2023 17:36:45 +0800 Subject: [PATCH] docs: Add Weaviate docs, provide how to use Weaviate vector database in DB-GPT. 1.Weaviate docs --- .../LC_MESSAGES/getting_started/tutorials.po | 46 +++++----- .../zh_CN/LC_MESSAGES/modules/vector.po | 12 ++- docs/modules/vector.rst | 4 +- docs/modules/vector/weaviate/weaviate.md | 89 +++++++++++++++++++ 4 files changed, 125 insertions(+), 26 deletions(-) create mode 100644 docs/modules/vector/weaviate/weaviate.md diff --git a/docs/locales/zh_CN/LC_MESSAGES/getting_started/tutorials.po b/docs/locales/zh_CN/LC_MESSAGES/getting_started/tutorials.po index c182325a4..b5f11c8fa 100644 --- a/docs/locales/zh_CN/LC_MESSAGES/getting_started/tutorials.po +++ b/docs/locales/zh_CN/LC_MESSAGES/getting_started/tutorials.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: DB-GPT 0.1.0\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2023-06-14 17:19+0800\n" +"POT-Creation-Date: 2023-06-15 21:28+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -19,25 +19,25 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.12.1\n" -#: ../../getting_started/tutorials.md:1 f80ad98cf2d444a3a159655b37ae4d4f +#: ../../getting_started/tutorials.md:1 12844381da6e4aeab07dbccf4a6ae020 msgid "Tutorials" msgstr "教程" -#: ../../getting_started/tutorials.md:4 ff8771c31b024e538dbbaaeaccec8aa7 +#: ../../getting_started/tutorials.md:4 6312ba6a99a04bfdb04b9407fb92d967 msgid "This is a collection of DB-GPT tutorials on Medium." msgstr "这是知乎上DB-GPT教程的集合。." -#: ../../getting_started/tutorials.md:6 0c1c544a5d184375aabd7fe6918042e2 +#: ../../getting_started/tutorials.md:6 2fc56b41dec94e4c8ba56014e72ad527 msgid "" "DB-GPT is divided into several functions, including chat with knowledge " "base, execute SQL, chat with database, and execute plugins." 
msgstr "" -#: ../../getting_started/tutorials.md:8 b5e106a97bbd4467b071bb05d837d7d9 -msgid "Introduction" +#: ../../getting_started/tutorials.md:8 ae3ad1badc0948f8a0c271ca1bd58399 +msgid "Introduce" msgstr "" -#: ../../getting_started/tutorials.md:9 9df43436183d4b41a75a09fec15743d7 +#: ../../getting_started/tutorials.md:9 c646d59a9b3b49459d8f8e6d17f7ea05 #, fuzzy msgid "[What is DB-GPT](https://www.youtube.com/watch?v=QszhVJerc0I)" msgstr "" @@ -45,11 +45,12 @@ msgstr "" "GPT](https://www.bilibili.com/video/BV1SM4y1a7Nj/?buvid=551b023900b290f9497610b2155a2668&is_story_h5=false&mid=%2BVyE%2Fwau5woPcUKieCWS0A%3D%3D&p=1&plat_id=116&share_from=ugc&share_medium=iphone&share_plat=ios&share_session_id=5D08B533-82A4-4D40-9615-7826065B4574&share_source=GENERIC&share_tag=s_i×tamp=1686307943&unique_k=bhO3lgQ&up_id=31375446)" " by csunny (https://github.com/csunny/DB-GPT)" -#: ../../getting_started/tutorials.md:11 3096811f862649bf84ff3cd29cdf14db -msgid "Knowledge Base" +#: ../../getting_started/tutorials.md:11 7446c2e65eb249fa8d2486604a218a63 +#, fuzzy +msgid "Knowledge" msgstr "知识库" -#: ../../getting_started/tutorials.md:13 ea00f3de8c754bf2950e735a2f14043a +#: ../../getting_started/tutorials.md:13 7911e97570c6467095e3d8277c81625e #, fuzzy msgid "" "[How to Create your own knowledge repository](https://db-" @@ -58,55 +59,58 @@ msgstr "" "[怎么创建自己的知识库](https://db-" "gpt.readthedocs.io/en/latest/modules/knowledge.html)" -#: ../../getting_started/tutorials.md:15 07195f11314945989eeeb9400c8a9b43 +#: ../../getting_started/tutorials.md:15 39a1d08f301f4266a4043e0eab501825 #, fuzzy msgid "![Add new Knowledge demonstration](../../assets/new_knownledge.gif)" msgstr "[新增知识库演示](../../assets/new_knownledge_en.gif)" -#: ../../getting_started/tutorials.md:15 333cdda401df4509a11d14535391b8a8 +#: ../../getting_started/tutorials.md:15 f37788a8907b4463a8a7fc4145c6ca1e #, fuzzy msgid "Add new Knowledge demonstration" msgstr "[新增知识库演示](../../assets/new_knownledge_en.gif)" -#: 
../../getting_started/tutorials.md:17 5245cd247a184f63a10f735f414f303f +#: ../../getting_started/tutorials.md:17 1e3d3188b0d2459186290df1f849390a msgid "SQL Generation" msgstr "" -#: ../../getting_started/tutorials.md:18 9a980e7625d34b98bf318851c43fb13d +#: ../../getting_started/tutorials.md:18 6cec0866942b4bd1beaec17355b370ca #, fuzzy msgid "![sql generation demonstration](../../assets/demo_en.gif)" msgstr "[sql生成演示](../../assets/demo_en.gif)" -#: ../../getting_started/tutorials.md:18 952c680cf62140978b4e94d36c49134a +#: ../../getting_started/tutorials.md:18 41c5f06251cb4149a5ff47e6b800bc65 #, fuzzy msgid "sql generation demonstration" msgstr "[sql生成演示](../../assets/demo_en.gif)" -#: ../../getting_started/tutorials.md:20 c0a6f9fefbb9404695fe3bffb6ecc577 +#: ../../getting_started/tutorials.md:20 862e024da7cb4e8b9d3e7396664d34ce msgid "SQL Execute" msgstr "SQL执行" -#: ../../getting_started/tutorials.md:21 e959cc6ca356407d854ee5541233c19a +#: ../../getting_started/tutorials.md:21 381350353517454cb800e5b804319027 #, fuzzy msgid "![sql execute demonstration](../../assets/auto_sql_en.gif)" msgstr "[sql execute 演示](../../assets/auto_sql_en.gif)" -#: ../../getting_started/tutorials.md:21 69247d51ccd349b082ea452f6d74d2b3 +#: ../../getting_started/tutorials.md:21 523a107500394147bb2051cddb5d9ad9 #, fuzzy msgid "sql execute demonstration" msgstr "SQL执行" -#: ../../getting_started/tutorials.md:23 0fd9770dbf3c49b0b644599dc70187a7 +#: ../../getting_started/tutorials.md:23 105d8c6319754b7794daf2993a10ed87 #, fuzzy msgid "Plugins" msgstr "DB Plugins" -#: ../../getting_started/tutorials.md:24 cf58eb1ee13f49f69e501c0e221b4bed +#: ../../getting_started/tutorials.md:24 45a1bdf9424a4cd48b0f361d46c64f1e #, fuzzy msgid "![db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)" msgstr "[db plugins 演示](../../assets/dbgpt_bytebase_plugin.gif)" -#: ../../getting_started/tutorials.md:24 9e474caadb87481ba51f8595067f7edd +#: ../../getting_started/tutorials.md:24 
275b5132954a44e983a7c4a69e914142 msgid "db plugins demonstration" msgstr "" +#~ msgid "Introduction" +#~ msgstr "" + diff --git a/docs/locales/zh_CN/LC_MESSAGES/modules/vector.po b/docs/locales/zh_CN/LC_MESSAGES/modules/vector.po index c3c7f7822..555bf3980 100644 --- a/docs/locales/zh_CN/LC_MESSAGES/modules/vector.po +++ b/docs/locales/zh_CN/LC_MESSAGES/modules/vector.po @@ -50,17 +50,21 @@ msgid "" "differences and underlying details of different vector data. For example," " it can be used to connect to databases such as Milvus, Chroma, " "Elasticsearch, and Weaviate." -msgstr "vectorconnector是一个向量数据库连接适配器,你可以通过它来连接不同的向量数据库,并且屏蔽掉了不同的向量数据的是实现差异和底层细节。例如Milvus, Chroma, Elasticsearch, Weaviate..." +msgstr "" +"vectorconnector是一个向量数据库连接适配器,你可以通过它来连接不同的向量数据库,并且屏蔽掉了不同的向量数据的是实现差异和底层细节。例如Milvus," +" Chroma, Elasticsearch, Weaviate..." #: ../../modules/vector.rst:10 4d359cf0cf254b9392e9d3b963bc1071 msgid "DB-GPT VectorConnector currently support milvus and chroma vector database" msgstr "DB-GPT VectorConnector当前支持Milvus和Chroma,未来会越来越多." #: ../../modules/vector.rst:12 60809c6ecbb64cc8888b9dc2f9de398a -msgid "`Chroma <./vector/chroma.html>`_: supported chroma vector database." -msgstr "msgid "`Chroma <./vector/chroma.html>`_: 支持Chroma向量数据库" +#, fuzzy +msgid "`chroma <./vector/chroma.html>`_: supported chroma vector database." +msgstr "`Chroma <./vector/chroma.html>`_: 支持Chroma向量数据库" #: ../../modules/vector.rst:13 e24aeb84d75741fe9517a0d5ec5d92f1 -msgid "`Milvus <./vector/milvus.html>`_: supported milvus vector database." +#, fuzzy +msgid "`milvus <./vector/milvus.html>`_: supported milvus vector database." 
msgstr "Milvus <./vector/milvus.html>`_: 支持Milvus向量数据库" diff --git a/docs/modules/vector.rst b/docs/modules/vector.rst index d425a027d..c50a93794 100644 --- a/docs/modules/vector.rst +++ b/docs/modules/vector.rst @@ -11,6 +11,7 @@ DB-GPT VectorConnector currently support milvus and chroma vector database - `chroma <./vector/chroma.html>`_: supported chroma vector database. - `milvus <./vector/milvus.html>`_: supported milvus vector database. +- `weaviate <./vector/weaviate.html>`_: supported weaviate vector database. .. toctree:: @@ -20,4 +21,5 @@ DB-GPT VectorConnector currently support milvus and chroma vector database :hidden: ./vector/chroma/chroma.md - ./vector/milvus/milvus.md \ No newline at end of file + ./vector/milvus/milvus.md + ./vector/weaviate/weaviate.md \ No newline at end of file diff --git a/docs/modules/vector/weaviate/weaviate.md b/docs/modules/vector/weaviate/weaviate.md new file mode 100644 index 000000000..709a90a6a --- /dev/null +++ b/docs/modules/vector/weaviate/weaviate.md @@ -0,0 +1,89 @@ +WeaviateStore +================================== +WeaviateStore is the implementation of the Weaviate vector database in VectorConnector. + +[Tutorial on how to create a Weaviate instance](https://weaviate.io/developers/weaviate/installation) + +It inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document(). +``` +class WeaviateStore(VectorStoreBase): + """Weaviate database""" + + def __init__(self, ctx: dict) -> None: + """Initialize with Weaviate client.""" + try: + import weaviate + except ImportError: + raise ValueError( + "Could not import weaviate python package. " + "Please install it with `pip install weaviate-client`." 
+ ) + + self.ctx = ctx + self.weaviate_url = CFG.WEAVIATE_URL + self.embedding = ctx.get("embeddings", None) + self.vector_name = ctx["vector_store_name"] + self.persist_dir = os.path.join( + KNOWLEDGE_UPLOAD_ROOT_PATH, self.vector_name + ".vectordb" + ) + + self.vector_store_client = weaviate.Client(self.weaviate_url) +``` + +similar_search() + +``` + def similar_search(self, text: str, topk: int) -> None: + """Perform similar search in Weaviate""" + logger.info("Weaviate similar search") + # nearText = { + # "concepts": [text], + # "distance": 0.75, # prior to v1.14 use "certainty" instead of "distance" + # } + # vector = self.embedding.embed_query(text) + response = ( + self.vector_store_client.query.get(self.vector_name, ["metadata", "page_content"]) + # .with_near_vector({"vector": vector}) + .with_limit(topk) + .do() + ) + docs = response['data']['Get'][list(response['data']['Get'].keys())[0]] + return docs + +``` + +vector_name_exists() + +``` + def vector_name_exists(self) -> bool: + """Check if a vector name exists for a given class in Weaviate. + Returns: + bool: True if the vector name exists, False otherwise. + """ + if self.vector_store_client.schema.get(self.vector_name): + return True + return False + +``` + +load_document() + +``` + def load_document(self, documents: list) -> None: + """Load documents into Weaviate""" + logger.info("Weaviate load document") + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + + # Import data + with self.vector_store_client.batch as batch: + batch.batch_size = 100 + + # Batch import all documents + for i in range(len(texts)): + properties = {"metadata": metadatas[i]['source'], "page_content": texts[i]} + + self.vector_store_client.batch.add_data_object(data_object=properties, class_name=self.vector_name) + self.vector_store_client.batch.flush() +``` +