docs: Add Weaviate docs, provide how to use Weaviate vector database in DB-GPT.

1.Weaviate docs
This commit is contained in:
aries-ckt 2023-06-19 17:36:45 +08:00
parent c32f3f1766
commit a5d6518d87
4 changed files with 125 additions and 26 deletions

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.1.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-06-14 17:19+0800\n"
"POT-Creation-Date: 2023-06-15 21:28+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@ -19,25 +19,25 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/tutorials.md:1 f80ad98cf2d444a3a159655b37ae4d4f
#: ../../getting_started/tutorials.md:1 12844381da6e4aeab07dbccf4a6ae020
msgid "Tutorials"
msgstr "教程"
#: ../../getting_started/tutorials.md:4 ff8771c31b024e538dbbaaeaccec8aa7
#: ../../getting_started/tutorials.md:4 6312ba6a99a04bfdb04b9407fb92d967
msgid "This is a collection of DB-GPT tutorials on Medium."
msgstr "这是知乎上DB-GPT教程的集合。"
#: ../../getting_started/tutorials.md:6 0c1c544a5d184375aabd7fe6918042e2
#: ../../getting_started/tutorials.md:6 2fc56b41dec94e4c8ba56014e72ad527
msgid ""
"DB-GPT is divided into several functions, including chat with knowledge "
"base, execute SQL, chat with database, and execute plugins."
msgstr ""
#: ../../getting_started/tutorials.md:8 b5e106a97bbd4467b071bb05d837d7d9
msgid "Introduction"
#: ../../getting_started/tutorials.md:8 ae3ad1badc0948f8a0c271ca1bd58399
msgid "Introduce"
msgstr ""
#: ../../getting_started/tutorials.md:9 9df43436183d4b41a75a09fec15743d7
#: ../../getting_started/tutorials.md:9 c646d59a9b3b49459d8f8e6d17f7ea05
#, fuzzy
msgid "[What is DB-GPT](https://www.youtube.com/watch?v=QszhVJerc0I)"
msgstr ""
@ -45,11 +45,12 @@ msgstr ""
"GPT](https://www.bilibili.com/video/BV1SM4y1a7Nj/?buvid=551b023900b290f9497610b2155a2668&is_story_h5=false&mid=%2BVyE%2Fwau5woPcUKieCWS0A%3D%3D&p=1&plat_id=116&share_from=ugc&share_medium=iphone&share_plat=ios&share_session_id=5D08B533-82A4-4D40-9615-7826065B4574&share_source=GENERIC&share_tag=s_i&timestamp=1686307943&unique_k=bhO3lgQ&up_id=31375446)"
" by csunny (https://github.com/csunny/DB-GPT)"
#: ../../getting_started/tutorials.md:11 3096811f862649bf84ff3cd29cdf14db
msgid "Knowledge Base"
#: ../../getting_started/tutorials.md:11 7446c2e65eb249fa8d2486604a218a63
#, fuzzy
msgid "Knowledge"
msgstr "知识库"
#: ../../getting_started/tutorials.md:13 ea00f3de8c754bf2950e735a2f14043a
#: ../../getting_started/tutorials.md:13 7911e97570c6467095e3d8277c81625e
#, fuzzy
msgid ""
"[How to Create your own knowledge repository](https://db-"
@ -58,55 +59,58 @@ msgstr ""
"[怎么创建自己的知识库](https://db-"
"gpt.readthedocs.io/en/latest/modules/knowledge.html)"
#: ../../getting_started/tutorials.md:15 07195f11314945989eeeb9400c8a9b43
#: ../../getting_started/tutorials.md:15 39a1d08f301f4266a4043e0eab501825
#, fuzzy
msgid "![Add new Knowledge demonstration](../../assets/new_knownledge.gif)"
msgstr "[新增知识库演示](../../assets/new_knownledge_en.gif)"
#: ../../getting_started/tutorials.md:15 333cdda401df4509a11d14535391b8a8
#: ../../getting_started/tutorials.md:15 f37788a8907b4463a8a7fc4145c6ca1e
#, fuzzy
msgid "Add new Knowledge demonstration"
msgstr "[新增知识库演示](../../assets/new_knownledge_en.gif)"
#: ../../getting_started/tutorials.md:17 5245cd247a184f63a10f735f414f303f
#: ../../getting_started/tutorials.md:17 1e3d3188b0d2459186290df1f849390a
msgid "SQL Generation"
msgstr ""
#: ../../getting_started/tutorials.md:18 9a980e7625d34b98bf318851c43fb13d
#: ../../getting_started/tutorials.md:18 6cec0866942b4bd1beaec17355b370ca
#, fuzzy
msgid "![sql generation demonstration](../../assets/demo_en.gif)"
msgstr "[sql生成演示](../../assets/demo_en.gif)"
#: ../../getting_started/tutorials.md:18 952c680cf62140978b4e94d36c49134a
#: ../../getting_started/tutorials.md:18 41c5f06251cb4149a5ff47e6b800bc65
#, fuzzy
msgid "sql generation demonstration"
msgstr "[sql生成演示](../../assets/demo_en.gif)"
#: ../../getting_started/tutorials.md:20 c0a6f9fefbb9404695fe3bffb6ecc577
#: ../../getting_started/tutorials.md:20 862e024da7cb4e8b9d3e7396664d34ce
msgid "SQL Execute"
msgstr "SQL执行"
#: ../../getting_started/tutorials.md:21 e959cc6ca356407d854ee5541233c19a
#: ../../getting_started/tutorials.md:21 381350353517454cb800e5b804319027
#, fuzzy
msgid "![sql execute demonstration](../../assets/auto_sql_en.gif)"
msgstr "[sql execute 演示](../../assets/auto_sql_en.gif)"
#: ../../getting_started/tutorials.md:21 69247d51ccd349b082ea452f6d74d2b3
#: ../../getting_started/tutorials.md:21 523a107500394147bb2051cddb5d9ad9
#, fuzzy
msgid "sql execute demonstration"
msgstr "SQL执行"
#: ../../getting_started/tutorials.md:23 0fd9770dbf3c49b0b644599dc70187a7
#: ../../getting_started/tutorials.md:23 105d8c6319754b7794daf2993a10ed87
#, fuzzy
msgid "Plugins"
msgstr "DB Plugins"
#: ../../getting_started/tutorials.md:24 cf58eb1ee13f49f69e501c0e221b4bed
#: ../../getting_started/tutorials.md:24 45a1bdf9424a4cd48b0f361d46c64f1e
#, fuzzy
msgid "![db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)"
msgstr "[db plugins 演示](../../assets/dbgpt_bytebase_plugin.gif)"
#: ../../getting_started/tutorials.md:24 9e474caadb87481ba51f8595067f7edd
#: ../../getting_started/tutorials.md:24 275b5132954a44e983a7c4a69e914142
msgid "db plugins demonstration"
msgstr ""
#~ msgid "Introduction"
#~ msgstr ""

View File

@ -50,17 +50,21 @@ msgid ""
"differences and underlying details of different vector data. For example,"
" it can be used to connect to databases such as Milvus, Chroma, "
"Elasticsearch, and Weaviate."
msgstr "vectorconnector是一个向量数据库连接适配器你可以通过它来连接不同的向量数据库并且屏蔽掉了不同的向量数据的是实现差异和底层细节。例如Milvus, Chroma, Elasticsearch, Weaviate..."
msgstr ""
"vectorconnector是一个向量数据库连接适配器，你可以通过它来连接不同的向量数据库，并且屏蔽掉了不同向量数据的实现差异和底层细节。例如Milvus,"
" Chroma, Elasticsearch, Weaviate..."
#: ../../modules/vector.rst:10 4d359cf0cf254b9392e9d3b963bc1071
msgid "DB-GPT VectorConnector currently support milvus and chroma vector database"
msgstr "DB-GPT VectorConnector当前支持Milvus和Chroma未来会越来越多."
#: ../../modules/vector.rst:12 60809c6ecbb64cc8888b9dc2f9de398a
msgid "`Chroma <./vector/chroma.html>`_: supported chroma vector database."
msgstr "msgid "`Chroma <./vector/chroma.html>`_: 支持Chroma向量数据库"
#, fuzzy
msgid "`chroma <./vector/chroma.html>`_: supported chroma vector database."
msgstr "`Chroma <./vector/chroma.html>`_: 支持Chroma向量数据库"
#: ../../modules/vector.rst:13 e24aeb84d75741fe9517a0d5ec5d92f1
msgid "`Milvus <./vector/milvus.html>`_: supported milvus vector database."
#, fuzzy
msgid "`milvus <./vector/milvus.html>`_: supported milvus vector database."
msgstr "`Milvus <./vector/milvus.html>`_: 支持Milvus向量数据库"

View File

@ -11,6 +11,7 @@ DB-GPT VectorConnector currently support milvus and chroma vector database
- `chroma <./vector/chroma.html>`_: supported chroma vector database.
- `milvus <./vector/milvus.html>`_: supported milvus vector database.
- `weaviate <./vector/weaviate.html>`_: supported weaviate vector database.
.. toctree::
@ -20,4 +21,5 @@ DB-GPT VectorConnector currently support milvus and chroma vector database
:hidden:
./vector/chroma/chroma.md
./vector/milvus/milvus.md
./vector/milvus/milvus.md
./vector/weaviate/weaviate.md

View File

@ -0,0 +1,89 @@
WeaviateStore
==================================
WeaviateStore is one implementation of the Weaviate vector database in VectorConnector.
[Tutorial on how to create a Weaviate instance](https://weaviate.io/developers/weaviate/installation)
WeaviateStore inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document().
```
class WeaviateStore(VectorStoreBase):
    """Weaviate database"""

    def __init__(self, ctx: dict) -> None:
        """Initialize with Weaviate client.

        Args:
            ctx: Store configuration. Must contain ``"vector_store_name"``;
                ``"embeddings"`` is optional and defaults to ``None``.

        Raises:
            ValueError: If the ``weaviate`` package cannot be imported.
            KeyError: If ``ctx`` has no ``"vector_store_name"`` entry.
        """
        try:
            import weaviate
        except ImportError as err:
            # ValueError is kept for backward compatibility; chaining the
            # ImportError keeps the root cause visible in the traceback.
            raise ValueError(
                "Could not import weaviate python package. "
                "Please install it with `pip install weaviate-client`."
            ) from err

        self.ctx = ctx
        # The Weaviate endpoint comes from global configuration, not from ctx.
        self.weaviate_url = CFG.WEAVIATE_URL
        self.embedding = ctx.get("embeddings", None)
        self.vector_name = ctx["vector_store_name"]
        self.persist_dir = os.path.join(
            KNOWLEDGE_UPLOAD_ROOT_PATH, self.vector_name + ".vectordb"
        )

        self.vector_store_client = weaviate.Client(self.weaviate_url)
```
similar_search()
```
def similar_search(self, text: str, topk: int) -> list:
    """Fetch up to ``topk`` objects of class ``self.vector_name`` from Weaviate.

    Args:
        text: Query text. NOTE: currently unused, because no near-vector or
            near-text clause is attached to the query (see TODO below).
        topk: Maximum number of objects requested via ``with_limit``.

    Returns:
        The entries stored under the first key of ``response["data"]["Get"]``;
        the query requests the ``metadata`` and ``page_content`` properties.

    Raises:
        KeyError: If the response has no ``"data"``/``"Get"`` section.
        IndexError: If ``response["data"]["Get"]`` is empty.
    """
    logger.info("Weaviate similar search")
    # TODO: vector search is disabled. To enable it, embed the query with
    #   vector = self.embedding.embed_query(text)
    # and add ``.with_near_vector({"vector": vector})`` before ``with_limit``.
    response = (
        self.vector_store_client.query.get(
            self.vector_name, ["metadata", "page_content"]
        )
        .with_limit(topk)
        .do()
    )
    results = response["data"]["Get"]
    # Take the first key rather than indexing by self.vector_name.
    # NOTE(review): presumably the server may normalise the class name — confirm.
    return results[list(results.keys())[0]]
```
vector_name_exists()
```
def vector_name_exists(self) -> bool:
    """Check whether a schema class named ``self.vector_name`` exists in Weaviate.

    Returns:
        bool: True if ``schema.get(self.vector_name)`` returns a truthy
        value, False otherwise.
    """
    # NOTE(review): depending on the client version, schema.get() may raise
    # for an unknown class instead of returning an empty value — confirm.
    return bool(self.vector_store_client.schema.get(self.vector_name))
```
load_document()
```
def load_document(self, documents: list) -> None:
    """Load documents into Weaviate.

    Each document is stored as one object of class ``self.vector_name`` with
    two properties: ``metadata`` (the document's ``metadata["source"]``) and
    ``page_content``.

    Args:
        documents: Objects exposing ``page_content`` and a ``metadata``
            mapping that contains a ``"source"`` entry.

    Raises:
        KeyError: If a document's metadata has no ``"source"`` key.
    """
    logger.info("Weaviate load document")

    # Import data
    with self.vector_store_client.batch as batch:
        batch.batch_size = 100
        # Batch import all documents
        for doc in documents:
            properties = {
                "metadata": doc.metadata["source"],
                "page_content": doc.page_content,
            }
            batch.add_data_object(
                data_object=properties, class_name=self.vector_name
            )
        # Send whatever is still queued before leaving the batch context.
        batch.flush()
```