doc:update knowledge api

2025-09-13 21:21:08 +00:00 · 2023-07-12 13:53:48 +08:00
parent 929e7fe96b
commit 7d2b96aeca
2 changed files with 70 additions and 30 deletions
--- a/docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po
+++ b/docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po
@@ -8,7 +8,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: DB-GPT 0.3.0\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2023-07-10 16:59+0800\n"
+"POT-Creation-Date: 2023-07-12 11:57+0800\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language: zh_CN\n"
@@ -19,7 +19,7 @@ msgstr ""
 "Content-Transfer-Encoding: 8bit\n"
 "Generated-By: Babel 2.12.1\n"

-#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:84
+#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:98
 #: ca36c0ca545c4d70b51fe811a3e7caca
 msgid "Knowledge"
 msgstr "知识"
@@ -54,22 +54,29 @@ msgstr "准备"

 #: ../../modules/knowledge.rst:15 515555d13e7548deb596d80ea1514bb2
 msgid "before execution:"
-msgstr ""
+msgstr "开始前："

-#: ../../modules/knowledge.rst:21 8b790c0c37114dfc8eda4863af9314b4
+#: ../../modules/knowledge.rst:21 3333f92965ee41ea9cfa542de6c1e976
 msgid ""
-"2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma "
-"(now only support Chroma and Milvus, if you set Milvus, please set "
-"MILVUS_URL and MILVUS_PORT)"
-msgstr "2.更新你的.env，设置你的向量存储类型，VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus，如果你设置了Milvus，请设置MILVUS_URL和MILVUS_PORT)"
+"2.prepare embedding model, you can download from https://huggingface.co/."
+" Notice you have installed git-lfs. eg: git clone "
+"https://huggingface.co/THUDM/chatglm2-6b"
+msgstr "提前准备Embedding Model, 你可以在https://huggingface.co/进行下载，注意：你需要先安装git-lfs. eg: git clone "
+"https://huggingface.co/THUDM/chatglm2-6b"

-#: ../../modules/knowledge.rst:24 058fa57484a64756ab2650b46f4b33bf
+#: ../../modules/knowledge.rst:29 7abcbe007d594f4aaa43ddef88ef4d89
+msgid ""
+"3.prepare vector_store instance and vector store config, now we support "
+"Chroma, Milvus and Weaviate."
+msgstr "提前准备向量数据库环境，目前支持Chroma、Milvus和Weaviate向量数据库"
+
+#: ../../modules/knowledge.rst:50 058fa57484a64756ab2650b46f4b33bf
 msgid ""
 "3.init Url Type EmbeddingEngine api and embedding your document into "
 "vector store in your code."
 msgstr "初始化 Url类型 EmbeddingEngine api， 将url文档embedding向量化到向量数据库 "

-#: ../../modules/knowledge.rst:40 5f255b96abd346479ab3c371393e47dc
+#: ../../modules/knowledge.rst:62 5f255b96abd346479ab3c371393e47dc
 #, fuzzy
 msgid ""
 "4.init Document Type EmbeddingEngine api and embedding your document into"
@@ -79,17 +86,17 @@ msgstr ""
 "初始化 文档型类型 EmbeddingEngine api， 将文档embedding向量化到向量数据库(文档可以是.txt, .pdf, "
 ".md, .html, .doc, .ppt)"

-#: ../../modules/knowledge.rst:57 d8c85ba7714749269714b03857738f70
+#: ../../modules/knowledge.rst:75 d8c85ba7714749269714b03857738f70
 msgid ""
 "5.init TEXT Type EmbeddingEngine api and embedding your document into "
 "vector store in your code."
 msgstr "初始化TEXT类型 EmbeddingEngine api， 将文档embedding向量化到向量数据库"

-#: ../../modules/knowledge.rst:73 c59e4650d57e44ae8d967768dddf908a
+#: ../../modules/knowledge.rst:87 c59e4650d57e44ae8d967768dddf908a
 msgid "4.similar search based on your knowledge base. ::"
 msgstr "在知识库进行相似性搜索"

-#: ../../modules/knowledge.rst:79 f500fcdc791c4286b411819ae9ab3dc6
+#: ../../modules/knowledge.rst:93 f500fcdc791c4286b411819ae9ab3dc6
 msgid ""
 "Note that the default vector model used is text2vec-large-chinese (which "
 "is a large model, so if your personal computer configuration is not "
@@ -99,7 +106,7 @@ msgstr ""
 "注意，这里默认向量模型是text2vec-large-chinese(模型比较大，如果个人电脑配置不够建议采用text2vec-base-"
 "chinese),因此确保需要将模型download下来放到models目录中。"

-#: ../../modules/knowledge.rst:81 62a5e10a19844ba9955113f5c78cb460
+#: ../../modules/knowledge.rst:95 62a5e10a19844ba9955113f5c78cb460
 msgid ""
 "`pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf "
 "embedding."
@@ -131,3 +138,11 @@ msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embeddin
 #~ "folders in the pilot/datasets directory."
 #~ msgstr "1.将个人知识文件或文件夹放在pilot/datasets目录中。"

+#~ msgid ""
+#~ "2.Update your .env, set your vector "
+#~ "store type, VECTOR_STORE_TYPE=Chroma (now only"
+#~ " support Chroma and Milvus, if you"
+#~ " set Milvus, please set MILVUS_URL "
+#~ "and MILVUS_PORT)"
+#~ msgstr "2.更新你的.env，设置你的向量存储类型，VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus，如果你设置了Milvus，请设置MILVUS_URL和MILVUS_PORT)"
+
--- a/docs/modules/knowledge.rst
+++ b/docs/modules/knowledge.rst
@@ -16,20 +16,55 @@ before execution:

 ::

+    pip install db-gpt -i https://pypi.org/simple/
    python -m spacy download zh_core_web_sm
+    from pilot import EmbeddingEngine,KnowledgeType

-2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma
-(now only support Chroma and Milvus, if you set Milvus, please set MILVUS_URL and MILVUS_PORT)
+
+2.prepare embedding model, you can download from https://huggingface.co/.
+Notice you have installed git-lfs.
+
+eg: git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
+
+::
+
+    embedding_model = "your_embedding_model_path/all-MiniLM-L6-v2"
+
+3.prepare vector_store instance and vector store config, now we support Chroma, Milvus and Weaviate.
+
+::
+
+    #Chroma
+    vector_store_config = {
+        "vector_store_type":"Chroma",
+        "vector_store_name":"your_name",#you can define yourself
+        "chroma_persist_path":"your_persist_dir"
+    }
+    #Milvus
+    vector_store_config = {
+        "vector_store_type":"Milvus",
+        "vector_store_name":"your_name",#you can define yourself
+        "milvus_url":"your_url",
+        "milvus_port":"your_port",
+        "milvus_username":"your_username",#optional
+        "milvus_password":"your_password",#optional
+        "milvus_secure":"your_secure"#optional
+    }
+    #Weaviate
+    vector_store_config = {
+        "vector_store_type":"Weaviate",
+        "vector_store_name":"your_name",#you can define yourself
+        "weaviate_url":"your_url",
+        "weaviate_port":"your_port",
+        "weaviate_username":"your_username",#optional
+        "weaviate_password":"your_password",#optional
+    }

 3.init Url Type EmbeddingEngine api and embedding your document into vector store in your code.

 ::

    url = "https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html"
-    embedding_model = "your_model_path/all-MiniLM-L6-v2"
-    vector_store_config = {
-            "vector_store_name": your_name,
-        }
    embedding_engine = EmbeddingEngine(
                        knowledge_source=url,
                        knowledge_type=KnowledgeType.URL.value,
@@ -43,12 +78,6 @@ Document type can be .txt, .pdf, .md, .doc, .ppt.
 ::

    document_path = "your_path/test.md"
-    embedding_model = "your_model_path/all-MiniLM-L6-v2"
-    vector_store_config = {
-            "vector_store_name": your_name,
-            "vector_store_type": "Chroma",
-            "chroma_persist_path": "your_persist_dir",
-        }
    embedding_engine = EmbeddingEngine(
                        knowledge_source=document_path,
                        knowledge_type=KnowledgeType.DOCUMENT.value,
@@ -61,10 +90,6 @@ Document type can be .txt, .pdf, .md, .doc, .ppt.
 ::

    raw_text = "a long passage"
-    embedding_model = "your_model_path/all-MiniLM-L6-v2"
-    vector_store_config = {
-            "vector_store_name": your_name,
-        }
    embedding_engine = EmbeddingEngine(
                        knowledge_source=raw_text,
                        knowledge_type=KnowledgeType.TEXT.value,