From 7d2b96aeca275b75eb5145fcaf5c19719d251171 Mon Sep 17 00:00:00 2001
From: aries_ckt <916701291@qq.com>
Date: Wed, 12 Jul 2023 13:53:48 +0800
Subject: [PATCH] doc:update knowledge api

---
 .../zh_CN/LC_MESSAGES/modules/knowledge.po | 43 +++++++++-----
 docs/modules/knowledge.rst                 | 57 +++++++++++++------
 2 files changed, 70 insertions(+), 30 deletions(-)

diff --git a/docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po b/docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po
index 0e2bd4b8f..db68e179c 100644
--- a/docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po
+++ b/docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po
@@ -8,7 +8,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: DB-GPT 0.3.0\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2023-07-10 16:59+0800\n"
+"POT-Creation-Date: 2023-07-12 11:57+0800\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME \n"
 "Language: zh_CN\n"
@@ -19,7 +19,7 @@ msgstr ""
 "Content-Transfer-Encoding: 8bit\n"
 "Generated-By: Babel 2.12.1\n"

-#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:84
+#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:98
 #: ca36c0ca545c4d70b51fe811a3e7caca
 msgid "Knowledge"
 msgstr "知识"
@@ -54,22 +54,29 @@ msgstr "准备"

 #: ../../modules/knowledge.rst:15 515555d13e7548deb596d80ea1514bb2
 msgid "before execution:"
-msgstr ""
+msgstr "开始前"

-#: ../../modules/knowledge.rst:21 8b790c0c37114dfc8eda4863af9314b4
+#: ../../modules/knowledge.rst:21 3333f92965ee41ea9cfa542de6c1e976
 msgid ""
-"2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma "
-"(now only support Chroma and Milvus, if you set Milvus, please set "
-"MILVUS_URL and MILVUS_PORT)"
-msgstr "2.更新你的.env,设置你的向量存储类型,VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus,如果你设置了Milvus,请设置MILVUS_URL和MILVUS_PORT)"
+"2.prepare embedding model, you can download from https://huggingface.co/."
+" Notice you have installed git-lfs. eg: git clone "
+"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
+msgstr "提前准备Embedding Model, 你可以在https://huggingface.co/进行下载, 注意: 你需要先安装git-lfs. eg: git clone "
+"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"

-#: ../../modules/knowledge.rst:24 058fa57484a64756ab2650b46f4b33bf
+#: ../../modules/knowledge.rst:29 7abcbe007d594f4aaa43ddef88ef4d89
+msgid ""
+"3.prepare vector_store instance and vector store config, now we support "
+"Chroma, Milvus and Weaviate."
+msgstr "提前准备向量数据库环境, 目前支持Chroma、Milvus和Weaviate向量数据库"
+
+#: ../../modules/knowledge.rst:50 058fa57484a64756ab2650b46f4b33bf
 msgid ""
 "3.init Url Type EmbeddingEngine api and embedding your document into "
 "vector store in your code."
 msgstr "初始化 Url类型 EmbeddingEngine api, 将url文档embedding向量化到向量数据库 "

-#: ../../modules/knowledge.rst:40 5f255b96abd346479ab3c371393e47dc
+#: ../../modules/knowledge.rst:62 5f255b96abd346479ab3c371393e47dc
 #, fuzzy
 msgid ""
 "4.init Document Type EmbeddingEngine api and embedding your document into"
 " vector store in your code.Document type can be .txt, .pdf, .md, .html, "
 ".doc, .ppt."
 msgstr ""
 "初始化 文档型类型 EmbeddingEngine api, 将文档embedding向量化到向量数据库(文档可以是.txt, .pdf, "
 ".md, .html, .doc, .ppt)"

-#: ../../modules/knowledge.rst:57 d8c85ba7714749269714b03857738f70
+#: ../../modules/knowledge.rst:75 d8c85ba7714749269714b03857738f70
 msgid ""
 "5.init TEXT Type EmbeddingEngine api and embedding your document into "
 "vector store in your code."
 msgstr "初始化TEXT类型 EmbeddingEngine api, 将文档embedding向量化到向量数据库"

-#: ../../modules/knowledge.rst:73 c59e4650d57e44ae8d967768dddf908a
+#: ../../modules/knowledge.rst:87 c59e4650d57e44ae8d967768dddf908a
 msgid "4.similar search based on your knowledge base. ::"
::" msgstr "在知识库进行相似性搜索" -#: ../../modules/knowledge.rst:79 f500fcdc791c4286b411819ae9ab3dc6 +#: ../../modules/knowledge.rst:93 f500fcdc791c4286b411819ae9ab3dc6 msgid "" "Note that the default vector model used is text2vec-large-chinese (which " "is a large model, so if your personal computer configuration is not " @@ -99,7 +106,7 @@ msgstr "" "注意,这里默认向量模型是text2vec-large-chinese(模型比较大,如果个人电脑配置不够建议采用text2vec-base-" "chinese),因此确保需要将模型download下来放到models目录中。" -#: ../../modules/knowledge.rst:81 62a5e10a19844ba9955113f5c78cb460 +#: ../../modules/knowledge.rst:95 62a5e10a19844ba9955113f5c78cb460 msgid "" "`pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf " "embedding." @@ -131,3 +138,11 @@ msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embeddin #~ "folders in the pilot/datasets directory." #~ msgstr "1.将个人知识文件或文件夹放在pilot/datasets目录中。" +#~ msgid "" +#~ "2.Update your .env, set your vector " +#~ "store type, VECTOR_STORE_TYPE=Chroma (now only" +#~ " support Chroma and Milvus, if you" +#~ " set Milvus, please set MILVUS_URL " +#~ "and MILVUS_PORT)" +#~ msgstr "2.更新你的.env,设置你的向量存储类型,VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus,如果你设置了Milvus,请设置MILVUS_URL和MILVUS_PORT)" + diff --git a/docs/modules/knowledge.rst b/docs/modules/knowledge.rst index 756486f89..bfb6a7cb4 100644 --- a/docs/modules/knowledge.rst +++ b/docs/modules/knowledge.rst @@ -16,20 +16,55 @@ before execution: :: + pip install db-gpt -i https://pypi.org/ python -m spacy download zh_core_web_sm + from pilot import EmbeddingEngine,KnowledgeType -2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma -(now only support Chroma and Milvus, if you set Milvus, please set MILVUS_URL and MILVUS_PORT) + +2.prepare embedding model, you can download from https://huggingface.co/. +Notice you have installed git-lfs. + +eg: git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 + +:: + + embedding_model = "your_embedding_model_path/all-MiniLM-L6-v2" + +3.prepare vector_store instance and vector store config, now we support Chroma, Milvus and Weaviate. + +:: + + #Chroma + vector_store_config = { + "vector_store_type":"Chroma", + "vector_store_name":"your_name",#you can define yourself + "chroma_persist_path":"your_persist_dir" + } + #Milvus + vector_store_config = { + "vector_store_type":"Milvus", + "vector_store_name":"your_name",#you can define yourself + "milvus_url":"your_url", + "milvus_port":"your_port", + "milvus_username":"your_username",(optional) + "milvus_password":"your_password",(optional) + "milvus_secure":"your_secure"(optional) + } + #Weaviate + vector_store_config = { + "vector_store_type":"Weaviate", + "vector_store_name":"your_name",#you can define yourself + "weaviate_url":"your_url", + "weaviate_port":"your_port", + "weaviate_username":"your_username",(optional) + "weaviate_password":"your_password",(optional) + } 3.init Url Type EmbeddingEngine api and embedding your document into vector store in your code. :: url = "https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html" - embedding_model = "your_model_path/all-MiniLM-L6-v2" - vector_store_config = { - "vector_store_name": your_name, - } embedding_engine = EmbeddingEngine( knowledge_source=url, knowledge_type=KnowledgeType.URL.value, @@ -43,12 +78,6 @@ Document type can be .txt, .pdf, .md, .doc, .ppt. 
:: document_path = "your_path/test.md" - embedding_model = "your_model_path/all-MiniLM-L6-v2" - vector_store_config = { - "vector_store_name": your_name, - "vector_store_type": "Chroma", - "chroma_persist_path": "your_persist_dir", - } embedding_engine = EmbeddingEngine( knowledge_source=document_path, knowledge_type=KnowledgeType.DOCUMENT.value, @@ -61,10 +90,6 @@ Document type can be .txt, .pdf, .md, .doc, .ppt. :: raw_text = "a long passage" - embedding_model = "your_model_path/all-MiniLM-L6-v2" - vector_store_config = { - "vector_store_name": your_name, - } embedding_engine = EmbeddingEngine( knowledge_source=raw_text, knowledge_type=KnowledgeType.TEXT.value,