mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-05 18:33:52 +00:00
doc:update knowledge api
This commit is contained in:
parent
929e7fe96b
commit
7d2b96aeca
@ -8,7 +8,7 @@ msgid ""
|
|||||||
msgstr ""
|
msgstr ""
|
||||||
"Project-Id-Version: DB-GPT 0.3.0\n"
|
"Project-Id-Version: DB-GPT 0.3.0\n"
|
||||||
"Report-Msgid-Bugs-To: \n"
|
"Report-Msgid-Bugs-To: \n"
|
||||||
"POT-Creation-Date: 2023-07-10 16:59+0800\n"
|
"POT-Creation-Date: 2023-07-12 11:57+0800\n"
|
||||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||||
"Language: zh_CN\n"
|
"Language: zh_CN\n"
|
||||||
@ -19,7 +19,7 @@ msgstr ""
|
|||||||
"Content-Transfer-Encoding: 8bit\n"
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
"Generated-By: Babel 2.12.1\n"
|
"Generated-By: Babel 2.12.1\n"
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:84
|
#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:98
|
||||||
#: ca36c0ca545c4d70b51fe811a3e7caca
|
#: ca36c0ca545c4d70b51fe811a3e7caca
|
||||||
msgid "Knowledge"
|
msgid "Knowledge"
|
||||||
msgstr "知识"
|
msgstr "知识"
|
||||||
@ -54,22 +54,29 @@ msgstr "准备"
|
|||||||
|
|
||||||
#: ../../modules/knowledge.rst:15 515555d13e7548deb596d80ea1514bb2
|
#: ../../modules/knowledge.rst:15 515555d13e7548deb596d80ea1514bb2
|
||||||
msgid "before execution:"
|
msgid "before execution:"
|
||||||
msgstr ""
|
msgstr "开始前"
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:21 8b790c0c37114dfc8eda4863af9314b4
|
#: ../../modules/knowledge.rst:21 3333f92965ee41ea9cfa542de6c1e976
|
||||||
msgid ""
|
msgid ""
|
||||||
"2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma "
|
"2.prepare embedding model, you can download from https://huggingface.co/."
|
||||||
"(now only support Chroma and Milvus, if you set Milvus, please set "
|
" Notice you have installed git-lfs. eg: git clone "
|
||||||
"MILVUS_URL and MILVUS_PORT)"
|
"https://huggingface.co/THUDM/chatglm2-6b"
|
||||||
msgstr "2.更新你的.env,设置你的向量存储类型,VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus,如果你设置了Milvus,请设置MILVUS_URL和MILVUS_PORT)"
|
msgstr "提前准备Embedding Model, 你可以在https://huggingface.co/进行下载,注意:你需要先安装git-lfs.eg: git clone "
|
||||||
|
"https://huggingface.co/THUDM/chatglm2-6b"
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:24 058fa57484a64756ab2650b46f4b33bf
|
#: ../../modules/knowledge.rst:29 7abcbe007d594f4aaa43ddef88ef4d89
|
||||||
|
msgid ""
|
||||||
|
"3.prepare vector_store instance and vector store config, now we support "
|
||||||
|
"Chroma, Milvus and Weaviate."
|
||||||
|
msgstr "提前准备向量数据库环境,目前支持Chroma, Milvus and Weaviate向量数据库"
|
||||||
|
|
||||||
|
#: ../../modules/knowledge.rst:50 058fa57484a64756ab2650b46f4b33bf
|
||||||
msgid ""
|
msgid ""
|
||||||
"3.init Url Type EmbeddingEngine api and embedding your document into "
|
"3.init Url Type EmbeddingEngine api and embedding your document into "
|
||||||
"vector store in your code."
|
"vector store in your code."
|
||||||
msgstr "初始化 Url类型 EmbeddingEngine api, 将url文档embedding向量化到向量数据库 "
|
msgstr "初始化 Url类型 EmbeddingEngine api, 将url文档embedding向量化到向量数据库 "
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:40 5f255b96abd346479ab3c371393e47dc
|
#: ../../modules/knowledge.rst:62 5f255b96abd346479ab3c371393e47dc
|
||||||
#, fuzzy
|
#, fuzzy
|
||||||
msgid ""
|
msgid ""
|
||||||
"4.init Document Type EmbeddingEngine api and embedding your document into"
|
"4.init Document Type EmbeddingEngine api and embedding your document into"
|
||||||
@ -79,17 +86,17 @@ msgstr ""
|
|||||||
"初始化 文档型类型 EmbeddingEngine api, 将文档embedding向量化到向量数据库(文档可以是.txt, .pdf, "
|
"初始化 文档型类型 EmbeddingEngine api, 将文档embedding向量化到向量数据库(文档可以是.txt, .pdf, "
|
||||||
".md, .html, .doc, .ppt)"
|
".md, .html, .doc, .ppt)"
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:57 d8c85ba7714749269714b03857738f70
|
#: ../../modules/knowledge.rst:75 d8c85ba7714749269714b03857738f70
|
||||||
msgid ""
|
msgid ""
|
||||||
"5.init TEXT Type EmbeddingEngine api and embedding your document into "
|
"5.init TEXT Type EmbeddingEngine api and embedding your document into "
|
||||||
"vector store in your code."
|
"vector store in your code."
|
||||||
msgstr "初始化TEXT类型 EmbeddingEngine api, 将文档embedding向量化到向量数据库"
|
msgstr "初始化TEXT类型 EmbeddingEngine api, 将文档embedding向量化到向量数据库"
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:73 c59e4650d57e44ae8d967768dddf908a
|
#: ../../modules/knowledge.rst:87 c59e4650d57e44ae8d967768dddf908a
|
||||||
msgid "4.similar search based on your knowledge base. ::"
|
msgid "4.similar search based on your knowledge base. ::"
|
||||||
msgstr "在知识库进行相似性搜索"
|
msgstr "在知识库进行相似性搜索"
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:79 f500fcdc791c4286b411819ae9ab3dc6
|
#: ../../modules/knowledge.rst:93 f500fcdc791c4286b411819ae9ab3dc6
|
||||||
msgid ""
|
msgid ""
|
||||||
"Note that the default vector model used is text2vec-large-chinese (which "
|
"Note that the default vector model used is text2vec-large-chinese (which "
|
||||||
"is a large model, so if your personal computer configuration is not "
|
"is a large model, so if your personal computer configuration is not "
|
||||||
@ -99,7 +106,7 @@ msgstr ""
|
|||||||
"注意,这里默认向量模型是text2vec-large-chinese(模型比较大,如果个人电脑配置不够建议采用text2vec-base-"
|
"注意,这里默认向量模型是text2vec-large-chinese(模型比较大,如果个人电脑配置不够建议采用text2vec-base-"
|
||||||
"chinese),因此确保需要将模型download下来放到models目录中。"
|
"chinese),因此确保需要将模型download下来放到models目录中。"
|
||||||
|
|
||||||
#: ../../modules/knowledge.rst:81 62a5e10a19844ba9955113f5c78cb460
|
#: ../../modules/knowledge.rst:95 62a5e10a19844ba9955113f5c78cb460
|
||||||
msgid ""
|
msgid ""
|
||||||
"`pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf "
|
"`pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf "
|
||||||
"embedding."
|
"embedding."
|
||||||
@ -131,3 +138,11 @@ msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embeddin
|
|||||||
#~ "folders in the pilot/datasets directory."
|
#~ "folders in the pilot/datasets directory."
|
||||||
#~ msgstr "1.将个人知识文件或文件夹放在pilot/datasets目录中。"
|
#~ msgstr "1.将个人知识文件或文件夹放在pilot/datasets目录中。"
|
||||||
|
|
||||||
|
#~ msgid ""
|
||||||
|
#~ "2.Update your .env, set your vector "
|
||||||
|
#~ "store type, VECTOR_STORE_TYPE=Chroma (now only"
|
||||||
|
#~ " support Chroma and Milvus, if you"
|
||||||
|
#~ " set Milvus, please set MILVUS_URL "
|
||||||
|
#~ "and MILVUS_PORT)"
|
||||||
|
#~ msgstr "2.更新你的.env,设置你的向量存储类型,VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus,如果你设置了Milvus,请设置MILVUS_URL和MILVUS_PORT)"
|
||||||
|
|
||||||
|
@ -16,20 +16,55 @@ before execution:
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
|
pip install db-gpt -i https://pypi.org/simple
|
||||||
python -m spacy download zh_core_web_sm
|
python -m spacy download zh_core_web_sm
|
||||||
|
from pilot import EmbeddingEngine,KnowledgeType
|
||||||
|
|
||||||
2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma
|
|
||||||
(now only support Chroma and Milvus, if you set Milvus, please set MILVUS_URL and MILVUS_PORT)
|
2.Prepare an embedding model; you can download one from https://huggingface.co/.
|
||||||
|
Note: make sure git-lfs is installed before cloning.
|
||||||
|
|
||||||
|
eg: git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
embedding_model = "your_embedding_model_path/all-MiniLM-L6-v2"
|
||||||
|
|
||||||
|
3.Prepare a vector_store instance and its vector store config. Chroma, Milvus and Weaviate are currently supported.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
#Chroma
|
||||||
|
vector_store_config = {
|
||||||
|
"vector_store_type":"Chroma",
|
||||||
|
"vector_store_name":"your_name",#you can define yourself
|
||||||
|
"chroma_persist_path":"your_persist_dir"
|
||||||
|
}
|
||||||
|
#Milvus
|
||||||
|
vector_store_config = {
|
||||||
|
"vector_store_type":"Milvus",
|
||||||
|
"vector_store_name":"your_name",#you can define yourself
|
||||||
|
"milvus_url":"your_url",
|
||||||
|
"milvus_port":"your_port",
|
||||||
|
"milvus_username":"your_username",#optional
|
||||||
|
"milvus_password":"your_password",#optional
|
||||||
|
"milvus_secure":"your_secure"#optional
|
||||||
|
}
|
||||||
|
#Weaviate
|
||||||
|
vector_store_config = {
|
||||||
|
"vector_store_type":"Weaviate",
|
||||||
|
"vector_store_name":"your_name",#you can define yourself
|
||||||
|
"weaviate_url":"your_url",
|
||||||
|
"weaviate_port":"your_port",
|
||||||
|
"weaviate_username":"your_username",#optional
|
||||||
|
"weaviate_password":"your_password",#optional
|
||||||
|
}
|
||||||
|
|
||||||
3.init Url Type EmbeddingEngine api and embedding your document into vector store in your code.
|
3.init Url Type EmbeddingEngine api and embedding your document into vector store in your code.
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
url = "https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html"
|
url = "https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html"
|
||||||
embedding_model = "your_model_path/all-MiniLM-L6-v2"
|
|
||||||
vector_store_config = {
|
|
||||||
"vector_store_name": your_name,
|
|
||||||
}
|
|
||||||
embedding_engine = EmbeddingEngine(
|
embedding_engine = EmbeddingEngine(
|
||||||
knowledge_source=url,
|
knowledge_source=url,
|
||||||
knowledge_type=KnowledgeType.URL.value,
|
knowledge_type=KnowledgeType.URL.value,
|
||||||
@ -43,12 +78,6 @@ Document type can be .txt, .pdf, .md, .doc, .ppt.
|
|||||||
::
|
::
|
||||||
|
|
||||||
document_path = "your_path/test.md"
|
document_path = "your_path/test.md"
|
||||||
embedding_model = "your_model_path/all-MiniLM-L6-v2"
|
|
||||||
vector_store_config = {
|
|
||||||
"vector_store_name": your_name,
|
|
||||||
"vector_store_type": "Chroma",
|
|
||||||
"chroma_persist_path": "your_persist_dir",
|
|
||||||
}
|
|
||||||
embedding_engine = EmbeddingEngine(
|
embedding_engine = EmbeddingEngine(
|
||||||
knowledge_source=document_path,
|
knowledge_source=document_path,
|
||||||
knowledge_type=KnowledgeType.DOCUMENT.value,
|
knowledge_type=KnowledgeType.DOCUMENT.value,
|
||||||
@ -61,10 +90,6 @@ Document type can be .txt, .pdf, .md, .doc, .ppt.
|
|||||||
::
|
::
|
||||||
|
|
||||||
raw_text = "a long passage"
|
raw_text = "a long passage"
|
||||||
embedding_model = "your_model_path/all-MiniLM-L6-v2"
|
|
||||||
vector_store_config = {
|
|
||||||
"vector_store_name": your_name,
|
|
||||||
}
|
|
||||||
embedding_engine = EmbeddingEngine(
|
embedding_engine = EmbeddingEngine(
|
||||||
knowledge_source=raw_text,
|
knowledge_source=raw_text,
|
||||||
knowledge_type=KnowledgeType.TEXT.value,
|
knowledge_type=KnowledgeType.TEXT.value,
|
||||||
|
Loading…
Reference in New Issue
Block a user