Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-08-01 16:18:27 +00:00)

Commit 205eab7268: sentence transformer
Parent: b7b4a1fb63
(first file in the commit: the Gradio web-UI builder; its path is not shown in this view)

@@ -236,6 +236,12 @@ pre {
 """
 )
 
+def change_mode(mode):
+    if mode == "默认知识库对话":
+        return gr.update(visible=False)
+    else:
+        return gr.update(visible=True)
+
 
 def build_single_model_ui():
 
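The first hunk adds change_mode, a helper that a later hunk wires to a Radio so that picking 新增知识库 ("add a knowledge base") reveals a settings panel, while 默认知识库对话 ("chat with the default knowledge base") hides it. Below is a minimal, self-contained sketch of that pattern, assuming Gradio 3.x; everything except change_mode itself is illustrative and not taken from the commit.

    # Minimal sketch of the gr.update(visible=...) pattern, assuming Gradio 3.x.
    # change_mode is copied from the hunk above; the wiring around it
    # (Blocks, Radio, the toggled Column) is illustrative only.
    import gradio as gr

    def change_mode(mode):
        # An event handler may return gr.update(...) instead of a plain value;
        # Gradio then patches the target component's properties in place.
        if mode == "默认知识库对话":    # "chat with the default knowledge base"
            return gr.update(visible=False)
        else:                           # "新增知识库" = "add a knowledge base"
            return gr.update(visible=True)

    with gr.Blocks() as demo:
        mode = gr.Radio(["默认知识库对话", "新增知识库"], value="默认知识库对话")
        with gr.Column(visible=False) as new_kb_panel:  # hidden until the second option is picked
            gr.Markdown("knowledge-base settings go here")
        mode.change(fn=change_mode, inputs=mode, outputs=new_kb_panel)

    # demo.launch()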
@@ -249,6 +255,7 @@ def build_single_model_ui():
 The service is a research preview intended for non-commercial use only. subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA
 """
 
+    vs_path, file_status, vs_list = gr.State(""), gr.State(""), gr.State(vs_list)
     state = gr.State()
     gr.Markdown(notice_markdown, elem_id="notice_markdown")
 
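The added line keeps the vector-store path, the file status, and the list of existing stores as gr.State values, i.e. per-session variables that survive across event callbacks. A minimal sketch of that mechanism follows, assuming Gradio 3.x; the names and the append logic are illustrative, not from the commit.

    # Minimal gr.State sketch, assuming Gradio 3.x. Names are illustrative.
    import gradio as gr

    def remember(name, history):
        # State values are passed in as inputs and returned as outputs;
        # each browser session keeps its own copy.
        history = history + [name]
        return ", ".join(history), history

    with gr.Blocks() as demo:
        history = gr.State([])               # per-session list, like vs_list above
        name = gr.Textbox(label="name")
        seen = gr.Textbox(label="seen so far")
        name.submit(remember, inputs=[name, history], outputs=[seen, history])

    # demo.launch()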
@@ -270,10 +277,16 @@ def build_single_model_ui():
             interactive=True,
             label="最大输出Token数",
         )
-    with gr.Tabs():
+    tabs = gr.Tabs()
+    with tabs:
         with gr.TabItem("知识问答", elem_id="QA"):
-            pass
+            doc2vec = gr.Column(visible=False)
+            with doc2vec:
+                mode = gr.Radio(["默认知识库对话", "新增知识库"])
+                vs_setting = gr.Accordion("配置知识库")
+                mode.change(fn=change_mode, inputs=mode, outputs=vs_setting)
+                with vs_setting:
+                    select_vs = gr.Dropdown()
         with gr.TabItem("SQL生成与诊断", elem_id="SQL"):
             # TODO A selector to choose database
             with gr.Row(elem_id="db_selector"):
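The 知识问答 ("knowledge Q&A") tab now nests a hidden Column, the Radio, and an Accordion 配置知识库 ("configure knowledge base") holding an empty Dropdown, with mode.change wired to the toggle from the first hunk. The commit leaves the Dropdown without choices; a hypothetical follow-up, sketched below under the assumption of Gradio 3.x and not part of this commit, would fill it from get_vector_storelist() (added in the second file) via gr.update(choices=...).

    # Hypothetical follow-up sketch (not in this commit), assuming Gradio 3.x:
    # reveal the accordion and refresh the empty dropdown with whatever
    # get_vector_storelist() finds on disk.
    import os
    import gradio as gr

    VECTORE_PATH = "./vector_store"  # stand-in for pilot.configs.model_config.VECTORE_PATH

    def get_vector_storelist():
        # same logic as the helper added in the second file of this commit
        if not os.path.exists(VECTORE_PATH):
            return []
        return os.listdir(VECTORE_PATH)

    def refresh_stores(mode):
        # "新增知识库" = "add a knowledge base": show the settings accordion
        # and reload the list of persisted vector stores.
        visible = (mode != "默认知识库对话")
        return gr.update(visible=visible), gr.update(choices=get_vector_storelist())

    with gr.Blocks() as demo:
        with gr.Tabs():
            with gr.TabItem("知识问答", elem_id="QA"):
                # the commit creates this column with visible=False and never
                # shows it; it is left visible here so the sketch can be tried
                with gr.Column() as doc2vec:
                    mode = gr.Radio(["默认知识库对话", "新增知识库"])
                    with gr.Accordion("配置知识库") as vs_setting:
                        select_vs = gr.Dropdown(label="知识库")
                    mode.change(fn=refresh_stores, inputs=mode,
                                outputs=[vs_setting, select_vs])

    # demo.launch()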
@@ -300,6 +313,10 @@ def build_single_model_ui():
             regenerate_btn = gr.Button(value="重新生成", interactive=False)
             clear_btn = gr.Button(value="清理", interactive=False)
 
+    # QA 模式下清空数据库选项
+    if tabs.elem_id == "QA":
+        db_selector = ""
+
     gr.Markdown(learn_more_markdown)
 
     btn_list = [regenerate_btn, clear_btn]
(second file in the commit: the knowledge-to-vector helper module; its path is not shown in this view)

@@ -1,12 +1,13 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
+import os
 
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.vectorstores import Chroma
 from pilot.model.vicuna_llm import VicunaEmbeddingLLM
-# from langchain.embeddings import SentenceTransformerEmbeddings
+from pilot.configs.model_config import VECTORE_PATH
+from langchain.embeddings import HuggingFaceEmbeddings
 
 embeddings = VicunaEmbeddingLLM()
 
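The module now pulls VECTORE_PATH from pilot.configs.model_config, and the function added below reads a sentence-transformers model location from llm_model_config["sentence-transforms"]. Neither value appears in this diff; the sketch below only illustrates the kind of entries that config module is assumed to provide, with placeholder paths and model name.

    # Illustrative shape of the config consumed by this diff. The actual
    # values are not shown in the commit; paths and model name below are
    # placeholders, not the project's real settings.
    import os

    MODEL_PATH = os.path.join(os.getcwd(), "models")          # assumed layout
    VECTORE_PATH = os.path.join(os.getcwd(), "vector_store")  # where Chroma stores live

    llm_model_config = {
        # key used by knownledge_tovec_st below; value is a local path or hub id
        # of a sentence-transformers model (placeholder shown here)
        "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
    }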
@@ -21,18 +22,22 @@ def knownledge_tovec(filename):
     )
     return docsearch
 
-# def knownledge_tovec_st(filename):
-#     """ Use sentence transformers to embedding the document.
-#     https://github.com/UKPLab/sentence-transformers
-#     """
-#     from pilot.configs.model_config import llm_model_config
-#     embeddings = SentenceTransformerEmbeddings(model=llm_model_config["sentence-transforms"])
-
-#     with open(filename, "r") as f:
-#         knownledge = f.read()
-
-#     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-#     texts = text_splitter(knownledge)
-#     docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))])
-#     return docsearch
+def knownledge_tovec_st(filename):
+    """ Use sentence transformers to embedding the document.
+        https://github.com/UKPLab/sentence-transformers
+    """
+    from pilot.configs.model_config import llm_model_config
+    embeddings = HuggingFaceEmbeddings(model=llm_model_config["sentence-transforms"])
+
+    with open(filename, "r") as f:
+        knownledge = f.read()
+
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    texts = text_splitter(knownledge)
+    docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))])
+    return docsearch
+
+
+def get_vector_storelist():
+    if not os.path.exists(VECTORE_PATH):
+        return []
+    return os.listdir(VECTORE_PATH)
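Two details of the new knownledge_tovec_st are worth flagging: in the LangChain releases of this period, HuggingFaceEmbeddings takes its model via the model_name keyword rather than model, and CharacterTextSplitter instances are not callable, so text is split with split_text(). Below is an editor's corrected sketch under those assumptions; it is not the code that landed in the commit, and the model argument is passed in as a parameter only to keep the sketch self-contained.

    # Corrected sketch (editor's assumption, not the committed code):
    # HuggingFaceEmbeddings(model_name=...) and split_text() instead of
    # calling the splitter object directly.
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.vectorstores import Chroma

    def knownledge_tovec_st(filename, model_name_or_path):
        """Embed a document with a sentence-transformers model served
        through HuggingFaceEmbeddings and index it in Chroma."""
        embeddings = HuggingFaceEmbeddings(model_name=model_name_or_path)

        with open(filename, "r") as f:
            knownledge = f.read()

        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_text(knownledge)  # split_text, not text_splitter(...)
        return Chroma.from_texts(
            texts,
            embeddings,
            metadatas=[{"source": str(i)} for i in range(len(texts))],
        )

get_vector_storelist() itself is straightforward: it lists the directories under VECTORE_PATH, which is presumably what will later feed the empty knowledge-base Dropdown in the web UI.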