Mirror of https://github.com/csunny/DB-GPT.git, synced 2025-09-12 12:37:14 +00:00

Commit: ci: make ci happy lint the code, delete unused imports

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
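Note: the changes below are pure lint fixes — imports sorted and combined, string quotes normalized, long calls reflowed with trailing commas, two blank lines around top-level definitions. That style matches what isort followed by black produces. A minimal sketch of reproducing such a rewrite through their Python APIs (assuming both tools are installed; the sample input is taken from the first hunk below):

    # Reproduce the style of this commit with isort + black (pip install black isort).
    import black
    import isort

    src = (
        "from langchain.agents import (\n"
        "    load_tools,\n"
        "    initialize_agent,\n"
        "    AgentType\n"
        ")\n"
    )

    sorted_src = isort.code(src)  # combine and sort the import
    formatted = black.format_str(sorted_src, mode=black.Mode())  # reflow to black style
    print(formatted, end="")
    # -> from langchain.agents import AgentType, initialize_agent, load_tools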
@@ -2,24 +2,28 @@
 # -*- coding:utf-8 -*-
 
 import gradio as gr
-from langchain.agents import (
-    load_tools,
-    initialize_agent,
-    AgentType
-)
-from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
-from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext
+from langchain.agents import AgentType, initialize_agent, load_tools
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import Document, GPTSimpleVectorIndex
+from llama_index import (
+    Document,
+    GPTSimpleVectorIndex,
+    LangchainEmbedding,
+    LLMPredictor,
+    ServiceContext,
+)
+
+from pilot.model.vicuna_llm import VicunaEmbeddingLLM, VicunaRequestLLM
 
+
 def agent_demo():
     llm = VicunaRequestLLM()
 
-    tools = load_tools(['python_repl'], llm=llm)
-    agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
-    agent.run(
-        "Write a SQL script that Query 'select count(1)!'"
-    )
+    tools = load_tools(["python_repl"], llm=llm)
+    agent = initialize_agent(
+        tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True
+    )
+    agent.run("Write a SQL script that Query 'select count(1)!'")
 
+
 def knowledged_qa_demo(text_list):
     llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
@@ -27,27 +31,34 @@ def knowledged_qa_demo(text_list):
     embed_model = LangchainEmbedding(hfemb)
     documents = [Document(t) for t in text_list]
 
-    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
-    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
+    service_context = ServiceContext.from_defaults(
+        llm_predictor=llm_predictor, embed_model=embed_model
+    )
+    index = GPTSimpleVectorIndex.from_documents(
+        documents, service_context=service_context
+    )
     return index
 
+
 def get_answer(q):
     base_knowledge = """ """
     text_list = [base_knowledge]
     index = knowledged_qa_demo(text_list)
     response = index.query(q)
     return response.response
 
+
 def get_similar(q):
     from pilot.vector_store.extract_tovec import knownledge_tovec, knownledge_tovec_st
 
     docsearch = knownledge_tovec_st("./datasets/plan.md")
     docs = docsearch.similarity_search_with_score(q, k=1)
 
     for doc in docs:
         dc, s = doc
         print(s)
         yield dc.page_content
 
+
 if __name__ == "__main__":
     # agent_demo()

@@ -58,8 +69,7 @@ if __name__ == "__main__":
         text_input = gr.TextArea()
         text_output = gr.TextArea()
         text_button = gr.Button()
-
 
         text_button.click(get_similar, inputs=text_input, outputs=text_output)
 
     demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
@@ -1,30 +1,29 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
-import requests
 import json
 import time
 import uuid
 import os
 import sys
 from urllib.parse import urljoin
 
 import gradio as gr
+import requests
 
 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(ROOT_PATH)
 
-from pilot.configs.config import Config
-from pilot.conversation import conv_qa_prompt_template, conv_templates
 from langchain.prompts import PromptTemplate
+
+from pilot.configs.config import Config
+from pilot.conversation import conv_qa_prompt_template, conv_templates
 
 llmstream_stream_path = "generate_stream"
 
 CFG = Config()
 
+
 def generate(query):
     template_name = "conv_one_shot"
     state = conv_templates[template_name].copy()

@@ -47,7 +46,7 @@ def generate(query):
         "prompt": prompt,
         "temperature": 1.0,
         "max_new_tokens": 1024,
-        "stop": "###"
+        "stop": "###",
     }
 
     response = requests.post(

@@ -57,19 +56,18 @@ def generate(query):
     skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3
 
     for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
-
         if chunk:
             data = json.loads(chunk.decode())
             if data["error_code"] == 0:
                 if "vicuna" in CFG.LLM_MODEL:
                     output = data["text"][skip_echo_len:].strip()
                 else:
                     output = data["text"].strip()
 
                 state.messages[-1][-1] = output + "▌"
-                yield(output)
+                yield (output)
 
 
 if __name__ == "__main__":
     print(CFG.LLM_MODEL)
     with gr.Blocks() as demo:

@@ -78,10 +76,7 @@ if __name__ == "__main__":
         text_input = gr.TextArea()
         text_output = gr.TextArea()
         text_button = gr.Button("提交")
-
 
         text_button.click(generate, inputs=text_input, outputs=text_output)
 
     demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
-
-
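Note on the skip_echo_len arithmetic preserved by this hunk: the server echoes the prompt at the head of data["text"], and the offset assumes each "</s>" separator in the echoed prompt comes back as a single character (hence the shrink of 3 per occurrence, plus one trailing character). A worked example with made-up strings:

    # Illustration of the echo-skipping offset (hypothetical values; assumes the
    # server renders each "</s>" in the echoed prompt as one space).
    prompt = "A chat. USER: hi</s>ASSISTANT:"  # 30 chars, one "</s>"
    skip_echo_len = len(prompt) + 1 - prompt.count("</s>") * 3  # 30 + 1 - 3 = 28

    echoed = prompt.replace("</s>", " ")  # the 27-char echo in the model output
    streamed_text = echoed + " Hello!"  # echo followed by the generated answer
    print(streamed_text[skip_echo_len:].strip())  # -> Hello!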
@@ -1,19 +1,19 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import os
 import logging
 import sys
 
-from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
+from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
 
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 
 # read the document of data dir
 documents = SimpleDirectoryReader("data").load_data()
 # split the document to chunk, max token size=500, convert chunk to vector
+
 index = GPTSimpleVectorIndex(documents)
 
 # save index
 index.save_to_disk("index.json")
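Note (not part of the diff): with the same llama_index version, the index persisted above can be reloaded and queried later. A minimal sketch, with a made-up question:

    # Reload the saved index and query it (assumes the same old llama_index API
    # used above, which pairs save_to_disk with load_from_disk).
    from llama_index import GPTSimpleVectorIndex

    index = GPTSimpleVectorIndex.load_from_disk("index.json")
    response = index.query("What topics does the data directory cover?")
    print(response.response)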
@@ -3,17 +3,19 @@
 
 import gradio as gr
 
+
 def change_tab():
     return gr.Tabs.update(selected=1)
 
+
 with gr.Blocks() as demo:
     with gr.Tabs() as tabs:
         with gr.TabItem("Train", id=0):
             t = gr.Textbox()
         with gr.TabItem("Inference", id=1):
             i = gr.Image()
 
         btn = gr.Button()
         btn.click(change_tab, None, tabs)
 
 demo.launch()
@@ -1,5 +1,3 @@
-
-
 from pilot.source_embedding.csv_embedding import CSVEmbedding
 
 # path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"

@@ -8,6 +6,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
 vector_store_path = "your_path/"
 
 
-pdf_embedding = CSVEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = CSVEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": "vector_store_path",
+    },
+)
 pdf_embedding.source_embedding()
 print("success")
@@ -6,6 +6,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
 vector_store_path = "your_path/"
 
 
-pdf_embedding = PDFEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "ob-pdf", "vector_store_path": vector_store_path})
+pdf_embedding = PDFEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "ob-pdf",
+        "vector_store_path": vector_store_path,
+    },
+)
 pdf_embedding.source_embedding()
 print("success")
@@ -5,6 +5,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
 vector_store_path = "your_path"
 
 
-pdf_embedding = URLEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = URLEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": "vector_store_path",
+    },
+)
 pdf_embedding.source_embedding()
 print("success")
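Note: the three embedding scripts above share one shape — construct the embedding with a file path, model name, and vector-store config, then call source_embedding(). A condensed sketch of that pattern; the csv_embedding module path comes from the diff, while the pdf_embedding and url_embedding paths, like the file paths, are assumptions modeled on it:

    # Condensed form of the three scripts (module paths and file paths partly assumed).
    from pilot.source_embedding.csv_embedding import CSVEmbedding
    from pilot.source_embedding.pdf_embedding import PDFEmbedding
    from pilot.source_embedding.url_embedding import URLEmbedding

    MODEL_NAME = "your_path/all-MiniLM-L6-v2"
    SOURCES = [
        (CSVEmbedding, "your_path/data.csv", "csv-store"),
        (PDFEmbedding, "your_path/doc.pdf", "ob-pdf"),
        (URLEmbedding, "https://example.com/page", "url"),
    ]

    for cls, path, store_name in SOURCES:
        embedding = cls(
            file_path=path,
            model_name=MODEL_NAME,
            vector_store_config={
                "vector_store_name": store_name,
                "vector_store_path": "your_path/",
            },
        )
        embedding.source_embedding()
        print("success")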
@@ -1,19 +1,28 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import LLMPredictor
 import torch
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.llms.base import LLM
+from llama_index import (
+    GPTListIndex,
+    GPTSimpleVectorIndex,
+    LangchainEmbedding,
+    LLMPredictor,
+    PromptHelper,
+    SimpleDirectoryReader,
+)
 from transformers import pipeline
 
 
 class FlanLLM(LLM):
     model_name = "google/flan-t5-large"
-    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={
-        "torch_dtype": torch.bfloat16
-    })
+    pipeline = pipeline(
+        "text2text-generation",
+        model=model_name,
+        device=0,
+        model_kwargs={"torch_dtype": torch.bfloat16},
+    )
 
     def _call(self, prompt, stop=None):
         return self.pipeline(prompt, max_length=9999)[0]["generated_text"]

@@ -24,6 +33,7 @@ class FlanLLM(LLM):
     def _llm_type(self):
         return "custome"
 
+
 llm_predictor = LLMPredictor(llm=FlanLLM())
 hfemb = HuggingFaceEmbeddings()
 embed_model = LangchainEmbedding(hfemb)
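Note: FlanLLM above follows LangChain's custom-LLM contract — subclass langchain.llms.base.LLM and supply _call() plus an _llm_type identifier (a property in LangChain's base interface). A stripped-down skeleton of that contract; EchoLLM is a hypothetical stand-in, not from the repo:

    # Bare-bones custom LLM on the same langchain.llms.base.LLM contract.
    from langchain.llms.base import LLM


    class EchoLLM(LLM):
        def _call(self, prompt, stop=None):
            # A real model would generate text here; this stub echoes the prompt.
            return prompt

        @property
        def _llm_type(self):
            return "echo"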
@@ -214,9 +224,10 @@ The first part of the OceanBase EXPLAIN output is the tree form of the execution plan
 Answer: in an NLJ the left table is likewise the driving table. This comes down to the basics of plan execution: fetch one row from the left table, then iterate over the right table; as soon as the join condition is satisfied, a row can be returned.
 anti/semi joins exist only because the semantics of NOT EXISTS/EXISTS return rows from the left table alone; rewriting to an anti join is a plan optimization, as a join outperforms a subquery.
 """
 
 from llama_index import Document
+
 text_list = [text1]
 documents = [Document(t) for t in text_list]

@@ -226,12 +237,18 @@ max_input_size = 512
 max_chunk_overlap = 20
 prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
 
-index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+index = GPTListIndex(
+    documents,
+    embed_model=embed_model,
+    llm_predictor=llm_predictor,
+    prompt_helper=prompt_helper,
+)
 index.save_to_disk("index.json")
 
 
 if __name__ == "__main__":
     import logging
 
     logging.getLogger().setLevel(logging.CRITICAL)
     for d in documents:
         print(d)