ci: lint the code and delete unused imports to make CI happy

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Author: yihong0618
Date: 2023-05-24 18:42:55 +08:00
parent 562d5a98cc
commit b098a48898
75 changed files with 1110 additions and 824 deletions
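The commit does not say which tools produced these changes, so the sketch below is only a guess at the lint pass: autoflake for unused imports, isort for import ordering, black for layout. A minimal sketch, assuming all three are installed:

import subprocess

# Hypothetical lint pass; the tool choice is an assumption, not stated in the commit.
for cmd in (
    ["autoflake", "--in-place", "--remove-all-unused-imports", "--recursive", "."],
    ["isort", "."],
    ["black", "."],
):
    subprocess.run(cmd, check=True)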


@@ -2,24 +2,28 @@
# -*- coding:utf-8 -*-
import gradio as gr
from langchain.agents import (
load_tools,
initialize_agent,
AgentType
)
from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import Document, GPTSimpleVectorIndex
from llama_index import (
Document,
GPTSimpleVectorIndex,
LangchainEmbedding,
LLMPredictor,
ServiceContext,
)
from pilot.model.vicuna_llm import VicunaEmbeddingLLM, VicunaRequestLLM
def agent_demo():
llm = VicunaRequestLLM()
tools = load_tools(['python_repl'], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run(
"Write a SQL script that Query 'select count(1)!'"
tools = load_tools(["python_repl"], llm=llm)
agent = initialize_agent(
tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)
agent.run("Write a SQL script that Query 'select count(1)!'")
def knowledged_qa_demo(text_list):
llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
@@ -27,27 +31,34 @@ def knowledged_qa_demo(text_list):
embed_model = LangchainEmbedding(hfemb)
documents = [Document(t) for t in text_list]
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
service_context = ServiceContext.from_defaults(
llm_predictor=llm_predictor, embed_model=embed_model
)
index = GPTSimpleVectorIndex.from_documents(
documents, service_context=service_context
)
return index
def get_answer(q):
base_knowledge = """ """
base_knowledge = """ """
text_list = [base_knowledge]
index = knowledged_qa_demo(text_list)
response = index.query(q)
return response.response
def get_similar(q):
from pilot.vector_store.extract_tovec import knownledge_tovec, knownledge_tovec_st
docsearch = knownledge_tovec_st("./datasets/plan.md")
docs = docsearch.similarity_search_with_score(q, k=1)
for doc in docs:
dc, s = doc
print(s)
yield dc.page_content
if __name__ == "__main__":
# agent_demo()
@@ -58,8 +69,7 @@ if __name__ == "__main__":
text_input = gr.TextArea()
text_output = gr.TextArea()
text_button = gr.Button()
text_button.click(get_similar, inputs=text_input, outputs=text_output)
demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
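A quick way to exercise the module above outside Gradio: get_similar is a generator, so it has to be consumed explicitly (the query string below is invented, and ./datasets/plan.md must exist):

for chunk in get_similar("what does the vector store plan cover?"):
    # each yielded chunk is the page_content of the best-scoring document
    print(chunk)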


@@ -1,30 +1,29 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import json
import time
import uuid
import os
import sys
from urllib.parse import urljoin
import gradio as gr
import requests
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_PATH)
from langchain.prompts import PromptTemplate
from pilot.configs.config import Config
from pilot.conversation import conv_qa_prompt_template, conv_templates
llmstream_stream_path = "generate_stream"
CFG = Config()
def generate(query):
template_name = "conv_one_shot"
state = conv_templates[template_name].copy()
@@ -47,7 +46,7 @@ def generate(query):
"prompt": prompt,
"temperature": 1.0,
"max_new_tokens": 1024,
"stop": "###"
"stop": "###",
}
response = requests.post(
@@ -57,19 +56,18 @@ def generate(query):
skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3
for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
if chunk:
data = json.loads(chunk.decode())
if data["error_code"] == 0:
if "vicuna" in CFG.LLM_MODEL:
output = data["text"][skip_echo_len:].strip()
else:
output = data["text"].strip()
state.messages[-1][-1] = output + ""
yield(output)
yield (output)
if __name__ == "__main__":
print(CFG.LLM_MODEL)
with gr.Blocks() as demo:
@@ -78,10 +76,7 @@ if __name__ == "__main__":
text_input = gr.TextArea()
text_output = gr.TextArea()
text_button = gr.Button("提交")
text_button.click(generate, inputs=text_input, outputs=text_output)
demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
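For reference, a sketch of the chunk format the streaming loop above expects from the generate_stream endpoint: JSON objects separated by NUL bytes, each carrying error_code and the accumulated text (the payload text here is invented):

import json

example_chunk = json.dumps({"error_code": 0, "text": "### Assistant: hello"}).encode() + b"\0"
data = json.loads(example_chunk.rstrip(b"\0").decode())
assert data["error_code"] == 0
print(data["text"])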


@@ -1,19 +1,19 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import logging
import sys
from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# read the document of data dir
documents = SimpleDirectoryReader("data").load_data()
# split the document to chunk, max token size=500, convert chunk to vector
index = GPTSimpleVectorIndex(documents)
# save index
index.save_to_disk("index.json")
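The natural counterpart to the script above, assuming the same pre-0.5 llama_index API used throughout this commit: reload the persisted index and query it.

from llama_index import GPTSimpleVectorIndex

index = GPTSimpleVectorIndex.load_from_disk("index.json")
response = index.query("What do the documents in the data directory describe?")
print(response.response)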


@@ -3,17 +3,19 @@
import gradio as gr
def change_tab():
return gr.Tabs.update(selected=1)
with gr.Blocks() as demo:
with gr.Tabs() as tabs:
with gr.TabItem("Train", id=0):
t = gr.Textbox()
with gr.TabItem("Inference", id=1):
i = gr.Image()
btn = gr.Button()
btn.click(change_tab, None, tabs)
demo.launch()


@@ -1,5 +1,3 @@
from pilot.source_embedding.csv_embedding import CSVEmbedding
# path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
@@ -8,6 +6,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path/"
pdf_embedding = CSVEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
pdf_embedding = CSVEmbedding(
file_path=path,
model_name=model_name,
vector_store_config={
"vector_store_name": "url",
"vector_store_path": "vector_store_path",
},
)
pdf_embedding.source_embedding()
print("success")
print("success")


@@ -6,6 +6,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path/"
pdf_embedding = PDFEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "ob-pdf", "vector_store_path": vector_store_path})
pdf_embedding = PDFEmbedding(
file_path=path,
model_name=model_name,
vector_store_config={
"vector_store_name": "ob-pdf",
"vector_store_path": vector_store_path,
},
)
pdf_embedding.source_embedding()
print("success")
print("success")


@@ -5,6 +5,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path"
pdf_embedding = URLEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
pdf_embedding = URLEmbedding(
file_path=path,
model_name=model_name,
vector_store_config={
"vector_store_name": "url",
"vector_store_path": "vector_store_path",
},
)
pdf_embedding.source_embedding()
print("success")
print("success")


@@ -1,19 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LLMPredictor
import torch
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from llama_index import (
GPTListIndex,
GPTSimpleVectorIndex,
LangchainEmbedding,
LLMPredictor,
PromptHelper,
SimpleDirectoryReader,
)
from transformers import pipeline
class FlanLLM(LLM):
model_name = "google/flan-t5-large"
pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={
"torch_dtype": torch.bfloat16
})
pipeline = pipeline(
"text2text-generation",
model=model_name,
device=0,
model_kwargs={"torch_dtype": torch.bfloat16},
)
def _call(self, prompt, stop=None):
return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
@@ -24,6 +33,7 @@ class FlanLLM(LLM):
def _llm_type(self):
return "custome"
llm_predictor = LLMPredictor(llm=FlanLLM())
hfemb = HuggingFaceEmbeddings()
embed_model = LangchainEmbedding(hfemb)
@@ -214,9 +224,10 @@ OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形
Answer: in an NLJ the left table is the driving table. You need to understand the basics of plan execution: take one row from the left table, scan the right table, and return a result as soon as the join condition is satisfied.
anti/semi joins exist only because the semantics of not exists/exists return rows from the left table alone; rewriting them into an anti join is a plan-level optimization of the join that beats a subquery.
"""
"""
from llama_index import Document
text_list = [text1]
documents = [Document(t) for t in text_list]
@@ -226,12 +237,18 @@ max_input_size = 512
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
index = GPTListIndex(
documents,
embed_model=embed_model,
llm_predictor=llm_predictor,
prompt_helper=prompt_helper,
)
index.save_to_disk("index.json")
if __name__ == "__main__":
import logging
logging.getLogger().setLevel(logging.CRITICAL)
for d in documents:
print(d)
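To close the loop on the script above, a minimal sketch of reading the index back. The assumption, not verified against this llama_index version, is that load_from_disk forwards these keyword arguments to the GPTListIndex constructor in the same way the direct construction above does.

index = GPTListIndex.load_from_disk(
    "index.json",
    embed_model=embed_model,
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)
print(index.query("Which table drives a nested loop join?"))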