ci: lint the code and delete unused imports to make CI happy

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Author: yihong0618
Date: 2023-05-24 18:42:55 +08:00
parent 562d5a98cc
commit b098a48898
75 changed files with 1110 additions and 824 deletions
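The commit does not say which tools produced these changes, so the sketch below is only a guess at the lint pass: autoflake for unused imports, isort for import ordering, black for layout. A minimal sketch, assuming all three are installed:

import subprocess

# Hypothetical lint pass; the tool choice is an assumption, not stated in the commit.
for cmd in (
    ["autoflake", "--in-place", "--remove-all-unused-imports", "--recursive", "."],
    ["isort", "."],
    ["black", "."],
):
    subprocess.run(cmd, check=True)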


@@ -2,24 +2,28 @@
# -*- coding:utf-8 -*-
import gradio as gr
from langchain.agents import (
load_tools,
initialize_agent,
AgentType
)
from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import Document, GPTSimpleVectorIndex
from llama_index import (
Document,
GPTSimpleVectorIndex,
LangchainEmbedding,
LLMPredictor,
ServiceContext,
)
from pilot.model.vicuna_llm import VicunaEmbeddingLLM, VicunaRequestLLM
def agent_demo():
llm = VicunaRequestLLM()
tools = load_tools(['python_repl'], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run(
"Write a SQL script that Query 'select count(1)!'"
tools = load_tools(["python_repl"], llm=llm)
agent = initialize_agent(
tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)
agent.run("Write a SQL script that Query 'select count(1)!'")
def knowledged_qa_demo(text_list):
llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
@@ -27,27 +31,34 @@ def knowledged_qa_demo(text_list):
embed_model = LangchainEmbedding(hfemb)
documents = [Document(t) for t in text_list]
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
service_context = ServiceContext.from_defaults(
llm_predictor=llm_predictor, embed_model=embed_model
)
index = GPTSimpleVectorIndex.from_documents(
documents, service_context=service_context
)
return index
def get_answer(q):
base_knowledge = """ """
base_knowledge = """ """
text_list = [base_knowledge]
index = knowledged_qa_demo(text_list)
response = index.query(q)
return response.response
def get_similar(q):
from pilot.vector_store.extract_tovec import knownledge_tovec, knownledge_tovec_st
docsearch = knownledge_tovec_st("./datasets/plan.md")
docs = docsearch.similarity_search_with_score(q, k=1)
for doc in docs:
dc, s = doc
print(s)
yield dc.page_content
if __name__ == "__main__":
# agent_demo()
@@ -58,8 +69,7 @@ if __name__ == "__main__":
text_input = gr.TextArea()
text_output = gr.TextArea()
text_button = gr.Button()
text_button.click(get_similar, inputs=text_input, outputs=text_output)
demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
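A quick way to exercise the module above outside Gradio: get_similar is a generator, so it has to be consumed explicitly (the query string below is invented, and ./datasets/plan.md must exist):

for chunk in get_similar("what does the vector store plan cover?"):
    # each yielded chunk is the page_content of the best-scoring document
    print(chunk)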


@@ -1,30 +1,29 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import json
import time
import uuid
import os
import sys
from urllib.parse import urljoin
import gradio as gr
import requests
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_PATH)
from langchain.prompts import PromptTemplate
from pilot.configs.config import Config
from pilot.conversation import conv_qa_prompt_template, conv_templates
llmstream_stream_path = "generate_stream"
CFG = Config()
def generate(query):
template_name = "conv_one_shot"
state = conv_templates[template_name].copy()
@@ -47,7 +46,7 @@ def generate(query):
"prompt": prompt,
"temperature": 1.0,
"max_new_tokens": 1024,
"stop": "###"
"stop": "###",
}
response = requests.post(
@@ -57,19 +56,18 @@ def generate(query):
skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3
for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
if chunk:
data = json.loads(chunk.decode())
if data["error_code"] == 0:
if "vicuna" in CFG.LLM_MODEL:
output = data["text"][skip_echo_len:].strip()
else:
output = data["text"].strip()
state.messages[-1][-1] = output + ""
yield(output)
yield (output)
if __name__ == "__main__":
print(CFG.LLM_MODEL)
with gr.Blocks() as demo:
@@ -78,10 +76,7 @@ if __name__ == "__main__":
text_input = gr.TextArea()
text_output = gr.TextArea()
text_button = gr.Button("提交")
text_button.click(generate, inputs=text_input, outputs=text_output)
demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
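For reference, a sketch of the chunk format the streaming loop above expects from the generate_stream endpoint: JSON objects separated by NUL bytes, each carrying error_code and the accumulated text (the payload text here is invented):

import json

example_chunk = json.dumps({"error_code": 0, "text": "### Assistant: hello"}).encode() + b"\0"
data = json.loads(example_chunk.rstrip(b"\0").decode())
assert data["error_code"] == 0
print(data["text"])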


@@ -1,19 +1,19 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import logging
import sys
from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# read the document of data dir
documents = SimpleDirectoryReader("data").load_data()
# split the document to chunk, max token size=500, convert chunk to vector
index = GPTSimpleVectorIndex(documents)
# save index
index.save_to_disk("index.json")
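The natural counterpart to the script above, assuming the same pre-0.5 llama_index API used throughout this commit: reload the persisted index and query it.

from llama_index import GPTSimpleVectorIndex

index = GPTSimpleVectorIndex.load_from_disk("index.json")
response = index.query("What do the documents in the data directory describe?")
print(response.response)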


@@ -3,17 +3,19 @@
import gradio as gr
def change_tab():
return gr.Tabs.update(selected=1)
with gr.Blocks() as demo:
with gr.Tabs() as tabs:
with gr.TabItem("Train", id=0):
t = gr.Textbox()
with gr.TabItem("Inference", id=1):
i = gr.Image()
btn = gr.Button()
btn.click(change_tab, None, tabs)
demo.launch()


@@ -1,5 +1,3 @@
from pilot.source_embedding.csv_embedding import CSVEmbedding
# path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
@@ -8,6 +6,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path/"
pdf_embedding = CSVEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
pdf_embedding = CSVEmbedding(
file_path=path,
model_name=model_name,
vector_store_config={
"vector_store_name": "url",
"vector_store_path": "vector_store_path",
},
)
pdf_embedding.source_embedding()
print("success")
print("success")


@@ -6,6 +6,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path/"
pdf_embedding = PDFEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "ob-pdf", "vector_store_path": vector_store_path})
pdf_embedding = PDFEmbedding(
file_path=path,
model_name=model_name,
vector_store_config={
"vector_store_name": "ob-pdf",
"vector_store_path": vector_store_path,
},
)
pdf_embedding.source_embedding()
print("success")
print("success")


@@ -5,6 +5,13 @@ model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path"
pdf_embedding = URLEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
pdf_embedding = URLEmbedding(
file_path=path,
model_name=model_name,
vector_store_config={
"vector_store_name": "url",
"vector_store_path": "vector_store_path",
},
)
pdf_embedding.source_embedding()
print("success")
print("success")


@@ -1,19 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LLMPredictor
import torch
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from llama_index import (
GPTListIndex,
GPTSimpleVectorIndex,
LangchainEmbedding,
LLMPredictor,
PromptHelper,
SimpleDirectoryReader,
)
from transformers import pipeline
class FlanLLM(LLM):
model_name = "google/flan-t5-large"
pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={
"torch_dtype": torch.bfloat16
})
pipeline = pipeline(
"text2text-generation",
model=model_name,
device=0,
model_kwargs={"torch_dtype": torch.bfloat16},
)
def _call(self, prompt, stop=None):
return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
@@ -24,6 +33,7 @@ class FlanLLM(LLM):
def _llm_type(self):
return "custome"
llm_predictor = LLMPredictor(llm=FlanLLM())
hfemb = HuggingFaceEmbeddings()
embed_model = LangchainEmbedding(hfemb)
@@ -214,9 +224,10 @@ OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形
Answer: in an NLJ the left table is the driving table. You need to understand the basics of plan execution: take one row from the left table, scan the right table, and return a result as soon as the join condition is satisfied.
anti/semi joins exist only because the semantics of not exists/exists return rows from the left table alone; rewriting them into an anti join is a plan-level optimization of the join that beats a subquery.
"""
"""
from llama_index import Document
text_list = [text1]
documents = [Document(t) for t in text_list]
@@ -226,12 +237,18 @@ max_input_size = 512
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
index = GPTListIndex(
documents,
embed_model=embed_model,
llm_predictor=llm_predictor,
prompt_helper=prompt_helper,
)
index.save_to_disk("index.json")
if __name__ == "__main__":
import logging
logging.getLogger().setLevel(logging.CRITICAL)
for d in documents:
print(d)
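To close the loop on the script above, a minimal sketch of reading the index back. The assumption, not verified against this llama_index version, is that load_from_disk forwards these keyword arguments to the GPTListIndex constructor in the same way the direct construction above does.

index = GPTListIndex.load_from_disk(
    "index.json",
    embed_model=embed_model,
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)
print(index.query("Which table drives a nested loop join?"))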