mirror of https://github.com/csunny/DB-GPT.git
synced 2025-08-17 15:58:25 +00:00

chores: extra code clean

This commit is contained in:
parent 41430ef01f
commit dba7c2a897
@@ -1,74 +0,0 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import gradio as gr
from langchain.agents import AgentType, initialize_agent, load_tools
from llama_index import (
    Document,
    GPTVectorStoreIndex,
    LangchainEmbedding,
    LLMPredictor,
    ServiceContext,
)

from pilot.model.llm_out.vicuna_llm import VicunaEmbeddingLLM, VicunaRequestLLM


def agent_demo():
    llm = VicunaRequestLLM()

    tools = load_tools(["python_repl"], llm=llm)
    agent = initialize_agent(
        tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True
    )
    agent.run("Write a SQL script that queries 'select count(1)'.")


def knowledged_qa_demo(text_list):
    # Build a vector index over the given texts, using Vicuna for both
    # generation and embeddings.
    llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
    hfemb = VicunaEmbeddingLLM()
    embed_model = LangchainEmbedding(hfemb)
    documents = [Document(t) for t in text_list]

    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, embed_model=embed_model
    )
    index = GPTVectorStoreIndex.from_documents(
        documents, service_context=service_context
    )
    return index


def get_answer(q):
    base_knowledge = """ """  # placeholder knowledge base
    text_list = [base_knowledge]
    index = knowledged_qa_demo(text_list)
    response = index.query(q)
    return response.response


def get_similar(q):
    from pilot.vector_store.extract_tovec import knownledge_tovec_st

    docsearch = knownledge_tovec_st("./datasets/plan.md")
    docs = docsearch.similarity_search_with_score(q, k=1)

    for doc in docs:
        dc, s = doc
        print(s)
        yield dc.page_content


if __name__ == "__main__":
    # agent_demo()

    with gr.Blocks() as demo:
        gr.Markdown("数据库智能助手")  # "Database Intelligent Assistant"
        with gr.Tab("知识问答"):  # "Knowledge Q&A"
            text_input = gr.TextArea()
            text_output = gr.TextArea()
            text_button = gr.Button()

        text_button.click(get_similar, inputs=text_input, outputs=text_output)

    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
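For reference, a minimal usage sketch of the knowledged_qa_demo helper above. The sample text and question are hypothetical, and the query call assumes the same legacy llama_index API the script itself uses:

# Usage sketch for the script above; sample text and question are
# hypothetical, and knowledged_qa_demo is assumed to be in scope.
sample_texts = ["OceanBase supports EXPLAIN BASIC, EXPLAIN and EXPLAIN EXTENDED."]

index = knowledged_qa_demo(sample_texts)  # build an in-memory vector index
response = index.query("Which EXPLAIN modes does OceanBase support?")
print(response.response)  # the generated answer text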
@@ -1,82 +0,0 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import json
import os
import sys
from urllib.parse import urljoin

import gradio as gr
import requests

ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_PATH)


from langchain.prompts import PromptTemplate

from pilot.configs.config import Config
from pilot.conversation import conv_qa_prompt_template, conv_templates

llmstream_stream_path = "generate_stream"

CFG = Config()


def generate(query):
    template_name = "conv_one_shot"
    state = conv_templates[template_name].copy()

    # pt = PromptTemplate(
    #     template=conv_qa_prompt_template,
    #     input_variables=["context", "question"]
    # )

    # result = pt.format(
    #     context="This page covers how to use the Chroma ecosystem within LangChain. It is broken into two parts: installation and setup, and then references to specific Chroma wrappers.",
    #     question=query,
    # )

    # print(result)

    state.append_message(state.roles[0], query)
    state.append_message(state.roles[1], None)

    prompt = state.get_prompt()
    params = {
        "model": "chatglm-6b",
        "prompt": prompt,
        "temperature": 1.0,
        "max_new_tokens": 1024,
        "stop": "###",
    }

    response = requests.post(
        url=urljoin(CFG.MODEL_SERVER, llmstream_stream_path),
        data=json.dumps(params),
        stream=True,  # stream the body so chunks can be consumed incrementally
    )

    # Skip the echoed prompt when the model repeats it in front of the answer.
    skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3

    for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
        if chunk:
            data = json.loads(chunk.decode())
            if data["error_code"] == 0:
                if "vicuna" in CFG.LLM_MODEL:
                    output = data["text"][skip_echo_len:].strip()
                else:
                    output = data["text"].strip()

                state.messages[-1][-1] = output + "▌"
                yield output


if __name__ == "__main__":
    print(CFG.LLM_MODEL)
    with gr.Blocks() as demo:
        gr.Markdown("数据库SQL生成助手")  # "Database SQL Generation Assistant"
        with gr.Tab("SQL生成"):  # "SQL Generation"
            text_input = gr.TextArea()
            text_output = gr.TextArea()
            text_button = gr.Button("提交")  # "Submit"

        text_button.click(generate, inputs=text_input, outputs=text_output)

    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
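The parsing loop in generate() assumes a particular wire format from the generate_stream endpoint: NUL-delimited JSON objects carrying error_code and text fields. A minimal sketch of that assumption, with illustrative payloads that are not taken from the server code:

# Illustrative only: the chunk framing generate() expects from the
# generate_stream endpoint. The payloads here are made up, not server output.
import json

raw_stream = b'{"error_code": 0, "text": "SELECT"}\x00{"error_code": 0, "text": "SELECT count(1) FROM t;"}\x00'

for chunk in raw_stream.split(b"\0"):
    if chunk:
        data = json.loads(chunk.decode())
        if data["error_code"] == 0:
            print(data["text"])  # each chunk carries a longer partial answer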
@@ -1,19 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import sys

from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Read the documents from the "data" directory.
documents = SimpleDirectoryReader("data").load_data()

# Split the documents into chunks (max token size 500) and convert each
# chunk into a vector.
index = GPTVectorStoreIndex(documents)

# Save the index to disk.
index.save_to_disk("index.json")
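A minimal sketch of reading the persisted index back, assuming the same legacy llama_index release that pairs save_to_disk with load_from_disk; the question string is a placeholder:

# Load the index persisted above and query it. Assumes the legacy
# llama_index API that pairs save_to_disk with load_from_disk.
from llama_index import GPTVectorStoreIndex

index = GPTVectorStoreIndex.load_from_disk("index.json")
response = index.query("What do these documents cover?")  # placeholder question
print(response)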
@@ -1,21 +0,0 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import gradio as gr


def change_tab():
    return gr.Tabs.update(selected=1)


with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        with gr.TabItem("Train", id=0):
            t = gr.Textbox()
        with gr.TabItem("Inference", id=1):
            i = gr.Image()

    btn = gr.Button()
    btn.click(change_tab, None, tabs)

demo.launch()
@@ -1,18 +0,0 @@
from pilot.embedding_engine.csv_embedding import CSVEmbedding

# path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
path = "xx.csv"
model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path/"


csv_embedding = CSVEmbedding(
    file_path=path,
    model_name=model_name,
    vector_store_config={
        "vector_store_name": "csv",
        "vector_store_path": vector_store_path,
    },
)
csv_embedding.source_embedding()
print("success")
@@ -1,18 +0,0 @@
from pilot.embedding_engine.pdf_embedding import PDFEmbedding

# path = "xxx.pdf"
path = "your_path/OceanBase-数据库-V4.1.0-应用开发.pdf"
model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path/"


pdf_embedding = PDFEmbedding(
    file_path=path,
    model_name=model_name,
    vector_store_config={
        "vector_store_name": "ob-pdf",
        "vector_store_path": vector_store_path,
    },
)
pdf_embedding.source_embedding()
print("success")
@@ -1,17 +0,0 @@
from pilot.embedding_engine.url_embedding import URLEmbedding

path = "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023"
model_name = "your_path/all-MiniLM-L6-v2"
vector_store_path = "your_path"


url_embedding = URLEmbedding(
    file_path=path,
    model_name=model_name,
    vector_store_config={
        "vector_store_name": "url",
        "vector_store_path": vector_store_path,
    },
)
url_embedding.source_embedding()
print("success")
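The three embedding scripts above share one constructor signature, so the pattern generalizes. A minimal sketch of that shared pattern; the paths and store names here are placeholders, not values from the repo:

# A sketch of the pattern shared by the CSV/PDF/URL demos above.
# All paths and store names below are placeholders.
from pilot.embedding_engine.csv_embedding import CSVEmbedding
from pilot.embedding_engine.pdf_embedding import PDFEmbedding
from pilot.embedding_engine.url_embedding import URLEmbedding

MODEL_NAME = "your_path/all-MiniLM-L6-v2"
VECTOR_STORE_PATH = "your_path/"

SOURCES = [
    (CSVEmbedding, "xx.csv", "csv"),
    (PDFEmbedding, "xx.pdf", "pdf"),
    (URLEmbedding, "https://example.com/page", "url"),
]

for embedding_cls, file_path, store_name in SOURCES:
    embedding = embedding_cls(
        file_path=file_path,
        model_name=MODEL_NAME,
        vector_store_config={
            "vector_store_name": store_name,
            "vector_store_path": VECTOR_STORE_PATH,
        },
    )
    embedding.source_embedding()  # chunk, embed, and persist this source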
@@ -1,67 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import hashlib
from http import HTTPStatus

import requests
from dashscope import Generation


def call_with_messages():
    messages = [
        {"role": "system", "content": "你是生活助手机器人。"},  # "You are a life-assistant bot."
        {"role": "user", "content": "如何做西红柿鸡蛋?"},  # "How do I cook tomato and egg?"
    ]
    gen = Generation()
    response = gen.call(
        Generation.Models.qwen_turbo,
        messages=messages,
        stream=True,
        top_p=0.8,
        result_format="message",  # return results in "message" format
    )

    for resp in response:
        # A status_code of HTTPStatus.OK indicates success; otherwise the
        # request failed and the error is available in `code` and `message`.
        if resp.status_code == HTTPStatus.OK:
            print(resp.output)  # the output text
            print(resp.usage)  # the usage information
        else:
            print(resp.code)  # the error code
            print(resp.message)  # the error message


def build_access_token(api_key: str, secret_key: str) -> str:
    """Generate an access token from the API key (AK) and secret key (SK)."""
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": api_key,
        "client_secret": secret_key,
    }

    res = requests.get(url=url, params=params)

    if res.status_code == 200:
        return res.json().get("access_token")


def _calculate_md5(text: str) -> str:
    md5 = hashlib.md5()
    md5.update(text.encode("utf-8"))
    return md5.hexdigest()


def baichuan_call():
    # Stub: only the streaming chat endpoint is defined so far.
    url = "https://api.baichuan-ai.com/v1/stream/chat"


if __name__ == "__main__":
    call_with_messages()
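A minimal usage sketch for build_access_token above; the credential values are placeholders:

# Usage sketch: the credentials are placeholders, and build_access_token
# from the script above is assumed to be in scope.
API_KEY = "your-api-key"
SECRET_KEY = "your-secret-key"

token = build_access_token(API_KEY, SECRET_KEY)
if token:
    print("access_token:", token[:8] + "...")  # avoid echoing the full token
else:
    print("token request failed")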
@@ -1,257 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import torch
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from llama_index import (
    Document,
    GPTListIndex,
    LangchainEmbedding,
    LLMPredictor,
    PromptHelper,
)
from transformers import pipeline


class FlanLLM(LLM):
    model_name = "google/flan-t5-large"
    pipeline = pipeline(
        "text2text-generation",
        model=model_name,
        device=0,
        model_kwargs={"torch_dtype": torch.bfloat16},
    )

    def _call(self, prompt, stop=None):
        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]

    @property
    def _identifying_params(self):
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self):
        return "custom"


llm_predictor = LLMPredictor(llm=FlanLLM())
hfemb = HuggingFaceEmbeddings()
embed_model = LangchainEmbedding(hfemb)

text1 = """
An execution plan describes how a SQL statement is executed inside the database. With the EXPLAIN command, users can view the logical execution plan that the optimizer generates for a given SQL statement.

To analyze a SQL performance problem, you usually need to inspect the statement's execution plan first and check every step of the execution for issues. Reading execution plans is therefore a prerequisite for SQL tuning, and understanding the plan operators is the key to understanding the EXPLAIN command.

OceanBase provides three modes of the execution plan command: EXPLAIN BASIC, EXPLAIN, and EXPLAIN EXTENDED. The three modes show the plan at different levels of detail:

EXPLAIN BASIC gives the most basic view of the plan.

EXPLAIN EXTENDED gives the most detailed view (usually used when troubleshooting).

EXPLAIN shows enough information for ordinary users to understand how the whole plan executes.

The EXPLAIN command has the following syntax:
EXPLAIN [BASIC | EXTENDED | PARTITIONS | FORMAT = format_name] [PRETTY | PRETTY_COLOR] explainable_stmt
format_name:
    { TRADITIONAL | JSON }
explainable_stmt:
    { SELECT statement
    | DELETE statement
    | INSERT statement
    | REPLACE statement
    | UPDATE statement }

EXPLAIN works with SELECT, DELETE, INSERT, REPLACE, and UPDATE statements. It displays the optimizer's information about the statement's execution plan, including how the statement is processed and how, and in what order, the tables are joined.

In general, EXPLAIN EXTENDED can be used to display the range segments of table scans, and EXPLAIN OUTLINE displays the outline information.

The FORMAT option selects the output format: TRADITIONAL (the default) displays the output in table form, and JSON displays it as JSON.

EXPLAIN PARTITIONS can also be used to check queries that involve partitioned tables. Checking a query against a non-partitioned table produces no error, but the PARTITIONS column is always NULL.

For complex execution plans, the PRETTY or PRETTY_COLOR option connects the parent and child nodes of the plan tree with tree lines or colored tree lines, which makes the plan easier to read. For example:
obclient> CREATE TABLE p1table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 2;
Query OK, 0 rows affected

obclient> CREATE TABLE p2table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 4;
Query OK, 0 rows affected

obclient> EXPLAIN EXTENDED PRETTY_COLOR SELECT * FROM p1table p1 JOIN p2table p2 ON p1.c1=p2.c2\G
*************************** 1. row ***************************
Query Plan: ==========================================================
|ID|OPERATOR                     |NAME    |EST. ROWS|COST|
----------------------------------------------------------
|0 |PX COORDINATOR               |        |1        |278 |
|1 | EXCHANGE OUT DISTR          |:EX10001|1        |277 |
|2 |  HASH JOIN                  |        |1        |276 |
|3 |  ├PX PARTITION ITERATOR     |        |1        |92  |
|4 |  │ TABLE SCAN               |P1      |1        |92  |
|5 |  └EXCHANGE IN DISTR         |        |1        |184 |
|6 |    EXCHANGE OUT DISTR (PKEY)|:EX10000|1        |184 |
|7 |     PX PARTITION ITERATOR   |        |1        |183 |
|8 |      TABLE SCAN             |P2      |1        |183 |
==========================================================

Outputs & filters:
-------------------------------------
  0 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil)
  1 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil), dop=1
  2 - output([P1.C1], [P2.C2], [P1.C2], [P2.C1]), filter(nil),
      equal_conds([P1.C1 = P2.C2]), other_conds(nil)
  3 - output([P1.C1], [P1.C2]), filter(nil)
  4 - output([P1.C1], [P1.C2]), filter(nil),
      access([P1.C1], [P1.C2]), partitions(p[0-1])
  5 - output([P2.C2], [P2.C1]), filter(nil)
  6 - (#keys=1, [P2.C2]), output([P2.C2], [P2.C1]), filter(nil), dop=1
  7 - output([P2.C1], [P2.C2]), filter(nil)
  8 - output([P2.C1], [P2.C2]), filter(nil),
      access([P2.C1], [P2.C2]), partitions(p[0-3])

1 row in set


## Execution plan shape and operator information

Inside a database system, an execution plan is usually represented internally as a tree, but different databases choose different ways to display that tree to users.

The following examples show how PostgreSQL, Oracle, and OceanBase display the plan for TPC-DS Q3.

```sql
obclient> SELECT /*TPC-DS Q3*/ *
          FROM (SELECT dt.d_year,
                       item.i_brand_id brand_id,
                       item.i_brand brand,
                       Sum(ss_net_profit) sum_agg
                FROM date_dim dt,
                     store_sales,
                     item
                WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
                      AND store_sales.ss_item_sk = item.i_item_sk
                      AND item.i_manufact_id = 914
                      AND dt.d_moy = 11
                GROUP BY dt.d_year,
                         item.i_brand,
                         item.i_brand_id
                ORDER BY dt.d_year,
                         sum_agg DESC,
                         brand_id)
          WHERE ROWNUM <= 100;
```

PostgreSQL displays the plan as follows:
Limit (cost=13986.86..13987.20 rows=27 width=91)
  Sort (cost=13986.86..13986.93 rows=27 width=65)
    Sort Key: dt.d_year, (sum(store_sales.ss_net_profit)), item.i_brand_id
    HashAggregate (cost=13985.95..13986.22 rows=27 width=65)
      Merge Join (cost=13884.21..13983.91 rows=204 width=65)
        Merge Cond: (dt.d_date_sk = store_sales.ss_sold_date_sk)
        Index Scan using date_dim_pkey on date_dim dt (cost=0.00..3494.62 rows=6080 width=8)
          Filter: (d_moy = 11)
        Sort (cost=12170.87..12177.27 rows=2560 width=65)
          Sort Key: store_sales.ss_sold_date_sk
          Nested Loop (cost=6.02..12025.94 rows=2560 width=65)
            Seq Scan on item (cost=0.00..1455.00 rows=16 width=59)
              Filter: (i_manufact_id = 914)
            Bitmap Heap Scan on store_sales (cost=6.02..658.94 rows=174 width=14)
              Recheck Cond: (ss_item_sk = item.i_item_sk)
              Bitmap Index Scan on store_sales_pkey (cost=0.00..5.97 rows=174 width=0)
                Index Cond: (ss_item_sk = item.i_item_sk)

Oracle displays the plan as follows:
Plan hash value: 2331821367
--------------------------------------------------------------------------------------------------
| Id | Operation                          | Name         | Rows | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------------------
|  0 | SELECT STATEMENT                   |              |  100 |  9100 |  3688   (1)| 00:00:01 |
|* 1 |  COUNT STOPKEY                     |              |      |       |            |          |
|  2 |   VIEW                             |              | 2736 |  243K |  3688   (1)| 00:00:01 |
|* 3 |    SORT ORDER BY STOPKEY           |              | 2736 |  256K |  3688   (1)| 00:00:01 |
|  4 |     HASH GROUP BY                  |              | 2736 |  256K |  3688   (1)| 00:00:01 |
|* 5 |      HASH JOIN                     |              | 2736 |  256K |  3686   (1)| 00:00:01 |
|* 6 |       TABLE ACCESS FULL            | DATE_DIM     | 6087 | 79131 |   376   (1)| 00:00:01 |
|  7 |       NESTED LOOPS                 |              | 2865 |  232K |  3310   (1)| 00:00:01 |
|  8 |        NESTED LOOPS                |              | 2865 |  232K |  3310   (1)| 00:00:01 |
|* 9 |         TABLE ACCESS FULL          | ITEM         |   18 |  1188 |   375   (0)| 00:00:01 |
|* 10|         INDEX RANGE SCAN           | SYS_C0010069 |  159 |       |     2   (0)| 00:00:01 |
| 11 |        TABLE ACCESS BY INDEX ROWID | STORE_SALES  |  159 |  2703 |   163   (0)| 00:00:01 |
--------------------------------------------------------------------------------------------------

OceanBase displays the plan as follows:
|ID|OPERATOR              |NAME       |EST. ROWS|COST |
-------------------------------------------------------
|0 |LIMIT                 |           |100      |81141|
|1 | TOP-N SORT           |           |100      |81127|
|2 |  HASH GROUP BY       |           |2924     |68551|
|3 |   HASH JOIN          |           |2924     |65004|
|4 |    SUBPLAN SCAN      |VIEW1      |2953     |19070|
|5 |     HASH GROUP BY    |           |2953     |18662|
|6 |      NESTED-LOOP JOIN|           |2953     |15080|
|7 |       TABLE SCAN     |ITEM       |19       |11841|
|8 |       TABLE SCAN     |STORE_SALES|161      |73   |
|9 |    TABLE SCAN        |DT         |6088     |29401|
=======================================================

As the examples show, OceanBase's plan display is similar to Oracle's.

The columns of an OceanBase execution plan have the following meanings:
Column     Meaning
ID         Number assigned to each operator by a pre-order traversal of the execution tree (starting from 0).
OPERATOR   Name of the operator.
NAME       Name of the table (or index) accessed by the table operation.
EST. ROWS  Estimated number of rows the operator outputs.
COST       Execution cost of the operator (in microseconds).

The first part of OceanBase's EXPLAIN output is the tree rendering of the execution plan. Each operation's depth in the tree is shown by its indentation in the OPERATOR column; the most deeply nested operators execute first, and operators at the same depth execute in the order defined by the specific operator.

Question: For `update a not exists (b…)`, I first assumed B was the driving table, since B holds quite a lot of data. Then I saw NLAJ, which joins the left table to the right table. So is the driving table actually A, with A's rows matched against B? Is there anything wrong with that understanding?

Answer: Correct, A drives B.

Question: I only knew that the lowest, rightmost table is the driving table, so I was a bit confused at first :sweat_smile:

Answer: The principle of NLJ is that the rows of the left table (the driving table) probe the right table (the driven table); which table becomes the left or right one depends on other considerations, such as data volume. Anti join / semi join is just an optimization of NOT EXISTS / EXISTS; the underlying principles and further material can be looked up online.

Question: So for NLJ, as I understood before, whichever table executes first is the driving table, i.e., the rightmost table in the plan; and for anti join / semi join, whatever sits on the left of NOT EXISTS is the driving table. Is that right?

Answer: For NLJ the left table is also the driving table. You need to understand the basics of plan execution: take one row from the left table, then scan the right table, and as soon as the join condition is satisfied the data can be returned. Anti/semi join exists because the semantics of NOT EXISTS / EXISTS only return rows of the left table; rewriting it as an anti join is a plan optimization, since the join performs better than the subquery.
"""

text_list = [text1]
documents = [Document(t) for t in text_list]

num_output = 250
max_input_size = 512
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

index = GPTListIndex(
    documents,
    embed_model=embed_model,
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)
index.save_to_disk("index.json")


if __name__ == "__main__":
    import logging

    logging.getLogger().setLevel(logging.CRITICAL)
    for d in documents:
        print(d)

    response = index.query("How many execution plan command modes does the database have?")
    print(response)