From e6fbd037ef324147cd10b08b2ac013ce3a3582b1 Mon Sep 17 00:00:00 2001 From: csunny Date: Sun, 16 Apr 2023 21:51:55 +0800 Subject: [PATCH 01/23] update --- app.py | 241 +++++++++++++++++++++++++++++++++++++ examples/gpt_index.py | 19 +++ examples/obgpt_index.ipynb | 0 3 files changed, 260 insertions(+) create mode 100644 app.py create mode 100644 examples/gpt_index.py create mode 100644 examples/obgpt_index.ipynb diff --git a/app.py b/app.py new file mode 100644 index 000000000..b001d0933 --- /dev/null +++ b/app.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper +from langchain.embeddings.huggingface import HuggingFaceEmbeddings +from llama_index import LLMPredictor +import torch +from langchain.llms.base import LLM +from transformers import pipeline + + +class FlanLLM(LLM): + model_name = "google/flan-t5-large" + pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={ + "torch_dtype": torch.bfloat16 + }) + + def _call(self, prompt, stop=None): + return self.pipeline(prompt, max_length=9999)[0]["generated_text"] + + def _identifying_params(self): + return {"name_of_model": self.model_name} + + def _llm_type(self): + return "custome" + +llm_predictor = LLMPredictor(llm=FlanLLM()) +hfemb = HuggingFaceEmbeddings() +embed_model = LangchainEmbedding(hfemb) + +text1 = """ + 执行计划是对一条 SQL 查询语句在数据库中执行过程的描述。用户可以通过 EXPLAIN 命令查看优化器针对指定 SQL 生成的逻辑执行计划。 + +如果要分析某条 SQL 的性能问题,通常需要先查看 SQL 的执行计划,排查每一步 SQL 执行是否存在问题。所以读懂执行计划是 SQL 优化的先决条件,而了解执行计划的算子是理解 EXPLAIN 命令的关键。 + +OceanBase 数据库的执行计划命令有三种模式:EXPLAIN BASIC、EXPLAIN 和 EXPLAIN EXTENDED。这三种模式对执行计划展现不同粒度的细节信息: + +EXPLAIN BASIC 命令用于最基本的计划展示。 + +EXPLAIN EXTENDED 命令用于最详细的计划展示(通常在排查问题时使用这种展示模式)。 + +EXPLAIN 命令所展示的信息可以帮助普通用户了解整个计划的执行方式。 + +EXPLAIN 命令格式如下: +EXPLAIN [BASIC | EXTENDED | PARTITIONS | FORMAT = format_name] [PRETTY | PRETTY_COLOR] explainable_stmt +format_name: + { TRADITIONAL | JSON } +explainable_stmt: + { SELECT statement + | DELETE statement + | INSERT statement + | REPLACE statement + | UPDATE statement } + + +EXPLAIN 命令适用于 SELECT、DELETE、INSERT、REPLACE 和 UPDATE 语句,显示优化器所提供的有关语句执行计划的信息,包括如何处理该语句,如何联接表以及以何种顺序联接表等信息。 + +一般来说,可以使用 EXPLAIN EXTENDED 命令,将表扫描的范围段展示出来。使用 EXPLAIN OUTLINE 命令可以显示 Outline 信息。 + +FORMAT 选项可用于选择输出格式。TRADITIONAL 表示以表格格式显示输出,这也是默认设置。JSON 表示以 JSON 格式显示信息。 + +使用 EXPLAIN PARTITITIONS 也可用于检查涉及分区表的查询。如果检查针对非分区表的查询,则不会产生错误,但 PARTIONS 列的值始终为 NULL。 + +对于复杂的执行计划,可以使用 PRETTY 或者 PRETTY_COLOR 选项将计划树中的父节点和子节点使用树线或彩色树线连接起来,使得执行计划展示更方便阅读。示例如下: +obclient> CREATE TABLE p1table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 2; +Query OK, 0 rows affected + +obclient> CREATE TABLE p2table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 4; +Query OK, 0 rows affected + +obclient> EXPLAIN EXTENDED PRETTY_COLOR SELECT * FROM p1table p1 JOIN p2table p2 ON p1.c1=p2.c2\G +*************************** 1. row *************************** +Query Plan: ========================================================== +|ID|OPERATOR |NAME |EST. ROWS|COST| +---------------------------------------------------------- +|0 |PX COORDINATOR | |1 |278 | +|1 | EXCHANGE OUT DISTR |:EX10001|1 |277 | +|2 | HASH JOIN | |1 |276 | +|3 | ├PX PARTITION ITERATOR | |1 |92 | +|4 | │ TABLE SCAN |P1 |1 |92 | +|5 | └EXCHANGE IN DISTR | |1 |184 | +|6 | EXCHANGE OUT DISTR (PKEY)|:EX10000|1 |184 | +|7 | PX PARTITION ITERATOR | |1 |183 | +|8 | TABLE SCAN |P2 |1 |183 | +========================================================== + +Outputs & filters: +------------------------------------- + 0 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil) + 1 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil), dop=1 + 2 - output([P1.C1], [P2.C2], [P1.C2], [P2.C1]), filter(nil), + equal_conds([P1.C1 = P2.C2]), other_conds(nil) + 3 - output([P1.C1], [P1.C2]), filter(nil) + 4 - output([P1.C1], [P1.C2]), filter(nil), + access([P1.C1], [P1.C2]), partitions(p[0-1]) + 5 - output([P2.C2], [P2.C1]), filter(nil) + 6 - (#keys=1, [P2.C2]), output([P2.C2], [P2.C1]), filter(nil), dop=1 + 7 - output([P2.C1], [P2.C2]), filter(nil) + 8 - output([P2.C1], [P2.C2]), filter(nil), + access([P2.C1], [P2.C2]), partitions(p[0-3]) + +1 row in set + + + + +## 执行计划形状与算子信息 + +在数据库系统中,执行计划在内部通常是以树的形式来表示的,但是不同的数据库会选择不同的方式展示给用户。 + +如下示例分别为 PostgreSQL 数据库、Oracle 数据库和 OceanBase 数据库对于 TPCDS Q3 的计划展示。 + +```sql +obclient> SELECT /*TPC-DS Q3*/ * + FROM (SELECT dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + Sum(ss_net_profit) sum_agg + FROM date_dim dt, + store_sales, + item + WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manufact_id = 914 + AND dt.d_moy = 11 + GROUP BY dt.d_year, + item.i_brand, + item.i_brand_id + ORDER BY dt.d_year, + sum_agg DESC, + brand_id) + WHERE ROWNUM <= 100; + +PostgreSQL 数据库执行计划展示如下: +Limit (cost=13986.86..13987.20 rows=27 width=91) + Sort (cost=13986.86..13986.93 rows=27 width=65) + Sort Key: dt.d_year, (sum(store_sales.ss_net_profit)), item.i_brand_id + HashAggregate (cost=13985.95..13986.22 rows=27 width=65) + Merge Join (cost=13884.21..13983.91 rows=204 width=65) + Merge Cond: (dt.d_date_sk = store_sales.ss_sold_date_sk) + Index Scan using date_dim_pkey on date_dim dt (cost=0.00..3494.62 rows=6080 width=8) + Filter: (d_moy = 11) + Sort (cost=12170.87..12177.27 rows=2560 width=65) + Sort Key: store_sales.ss_sold_date_sk + Nested Loop (cost=6.02..12025.94 rows=2560 width=65) + Seq Scan on item (cost=0.00..1455.00 rows=16 width=59) + Filter: (i_manufact_id = 914) + Bitmap Heap Scan on store_sales (cost=6.02..658.94 rows=174 width=14) + Recheck Cond: (ss_item_sk = item.i_item_sk) + Bitmap Index Scan on store_sales_pkey (cost=0.00..5.97 rows=174 width=0) + Index Cond: (ss_item_sk = item.i_item_sk) + + + +Oracle 数据库执行计划展示如下: +Plan hash value: 2331821367 +-------------------------------------------------------------------------------------------------- +| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | +-------------------------------------------------------------------------------------------------- +| 0 | SELECT STATEMENT | | 100 | 9100 | 3688 (1)| 00:00:01 | +|* 1 | COUNT STOPKEY | | | | | | +| 2 | VIEW | | 2736 | 243K| 3688 (1)| 00:00:01 | +|* 3 | SORT ORDER BY STOPKEY | | 2736 | 256K| 3688 (1)| 00:00:01 | +| 4 | HASH GROUP BY | | 2736 | 256K| 3688 (1)| 00:00:01 | +|* 5 | HASH JOIN | | 2736 | 256K| 3686 (1)| 00:00:01 | +|* 6 | TABLE ACCESS FULL | DATE_DIM | 6087 | 79131 | 376 (1)| 00:00:01 | +| 7 | NESTED LOOPS | | 2865 | 232K| 3310 (1)| 00:00:01 | +| 8 | NESTED LOOPS | | 2865 | 232K| 3310 (1)| 00:00:01 | +|* 9 | TABLE ACCESS FULL | ITEM | 18 | 1188 | 375 (0)| 00:00:01 | +|* 10 | INDEX RANGE SCAN | SYS_C0010069 | 159 | | 2 (0)| 00:00:01 | +| 11 | TABLE ACCESS BY INDEX ROWID| STORE_SALES | 159 | 2703 | 163 (0)| 00:00:01 | +-------------------------------------------------------------------------------------------------- + +OceanBase 数据库执行计划展示如下: +|ID|OPERATOR |NAME |EST. ROWS|COST | +------------------------------------------------------- +|0 |LIMIT | |100 |81141| +|1 | TOP-N SORT | |100 |81127| +|2 | HASH GROUP BY | |2924 |68551| +|3 | HASH JOIN | |2924 |65004| +|4 | SUBPLAN SCAN |VIEW1 |2953 |19070| +|5 | HASH GROUP BY | |2953 |18662| +|6 | NESTED-LOOP JOIN| |2953 |15080| +|7 | TABLE SCAN |ITEM |19 |11841| +|8 | TABLE SCAN |STORE_SALES|161 |73 | +|9 | TABLE SCAN |DT |6088 |29401| +======================================================= + +由示例可见,OceanBase 数据库的计划展示与 Oracle 数据库类似。 + +OceanBase 数据库执行计划中的各列的含义如下: +列名 含义 +ID 执行树按照前序遍历的方式得到的编号(从 0 开始)。 +OPERATOR 操作算子的名称。 +NAME 对应表操作的表名(索引名)。 +EST. ROWS 估算该操作算子的输出行数。 +COST 该操作算子的执行代价(微秒)。 + + +OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形结构展示。其中每一个操作在树中的层次通过其在 operator 中的缩进予以展示,层次最深的优先执行,层次相同的以特定算子的执行顺序为标准来执行。 + +问题: update a not exists (b…) +我一开始以为 B是驱动表,B的数据挺多的 后来看到NLAJ,是说左边的表关联右边的表 +所以这个的驱动表是不是实际是A,用A的匹配B的,这个理解有问题吗 + +回答: 没错 A 驱动 B的 + +问题: 光知道最下最右的是驱动表了 所以一开始搞得有点懵 :sweat_smile: + +回答: nlj应该原理应该都是左表(驱动表)的记录探测右表(被驱动表), 选哪张成为左表或右表就基于一些其他考量了,比如数据量, 而anti join/semi join只是对 not exist/exist的一种优化,相关的原理和资料网上可以查阅一下 + +问题: 也就是nlj 就是按照之前理解的谁先执行 谁就是驱动表 也就是执行计划中的最右的表 +而anti join/semi join,谁在not exist左面,谁就是驱动表。这么理解对吧 + +回答: nlj也是左表的表是驱动表,这个要了解下计划执行方面的基本原理,取左表的一行数据,再遍历右表,一旦满足连接条件,就可以返回数据 +anti/semi只是因为not exists/exist的语义只是返回左表数据,改成anti join是一种计划优化,连接的方式比子查询更优 + +""" + +from llama_index import Document +text_list = [text1] +documents = [Document(t) for t in text_list] + +num_output = 250 +max_input_size = 512 + +max_chunk_overlap = 20 +prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) + +index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper) +index.save_to_disk("index.json") + + +if __name__ == "__main__": + import logging + logging.getLogger().setLevel(logging.CRITICAL) + for d in documents: + print(d) + + response = index.query("数据库的执行计划命令有多少?") + print(response) diff --git a/examples/gpt_index.py b/examples/gpt_index.py new file mode 100644 index 000000000..29c0a3fe0 --- /dev/null +++ b/examples/gpt_index.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import logging +import sys + +from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex +logging.basicConfig(stream=sys.stdout, level=logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) + +# read the document of data dir +documents = SimpleDirectoryReader("data").load_data() +# split the document to chunk, max token size=500, convert chunk to vector + +index = GPTSimpleVectorIndex(documents) + +# save index +index.save_to_disk("index.json") \ No newline at end of file diff --git a/examples/obgpt_index.ipynb b/examples/obgpt_index.ipynb new file mode 100644 index 000000000..e69de29bb From f2730d02d8eb19fefb4adbff05555cb9559e2951 Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 17:56:00 +0800 Subject: [PATCH 02/23] update --- examples/obgpt_index.ipynb | 132 +++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/examples/obgpt_index.ipynb b/examples/obgpt_index.ipynb index e69de29bb..bb7b014a9 100644 --- a/examples/obgpt_index.ipynb +++ b/examples/obgpt_index.ipynb @@ -0,0 +1,132 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/magic/miniconda3/envs/gpt_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper\n", + "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", + "from llama_index import LLMPredictor\n", + "import torch\n", + "from langchain.llms.base import LLM\n", + "from transformers import pipeline\n", + "\n", + "import os\n", + "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:512\"\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading checkpoint shards: 100%|██████████| 2/2 [00:34<00:00, 17.15s/it]\n" + ] + }, + { + "ename": "OutOfMemoryError", + "evalue": "CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mclass\u001b[39;00m \u001b[39mFlanLLM\u001b[39;00m(LLM):\n\u001b[1;32m 2\u001b[0m model_name \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m../../vicuna-7b\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 3\u001b[0m pipeline \u001b[39m=\u001b[39m pipeline(\u001b[39m\"\u001b[39m\u001b[39mtext-generation\u001b[39m\u001b[39m\"\u001b[39m, model\u001b[39m=\u001b[39mmodel_name, device\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m, model_kwargs\u001b[39m=\u001b[39m{\n\u001b[1;32m 4\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtorch_dtype\u001b[39m\u001b[39m\"\u001b[39m: torch\u001b[39m.\u001b[39mbfloat16\n\u001b[1;32m 5\u001b[0m })\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/main.py:221\u001b[0m, in \u001b[0;36mpydantic.main.ModelMetaclass.__new__\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:506\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.infer\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:436\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.__init__\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:546\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.prepare\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:570\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField._set_default_and_type\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:439\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.get_default\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/utils.py:693\u001b[0m, in \u001b[0;36mpydantic.utils.smart_deepcopy\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[39m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m 174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m 271\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[39m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m 147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[39m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m 174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m 271\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[39m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m 147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 297\u001b[0m y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[39melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (2 times), _deepcopy_dict at line 230 (1 times), _reconstruct at line 270 (1 times), deepcopy at line 146 (1 times)]\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 297\u001b[0m y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[39melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (7 times), _deepcopy_dict at line 230 (3 times), _reconstruct at line 270 (3 times), _reconstruct at line 296 (3 times), deepcopy at line 146 (3 times)]\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m 271\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[39m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m 147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[39m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m 174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 297\u001b[0m y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:153\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 151\u001b[0m copier \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39m__deepcopy__\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m 152\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 153\u001b[0m y \u001b[39m=\u001b[39m copier(memo)\n\u001b[1;32m 154\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 155\u001b[0m reductor \u001b[39m=\u001b[39m dispatch_table\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/torch/nn/parameter.py:55\u001b[0m, in \u001b[0;36mParameter.__deepcopy__\u001b[0;34m(self, memo)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[39mreturn\u001b[39;00m memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)]\n\u001b[1;32m 54\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 55\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdata\u001b[39m.\u001b[39;49mclone(memory_format\u001b[39m=\u001b[39;49mtorch\u001b[39m.\u001b[39;49mpreserve_format), \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequires_grad)\n\u001b[1;32m 56\u001b[0m memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)] \u001b[39m=\u001b[39m result\n\u001b[1;32m 57\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", + "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF" + ] + } + ], + "source": [ + "class FlanLLM(LLM):\n", + " model_name = \"../../vicuna-7b\"\n", + " pipeline = pipeline(\"text-generation\", model=model_name, device=0, model_kwargs={\n", + " \"torch_dtype\": torch.bfloat16\n", + " })\n", + "\n", + " def _call(self, prompt, stop=None):\n", + " return self.pipeline(prompt, max_length=9999)[0][\"generated_text\"]\n", + "\n", + " def _identifying_params(self):\n", + " return {\"name_of_model\": self.model_name}\n", + "\n", + " def _llm_type(self):\n", + " return \"custome\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_predictor = LLMPredictor(llm=FlanLLM())\n", + "hfemb = HuggingFaceEmbeddings()\n", + "embed_model = LangchainEmbedding(hfemb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gpt_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 83ae8005ae5d03a2048b0df8f20181be18bd1d92 Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 17:56:27 +0800 Subject: [PATCH 03/23] rm test code --- examples/obgpt_index.ipynb | 132 ------------------------------------- 1 file changed, 132 deletions(-) delete mode 100644 examples/obgpt_index.ipynb diff --git a/examples/obgpt_index.ipynb b/examples/obgpt_index.ipynb deleted file mode 100644 index bb7b014a9..000000000 --- a/examples/obgpt_index.ipynb +++ /dev/null @@ -1,132 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/magic/miniconda3/envs/gpt_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper\n", - "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", - "from llama_index import LLMPredictor\n", - "import torch\n", - "from langchain.llms.base import LLM\n", - "from transformers import pipeline\n", - "\n", - "import os\n", - "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:512\"\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading checkpoint shards: 100%|██████████| 2/2 [00:34<00:00, 17.15s/it]\n" - ] - }, - { - "ename": "OutOfMemoryError", - "evalue": "CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mclass\u001b[39;00m \u001b[39mFlanLLM\u001b[39;00m(LLM):\n\u001b[1;32m 2\u001b[0m model_name \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m../../vicuna-7b\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 3\u001b[0m pipeline \u001b[39m=\u001b[39m pipeline(\u001b[39m\"\u001b[39m\u001b[39mtext-generation\u001b[39m\u001b[39m\"\u001b[39m, model\u001b[39m=\u001b[39mmodel_name, device\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m, model_kwargs\u001b[39m=\u001b[39m{\n\u001b[1;32m 4\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtorch_dtype\u001b[39m\u001b[39m\"\u001b[39m: torch\u001b[39m.\u001b[39mbfloat16\n\u001b[1;32m 5\u001b[0m })\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/main.py:221\u001b[0m, in \u001b[0;36mpydantic.main.ModelMetaclass.__new__\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:506\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.infer\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:436\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.__init__\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:546\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.prepare\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:570\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField._set_default_and_type\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:439\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.get_default\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/utils.py:693\u001b[0m, in \u001b[0;36mpydantic.utils.smart_deepcopy\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[39m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m 174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m 271\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[39m.\u001b[39m__setstate__(state)\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m 147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[39m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m 174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m 271\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[39m.\u001b[39m__setstate__(state)\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m 147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n", - " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 297\u001b[0m y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[39melse\u001b[39;00m:\n", - " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (2 times), _deepcopy_dict at line 230 (1 times), _reconstruct at line 270 (1 times), deepcopy at line 146 (1 times)]\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 297\u001b[0m y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[39melse\u001b[39;00m:\n", - " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (7 times), _deepcopy_dict at line 230 (3 times), _reconstruct at line 270 (3 times), _reconstruct at line 296 (3 times), deepcopy at line 146 (3 times)]\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m 271\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[39m.\u001b[39m__setstate__(state)\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m 147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[39m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m 174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m 297\u001b[0m y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[39melse\u001b[39;00m:\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:153\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 151\u001b[0m copier \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39m__deepcopy__\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m 152\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 153\u001b[0m y \u001b[39m=\u001b[39m copier(memo)\n\u001b[1;32m 154\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 155\u001b[0m reductor \u001b[39m=\u001b[39m dispatch_table\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/torch/nn/parameter.py:55\u001b[0m, in \u001b[0;36mParameter.__deepcopy__\u001b[0;34m(self, memo)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[39mreturn\u001b[39;00m memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)]\n\u001b[1;32m 54\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 55\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdata\u001b[39m.\u001b[39;49mclone(memory_format\u001b[39m=\u001b[39;49mtorch\u001b[39m.\u001b[39;49mpreserve_format), \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequires_grad)\n\u001b[1;32m 56\u001b[0m memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)] \u001b[39m=\u001b[39m result\n\u001b[1;32m 57\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", - "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF" - ] - } - ], - "source": [ - "class FlanLLM(LLM):\n", - " model_name = \"../../vicuna-7b\"\n", - " pipeline = pipeline(\"text-generation\", model=model_name, device=0, model_kwargs={\n", - " \"torch_dtype\": torch.bfloat16\n", - " })\n", - "\n", - " def _call(self, prompt, stop=None):\n", - " return self.pipeline(prompt, max_length=9999)[0][\"generated_text\"]\n", - "\n", - " def _identifying_params(self):\n", - " return {\"name_of_model\": self.model_name}\n", - "\n", - " def _llm_type(self):\n", - " return \"custome\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm_predictor = LLMPredictor(llm=FlanLLM())\n", - "hfemb = HuggingFaceEmbeddings()\n", - "embed_model = LangchainEmbedding(hfemb)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "gpt_env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From d308f0f5d82eabcde8b444c0adc3219b749b4cda Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 18:05:24 +0800 Subject: [PATCH 04/23] add requirements --- app.py => pilot/app.py | 0 pilot/server/chatbot.py | 3 +++ pilot/server/sqlgpt.py | 4 ++++ requirements.txt | 4 ++++ 4 files changed, 11 insertions(+) rename app.py => pilot/app.py (100%) create mode 100644 pilot/server/chatbot.py create mode 100644 pilot/server/sqlgpt.py create mode 100644 requirements.txt diff --git a/app.py b/pilot/app.py similarity index 100% rename from app.py rename to pilot/app.py diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py new file mode 100644 index 000000000..97206f2d5 --- /dev/null +++ b/pilot/server/chatbot.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py new file mode 100644 index 000000000..f5a71c392 --- /dev/null +++ b/pilot/server/sqlgpt.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python3 +#-*- coding: utf-8 -*- + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..44e89c0b9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +accelerate==0.16.0 +torch==2.0.0 +torchvision==0.13.1 +torchaudio==0.12.1 From 02bc55ca24949d406e760dda54700a58837f9b66 Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 21:59:18 +0800 Subject: [PATCH 05/23] init --- pilot/connections/mysql_conn.py | 2 ++ pilot/connections/pg_conn.py | 2 ++ pilot/model/loader.py | 39 +++++++++++++++++++++++++++++++++ pilot/model/vicuna_llm.py | 9 ++++++++ pilot/server/sqlgpt.py | 1 - pilot/utils.py | 22 +++++++++++++++++++ 6 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 pilot/connections/mysql_conn.py create mode 100644 pilot/connections/pg_conn.py create mode 100644 pilot/model/loader.py create mode 100644 pilot/model/vicuna_llm.py create mode 100644 pilot/utils.py diff --git a/pilot/connections/mysql_conn.py b/pilot/connections/mysql_conn.py new file mode 100644 index 000000000..1f776fc63 --- /dev/null +++ b/pilot/connections/mysql_conn.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- \ No newline at end of file diff --git a/pilot/connections/pg_conn.py b/pilot/connections/pg_conn.py new file mode 100644 index 000000000..1f776fc63 --- /dev/null +++ b/pilot/connections/pg_conn.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- \ No newline at end of file diff --git a/pilot/model/loader.py b/pilot/model/loader.py new file mode 100644 index 000000000..768bcea1d --- /dev/null +++ b/pilot/model/loader.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import torch +from utils import get_gpu_memory +from fastchat.serve.inference import compress_module +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, +) + +class ModerLoader: + + kwargs = {} + + def __init__(self, + model_path) -> None: + + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.model_path = model_path + self.kwargs = { + "torch_dtype": torch.float16, + "device_map": "auto", + "max_memory": get_gpu_memory(), + } + + def loader(self, load_8bit=False, debug=False): + + tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False) + model = AutoModelForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **self.kwargs) + + if load_8bit: + compress_module(model, self.device) + + if debug: + print(model) + + return model, tokenizer + diff --git a/pilot/model/vicuna_llm.py b/pilot/model/vicuna_llm.py new file mode 100644 index 000000000..1cc0ca3c3 --- /dev/null +++ b/pilot/model/vicuna_llm.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +from transformers import pipeline +from langchain.llms.base import LLM +from configs.model_config import * + +class VicunaLLM(LLM): + model_name = llm_model_config[LLM_MODEL] diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index f5a71c392..867053afa 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -1,4 +1,3 @@ #!/usr/bin/env python3 #-*- coding: utf-8 -*- - diff --git a/pilot/utils.py b/pilot/utils.py new file mode 100644 index 000000000..093b14f99 --- /dev/null +++ b/pilot/utils.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import torch + +def get_gpu_memory(max_gpus=None): + gpu_memory = [] + num_gpus = ( + torch.cuda.device_count() + if max_gpus is None + else min(max_gpus, torch.cuda.device_count()) + ) + + for gpu_id in range(num_gpus): + with torch.cuda.device(gpu_id): + device = torch.cuda.current_device() + gpu_properties = torch.cuda.get_device_properties(device) + total_memory = gpu_properties.total_memory / (1024 ** 3) + allocated_memory = torch.cuda.memory_allocated() / (1024 ** 3) + available_memory = total_memory - allocated_memory + gpu_memory.append(available_memory) + return gpu_memory From 6acc9f8cb41e6a9c839b3d82b5bd3e38ea592846 Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 22:04:37 +0800 Subject: [PATCH 06/23] model: add model file --- .gitignore | 1 + pilot/model/loader.py | 1 + pilot/server/sqlgpt.py | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index b6e47617d..ea44648b7 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ parts/ sdist/ var/ wheels/ +models/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ diff --git a/pilot/model/loader.py b/pilot/model/loader.py index 768bcea1d..7a6e8bbee 100644 --- a/pilot/model/loader.py +++ b/pilot/model/loader.py @@ -37,3 +37,4 @@ class ModerLoader: return model, tokenizer + diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 867053afa..c55d8e4c0 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -1,3 +1,9 @@ #!/usr/bin/env python3 #-*- coding: utf-8 -*- +from model.loader import ModerLoader +from fastchat.serve.inference import generate_stream +from configs.model_config import * + +if __name__ == "__main__": + pass \ No newline at end of file From 57fd9d48ad0bf580957e545e4c85e2059057220d Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 22:18:08 +0800 Subject: [PATCH 07/23] init model and tokenizer --- pilot/__init__.py | 3 +++ pilot/model/__init__.py | 2 ++ pilot/model/loader.py | 2 +- pilot/server/__init__.py | 0 pilot/server/sqlgpt.py | 12 +++++++++--- 5 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 pilot/__init__.py create mode 100644 pilot/model/__init__.py create mode 100644 pilot/server/__init__.py diff --git a/pilot/__init__.py b/pilot/__init__.py new file mode 100644 index 000000000..9244e14db --- /dev/null +++ b/pilot/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +__version__ = "0.0.1" diff --git a/pilot/model/__init__.py b/pilot/model/__init__.py new file mode 100644 index 000000000..0c23b3d79 --- /dev/null +++ b/pilot/model/__init__.py @@ -0,0 +1,2 @@ + +from model.loader import * \ No newline at end of file diff --git a/pilot/model/loader.py b/pilot/model/loader.py index 7a6e8bbee..7691a8092 100644 --- a/pilot/model/loader.py +++ b/pilot/model/loader.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- import torch -from utils import get_gpu_memory +from pilot.utils import get_gpu_memory from fastchat.serve.inference import compress_module from transformers import ( AutoTokenizer, diff --git a/pilot/server/__init__.py b/pilot/server/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index c55d8e4c0..65966006a 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -1,9 +1,15 @@ #!/usr/bin/env python3 #-*- coding: utf-8 -*- -from model.loader import ModerLoader +from pilot.model.loader import ModerLoader from fastchat.serve.inference import generate_stream -from configs.model_config import * +from pilot.configs.model_config import * if __name__ == "__main__": - pass \ No newline at end of file + + model_path = llm_model_config[LLM_MODEL] + + ml = ModerLoader(model_path) + model, tokenizer = ml.loader(load_8bit=True) + print(model) + print(tokenizer) \ No newline at end of file From fc4a9a953bac8b7bd3d4fbfd1cd0c8cc97bd7734 Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 22:48:39 +0800 Subject: [PATCH 08/23] update --- pilot/model/__init__.py | 2 -- pilot/server/sqlgpt.py | 24 ++++++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pilot/model/__init__.py b/pilot/model/__init__.py index 0c23b3d79..e69de29bb 100644 --- a/pilot/model/__init__.py +++ b/pilot/model/__init__.py @@ -1,2 +0,0 @@ - -from model.loader import * \ No newline at end of file diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 65966006a..582775953 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -1,15 +1,23 @@ #!/usr/bin/env python3 #-*- coding: utf-8 -*- -from pilot.model.loader import ModerLoader -from fastchat.serve.inference import generate_stream -from pilot.configs.model_config import * +import torch +from fastchat.serve.inference import generate_stream, compress_module + +BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b" +from transformers import AutoTokenizer, AutoModelForCausalLM if __name__ == "__main__": - model_path = llm_model_config[LLM_MODEL] + device = "cuda" if torch.cuda.is_available() else "cpu" + tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False) + model = AutoModelForCausalLM.from_pretrained( + BASE_MODE, + low_cpu_mem_usage=True, + torch_dtype=torch.float16, + device_map="auto", + ) - ml = ModerLoader(model_path) - model, tokenizer = ml.loader(load_8bit=True) - print(model) - print(tokenizer) \ No newline at end of file + print(device) + #compress_module(model, device) + print(model, tokenizer) \ No newline at end of file From cccfcff0089de33bc61ba9d407a7d2fb047cf4f0 Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 22:50:44 +0800 Subject: [PATCH 09/23] add file --- asserts/readme.md | 0 docs/introduct.md | 1 + 2 files changed, 1 insertion(+) create mode 100644 asserts/readme.md create mode 100644 docs/introduct.md diff --git a/asserts/readme.md b/asserts/readme.md new file mode 100644 index 000000000..e69de29bb diff --git a/docs/introduct.md b/docs/introduct.md new file mode 100644 index 000000000..4287ca861 --- /dev/null +++ b/docs/introduct.md @@ -0,0 +1 @@ +# \ No newline at end of file From 4def6b8109c52c515f378805c07d13cb77bdc9f7 Mon Sep 17 00:00:00 2001 From: csunny Date: Fri, 28 Apr 2023 23:53:29 +0800 Subject: [PATCH 10/23] a demo --- pilot/model/loader.py | 1 - pilot/server/sqlgpt.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/pilot/model/loader.py b/pilot/model/loader.py index 7691a8092..98af18982 100644 --- a/pilot/model/loader.py +++ b/pilot/model/loader.py @@ -21,7 +21,6 @@ class ModerLoader: self.kwargs = { "torch_dtype": torch.float16, "device_map": "auto", - "max_memory": get_gpu_memory(), } def loader(self, load_8bit=False, debug=False): diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 582775953..5bf2244cc 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -1,23 +1,43 @@ #!/usr/bin/env python3 #-*- coding: utf-8 -*- +import json import torch from fastchat.serve.inference import generate_stream, compress_module -BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b" + from transformers import AutoTokenizer, AutoModelForCausalLM +device = "cuda" if torch.cuda.is_available() else "cpu" +BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b" -if __name__ == "__main__": - - device = "cuda" if torch.cuda.is_available() else "cpu" +def generate(prompt): tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False) model = AutoModelForCausalLM.from_pretrained( BASE_MODE, low_cpu_mem_usage=True, torch_dtype=torch.float16, device_map="auto", - ) + ) + # compress_module(model, device) + # model.to(device) + print(model, tokenizer) + + params = { + "model": "vicuna-13b", + "prompt": prompt, + "temperature": 0.7, + "max_new_tokens": 512, + "stop": "###" + } + output = generate_stream( + model, tokenizer, params, device, context_len=2048, stream_interval=2) + + yield output + +if __name__ == "__main__": + pass + + + + - print(device) - #compress_module(model, device) - print(model, tokenizer) \ No newline at end of file From a7755ce2504583098c1c08f09f1ddd0a1d251ed2 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 00:33:14 +0800 Subject: [PATCH 11/23] add gradio --- README.md | 1 + environment.yml | 62 ++++++++++++++++++++++++++++++++++++++++++ pilot/server/sqlgpt.py | 16 +++++++++-- requirements.txt | 48 ++++++++++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 environment.yml diff --git a/README.md b/README.md index 7bc71a23b..303aa41d2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # DB-GPT A Open Database-GPT Experiment + ![GitHub Repo stars](https://img.shields.io/github/stars/csunny/db-gpt?style=social) diff --git a/environment.yml b/environment.yml new file mode 100644 index 000000000..81872a557 --- /dev/null +++ b/environment.yml @@ -0,0 +1,62 @@ +name: db-pgt +channels: + - pytorch + - defaults + - anaconda +dependencies: + - python=3.9 + - cudatoolkit + - pip + - pytorch=1.12.1 + - pytorch-mutex=1.0=cuda + - torchaudio=0.12.1 + - torchvision=0.13.1 + - pip: + - accelerate==0.16.0 + - aiohttp==3.8.4 + - aiosignal==1.3.1 + - async-timeout==4.0.2 + - attrs==22.2.0 + - bitsandbytes==0.37.0 + - cchardet==2.1.7 + - chardet==5.1.0 + - contourpy==1.0.7 + - cycler==0.11.0 + - filelock==3.9.0 + - fonttools==4.38.0 + - frozenlist==1.3.3 + - huggingface-hub==0.13.4 + - importlib-resources==5.12.0 + - kiwisolver==1.4.4 + - matplotlib==3.7.0 + - multidict==6.0.4 + - openai==0.27.0 + - packaging==23.0 + - psutil==5.9.4 + - pycocotools==2.0.6 + - pyparsing==3.0.9 + - python-dateutil==2.8.2 + - pyyaml==6.0 + - regex==2022.10.31 + - tokenizers==0.13.2 + - tqdm==4.64.1 + - transformers==4.28.0 + - timm==0.6.13 + - spacy==3.5.1 + - webdataset==0.2.48 + - scikit-learn==1.2.2 + - scipy==1.10.1 + - yarl==1.8.2 + - zipp==3.14.0 + - omegaconf==2.3.0 + - opencv-python==4.7.0.72 + - iopath==0.1.10 + - tenacity==8.2.2 + - peft + - pycocoevalcap + - sentence-transformers + - umap-learn + - notebook + - gradio==3.24.1 + - gradio-client==0.0.8 + - wandb diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 5bf2244cc..a74e99ad3 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 #-*- coding: utf-8 -*- + import json import torch +import gradio as gr from fastchat.serve.inference import generate_stream, compress_module @@ -35,9 +37,17 @@ def generate(prompt): yield output if __name__ == "__main__": - pass - - + with gr.Blocks() as demo: + gr.Markdown("数据库SQL生成助手") + with gr.Tab("SQL生成"): + text_input = gr.TextArea() + text_output = gr.TextArea() + text_button = gr.Button("提交") + + + text_button.click(generate, input=text_input, output=text_output) + + demo.queue(concurrency_count=3).launch() diff --git a/requirements.txt b/requirements.txt index 44e89c0b9..dd7bf5189 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,51 @@ accelerate==0.16.0 torch==2.0.0 torchvision==0.13.1 torchaudio==0.12.1 +accelerate==0.16.0 +aiohttp==3.8.4 +aiosignal==1.3.1 +async-timeout==4.0.2 +attrs==22.2.0 +bitsandbytes==0.37.0 +cchardet==2.1.7 +chardet==5.1.0 +contourpy==1.0.7 +cycler==0.11.0 +filelock==3.9.0 +fonttools==4.38.0 +frozenlist==1.3.3 +huggingface-hub==0.13.4 +importlib-resources==5.12.0 +kiwisolver==1.4.4 +matplotlib==3.7.0 +multidict==6.0.4 +openai==0.27.0 +packaging==23.0 +psutil==5.9.4 +pycocotools==2.0.6 +pyparsing==3.0.9 +python-dateutil==2.8.2 +pyyaml==6.0 +regex==2022.10.31 +tokenizers==0.13.2 +tqdm==4.64.1 +transformers==4.28.0 +timm==0.6.13 +spacy==3.5.1 +webdataset==0.2.48 +scikit-learn==1.2.2 +scipy==1.10.1 +yarl==1.8.2 +zipp==3.14.0 +omegaconf==2.3.0 +opencv-python==4.7.0.72 +iopath==0.1.10 +tenacity==8.2.2 +peft +pycocoevalcap +sentence-transformers +umap-learn +notebook +gradio==3.24.1 +gradio-client==0.0.8 +wandb \ No newline at end of file From 2ff4d71fdd7c02ec02d49e4de0262a7e059d43c8 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 00:36:36 +0800 Subject: [PATCH 12/23] fix --- pilot/server/sqlgpt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index a74e99ad3..edd2baf84 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -45,7 +45,7 @@ if __name__ == "__main__": text_button = gr.Button("提交") - text_button.click(generate, input=text_input, output=text_output) + text_button.click(generate, inputs=text_input, outputs=text_output) demo.queue(concurrency_count=3).launch() From 75181d6f2fc43889f204fed00d4f61bd94006454 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 01:26:19 +0800 Subject: [PATCH 13/23] update --- environment.yml | 2 +- pilot/server/sqlgpt.py | 23 +++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/environment.yml b/environment.yml index 81872a557..3ec4dfd98 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: db-pgt +name: db_pgt channels: - pytorch - defaults diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index edd2baf84..81f9b22dd 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -12,18 +12,18 @@ from transformers import AutoTokenizer, AutoModelForCausalLM device = "cuda" if torch.cuda.is_available() else "cpu" BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b" +tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False) +model = AutoModelForCausalLM.from_pretrained( + BASE_MODE, + low_cpu_mem_usage=True, + torch_dtype=torch.float16, + device_map="auto", +) + def generate(prompt): - tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False) - model = AutoModelForCausalLM.from_pretrained( - BASE_MODE, - low_cpu_mem_usage=True, - torch_dtype=torch.float16, - device_map="auto", - ) # compress_module(model, device) # model.to(device) print(model, tokenizer) - params = { "model": "vicuna-13b", "prompt": prompt, @@ -32,9 +32,12 @@ def generate(prompt): "stop": "###" } output = generate_stream( - model, tokenizer, params, device, context_len=2048, stream_interval=2) + model, tokenizer, params, device, context_len=2048, stream_interval=2): + - yield output + for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"): + if chunk: + yield chunk if __name__ == "__main__": with gr.Blocks() as demo: From 41388cec5e295b9781d92a95088a7afb0e41b547 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 01:44:12 +0800 Subject: [PATCH 14/23] add prompt --- pilot/server/sqlgpt.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 81f9b22dd..3148f58b6 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -32,12 +32,13 @@ def generate(prompt): "stop": "###" } output = generate_stream( - model, tokenizer, params, device, context_len=2048, stream_interval=2): + model, tokenizer, params, device, context_len=2048, stream_interval=2) - - for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"): - if chunk: - yield chunk + for chunk in output: + yield chunk + #for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"): + # if chunk: + # yield chunk if __name__ == "__main__": with gr.Blocks() as demo: @@ -50,7 +51,7 @@ if __name__ == "__main__": text_button.click(generate, inputs=text_input, outputs=text_output) - demo.queue(concurrency_count=3).launch() + demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") From c7d3dd2ef2a2bf45f10e01000d234dfdcd66ca91 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 01:44:33 +0800 Subject: [PATCH 15/23] update --- pilot/server/sqlgpt.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 81f9b22dd..52522e6bd 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -7,7 +7,6 @@ import torch import gradio as gr from fastchat.serve.inference import generate_stream, compress_module - from transformers import AutoTokenizer, AutoModelForCausalLM device = "cuda" if torch.cuda.is_available() else "cpu" BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b" @@ -26,18 +25,19 @@ def generate(prompt): print(model, tokenizer) params = { "model": "vicuna-13b", - "prompt": prompt, + "prompt": "这是一个用户与助手之间的对话, 助手精通数据库领域的知识, 并能够对数据库领域知识做出非常专业的回答。以下是用户的问题:" + prompt, "temperature": 0.7, "max_new_tokens": 512, "stop": "###" } - output = generate_stream( - model, tokenizer, params, device, context_len=2048, stream_interval=2): + for output in generate_stream( + model, tokenizer, params, device, context_len=2048, stream_interval=2): + ret = { + "text": output, + "error_code": 0 + } - - for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"): - if chunk: - yield chunk + yield json.dumps(ret).decode() + b"\0" if __name__ == "__main__": with gr.Blocks() as demo: @@ -50,7 +50,7 @@ if __name__ == "__main__": text_button.click(generate, inputs=text_input, outputs=text_output) - demo.queue(concurrency_count=3).launch() + demo.queue(concurrency_count=3).launch(host="0.0.0.0") From 6b770d8e96d82e26c32a9c8f3f5a32fe63545bb3 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 15:17:48 +0800 Subject: [PATCH 16/23] run a demo --- .vscode/launch.json | 25 ++++++++++++++++++ pilot/__init__.py | 2 -- pilot/model/inference.py | 4 +++ pilot/model/vicuna_llm.py | 22 +++++++++++++++- pilot/server/chatbot.py | 53 +++++++++++++++++++++++++++++++++++++++ pilot/server/sqlgpt.py | 18 ++++--------- 6 files changed, 108 insertions(+), 16 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 pilot/model/inference.py diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..09a35ce9c --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,25 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "env": {"PYTHONPATH": "${workspaceFolder}"}, + "envFile": "${workspaceFolder}/.env" + }, + { + "name": "Python: Module", + "type": "python", + "request": "launch", + "module": "pilot", + "justMyCode": true, + } + ] +} \ No newline at end of file diff --git a/pilot/__init__.py b/pilot/__init__.py index 9244e14db..f102a9cad 100644 --- a/pilot/__init__.py +++ b/pilot/__init__.py @@ -1,3 +1 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- __version__ = "0.0.1" diff --git a/pilot/model/inference.py b/pilot/model/inference.py new file mode 100644 index 000000000..c3698fb1f --- /dev/null +++ b/pilot/model/inference.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import torch diff --git a/pilot/model/vicuna_llm.py b/pilot/model/vicuna_llm.py index 1cc0ca3c3..b3ecd079d 100644 --- a/pilot/model/vicuna_llm.py +++ b/pilot/model/vicuna_llm.py @@ -1,9 +1,29 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- +import requests +from typing import Any, Mapping, Optional, List from transformers import pipeline from langchain.llms.base import LLM from configs.model_config import * class VicunaLLM(LLM): - model_name = llm_model_config[LLM_MODEL] + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + url = vicuna_model_server + params = { + "model": "vicuna-13b", + "prompt": prompt, + "temperature": 0.7, + "max_new_tokens": 512, + "stop": "###" + } + pass + + @property + def _llm_type(self) -> str: + return "custome" + + def _identifying_params(self) -> Mapping[str, Any]: + return {} + \ No newline at end of file diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py index 97206f2d5..6cc1b8904 100644 --- a/pilot/server/chatbot.py +++ b/pilot/server/chatbot.py @@ -1,3 +1,56 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- +import requests +import json +import time +from urllib.parse import urljoin +import gradio as gr +from configs.model_config import * +vicuna_base_uri = "http://192.168.31.114:21002/" +vicuna_stream_path = "worker_generate_stream" +vicuna_status_path = "worker_get_status" + +def generate(prompt): + params = { + "model": "vicuna-13b", + "prompt": "给出一个查询用户的SQL", + "temperature": 0.7, + "max_new_tokens": 512, + "stop": "###" + } + + sts_response = requests.post( + url=urljoin(vicuna_base_uri, vicuna_status_path) + ) + print(sts_response.text) + + response = requests.post( + url=urljoin(vicuna_base_uri, vicuna_stream_path), data=json.dumps(params) + ) + + skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("") * 3 + for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"): + if chunk: + data = json.loads(chunk.decode()) + if data["error_code"] == 0: + output = data["text"] + yield(output) + + time.sleep(0.02) + +if __name__ == "__main__": + print(LLM_MODEL) + with gr.Blocks() as demo: + gr.Markdown("数据库SQL生成助手") + with gr.Tab("SQL生成"): + text_input = gr.TextArea() + text_output = gr.TextArea() + text_button = gr.Button("提交") + + + text_button.click(generate, inputs=text_input, outputs=text_output) + + demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") + + \ No newline at end of file diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 52522e6bd..6dbf1bfc1 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -20,8 +20,8 @@ model = AutoModelForCausalLM.from_pretrained( ) def generate(prompt): - # compress_module(model, device) - # model.to(device) + compress_module(model, device) + model.to(device) print(model, tokenizer) params = { "model": "vicuna-13b", @@ -31,13 +31,8 @@ def generate(prompt): "stop": "###" } for output in generate_stream( - model, tokenizer, params, device, context_len=2048, stream_interval=2): - ret = { - "text": output, - "error_code": 0 - } - - yield json.dumps(ret).decode() + b"\0" + model, tokenizer, params, device, context_len=2048, stream_interval=1): + yield output if __name__ == "__main__": with gr.Blocks() as demo: @@ -50,7 +45,4 @@ if __name__ == "__main__": text_button.click(generate, inputs=text_input, outputs=text_output) - demo.queue(concurrency_count=3).launch(host="0.0.0.0") - - - + demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") From 7566d636b6d24964b8422f1ce1291aff1eee6da4 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 18:28:42 +0800 Subject: [PATCH 17/23] add vicuna embedding --- examples/t5_example.py | 240 +++++++++++++++++++++++++++++++++ pilot/app.py | 247 ++-------------------------------- pilot/model/inference.py | 82 +++++++++++ pilot/model/loader.py | 8 +- pilot/model/vicuna_llm.py | 73 ++++++++-- pilot/server/chatbot.py | 2 +- pilot/server/vicuna_server.py | 48 +++++++ 7 files changed, 450 insertions(+), 250 deletions(-) create mode 100644 examples/t5_example.py create mode 100644 pilot/server/vicuna_server.py diff --git a/examples/t5_example.py b/examples/t5_example.py new file mode 100644 index 000000000..a63c9f961 --- /dev/null +++ b/examples/t5_example.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper +from langchain.embeddings.huggingface import HuggingFaceEmbeddings +from llama_index import LLMPredictor +import torch +from langchain.llms.base import LLM +from transformers import pipeline + + +class FlanLLM(LLM): + model_name = "google/flan-t5-large" + pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={ + "torch_dtype": torch.bfloat16 + }) + + def _call(self, prompt, stop=None): + return self.pipeline(prompt, max_length=9999)[0]["generated_text"] + + def _identifying_params(self): + return {"name_of_model": self.model_name} + + def _llm_type(self): + return "custome" + +llm_predictor = LLMPredictor(llm=FlanLLM()) +hfemb = HuggingFaceEmbeddings() +embed_model = LangchainEmbedding(hfemb) + +text1 = """ + 执行计划是对一条 SQL 查询语句在数据库中执行过程的描述。用户可以通过 EXPLAIN 命令查看优化器针对指定 SQL 生成的逻辑执行计划。 + +如果要分析某条 SQL 的性能问题,通常需要先查看 SQL 的执行计划,排查每一步 SQL 执行是否存在问题。所以读懂执行计划是 SQL 优化的先决条件,而了解执行计划的算子是理解 EXPLAIN 命令的关键。 + +OceanBase 数据库的执行计划命令有三种模式:EXPLAIN BASIC、EXPLAIN 和 EXPLAIN EXTENDED。这三种模式对执行计划展现不同粒度的细节信息: + +EXPLAIN BASIC 命令用于最基本的计划展示。 + +EXPLAIN EXTENDED 命令用于最详细的计划展示(通常在排查问题时使用这种展示模式)。 + +EXPLAIN 命令所展示的信息可以帮助普通用户了解整个计划的执行方式。 + +EXPLAIN 命令格式如下: +EXPLAIN [BASIC | EXTENDED | PARTITIONS | FORMAT = format_name] [PRETTY | PRETTY_COLOR] explainable_stmt +format_name: + { TRADITIONAL | JSON } +explainable_stmt: + { SELECT statement + | DELETE statement + | INSERT statement + | REPLACE statement + | UPDATE statement } + + +EXPLAIN 命令适用于 SELECT、DELETE、INSERT、REPLACE 和 UPDATE 语句,显示优化器所提供的有关语句执行计划的信息,包括如何处理该语句,如何联接表以及以何种顺序联接表等信息。 + +一般来说,可以使用 EXPLAIN EXTENDED 命令,将表扫描的范围段展示出来。使用 EXPLAIN OUTLINE 命令可以显示 Outline 信息。 + +FORMAT 选项可用于选择输出格式。TRADITIONAL 表示以表格格式显示输出,这也是默认设置。JSON 表示以 JSON 格式显示信息。 + +使用 EXPLAIN PARTITITIONS 也可用于检查涉及分区表的查询。如果检查针对非分区表的查询,则不会产生错误,但 PARTIONS 列的值始终为 NULL。 + +对于复杂的执行计划,可以使用 PRETTY 或者 PRETTY_COLOR 选项将计划树中的父节点和子节点使用树线或彩色树线连接起来,使得执行计划展示更方便阅读。示例如下: +obclient> CREATE TABLE p1table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 2; +Query OK, 0 rows affected + +obclient> CREATE TABLE p2table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 4; +Query OK, 0 rows affected + +obclient> EXPLAIN EXTENDED PRETTY_COLOR SELECT * FROM p1table p1 JOIN p2table p2 ON p1.c1=p2.c2\G +*************************** 1. row *************************** +Query Plan: ========================================================== +|ID|OPERATOR |NAME |EST. ROWS|COST| +---------------------------------------------------------- +|0 |PX COORDINATOR | |1 |278 | +|1 | EXCHANGE OUT DISTR |:EX10001|1 |277 | +|2 | HASH JOIN | |1 |276 | +|3 | ├PX PARTITION ITERATOR | |1 |92 | +|4 | │ TABLE SCAN |P1 |1 |92 | +|5 | └EXCHANGE IN DISTR | |1 |184 | +|6 | EXCHANGE OUT DISTR (PKEY)|:EX10000|1 |184 | +|7 | PX PARTITION ITERATOR | |1 |183 | +|8 | TABLE SCAN |P2 |1 |183 | +========================================================== + +Outputs & filters: +------------------------------------- + 0 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil) + 1 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil), dop=1 + 2 - output([P1.C1], [P2.C2], [P1.C2], [P2.C1]), filter(nil), + equal_conds([P1.C1 = P2.C2]), other_conds(nil) + 3 - output([P1.C1], [P1.C2]), filter(nil) + 4 - output([P1.C1], [P1.C2]), filter(nil), + access([P1.C1], [P1.C2]), partitions(p[0-1]) + 5 - output([P2.C2], [P2.C1]), filter(nil) + 6 - (#keys=1, [P2.C2]), output([P2.C2], [P2.C1]), filter(nil), dop=1 + 7 - output([P2.C1], [P2.C2]), filter(nil) + 8 - output([P2.C1], [P2.C2]), filter(nil), + access([P2.C1], [P2.C2]), partitions(p[0-3]) + +1 row in set + + + + +## 执行计划形状与算子信息 + +在数据库系统中,执行计划在内部通常是以树的形式来表示的,但是不同的数据库会选择不同的方式展示给用户。 + +如下示例分别为 PostgreSQL 数据库、Oracle 数据库和 OceanBase 数据库对于 TPCDS Q3 的计划展示。 + +```sql +obclient> SELECT /*TPC-DS Q3*/ * + FROM (SELECT dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + Sum(ss_net_profit) sum_agg + FROM date_dim dt, + store_sales, + item + WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manufact_id = 914 + AND dt.d_moy = 11 + GROUP BY dt.d_year, + item.i_brand, + item.i_brand_id + ORDER BY dt.d_year, + sum_agg DESC, + brand_id) + WHERE ROWNUM <= 100; + +PostgreSQL 数据库执行计划展示如下: +Limit (cost=13986.86..13987.20 rows=27 width=91) + Sort (cost=13986.86..13986.93 rows=27 width=65) + Sort Key: dt.d_year, (sum(store_sales.ss_net_profit)), item.i_brand_id + HashAggregate (cost=13985.95..13986.22 rows=27 width=65) + Merge Join (cost=13884.21..13983.91 rows=204 width=65) + Merge Cond: (dt.d_date_sk = store_sales.ss_sold_date_sk) + Index Scan using date_dim_pkey on date_dim dt (cost=0.00..3494.62 rows=6080 width=8) + Filter: (d_moy = 11) + Sort (cost=12170.87..12177.27 rows=2560 width=65) + Sort Key: store_sales.ss_sold_date_sk + Nested Loop (cost=6.02..12025.94 rows=2560 width=65) + Seq Scan on item (cost=0.00..1455.00 rows=16 width=59) + Filter: (i_manufact_id = 914) + Bitmap Heap Scan on store_sales (cost=6.02..658.94 rows=174 width=14) + Recheck Cond: (ss_item_sk = item.i_item_sk) + Bitmap Index Scan on store_sales_pkey (cost=0.00..5.97 rows=174 width=0) + Index Cond: (ss_item_sk = item.i_item_sk) + + + +Oracle 数据库执行计划展示如下: +Plan hash value: 2331821367 +-------------------------------------------------------------------------------------------------- +| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | +-------------------------------------------------------------------------------------------------- +| 0 | SELECT STATEMENT | | 100 | 9100 | 3688 (1)| 00:00:01 | +|* 1 | COUNT STOPKEY | | | | | | +| 2 | VIEW | | 2736 | 243K| 3688 (1)| 00:00:01 | +|* 3 | SORT ORDER BY STOPKEY | | 2736 | 256K| 3688 (1)| 00:00:01 | +| 4 | HASH GROUP BY | | 2736 | 256K| 3688 (1)| 00:00:01 | +|* 5 | HASH JOIN | | 2736 | 256K| 3686 (1)| 00:00:01 | +|* 6 | TABLE ACCESS FULL | DATE_DIM | 6087 | 79131 | 376 (1)| 00:00:01 | +| 7 | NESTED LOOPS | | 2865 | 232K| 3310 (1)| 00:00:01 | +| 8 | NESTED LOOPS | | 2865 | 232K| 3310 (1)| 00:00:01 | +|* 9 | TABLE ACCESS FULL | ITEM | 18 | 1188 | 375 (0)| 00:00:01 | +|* 10 | INDEX RANGE SCAN | SYS_C0010069 | 159 | | 2 (0)| 00:00:01 | +| 11 | TABLE ACCESS BY INDEX ROWID| STORE_SALES | 159 | 2703 | 163 (0)| 00:00:01 | +-------------------------------------------------------------------------------------------------- + +OceanBase 数据库执行计划展示如下: +|ID|OPERATOR |NAME |EST. ROWS|COST | +------------------------------------------------------- +|0 |LIMIT | |100 |81141| +|1 | TOP-N SORT | |100 |81127| +|2 | HASH GROUP BY | |2924 |68551| +|3 | HASH JOIN | |2924 |65004| +|4 | SUBPLAN SCAN |VIEW1 |2953 |19070| +|5 | HASH GROUP BY | |2953 |18662| +|6 | NESTED-LOOP JOIN| |2953 |15080| +|7 | TABLE SCAN |ITEM |19 |11841| +|8 | TABLE SCAN |STORE_SALES|161 |73 | +|9 | TABLE SCAN |DT |6088 |29401| +======================================================= + +由示例可见,OceanBase 数据库的计划展示与 Oracle 数据库类似。 + +OceanBase 数据库执行计划中的各列的含义如下: +列名 含义 +ID 执行树按照前序遍历的方式得到的编号(从 0 开始)。 +OPERATOR 操作算子的名称。 +NAME 对应表操作的表名(索引名)。 +EST. ROWS 估算该操作算子的输出行数。 +COST 该操作算子的执行代价(微秒)。 + + +OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形结构展示。其中每一个操作在树中的层次通过其在 operator 中的缩进予以展示,层次最深的优先执行,层次相同的以特定算子的执行顺序为标准来执行。 + +问题: update a not exists (b…) +我一开始以为 B是驱动表,B的数据挺多的 后来看到NLAJ,是说左边的表关联右边的表 +所以这个的驱动表是不是实际是A,用A的匹配B的,这个理解有问题吗 + +回答: 没错 A 驱动 B的 + +问题: 光知道最下最右的是驱动表了 所以一开始搞得有点懵 :sweat_smile: + +回答: nlj应该原理应该都是左表(驱动表)的记录探测右表(被驱动表), 选哪张成为左表或右表就基于一些其他考量了,比如数据量, 而anti join/semi join只是对 not exist/exist的一种优化,相关的原理和资料网上可以查阅一下 + +问题: 也就是nlj 就是按照之前理解的谁先执行 谁就是驱动表 也就是执行计划中的最右的表 +而anti join/semi join,谁在not exist左面,谁就是驱动表。这么理解对吧 + +回答: nlj也是左表的表是驱动表,这个要了解下计划执行方面的基本原理,取左表的一行数据,再遍历右表,一旦满足连接条件,就可以返回数据 +anti/semi只是因为not exists/exist的语义只是返回左表数据,改成anti join是一种计划优化,连接的方式比子查询更优 +""" + +from llama_index import Document +text_list = [text1] +documents = [Document(t) for t in text_list] + +num_output = 250 +max_input_size = 512 + +max_chunk_overlap = 20 +prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) + +index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper) +index.save_to_disk("index.json") + + +if __name__ == "__main__": + import logging + logging.getLogger().setLevel(logging.CRITICAL) + for d in documents: + print(d) + + response = index.query("数据库的执行计划命令有多少?") + print(response) diff --git a/pilot/app.py b/pilot/app.py index b001d0933..5f2cf93db 100644 --- a/pilot/app.py +++ b/pilot/app.py @@ -1,241 +1,18 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper -from langchain.embeddings.huggingface import HuggingFaceEmbeddings -from llama_index import LLMPredictor -import torch -from langchain.llms.base import LLM -from transformers import pipeline +from langchain.agents import ( + load_tools, + initialize_agent, + AgentType +) -class FlanLLM(LLM): - model_name = "google/flan-t5-large" - pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={ - "torch_dtype": torch.bfloat16 - }) +from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM +llm = VicunaRequestLLM() - def _call(self, prompt, stop=None): - return self.pipeline(prompt, max_length=9999)[0]["generated_text"] - - def _identifying_params(self): - return {"name_of_model": self.model_name} - - def _llm_type(self): - return "custome" - -llm_predictor = LLMPredictor(llm=FlanLLM()) -hfemb = HuggingFaceEmbeddings() -embed_model = LangchainEmbedding(hfemb) - -text1 = """ - 执行计划是对一条 SQL 查询语句在数据库中执行过程的描述。用户可以通过 EXPLAIN 命令查看优化器针对指定 SQL 生成的逻辑执行计划。 - -如果要分析某条 SQL 的性能问题,通常需要先查看 SQL 的执行计划,排查每一步 SQL 执行是否存在问题。所以读懂执行计划是 SQL 优化的先决条件,而了解执行计划的算子是理解 EXPLAIN 命令的关键。 - -OceanBase 数据库的执行计划命令有三种模式:EXPLAIN BASIC、EXPLAIN 和 EXPLAIN EXTENDED。这三种模式对执行计划展现不同粒度的细节信息: - -EXPLAIN BASIC 命令用于最基本的计划展示。 - -EXPLAIN EXTENDED 命令用于最详细的计划展示(通常在排查问题时使用这种展示模式)。 - -EXPLAIN 命令所展示的信息可以帮助普通用户了解整个计划的执行方式。 - -EXPLAIN 命令格式如下: -EXPLAIN [BASIC | EXTENDED | PARTITIONS | FORMAT = format_name] [PRETTY | PRETTY_COLOR] explainable_stmt -format_name: - { TRADITIONAL | JSON } -explainable_stmt: - { SELECT statement - | DELETE statement - | INSERT statement - | REPLACE statement - | UPDATE statement } - - -EXPLAIN 命令适用于 SELECT、DELETE、INSERT、REPLACE 和 UPDATE 语句,显示优化器所提供的有关语句执行计划的信息,包括如何处理该语句,如何联接表以及以何种顺序联接表等信息。 - -一般来说,可以使用 EXPLAIN EXTENDED 命令,将表扫描的范围段展示出来。使用 EXPLAIN OUTLINE 命令可以显示 Outline 信息。 - -FORMAT 选项可用于选择输出格式。TRADITIONAL 表示以表格格式显示输出,这也是默认设置。JSON 表示以 JSON 格式显示信息。 - -使用 EXPLAIN PARTITITIONS 也可用于检查涉及分区表的查询。如果检查针对非分区表的查询,则不会产生错误,但 PARTIONS 列的值始终为 NULL。 - -对于复杂的执行计划,可以使用 PRETTY 或者 PRETTY_COLOR 选项将计划树中的父节点和子节点使用树线或彩色树线连接起来,使得执行计划展示更方便阅读。示例如下: -obclient> CREATE TABLE p1table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 2; -Query OK, 0 rows affected - -obclient> CREATE TABLE p2table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 4; -Query OK, 0 rows affected - -obclient> EXPLAIN EXTENDED PRETTY_COLOR SELECT * FROM p1table p1 JOIN p2table p2 ON p1.c1=p2.c2\G -*************************** 1. row *************************** -Query Plan: ========================================================== -|ID|OPERATOR |NAME |EST. ROWS|COST| ----------------------------------------------------------- -|0 |PX COORDINATOR | |1 |278 | -|1 | EXCHANGE OUT DISTR |:EX10001|1 |277 | -|2 | HASH JOIN | |1 |276 | -|3 | ├PX PARTITION ITERATOR | |1 |92 | -|4 | │ TABLE SCAN |P1 |1 |92 | -|5 | └EXCHANGE IN DISTR | |1 |184 | -|6 | EXCHANGE OUT DISTR (PKEY)|:EX10000|1 |184 | -|7 | PX PARTITION ITERATOR | |1 |183 | -|8 | TABLE SCAN |P2 |1 |183 | -========================================================== - -Outputs & filters: -------------------------------------- - 0 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil) - 1 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil), dop=1 - 2 - output([P1.C1], [P2.C2], [P1.C2], [P2.C1]), filter(nil), - equal_conds([P1.C1 = P2.C2]), other_conds(nil) - 3 - output([P1.C1], [P1.C2]), filter(nil) - 4 - output([P1.C1], [P1.C2]), filter(nil), - access([P1.C1], [P1.C2]), partitions(p[0-1]) - 5 - output([P2.C2], [P2.C1]), filter(nil) - 6 - (#keys=1, [P2.C2]), output([P2.C2], [P2.C1]), filter(nil), dop=1 - 7 - output([P2.C1], [P2.C2]), filter(nil) - 8 - output([P2.C1], [P2.C2]), filter(nil), - access([P2.C1], [P2.C2]), partitions(p[0-3]) - -1 row in set - - - - -## 执行计划形状与算子信息 - -在数据库系统中,执行计划在内部通常是以树的形式来表示的,但是不同的数据库会选择不同的方式展示给用户。 - -如下示例分别为 PostgreSQL 数据库、Oracle 数据库和 OceanBase 数据库对于 TPCDS Q3 的计划展示。 - -```sql -obclient> SELECT /*TPC-DS Q3*/ * - FROM (SELECT dt.d_year, - item.i_brand_id brand_id, - item.i_brand brand, - Sum(ss_net_profit) sum_agg - FROM date_dim dt, - store_sales, - item - WHERE dt.d_date_sk = store_sales.ss_sold_date_sk - AND store_sales.ss_item_sk = item.i_item_sk - AND item.i_manufact_id = 914 - AND dt.d_moy = 11 - GROUP BY dt.d_year, - item.i_brand, - item.i_brand_id - ORDER BY dt.d_year, - sum_agg DESC, - brand_id) - WHERE ROWNUM <= 100; - -PostgreSQL 数据库执行计划展示如下: -Limit (cost=13986.86..13987.20 rows=27 width=91) - Sort (cost=13986.86..13986.93 rows=27 width=65) - Sort Key: dt.d_year, (sum(store_sales.ss_net_profit)), item.i_brand_id - HashAggregate (cost=13985.95..13986.22 rows=27 width=65) - Merge Join (cost=13884.21..13983.91 rows=204 width=65) - Merge Cond: (dt.d_date_sk = store_sales.ss_sold_date_sk) - Index Scan using date_dim_pkey on date_dim dt (cost=0.00..3494.62 rows=6080 width=8) - Filter: (d_moy = 11) - Sort (cost=12170.87..12177.27 rows=2560 width=65) - Sort Key: store_sales.ss_sold_date_sk - Nested Loop (cost=6.02..12025.94 rows=2560 width=65) - Seq Scan on item (cost=0.00..1455.00 rows=16 width=59) - Filter: (i_manufact_id = 914) - Bitmap Heap Scan on store_sales (cost=6.02..658.94 rows=174 width=14) - Recheck Cond: (ss_item_sk = item.i_item_sk) - Bitmap Index Scan on store_sales_pkey (cost=0.00..5.97 rows=174 width=0) - Index Cond: (ss_item_sk = item.i_item_sk) - - - -Oracle 数据库执行计划展示如下: -Plan hash value: 2331821367 --------------------------------------------------------------------------------------------------- -| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | --------------------------------------------------------------------------------------------------- -| 0 | SELECT STATEMENT | | 100 | 9100 | 3688 (1)| 00:00:01 | -|* 1 | COUNT STOPKEY | | | | | | -| 2 | VIEW | | 2736 | 243K| 3688 (1)| 00:00:01 | -|* 3 | SORT ORDER BY STOPKEY | | 2736 | 256K| 3688 (1)| 00:00:01 | -| 4 | HASH GROUP BY | | 2736 | 256K| 3688 (1)| 00:00:01 | -|* 5 | HASH JOIN | | 2736 | 256K| 3686 (1)| 00:00:01 | -|* 6 | TABLE ACCESS FULL | DATE_DIM | 6087 | 79131 | 376 (1)| 00:00:01 | -| 7 | NESTED LOOPS | | 2865 | 232K| 3310 (1)| 00:00:01 | -| 8 | NESTED LOOPS | | 2865 | 232K| 3310 (1)| 00:00:01 | -|* 9 | TABLE ACCESS FULL | ITEM | 18 | 1188 | 375 (0)| 00:00:01 | -|* 10 | INDEX RANGE SCAN | SYS_C0010069 | 159 | | 2 (0)| 00:00:01 | -| 11 | TABLE ACCESS BY INDEX ROWID| STORE_SALES | 159 | 2703 | 163 (0)| 00:00:01 | --------------------------------------------------------------------------------------------------- - -OceanBase 数据库执行计划展示如下: -|ID|OPERATOR |NAME |EST. ROWS|COST | -------------------------------------------------------- -|0 |LIMIT | |100 |81141| -|1 | TOP-N SORT | |100 |81127| -|2 | HASH GROUP BY | |2924 |68551| -|3 | HASH JOIN | |2924 |65004| -|4 | SUBPLAN SCAN |VIEW1 |2953 |19070| -|5 | HASH GROUP BY | |2953 |18662| -|6 | NESTED-LOOP JOIN| |2953 |15080| -|7 | TABLE SCAN |ITEM |19 |11841| -|8 | TABLE SCAN |STORE_SALES|161 |73 | -|9 | TABLE SCAN |DT |6088 |29401| -======================================================= - -由示例可见,OceanBase 数据库的计划展示与 Oracle 数据库类似。 - -OceanBase 数据库执行计划中的各列的含义如下: -列名 含义 -ID 执行树按照前序遍历的方式得到的编号(从 0 开始)。 -OPERATOR 操作算子的名称。 -NAME 对应表操作的表名(索引名)。 -EST. ROWS 估算该操作算子的输出行数。 -COST 该操作算子的执行代价(微秒)。 - - -OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形结构展示。其中每一个操作在树中的层次通过其在 operator 中的缩进予以展示,层次最深的优先执行,层次相同的以特定算子的执行顺序为标准来执行。 - -问题: update a not exists (b…) -我一开始以为 B是驱动表,B的数据挺多的 后来看到NLAJ,是说左边的表关联右边的表 -所以这个的驱动表是不是实际是A,用A的匹配B的,这个理解有问题吗 - -回答: 没错 A 驱动 B的 - -问题: 光知道最下最右的是驱动表了 所以一开始搞得有点懵 :sweat_smile: - -回答: nlj应该原理应该都是左表(驱动表)的记录探测右表(被驱动表), 选哪张成为左表或右表就基于一些其他考量了,比如数据量, 而anti join/semi join只是对 not exist/exist的一种优化,相关的原理和资料网上可以查阅一下 - -问题: 也就是nlj 就是按照之前理解的谁先执行 谁就是驱动表 也就是执行计划中的最右的表 -而anti join/semi join,谁在not exist左面,谁就是驱动表。这么理解对吧 - -回答: nlj也是左表的表是驱动表,这个要了解下计划执行方面的基本原理,取左表的一行数据,再遍历右表,一旦满足连接条件,就可以返回数据 -anti/semi只是因为not exists/exist的语义只是返回左表数据,改成anti join是一种计划优化,连接的方式比子查询更优 - -""" - -from llama_index import Document -text_list = [text1] -documents = [Document(t) for t in text_list] - -num_output = 250 -max_input_size = 512 - -max_chunk_overlap = 20 -prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) - -index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper) -index.save_to_disk("index.json") - - -if __name__ == "__main__": - import logging - logging.getLogger().setLevel(logging.CRITICAL) - for d in documents: - print(d) - - response = index.query("数据库的执行计划命令有多少?") - print(response) +tools = load_tools(['python_repl'], llm=llm) +agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True) +agent.run( + "Write a python script that prints 'Hello World!'" +) \ No newline at end of file diff --git a/pilot/model/inference.py b/pilot/model/inference.py index c3698fb1f..2f2948457 100644 --- a/pilot/model/inference.py +++ b/pilot/model/inference.py @@ -2,3 +2,85 @@ # -*- coding: utf-8 -*- import torch + +@torch.inference_mode() +def generate_output(model, tokenizer, params, device, context_len=2048): + prompt = params["prompt"] + temperature = float(params.get("temperature", 1.0)) + max_new_tokens = int(params.get("max_new_tokens", 256)) + stop_parameter = params.get("stop", None) + + if stop_parameter == tokenizer.eso_token: + stop_parameter = None + + stop_strings = [] + if isinstance(stop_parameter, str): + stop_strings.append(stop_parameter) + elif isinstance(stop_parameter, list): + stop_strings = stop_parameter + elif stop_parameter is None: + pass + else: + raise TypeError("Stop parameter must be string or list of strings.") + + pos = -1 + input_ids = tokenizer(prompt).input_ids + output_ids = [] + + max_src_len = context_len - max_new_tokens - 8 + input_ids = input_ids[-max_src_len:] + + for i in range(max_new_tokens): + if i == 0: + out = model(torch.as_tensor([input_ids], device=device), use_cache=True) + logits = out.logits + past_key_values = out.past_key_values + else: + out = model( + input_ids=torch.as_tensor([[token]], device=device), + use_cache=True, + past_key_values=past_key_values, + ) + logits = out.logits + past_key_values = out.past_key_value + last_token_logits = logits[0][-1] + + if temperature < 1e-4: + token = int(torch.argmax(last_token_logits)) + else: + probs = torch.softmax(last_token_logits / temperature, dim=1) + token = int(torch.multinomial(probs, num_samples=1)) + + output_ids.append(token) + + if token == tokenizer.eos_token_id: + stopped = True + else: + stopped = False + + output = tokenizer.decode(output_ids, skip_special_tokens=True) + for stop_str in stop_strings: + pos = output.rfind(stop_str) + if pos != -1: + output = output[:pos] + stoppped = True + break + else: + pass + + if stoppped: + break + + del past_key_values + if pos != -1: + return output[:pos] + return output + + +@torch.inference_mode() +def get_embeddings(model, tokenizer, prompt): + input_ids = tokenizer(prompt).input_ids + input_embeddings = model.get_input_embeddings() + embeddings = input_embeddings(torch.LongTensor([input_ids])) + mean = torch.mean(embeddings[0], 0).cpu().detach() + return mean \ No newline at end of file diff --git a/pilot/model/loader.py b/pilot/model/loader.py index 98af18982..979b2bb89 100644 --- a/pilot/model/loader.py +++ b/pilot/model/loader.py @@ -2,8 +2,6 @@ # -*- coding: utf-8 -*- import torch -from pilot.utils import get_gpu_memory -from fastchat.serve.inference import compress_module from transformers import ( AutoTokenizer, AutoModelForCausalLM, @@ -28,12 +26,12 @@ class ModerLoader: tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False) model = AutoModelForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **self.kwargs) - if load_8bit: - compress_module(model, self.device) - if debug: print(model) + if self.device == "cuda": + model.to(self.device) + return model, tokenizer diff --git a/pilot/model/vicuna_llm.py b/pilot/model/vicuna_llm.py index b3ecd079d..be433c7c3 100644 --- a/pilot/model/vicuna_llm.py +++ b/pilot/model/vicuna_llm.py @@ -1,24 +1,34 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- +import json import requests +from urllib.parse import urljoin +from langchain.embeddings.base import Embeddings +from pydantic import BaseModel from typing import Any, Mapping, Optional, List -from transformers import pipeline from langchain.llms.base import LLM from configs.model_config import * -class VicunaLLM(LLM): +class VicunaRequestLLM(LLM): + vicuna_generate_path = "generate" def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: - url = vicuna_model_server + if isinstance(stop, list): + stop = stop + ["Observation:"] + params = { - "model": "vicuna-13b", "prompt": prompt, - "temperature": 0.7, - "max_new_tokens": 512, - "stop": "###" + "temperature": 0, + "max_new_tokens": 256, + "stop": stop } - pass + response = requests.post( + url=urljoin(vicuna_model_server, self.vicuna_generate_path), + data=json.dumps(params) + ) + response.raise_for_status() + return response.json()["response"] @property def _llm_type(self) -> str: @@ -26,4 +36,49 @@ class VicunaLLM(LLM): def _identifying_params(self) -> Mapping[str, Any]: return {} - \ No newline at end of file + + +class VicunaEmbeddingLLM(BaseModel, Embeddings): + + vicuna_embedding_path = "embedding" + + def _call(self, prompt: str) -> str: + p = prompt.strip() + print("Sending prompt ", p) + + response = requests.post( + url=urljoin(vicuna_model_server, self.vicuna_embedding_path), + json={ + "prompt": p + } + ) + response.raise_for_status() + return response.json()["response"] + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """ Call out to Vicuna's server embedding endpoint for embedding search docs. + + Args: + texts: The list of text to embed + + Returns: + List of embeddings. one for each text. + """ + results = [] + for text in texts: + response = self.embed_query(text) + results.append(response) + return results + + + def embed_query(self, text: str) -> List[float]: + """ Call out to Vicuna's server embedding endpoint for embedding query text. + + Args: + text: The text to embed. + Returns: + Embedding for the text + """ + embedding = self._call(text) + return embedding + diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py index 6cc1b8904..5796a8c66 100644 --- a/pilot/server/chatbot.py +++ b/pilot/server/chatbot.py @@ -14,7 +14,7 @@ vicuna_status_path = "worker_get_status" def generate(prompt): params = { "model": "vicuna-13b", - "prompt": "给出一个查询用户的SQL", + "prompt": prompt, "temperature": 0.7, "max_new_tokens": 512, "stop": "###" diff --git a/pilot/server/vicuna_server.py b/pilot/server/vicuna_server.py new file mode 100644 index 000000000..996a3e5aa --- /dev/null +++ b/pilot/server/vicuna_server.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from typing import Optional, List +from fastapi import FastAPI +from pydantic import BaseModel +from pilot.model.inference import generate_output, get_embeddings +from pilot.model.loader import ModerLoader +from pilot.configs.model_config import * + +model_path = llm_model_config[LLM_MODEL] +ml = ModerLoader(model_path=model_path) +model, tokenizer = ml.loader(load_8bit=isload_8bit, debug=isdebug) + +app = FastAPI() + +class PromptRequest(BaseModel): + prompt: str + temperature: float + max_new_tokens: int + stop: Optional(List[str]) = None + + +class EmbeddingRequest(BaseModel): + prompt: str + + +@app.post("/generate") +def generate(prompt_request: PromptRequest): + params = { + "prompt": prompt_request.prompt, + "temperature": prompt_request.temperature, + "max_new_tokens": prompt_request.max_new_tokens, + "stop": prompt_request.stop + } + + print("Receive prompt: ", params["prompt"]) + output = generate_output(model, tokenizer, params, DEVICE) + print("Output: ", output) + return {"response": output} + + +@app.post("/embedding") +def embeddings(prompt_request: EmbeddingRequest): + params = {"prompt": prompt_request.prompt} + print("Received prompt: ", params["prompt"]) + output = get_embeddings(model, tokenizer, params["prompt"]) + return {"response": [float(x) for x in output]} \ No newline at end of file From 3b5e4d80767671ce898efa9aaa531ef629d7e5b3 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 21:50:47 +0800 Subject: [PATCH 18/23] fix problem --- pilot/model/loader.py | 4 ++-- pilot/server/chatbot.py | 2 +- pilot/server/sqlgpt.py | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pilot/model/loader.py b/pilot/model/loader.py index 979b2bb89..7b78ebe8c 100644 --- a/pilot/model/loader.py +++ b/pilot/model/loader.py @@ -29,8 +29,8 @@ class ModerLoader: if debug: print(model) - if self.device == "cuda": - model.to(self.device) + # if self.device == "cuda": + # model.to(self.device) return model, tokenizer diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py index 5796a8c66..5e0ad9294 100644 --- a/pilot/server/chatbot.py +++ b/pilot/server/chatbot.py @@ -6,7 +6,7 @@ import json import time from urllib.parse import urljoin import gradio as gr -from configs.model_config import * +from pilot.configs.model_config import * vicuna_base_uri = "http://192.168.31.114:21002/" vicuna_stream_path = "worker_generate_stream" vicuna_status_path = "worker_get_status" diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py index 6dbf1bfc1..773de8611 100644 --- a/pilot/server/sqlgpt.py +++ b/pilot/server/sqlgpt.py @@ -5,7 +5,7 @@ import json import torch import gradio as gr -from fastchat.serve.inference import generate_stream, compress_module +from fastchat.serve.inference import generate_stream from transformers import AutoTokenizer, AutoModelForCausalLM device = "cuda" if torch.cuda.is_available() else "cpu" @@ -20,7 +20,6 @@ model = AutoModelForCausalLM.from_pretrained( ) def generate(prompt): - compress_module(model, device) model.to(device) print(model, tokenizer) params = { From 7aee4186a2d99e72e0822be9c792bc8bea7cdc76 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 22:30:42 +0800 Subject: [PATCH 19/23] gpu out of memory --- README.md | 17 ++++++++++++++--- pilot/model/inference.py | 3 ++- pilot/server/vicuna_server.py | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 303aa41d2..f3f135932 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,20 @@ A Open Database-GPT Experiment ![GitHub Repo stars](https://img.shields.io/github/stars/csunny/db-gpt?style=social) +DB-GPT is an experimental open-source application that builds upon the fastchat model and uses vicuna as its base model. Additionally, it looks like this application incorporates langchain and llama-index embedding knowledge to improve Database-QA capabilities. -DB-GPT is an experimental open-source application, which based on the vicuna base model. +Overall, it appears to be a sophisticated and innovative tool for working with databases. If you have any specific questions about how to use or implement DB-GPT in your work, please let me know and I'll do my best to assist you. +# Install +1. Run model server +``` +cd pilot/server +uvicorn icuna_server:app --host 0.0.0.0 +``` -## Featurs -Coming soon, please wait... \ No newline at end of file +2. Run gradio webui + +# Featurs +- SQL-Generate +- Database-QA Based Knowledge +- SQL-diagnosis \ No newline at end of file diff --git a/pilot/model/inference.py b/pilot/model/inference.py index 2f2948457..426043aa5 100644 --- a/pilot/model/inference.py +++ b/pilot/model/inference.py @@ -10,7 +10,8 @@ def generate_output(model, tokenizer, params, device, context_len=2048): max_new_tokens = int(params.get("max_new_tokens", 256)) stop_parameter = params.get("stop", None) - if stop_parameter == tokenizer.eso_token: + print(tokenizer.__dir__()) + if stop_parameter == tokenizer.eos_token: stop_parameter = None stop_strings = [] diff --git a/pilot/server/vicuna_server.py b/pilot/server/vicuna_server.py index 996a3e5aa..20ed928d0 100644 --- a/pilot/server/vicuna_server.py +++ b/pilot/server/vicuna_server.py @@ -18,7 +18,7 @@ class PromptRequest(BaseModel): prompt: str temperature: float max_new_tokens: int - stop: Optional(List[str]) = None + stop: Optional[List[str]] = None class EmbeddingRequest(BaseModel): From 8324f3df8bd0deee77a83750180cd66effdf769f Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 22:55:21 +0800 Subject: [PATCH 20/23] update --- pilot/app.py | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/pilot/app.py b/pilot/app.py index 5f2cf93db..f5fcb3ad1 100644 --- a/pilot/app.py +++ b/pilot/app.py @@ -1,18 +1,48 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- - +import streamlit as st from langchain.agents import ( load_tools, initialize_agent, AgentType ) - from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM -llm = VicunaRequestLLM() +from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext +from langchain.embeddings.huggingface import HuggingFaceEmbeddings +from llama_index import Document, GPTSimpleVectorIndex -tools = load_tools(['python_repl'], llm=llm) -agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True) -agent.run( - "Write a python script that prints 'Hello World!'" -) \ No newline at end of file +def agent_demo(): + llm = VicunaRequestLLM() + + tools = load_tools(['python_repl'], llm=llm) + agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True) + agent.run( + "Write a SQL script that Query 'select count(1)!'" + ) + +def knowledged_qa_demo(text_list): + llm_predictor = LLMPredictor(llm=VicunaRequestLLM) + hfemb = VicunaEmbeddingLLM() + embed_model = LangchainEmbedding(hfemb) + documents = [Document(t) for t in text_list] + + service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model) + index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context) + return index + + +if __name__ == "__main__": + # agent_demo() + + test1 = """ 这是一段测试文字 """ + text_list = [test1] + index = knowledged_qa_demo(text_list) + + st.title("智能助手") + query = st.text_input("请提问.") + + if st.button("提交"): + response = index.query(query) + print(query, response.response) + st.write(response.response) \ No newline at end of file From 3080f50fe4a745fa0319798d96b2856756509c08 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 23:02:13 +0800 Subject: [PATCH 21/23] fix load model gpu oom --- pilot/model/loader.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pilot/model/loader.py b/pilot/model/loader.py index 7b78ebe8c..5f18a023c 100644 --- a/pilot/model/loader.py +++ b/pilot/model/loader.py @@ -7,6 +7,8 @@ from transformers import ( AutoModelForCausalLM, ) +from fastchat.serve.compression import compress_module + class ModerLoader: kwargs = {} @@ -29,6 +31,9 @@ class ModerLoader: if debug: print(model) + if load_8bit: + compress_module(model, self.device) + # if self.device == "cuda": # model.to(self.device) From dd31aa98efbd1fea18e0ef63395bf71938dc9a50 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 23:07:05 +0800 Subject: [PATCH 22/23] update readme file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f3f135932..71c30dc61 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Overall, it appears to be a sophisticated and innovative tool for working with d 1. Run model server ``` cd pilot/server -uvicorn icuna_server:app --host 0.0.0.0 +uvicorn vicuna_server:app --host 0.0.0.0 ``` 2. Run gradio webui From 172e010843978452edcb041b9f4ce6f9fd03d586 Mon Sep 17 00:00:00 2001 From: csunny Date: Sat, 29 Apr 2023 23:28:11 +0800 Subject: [PATCH 23/23] add knownledge base QA --- README.md | 3 +++ pilot/app.py | 31 +++++++++++++++++++------------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 71c30dc61..370b035c5 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,9 @@ uvicorn vicuna_server:app --host 0.0.0.0 ``` 2. Run gradio webui +``` +python app.py +``` # Featurs - SQL-Generate diff --git a/pilot/app.py b/pilot/app.py index f5fcb3ad1..6a7a76f3d 100644 --- a/pilot/app.py +++ b/pilot/app.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -import streamlit as st +import gradio as gr from langchain.agents import ( load_tools, initialize_agent, @@ -22,7 +22,7 @@ def agent_demo(): ) def knowledged_qa_demo(text_list): - llm_predictor = LLMPredictor(llm=VicunaRequestLLM) + llm_predictor = LLMPredictor(llm=VicunaRequestLLM()) hfemb = VicunaEmbeddingLLM() embed_model = LangchainEmbedding(hfemb) documents = [Document(t) for t in text_list] @@ -32,17 +32,24 @@ def knowledged_qa_demo(text_list): return index +def get_answer(q): + base_knowledge = """ 这是一段测试文字 """ + text_list = [base_knowledge] + index = knowledged_qa_demo(text_list) + response = index.query(q) + return response.response + if __name__ == "__main__": # agent_demo() - test1 = """ 这是一段测试文字 """ - text_list = [test1] - index = knowledged_qa_demo(text_list) + with gr.Blocks() as demo: + gr.Markdown("数据库智能助手") + with gr.Tab("知识问答"): + text_input = gr.TextArea() + text_output = gr.TextArea() + text_button = gr.Button() + + text_button.click(get_answer, inputs=text_input, outputs=text_output) - st.title("智能助手") - query = st.text_input("请提问.") - - if st.button("提交"): - response = index.query(query) - print(query, response.response) - st.write(response.response) \ No newline at end of file + demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") +