From e6fbd037ef324147cd10b08b2ac013ce3a3582b1 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sun, 16 Apr 2023 21:51:55 +0800
Subject: [PATCH 01/23] update

---
 app.py                     | 241 +++++++++++++++++++++++++++++++++++++
 examples/gpt_index.py      |  19 +++
 examples/obgpt_index.ipynb |   0
 3 files changed, 260 insertions(+)
 create mode 100644 app.py
 create mode 100644 examples/gpt_index.py
 create mode 100644 examples/obgpt_index.ipynb

diff --git a/app.py b/app.py
new file mode 100644
index 000000000..b001d0933
--- /dev/null
+++ b/app.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from llama_index import LLMPredictor
+import torch
+from langchain.llms.base import LLM
+from transformers import pipeline
+
+
+class FlanLLM(LLM):  # LangChain LLM wrapper that answers prompts with a local Hugging Face pipeline
+    model_name = "google/flan-t5-large"
+    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype": torch.bfloat16})
+
+    def _call(self, prompt, stop=None):  # NOTE: `stop` is accepted for the LLM interface but is not applied
+        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
+
+    @property  # the langchain base class reads this as an attribute (e.g. in dict()), never via a call
+    def _identifying_params(self):
+        return {"name_of_model": self.model_name}
+
+    @property  # declared as an abstract property on the langchain LLM base class
+    def _llm_type(self):
+        return "custome"
+
+llm_predictor = LLMPredictor(llm=FlanLLM())  # llama_index predictor driven by the local FlanLLM
+hfemb = HuggingFaceEmbeddings()  # no model name is passed, so the library default is used
+embed_model = LangchainEmbedding(hfemb)  # wrap the langchain embedding so llama_index can use it
+
+text1 = """
+    执行计划是对一条 SQL 查询语句在数据库中执行过程的描述。用户可以通过 EXPLAIN 命令查看优化器针对指定 SQL 生成的逻辑执行计划。
+
+如果要分析某条 SQL 的性能问题，通常需要先查看 SQL 的执行计划，排查每一步 SQL 执行是否存在问题。所以读懂执行计划是 SQL 优化的先决条件，而了解执行计划的算子是理解 EXPLAIN 命令的关键。
+
+OceanBase 数据库的执行计划命令有三种模式：EXPLAIN BASIC、EXPLAIN 和 EXPLAIN EXTENDED。这三种模式对执行计划展现不同粒度的细节信息:
+
+EXPLAIN BASIC 命令用于最基本的计划展示。
+
+EXPLAIN EXTENDED 命令用于最详细的计划展示（通常在排查问题时使用这种展示模式）。
+
+EXPLAIN 命令所展示的信息可以帮助普通用户了解整个计划的执行方式。
+
+EXPLAIN 命令格式如下：
+EXPLAIN [BASIC | EXTENDED | PARTITIONS | FORMAT = format_name] [PRETTY | PRETTY_COLOR] explainable_stmt
+format_name: 
+  { TRADITIONAL | JSON }
+explainable_stmt: 
+  { SELECT statement
+ | DELETE statement
+ | INSERT statement
+ | REPLACE statement
+ | UPDATE statement }
+
+
+EXPLAIN 命令适用于 SELECT、DELETE、INSERT、REPLACE 和 UPDATE 语句，显示优化器所提供的有关语句执行计划的信息，包括如何处理该语句，如何联接表以及以何种顺序联接表等信息。
+
+一般来说，可以使用 EXPLAIN EXTENDED 命令，将表扫描的范围段展示出来。使用 EXPLAIN OUTLINE 命令可以显示 Outline 信息。
+
+FORMAT 选项可用于选择输出格式。TRADITIONAL 表示以表格格式显示输出，这也是默认设置。JSON 表示以 JSON 格式显示信息。
+
+使用 EXPLAIN PARTITITIONS 也可用于检查涉及分区表的查询。如果检查针对非分区表的查询，则不会产生错误，但 PARTIONS 列的值始终为 NULL。
+
+对于复杂的执行计划，可以使用 PRETTY 或者 PRETTY_COLOR 选项将计划树中的父节点和子节点使用树线或彩色树线连接起来，使得执行计划展示更方便阅读。示例如下：
+obclient> CREATE TABLE p1table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 2;
+Query OK, 0 rows affected
+
+obclient> CREATE TABLE p2table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 4;
+Query OK, 0 rows affected
+
+obclient> EXPLAIN EXTENDED PRETTY_COLOR SELECT  * FROM p1table p1 JOIN p2table p2 ON p1.c1=p2.c2\G
+*************************** 1. row ***************************
+Query Plan: ==========================================================
+|ID|OPERATOR                     |NAME    |EST. ROWS|COST|
+----------------------------------------------------------
+|0 |PX COORDINATOR               |        |1        |278 |
+|1 | EXCHANGE OUT DISTR          |:EX10001|1        |277 |
+|2 |  HASH JOIN                  |        |1        |276 |
+|3 |  ├PX PARTITION ITERATOR     |        |1        |92  |
+|4 |  │ TABLE SCAN               |P1      |1        |92  |
+|5 |  └EXCHANGE IN DISTR         |        |1        |184 |
+|6 |    EXCHANGE OUT DISTR (PKEY)|:EX10000|1        |184 |
+|7 |     PX PARTITION ITERATOR   |        |1        |183 |
+|8 |      TABLE SCAN             |P2      |1        |183 |
+==========================================================
+
+Outputs & filters:
+-------------------------------------
+  0 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil)
+  1 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil), dop=1
+  2 - output([P1.C1], [P2.C2], [P1.C2], [P2.C1]), filter(nil),
+      equal_conds([P1.C1 = P2.C2]), other_conds(nil)
+  3 - output([P1.C1], [P1.C2]), filter(nil)
+  4 - output([P1.C1], [P1.C2]), filter(nil),
+      access([P1.C1], [P1.C2]), partitions(p[0-1])
+  5 - output([P2.C2], [P2.C1]), filter(nil)
+  6 - (#keys=1, [P2.C2]), output([P2.C2], [P2.C1]), filter(nil), dop=1
+  7 - output([P2.C1], [P2.C2]), filter(nil)
+  8 - output([P2.C1], [P2.C2]), filter(nil),
+      access([P2.C1], [P2.C2]), partitions(p[0-3])
+
+1 row in set 
+
+
+
+ 
+## 执行计划形状与算子信息 
+
+在数据库系统中，执行计划在内部通常是以树的形式来表示的，但是不同的数据库会选择不同的方式展示给用户。
+
+如下示例分别为 PostgreSQL 数据库、Oracle 数据库和 OceanBase 数据库对于 TPCDS Q3 的计划展示。
+
+```sql
+obclient> SELECT /*TPC-DS Q3*/ * 
+     FROM (SELECT dt.d_year, 
+                  item.i_brand_id    brand_id, 
+                  item.i_brand       brand, 
+                  Sum(ss_net_profit) sum_agg 
+           FROM   date_dim dt, 
+                  store_sales, 
+                  item 
+           WHERE  dt.d_date_sk = store_sales.ss_sold_date_sk 
+                  AND store_sales.ss_item_sk = item.i_item_sk 
+                  AND item.i_manufact_id = 914 
+                  AND dt.d_moy = 11 
+           GROUP  BY dt.d_year, 
+                  item.i_brand, 
+                  item.i_brand_id 
+           ORDER  BY dt.d_year, 
+                  sum_agg DESC, 
+                  brand_id) 
+     WHERE ROWNUM <= 100; 
+
+PostgreSQL 数据库执行计划展示如下：
+Limit  (cost=13986.86..13987.20 rows=27 width=91)
+         Sort  (cost=13986.86..13986.93 rows=27 width=65)
+         Sort Key: dt.d_year, (sum(store_sales.ss_net_profit)), item.i_brand_id
+              HashAggregate  (cost=13985.95..13986.22 rows=27 width=65)
+                     Merge Join  (cost=13884.21..13983.91 rows=204 width=65)
+                     Merge Cond: (dt.d_date_sk = store_sales.ss_sold_date_sk)
+                           Index Scan using date_dim_pkey on date_dim dt  (cost=0.00..3494.62 rows=6080 width=8)
+                           Filter: (d_moy = 11)
+                           Sort  (cost=12170.87..12177.27 rows=2560 width=65)
+                           Sort Key: store_sales.ss_sold_date_sk
+                                 Nested Loop  (cost=6.02..12025.94 rows=2560 width=65)
+                                       Seq Scan on item  (cost=0.00..1455.00 rows=16 width=59)
+                                       Filter: (i_manufact_id = 914)
+                                       Bitmap Heap Scan on store_sales  (cost=6.02..658.94 rows=174 width=14)
+                                       Recheck Cond: (ss_item_sk = item.i_item_sk)
+                                             Bitmap Index Scan on store_sales_pkey  (cost=0.00..5.97 rows=174 width=0)
+                                             Index Cond: (ss_item_sk = item.i_item_sk)
+
+
+
+Oracle 数据库执行计划展示如下：
+Plan hash value: 2331821367
+--------------------------------------------------------------------------------------------------
+| Id  | Operation                         | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
+--------------------------------------------------------------------------------------------------
+|   0 | SELECT STATEMENT                  |              |   100 |  9100 |  3688   (1)| 00:00:01 |
+|*  1 |  COUNT STOPKEY                    |              |       |       |            |          |
+|   2 |   VIEW                            |              |  2736 |   243K|  3688   (1)| 00:00:01 |
+|*  3 |    SORT ORDER BY STOPKEY          |              |  2736 |   256K|  3688   (1)| 00:00:01 |
+|   4 |     HASH GROUP BY                 |              |  2736 |   256K|  3688   (1)| 00:00:01 |
+|*  5 |      HASH JOIN                    |              |  2736 |   256K|  3686   (1)| 00:00:01 |
+|*  6 |       TABLE ACCESS FULL           | DATE_DIM     |  6087 | 79131 |   376   (1)| 00:00:01 |
+|   7 |       NESTED LOOPS                |              |  2865 |   232K|  3310   (1)| 00:00:01 |
+|   8 |        NESTED LOOPS               |              |  2865 |   232K|  3310   (1)| 00:00:01 |
+|*  9 |         TABLE ACCESS FULL         | ITEM         |    18 |  1188 |   375   (0)| 00:00:01 |
+|* 10 |         INDEX RANGE SCAN          | SYS_C0010069 |   159 |       |     2   (0)| 00:00:01 |
+|  11 |        TABLE ACCESS BY INDEX ROWID| STORE_SALES  |   159 |  2703 |   163   (0)| 00:00:01 |
+--------------------------------------------------------------------------------------------------
+
+OceanBase 数据库执行计划展示如下：
+|ID|OPERATOR              |NAME       |EST. ROWS|COST |
+-------------------------------------------------------
+|0 |LIMIT                 |           |100      |81141|
+|1 | TOP-N SORT           |           |100      |81127|
+|2 |  HASH GROUP BY       |           |2924     |68551|
+|3 |   HASH JOIN          |           |2924     |65004|
+|4 |    SUBPLAN SCAN      |VIEW1      |2953     |19070|
+|5 |     HASH GROUP BY    |           |2953     |18662|
+|6 |      NESTED-LOOP JOIN|           |2953     |15080|
+|7 |       TABLE SCAN     |ITEM       |19       |11841|
+|8 |       TABLE SCAN     |STORE_SALES|161      |73   |
+|9 |    TABLE SCAN        |DT         |6088     |29401|
+=======================================================
+
+由示例可见，OceanBase 数据库的计划展示与 Oracle 数据库类似。
+
+OceanBase 数据库执行计划中的各列的含义如下：
+列名  含义
+ID  执行树按照前序遍历的方式得到的编号（从 0 开始）。
+OPERATOR    操作算子的名称。
+NAME    对应表操作的表名（索引名）。
+EST. ROWS   估算该操作算子的输出行数。
+COST    该操作算子的执行代价（微秒）。
+
+
+OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形结构展示。其中每一个操作在树中的层次通过其在 operator 中的缩进予以展示，层次最深的优先执行，层次相同的以特定算子的执行顺序为标准来执行。
+
+问题:  update a not exists (b…)
+我一开始以为 B是驱动表，B的数据挺多的 后来看到NLAJ，是说左边的表关联右边的表
+所以这个的驱动表是不是实际是A，用A的匹配B的，这个理解有问题吗
+
+回答: 没错 A 驱动 B的
+
+问题: 光知道最下最右的是驱动表了 所以一开始搞得有点懵 :sweat_smile:
+
+回答: nlj应该原理应该都是左表(驱动表)的记录探测右表(被驱动表)， 选哪张成为左表或右表就基于一些其他考量了，比如数据量， 而anti join/semi join只是对 not exist/exist的一种优化，相关的原理和资料网上可以查阅一下
+
+问题: 也就是nlj 就是按照之前理解的谁先执行 谁就是驱动表 也就是执行计划中的最右的表
+而anti join/semi join，谁在not exist左面，谁就是驱动表。这么理解对吧
+
+回答: nlj也是左表的表是驱动表，这个要了解下计划执行方面的基本原理，取左表的一行数据，再遍历右表，一旦满足连接条件，就可以返回数据
+anti/semi只是因为not exists/exist的语义只是返回左表数据，改成anti join是一种计划优化，连接的方式比子查询更优
+
+""" 
+
+from llama_index import Document
+text_list = [text1]
+documents = [Document(t) for t in text_list]  # wrap each raw text in a llama_index Document
+
+num_output = 250  # forwarded to PromptHelper; presumably tokens reserved for the answer -- TODO confirm
+max_input_size = 512  # forwarded to PromptHelper; presumably the model's prompt size limit -- TODO confirm
+
+max_chunk_overlap = 20  # forwarded to PromptHelper as the chunk overlap
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+index.save_to_disk("index.json")  # NOTE: the index is built and written at import time, outside the __main__ guard
+
+
+if __name__ == "__main__":
+    import logging
+    logging.getLogger().setLevel(logging.CRITICAL)  # root logger: drop everything below CRITICAL
+    for d in documents:
+        print(d)
+
+    response = index.query("数据库的执行计划命令有多少?")  # "How many execution-plan commands does the database have?"
+    print(response)
diff --git a/examples/gpt_index.py b/examples/gpt_index.py
new file mode 100644
index 000000000..29c0a3fe0
--- /dev/null
+++ b/examples/gpt_index.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+import logging
+import sys
+
+from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))  # NOTE(review): basicConfig above already attaches a stdout handler to the root logger, so records are likely printed twice
+
+# Load the documents from the "data" directory.
+documents = SimpleDirectoryReader("data").load_data()
+# Split the documents into chunks (max token size = 500 per the original note -- TODO confirm) and embed each chunk.
+
+index = GPTSimpleVectorIndex(documents)
+
+# Save the index to disk.
+index.save_to_disk("index.json")
\ No newline at end of file
diff --git a/examples/obgpt_index.ipynb b/examples/obgpt_index.ipynb
new file mode 100644
index 000000000..e69de29bb

From f2730d02d8eb19fefb4adbff05555cb9559e2951 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 17:56:00 +0800
Subject: [PATCH 02/23] update

---
 examples/obgpt_index.ipynb | 132 +++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/examples/obgpt_index.ipynb b/examples/obgpt_index.ipynb
index e69de29bb..bb7b014a9 100644
--- a/examples/obgpt_index.ipynb
+++ b/examples/obgpt_index.ipynb
@@ -0,0 +1,132 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/magic/miniconda3/envs/gpt_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper\n",
+    "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
+    "from llama_index import LLMPredictor\n",
+    "import torch\n",
+    "from langchain.llms.base import LLM\n",
+    "from transformers import pipeline\n",
+    "\n",
+    "import os\n",
+    "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:512\"\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Loading checkpoint shards: 100%|██████████| 2/2 [00:34<00:00, 17.15s/it]\n"
+     ]
+    },
+    {
+     "ename": "OutOfMemoryError",
+     "evalue": "CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mOutOfMemoryError\u001b[0m                          Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mclass\u001b[39;00m \u001b[39mFlanLLM\u001b[39;00m(LLM):\n\u001b[1;32m      2\u001b[0m     model_name \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m../../vicuna-7b\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m      3\u001b[0m     pipeline \u001b[39m=\u001b[39m pipeline(\u001b[39m\"\u001b[39m\u001b[39mtext-generation\u001b[39m\u001b[39m\"\u001b[39m, model\u001b[39m=\u001b[39mmodel_name, device\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m, model_kwargs\u001b[39m=\u001b[39m{\n\u001b[1;32m      4\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39mtorch_dtype\u001b[39m\u001b[39m\"\u001b[39m: torch\u001b[39m.\u001b[39mbfloat16\n\u001b[1;32m      5\u001b[0m     })\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/main.py:221\u001b[0m, in \u001b[0;36mpydantic.main.ModelMetaclass.__new__\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:506\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.infer\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:436\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.__init__\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:546\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.prepare\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:570\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField._set_default_and_type\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:439\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.get_default\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/utils.py:693\u001b[0m, in \u001b[0;36mpydantic.utils.smart_deepcopy\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    170\u001b[0m                 y \u001b[39m=\u001b[39m x\n\u001b[1;32m    171\u001b[0m             \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m                 y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m    174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m    175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    269\u001b[0m     \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m         state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    271\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m    272\u001b[0m         y\u001b[39m.\u001b[39m__setstate__(state)\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m    145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m     y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m    147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    148\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m    228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m    229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m     y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    170\u001b[0m                 y \u001b[39m=\u001b[39m x\n\u001b[1;32m    171\u001b[0m             \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m                 y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m    174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m    175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    269\u001b[0m     \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m         state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    271\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m    272\u001b[0m         y\u001b[39m.\u001b[39m__setstate__(state)\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m    145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m     y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m    147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    148\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m    228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m    229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m     y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n",
+      "    \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    294\u001b[0m     \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m    295\u001b[0m         key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m         value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    297\u001b[0m         y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m    298\u001b[0m \u001b[39melse\u001b[39;00m:\n",
+      "    \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (2 times), _deepcopy_dict at line 230 (1 times), _reconstruct at line 270 (1 times), deepcopy at line 146 (1 times)]\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    294\u001b[0m     \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m    295\u001b[0m         key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m         value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    297\u001b[0m         y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m    298\u001b[0m \u001b[39melse\u001b[39;00m:\n",
+      "    \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (7 times), _deepcopy_dict at line 230 (3 times), _reconstruct at line 270 (3 times), _reconstruct at line 296 (3 times), deepcopy at line 146 (3 times)]\u001b[0m\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    269\u001b[0m     \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m         state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    271\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m    272\u001b[0m         y\u001b[39m.\u001b[39m__setstate__(state)\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m    145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m     y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m    147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    148\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m    228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m    229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m     y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    170\u001b[0m                 y \u001b[39m=\u001b[39m x\n\u001b[1;32m    171\u001b[0m             \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m                 y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m    174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m    175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    294\u001b[0m     \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m    295\u001b[0m         key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m         value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    297\u001b[0m         y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m    298\u001b[0m \u001b[39melse\u001b[39;00m:\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:153\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    151\u001b[0m copier \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39m__deepcopy__\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m    152\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 153\u001b[0m     y \u001b[39m=\u001b[39m copier(memo)\n\u001b[1;32m    154\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    155\u001b[0m     reductor \u001b[39m=\u001b[39m dispatch_table\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n",
+      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/torch/nn/parameter.py:55\u001b[0m, in \u001b[0;36mParameter.__deepcopy__\u001b[0;34m(self, memo)\u001b[0m\n\u001b[1;32m     53\u001b[0m     \u001b[39mreturn\u001b[39;00m memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)]\n\u001b[1;32m     54\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 55\u001b[0m     result \u001b[39m=\u001b[39m \u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdata\u001b[39m.\u001b[39;49mclone(memory_format\u001b[39m=\u001b[39;49mtorch\u001b[39m.\u001b[39;49mpreserve_format), \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequires_grad)\n\u001b[1;32m     56\u001b[0m     memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)] \u001b[39m=\u001b[39m result\n\u001b[1;32m     57\u001b[0m     \u001b[39mreturn\u001b[39;00m result\n",
+      "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"
+     ]
+    }
+   ],
+   "source": [
+    "class FlanLLM(LLM):\n",
+    "    model_name = \"../../vicuna-7b\"\n",
+    "    pipeline = pipeline(\"text-generation\", model=model_name, device=0, model_kwargs={\n",
+    "        \"torch_dtype\": torch.bfloat16\n",
+    "    })\n",
+    "\n",
+    "    def _call(self, prompt, stop=None):\n",
+    "        return self.pipeline(prompt, max_length=9999)[0][\"generated_text\"]\n",
+    "\n",
+    "    def _identifying_params(self):\n",
+    "        return {\"name_of_model\": self.model_name}\n",
+    "\n",
+    "    def _llm_type(self):\n",
+    "        return \"custome\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm_predictor = LLMPredictor(llm=FlanLLM())\n",
+    "hfemb = HuggingFaceEmbeddings()\n",
+    "embed_model = LangchainEmbedding(hfemb)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "gpt_env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 83ae8005ae5d03a2048b0df8f20181be18bd1d92 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 17:56:27 +0800
Subject: [PATCH 03/23] rm test code

---
 examples/obgpt_index.ipynb | 132 -------------------------------------
 1 file changed, 132 deletions(-)
 delete mode 100644 examples/obgpt_index.ipynb

diff --git a/examples/obgpt_index.ipynb b/examples/obgpt_index.ipynb
deleted file mode 100644
index bb7b014a9..000000000
--- a/examples/obgpt_index.ipynb
+++ /dev/null
@@ -1,132 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/magic/miniconda3/envs/gpt_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper\n",
-    "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
-    "from llama_index import LLMPredictor\n",
-    "import torch\n",
-    "from langchain.llms.base import LLM\n",
-    "from transformers import pipeline\n",
-    "\n",
-    "import os\n",
-    "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:512\"\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading checkpoint shards: 100%|██████████| 2/2 [00:34<00:00, 17.15s/it]\n"
-     ]
-    },
-    {
-     "ename": "OutOfMemoryError",
-     "evalue": "CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mOutOfMemoryError\u001b[0m                          Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mclass\u001b[39;00m \u001b[39mFlanLLM\u001b[39;00m(LLM):\n\u001b[1;32m      2\u001b[0m     model_name \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m../../vicuna-7b\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m      3\u001b[0m     pipeline \u001b[39m=\u001b[39m pipeline(\u001b[39m\"\u001b[39m\u001b[39mtext-generation\u001b[39m\u001b[39m\"\u001b[39m, model\u001b[39m=\u001b[39mmodel_name, device\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m, model_kwargs\u001b[39m=\u001b[39m{\n\u001b[1;32m      4\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39mtorch_dtype\u001b[39m\u001b[39m\"\u001b[39m: torch\u001b[39m.\u001b[39mbfloat16\n\u001b[1;32m      5\u001b[0m     })\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/main.py:221\u001b[0m, in \u001b[0;36mpydantic.main.ModelMetaclass.__new__\u001b[0;34m()\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:506\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.infer\u001b[0;34m()\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:436\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.__init__\u001b[0;34m()\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:546\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.prepare\u001b[0;34m()\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:570\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField._set_default_and_type\u001b[0;34m()\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/fields.py:439\u001b[0m, in \u001b[0;36mpydantic.fields.ModelField.get_default\u001b[0;34m()\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/pydantic/utils.py:693\u001b[0m, in \u001b[0;36mpydantic.utils.smart_deepcopy\u001b[0;34m()\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    170\u001b[0m                 y \u001b[39m=\u001b[39m x\n\u001b[1;32m    171\u001b[0m             \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m                 y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m    174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m    175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    269\u001b[0m     \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m         state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    271\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m    272\u001b[0m         y\u001b[39m.\u001b[39m__setstate__(state)\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m    145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m     y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m    147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    148\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m    228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m    229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m     y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    170\u001b[0m                 y \u001b[39m=\u001b[39m x\n\u001b[1;32m    171\u001b[0m             \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m                 y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m    174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m    175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    269\u001b[0m     \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m         state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    271\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m    272\u001b[0m         y\u001b[39m.\u001b[39m__setstate__(state)\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m    145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m     y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m    147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    148\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m    228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m    229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m     y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n",
-      "    \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    294\u001b[0m     \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m    295\u001b[0m         key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m         value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    297\u001b[0m         y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m    298\u001b[0m \u001b[39melse\u001b[39;00m:\n",
-      "    \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (2 times), _deepcopy_dict at line 230 (1 times), _reconstruct at line 270 (1 times), deepcopy at line 146 (1 times)]\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    294\u001b[0m     \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m    295\u001b[0m         key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m         value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    297\u001b[0m         y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m    298\u001b[0m \u001b[39melse\u001b[39;00m:\n",
-      "    \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (7 times), _deepcopy_dict at line 230 (3 times), _reconstruct at line 270 (3 times), _reconstruct at line 296 (3 times), deepcopy at line 146 (3 times)]\u001b[0m\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    268\u001b[0m \u001b[39mif\u001b[39;00m state \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    269\u001b[0m     \u001b[39mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m         state \u001b[39m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    271\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(y, \u001b[39m'\u001b[39m\u001b[39m__setstate__\u001b[39m\u001b[39m'\u001b[39m):\n\u001b[1;32m    272\u001b[0m         y\u001b[39m.\u001b[39m__setstate__(state)\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    144\u001b[0m copier \u001b[39m=\u001b[39m _deepcopy_dispatch\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n\u001b[1;32m    145\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m     y \u001b[39m=\u001b[39m copier(x, memo)\n\u001b[1;32m    147\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    148\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39missubclass\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39mtype\u001b[39m):\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m    228\u001b[0m memo[\u001b[39mid\u001b[39m(x)] \u001b[39m=\u001b[39m y\n\u001b[1;32m    229\u001b[0m \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m x\u001b[39m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m     y[deepcopy(key, memo)] \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    231\u001b[0m \u001b[39mreturn\u001b[39;00m y\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    170\u001b[0m                 y \u001b[39m=\u001b[39m x\n\u001b[1;32m    171\u001b[0m             \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m                 y \u001b[39m=\u001b[39m _reconstruct(x, memo, \u001b[39m*\u001b[39;49mrv)\n\u001b[1;32m    174\u001b[0m \u001b[39m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m    175\u001b[0m \u001b[39mif\u001b[39;00m y \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m x:\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    294\u001b[0m     \u001b[39mfor\u001b[39;00m key, value \u001b[39min\u001b[39;00m dictiter:\n\u001b[1;32m    295\u001b[0m         key \u001b[39m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m         value \u001b[39m=\u001b[39m deepcopy(value, memo)\n\u001b[1;32m    297\u001b[0m         y[key] \u001b[39m=\u001b[39m value\n\u001b[1;32m    298\u001b[0m \u001b[39melse\u001b[39;00m:\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/copy.py:153\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m    151\u001b[0m copier \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39m__deepcopy__\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m    152\u001b[0m \u001b[39mif\u001b[39;00m copier \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 153\u001b[0m     y \u001b[39m=\u001b[39m copier(memo)\n\u001b[1;32m    154\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    155\u001b[0m     reductor \u001b[39m=\u001b[39m dispatch_table\u001b[39m.\u001b[39mget(\u001b[39mcls\u001b[39m)\n",
-      "File \u001b[0;32m~/miniconda3/envs/gpt_env/lib/python3.9/site-packages/torch/nn/parameter.py:55\u001b[0m, in \u001b[0;36mParameter.__deepcopy__\u001b[0;34m(self, memo)\u001b[0m\n\u001b[1;32m     53\u001b[0m     \u001b[39mreturn\u001b[39;00m memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)]\n\u001b[1;32m     54\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 55\u001b[0m     result \u001b[39m=\u001b[39m \u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdata\u001b[39m.\u001b[39;49mclone(memory_format\u001b[39m=\u001b[39;49mtorch\u001b[39m.\u001b[39;49mpreserve_format), \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequires_grad)\n\u001b[1;32m     56\u001b[0m     memo[\u001b[39mid\u001b[39m(\u001b[39mself\u001b[39m)] \u001b[39m=\u001b[39m result\n\u001b[1;32m     57\u001b[0m     \u001b[39mreturn\u001b[39;00m result\n",
-      "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 23.62 GiB total capacity; 22.63 GiB already allocated; 37.56 MiB free; 22.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"
-     ]
-    }
-   ],
-   "source": [
-    "class FlanLLM(LLM):\n",
-    "    model_name = \"../../vicuna-7b\"\n",
-    "    pipeline = pipeline(\"text-generation\", model=model_name, device=0, model_kwargs={\n",
-    "        \"torch_dtype\": torch.bfloat16\n",
-    "    })\n",
-    "\n",
-    "    def _call(self, prompt, stop=None):\n",
-    "        return self.pipeline(prompt, max_length=9999)[0][\"generated_text\"]\n",
-    "\n",
-    "    def _identifying_params(self):\n",
-    "        return {\"name_of_model\": self.model_name}\n",
-    "\n",
-    "    def _llm_type(self):\n",
-    "        return \"custome\"\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "llm_predictor = LLMPredictor(llm=FlanLLM())\n",
-    "hfemb = HuggingFaceEmbeddings()\n",
-    "embed_model = LangchainEmbedding(hfemb)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "gpt_env",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.16"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

From d308f0f5d82eabcde8b444c0adc3219b749b4cda Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 18:05:24 +0800
Subject: [PATCH 04/23] add requirements

---
 app.py => pilot/app.py  | 0
 pilot/server/chatbot.py | 3 +++
 pilot/server/sqlgpt.py  | 4 ++++
 requirements.txt        | 4 ++++
 4 files changed, 11 insertions(+)
 rename app.py => pilot/app.py (100%)
 create mode 100644 pilot/server/chatbot.py
 create mode 100644 pilot/server/sqlgpt.py
 create mode 100644 requirements.txt

diff --git a/app.py b/pilot/app.py
similarity index 100%
rename from app.py
rename to pilot/app.py
diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py
new file mode 100644
index 000000000..97206f2d5
--- /dev/null
+++ b/pilot/server/chatbot.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
new file mode 100644
index 000000000..f5a71c392
--- /dev/null
+++ b/pilot/server/sqlgpt.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+#-*- coding: utf-8 -*-
+
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..44e89c0b9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+accelerate==0.16.0
+torch==2.0.0
+torchvision==0.13.1
+torchaudio==0.12.1

From 02bc55ca24949d406e760dda54700a58837f9b66 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 21:59:18 +0800
Subject: [PATCH 05/23] init

---
 pilot/connections/mysql_conn.py |  2 ++
 pilot/connections/pg_conn.py    |  2 ++
 pilot/model/loader.py           | 39 +++++++++++++++++++++++++++++++++
 pilot/model/vicuna_llm.py       |  9 ++++++++
 pilot/server/sqlgpt.py          |  1 -
 pilot/utils.py                  | 22 +++++++++++++++++++
 6 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 pilot/connections/mysql_conn.py
 create mode 100644 pilot/connections/pg_conn.py
 create mode 100644 pilot/model/loader.py
 create mode 100644 pilot/model/vicuna_llm.py
 create mode 100644 pilot/utils.py

diff --git a/pilot/connections/mysql_conn.py b/pilot/connections/mysql_conn.py
new file mode 100644
index 000000000..1f776fc63
--- /dev/null
+++ b/pilot/connections/mysql_conn.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
\ No newline at end of file
diff --git a/pilot/connections/pg_conn.py b/pilot/connections/pg_conn.py
new file mode 100644
index 000000000..1f776fc63
--- /dev/null
+++ b/pilot/connections/pg_conn.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
\ No newline at end of file
diff --git a/pilot/model/loader.py b/pilot/model/loader.py
new file mode 100644
index 000000000..768bcea1d
--- /dev/null
+++ b/pilot/model/loader.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import torch
+from utils import get_gpu_memory
+from fastchat.serve.inference import compress_module
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+)
+
+class ModerLoader:
+
+    kwargs = {}
+
+    def __init__(self, 
+                 model_path) -> None:
+        
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model_path = model_path 
+        self.kwargs = {
+            "torch_dtype": torch.float16,
+            "device_map": "auto",
+            "max_memory": get_gpu_memory(),
+        }
+
+    def loader(self, load_8bit=False, debug=False):
+       
+        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
+        model = AutoModelForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **self.kwargs)
+
+        if load_8bit:
+            compress_module(model, self.device)
+
+        if debug:
+            print(model)
+
+        return model, tokenizer
+
diff --git a/pilot/model/vicuna_llm.py b/pilot/model/vicuna_llm.py
new file mode 100644
index 000000000..1cc0ca3c3
--- /dev/null
+++ b/pilot/model/vicuna_llm.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+from transformers import pipeline
+from langchain.llms.base import LLM
+from configs.model_config import *
+
+class VicunaLLM(LLM):
+    model_name = llm_model_config[LLM_MODEL]
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index f5a71c392..867053afa 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,4 +1,3 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
-
diff --git a/pilot/utils.py b/pilot/utils.py
new file mode 100644
index 000000000..093b14f99
--- /dev/null
+++ b/pilot/utils.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import torch
+
+def get_gpu_memory(max_gpus=None):
+    """Return, per CUDA device, total memory minus torch-allocated memory in GiB.
+
+    Covers all devices, or only the first ``max_gpus`` when that is not None.
+    """
+    gpu_memory = []
+    num_gpus = torch.cuda.device_count()
+    if max_gpus is not None:
+        num_gpus = min(max_gpus, num_gpus)
+    for gpu_id in range(num_gpus):
+        with torch.cuda.device(gpu_id):
+            device = torch.cuda.current_device()
+            gpu_properties = torch.cuda.get_device_properties(device)
+            total_memory = gpu_properties.total_memory / (1024 ** 3)
+            allocated_memory = torch.cuda.memory_allocated() / (1024 ** 3)
+            gpu_memory.append(total_memory - allocated_memory)
+    return gpu_memory  # outside the loop so every device is reported

From 6acc9f8cb41e6a9c839b3d82b5bd3e38ea592846 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 22:04:37 +0800
Subject: [PATCH 06/23] model: add model file

---
 .gitignore             | 1 +
 pilot/model/loader.py  | 1 +
 pilot/server/sqlgpt.py | 6 ++++++
 3 files changed, 8 insertions(+)

diff --git a/.gitignore b/.gitignore
index b6e47617d..ea44648b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ parts/
 sdist/
 var/
 wheels/
+models/
 pip-wheel-metadata/
 share/python-wheels/
 *.egg-info/
diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 768bcea1d..7a6e8bbee 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -37,3 +37,4 @@ class ModerLoader:
 
         return model, tokenizer
 
+
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 867053afa..c55d8e4c0 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,3 +1,9 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
+from model.loader import ModerLoader
+from fastchat.serve.inference import generate_stream
+from configs.model_config import *
+
+if __name__ == "__main__":
+    pass
\ No newline at end of file

From 57fd9d48ad0bf580957e545e4c85e2059057220d Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 22:18:08 +0800
Subject: [PATCH 07/23] init model and tokenizer

---
 pilot/__init__.py        |  3 +++
 pilot/model/__init__.py  |  2 ++
 pilot/model/loader.py    |  2 +-
 pilot/server/__init__.py |  0
 pilot/server/sqlgpt.py   | 12 +++++++++---
 5 files changed, 15 insertions(+), 4 deletions(-)
 create mode 100644 pilot/__init__.py
 create mode 100644 pilot/model/__init__.py
 create mode 100644 pilot/server/__init__.py

diff --git a/pilot/__init__.py b/pilot/__init__.py
new file mode 100644
index 000000000..9244e14db
--- /dev/null
+++ b/pilot/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+__version__ = "0.0.1"
diff --git a/pilot/model/__init__.py b/pilot/model/__init__.py
new file mode 100644
index 000000000..0c23b3d79
--- /dev/null
+++ b/pilot/model/__init__.py
@@ -0,0 +1,2 @@
+
+from model.loader import *
\ No newline at end of file
diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 7a6e8bbee..7691a8092 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import torch
-from utils import get_gpu_memory
+from pilot.utils import get_gpu_memory
 from fastchat.serve.inference import compress_module
 from transformers import (
     AutoTokenizer,
diff --git a/pilot/server/__init__.py b/pilot/server/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index c55d8e4c0..65966006a 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,9 +1,15 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
-from model.loader import ModerLoader
+from pilot.model.loader import ModerLoader
 from fastchat.serve.inference import generate_stream
-from configs.model_config import *
+from pilot.configs.model_config import *
 
 if __name__ == "__main__":
-    pass
\ No newline at end of file
+
+    model_path = llm_model_config[LLM_MODEL]
+
+    ml = ModerLoader(model_path)
+    model, tokenizer = ml.loader(load_8bit=True) 
+    print(model)
+    print(tokenizer)
\ No newline at end of file

From fc4a9a953bac8b7bd3d4fbfd1cd0c8cc97bd7734 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 22:48:39 +0800
Subject: [PATCH 08/23] update

---
 pilot/model/__init__.py |  2 --
 pilot/server/sqlgpt.py  | 24 ++++++++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/pilot/model/__init__.py b/pilot/model/__init__.py
index 0c23b3d79..e69de29bb 100644
--- a/pilot/model/__init__.py
+++ b/pilot/model/__init__.py
@@ -1,2 +0,0 @@
-
-from model.loader import *
\ No newline at end of file
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 65966006a..582775953 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,15 +1,23 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
-from pilot.model.loader import ModerLoader
-from fastchat.serve.inference import generate_stream
-from pilot.configs.model_config import *
+import torch
+from fastchat.serve.inference import generate_stream, compress_module
+
+BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 if __name__ == "__main__":
 
-    model_path = llm_model_config[LLM_MODEL]
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False)
+    model = AutoModelForCausalLM.from_pretrained(
+        BASE_MODE, 
+        low_cpu_mem_usage=True, 
+        torch_dtype=torch.float16,
+        device_map="auto",
+        )
 
-    ml = ModerLoader(model_path)
-    model, tokenizer = ml.loader(load_8bit=True) 
-    print(model)
-    print(tokenizer)
\ No newline at end of file
+    print(device)
+    #compress_module(model, device) 
+    print(model, tokenizer)
\ No newline at end of file

From cccfcff0089de33bc61ba9d407a7d2fb047cf4f0 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 22:50:44 +0800
Subject: [PATCH 09/23] add file

---
 asserts/readme.md | 0
 docs/introduct.md | 1 +
 2 files changed, 1 insertion(+)
 create mode 100644 asserts/readme.md
 create mode 100644 docs/introduct.md

diff --git a/asserts/readme.md b/asserts/readme.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/docs/introduct.md b/docs/introduct.md
new file mode 100644
index 000000000..4287ca861
--- /dev/null
+++ b/docs/introduct.md
@@ -0,0 +1 @@
+#
\ No newline at end of file

From 4def6b8109c52c515f378805c07d13cb77bdc9f7 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Fri, 28 Apr 2023 23:53:29 +0800
Subject: [PATCH 10/23] a demo

---
 pilot/model/loader.py  |  1 -
 pilot/server/sqlgpt.py | 36 ++++++++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 7691a8092..98af18982 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -21,7 +21,6 @@ class ModerLoader:
         self.kwargs = {
             "torch_dtype": torch.float16,
             "device_map": "auto",
-            "max_memory": get_gpu_memory(),
         }
 
     def loader(self, load_8bit=False, debug=False):
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 582775953..5bf2244cc 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,23 +1,43 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
+import json
 import torch
 from fastchat.serve.inference import generate_stream, compress_module
 
-BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"
+
 from transformers import AutoTokenizer, AutoModelForCausalLM
+device = "cuda" if torch.cuda.is_available() else "cpu"
+BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"
 
-if __name__ == "__main__":
-
-    device = "cuda" if torch.cuda.is_available() else "cpu"
+def generate(prompt):    
     tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False)
     model = AutoModelForCausalLM.from_pretrained(
         BASE_MODE, 
         low_cpu_mem_usage=True, 
         torch_dtype=torch.float16,
         device_map="auto",
-        )
+    )
+    # compress_module(model, device) 
+    # model.to(device)
+    print(model, tokenizer)
+
+    params = {
+        "model": "vicuna-13b",
+        "prompt": prompt,
+        "temperature": 0.7,
+        "max_new_tokens": 512,
+        "stop": "###"
+    }
+    output = generate_stream(
+        model, tokenizer, params, device, context_len=2048, stream_interval=2)
+
+    yield output
+
+if __name__ == "__main__":
+    pass
+
+
+
+
 
-    print(device)
-    #compress_module(model, device) 
-    print(model, tokenizer)
\ No newline at end of file

From a7755ce2504583098c1c08f09f1ddd0a1d251ed2 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 00:33:14 +0800
Subject: [PATCH 11/23] add gradio

---
 README.md              |  1 +
 environment.yml        | 62 ++++++++++++++++++++++++++++++++++++++++++
 pilot/server/sqlgpt.py | 16 +++++++++--
 requirements.txt       | 48 ++++++++++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 3 deletions(-)
 create mode 100644 environment.yml

diff --git a/README.md b/README.md
index 7bc71a23b..303aa41d2 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 # DB-GPT
 A Open Database-GPT Experiment
+
 ![GitHub Repo stars](https://img.shields.io/github/stars/csunny/db-gpt?style=social)
 
 
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 000000000..81872a557
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,62 @@
+name: db-pgt 
+channels:
+  - pytorch
+  - defaults
+  - anaconda
+dependencies:
+  - python=3.9
+  - cudatoolkit
+  - pip
+  - pytorch=1.12.1
+  - pytorch-mutex=1.0=cuda
+  - torchaudio=0.12.1
+  - torchvision=0.13.1
+  - pip:
+    - accelerate==0.16.0
+    - aiohttp==3.8.4
+    - aiosignal==1.3.1
+    - async-timeout==4.0.2
+    - attrs==22.2.0
+    - bitsandbytes==0.37.0
+    - cchardet==2.1.7
+    - chardet==5.1.0
+    - contourpy==1.0.7
+    - cycler==0.11.0
+    - filelock==3.9.0
+    - fonttools==4.38.0
+    - frozenlist==1.3.3
+    - huggingface-hub==0.13.4
+    - importlib-resources==5.12.0
+    - kiwisolver==1.4.4
+    - matplotlib==3.7.0
+    - multidict==6.0.4
+    - openai==0.27.0
+    - packaging==23.0
+    - psutil==5.9.4
+    - pycocotools==2.0.6
+    - pyparsing==3.0.9
+    - python-dateutil==2.8.2
+    - pyyaml==6.0
+    - regex==2022.10.31
+    - tokenizers==0.13.2
+    - tqdm==4.64.1
+    - transformers==4.28.0
+    - timm==0.6.13
+    - spacy==3.5.1
+    - webdataset==0.2.48
+    - scikit-learn==1.2.2
+    - scipy==1.10.1
+    - yarl==1.8.2
+    - zipp==3.14.0
+    - omegaconf==2.3.0
+    - opencv-python==4.7.0.72
+    - iopath==0.1.10
+    - tenacity==8.2.2
+    - peft
+    - pycocoevalcap
+    - sentence-transformers
+    - umap-learn
+    - notebook
+    - gradio==3.24.1
+    - gradio-client==0.0.8
+    - wandb
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 5bf2244cc..a74e99ad3 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
+
 import json
 import torch
+import gradio as gr
 from fastchat.serve.inference import generate_stream, compress_module
 
 
@@ -35,9 +37,17 @@ def generate(prompt):
     yield output
 
 if __name__ == "__main__":
-    pass
-
-
+    with gr.Blocks() as demo:
+        gr.Markdown("数据库SQL生成助手")
+        with gr.Tab("SQL生成"):
+            text_input = gr.TextArea()
+            text_output = gr.TextArea()
+            text_button = gr.Button("提交")
+        
+
+        text_button.click(generate, input=text_input, output=text_output)
+
+    demo.queue(concurrency_count=3).launch() 
 
 
 
diff --git a/requirements.txt b/requirements.txt
index 44e89c0b9..dd7bf5189 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,51 @@ accelerate==0.16.0
 torch==2.0.0
 torchvision==0.13.1
 torchaudio==0.12.1
+accelerate==0.16.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+async-timeout==4.0.2
+attrs==22.2.0
+bitsandbytes==0.37.0
+cchardet==2.1.7
+chardet==5.1.0
+contourpy==1.0.7
+cycler==0.11.0
+filelock==3.9.0
+fonttools==4.38.0
+frozenlist==1.3.3
+huggingface-hub==0.13.4
+importlib-resources==5.12.0
+kiwisolver==1.4.4
+matplotlib==3.7.0
+multidict==6.0.4
+openai==0.27.0
+packaging==23.0
+psutil==5.9.4
+pycocotools==2.0.6
+pyparsing==3.0.9
+python-dateutil==2.8.2
+pyyaml==6.0
+regex==2022.10.31
+tokenizers==0.13.2
+tqdm==4.64.1
+transformers==4.28.0
+timm==0.6.13
+spacy==3.5.1
+webdataset==0.2.48
+scikit-learn==1.2.2
+scipy==1.10.1
+yarl==1.8.2
+zipp==3.14.0
+omegaconf==2.3.0
+opencv-python==4.7.0.72
+iopath==0.1.10
+tenacity==8.2.2
+peft
+pycocoevalcap
+sentence-transformers
+umap-learn
+notebook
+gradio==3.24.1
+gradio-client==0.0.8
+wandb
\ No newline at end of file

From 2ff4d71fdd7c02ec02d49e4de0262a7e059d43c8 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 00:36:36 +0800
Subject: [PATCH 12/23] fix

---
 pilot/server/sqlgpt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index a74e99ad3..edd2baf84 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -45,7 +45,7 @@ if __name__ == "__main__":
             text_button = gr.Button("提交")
         
 
-        text_button.click(generate, input=text_input, output=text_output)
+        text_button.click(generate, inputs=text_input, outputs=text_output)
 
     demo.queue(concurrency_count=3).launch() 
 

From 75181d6f2fc43889f204fed00d4f61bd94006454 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 01:26:19 +0800
Subject: [PATCH 13/23] update

---
 environment.yml        |  2 +-
 pilot/server/sqlgpt.py | 23 +++++++++++++----------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/environment.yml b/environment.yml
index 81872a557..3ec4dfd98 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,4 +1,4 @@
-name: db-pgt 
+name: db_pgt 
 channels:
   - pytorch
   - defaults
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index edd2baf84..81f9b22dd 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -12,18 +12,18 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 device = "cuda" if torch.cuda.is_available() else "cpu"
 BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"
 
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False)
+model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODE, 
+    low_cpu_mem_usage=True, 
+    torch_dtype=torch.float16,
+    device_map="auto",
+)
+
 def generate(prompt):    
-    tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False)
-    model = AutoModelForCausalLM.from_pretrained(
-        BASE_MODE, 
-        low_cpu_mem_usage=True, 
-        torch_dtype=torch.float16,
-        device_map="auto",
-    )
     # compress_module(model, device) 
     # model.to(device)
     print(model, tokenizer)
-
     params = {
         "model": "vicuna-13b",
         "prompt": prompt,
@@ -32,9 +32,12 @@ def generate(prompt):
         "stop": "###"
     }
     output = generate_stream(
-        model, tokenizer, params, device, context_len=2048, stream_interval=2)
+        model, tokenizer, params, device, context_len=2048, stream_interval=2): 
+        
 
-    yield output
+    for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"):
+        if chunk:
+            yield chunk
 
 if __name__ == "__main__":
     with gr.Blocks() as demo:

From 41388cec5e295b9781d92a95088a7afb0e41b547 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 01:44:12 +0800
Subject: [PATCH 14/23] add prompt

---
 pilot/server/sqlgpt.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 81f9b22dd..3148f58b6 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -32,12 +32,13 @@ def generate(prompt):
         "stop": "###"
     }
     output = generate_stream(
-        model, tokenizer, params, device, context_len=2048, stream_interval=2): 
+        model, tokenizer, params, device, context_len=2048, stream_interval=2)
         
-
-    for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"):
-        if chunk:
-            yield chunk
+    for chunk in output:
+        yield chunk
+    #for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"):
+    #    if chunk:
+    #        yield chunk
 
 if __name__ == "__main__":
     with gr.Blocks() as demo:
@@ -50,7 +51,7 @@ if __name__ == "__main__":
 
         text_button.click(generate, inputs=text_input, outputs=text_output)
 
-    demo.queue(concurrency_count=3).launch() 
+    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") 
 
 
 

From c7d3dd2ef2a2bf45f10e01000d234dfdcd66ca91 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 01:44:33 +0800
Subject: [PATCH 15/23] update

---
 pilot/server/sqlgpt.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 81f9b22dd..52522e6bd 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -7,7 +7,6 @@ import torch
 import gradio as gr
 from fastchat.serve.inference import generate_stream, compress_module
 
-
 from transformers import AutoTokenizer, AutoModelForCausalLM
 device = "cuda" if torch.cuda.is_available() else "cpu"
 BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"
@@ -26,18 +25,19 @@ def generate(prompt):
     print(model, tokenizer)
     params = {
         "model": "vicuna-13b",
-        "prompt": prompt,
+        "prompt": "这是一个用户与助手之间的对话, 助手精通数据库领域的知识, 并能够对数据库领域知识做出非常专业的回答。以下是用户的问题:" + prompt,
         "temperature": 0.7,
         "max_new_tokens": 512,
         "stop": "###"
     }
-    output = generate_stream(
-        model, tokenizer, params, device, context_len=2048, stream_interval=2): 
+    for output in generate_stream(
+        model, tokenizer, params, device, context_len=2048, stream_interval=2):
+        ret = {
+            "text": output,
+            "error_code": 0
+        }
         
-
-    for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"):
-        if chunk:
-            yield chunk
+        yield json.dumps(ret).decode() + b"\0"
 
 if __name__ == "__main__":
     with gr.Blocks() as demo:
@@ -50,7 +50,7 @@ if __name__ == "__main__":
 
         text_button.click(generate, inputs=text_input, outputs=text_output)
 
-    demo.queue(concurrency_count=3).launch() 
+    demo.queue(concurrency_count=3).launch(host="0.0.0.0") 
 
 
 

From 6b770d8e96d82e26c32a9c8f3f5a32fe63545bb3 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 15:17:48 +0800
Subject: [PATCH 16/23] run a demo

---
 .vscode/launch.json       | 25 ++++++++++++++++++
 pilot/__init__.py         |  2 --
 pilot/model/inference.py  |  4 +++
 pilot/model/vicuna_llm.py | 22 +++++++++++++++-
 pilot/server/chatbot.py   | 53 +++++++++++++++++++++++++++++++++++++++
 pilot/server/sqlgpt.py    | 18 ++++---------
 6 files changed, 108 insertions(+), 16 deletions(-)
 create mode 100644 .vscode/launch.json
 create mode 100644 pilot/model/inference.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 000000000..09a35ce9c
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,25 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "env": {"PYTHONPATH": "${workspaceFolder}"},
+            "envFile": "${workspaceFolder}/.env"
+        },
+        {
+            "name": "Python: Module",
+            "type": "python",
+            "request": "launch",
+            "module": "pilot",
+            "justMyCode": true,
+        }
+    ]
+}
\ No newline at end of file
diff --git a/pilot/__init__.py b/pilot/__init__.py
index 9244e14db..f102a9cad 100644
--- a/pilot/__init__.py
+++ b/pilot/__init__.py
@@ -1,3 +1 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 __version__ = "0.0.1"
diff --git a/pilot/model/inference.py b/pilot/model/inference.py
new file mode 100644
index 000000000..c3698fb1f
--- /dev/null
+++ b/pilot/model/inference.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import torch
diff --git a/pilot/model/vicuna_llm.py b/pilot/model/vicuna_llm.py
index 1cc0ca3c3..b3ecd079d 100644
--- a/pilot/model/vicuna_llm.py
+++ b/pilot/model/vicuna_llm.py
@@ -1,9 +1,29 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
+import requests
+from typing import Any, Mapping, Optional, List
 from transformers import pipeline
 from langchain.llms.base import LLM
 from configs.model_config import *
 
 class VicunaLLM(LLM):
-    model_name = llm_model_config[LLM_MODEL]
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        url = vicuna_model_server
+        params = {
+            "model": "vicuna-13b",
+            "prompt": prompt,
+            "temperature": 0.7,
+            "max_new_tokens": 512,
+            "stop": "###"
+        }
+        pass
+
+    @property
+    def _llm_type(self) -> str:
+        return "custome"
+
+    def _identifying_params(self) -> Mapping[str, Any]:
+        return {}
+    
\ No newline at end of file
diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py
index 97206f2d5..6cc1b8904 100644
--- a/pilot/server/chatbot.py
+++ b/pilot/server/chatbot.py
@@ -1,3 +1,56 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
+import requests
+import json
+import time
+from urllib.parse import urljoin
+import gradio as gr
+from configs.model_config import *
+vicuna_base_uri = "http://192.168.31.114:21002/"
+vicuna_stream_path = "worker_generate_stream"
+vicuna_status_path = "worker_get_status"
+
+def generate(prompt):
+    params = {
+        "model": "vicuna-13b",
+        "prompt": "给出一个查询用户的SQL",
+        "temperature": 0.7,
+        "max_new_tokens": 512,
+        "stop": "###"
+    }
+
+    sts_response = requests.post(
+        url=urljoin(vicuna_base_uri, vicuna_status_path)
+    )
+    print(sts_response.text)
+
+    response = requests.post(
+        url=urljoin(vicuna_base_uri, vicuna_stream_path), data=json.dumps(params)
+    )
+
+    skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3
+    for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
+        if chunk:
+            data = json.loads(chunk.decode())
+            if data["error_code"] == 0:
+                output = data["text"]
+                yield(output) 
+            
+            time.sleep(0.02)
+
+if __name__ == "__main__":
+    print(LLM_MODEL)
+    with gr.Blocks() as demo:
+        gr.Markdown("数据库SQL生成助手")
+        with gr.Tab("SQL生成"):
+            text_input = gr.TextArea()
+            text_output = gr.TextArea()
+            text_button = gr.Button("提交")
+        
+
+        text_button.click(generate, inputs=text_input, outputs=text_output)
+
+    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") 
+
+    
\ No newline at end of file
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 52522e6bd..6dbf1bfc1 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -20,8 +20,8 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 
 def generate(prompt):    
-    # compress_module(model, device) 
-    # model.to(device)
+    compress_module(model, device) 
+    model.to(device)
     print(model, tokenizer)
     params = {
         "model": "vicuna-13b",
@@ -31,13 +31,8 @@ def generate(prompt):
         "stop": "###"
     }
     for output in generate_stream(
-        model, tokenizer, params, device, context_len=2048, stream_interval=2):
-        ret = {
-            "text": output,
-            "error_code": 0
-        }
-        
-        yield json.dumps(ret).decode() + b"\0"
+        model, tokenizer, params, device, context_len=2048, stream_interval=1):
+        yield output 
 
 if __name__ == "__main__":
     with gr.Blocks() as demo:
@@ -50,7 +45,4 @@ if __name__ == "__main__":
 
         text_button.click(generate, inputs=text_input, outputs=text_output)
 
-    demo.queue(concurrency_count=3).launch(host="0.0.0.0") 
-
-
-
+    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") 

From 7566d636b6d24964b8422f1ce1291aff1eee6da4 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 18:28:42 +0800
Subject: [PATCH 17/23] add vicuna embedding

---
 examples/t5_example.py        | 240 +++++++++++++++++++++++++++++++++
 pilot/app.py                  | 247 ++--------------------------------
 pilot/model/inference.py      |  82 +++++++++++
 pilot/model/loader.py         |   8 +-
 pilot/model/vicuna_llm.py     |  73 ++++++++--
 pilot/server/chatbot.py       |   2 +-
 pilot/server/vicuna_server.py |  48 +++++++
 7 files changed, 450 insertions(+), 250 deletions(-)
 create mode 100644 examples/t5_example.py
 create mode 100644 pilot/server/vicuna_server.py

diff --git a/examples/t5_example.py b/examples/t5_example.py
new file mode 100644
index 000000000..a63c9f961
--- /dev/null
+++ b/examples/t5_example.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from llama_index import LLMPredictor
+import torch
+from langchain.llms.base import LLM
+from transformers import pipeline
+
+
+class FlanLLM(LLM):
+    model_name = "google/flan-t5-large"
+    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={
+        "torch_dtype": torch.bfloat16
+    })
+
+    def _call(self, prompt, stop=None):
+        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
+
+    def _identifying_params(self):
+        return {"name_of_model": self.model_name}
+
+    def _llm_type(self):
+        return "custome"
+
+llm_predictor = LLMPredictor(llm=FlanLLM())
+hfemb = HuggingFaceEmbeddings()
+embed_model = LangchainEmbedding(hfemb)
+
+text1 = """
+    执行计划是对一条 SQL 查询语句在数据库中执行过程的描述。用户可以通过 EXPLAIN 命令查看优化器针对指定 SQL 生成的逻辑执行计划。
+
+如果要分析某条 SQL 的性能问题，通常需要先查看 SQL 的执行计划，排查每一步 SQL 执行是否存在问题。所以读懂执行计划是 SQL 优化的先决条件，而了解执行计划的算子是理解 EXPLAIN 命令的关键。
+
+OceanBase 数据库的执行计划命令有三种模式：EXPLAIN BASIC、EXPLAIN 和 EXPLAIN EXTENDED。这三种模式对执行计划展现不同粒度的细节信息:
+
+EXPLAIN BASIC 命令用于最基本的计划展示。
+
+EXPLAIN EXTENDED 命令用于最详细的计划展示（通常在排查问题时使用这种展示模式）。
+
+EXPLAIN 命令所展示的信息可以帮助普通用户了解整个计划的执行方式。
+
+EXPLAIN 命令格式如下：
+EXPLAIN [BASIC | EXTENDED | PARTITIONS | FORMAT = format_name] [PRETTY | PRETTY_COLOR] explainable_stmt
+format_name: 
+  { TRADITIONAL | JSON }
+explainable_stmt: 
+  { SELECT statement
+ | DELETE statement
+ | INSERT statement
+ | REPLACE statement
+ | UPDATE statement }
+
+
+EXPLAIN 命令适用于 SELECT、DELETE、INSERT、REPLACE 和 UPDATE 语句，显示优化器所提供的有关语句执行计划的信息，包括如何处理该语句，如何联接表以及以何种顺序联接表等信息。
+
+一般来说，可以使用 EXPLAIN EXTENDED 命令，将表扫描的范围段展示出来。使用 EXPLAIN OUTLINE 命令可以显示 Outline 信息。
+
+FORMAT 选项可用于选择输出格式。TRADITIONAL 表示以表格格式显示输出，这也是默认设置。JSON 表示以 JSON 格式显示信息。
+
+使用 EXPLAIN PARTITITIONS 也可用于检查涉及分区表的查询。如果检查针对非分区表的查询，则不会产生错误，但 PARTIONS 列的值始终为 NULL。
+
+对于复杂的执行计划，可以使用 PRETTY 或者 PRETTY_COLOR 选项将计划树中的父节点和子节点使用树线或彩色树线连接起来，使得执行计划展示更方便阅读。示例如下：
+obclient> CREATE TABLE p1table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 2;
+Query OK, 0 rows affected
+
+obclient> CREATE TABLE p2table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 4;
+Query OK, 0 rows affected
+
+obclient> EXPLAIN EXTENDED PRETTY_COLOR SELECT  * FROM p1table p1 JOIN p2table p2 ON p1.c1=p2.c2\G
+*************************** 1. row ***************************
+Query Plan: ==========================================================
+|ID|OPERATOR                     |NAME    |EST. ROWS|COST|
+----------------------------------------------------------
+|0 |PX COORDINATOR               |        |1        |278 |
+|1 | EXCHANGE OUT DISTR          |:EX10001|1        |277 |
+|2 |  HASH JOIN                  |        |1        |276 |
+|3 |  ├PX PARTITION ITERATOR     |        |1        |92  |
+|4 |  │ TABLE SCAN               |P1      |1        |92  |
+|5 |  └EXCHANGE IN DISTR         |        |1        |184 |
+|6 |    EXCHANGE OUT DISTR (PKEY)|:EX10000|1        |184 |
+|7 |     PX PARTITION ITERATOR   |        |1        |183 |
+|8 |      TABLE SCAN             |P2      |1        |183 |
+==========================================================
+
+Outputs & filters:
+-------------------------------------
+  0 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil)
+  1 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil), dop=1
+  2 - output([P1.C1], [P2.C2], [P1.C2], [P2.C1]), filter(nil),
+      equal_conds([P1.C1 = P2.C2]), other_conds(nil)
+  3 - output([P1.C1], [P1.C2]), filter(nil)
+  4 - output([P1.C1], [P1.C2]), filter(nil),
+      access([P1.C1], [P1.C2]), partitions(p[0-1])
+  5 - output([P2.C2], [P2.C1]), filter(nil)
+  6 - (#keys=1, [P2.C2]), output([P2.C2], [P2.C1]), filter(nil), dop=1
+  7 - output([P2.C1], [P2.C2]), filter(nil)
+  8 - output([P2.C1], [P2.C2]), filter(nil),
+      access([P2.C1], [P2.C2]), partitions(p[0-3])
+
+1 row in set 
+
+
+
+ 
+## 执行计划形状与算子信息 
+
+在数据库系统中，执行计划在内部通常是以树的形式来表示的，但是不同的数据库会选择不同的方式展示给用户。
+
+如下示例分别为 PostgreSQL 数据库、Oracle 数据库和 OceanBase 数据库对于 TPCDS Q3 的计划展示。
+
+```sql
+obclient> SELECT /*TPC-DS Q3*/ * 
+     FROM (SELECT dt.d_year, 
+                  item.i_brand_id    brand_id, 
+                  item.i_brand       brand, 
+                  Sum(ss_net_profit) sum_agg 
+           FROM   date_dim dt, 
+                  store_sales, 
+                  item 
+           WHERE  dt.d_date_sk = store_sales.ss_sold_date_sk 
+                  AND store_sales.ss_item_sk = item.i_item_sk 
+                  AND item.i_manufact_id = 914 
+                  AND dt.d_moy = 11 
+           GROUP  BY dt.d_year, 
+                  item.i_brand, 
+                  item.i_brand_id 
+           ORDER  BY dt.d_year, 
+                  sum_agg DESC, 
+                  brand_id) 
+     WHERE ROWNUM <= 100; 
+
+PostgreSQL 数据库执行计划展示如下：
+Limit  (cost=13986.86..13987.20 rows=27 width=91)
+         Sort  (cost=13986.86..13986.93 rows=27 width=65)
+         Sort Key: dt.d_year, (sum(store_sales.ss_net_profit)), item.i_brand_id
+              HashAggregate  (cost=13985.95..13986.22 rows=27 width=65)
+                     Merge Join  (cost=13884.21..13983.91 rows=204 width=65)
+                     Merge Cond: (dt.d_date_sk = store_sales.ss_sold_date_sk)
+                           Index Scan using date_dim_pkey on date_dim dt  (cost=0.00..3494.62 rows=6080 width=8)
+                           Filter: (d_moy = 11)
+                           Sort  (cost=12170.87..12177.27 rows=2560 width=65)
+                           Sort Key: store_sales.ss_sold_date_sk
+                                 Nested Loop  (cost=6.02..12025.94 rows=2560 width=65)
+                                       Seq Scan on item  (cost=0.00..1455.00 rows=16 width=59)
+                                       Filter: (i_manufact_id = 914)
+                                       Bitmap Heap Scan on store_sales  (cost=6.02..658.94 rows=174 width=14)
+                                       Recheck Cond: (ss_item_sk = item.i_item_sk)
+                                             Bitmap Index Scan on store_sales_pkey  (cost=0.00..5.97 rows=174 width=0)
+                                             Index Cond: (ss_item_sk = item.i_item_sk)
+
+
+
+Oracle 数据库执行计划展示如下：
+Plan hash value: 2331821367
+--------------------------------------------------------------------------------------------------
+| Id  | Operation                         | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
+--------------------------------------------------------------------------------------------------
+|   0 | SELECT STATEMENT                  |              |   100 |  9100 |  3688   (1)| 00:00:01 |
+|*  1 |  COUNT STOPKEY                    |              |       |       |            |          |
+|   2 |   VIEW                            |              |  2736 |   243K|  3688   (1)| 00:00:01 |
+|*  3 |    SORT ORDER BY STOPKEY          |              |  2736 |   256K|  3688   (1)| 00:00:01 |
+|   4 |     HASH GROUP BY                 |              |  2736 |   256K|  3688   (1)| 00:00:01 |
+|*  5 |      HASH JOIN                    |              |  2736 |   256K|  3686   (1)| 00:00:01 |
+|*  6 |       TABLE ACCESS FULL           | DATE_DIM     |  6087 | 79131 |   376   (1)| 00:00:01 |
+|   7 |       NESTED LOOPS                |              |  2865 |   232K|  3310   (1)| 00:00:01 |
+|   8 |        NESTED LOOPS               |              |  2865 |   232K|  3310   (1)| 00:00:01 |
+|*  9 |         TABLE ACCESS FULL         | ITEM         |    18 |  1188 |   375   (0)| 00:00:01 |
+|* 10 |         INDEX RANGE SCAN          | SYS_C0010069 |   159 |       |     2   (0)| 00:00:01 |
+|  11 |        TABLE ACCESS BY INDEX ROWID| STORE_SALES  |   159 |  2703 |   163   (0)| 00:00:01 |
+--------------------------------------------------------------------------------------------------
+
+OceanBase 数据库执行计划展示如下：
+|ID|OPERATOR              |NAME       |EST. ROWS|COST |
+-------------------------------------------------------
+|0 |LIMIT                 |           |100      |81141|
+|1 | TOP-N SORT           |           |100      |81127|
+|2 |  HASH GROUP BY       |           |2924     |68551|
+|3 |   HASH JOIN          |           |2924     |65004|
+|4 |    SUBPLAN SCAN      |VIEW1      |2953     |19070|
+|5 |     HASH GROUP BY    |           |2953     |18662|
+|6 |      NESTED-LOOP JOIN|           |2953     |15080|
+|7 |       TABLE SCAN     |ITEM       |19       |11841|
+|8 |       TABLE SCAN     |STORE_SALES|161      |73   |
+|9 |    TABLE SCAN        |DT         |6088     |29401|
+=======================================================
+
+由示例可见，OceanBase 数据库的计划展示与 Oracle 数据库类似。
+
+OceanBase 数据库执行计划中的各列的含义如下：
+列名  含义
+ID  执行树按照前序遍历的方式得到的编号（从 0 开始）。
+OPERATOR    操作算子的名称。
+NAME    对应表操作的表名（索引名）。
+EST. ROWS   估算该操作算子的输出行数。
+COST    该操作算子的执行代价（微秒）。
+
+
+OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形结构展示。其中每一个操作在树中的层次通过其在 operator 中的缩进予以展示，层次最深的优先执行，层次相同的以特定算子的执行顺序为标准来执行。
+
+问题:  update a not exists (b…)
+我一开始以为 B是驱动表，B的数据挺多的 后来看到NLAJ，是说左边的表关联右边的表
+所以这个的驱动表是不是实际是A，用A的匹配B的，这个理解有问题吗
+
+回答: 没错 A 驱动 B的
+
+问题: 光知道最下最右的是驱动表了 所以一开始搞得有点懵 :sweat_smile:
+
+回答: nlj应该原理应该都是左表(驱动表)的记录探测右表(被驱动表)， 选哪张成为左表或右表就基于一些其他考量了，比如数据量， 而anti join/semi join只是对 not exist/exist的一种优化，相关的原理和资料网上可以查阅一下
+
+问题: 也就是nlj 就是按照之前理解的谁先执行 谁就是驱动表 也就是执行计划中的最右的表
+而anti join/semi join，谁在not exist左面，谁就是驱动表。这么理解对吧
+
+回答: nlj也是左表的表是驱动表，这个要了解下计划执行方面的基本原理，取左表的一行数据，再遍历右表，一旦满足连接条件，就可以返回数据
+anti/semi只是因为not exists/exist的语义只是返回左表数据，改成anti join是一种计划优化，连接的方式比子查询更优
+""" 
+
+from llama_index import Document
+text_list = [text1]
+documents = [Document(t) for t in text_list]
+
+num_output = 250
+max_input_size = 512
+
+max_chunk_overlap = 20
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+index.save_to_disk("index.json")
+
+
+if __name__ == "__main__":
+    import logging
+    logging.getLogger().setLevel(logging.CRITICAL)
+    for d in documents:
+        print(d)
+
+    response = index.query("数据库的执行计划命令有多少?")
+    print(response)
diff --git a/pilot/app.py b/pilot/app.py
index b001d0933..5f2cf93db 100644
--- a/pilot/app.py
+++ b/pilot/app.py
@@ -1,241 +1,18 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
-from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import LLMPredictor
-import torch
-from langchain.llms.base import LLM
-from transformers import pipeline
 
+from langchain.agents import (
+    load_tools,
+    initialize_agent,
+    AgentType
+)
 
-class FlanLLM(LLM):
-    model_name = "google/flan-t5-large"
-    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={
-        "torch_dtype": torch.bfloat16
-    })
+from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
+llm = VicunaRequestLLM()
 
-    def _call(self, prompt, stop=None):
-        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
-
-    def _identifying_params(self):
-        return {"name_of_model": self.model_name}
-
-    def _llm_type(self):
-        return "custome"
-
-llm_predictor = LLMPredictor(llm=FlanLLM())
-hfemb = HuggingFaceEmbeddings()
-embed_model = LangchainEmbedding(hfemb)
-
-text1 = """
-    执行计划是对一条 SQL 查询语句在数据库中执行过程的描述。用户可以通过 EXPLAIN 命令查看优化器针对指定 SQL 生成的逻辑执行计划。
-
-如果要分析某条 SQL 的性能问题，通常需要先查看 SQL 的执行计划，排查每一步 SQL 执行是否存在问题。所以读懂执行计划是 SQL 优化的先决条件，而了解执行计划的算子是理解 EXPLAIN 命令的关键。
-
-OceanBase 数据库的执行计划命令有三种模式：EXPLAIN BASIC、EXPLAIN 和 EXPLAIN EXTENDED。这三种模式对执行计划展现不同粒度的细节信息:
-
-EXPLAIN BASIC 命令用于最基本的计划展示。
-
-EXPLAIN EXTENDED 命令用于最详细的计划展示（通常在排查问题时使用这种展示模式）。
-
-EXPLAIN 命令所展示的信息可以帮助普通用户了解整个计划的执行方式。
-
-EXPLAIN 命令格式如下：
-EXPLAIN [BASIC | EXTENDED | PARTITIONS | FORMAT = format_name] [PRETTY | PRETTY_COLOR] explainable_stmt
-format_name: 
-  { TRADITIONAL | JSON }
-explainable_stmt: 
-  { SELECT statement
- | DELETE statement
- | INSERT statement
- | REPLACE statement
- | UPDATE statement }
-
-
-EXPLAIN 命令适用于 SELECT、DELETE、INSERT、REPLACE 和 UPDATE 语句，显示优化器所提供的有关语句执行计划的信息，包括如何处理该语句，如何联接表以及以何种顺序联接表等信息。
-
-一般来说，可以使用 EXPLAIN EXTENDED 命令，将表扫描的范围段展示出来。使用 EXPLAIN OUTLINE 命令可以显示 Outline 信息。
-
-FORMAT 选项可用于选择输出格式。TRADITIONAL 表示以表格格式显示输出，这也是默认设置。JSON 表示以 JSON 格式显示信息。
-
-使用 EXPLAIN PARTITITIONS 也可用于检查涉及分区表的查询。如果检查针对非分区表的查询，则不会产生错误，但 PARTIONS 列的值始终为 NULL。
-
-对于复杂的执行计划，可以使用 PRETTY 或者 PRETTY_COLOR 选项将计划树中的父节点和子节点使用树线或彩色树线连接起来，使得执行计划展示更方便阅读。示例如下：
-obclient> CREATE TABLE p1table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 2;
-Query OK, 0 rows affected
-
-obclient> CREATE TABLE p2table(c1 INT ,c2 INT) PARTITION BY HASH(c1) PARTITIONS 4;
-Query OK, 0 rows affected
-
-obclient> EXPLAIN EXTENDED PRETTY_COLOR SELECT  * FROM p1table p1 JOIN p2table p2 ON p1.c1=p2.c2\G
-*************************** 1. row ***************************
-Query Plan: ==========================================================
-|ID|OPERATOR                     |NAME    |EST. ROWS|COST|
-----------------------------------------------------------
-|0 |PX COORDINATOR               |        |1        |278 |
-|1 | EXCHANGE OUT DISTR          |:EX10001|1        |277 |
-|2 |  HASH JOIN                  |        |1        |276 |
-|3 |  ├PX PARTITION ITERATOR     |        |1        |92  |
-|4 |  │ TABLE SCAN               |P1      |1        |92  |
-|5 |  └EXCHANGE IN DISTR         |        |1        |184 |
-|6 |    EXCHANGE OUT DISTR (PKEY)|:EX10000|1        |184 |
-|7 |     PX PARTITION ITERATOR   |        |1        |183 |
-|8 |      TABLE SCAN             |P2      |1        |183 |
-==========================================================
-
-Outputs & filters:
--------------------------------------
-  0 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil)
-  1 - output([INTERNAL_FUNCTION(P1.C1, P1.C2, P2.C1, P2.C2)]), filter(nil), dop=1
-  2 - output([P1.C1], [P2.C2], [P1.C2], [P2.C1]), filter(nil),
-      equal_conds([P1.C1 = P2.C2]), other_conds(nil)
-  3 - output([P1.C1], [P1.C2]), filter(nil)
-  4 - output([P1.C1], [P1.C2]), filter(nil),
-      access([P1.C1], [P1.C2]), partitions(p[0-1])
-  5 - output([P2.C2], [P2.C1]), filter(nil)
-  6 - (#keys=1, [P2.C2]), output([P2.C2], [P2.C1]), filter(nil), dop=1
-  7 - output([P2.C1], [P2.C2]), filter(nil)
-  8 - output([P2.C1], [P2.C2]), filter(nil),
-      access([P2.C1], [P2.C2]), partitions(p[0-3])
-
-1 row in set 
-
-
-
- 
-## 执行计划形状与算子信息 
-
-在数据库系统中，执行计划在内部通常是以树的形式来表示的，但是不同的数据库会选择不同的方式展示给用户。
-
-如下示例分别为 PostgreSQL 数据库、Oracle 数据库和 OceanBase 数据库对于 TPCDS Q3 的计划展示。
-
-```sql
-obclient> SELECT /*TPC-DS Q3*/ * 
-     FROM (SELECT dt.d_year, 
-                  item.i_brand_id    brand_id, 
-                  item.i_brand       brand, 
-                  Sum(ss_net_profit) sum_agg 
-           FROM   date_dim dt, 
-                  store_sales, 
-                  item 
-           WHERE  dt.d_date_sk = store_sales.ss_sold_date_sk 
-                  AND store_sales.ss_item_sk = item.i_item_sk 
-                  AND item.i_manufact_id = 914 
-                  AND dt.d_moy = 11 
-           GROUP  BY dt.d_year, 
-                  item.i_brand, 
-                  item.i_brand_id 
-           ORDER  BY dt.d_year, 
-                  sum_agg DESC, 
-                  brand_id) 
-     WHERE ROWNUM <= 100; 
-
-PostgreSQL 数据库执行计划展示如下：
-Limit  (cost=13986.86..13987.20 rows=27 width=91)
-         Sort  (cost=13986.86..13986.93 rows=27 width=65)
-         Sort Key: dt.d_year, (sum(store_sales.ss_net_profit)), item.i_brand_id
-              HashAggregate  (cost=13985.95..13986.22 rows=27 width=65)
-                     Merge Join  (cost=13884.21..13983.91 rows=204 width=65)
-                     Merge Cond: (dt.d_date_sk = store_sales.ss_sold_date_sk)
-                           Index Scan using date_dim_pkey on date_dim dt  (cost=0.00..3494.62 rows=6080 width=8)
-                           Filter: (d_moy = 11)
-                           Sort  (cost=12170.87..12177.27 rows=2560 width=65)
-                           Sort Key: store_sales.ss_sold_date_sk
-                                 Nested Loop  (cost=6.02..12025.94 rows=2560 width=65)
-                                       Seq Scan on item  (cost=0.00..1455.00 rows=16 width=59)
-                                       Filter: (i_manufact_id = 914)
-                                       Bitmap Heap Scan on store_sales  (cost=6.02..658.94 rows=174 width=14)
-                                       Recheck Cond: (ss_item_sk = item.i_item_sk)
-                                             Bitmap Index Scan on store_sales_pkey  (cost=0.00..5.97 rows=174 width=0)
-                                             Index Cond: (ss_item_sk = item.i_item_sk)
-
-
-
-Oracle 数据库执行计划展示如下：
-Plan hash value: 2331821367
---------------------------------------------------------------------------------------------------
-| Id  | Operation                         | Name         | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------------
-|   0 | SELECT STATEMENT                  |              |   100 |  9100 |  3688   (1)| 00:00:01 |
-|*  1 |  COUNT STOPKEY                    |              |       |       |            |          |
-|   2 |   VIEW                            |              |  2736 |   243K|  3688   (1)| 00:00:01 |
-|*  3 |    SORT ORDER BY STOPKEY          |              |  2736 |   256K|  3688   (1)| 00:00:01 |
-|   4 |     HASH GROUP BY                 |              |  2736 |   256K|  3688   (1)| 00:00:01 |
-|*  5 |      HASH JOIN                    |              |  2736 |   256K|  3686   (1)| 00:00:01 |
-|*  6 |       TABLE ACCESS FULL           | DATE_DIM     |  6087 | 79131 |   376   (1)| 00:00:01 |
-|   7 |       NESTED LOOPS                |              |  2865 |   232K|  3310   (1)| 00:00:01 |
-|   8 |        NESTED LOOPS               |              |  2865 |   232K|  3310   (1)| 00:00:01 |
-|*  9 |         TABLE ACCESS FULL         | ITEM         |    18 |  1188 |   375   (0)| 00:00:01 |
-|* 10 |         INDEX RANGE SCAN          | SYS_C0010069 |   159 |       |     2   (0)| 00:00:01 |
-|  11 |        TABLE ACCESS BY INDEX ROWID| STORE_SALES  |   159 |  2703 |   163   (0)| 00:00:01 |
---------------------------------------------------------------------------------------------------
-
-OceanBase 数据库执行计划展示如下：
-|ID|OPERATOR              |NAME       |EST. ROWS|COST |
--------------------------------------------------------
-|0 |LIMIT                 |           |100      |81141|
-|1 | TOP-N SORT           |           |100      |81127|
-|2 |  HASH GROUP BY       |           |2924     |68551|
-|3 |   HASH JOIN          |           |2924     |65004|
-|4 |    SUBPLAN SCAN      |VIEW1      |2953     |19070|
-|5 |     HASH GROUP BY    |           |2953     |18662|
-|6 |      NESTED-LOOP JOIN|           |2953     |15080|
-|7 |       TABLE SCAN     |ITEM       |19       |11841|
-|8 |       TABLE SCAN     |STORE_SALES|161      |73   |
-|9 |    TABLE SCAN        |DT         |6088     |29401|
-=======================================================
-
-由示例可见，OceanBase 数据库的计划展示与 Oracle 数据库类似。
-
-OceanBase 数据库执行计划中的各列的含义如下：
-列名  含义
-ID  执行树按照前序遍历的方式得到的编号（从 0 开始）。
-OPERATOR    操作算子的名称。
-NAME    对应表操作的表名（索引名）。
-EST. ROWS   估算该操作算子的输出行数。
-COST    该操作算子的执行代价（微秒）。
-
-
-OceanBase 数据库 EXPLAIN 命令输出的第一部分是执行计划的树形结构展示。其中每一个操作在树中的层次通过其在 operator 中的缩进予以展示，层次最深的优先执行，层次相同的以特定算子的执行顺序为标准来执行。
-
-问题:  update a not exists (b…)
-我一开始以为 B是驱动表，B的数据挺多的 后来看到NLAJ，是说左边的表关联右边的表
-所以这个的驱动表是不是实际是A，用A的匹配B的，这个理解有问题吗
-
-回答: 没错 A 驱动 B的
-
-问题: 光知道最下最右的是驱动表了 所以一开始搞得有点懵 :sweat_smile:
-
-回答: nlj应该原理应该都是左表(驱动表)的记录探测右表(被驱动表)， 选哪张成为左表或右表就基于一些其他考量了，比如数据量， 而anti join/semi join只是对 not exist/exist的一种优化，相关的原理和资料网上可以查阅一下
-
-问题: 也就是nlj 就是按照之前理解的谁先执行 谁就是驱动表 也就是执行计划中的最右的表
-而anti join/semi join，谁在not exist左面，谁就是驱动表。这么理解对吧
-
-回答: nlj也是左表的表是驱动表，这个要了解下计划执行方面的基本原理，取左表的一行数据，再遍历右表，一旦满足连接条件，就可以返回数据
-anti/semi只是因为not exists/exist的语义只是返回左表数据，改成anti join是一种计划优化，连接的方式比子查询更优
-
-""" 
-
-from llama_index import Document
-text_list = [text1]
-documents = [Document(t) for t in text_list]
-
-num_output = 250
-max_input_size = 512
-
-max_chunk_overlap = 20
-prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
-
-index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
-index.save_to_disk("index.json")
-
-
-if __name__ == "__main__":
-    import logging
-    logging.getLogger().setLevel(logging.CRITICAL)
-    for d in documents:
-        print(d)
-
-    response = index.query("数据库的执行计划命令有多少?")
-    print(response)
+tools = load_tools(['python_repl'], llm=llm)
+agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
+agent.run(
+    "Write a python script that prints 'Hello World!'"
+)
\ No newline at end of file
diff --git a/pilot/model/inference.py b/pilot/model/inference.py
index c3698fb1f..2f2948457 100644
--- a/pilot/model/inference.py
+++ b/pilot/model/inference.py
@@ -2,3 +2,85 @@
 # -*- coding: utf-8 -*-
 
 import torch
+
+@torch.inference_mode()
+def generate_output(model, tokenizer, params, device, context_len=2048):
+    """Generate a completion for ``params["prompt"]`` one token at a time."""
+    temperature = float(params.get("temperature", 1.0))
+    max_new_tokens = int(params.get("max_new_tokens", 256))
+    stop_parameter = params.get("stop", None)
+
+    if stop_parameter == tokenizer.eso_token:
+        stop_parameter = None
+    
+    stop_strings = []
+    # ``stop`` may be a single string, a list of strings, or None; any other
+    # type raises TypeError.
+    if isinstance(stop_parameter, str):
+        stop_strings.append(stop_parameter)
+    elif isinstance(stop_parameter, list):
+        stop_strings = stop_parameter
+    elif stop_parameter is not None:
+        raise TypeError("Stop parameter must be string or list of strings.")
+
+    # Keep only the newest prompt tokens so that prompt plus completion (and
+    # a margin of 8 tokens) fits into ``context_len``.
+    max_src_len = context_len - max_new_tokens - 8
+    input_ids = tokenizer(params["prompt"]).input_ids[-max_src_len:]
+    output_ids = []
+    # Pre-set so that ``max_new_tokens == 0`` returns "" instead of failing.
+    output = ""
+    past_key_values = None
+
+    for i in range(max_new_tokens):
+        if i == 0:
+            # First step: run the whole prompt and start the cache.
+            out = model(torch.as_tensor([input_ids], device=device), use_cache=True)
+        else:
+            # Later steps: feed only the previous token and reuse the cache.
+            out = model(
+                input_ids=torch.as_tensor([[token]], device=device),
+                use_cache=True,
+                past_key_values=past_key_values,
+            )
+        # The cache attribute is ``past_key_values`` on every step.
+        past_key_values = out.past_key_values
+        last_token_logits = out.logits[0][-1]
+
+        if temperature < 1e-4:
+            # Near-zero temperature: pick the most likely token.
+            token = int(torch.argmax(last_token_logits))
+        else:
+            # Normalise over the last axis; ``dim=1`` is out of range when the
+            # logits of a single position are 1-D.
+            probs = torch.softmax(last_token_logits / temperature, dim=-1)
+            token = int(torch.multinomial(probs, num_samples=1))
+
+        output_ids.append(token)
+        # Stop on the end-of-sequence token or on the first stop string found.
+        stopped = token == tokenizer.eos_token_id
+
+        output = tokenizer.decode(output_ids, skip_special_tokens=True)
+        for stop_str in stop_strings:
+            pos = output.rfind(stop_str)
+            if pos != -1:
+                # Cut the stop string and everything after it.
+                output = output[:pos]
+                stopped = True
+                break
+
+        if stopped:
+            break
+
+    # Release the cache reference before returning.
+    del past_key_values
+    return output
+
+
+@torch.inference_mode()
+def get_embeddings(model, tokenizer, prompt):
+    """Return the mean input-embedding vector of ``prompt`` as a CPU tensor."""
+    input_embeddings = model.get_input_embeddings()
+    ids = torch.as_tensor([tokenizer(prompt).input_ids], device=input_embeddings.weight.device)
+    # Average over the token axis; the ids are created on the embedding's device.
+    return torch.mean(input_embeddings(ids)[0], 0).cpu().detach()
\ No newline at end of file
diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 98af18982..979b2bb89 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -2,8 +2,6 @@
 # -*- coding: utf-8 -*-
 
 import torch
-from pilot.utils import get_gpu_memory
-from fastchat.serve.inference import compress_module
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
@@ -28,12 +26,12 @@ class ModerLoader:
         tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
         model = AutoModelForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **self.kwargs)
 
-        if load_8bit:
-            compress_module(model, self.device)
-
         if debug:
             print(model)
 
+        if self.device == "cuda":
+            model.to(self.device)
+
         return model, tokenizer
 
 
diff --git a/pilot/model/vicuna_llm.py b/pilot/model/vicuna_llm.py
index b3ecd079d..be433c7c3 100644
--- a/pilot/model/vicuna_llm.py
+++ b/pilot/model/vicuna_llm.py
@@ -1,24 +1,34 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
+import json
 import requests
+from urllib.parse import urljoin
+from langchain.embeddings.base import Embeddings
+from pydantic import BaseModel
 from typing import Any, Mapping, Optional, List
-from transformers import pipeline
 from langchain.llms.base import LLM
 from configs.model_config import *
 
-class VicunaLLM(LLM):
+class VicunaRequestLLM(LLM):
 
+    vicuna_generate_path = "generate"
     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-        url = vicuna_model_server
+        if isinstance(stop, list):
+            stop = stop + ["Observation:"]
+        
         params = {
-            "model": "vicuna-13b",
             "prompt": prompt,
-            "temperature": 0.7,
-            "max_new_tokens": 512,
-            "stop": "###"
+            "temperature": 0,
+            "max_new_tokens": 256,
+            "stop": stop
         }
-        pass
+        response = requests.post(
+            url=urljoin(vicuna_model_server, self.vicuna_generate_path),
+            data=json.dumps(params)
+        )
+        response.raise_for_status()
+        return response.json()["response"]
 
     @property
     def _llm_type(self) -> str:
@@ -26,4 +36,49 @@ class VicunaLLM(LLM):
 
     def _identifying_params(self) -> Mapping[str, Any]:
         return {}
-    
\ No newline at end of file
+    
+
+class VicunaEmbeddingLLM(BaseModel, Embeddings):
+    """LangChain ``Embeddings`` client for the Vicuna server's embedding endpoint."""
+
+    # Path joined onto ``vicuna_model_server`` to form the endpoint URL.
+    vicuna_embedding_path = "embedding"
+
+    def _call(self, prompt: str) -> List[float]:
+        """POST the stripped prompt and return the server's ``response`` field.
+
+        Raises:
+            requests.HTTPError: If the server answers with an error status.
+        """
+        p = prompt.strip()
+        print("Sending prompt ", p)
+
+        response = requests.post(
+            url=urljoin(vicuna_model_server, self.vicuna_embedding_path),
+            json={"prompt": p},
+        )
+        response.raise_for_status()
+        return response.json()["response"]
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Call out to Vicuna's server embedding endpoint for embedding search docs.
+
+        Args:
+            texts: The list of text to embed
+
+        Returns:
+            List of embeddings. one for each text.
+        """
+        return [self.embed_query(text) for text in texts]
+
+    def embed_query(self, text: str) -> List[float]:
+        """Call out to Vicuna's server embedding endpoint for embedding query text.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embedding for the text
+        """
+        return self._call(text)
+
diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py
index 6cc1b8904..5796a8c66 100644
--- a/pilot/server/chatbot.py
+++ b/pilot/server/chatbot.py
@@ -14,7 +14,7 @@ vicuna_status_path = "worker_get_status"
 def generate(prompt):
     params = {
         "model": "vicuna-13b",
-        "prompt": "给出一个查询用户的SQL",
+        "prompt": prompt,
         "temperature": 0.7,
         "max_new_tokens": 512,
         "stop": "###"
diff --git a/pilot/server/vicuna_server.py b/pilot/server/vicuna_server.py
new file mode 100644
index 000000000..996a3e5aa
--- /dev/null
+++ b/pilot/server/vicuna_server.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from typing import Optional, List
+from fastapi import FastAPI
+from pydantic import BaseModel
+from pilot.model.inference import generate_output, get_embeddings
+from pilot.model.loader import ModerLoader
+from pilot.configs.model_config import *
+
+model_path = llm_model_config[LLM_MODEL] 
+ml = ModerLoader(model_path=model_path)
+model, tokenizer = ml.loader(load_8bit=isload_8bit, debug=isdebug)
+
+app = FastAPI()
+
+class PromptRequest(BaseModel):
+    prompt: str
+    temperature: float
+    max_new_tokens: int
+    stop: Optional(List[str]) = None
+
+
+class EmbeddingRequest(BaseModel):
+    prompt: str
+
+
+@app.post("/generate")
+def generate(prompt_request: PromptRequest):
+    """Run text generation for the posted prompt and return it under "response"."""
+    params = {
+        field: getattr(prompt_request, field)
+        for field in ("prompt", "temperature", "max_new_tokens", "stop")
+    }
+
+    print("Receive prompt: ", params["prompt"])
+    output = generate_output(model, tokenizer, params, DEVICE)
+    print("Output: ", output)
+    # The generated text is returned to the caller under the "response" key.
+    return {"response": output}
+
+
+@app.post("/embedding")
+def embeddings(prompt_request: EmbeddingRequest):
+    """Return the embedding of the posted prompt as a list of floats."""
+    text = prompt_request.prompt
+    print("Received prompt: ", text)
+    return {"response": [float(v) for v in get_embeddings(model, tokenizer, text)]}
\ No newline at end of file

From 3b5e4d80767671ce898efa9aaa531ef629d7e5b3 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 21:50:47 +0800
Subject: [PATCH 18/23] fix problem

---
 pilot/model/loader.py   | 4 ++--
 pilot/server/chatbot.py | 2 +-
 pilot/server/sqlgpt.py  | 3 +--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 979b2bb89..7b78ebe8c 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -29,8 +29,8 @@ class ModerLoader:
         if debug:
             print(model)
 
-        if self.device == "cuda":
-            model.to(self.device)
+        # if self.device == "cuda":
+        #     model.to(self.device)
 
         return model, tokenizer
 
diff --git a/pilot/server/chatbot.py b/pilot/server/chatbot.py
index 5796a8c66..5e0ad9294 100644
--- a/pilot/server/chatbot.py
+++ b/pilot/server/chatbot.py
@@ -6,7 +6,7 @@ import json
 import time
 from urllib.parse import urljoin
 import gradio as gr
-from configs.model_config import *
+from pilot.configs.model_config import *
 vicuna_base_uri = "http://192.168.31.114:21002/"
 vicuna_stream_path = "worker_generate_stream"
 vicuna_status_path = "worker_get_status"
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 6dbf1bfc1..773de8611 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -5,7 +5,7 @@
 import json
 import torch
 import gradio as gr
-from fastchat.serve.inference import generate_stream, compress_module
+from fastchat.serve.inference import generate_stream 
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -20,7 +20,6 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 
 def generate(prompt):    
-    compress_module(model, device) 
     model.to(device)
     print(model, tokenizer)
     params = {

From 7aee4186a2d99e72e0822be9c792bc8bea7cdc76 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 22:30:42 +0800
Subject: [PATCH 19/23] gpu out of memory

---
 README.md                     | 17 ++++++++++++++---
 pilot/model/inference.py      |  3 ++-
 pilot/server/vicuna_server.py |  2 +-
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 303aa41d2..f3f135932 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,20 @@ A Open Database-GPT Experiment
 
 ![GitHub Repo stars](https://img.shields.io/github/stars/csunny/db-gpt?style=social)
 
+DB-GPT is an experimental open-source application that builds upon the fastchat model and uses vicuna as its base model. Additionally, it looks like this application incorporates langchain and llama-index embedding knowledge to improve Database-QA capabilities. 
 
-DB-GPT is an experimental open-source application, which based on the vicuna base model. 
+Overall, it appears to be a sophisticated and innovative tool for working with databases. If you have any specific questions about how to use or implement DB-GPT in your work, please let me know and I'll do my best to assist you.
 
+# Install
+1. Run model server
+```
+cd pilot/server
+uvicorn icuna_server:app --host 0.0.0.0
+```
 
-## Featurs
-Coming soon, please wait... 
\ No newline at end of file
+2. Run gradio webui
+
+# Featurs
+- SQL-Generate
+- Database-QA Based Knowledge 
+- SQL-diagnosis
\ No newline at end of file
diff --git a/pilot/model/inference.py b/pilot/model/inference.py
index 2f2948457..426043aa5 100644
--- a/pilot/model/inference.py
+++ b/pilot/model/inference.py
@@ -10,7 +10,8 @@ def generate_output(model, tokenizer, params, device, context_len=2048):
     max_new_tokens = int(params.get("max_new_tokens", 256))
     stop_parameter = params.get("stop", None)
 
-    if stop_parameter == tokenizer.eso_token:
+    print(tokenizer.__dir__())
+    if stop_parameter == tokenizer.eos_token:
         stop_parameter = None
     
     stop_strings = []
diff --git a/pilot/server/vicuna_server.py b/pilot/server/vicuna_server.py
index 996a3e5aa..20ed928d0 100644
--- a/pilot/server/vicuna_server.py
+++ b/pilot/server/vicuna_server.py
@@ -18,7 +18,7 @@ class PromptRequest(BaseModel):
     prompt: str
     temperature: float
     max_new_tokens: int
-    stop: Optional(List[str]) = None
+    stop: Optional[List[str]] = None
 
 
 class EmbeddingRequest(BaseModel):

From 8324f3df8bd0deee77a83750180cd66effdf769f Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 22:55:21 +0800
Subject: [PATCH 20/23] update

---
 pilot/app.py | 46 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/pilot/app.py b/pilot/app.py
index 5f2cf93db..f5fcb3ad1 100644
--- a/pilot/app.py
+++ b/pilot/app.py
@@ -1,18 +1,48 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
-
+import streamlit as st
 from langchain.agents import (
     load_tools,
     initialize_agent,
     AgentType
 )
-
 from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
-llm = VicunaRequestLLM()
+from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from llama_index import Document, GPTSimpleVectorIndex
 
-tools = load_tools(['python_repl'], llm=llm)
-agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
-agent.run(
-    "Write a python script that prints 'Hello World!'"
-)
\ No newline at end of file
+def agent_demo():
+    llm = VicunaRequestLLM()
+
+    tools = load_tools(['python_repl'], llm=llm)
+    agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
+    agent.run(
+        "Write a SQL script that Query 'select count(1)!'"
+    )
+
+def knowledged_qa_demo(text_list):
+    llm_predictor = LLMPredictor(llm=VicunaRequestLLM)
+    hfemb = VicunaEmbeddingLLM()
+    embed_model = LangchainEmbedding(hfemb)
+    documents = [Document(t) for t in text_list]
+
+    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
+    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context) 
+    return index
+
+
+if __name__ == "__main__":
+    # agent_demo()
+
+    test1 = """ 这是一段测试文字  """
+    text_list = [test1]
+    index = knowledged_qa_demo(text_list)
+
+    st.title("智能助手")
+    query = st.text_input("请提问.")
+    
+    if st.button("提交"):
+        response = index.query(query)
+        print(query, response.response)
+        st.write(response.response)
\ No newline at end of file

From 3080f50fe4a745fa0319798d96b2856756509c08 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 23:02:13 +0800
Subject: [PATCH 21/23] fix load model gpu oom

---
 pilot/model/loader.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 7b78ebe8c..5f18a023c 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -7,6 +7,8 @@ from transformers import (
     AutoModelForCausalLM,
 )
 
+from fastchat.serve.compression import compress_module
+
 class ModerLoader:
 
     kwargs = {}
@@ -29,6 +31,9 @@ class ModerLoader:
         if debug:
             print(model)
 
+        if load_8bit:
+            compress_module(model, self.device) 
+
         # if self.device == "cuda":
         #     model.to(self.device)
 

From dd31aa98efbd1fea18e0ef63395bf71938dc9a50 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 23:07:05 +0800
Subject: [PATCH 22/23] update readme file

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f3f135932..71c30dc61 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Overall, it appears to be a sophisticated and innovative tool for working with d
 1. Run model server
 ```
 cd pilot/server
-uvicorn icuna_server:app --host 0.0.0.0
+uvicorn vicuna_server:app --host 0.0.0.0
 ```
 
 2. Run gradio webui

From 172e010843978452edcb041b9f4ce6f9fd03d586 Mon Sep 17 00:00:00 2001
From: csunny <cfqcsunny@gmail.com>
Date: Sat, 29 Apr 2023 23:28:11 +0800
Subject: [PATCH 23/23] add knowledge base QA

---
 README.md    |  3 +++
 pilot/app.py | 31 +++++++++++++++++++------------
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 71c30dc61..370b035c5 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,9 @@ uvicorn vicuna_server:app --host 0.0.0.0
 ```
 
 2. Run gradio webui
+```
+python app.py 
+```
 
 # Featurs
 - SQL-Generate
diff --git a/pilot/app.py b/pilot/app.py
index f5fcb3ad1..6a7a76f3d 100644
--- a/pilot/app.py
+++ b/pilot/app.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
-import streamlit as st
+import gradio as gr
 from langchain.agents import (
     load_tools,
     initialize_agent,
@@ -22,7 +22,7 @@ def agent_demo():
     )
 
 def knowledged_qa_demo(text_list):
-    llm_predictor = LLMPredictor(llm=VicunaRequestLLM)
+    llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
     hfemb = VicunaEmbeddingLLM()
     embed_model = LangchainEmbedding(hfemb)
     documents = [Document(t) for t in text_list]
@@ -32,17 +32,24 @@ def knowledged_qa_demo(text_list):
     return index
 
 
+def get_answer(q):
+    """Answer question ``q`` by querying an index built from the test text."""
+    text_list = [""" 这是一段测试文字  """]
+    index = knowledged_qa_demo(text_list)
+    # The query result object carries the answer text in ``.response``.
+    return index.query(q).response
+
 if __name__ == "__main__":
     # agent_demo()
 
-    test1 = """ 这是一段测试文字  """
-    text_list = [test1]
-    index = knowledged_qa_demo(text_list)
+    with gr.Blocks() as demo:
+        gr.Markdown("数据库智能助手")
+        with gr.Tab("知识问答"):
+            text_input = gr.TextArea()
+            text_output = gr.TextArea()
+            text_button = gr.Button()
+        
+        text_button.click(get_answer, inputs=text_input, outputs=text_output)
 
-    st.title("智能助手")
-    query = st.text_input("请提问.")
-    
-    if st.button("提交"):
-        response = index.query(query)
-        print(query, response.response)
-        st.write(response.response)
\ No newline at end of file
+    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
+