From ba668a9b40da4dd923a4b3b1f07eb5203925cfd2 Mon Sep 17 00:00:00 2001
From: isadba
Date: Wed, 31 May 2023 21:32:51 +0800
Subject: [PATCH 1/8] Update README.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MySQL password in the Docker install command is inconsistent with the initialization password in the code.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 18dd30f80..2427b54d9 100644
--- a/README.md
+++ b/README.md
@@ -148,7 +148,7 @@ As our project has the ability to achieve ChatGPT performance of over 85%, there
 This project relies on a local MySQL database service, which you need to install locally. We recommend using Docker for installation.
 
 ```bash
-$ docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
+$ docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa123456 -dit mysql:latest
 ```
 We use [Chroma embedding database](https://github.com/chroma-core/chroma) as the default for our vector database, so there is no need for special installation. If you choose to connect to other databases, you can follow our tutorial for installation and configuration. For the entire installation process of DB-GPT, we use the miniconda3 virtual environment. Create a virtual environment and install the Python dependencies.

From bd3cf25fbb708db2228f7ad5df0aaaeab291e5c5 Mon Sep 17 00:00:00 2001
From: fenghao
Date: Wed, 31 May 2023 21:42:01 +0800
Subject: [PATCH 2/8] The Docker-installed MySQL password is inconsistent with
 the initialization password in the code.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.zh.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.zh.md b/README.zh.md
index b84671da2..0b9456e99 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -147,7 +147,7 @@ TODO: 在终端展示上，我们将提供多端产品界面。包括PC、手机
 本项目依赖一个本地的 MySQL 数据库服务，你需要本地安装，推荐直接使用 Docker 安装。
 
 ```
-docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
+docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa123456 -dit mysql:latest
 ```
 向量数据库我们默认使用的是Chroma内存数据库，所以无需特殊安装，如果有需要连接其他的同学，可以按照我们的教程进行安装配置。整个DB-GPT的安装过程，我们使用的是miniconda3的虚拟环境。创建虚拟环境，并安装python依赖包

From 2dd12d0c3b62fdcfa2b0b4831ade37b1f43612d1 Mon Sep 17 00:00:00 2001
From: fenghao
Date: Fri, 2 Jun 2023 21:12:16 +0800
Subject: [PATCH 3/8] fix .env.template

---
 .env.template | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.env.template b/.env.template
index 3fe762e73..39f9eeaf7 100644
--- a/.env.template
+++ b/.env.template
@@ -41,7 +41,7 @@ MAX_POSITION_EMBEDDINGS=4096
 #** DATABASE SETTINGS **#
 #*******************************************************************#
 LOCAL_DB_USER=root
-LOCAL_DB_PASSWORD=aa12345678
+LOCAL_DB_PASSWORD=aa123456
 LOCAL_DB_HOST=127.0.0.1
 LOCAL_DB_PORT=3306

From 4170074f324f38f1598c04336420b6711ed821c2 Mon Sep 17 00:00:00 2001
From: csunny
Date: Sun, 4 Jun 2023 19:13:23 +0800
Subject: [PATCH 4/8] stream output for guanaco

---
 pilot/model/llm_out/guanaco_llm.py | 51 +++++++++++++++++++++++++++++-
 pilot/server/chat_adapter.py       | 16 +++++-----
 2 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/pilot/model/llm_out/guanaco_llm.py b/pilot/model/llm_out/guanaco_llm.py
index 37c4c423b..5b24e69ec 100644
--- a/pilot/model/llm_out/guanaco_llm.py
+++ b/pilot/model/llm_out/guanaco_llm.py
@@ -1,5 +1,4 @@
 import torch
-import copy
 from threading import Thread
 from transformers import TextIteratorStreamer, StoppingCriteriaList, StoppingCriteria
 from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
@@ -57,3 +56,53 @@ def guanaco_generate_output(model, tokenizer, params, device, context_len=2048):
         out = decoded_output.split("### Response:")[-1].strip()
         yield out
+
+
+def guanaco_generate_stream(model, tokenizer, params, device, context_len=2048):
+    """Fork from: https://github.com/KohakuBlueleaf/guanaco-lora/blob/main/generate.py"""
+    tokenizer.bos_token_id = 1
+    print(params)
+    stop = params.get("stop", "###")
+    prompt = params["prompt"]
+    query = prompt
+    print("Query Message: ", query)
+
+    input_ids = tokenizer(query, return_tensors="pt").input_ids
+    input_ids = input_ids.to(model.device)
+
+    streamer = TextIteratorStreamer(
+        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+    )
+
+    tokenizer.bos_token_id = 1
+    stop_token_ids = [0]
+
+    class StopOnTokens(StoppingCriteria):
+        def __call__(
+            self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+        ) -> bool:
+            for stop_id in stop_token_ids:
+                if input_ids[0][-1] == stop_id:
+                    return True
+            return False
+
+    stop = StopOnTokens()
+
+    generate_kwargs = dict(
+        input_ids=input_ids,
+        max_new_tokens=512,
+        temperature=1.0,
+        do_sample=True,
+        top_k=1,
+        streamer=streamer,
+        repetition_penalty=1.7,
+        stopping_criteria=StoppingCriteriaList([stop]),
+    )
+
+
+    generator = model.generate(**generate_kwargs)
+    out = ""
+    for new_text in streamer:
+        out += new_text
+        yield new_text
+    return out
\ No newline at end of file
diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index 86901dea3..63d922672 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -4,7 +4,7 @@
 from functools import cache
 from typing import List
 
-from pilot.model.llm_out.vicuna_base_llm import generate_stream
+from pilot.model.inference import generate_stream
 
 
 class BaseChatAdpter:
@@ -55,7 +55,7 @@ class ChatGLMChatAdapter(BaseChatAdpter):
         return "chatglm" in model_path
 
     def get_generate_stream_func(self):
-        from pilot.model.llm_out.chatglm_llm import chatglm_generate_stream
+        from pilot.model.chatglm_llm import chatglm_generate_stream
 
         return chatglm_generate_stream
 
@@ -85,15 +85,15 @@ class CodeGenChatAdapter(BaseChatAdpter):
 
 
 class GuanacoChatAdapter(BaseChatAdpter):
-    """Model chat adapter for Guanaco"""
-
+    """Model chat adapter for Guanaco """
+    
     def match(self, model_path: str):
         return "guanaco" in model_path
 
     def get_generate_stream_func(self):
-        from pilot.model.llm_out.guanaco_llm import guanaco_generate_output
-
-        return guanaco_generate_output
+        from pilot.model.guanaco_llm import guanaco_generate_stream
+        
+        return guanaco_generate_stream
 
 
 class ProxyllmChatAdapter(BaseChatAdpter):
@@ -101,7 +101,7 @@ class ProxyllmChatAdapter(BaseChatAdpter):
         return "proxyllm" in model_path
 
     def get_generate_stream_func(self):
-        from pilot.model.llm_out.proxy_llm import proxyllm_generate_stream
+        from pilot.model.proxy_llm import proxyllm_generate_stream
 
         return proxyllm_generate_stream

From f7fe66b5e560aec07513d07273931cbd36d34550 Mon Sep 17 00:00:00 2001
From: csunny
Date: Sun, 4 Jun 2023 20:15:27 +0800
Subject: [PATCH 5/8] fix: guanaco output

---
 pilot/out_parser/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pilot/out_parser/base.py b/pilot/out_parser/base.py
index 0538aa54c..bb2d0b2b2 100644
--- a/pilot/out_parser/base.py
+++ b/pilot/out_parser/base.py
@@ -57,7 +57,7 @@ class BaseOutputParser(ABC):
         output = data["text"][skip_echo_len:].strip()
     elif "guanaco" in CFG.LLM_MODEL:
         # output = data["text"][skip_echo_len + 14:].replace("</s>", "").strip()
-        output = data["text"][skip_echo_len:].replace("</s>", "").strip()
+        output = data["text"][skip_echo_len + 2:].replace("</s>", "").strip()
     else:
         output = data["text"].strip()

From fe8291b198e0d91f1d57e81d1b44e1221a14c501 Mon Sep 17 00:00:00 2001
From: csunny
Date: Sun, 4 Jun 2023 20:38:34 +0800
Subject: [PATCH 6/8] feature: guanaco stream output

---
 pilot/model/loader.py        |  2 ++
 pilot/server/chat_adapter.py | 14 +++++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 9fe6207c1..6fd6143ff 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -118,6 +118,8 @@ class ModelLoader(metaclass=Singleton):
                 model.to(self.device)
             except ValueError:
                 pass
+            except AttributeError:
+                pass
 
         if debug:
             print(model)
diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index 63d922672..8db61d09f 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -4,7 +4,7 @@
 from functools import cache
 from typing import List
 
-from pilot.model.inference import generate_stream
+from pilot.model.llm_out.vicuna_base_llm import generate_stream
 
 
 class BaseChatAdpter:
@@ -55,7 +55,7 @@ class ChatGLMChatAdapter(BaseChatAdpter):
         return "chatglm" in model_path
 
     def get_generate_stream_func(self):
-        from pilot.model.chatglm_llm import chatglm_generate_stream
+        from pilot.model.llm_out.chatglm_llm import chatglm_generate_stream
 
         return chatglm_generate_stream
 
@@ -85,14 +85,14 @@ class CodeGenChatAdapter(BaseChatAdpter):
 
 
 class GuanacoChatAdapter(BaseChatAdpter):
-    """Model chat adapter for Guanaco """
-    
+    """Model chat adapter for Guanaco"""
+
     def match(self, model_path: str):
         return "guanaco" in model_path
 
     def get_generate_stream_func(self):
-        from pilot.model.guanaco_llm import guanaco_generate_stream
-        
+        from pilot.model.llm_out.guanaco_llm import guanaco_generate_stream
+
         return guanaco_generate_stream
 
 
@@ -101,7 +101,7 @@ class ProxyllmChatAdapter(BaseChatAdpter):
         return "proxyllm" in model_path
 
     def get_generate_stream_func(self):
-        from pilot.model.proxy_llm import proxyllm_generate_stream
+        from pilot.model.llm_out.proxy_llm import proxyllm_generate_stream
 
         return proxyllm_generate_stream

From ff6cc05e1146b723fd37e6fee85fecb09ca59333 Mon Sep 17 00:00:00 2001
From: csunny
Date: Sun, 4 Jun 2023 21:20:09 +0800
Subject: [PATCH 7/8] guanaco: add stream output func (#154)

---
 pilot/model/llm_out/guanaco_llm.py | 15 +++++++------
 pilot/model/llm_out/proxy_llm.py   |  4 ----
 pilot/out_parser/base.py           |  8 ++++++--
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/pilot/model/llm_out/guanaco_llm.py b/pilot/model/llm_out/guanaco_llm.py
index 5b24e69ec..9b8008702 100644
--- a/pilot/model/llm_out/guanaco_llm.py
+++ b/pilot/model/llm_out/guanaco_llm.py
@@ -64,6 +64,9 @@ def guanaco_generate_stream(model, tokenizer, params, device, context_len=2048):
     print(params)
     stop = params.get("stop", "###")
     prompt = params["prompt"]
+    max_new_tokens = params.get("max_new_tokens", 512)
+    temperature = params.get("temperature", 1.0)
+
     query = prompt
     print("Query Message: ", query)
 
@@ -82,7 +85,7 @@ def guanaco_generate_stream(model, tokenizer, params, device, context_len=2048):
             self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
         ) -> bool:
             for stop_id in stop_token_ids:
-                if input_ids[0][-1] == stop_id:
+                if input_ids[-1][-1] == stop_id:
                     return True
             return False
 
     stop = StopOnTokens()
@@ -90,8 +93,8 @@ def guanaco_generate_stream(model, tokenizer, params, device, context_len=2048):
 
     generate_kwargs = dict(
         input_ids=input_ids,
-        max_new_tokens=512,
-        temperature=1.0,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
         do_sample=True,
         top_k=1,
         streamer=streamer,
@@ -100,9 +103,9 @@ def guanaco_generate_stream(model, tokenizer, params, device, context_len=2048):
     )
 
 
-    generator = model.generate(**generate_kwargs)
+    model.generate(**generate_kwargs)
+
     out = ""
     for new_text in streamer:
         out += new_text
-        yield new_text
-    return out
\ No newline at end of file
+        yield out
\ No newline at end of file
diff --git a/pilot/model/llm_out/proxy_llm.py b/pilot/model/llm_out/proxy_llm.py
index 92887cfc6..68512ec3c 100644
--- a/pilot/model/llm_out/proxy_llm.py
+++ b/pilot/model/llm_out/proxy_llm.py
@@ -68,15 +68,11 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048):
         "max_tokens": params.get("max_new_tokens"),
     }
 
-    print(payloads)
-    print(headers)
     res = requests.post(
         CFG.proxy_server_url, headers=headers, json=payloads, stream=True
     )
 
     text = ""
-    print("====================================res================")
-    print(res)
     for line in res.iter_lines():
         if line:
             decoded_line = line.decode("utf-8")
diff --git a/pilot/out_parser/base.py b/pilot/out_parser/base.py
index bb2d0b2b2..d1dee2e37 100644
--- a/pilot/out_parser/base.py
+++ b/pilot/out_parser/base.py
@@ -56,8 +56,12 @@ class BaseOutputParser(ABC):
         # output = data["text"][skip_echo_len + 11:].strip()
         output = data["text"][skip_echo_len:].strip()
     elif "guanaco" in CFG.LLM_MODEL:
-        # output = data["text"][skip_echo_len + 14:].replace("</s>", "").strip()
-        output = data["text"][skip_echo_len + 2:].replace("</s>", "").strip()
+
+        # NO stream output
+        # output = data["text"][skip_echo_len + 2:].replace("</s>", "").strip()
+
+        # stream output
+        output = data["text"][11:].replace("</s>", "").strip()
     else:
         output = data["text"].strip()

From e8a193ef467bf31752c16840b3d662bb8edfe618 Mon Sep 17 00:00:00 2001
From: csunny
Date: Sun, 4 Jun 2023 21:47:21 +0800
Subject: [PATCH 8/8] feature: stream output for guanaco (#154)

---
 pilot/model/llm_out/guanaco_llm.py | 5 ++---
 pilot/out_parser/base.py           | 1 -
 pilot/server/chat_adapter.py       | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/pilot/model/llm_out/guanaco_llm.py b/pilot/model/llm_out/guanaco_llm.py
index 9b8008702..1a2d1ae8b 100644
--- a/pilot/model/llm_out/guanaco_llm.py
+++ b/pilot/model/llm_out/guanaco_llm.py
@@ -76,7 +76,7 @@ def guanaco_generate_stream(model, tokenizer, params, device, context_len=2048):
     streamer = TextIteratorStreamer(
         tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
-    
+
     tokenizer.bos_token_id = 1
     stop_token_ids = [0]
 
@@ -102,10 +102,9 @@ def guanaco_generate_stream(model, tokenizer, params, device, context_len=2048):
         stopping_criteria=StoppingCriteriaList([stop]),
     )
 
-
     model.generate(**generate_kwargs)
 
     out = ""
     for new_text in streamer:
         out += new_text
-        yield out
\ No newline at end of file
+        yield out
diff --git a/pilot/out_parser/base.py b/pilot/out_parser/base.py
index d1dee2e37..909023f07 100644
--- a/pilot/out_parser/base.py
+++ b/pilot/out_parser/base.py
@@ -56,7 +56,6 @@ class BaseOutputParser(ABC):
         # output = data["text"][skip_echo_len + 11:].strip()
         output = data["text"][skip_echo_len:].strip()
     elif "guanaco" in CFG.LLM_MODEL:
-
         # NO stream output
         # output = data["text"][skip_echo_len + 2:].replace("</s>", "").strip()
 
diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index 8db61d09f..4dec22655 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -91,7 +91,7 @@ class GuanacoChatAdapter(BaseChatAdpter):
         return "guanaco" in model_path
 
     def get_generate_stream_func(self):
-        from pilot.model.llm_out.guanaco_llm import guanaco_generate_stream 
+        from pilot.model.llm_out.guanaco_llm import guanaco_generate_stream
 
         return guanaco_generate_stream
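
Taken together, patches 4 through 8 leave `guanaco_generate_stream` as a plain Python generator that yields the accumulated response text and is looked up through `GuanacoChatAdapter.get_generate_stream_func()`. The sketch below shows one way to drive the function directly. It is illustrative only: the checkpoint path is a placeholder, and the loading flags and prompt format are assumptions rather than anything these patches pin down.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from pilot.model.llm_out.guanaco_llm import guanaco_generate_stream

# Placeholder path: point this at a real merged Guanaco checkpoint.
MODEL_PATH = "models/guanaco-7b-merged"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH, torch_dtype=torch.float16, device_map="auto"
)

# The keys mirror what guanaco_generate_stream reads via params.get(...).
params = {
    "prompt": "### Human: What does DB-GPT do?\n### Response:",
    "temperature": 0.7,     # defaults to 1.0 if omitted
    "max_new_tokens": 256,  # defaults to 512 if omitted
}

# After PATCH 7/8 the generator yields the accumulated text so far,
# so each iteration produces a progressively longer response string.
for partial in guanaco_generate_stream(model, tokenizer, params, device="cuda"):
    print(partial)
```

One design note: the patched function calls `model.generate(**generate_kwargs)` on the calling thread before draining the `TextIteratorStreamer`, so text only becomes available once generation has finished; the `from threading import Thread` import it keeps suggests running `generate` on a worker thread if true token-by-token streaming is wanted.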