Mirror of https://github.com/csunny/DB-GPT.git
llm proxy framework design, adding support for bard large language model proxy (#376)
1. LLM proxy framework design, adding support for the Bard large language model proxy.
2. Add Alibaba Cloud image deployment solution. Close #369.
commit 9236f4260e
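In outline, the new framework gives each commercial backend its own `*_generate_stream` implementation under `pilot/model/proxy/llms/`, and a single entry point that dispatches among them by the configured `LLM_MODEL`. A condensed sketch of that pattern, reduced from the dispatcher hunk below (only two backends shown; the fallback here is reshaped into a generator so the final `yield from` works, which the bare lambda in the committed code does not do):

```python
from pilot.configs.config import Config
from pilot.model.proxy.llms.chatgpt import chatgpt_generate_stream
from pilot.model.proxy.llms.bard import bard_generate_stream

CFG = Config()


def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048):
    # One streaming generator per commercial LLM, keyed by the LLM_MODEL setting.
    generator_mapping = {
        "chatgpt_proxyllm": chatgpt_generate_stream,
        "bard_proxyllm": bard_generate_stream,
        # claude_proxyllm / gpt4_proxyllm / wenxin_proxyllm / tongyi_proxyllm ...
    }

    def unsupported(*args, **kwargs):
        # Generator-shaped fallback; the committed diff uses a bare lambda instead.
        yield f"{CFG.LLM_MODEL} LLM is not supported"

    generator_function = generator_mapping.get(CFG.LLM_MODEL, unsupported)
    yield from generator_function(model, tokenizer, params, device, context_len)
```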
@@ -121,6 +121,8 @@ LANGUAGE=en
 PROXY_API_KEY={your-openai-sk}
 PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
 
+# from https://bard.google.com/ f12-> application-> __Secure-1PSID
+BARD_PROXY_API_KEY={your-bard-token}
 
 #*******************************************************************#
 # ** SUMMARY_CONFIG
.github/workflows/python-publish.yml (vendored, 2 changes)
@@ -3,7 +3,7 @@
 
 # This workflow uses actions that are not certified by GitHub.
 # They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
+# separate terms of service, data_privacy policy, and support
 # documentation.
 
 name: Upload Python Package
@@ -145,6 +145,8 @@ DB-GPT基于 [FastChat](https://github.com/lm-sys/FastChat) 构建大模型运
 ## Image
 🌐 [AutoDL镜像](https://www.codewithgpu.com/i/csunny/DB-GPT/dbgpt-0.3.1-v2)
 
+🌐 [阿里云镜像](https://www.zhihu.com/pin/1668226536363728896?utm_psn=1668228728445579265)
+
 ## 安装
 [快速开始](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/getting_started.html)
 
@@ -126,3 +126,11 @@ MODEL_SERVER=127.0.0.1:8000
 PROXY_API_KEY=sk-xxx
 PROXY_SERVER_URL={your-openai-proxy-server/v1/chat/completions}
 ```
+
+### 2. Bard Proxy
+- If your environment deploying DB-GPT has access to https://bard.google.com/ (F12-> application-> __Secure-1PSID), then modifying the .env configuration file as below will work.
+```
+LLM_MODEL=bard_proxyllm
+MODEL_SERVER=127.0.0.1:8000
+BARD_PROXY_API_KEY={your-bard-key}
+```
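Before starting the server, it can be worth sanity-checking the token outside DB-GPT. A minimal sketch using the pinned `bardapi==0.1.29` client, mirroring the exact call made in `pilot/model/proxy/llms/bard.py` (the prompt string is illustrative; the token value is your own):

```python
import bardapi

# The __Secure-1PSID cookie value copied from bard.google.com (F12 -> Application).
token = "{your-bard-key}"

# Same call path as bard_generate_stream: returns a dict with "content" on success.
response = bardapi.core.Bard(token).get_answer("Say hello in one short sentence")
print(response.get("content"))
```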
@@ -45,6 +45,7 @@ class Config(metaclass=Singleton):
 
         # This is a proxy server, just for test. we will remove this later.
         self.proxy_api_key = os.getenv("PROXY_API_KEY")
+        self.bard_proxy_api_key = os.getenv("BARD_PROXY_API_KEY")
         self.proxy_server_url = os.getenv("PROXY_SERVER_URL")
 
         self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
@@ -46,7 +46,13 @@ LLM_MODEL_CONFIG = {
     "falcon-40b": os.path.join(MODEL_PATH, "falcon-40b"),
     "gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
     "gptj-6b": os.path.join(MODEL_PATH, "ggml-gpt4all-j-v1.3-groovy.bin"),
-    "proxyllm": "proxyllm",
+    "proxyllm": "chatgpt_proxyllm",
+    "chatgpt_proxyllm": "chatgpt_proxyllm",
+    "bard_proxyllm": "bard_proxyllm",
+    "claude_proxyllm": "claude_proxyllm",
+    "wenxin_proxyllm": "wenxin_proxyllm",
+    "tongyi_proxyllm": "tongyi_proxyllm",
+    "gpt4_proxyllm": "gpt4_proxyllm",
     "llama-2-7b": os.path.join(MODEL_PATH, "Llama-2-7b-chat-hf"),
     "llama-2-13b": os.path.join(MODEL_PATH, "Llama-2-13b-chat-hf"),
    "llama-2-70b": os.path.join(MODEL_PATH, "Llama-2-70b-chat-hf"),
@@ -1,74 +1,33 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+import time
 
-import json
-import requests
-from typing import List
 from pilot.configs.config import Config
-from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
-from pilot.scene.base_message import ModelMessage, ModelMessageRoleType
+from pilot.model.proxy.llms.chatgpt import chatgpt_generate_stream
+from pilot.model.proxy.llms.bard import bard_generate_stream
+from pilot.model.proxy.llms.claude import claude_generate_stream
+from pilot.model.proxy.llms.wenxin import wenxin_generate_stream
+from pilot.model.proxy.llms.tongyi import tongyi_generate_stream
+from pilot.model.proxy.llms.gpt4 import gpt4_generate_stream
 
 CFG = Config()
 
 
 def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048):
-    history = []
-
-    prompt = params["prompt"]
-    stop = params.get("stop", "###")
-
-    headers = {
-        "Authorization": "Bearer " + CFG.proxy_api_key,
-        "Token": CFG.proxy_api_key,
-    }
-
-    messages: List[ModelMessage] = params["messages"]
-    # Add history conversation
-    for message in messages:
-        if message.role == ModelMessageRoleType.HUMAN:
-            history.append({"role": "user", "content": message.content})
-        elif message.role == ModelMessageRoleType.SYSTEM:
-            history.append({"role": "system", "content": message.content})
-        elif message.role == ModelMessageRoleType.AI:
-            history.append({"role": "assistant", "content": message.content})
-        else:
-            pass
-
-    # Move the last user's information to the end
-    temp_his = history[::-1]
-    last_user_input = None
-    for m in temp_his:
-        if m["role"] == "user":
-            last_user_input = m
-            break
-    if last_user_input:
-        history.remove(last_user_input)
-        history.append(last_user_input)
-
-    payloads = {
-        "model": "gpt-3.5-turbo",  # just for test, remove this later
-        "messages": history,
-        "temperature": params.get("temperature"),
-        "max_tokens": params.get("max_new_tokens"),
-        "stream": True,
-    }
-
-    res = requests.post(
-        CFG.proxy_server_url, headers=headers, json=payloads, stream=True
-    )
-
-    text = ""
-    for line in res.iter_lines():
-        if line:
-            if not line.startswith(b"data: "):
-                error_message = line.decode("utf-8")
-                yield error_message
-            else:
-                json_data = line.split(b": ", 1)[1]
-                decoded_line = json_data.decode("utf-8")
-                if decoded_line.lower() != "[DONE]".lower():
-                    obj = json.loads(json_data)
-                    if obj["choices"][0]["delta"].get("content") is not None:
-                        content = obj["choices"][0]["delta"]["content"]
-                        text += content
-                        yield text
+    generator_mapping = {
+        "proxyllm": chatgpt_generate_stream,
+        "chatgpt_proxyllm": chatgpt_generate_stream,
+        "bard_proxyllm": bard_generate_stream,
+        "claude_proxyllm": claude_generate_stream,
+        "gpt4_proxyllm": gpt4_generate_stream,
+        "wenxin_proxyllm": wenxin_generate_stream,
+        "tongyi_proxyllm": tongyi_generate_stream,
+    }
+
+    default_error_message = f"{CFG.LLM_MODEL} LLM is not supported"
+    generator_function = generator_mapping.get(
+        CFG.LLM_MODEL, lambda: default_error_message
+    )
+
+    yield from generator_function(model, tokenizer, params, device, context_len)
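Because every backend generator shares the `(model, tokenizer, params, device, context_len)` signature, the serving loop that consumes the dispatcher is backend-agnostic. A hypothetical minimal driver, assuming `proxyllm_generate_stream` from the hunk above is in scope (the message construction and the `None` placeholders are illustrative, not part of this commit):

```python
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

# Proxy backends never touch the local model/tokenizer, so None is passed here.
params = {
    "messages": [ModelMessage(role=ModelMessageRoleType.HUMAN, content="Hello")],
    "temperature": 0.7,
    "max_new_tokens": 256,
}
for partial_text in proxyllm_generate_stream(None, None, params, device=None):
    print(partial_text)  # the chatgpt backend yields the accumulated text so far
```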
pilot/model/proxy/data_privacy/mask/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
+"""
+data masking, transform private sensitive data into mask data, based on the tool sensitive data recognition.
+"""
pilot/model/proxy/data_privacy/mask/masking.py (new file, 3 lines)
@@ -0,0 +1,3 @@
+"""
+mask the sensitive data before upload LLM inference service
+"""
pilot/model/proxy/data_privacy/mask/recovery.py (new file, 3 lines)
@@ -0,0 +1,3 @@
+"""
+recovery the data after LLM inference
+"""
pilot/model/proxy/data_privacy/sensitive_detection.py (new file, 3 lines)
@@ -0,0 +1,3 @@
+"""
+a tool to discovery sensitive data
+"""
pilot/model/proxy/llms/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+"""
+There are several limitations to privatizing large models: high deployment costs and poor performance.
+In scenarios where data privacy requirements are relatively low, connecting with commercial large models can enable
+rapid and efficient product implementation with high quality.
+"""
pilot/model/proxy/llms/bard.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+import bardapi
+from typing import List
+from pilot.configs.config import Config
+from pilot.scene.base_message import ModelMessage, ModelMessageRoleType
+
+CFG = Config()
+
+
+def bard_generate_stream(model, tokenizer, params, device, context_len=2048):
+    token = CFG.bard_proxy_api_key
+
+    history = []
+    messages: List[ModelMessage] = params["messages"]
+    for message in messages:
+        if message.role == ModelMessageRoleType.HUMAN:
+            history.append({"role": "user", "content": message.content})
+        elif message.role == ModelMessageRoleType.SYSTEM:
+            history.append({"role": "system", "content": message.content})
+        elif message.role == ModelMessageRoleType.AI:
+            history.append({"role": "assistant", "content": message.content})
+        else:
+            pass
+
+    temp_his = history[::-1]
+    last_user_input = None
+    for m in temp_his:
+        if m["role"] == "user":
+            last_user_input = m
+            break
+    if last_user_input:
+        history.remove(last_user_input)
+        history.append(last_user_input)
+
+    msgs = []
+    for msg in history:
+        if msg.get("content"):
+            msgs.append(msg["content"])
+    response = bardapi.core.Bard(token).get_answer("\n".join(msgs))
+    if response is not None and response.get("content") is not None:
+        yield str(response["content"])
+    else:
+        yield f"bard response error: {str(response)}"
pilot/model/proxy/llms/chatgpt.py (new file, 70 lines)
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import json
+import requests
+from typing import List
+from pilot.configs.config import Config
+from pilot.scene.base_message import ModelMessage, ModelMessageRoleType
+
+CFG = Config()
+
+
+def chatgpt_generate_stream(model, tokenizer, params, device, context_len=2048):
+    history = []
+
+    headers = {
+        "Authorization": "Bearer " + CFG.proxy_api_key,
+        "Token": CFG.proxy_api_key,
+    }
+
+    messages: List[ModelMessage] = params["messages"]
+    # Add history conversation
+    for message in messages:
+        if message.role == ModelMessageRoleType.HUMAN:
+            history.append({"role": "user", "content": message.content})
+        elif message.role == ModelMessageRoleType.SYSTEM:
+            history.append({"role": "system", "content": message.content})
+        elif message.role == ModelMessageRoleType.AI:
+            history.append({"role": "assistant", "content": message.content})
+        else:
+            pass
+
+    # Move the last user's information to the end
+    temp_his = history[::-1]
+    last_user_input = None
+    for m in temp_his:
+        if m["role"] == "user":
+            last_user_input = m
+            break
+    if last_user_input:
+        history.remove(last_user_input)
+        history.append(last_user_input)
+
+    payloads = {
+        "model": "gpt-3.5-turbo",  # just for test, remove this later
+        "messages": history,
+        "temperature": params.get("temperature"),
+        "max_tokens": params.get("max_new_tokens"),
+        "stream": True,
+    }
+
+    res = requests.post(
+        CFG.proxy_server_url, headers=headers, json=payloads, stream=True
+    )
+
+    text = ""
+    for line in res.iter_lines():
+        if line:
+            if not line.startswith(b"data: "):
+                error_message = line.decode("utf-8")
+                yield error_message
+            else:
+                json_data = line.split(b": ", 1)[1]
+                decoded_line = json_data.decode("utf-8")
+                if decoded_line.lower() != "[DONE]".lower():
+                    obj = json.loads(json_data)
+                    if obj["choices"][0]["delta"].get("content") is not None:
+                        content = obj["choices"][0]["delta"]["content"]
+                        text += content
+                        yield text
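For reference, the loop at the end of `chatgpt_generate_stream` parses OpenAI-style server-sent events, accumulating each `delta.content` and yielding the running text, so callers always receive the full text so far. A small illustration of the framing it expects; the payload below is made up:

```python
import json

# Illustrative SSE frames as sent by the streaming chat/completions API:
#   data: {"choices": [{"delta": {"content": "Hel"}}]}
#   data: {"choices": [{"delta": {"content": "lo"}}]}
#   data: [DONE]
line = b'data: {"choices": [{"delta": {"content": "Hel"}}]}'
obj = json.loads(line.split(b": ", 1)[1])
print(obj["choices"][0]["delta"]["content"])  # -> Hel
```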
pilot/model/proxy/llms/claude.py (new file, 7 lines)
@@ -0,0 +1,7 @@
+from pilot.configs.config import Config
+
+CFG = Config()
+
+
+def claude_generate_stream(model, tokenizer, params, device, context_len=2048):
+    yield "claude LLM was not supported!"
pilot/model/proxy/llms/gpt4.py (new file, 7 lines)
@@ -0,0 +1,7 @@
+from pilot.configs.config import Config
+
+CFG = Config()
+
+
+def gpt4_generate_stream(model, tokenizer, params, device, context_len=2048):
+    yield "gpt4 LLM was not supported!"
pilot/model/proxy/llms/tongyi.py (new file, 7 lines)
@@ -0,0 +1,7 @@
+from pilot.configs.config import Config
+
+CFG = Config()
+
+
+def tongyi_generate_stream(model, tokenizer, params, device, context_len=2048):
+    yield "tongyi LLM was not supported!"
pilot/model/proxy/llms/wenxin.py (new file, 7 lines)
@@ -0,0 +1,7 @@
+from pilot.configs.config import Config
+
+CFG = Config()
+
+
+def wenxin_generate_stream(model, tokenizer, params, device, context_len=2048):
+    yield "wenxin LLM is not supported!"
@@ -38,7 +38,7 @@ class ChatDashboard(BaseChat):
             current_user_input=user_input,
         )
         if not db_name:
-            raise ValueError(f"{ChatScene.ChatDashboard.value} mode should chose db!")
+            raise ValueError(f"{ChatScene.ChatDashboard.value} mode should choose db!")
         self.db_name = db_name
         self.report_name = report_name
 
@@ -68,6 +68,7 @@ playsound
 distro
 pypdf
 weaviate-client
+bardapi==0.1.29
 
 # database
 