mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-03 10:05:13 +00:00
282 lines
11 KiB
Python
282 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding:utf-8 -*-
|
|
|
|
import dataclasses
|
|
from enum import auto, Enum
|
|
from typing import List, Any
|
|
from pilot.configs.config import Config
|
|
|
|
CFG = Config()
|
|
|
|
DB_SETTINGS = {
|
|
"user": CFG.LOCAL_DB_USER,
|
|
"password": CFG.LOCAL_DB_PASSWORD,
|
|
"host": CFG.LOCAL_DB_HOST,
|
|
"port": CFG.LOCAL_DB_PORT
|
|
}
|
|
|
|
ROLE_USER = "USER"
|
|
ROLE_ASSISTANT = "Assistant"
|
|
|
|
class SeparatorStyle(Enum):
|
|
SINGLE = auto()
|
|
TWO = auto()
|
|
THREE = auto()
|
|
FOUR = auto()
|
|
|
|
@ dataclasses.dataclass
|
|
class Conversation:
|
|
"""This class keeps all conversation history. """
|
|
|
|
system: str
|
|
roles: List[str]
|
|
messages: List[List[str]]
|
|
offset: int
|
|
sep_style: SeparatorStyle = SeparatorStyle.SINGLE
|
|
sep: str = "###"
|
|
sep2: str = None
|
|
|
|
# Used for gradio server
|
|
skip_next: bool = False
|
|
conv_id: Any = None
|
|
|
|
def get_prompt(self):
|
|
if self.sep_style == SeparatorStyle.SINGLE:
|
|
ret = self.system + self.sep
|
|
for role, message in self.messages:
|
|
if message:
|
|
ret += role + ": " + message + self.sep
|
|
else:
|
|
ret += role + ":"
|
|
return ret
|
|
|
|
elif self.sep_style == SeparatorStyle.TWO:
|
|
seps = [self.sep, self.sep2]
|
|
ret = self.system + seps[0]
|
|
for i, (role, message) in enumerate(self.messages):
|
|
if message:
|
|
ret += role + ":" + message + seps[i % 2]
|
|
else:
|
|
ret += role + ":"
|
|
return ret
|
|
else:
|
|
raise ValueError(f"Invalid style: {self.sep_style}")
|
|
|
|
def append_message(self, role, message):
|
|
self.messages.append([role, message])
|
|
|
|
def to_gradio_chatbot(self):
|
|
ret = []
|
|
for i, (role, msg) in enumerate(self.messages[self.offset:]):
|
|
if i % 2 == 0:
|
|
ret.append([msg, None])
|
|
else:
|
|
ret[-1][-1] = msg
|
|
|
|
return ret
|
|
|
|
def copy(self):
|
|
return Conversation(
|
|
system=self.system,
|
|
roles=self.roles,
|
|
messages=[[x, y] for x, y in self.messages],
|
|
offset=self.offset,
|
|
sep_style=self.sep_style,
|
|
sep=self.sep,
|
|
sep2=self.sep2,
|
|
conv_id=self.conv_id,
|
|
)
|
|
|
|
def dict(self):
|
|
return {
|
|
"system": self.system,
|
|
"roles": self.roles,
|
|
"messages": self.messages,
|
|
"offset": self.offset,
|
|
"sep": self.sep,
|
|
"sep2": self.sep2,
|
|
"conv_id": self.conv_id
|
|
}
|
|
|
|
|
|
def gen_sqlgen_conversation(dbname):
|
|
from pilot.connections.mysql import MySQLOperator
|
|
mo = MySQLOperator(
|
|
**(DB_SETTINGS)
|
|
)
|
|
|
|
message = ""
|
|
|
|
schemas = mo.get_schema(dbname)
|
|
for s in schemas:
|
|
message += s["schema_info"] + ";"
|
|
return f"Database {dbname} Schema information as follows: {message}\n"
|
|
|
|
|
|
conv_one_shot = Conversation(
|
|
system="A chat between a curious user and an artificial intelligence assistant, who very familiar with database related knowledge. "
|
|
"The assistant gives helpful, detailed, professional and polite answers to the user's questions. ",
|
|
roles=("USER", "Assistant"),
|
|
messages=(
|
|
(
|
|
"USER",
|
|
"What are the key differences between mysql and postgres?",
|
|
),
|
|
(
|
|
"Assistant",
|
|
"MySQL and PostgreSQL are both popular open-source relational database management systems (RDBMS) "
|
|
"that have many similarities but also some differences. Here are some key differences: \n"
|
|
"1. Data Types: PostgreSQL has a more extensive set of data types, "
|
|
"including support for array, hstore, JSON, and XML, whereas MySQL has a more limited set.\n"
|
|
"2. ACID compliance: Both MySQL and PostgreSQL support ACID compliance (Atomicity, Consistency, Isolation, Durability), "
|
|
"but PostgreSQL is generally considered to be more strict in enforcing it.\n"
|
|
"3. Replication: MySQL has a built-in replication feature, which allows you to replicate data across multiple servers,"
|
|
"whereas PostgreSQL has a similar feature, but it is not as mature as MySQL's.\n"
|
|
"4. Performance: MySQL is generally considered to be faster and more efficient in handling large datasets, "
|
|
"whereas PostgreSQL is known for its robustness and reliability.\n"
|
|
"5. Licensing: MySQL is licensed under the GPL (General Public License), which means that it is free and open-source software, "
|
|
"whereas PostgreSQL is licensed under the PostgreSQL License, which is also free and open-source but with different terms.\n"
|
|
|
|
"Ultimately, the choice between MySQL and PostgreSQL depends on the specific needs and requirements of your application. "
|
|
"Both are excellent database management systems, and choosing the right one "
|
|
"for your project requires careful consideration of your application's requirements, performance needs, and scalability."
|
|
),
|
|
),
|
|
offset=2,
|
|
sep_style=SeparatorStyle.SINGLE,
|
|
sep="###"
|
|
)
|
|
|
|
conv_vicuna_v1 = Conversation(
|
|
system="A chat between a curious user and an artificial intelligence assistant. who very familiar with database related knowledge. "
|
|
"The assistant gives helpful, detailed, professional and polite answers to the user's questions. ",
|
|
roles=("USER", "ASSISTANT"),
|
|
messages=(),
|
|
offset=0,
|
|
sep_style=SeparatorStyle.TWO,
|
|
sep=" ",
|
|
sep2="</s>",
|
|
)
|
|
|
|
auto_dbgpt_one_shot = Conversation(
|
|
system="You are DB-GPT, an AI designed to answer questions about HackerNews by query `hackerbews` database in MySQL. "
|
|
"Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.",
|
|
roles=("USER", "ASSISTANT"),
|
|
messages=(
|
|
(
|
|
"USER",
|
|
""" Answer how many users does app_users have by query ob database
|
|
Constraints:
|
|
1. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
|
|
2. No user assistance
|
|
3. Exclusively use the commands listed in double quotes e.g. "command name"
|
|
|
|
|
|
Schema:
|
|
Database gpt-user Schema information as follows: users(city,create_time,email,last_login_time,phone,user_name);
|
|
|
|
|
|
Commands:
|
|
1. analyze_code: Analyze Code, args: "code": "<full_code_string>"
|
|
2. execute_python_file: Execute Python File, args: "filename": "<filename>"
|
|
3. append_to_file: Append to file, args: "filename": "<filename>", "text": "<text>"
|
|
4. delete_file: Delete file, args: "filename": "<filename>"
|
|
5. list_files: List Files in Directory, args: "directory": "<directory>"
|
|
6. read_file: Read file, args: "filename": "<filename>"
|
|
7. write_to_file: Write to file, args: "filename": "<filename>", "text": "<text>"
|
|
8. db_sql_executor: "Execute SQL in Database.", args: "sql": "<sql>"
|
|
|
|
You should only respond in JSON format as described below and ensure the response can be parsed by Python json.loads
|
|
Response Format:
|
|
{
|
|
"thoughts": {
|
|
"text": "thought",
|
|
"reasoning": "reasoning",
|
|
"plan": "- short bulleted\n- list that conveys\n- long-term plan",
|
|
"criticism": "constructive self-criticism",
|
|
"speak": "thoughts summary to say to user"
|
|
},
|
|
"command": {
|
|
"name": "command name",
|
|
"args": {
|
|
"arg name": "value"
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
),
|
|
(
|
|
"ASSISTANT",
|
|
"""
|
|
{
|
|
"thoughts": {
|
|
"text": "To answer how many users by query database we need to write SQL query to get the count of the distinct users from the database. We can use db_sql_executor command to execute the SQL query in database.",
|
|
"reasoning": "We can use the sql_executor command to execute the SQL query for getting count of distinct users from the users database. We can select the count of the distinct users from the users table.",
|
|
"plan": "- Write SQL query to get count of distinct users from users database\n- Use db_sql_executor to execute the SQL query in OB database\n- Parse the SQL result to get the count\n- Respond with the count as the answer",
|
|
"criticism": "None",
|
|
"speak": "To get the number of users in users, I will execute an SQL query in OB database using the db_sql_executor command and respond with the count."
|
|
},
|
|
"command": {
|
|
"name": "db_sql_executor",
|
|
"args": {
|
|
"sql": "SELECT COUNT(DISTINCT(user_name)) FROM users ;"
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
)
|
|
),
|
|
offset=0,
|
|
sep_style=SeparatorStyle.SINGLE,
|
|
sep="###",
|
|
)
|
|
|
|
auto_dbgpt_without_shot = Conversation(
|
|
system="You are DB-GPT, an AI designed to answer questions about users by query `users` database in MySQL. "
|
|
"Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.",
|
|
roles=("USER", "ASSISTANT"),
|
|
messages=(),
|
|
offset=0,
|
|
sep_style=SeparatorStyle.SINGLE,
|
|
sep=" ",
|
|
sep2="</s>",
|
|
)
|
|
|
|
conv_qa_prompt_template = """ 基于以下已知的信息, 专业、简要的回答用户的问题,
|
|
如果无法从提供的恶内容中获取答案, 请说: "知识库中提供的内容不足以回答此问题" 禁止胡乱编造。
|
|
已知内容:
|
|
{context}
|
|
问题:
|
|
{question}
|
|
"""
|
|
|
|
# conv_qa_prompt_template = """ Please provide the known information so that I can professionally and briefly answer the user's question. If the answer cannot be obtained from the provided content,
|
|
# please say: "The information provided in the knowledge base is insufficient to answer this question." Fabrication is prohibited.。
|
|
# known information:
|
|
# {context}
|
|
# question:
|
|
# {question}
|
|
# """
|
|
default_conversation = conv_one_shot
|
|
|
|
conversation_sql_mode ={
|
|
"auto_execute_ai_response": "直接执行结果",
|
|
"dont_execute_ai_response": "不直接执行结果"
|
|
}
|
|
|
|
conversation_types = {
|
|
"native": "LLM原生对话",
|
|
"default_knownledge": "默认知识库对话",
|
|
"custome": "新增知识库对话",
|
|
}
|
|
|
|
conv_templates = {
|
|
"conv_one_shot": conv_one_shot,
|
|
"vicuna_v1": conv_vicuna_v1,
|
|
"auto_dbgpt_one_shot": auto_dbgpt_one_shot
|
|
}
|
|
|
|
if __name__ == "__main__":
|
|
message = gen_sqlgen_conversation("dbgpt")
|
|
print(message)
|