feat: Knowledge QA support SQLite

FangYin Cheng, 2023-08-11 18:23:57 +08:00
parent 0859f36a89
commit 8cea0b9a9f

25 changed files with 318 additions and 55 deletions

View File

@@ -54,17 +54,16 @@ KNOWLEDGE_SEARCH_TOP_SIZE=5
 #*******************************************************************#
 #**                    DATABASE SETTINGS                          **#
 #*******************************************************************#
-### MYSQL database(Current default database)
-LOCAL_DB_TYPE=mysql
-LOCAL_DB_USER=root
-LOCAL_DB_PASSWORD=aa12345678
-LOCAL_DB_HOST=127.0.0.1
-LOCAL_DB_PORT=3306
-### SQLite database (TODO: SQLite database will become the default database configuration when it is stable.)
-# LOCAL_DB_PATH=data/default_sqlite.db
-# LOCAL_DB_TYPE=sqlite
+### SQLite database (Current default database)
+LOCAL_DB_PATH=data/default_sqlite.db
+LOCAL_DB_TYPE=sqlite
+### MYSQL database
+# LOCAL_DB_TYPE=mysql
+# LOCAL_DB_USER=root
+# LOCAL_DB_PASSWORD=aa12345678
+# LOCAL_DB_HOST=127.0.0.1
+# LOCAL_DB_PORT=3306
 ### MILVUS
 ## MILVUS_ADDR - Milvus remote address (e.g. localhost:19530)
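With this change a fresh checkout runs on SQLite out of the box and MySQL becomes opt-in. As a rough sketch of how these variables end up selecting a connection, the snippet below builds a SQLAlchemy URL from the same environment values (the helper name build_local_db_url is illustrative, not part of this commit):

    import os
    from sqlalchemy import create_engine

    def build_local_db_url() -> str:
        # Illustrative helper mirroring the .env defaults above.
        db_type = os.getenv("LOCAL_DB_TYPE", "sqlite")
        if db_type == "sqlite":
            # File database: only a path is needed.
            return "sqlite:///" + os.getenv("LOCAL_DB_PATH", "data/default_sqlite.db")
        # Server database: the commented-out MySQL settings above.
        user = os.getenv("LOCAL_DB_USER", "root")
        password = os.getenv("LOCAL_DB_PASSWORD", "aa12345678")
        host = os.getenv("LOCAL_DB_HOST", "127.0.0.1")
        port = os.getenv("LOCAL_DB_PORT", "3306")
        return f"mysql+pymysql://{user}:{password}@{host}:{port}"

    engine = create_engine(build_local_db_url())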

View File

@@ -1,4 +1,4 @@
-ARG BASE_IMAGE="db-gpt:latest"
+ARG BASE_IMAGE="eosphorosai/dbgpt:latest"
 FROM ${BASE_IMAGE}
@@ -25,6 +25,6 @@ ENV LOCAL_DB_PASSWORD="$MYSQL_ROOT_PASSWORD"
 RUN cp /app/assets/schema/knowledge_management.sql /docker-entrypoint-initdb.d/
 COPY docker/allinone/allinone-entrypoint.sh /usr/local/bin/allinone-entrypoint.sh
-COPY docker/examples/sqls/ /docker-entrypoint-initdb.d/
+COPY docker/examples/sqls/*_mysql.sql /docker-entrypoint-initdb.d/
 ENTRYPOINT ["/usr/local/bin/allinone-entrypoint.sh"]

View File

@@ -4,6 +4,6 @@ SCRIPT_LOCATION=$0
 cd "$(dirname "$SCRIPT_LOCATION")"
 WORK_DIR=$(pwd)
-IMAGE_NAME="db-gpt-allinone"
+IMAGE_NAME="eosphorosai/dbgpt-allinone"
 docker build -f Dockerfile -t $IMAGE_NAME $WORK_DIR/../../

View File

@@ -1,6 +1,6 @@
 #!/bin/bash
-docker run --gpus "device=0" -d -p 3306:3306 \
+docker run --gpus all -d -p 3306:3306 \
     -p 5000:5000 \
     -e LOCAL_DB_HOST=127.0.0.1 \
     -e LOCAL_DB_PASSWORD=aa123456 \
@@ -9,5 +9,5 @@ docker run --gpus "device=0" -d -p 3306:3306 \
     -e LANGUAGE=zh \
     -v /data:/data \
     -v /data/models:/app/models \
-    --name db-gpt-allinone \
-    db-gpt-allinone
+    --name dbgpt-allinone \
+    eosphorosai/dbgpt-allinone

View File

@@ -4,7 +4,7 @@
 PROXY_API_KEY="$PROXY_API_KEY"
 PROXY_SERVER_URL="${PROXY_SERVER_URL-'https://api.openai.com/v1/chat/completions'}"
-docker run --gpus "device=0" -d -p 3306:3306 \
+docker run --gpus all -d -p 3306:3306 \
     -p 5000:5000 \
     -e LOCAL_DB_HOST=127.0.0.1 \
     -e LOCAL_DB_PASSWORD=aa123456 \
@@ -15,5 +15,5 @@ docker run --gpus "device=0" -d -p 3306:3306 \
     -e LANGUAGE=zh \
     -v /data:/data \
     -v /data/models:/app/models \
-    --name db-gpt-allinone \
-    db-gpt-allinone
+    --name dbgpt-allinone \
+    eosphorosai/dbgpt-allinone

View File

@ -45,4 +45,14 @@ RUN (if [ "${BUILD_LOCAL_CODE}" = "true" ]; \
else rm -rf /tmp/app; \ else rm -rf /tmp/app; \
fi;) fi;)
ARG LOAD_EXAMPLES="true"
RUN (if [ "${LOAD_EXAMPLES}" = "true" ]; \
then mkdir -p /app/pilot/data && sqlite3 /app/pilot/data/default_sqlite.db < /app/docker/examples/sqls/case_1_student_manager_sqlite.sql \
&& sqlite3 /app/pilot/data/default_sqlite.db < /app/docker/examples/sqls/case_2_ecom_sqlite.sql \
&& sqlite3 /app/pilot/data/default_sqlite.db < /app/docker/examples/sqls/test_case_info_sqlite.sql; \
fi;)
EXPOSE 5000 EXPOSE 5000
CMD ["python3", "pilot/server/dbgpt_server.py"]

View File

@@ -5,12 +5,13 @@ cd "$(dirname "$SCRIPT_LOCATION")"
 WORK_DIR=$(pwd)
 BASE_IMAGE="nvidia/cuda:11.8.0-devel-ubuntu22.04"
-IMAGE_NAME="db-gpt"
+IMAGE_NAME="eosphorosai/dbgpt"
 # zh: https://pypi.tuna.tsinghua.edu.cn/simple
 PIP_INDEX_URL="https://pypi.org/simple"
 # en or zh
 LANGUAGE="en"
 BUILD_LOCAL_CODE="false"
+LOAD_EXAMPLES="true"

 usage () {
     echo "USAGE: $0 [--base-image nvidia/cuda:11.8.0-devel-ubuntu22.04] [--image-name db-gpt]"
@@ -19,6 +20,7 @@ usage () {
     echo "  [-i|--pip-index-url pip index url] Pip index url, default: https://pypi.org/simple"
     echo "  [--language en or zh] Your language, default: en"
     echo "  [--build-local-code true or false] Whether to use the local project code to package the image, default: false"
+    echo "  [--load-examples true or false] Whether to load examples into the default database, default: true"
     echo "  [-h|--help] Usage message"
 }
@@ -50,6 +52,11 @@ while [[ $# -gt 0 ]]; do
         shift
         shift
         ;;
+    --load-examples)
+        LOAD_EXAMPLES="$2"
+        shift
+        shift
+        ;;
     -h|--help)
         help="true"
         shift
@@ -71,5 +78,6 @@ docker build \
     --build-arg PIP_INDEX_URL=$PIP_INDEX_URL \
     --build-arg LANGUAGE=$LANGUAGE \
     --build-arg BUILD_LOCAL_CODE=$BUILD_LOCAL_CODE \
+    --build-arg LOAD_EXAMPLES=$LOAD_EXAMPLES \
     -f Dockerfile \
     -t $IMAGE_NAME $WORK_DIR/../../

docker/base/run_sqlite.sh (new executable file, 12 additions)
View File

@@ -0,0 +1,12 @@
#!/bin/bash
docker run --gpus all -d \
-p 5000:5000 \
-e LOCAL_DB_TYPE=sqlite \
-e LOCAL_DB_PATH=data/default_sqlite.db \
-e LLM_MODEL=vicuna-13b-v1.5 \
-e LANGUAGE=zh \
-v /data:/data \
-v /data/models:/app/models \
--name dbgpt \
eosphorosai/dbgpt
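To confirm the container's database was seeded, you can inspect the SQLite file (inside the container, relative to the server working directory, or wherever LOCAL_DB_PATH points); a quick check with the stdlib driver:

    import sqlite3

    # Adjust the path to wherever the example database was seeded.
    conn = sqlite3.connect("data/default_sqlite.db")
    tables = conn.execute(
        "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name"
    ).fetchall()
    # Expect the example tables: courses, orders, products, scores, students, ...
    print([name for (name,) in tables])
    conn.close()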

View File

@@ -0,0 +1,18 @@
#!/bin/bash
# Your api key
PROXY_API_KEY="$PROXY_API_KEY"
PROXY_SERVER_URL="${PROXY_SERVER_URL-'https://api.openai.com/v1/chat/completions'}"
docker run --gpus all -d \
-p 5000:5000 \
-e LOCAL_DB_TYPE=sqlite \
-e LOCAL_DB_PATH=data/default_sqlite.db \
-e LLM_MODEL=proxyllm \
-e PROXY_API_KEY=$PROXY_API_KEY \
-e PROXY_SERVER_URL=$PROXY_SERVER_URL \
-e LANGUAGE=zh \
-v /data:/data \
-v /data/models:/app/models \
--name dbgpt \
eosphorosai/dbgpt

View File

@@ -0,0 +1,59 @@
CREATE TABLE students (
student_id INTEGER PRIMARY KEY,
student_name VARCHAR(100),
major VARCHAR(100),
year_of_enrollment INTEGER,
student_age INTEGER
);
CREATE TABLE courses (
course_id INTEGER PRIMARY KEY,
course_name VARCHAR(100),
credit REAL
);
CREATE TABLE scores (
student_id INTEGER,
course_id INTEGER,
score INTEGER,
semester VARCHAR(50),
PRIMARY KEY (student_id, course_id),
FOREIGN KEY (student_id) REFERENCES students(student_id),
FOREIGN KEY (course_id) REFERENCES courses(course_id)
);
INSERT INTO students (student_id, student_name, major, year_of_enrollment, student_age) VALUES
(1, '张三', '计算机科学', 2020, 20),
(2, '李四', '计算机科学', 2021, 19),
(3, '王五', '物理学', 2020, 21),
(4, '赵六', '数学', 2021, 19),
(5, '周七', '计算机科学', 2022, 18),
(6, '吴八', '物理学', 2020, 21),
(7, '郑九', '数学', 2021, 19),
(8, '孙十', '计算机科学', 2022, 18),
(9, '刘十一', '物理学', 2020, 21),
(10, '陈十二', '数学', 2021, 19);
INSERT INTO courses (course_id, course_name, credit) VALUES
(1, '计算机基础', 3),
(2, '数据结构', 4),
(3, '高等物理', 3),
(4, '线性代数', 4),
(5, '微积分', 5),
(6, '编程语言', 4),
(7, '量子力学', 3),
(8, '概率论', 4),
(9, '数据库系统', 4),
(10, '计算机网络', 4);
INSERT INTO scores (student_id, course_id, score, semester) VALUES
(1, 1, 90, '2020年秋季'),
(1, 2, 85, '2021年春季'),
(2, 1, 88, '2021年秋季'),
(2, 2, 90, '2022年春季'),
(3, 3, 92, '2020年秋季'),
(3, 4, 85, '2021年春季'),
(4, 3, 88, '2021年秋季'),
(4, 4, 86, '2022年春季'),
(5, 1, 90, '2022年秋季'),
(5, 2, 87, '2023年春季');
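As a quick sanity check of this dataset once it is loaded, a join across all three tables, here averaging scores per major (the query is written for SQLite; adjust the path to wherever the database was seeded):

    import sqlite3

    conn = sqlite3.connect("data/default_sqlite.db")
    rows = conn.execute(
        """
        SELECT s.major, AVG(sc.score) AS avg_score
        FROM students AS s
        JOIN scores AS sc ON sc.student_id = s.student_id
        GROUP BY s.major
        ORDER BY avg_score DESC
        """
    ).fetchall()
    for major, avg_score in rows:
        print(major, round(avg_score, 1))
    conn.close()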

View File

@@ -0,0 +1,59 @@
CREATE TABLE users (
user_id INTEGER PRIMARY KEY,
user_name VARCHAR(100),
user_email VARCHAR(100),
registration_date DATE,
user_country VARCHAR(100)
);
CREATE TABLE products (
product_id INTEGER PRIMARY KEY,
product_name VARCHAR(100),
product_price REAL
);
CREATE TABLE orders (
order_id INTEGER PRIMARY KEY,
user_id INTEGER,
product_id INTEGER,
quantity INTEGER,
order_date DATE,
FOREIGN KEY (user_id) REFERENCES users(user_id),
FOREIGN KEY (product_id) REFERENCES products(product_id)
);
INSERT INTO users (user_id, user_name, user_email, registration_date, user_country) VALUES
(1, 'John', 'john@gmail.com', '2020-01-01', 'USA'),
(2, 'Mary', 'mary@gmail.com', '2021-01-01', 'UK'),
(3, 'Bob', 'bob@gmail.com', '2020-01-01', 'USA'),
(4, 'Alice', 'alice@gmail.com', '2021-01-01', 'UK'),
(5, 'Charlie', 'charlie@gmail.com', '2020-01-01', 'USA'),
(6, 'David', 'david@gmail.com', '2021-01-01', 'UK'),
(7, 'Eve', 'eve@gmail.com', '2020-01-01', 'USA'),
(8, 'Frank', 'frank@gmail.com', '2021-01-01', 'UK'),
(9, 'Grace', 'grace@gmail.com', '2020-01-01', 'USA'),
(10, 'Helen', 'helen@gmail.com', '2021-01-01', 'UK');
INSERT INTO products (product_id, product_name, product_price) VALUES
(1, 'iPhone', 699),
(2, 'Samsung Galaxy', 599),
(3, 'iPad', 329),
(4, 'Macbook', 1299),
(5, 'Apple Watch', 399),
(6, 'AirPods', 159),
(7, 'Echo', 99),
(8, 'Kindle', 89),
(9, 'Fire TV Stick', 39),
(10, 'Echo Dot', 49);
INSERT INTO orders (order_id, user_id, product_id, quantity, order_date) VALUES
(1, 1, 1, 1, '2022-01-01'),
(2, 1, 2, 1, '2022-02-01'),
(3, 2, 3, 2, '2022-03-01'),
(4, 2, 4, 1, '2022-04-01'),
(5, 3, 5, 2, '2022-05-01'),
(6, 3, 6, 3, '2022-06-01'),
(7, 4, 7, 2, '2022-07-01'),
(8, 4, 8, 1, '2022-08-01'),
(9, 5, 9, 2, '2022-09-01'),
(10, 5, 10, 3, '2022-10-01');
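A matching sanity query for the e-commerce data, aggregating revenue per product:

    import sqlite3

    conn = sqlite3.connect("data/default_sqlite.db")
    rows = conn.execute(
        """
        SELECT p.product_name,
               SUM(p.product_price * o.quantity) AS total_sales
        FROM products AS p
        JOIN orders AS o ON o.product_id = p.product_id
        GROUP BY p.product_id
        ORDER BY total_sales DESC
        """
    ).fetchall()
    for name, total in rows:
        print(name, total)
    conn.close()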

View File

@@ -0,0 +1,17 @@
CREATE TABLE test_cases (
case_id INTEGER PRIMARY KEY AUTOINCREMENT,
scenario_name VARCHAR(100),
scenario_description TEXT,
test_question VARCHAR(500),
expected_sql TEXT,
correct_output TEXT
);
INSERT INTO test_cases (scenario_name, scenario_description, test_question, expected_sql, correct_output) VALUES
('学校管理系统', '测试SQL助手的联合查询条件查询和排序功能', '查询所有学生的姓名,专业和成绩,按成绩降序排序', 'SELECT students.student_name, students.major, scores.score FROM students JOIN scores ON students.student_id = scores.student_id ORDER BY scores.score DESC;', '返回所有学生的姓名,专业和成绩,按成绩降序排序的结果'),
('学校管理系统', '测试SQL助手的联合查询条件查询和排序功能', '查询计算机科学专业的学生的平均成绩', 'SELECT AVG(scores.score) as avg_score FROM students JOIN scores ON students.student_id = scores.student_id WHERE students.major = ''计算机科学'';', '返回计算机科学专业学生的平均成绩'),
('学校管理系统', '测试SQL助手的联合查询条件查询和排序功能', '查询哪些学生在2023年秋季学期的课程学分总和超过15', 'SELECT students.student_name FROM students JOIN scores ON students.student_id = scores.student_id JOIN courses ON scores.course_id = courses.course_id WHERE scores.semester = ''2023年秋季'' GROUP BY students.student_id HAVING SUM(courses.credit) > 15;', '返回在2023年秋季学期的课程学分总和超过15的学生的姓名'),
('电商系统', '测试SQL助手的数据聚合和分组功能', '查询每个用户的总订单数量', 'SELECT users.user_name, COUNT(orders.order_id) as order_count FROM users JOIN orders ON users.user_id = orders.user_id GROUP BY users.user_id;', '返回每个用户的总订单数量'),
('电商系统', '测试SQL助手的数据聚合和分组功能', '查询每种商品的总销售额', 'SELECT products.product_name, SUM(products.product_price * orders.quantity) as total_sales FROM products JOIN orders ON products.product_id = orders.product_id GROUP BY products.product_id;', '返回每种商品的总销售额'),
('电商系统', '测试SQL助手的数据聚合和分组功能', '查询2023年最受欢迎的商品订单数量最多的商品', 'SELECT products.product_name FROM products JOIN orders ON products.product_id = orders.product_id WHERE YEAR(orders.order_date) = 2023 GROUP BY products.product_id ORDER BY COUNT(orders.order_id) DESC LIMIT 1;', '返回2023年最受欢迎的商品订单数量最多的商品的名称');
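One caveat: the expected_sql of the last test case uses YEAR(orders.order_date), which is a MySQL function; SQLite has no YEAR(), so against the SQLite default database the equivalent is strftime('%Y', ...). A SQLite-compatible form of that query (note the example orders above are all dated 2022, so filtering on '2023' would return no rows):

    import sqlite3

    conn = sqlite3.connect("data/default_sqlite.db")
    rows = conn.execute(
        """
        SELECT p.product_name, COUNT(o.order_id) AS order_count
        FROM products AS p
        JOIN orders AS o ON o.product_id = p.product_id
        WHERE strftime('%Y', o.order_date) = '2022'  -- SQLite equivalent of YEAR()
        GROUP BY p.product_id
        ORDER BY order_count DESC
        LIMIT 1
        """
    ).fetchall()
    print(rows)
    conn.close()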

View File

@@ -123,7 +123,7 @@ class Config(metaclass=Singleton):
         ### default Local database connection configuration
         self.LOCAL_DB_HOST = os.getenv("LOCAL_DB_HOST")
         self.LOCAL_DB_PATH = os.getenv("LOCAL_DB_PATH", "")
-        self.LOCAL_DB_TYPE = os.getenv("LOCAL_DB_TYPE")
+        self.LOCAL_DB_TYPE = os.getenv("LOCAL_DB_TYPE", "mysql")
         if self.LOCAL_DB_HOST is None and self.LOCAL_DB_PATH == "":
             self.LOCAL_DB_HOST = "127.0.0.1"

View File

@@ -93,18 +93,25 @@ class ConnectManager:
         db_name = CFG.LOCAL_DB_NAME
         db_type = CFG.LOCAL_DB_TYPE
         db_path = CFG.LOCAL_DB_PATH
+        if not db_type:
+            # Default file database type
+            db_type = DBType.DuckDb.value()
         if not db_name:
+            db_type, db_name = self._parse_file_db_info(db_type, db_path)
+        if db_name:
+            print(
+                f"Add file db, db_name: {db_name}, db_type: {db_type}, db_path: {db_path}"
+            )
+            self.storage.add_file_db(db_name, db_type, db_path)
+
+    def _parse_file_db_info(self, db_type: str, db_path: str):
         if db_type is None or db_type == DBType.DuckDb.value():
             # file db is duckdb
             db_name = self.storage.get_file_db_name(db_path)
             db_type = DBType.DuckDb.value()
         else:
             db_name = DBType.parse_file_db_name_from_path(db_type, db_path)
-        if db_name:
-            print(
-                f"Add file db, db_name: {db_name}, db_type: {db_type}, db_path: {db_path}"
-            )
-            self.storage.add_file_db(db_name, db_type, db_path)
+        return db_type, db_name

     def get_connect(self, db_name):
         db_config = self.storage.get_db_config(db_name)
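The refactor pulls file-database name resolution into _parse_file_db_info so that BaseDao (added below) can reuse it. For non-DuckDB file databases the name is derived from the path by DBType.parse_file_db_name_from_path; a standalone sketch of the presumed resolution (the real naming rule lives in pilot.common.schema and may differ):

    from pathlib import Path

    def parse_file_db_name_from_path(db_type: str, db_path: str) -> str:
        # Assumption: "data/default_sqlite.db" with db_type "sqlite"
        # resolves to the database name "default_sqlite".
        return Path(db_path).stem

    print(parse_file_db_name_from_path("sqlite", "data/default_sqlite.db"))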

View File

@@ -0,0 +1,83 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from pilot.configs.config import Config
from pilot.common.schema import DBType
from pilot.connections.rdbms.base import RDBMSDatabase
from pilot.logs import logger

CFG = Config()


class BaseDao:
    def __init__(
        self, orm_base=None, database: str = None, create_not_exist_table: bool = False
    ) -> None:
        """BaseDao. If the current database is a file database and
        create_not_exist_table=True, missing tables are created automatically."""
        self._orm_base = orm_base
        self._database = database
        self._create_not_exist_table = create_not_exist_table

        self._db_engine = None
        self._session = None
        self._connection = None

    @property
    def db_engine(self):
        if not self._db_engine:
            # lazy loading
            db_engine, connection = _get_db_engine(
                self._orm_base, self._database, self._create_not_exist_table
            )
            self._db_engine = db_engine
            self._connection = connection
        return self._db_engine

    @property
    def Session(self):
        if not self._session:
            self._session = sessionmaker(bind=self.db_engine)
        return self._session


def _get_db_engine(
    orm_base=None, database: str = None, create_not_exist_table: bool = False
):
    db_engine = None
    connection: RDBMSDatabase = None
    db_type = DBType.of_db_type(CFG.LOCAL_DB_TYPE)
    if db_type is None or db_type == DBType.Mysql:
        # default database
        db_engine = create_engine(
            f"mysql+pymysql://{CFG.LOCAL_DB_USER}:{CFG.LOCAL_DB_PASSWORD}@{CFG.LOCAL_DB_HOST}:{CFG.LOCAL_DB_PORT}/{database}",
            echo=True,
        )
    else:
        db_manager = CFG.LOCAL_DB_MANAGE
        if not db_manager:
            raise Exception(
                "LOCAL_DB_MANAGE is not initialized, please check the system configuration"
            )
        if db_type.is_file_db():
            db_path = CFG.LOCAL_DB_PATH
            if db_path is None or db_path == "":
                raise ValueError(
                    "Your LOCAL_DB_TYPE is a file database, but LOCAL_DB_PATH is not configured; please configure LOCAL_DB_PATH in your .env file"
                )
            _, database = db_manager._parse_file_db_info(db_type.value(), db_path)
            logger.info(
                f"Current DAO database is file database, db_type: {db_type.value()}, db_path: {db_path}, db_name: {database}"
            )
        logger.info(f"Get DAO database connection with database name {database}")
        connection: RDBMSDatabase = db_manager.get_connect(database)
        if not isinstance(connection, RDBMSDatabase):
            raise ValueError(
                "Currently only `RDBMSDatabase` is supported as the underlying database of BaseDao; please check your database configuration"
            )
        db_engine = connection._engine
    if db_type.is_file_db() and orm_base is not None and create_not_exist_table:
        logger.info("Current database is a file database; creating missing tables")
        orm_base.metadata.create_all(db_engine)

    return db_engine, connection
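A minimal sketch of how a DAO builds on this class (MyEntity and MyEntityDao are illustrative, not part of the commit); with a file database and create_not_exist_table=True the table is created on first engine access:

    from sqlalchemy import Column, Integer, String
    from sqlalchemy.orm import declarative_base

    from pilot.connections.rdbms.base_dao import BaseDao

    Base = declarative_base()

    class MyEntity(Base):
        __tablename__ = "my_entity"
        id = Column(Integer, primary_key=True)
        name = Column(String(100))

    class MyEntityDao(BaseDao):
        def __init__(self):
            super().__init__(
                database="knowledge_management",
                orm_base=Base,
                create_not_exist_table=True,
            )

        def save(self, entity: MyEntity):
            session = self.Session()
            session.add(entity)
            session.commit()
            session.close()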

View File

@@ -22,6 +22,8 @@ class SQLiteConnect(RDBMSDatabase):
     ) -> RDBMSDatabase:
         """Construct a SQLAlchemy engine from URI."""
         _engine_args = engine_args or {}
+        _engine_args["connect_args"] = {"check_same_thread": False}
+        # _engine_args["echo"] = True
         return cls(create_engine("sqlite:///" + file_path, **_engine_args), **kwargs)

     def get_indexes(self, table_name):
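The connect arg matters because pysqlite connections refuse, by default, to be used from any thread other than the one that created them, and the web server dispatches requests across threads. The underlying behavior, shown with the stdlib driver (without check_same_thread=False the worker thread raises sqlite3.ProgrammingError):

    import sqlite3
    import threading

    conn = sqlite3.connect(":memory:", check_same_thread=False)
    conn.execute("CREATE TABLE t (x INTEGER)")

    def insert_from_worker():
        # Allowed only because check_same_thread=False was passed above.
        conn.execute("INSERT INTO t VALUES (1)")

    worker = threading.Thread(target=insert_from_worker)
    worker.start()
    worker.join()
    print(conn.execute("SELECT COUNT(*) FROM t").fetchone())  # (1,)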

View File

@@ -144,10 +144,8 @@ async def document_upload(
         request = KnowledgeDocumentRequest()
         request.doc_name = doc_name
         request.doc_type = doc_type
-        request.content = (
-            os.path.join(
-                KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
-            ),
-        )
+        request.content = os.path.join(
+            KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
+        )
         return Result.succ(
             knowledge_space_service.create_knowledge_document(
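The bug fixed here is the trailing comma in the old code: it wrapped the joined path in a 1-tuple, so request.content became a tuple instead of a string. In miniature:

    import os

    content = (os.path.join("root", "space", "doc.pdf"),)  # old: a 1-tuple
    fixed = os.path.join("root", "space", "doc.pdf")       # new: a plain str
    print(type(content).__name__, type(fixed).__name__)    # tuple str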

View File

@@ -5,7 +5,7 @@ from sqlalchemy import Column, String, DateTime, Integer, Text, create_engine, func
 from sqlalchemy.orm import declarative_base, sessionmaker

 from pilot.configs.config import Config
+from pilot.connections.rdbms.base_dao import BaseDao

 CFG = Config()
@@ -27,14 +27,11 @@ class DocumentChunkEntity(Base):
         return f"DocumentChunkEntity(id={self.id}, doc_name='{self.doc_name}', doc_type='{self.doc_type}', document_id='{self.document_id}', content='{self.content}', meta_info='{self.meta_info}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"

-class DocumentChunkDao:
+class DocumentChunkDao(BaseDao):
     def __init__(self):
-        database = "knowledge_management"
-        self.db_engine = create_engine(
-            f"mysql+pymysql://{CFG.LOCAL_DB_USER}:{CFG.LOCAL_DB_PASSWORD}@{CFG.LOCAL_DB_HOST}:{CFG.LOCAL_DB_PORT}/{database}",
-            echo=True,
-        )
-        self.Session = sessionmaker(bind=self.db_engine)
+        super().__init__(
+            database="knowledge_management", orm_base=Base, create_not_exist_table=True
+        )

     def create_documents_chunks(self, documents: List):
         session = self.Session()

View File

@@ -4,7 +4,7 @@ from sqlalchemy import Column, String, DateTime, Integer, Text, create_engine, func
 from sqlalchemy.orm import declarative_base, sessionmaker

 from pilot.configs.config import Config
+from pilot.connections.rdbms.base_dao import BaseDao

 CFG = Config()
@@ -19,7 +19,7 @@ class KnowledgeDocumentEntity(Base):
     space = Column(String(100))
     chunk_size = Column(Integer)
     status = Column(String(100))
-    last_sync = Column(String(100))
+    last_sync = Column(DateTime)
     content = Column(Text)
     result = Column(Text)
     vector_ids = Column(Text)
@@ -30,14 +30,11 @@ class KnowledgeDocumentEntity(Base):
         return f"KnowledgeDocumentEntity(id={self.id}, doc_name='{self.doc_name}', doc_type='{self.doc_type}', chunk_size='{self.chunk_size}', status='{self.status}', last_sync='{self.last_sync}', content='{self.content}', result='{self.result}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"

-class KnowledgeDocumentDao:
+class KnowledgeDocumentDao(BaseDao):
     def __init__(self):
-        database = "knowledge_management"
-        self.db_engine = create_engine(
-            f"mysql+pymysql://{CFG.LOCAL_DB_USER}:{CFG.LOCAL_DB_PASSWORD}@{CFG.LOCAL_DB_HOST}:{CFG.LOCAL_DB_PORT}/{database}",
-            echo=True,
-        )
-        self.Session = sessionmaker(bind=self.db_engine)
+        super().__init__(
+            database="knowledge_management", orm_base=Base, create_not_exist_table=True
+        )

     def create_knowledge_document(self, document: KnowledgeDocumentEntity):
         session = self.Session()

View File

@@ -2,11 +2,11 @@ from datetime import datetime
 from sqlalchemy import Column, Integer, Text, String, DateTime, create_engine
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker

 from pilot.configs.config import Config
 from pilot.server.knowledge.request.request import KnowledgeSpaceRequest
-from sqlalchemy.orm import sessionmaker
+from pilot.connections.rdbms.base_dao import BaseDao

 CFG = Config()

 Base = declarative_base()
@@ -27,14 +27,11 @@ class KnowledgeSpaceEntity(Base):
         return f"KnowledgeSpaceEntity(id={self.id}, name='{self.name}', vector_type='{self.vector_type}', desc='{self.desc}', owner='{self.owner}' context='{self.context}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"

-class KnowledgeSpaceDao:
+class KnowledgeSpaceDao(BaseDao):
     def __init__(self):
-        database = "knowledge_management"
-        self.db_engine = create_engine(
-            f"mysql+pymysql://{CFG.LOCAL_DB_USER}:{CFG.LOCAL_DB_PASSWORD}@{CFG.LOCAL_DB_HOST}:{CFG.LOCAL_DB_PORT}/{database}",
-            echo=True,
-        )
-        self.Session = sessionmaker(bind=self.db_engine)
+        super().__init__(
+            database="knowledge_management", orm_base=Base, create_not_exist_table=True
+        )

     def create_knowledge_space(self, space: KnowledgeSpaceRequest):
         session = self.Session()