Update data_loader.py - timestamp+id (#1572)

This commit is contained in:
yongzheJIN 2024-05-30 10:28:09 +08:00 committed by GitHub
parent 47b0630e88
commit 14e159fa67
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,6 +1,7 @@
import logging import logging
from decimal import Decimal from decimal import Decimal
from typing import List from typing import List
import datetime
from dbgpt._private.config import Config from dbgpt._private.config import Config
from dbgpt.app.scene.chat_dashboard.data_preparation.report_schma import ValueItem from dbgpt.app.scene.chat_dashboard.data_preparation.report_schma import ValueItem
@ -22,7 +23,6 @@ class DashboardDataLoader:
# try: # try:
values: List[ValueItem] = [] values: List[ValueItem] = []
data_map = {} data_map = {}
data_map.update( data_map.update(
{ {
f"{field_name}": [row[index] for row in datas] f"{field_name}": [row[index] for row in datas]
@ -31,50 +31,89 @@ class DashboardDataLoader:
) )
# to Check Whether there are data in it # to Check Whether there are data in it
if len(datas) != 0: if len(datas) != 0:
# find the first string column # try to find string index
str_index = next( string_index = next(
( (
index index
for index, value in enumerate(datas[0]) for index, value in enumerate(datas[0])
if isinstance(value, str) if isinstance(value, str)
), ),
1, -1,
) )
if type(datas[0][str_index]) == str:
tempFieldName = field_names[:str_index] # try to find datetime index
tempFieldName.extend(field_names[str_index + 1 :]) datetime_index = next(
for field_name in tempFieldName: (
index
for index, value in enumerate(datas[0])
if isinstance(value, (datetime.date, datetime.datetime))
),
-1,
)
# otherwise fall back to the first column whose name contains "id" (presumably the primary key)
id_index = next(
(index for index, value in enumerate(field_names) if "id" in value), -1
)
# no string column, no datetime column and no "id" column was found
if string_index == -1 and datetime_index == -1 and id_index == -1:
# skip None values when summing each column
result = [
sum(values for values in data if values is not None)
for data in zip(*datas)
]
for index, field_name in enumerate(field_names):
value_item = ValueItem(
name=field_name,
type=f"{field_name}_amount",
value=str(result[index]),
)
values.append(value_item)
# a string, datetime or id column exists; prefer string, then datetime, then id
else:
# pick the key column: string first, then datetime, then id
primary_index = (
string_index
if string_index != -1
else (datetime_index if datetime_index != -1 else id_index)
)
temp_field_name = field_names[:primary_index]
temp_field_name.extend(field_names[primary_index + 1 :])
for field_name in temp_field_name:
for data in datas: for data in datas:
# None Data won't be ok for the chart # None Data won't be ok for the chart
if not any(item is None for item in data): if not any(item is None for item in data):
value_item = ValueItem( value_item = ValueItem(
name=data[str_index], name=str(data[primary_index]),
type=field_name, type=field_name,
value=str(data[field_names.index(field_name)]), value=str(data[field_names.index(field_name)])
if not isinstance(
type(data[field_names.index(field_name)]),
(datetime.datetime, datetime.date),
)
else str(
data[field_names.index(field_name)].strftime(
"%Y%m%d"
)
),
) )
values.append(value_item) values.append(value_item)
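# rows containing None: use "0" as the placeholder for numbers and "19700101" for datetimes
# NOTE(review): the isinstance(type(...), ...) check below tests the class object, not the value, so it is always False and "19700101" is never produced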
else: else:
value_item = ValueItem( value_item = ValueItem(
name=data[str_index], name=data[string_index],
type=field_name, type=field_name,
value="0", value="0"
if not isinstance(
type(data[field_names.index(field_name)]),
(datetime.datetime, datetime.date),
) )
values.append(value_item) else "19700101",
else:
result = [sum(values) for values in zip(*datas)]
for index, field_name in enumerate(field_names):
value_item = ValueItem(
name=field_name,
type=f"{field_name}_count",
value=str(result[index]),
) )
values.append(value_item) values.append(value_item)
return field_names, values return field_names, values
else:
return field_names, [
ValueItem(name=f"{field_name}", type=f"{field_name}", value="0")
for index, field_name in enumerate(field_names)
]
def get_chart_values_by_db(self, db_name: str, chart_sql: str): def get_chart_values_by_db(self, db_name: str, chart_sql: str):
logger.info(f"get_chart_values_by_db:{db_name},{chart_sql}") logger.info(f"get_chart_values_by_db:{db_name},{chart_sql}")