Mirror of https://github.com/csunny/DB-GPT.git
Synced 2025-08-11 13:12:18 +00:00

Commit dcc2ad0e08: style:fmt
@@ -4,13 +4,13 @@ from pilot.commands.command_mange import command
 from pilot.configs.config import Config
 import pandas as pd
 import uuid
-import io
 import os
 import matplotlib
 import seaborn as sns

 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
+import matplotlib.ticker as mtick
 from matplotlib.font_manager import FontManager

 from pilot.configs.model_config import LOGDIR
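For illustration only, not part of this commit: matplotlib.use("Agg") stays above the pyplot import so charts can be rendered on a headless server, and the newly imported matplotlib.ticker (mtick) is used further down for tick formatting. A minimal sketch of the backend pattern, with an invented output path:

    import matplotlib

    # Select the non-interactive Agg backend before pyplot is imported,
    # so no GUI toolkit is needed when rendering charts server-side.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot([0, 1, 2], [1, 4, 2])
    fig.savefig("example.png")  # invented output path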
@@ -21,6 +21,54 @@ logger = build_logger("show_chart_gen", LOGDIR + "show_chart_gen.log")
 static_message_img_path = os.path.join(os.getcwd(), "message/img")


+def data_pre_classification(df: DataFrame):
+    ## Data pre-classification
+    columns = df.columns.tolist()
+
+    number_columns = []
+    non_numeric_colums = []
+
+    # Collect columns with fewer than 10 distinct categories
+    non_numeric_colums_value_map = {}
+    numeric_colums_value_map = {}
+    for column_name in columns:
+        if pd.api.types.is_numeric_dtype(df[column_name].dtypes):
+            number_columns.append(column_name)
+            unique_values = df[column_name].unique()
+            numeric_colums_value_map.update({column_name: len(unique_values)})
+        else:
+            non_numeric_colums.append(column_name)
+            unique_values = df[column_name].unique()
+            non_numeric_colums_value_map.update({column_name: len(unique_values)})
+
+    sorted_numeric_colums_value_map = dict(
+        sorted(numeric_colums_value_map.items(), key=lambda x: x[1])
+    )
+    numeric_colums_sort_list = list(sorted_numeric_colums_value_map.keys())
+
+    sorted_colums_value_map = dict(
+        sorted(non_numeric_colums_value_map.items(), key=lambda x: x[1])
+    )
+    non_numeric_colums_sort_list = list(sorted_colums_value_map.keys())
+
+    # Analyze x-coordinate
+    if len(non_numeric_colums_sort_list) > 0:
+        x_cloumn = non_numeric_colums_sort_list[-1]
+        non_numeric_colums_sort_list.remove(x_cloumn)
+    else:
+        x_cloumn = number_columns[0]
+        numeric_colums_sort_list.remove(x_cloumn)
+
+    # Analyze y-coordinate
+    if len(numeric_colums_sort_list) > 0:
+        y_column = numeric_colums_sort_list[0]
+        numeric_colums_sort_list.remove(y_column)
+    else:
+        raise ValueError("Not enough numeric columns for chart!")
+
+    return x_cloumn, y_column, non_numeric_colums_sort_list, numeric_colums_sort_list
+
+
 def zh_font_set():
     font_names = [
         "Heiti TC",
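For illustration only, not part of this commit: the helper added above picks the non-numeric column with the most distinct values as the x axis and the numeric column with the fewest distinct values as the y axis, returning the leftover columns as candidates for hue or extra series. A sketch of a frame it could be applied to, with invented column names and values:

    import pandas as pd

    df = pd.DataFrame(
        {
            "Segment": ["Gov", "Gov", "Retail", "Retail"],   # non-numeric -> x candidate
            "Country": ["DE", "FR", "DE", "FR"],             # non-numeric -> hue candidate
            "Total_Sales": [100.0, 120.0, 90.0, 110.0],      # numeric -> y candidate
            "Total_Profit": [10.0, 12.0, 9.0, 11.0],         # numeric -> extra series
        }
    )
    # Calling the helper on this frame would look like:
    #     x, y, non_num_columns, num_columns = data_pre_classification(df)
    # x is one of the text columns, y is one of the numeric columns, and the
    # leftover lists feed the hue / multi-series branches of the chart functions.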
@@ -48,9 +96,6 @@ def zh_font_set():
     )

 def response_line_chart(speak: str, df: DataFrame) -> str:
     logger.info(f"response_line_chart:{speak},")
-    columns = df.columns.tolist()
-
-
     if df.size <= 0:
         raise ValueError("No Data!")
@@ -85,7 +130,19 @@ def response_line_chart(speak: str, df: DataFrame) -> str:
     sns.set(context="notebook", style="ticks", rc=rc)

     fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
-    sns.lineplot(df, x=columns[0], y=columns[1], ax=ax)
+    x, y, non_num_columns, num_colmns = data_pre_classification(df)
+    # ## Complex (multi-series) line chart implementation
+    if len(num_colmns) > 0:
+        num_colmns.append(y)
+        df_melted = pd.melt(
+            df, id_vars=x, value_vars=num_colmns, var_name="line", value_name="Value"
+        )
+        sns.lineplot(data=df_melted, x=x, y="Value", hue="line", ax=ax, palette="Set2")
+    else:
+        sns.lineplot(data=df, x=x, y=y, ax=ax, palette="Set2")
+
+    ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda y, _: "{:,.0f}".format(y)))
+    ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: "{:,.0f}".format(x)))

     chart_name = "line_" + str(uuid.uuid1()) + ".png"
     chart_path = static_message_img_path + "/" + chart_name
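For illustration only, not part of this commit: the multi-series branch above reshapes the frame from wide to long with pd.melt so seaborn draws one line per numeric column via hue. A standalone sketch with invented column names and an invented output path:

    import pandas as pd
    import seaborn as sns
    import matplotlib

    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    df = pd.DataFrame(
        {"Year": [2021, 2022, 2023], "Sales": [10, 12, 15], "Profit": [2, 3, 4]}
    )
    # Wide -> long: one row per (Year, series) pair, with the series name kept in "line".
    df_melted = pd.melt(
        df, id_vars="Year", value_vars=["Sales", "Profit"], var_name="line", value_name="Value"
    )
    fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
    sns.lineplot(data=df_melted, x="Year", y="Value", hue="line", ax=ax, palette="Set2")
    plt.savefig("example_line.png", bbox_inches="tight", dpi=100)  # invented path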
@@ -102,7 +159,6 @@ def response_line_chart(speak: str, df: DataFrame) -> str:
     )

 def response_bar_chart(speak: str, df: DataFrame) -> str:
     logger.info(f"response_bar_chart:{speak},")
-    columns = df.columns.tolist()
     if df.size <= 0:
         raise ValueError("No Data!")
@@ -136,9 +192,44 @@ def response_bar_chart(speak: str, df: DataFrame) -> str:
     sns.set(context="notebook", style="ticks", rc=rc)

     fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
-    sns.barplot(df, x=df[columns[0]], y=df[columns[1]], ax=ax)

-    chart_name = "pie_" + str(uuid.uuid1()) + ".png"
+    hue = None
+    x, y, non_num_columns, num_colmns = data_pre_classification(df)
+    if len(non_num_columns) >= 1:
+        hue = non_num_columns[0]
+
+    if len(num_colmns) >= 1:
+        if hue:
+            if len(num_colmns) >= 2:
+                can_use_columns = num_colmns[:2]
+            else:
+                can_use_columns = num_colmns
+            sns.barplot(data=df, x=x, y=y, hue=hue, palette="Set2", ax=ax)
+            for sub_y_column in can_use_columns:
+                sns.barplot(
+                    data=df, x=x, y=sub_y_column, hue=hue, palette="Set2", ax=ax
+                )
+        else:
+            if len(num_colmns) >= 3:
+                can_use_columns = num_colmns[:3]
+            else:
+                can_use_columns = num_colmns
+            sns.barplot(
+                data=df, x=x, y=y, hue=can_use_columns[0], palette="Set2", ax=ax
+            )
+
+            for sub_y_column in can_use_columns[1:]:
+                sns.barplot(
+                    data=df, x=x, y=sub_y_column, hue=hue, palette="Set2", ax=ax
+                )
+    else:
+        sns.barplot(data=df, x=x, y=y, hue=hue, palette="Set2", ax=ax)
+
+    # Format the y-axis ticks as plain numbers
+    ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda y, _: "{:,.0f}".format(y)))
+    ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: "{:,.0f}".format(x)))
+
+    chart_name = "bar_" + str(uuid.uuid1()) + ".png"
     chart_path = static_message_img_path + "/" + chart_name
     plt.savefig(chart_path, bbox_inches="tight", dpi=100)
     html_img = f"""<h5>{speak}</h5><img style='max-width: 100%; max-height: 70%;' src="/images/{chart_name}" />"""
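For illustration only, not part of this commit: the hue-grouped bar pattern used above as a standalone sketch, with invented column names and an invented output path:

    import pandas as pd
    import seaborn as sns
    import matplotlib

    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import matplotlib.ticker as mtick

    df = pd.DataFrame(
        {
            "Segment": ["Gov", "Gov", "Retail", "Retail"],
            "Country": ["DE", "FR", "DE", "FR"],
            "Total_Sales": [100000, 120000, 90000, 110000],
        }
    )
    fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
    # One bar group per Segment, split into bars by the non-numeric hue column.
    sns.barplot(data=df, x="Segment", y="Total_Sales", hue="Country", palette="Set2", ax=ax)
    # Plain, thousands-separated tick labels instead of scientific notation.
    ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda v, _: "{:,.0f}".format(v)))
    plt.savefig("example_bar.png", bbox_inches="tight", dpi=100)  # invented path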
@@ -188,13 +279,6 @@ def response_pie_chart(speak: str, df: DataFrame) -> str:
         startangle=90,
         autopct="%1.1f%%",
     )
-    # Manually set the position and size of the labels
-    ax.legend(
-        loc="upper right",
-        bbox_to_anchor=(0, 0, 1, 1),
-        labels=df[columns[0]].values,
-        fontsize=10,
-    )

     plt.axis("equal")  # make the pie chart a perfect circle
     # plt.title(columns[0])
Binary file not shown. (Before: 19 KiB)
Binary file not shown. (Before: 18 KiB)
@@ -1,9 +1,11 @@
 import os
 import duckdb
 import pandas as pd
+import numpy as np
 import matplotlib
 import seaborn as sns
 import uuid

 from pandas import DataFrame
+
 import matplotlib.pyplot as plt
@@ -29,8 +31,12 @@ def data_pre_classification(df: DataFrame):
     # Collect columns with fewer than 10 distinct categories
     non_numeric_colums_value_map = {}
     numeric_colums_value_map = {}
+    df_filtered = df.dropna()
     for column_name in columns:
-        if pd.to_numeric(df[column_name], errors="coerce").notna().all():
+        print(np.issubdtype(df_filtered[column_name].dtype, np.number))
+        # if pd.to_numeric(df[column_name], errors='coerce').notna().all():
+        # if np.issubdtype(df_filtered[column_name].dtype, np.number):
+        if pd.api.types.is_numeric_dtype(df[column_name].dtypes):
             number_columns.append(column_name)
             unique_values = df[column_name].unique()
             numeric_colums_value_map.update({column_name: len(unique_values)})
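For illustration only, not part of this commit: the three numeric checks that appear in this hunk behave differently on object-typed columns and on columns with missing values. A small comparison with invented data:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, None], "B": ["1", "2", "3"], "D": ["a", "b", "c"]})

    for col in df.columns:
        by_dtype = pd.api.types.is_numeric_dtype(df[col])                    # dtype-based (kept by the patch)
        by_subdtype = np.issubdtype(df[col].dropna().dtype, np.number)       # NumPy dtype check
        by_coercion = pd.to_numeric(df[col], errors="coerce").notna().all()  # value-based
        print(col, by_dtype, by_subdtype, by_coercion)

    # "A" (floats with a missing value): the dtype checks say numeric, but the
    #     coercion check is False because the NaN fails notna().all().
    # "B" (numbers stored as strings): only the coercion check treats it as numeric.
    # "D" (plain text): all three checks return False.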
@@ -39,27 +45,68 @@ def data_pre_classification(df: DataFrame):
             unique_values = df[column_name].unique()
             non_numeric_colums_value_map.update({column_name: len(unique_values)})

-    if len(non_numeric_colums) <= 0:
-        sorted_colums_value_map = dict(
-            sorted(numeric_colums_value_map.items(), key=lambda x: x[1])
-        )
-        numeric_colums_sort_list = list(sorted_colums_value_map.keys())
-        x_column = number_columns[0]
-        hue_column = numeric_colums_sort_list[0]
-        y_column = numeric_colums_sort_list[1]
-    elif len(number_columns) <= 0:
-        raise ValueError("Have No numeric Column!")
-    else:
-        # Both numeric and non-numeric columns exist; drop some of the numeric columns
-        y_column = number_columns[0]
-        x_column = non_numeric_colums[0]
-        # if len(non_numeric_colums) > 1:
-        #
-        # else:
-
-        # non_numeric_colums_sort_list.remove(non_numeric_colums[0])
-        # hue_column = non_numeric_colums_sort_list
-    return x_column, y_column, hue_column
+    sorted_numeric_colums_value_map = dict(
+        sorted(numeric_colums_value_map.items(), key=lambda x: x[1])
+    )
+    numeric_colums_sort_list = list(sorted_numeric_colums_value_map.keys())
+
+    sorted_colums_value_map = dict(
+        sorted(non_numeric_colums_value_map.items(), key=lambda x: x[1])
+    )
+    non_numeric_colums_sort_list = list(sorted_colums_value_map.keys())
+
+    # Analyze x-coordinate
+    if len(non_numeric_colums_sort_list) > 0:
+        x_cloumn = non_numeric_colums_sort_list[-1]
+        non_numeric_colums_sort_list.remove(x_cloumn)
+    else:
+        x_cloumn = number_columns[0]
+        numeric_colums_sort_list.remove(x_cloumn)
+
+    # Analyze y-coordinate
+    if len(numeric_colums_sort_list) > 0:
+        y_column = numeric_colums_sort_list[0]
+        numeric_colums_sort_list.remove(y_column)
+    else:
+        raise ValueError("Not enough numeric columns for chart!")
+
+    return x_cloumn, y_column, non_numeric_colums_sort_list, numeric_colums_sort_list
+
+
+#
+# if len(non_numeric_colums) <=0:
+#     sorted_colums_value_map = dict(sorted(numeric_colums_value_map.items(), key=lambda x: x[1]))
+#     numeric_colums_sort_list = list(sorted_colums_value_map.keys())
+#     x_column = number_columns[0]
+#     hue_column = numeric_colums_sort_list[0]
+#     y_column = numeric_colums_sort_list[1]
+#     cols = numeric_colums_sort_list[2:]
+# elif len(number_columns) <=0:
+#     raise ValueError("Have No numeric Column!")
+# else:
+#     # Both numeric and non-numeric columns exist in multiples; drop some of the numeric columns
+#     x_column = non_numeric_colums[0]
+#     y_column = number_columns[0]
+#     if len(non_numeric_colums) > 1:
+#         sorted_colums_value_map = dict(sorted(non_numeric_colums_value_map.items(), key=lambda x: x[1]))
+#         non_numeric_colums_sort_list = list(sorted_colums_value_map.keys())
+#         non_numeric_colums_sort_list.remove(non_numeric_colums[0])
+#         hue_column = non_numeric_colums_sort_list[0]
+#         if len(number_columns) > 1:
+#             # try multiple charts
+#             cols = number_columns.remove( number_columns[0])
+#
+#     else:
+#         sorted_colums_value_map = dict(sorted(numeric_colums_value_map.items(), key=lambda x: x[1]))
+#         numeric_colums_sort_list = list(sorted_colums_value_map.keys())
+#         numeric_colums_sort_list.remove(number_columns[0])
+#         if sorted_colums_value_map[numeric_colums_sort_list[0]].value < 5:
+#             hue_column = numeric_colums_sort_list[0]
+#             if len(number_columns) > 2:
+#                 # try multiple charts
+#                 cols = numeric_colums_sort_list.remove(numeric_colums_sort_list[0])
+#
+# print(x_column, y_column, hue_column, cols)
+# return x_column, y_column, hue_column


 if __name__ == "__main__":
@@ -79,17 +126,47 @@ if __name__ == "__main__":
     # Get the system's default Chinese font name
     # default_font = fm.fontManager.defaultFontProperties.get_family()

+    # Create a sample DataFrame
+    df = pd.DataFrame(
+        {
+            "A": [1, 2, 3, None, 5],
+            "B": [10, 20, 30, 40, 50],
+            "C": [1.1, 2.2, None, 4.4, 5.5],
+            "D": ["a", "b", "c", "d", "e"],
+        }
+    )
+
+    # Check whether a column is numeric
+    column_name = "A"  # the column name to check
+    is_numeric = pd.to_numeric(df[column_name], errors="coerce").notna().all()
+
+    if is_numeric:
+        print(
+            f"Column '{column_name}' is a numeric column (ignoring null and NaN values in some elements)."
+        )
+    else:
+        print(
+            f"Column '{column_name}' is not a numeric column (ignoring null and NaN values in some elements)."
+        )
+
     #
-    excel_reader = ExcelReader("/Users/tuyang.yhj/Downloads/example.xlsx")
+    # excel_reader = ExcelReader("/Users/tuyang.yhj/Downloads/example.xlsx")
+    excel_reader = ExcelReader("/Users/tuyang.yhj/Downloads/yhj-zx.csv")
     #
     # # colunms, datas = excel_reader.run( "SELECT CONCAT(Year, '-', Quarter) AS QuarterYear, SUM(Sales) AS TotalSales FROM example GROUP BY QuarterYear ORDER BY QuarterYear")
     # # colunms, datas = excel_reader.run( """ SELECT Year, SUM(Sales) AS Total_Sales FROM example GROUP BY Year ORDER BY Year; """)
+    # df = excel_reader.get_df_by_sql_ex(""" SELECT Segment, Country, SUM(Sales) AS Total_Sales, SUM(Profit) AS Total_Profit FROM example GROUP BY Segment, Country """)
     df = excel_reader.get_df_by_sql_ex(
-        """ SELECT Segment, Country, SUM(Sales) AS Total_Sales, SUM(Profit) AS Total_Profit FROM example GROUP BY Segment, Country """
+        """ SELECT `明细`, `费用小计`, `支出小计` FROM yhj-zx limit 10"""
     )

-    x, y, hue = data_pre_classification(df)
-    print(x, y, hue)
+    for column_name in df.columns.tolist():
+        print(column_name + ":" + str(df[column_name].dtypes))
+        print(
+            column_name
+            + ":"
+            + str(pd.api.types.is_numeric_dtype(df[column_name].dtypes))
+        )

     columns = df.columns.tolist()
     font_names = [
@@ -118,116 +195,66 @@ if __name__ == "__main__":
     sns.color_palette("hls", 10)
     sns.hls_palette(8, l=0.5, s=0.7)
     sns.set(context="notebook", style="ticks", rc=rc)
-    # sns.set_palette("Set3")  # set the color theme
-    # sns.set_style("dark")
-    # sns.color_palette("hls", 10)
-    # sns.hls_palette(8, l=.5, s=.7)
-    # sns.set(context='notebook', style='ticks', rc=rc)

     fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
     # plt.ticklabel_format(style='plain')
     # ax = df.plot(kind='bar', ax=ax)
-    # sns.barplot(df, x=x, y=y, hue= "Country", ax=ax)
-    sns.catplot(data=df, x=x, y=y, hue="Country", kind="bar")
+    # sns.barplot(df, x=x, y="Total_Sales", hue='Country', ax=ax)
+    # sns.barplot(df, x=x, y="Total_Profit", hue='Country', ax=ax)

+    # sns.catplot(data=df, x=x, y=y, hue='Country', kind='bar')
+    x, y, non_num_columns, num_colmns = data_pre_classification(df)
+    print(x, y, str(non_num_columns), str(num_colmns))
+    ## Complex (multi-series) line chart implementation
+    if len(num_colmns) > 0:
+        num_colmns.append(y)
+        df_melted = pd.melt(
+            df, id_vars=x, value_vars=num_colmns, var_name="line", value_name="Value"
+        )
+        sns.lineplot(data=df_melted, x=x, y="Value", hue="line", ax=ax, palette="Set2")
+    else:
+        sns.lineplot(data=df, x=x, y=y, ax=ax, palette="Set2")

+    # hue = None
+    # ## Complex bar chart implementation
+    # x,y, non_num_columns, num_colmns =data_pre_classification(df)
+    # if len(non_num_columns) >= 1:
+    #     hue = non_num_columns[0]

+    # if len(num_colmns)>=1:
+    #     if hue:
+    #         if len(num_colmns) >= 2:
+    #             can_use_columns = num_colmns[:2]
+    #         else:
+    #             can_use_columns = num_colmns
+    #         sns.barplot(data=df, x=x, y=y, hue=hue, palette="Set2", ax=ax)
+    #         for sub_y_column in can_use_columns:
+    #             sns.barplot(data=df, x=x, y=sub_y_column, hue=hue, palette="Set2", ax=ax)
+    #     else:
+    #         if len(num_colmns) >= 3:
+    #             can_use_columns = num_colmns[:3]
+    #         else:
+    #             can_use_columns = num_colmns
+    #         sns.barplot(data=df, x=x, y=y, hue=can_use_columns[0], palette="Set2", ax=ax)
+    #
+    #         for sub_y_column in can_use_columns[1:]:
+    #             sns.barplot(data=df, x=x, y=sub_y_column, hue=hue, palette="Set2", ax=ax)
+    # else:
+    #     sns.barplot(data=df, x=x, y=y, hue=hue, palette="Set2", ax=ax)

+    # # Convert the DataFrame format
+    # df_melted = pd.melt(df, id_vars=x, value_vars=['Total_Sales', 'Total_Profit'], var_name='line', value_name='y')
+    #
+    # # Draw a multi-column bar chart
+    #
+    # sns.barplot(data=df, x=x, y="Total_Sales", hue = "Country", palette="Set2", ax=ax)
+    # sns.barplot(data=df, x=x, y="Total_Profit", hue = "Country", palette="Set1", ax=ax)

     # Format the y-axis ticks as plain numbers
     ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: "{:,.0f}".format(x)))

-    # fonts = font_manager.findSystemFonts()
-    # font_path = ""
-    # for font in fonts:
-    #     if "Heiti" in font:
-    #         font_path = font
-    # my_font = font_manager.FontProperties(fname=font_path)
-    # plt.title("测试", fontproperties=my_font)
-    # plt.ylabel(columns[1], fontproperties=my_font)
-    # plt.xlabel(columns[0], fontproperties=my_font)

     chart_name = "bar_" + str(uuid.uuid1()) + ".png"
     chart_path = chart_name
     plt.savefig(chart_path, bbox_inches="tight", dpi=100)

-    # sns.set(context="notebook", style="ticks", color_codes=True)
-    # sns.set_palette("Set3")  # set the color theme
-    #
-    # # fig, ax = plt.pie(df[columns[1]], labels=df[columns[0]], autopct='%1.1f%%', startangle=90)
-    # fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
-    # plt.subplots_adjust(top=0.9)
-    # ax = df.plot(kind='pie', y=columns[1], ax=ax, labels=df[columns[0]].values, startangle=90, autopct='%1.1f%%')
-    # # Manually set the position and size of the labels
-    # ax.legend(loc='center left', bbox_to_anchor=(-1, 0.5, 0,0), labels=None, fontsize=10)
-    # plt.axis('equal')  # make the pie chart a perfect circle
-    # plt.show()

-    #
-    #
-    # def csv_colunm_foramt(val):
-    #     if str(val).find("$") >= 0:
-    #         return float(val.replace('$', '').replace(',', ''))
-    #     if str(val).find("¥") >= 0:
-    #         return float(val.replace('¥', '').replace(',', ''))
-    #     return val
-    #
-    # # Get the current timestamp as the start time of the code
-    # start_time = int(time.time() * 1000)
-    #
-    # df = pd.read_excel('/Users/tuyang.yhj/Downloads/example.xlsx')
-    # # Read the Excel file into a Pandas DataFrame
-    # df = pd.read_excel('/Users/tuyang.yhj/Downloads/example.xlsx', converters={i: csv_colunm_foramt for i in range(df.shape[1])})
-    #
-    # # d = df.values
-    # # print(d.shape[0])
-    # # for row in d:
-    # #     print(row[0])
-    # #     print(len(row))
-    # # r = df.iterrows()
-    #
-    # # Get the current timestamp as the end time of the code
-    # end_time = int(time.time() * 1000)
-    #
-    # print(f"耗时:{(end_time-start_time)/1000}秒")
-    #
-    # # Connect to the DuckDB database
-    # con = duckdb.connect(database=':memory:', read_only=False)
-    #
-    # # Write the DataFrame into a table in the DuckDB database
-    # con.register('example', df)
-    #
-    # # Query the table in the DuckDB database
-    # conn = con.cursor()
-    # results = con.execute('SELECT Country, SUM(Profit) AS Total_Profit FROM example GROUP BY Country ORDER BY Total_Profit DESC LIMIT 1;')
-    # colunms = []
-    # for descrip in results.description:
-    #     colunms.append(descrip[0])
-    # print(colunms)
-    # for row in results.fetchall():
-    #     print(row)
-    #
-    #
-    # # Connect to the DuckDB database
-    # # con = duckdb.connect(':memory:')
-    #
-    # # # Load the spatial extension
-    # # con.execute('install spatial;')
-    # # con.execute('load spatial;')
-    # #
-    # # # Query the duckdb_internal system table to get the list of extensions
-    # # result = con.execute("SELECT * FROM duckdb_internal.functions WHERE schema='list_extensions';")
-    # #
-    # # # Iterate over the query results and print extension name and version
-    # # for row in result:
-    # #     print(row['name'], row['return_type'])
-    # # duckdb.read_csv('/Users/tuyang.yhj/Downloads/example_csc.csv')
-    # # result = duckdb.sql('SELECT * FROM "/Users/tuyang.yhj/Downloads/yhj-zx.csv" ')
-    # # result = duckdb.sql('SELECT * FROM "/Users/tuyang.yhj/Downloads/example_csc.csv" limit 20')
-    # # for row in result.fetchall():
-    # #     print(row)
-    #
-    #
-    # # result = con.execute("SELECT * FROM st_read('/Users/tuyang.yhj/Downloads/example.xlsx', layer='Sheet1')")
-    # # # Iterate over the query results
-    # # for row in result.fetchall():
-    # #     print(row)
-    # print("xx")
-    #
-    #
     #
@@ -71,7 +71,8 @@ class ExcelReader:
         for column_name in df_tmp.columns:
             self.columns_map.update({column_name: excel_colunm_format(column_name)})
             try:
-                self.df[column_name] = self.df[column_name].astype(float)
+                self.df[column_name] = pd.to_numeric(self.df[column_name])
+                self.df[column_name] = self.df[column_name].fillna(0)
             except Exception as e:
                 print("transfor column error!" + column_name)

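For illustration only, not part of this commit: the switch from astype(float) to pd.to_numeric followed by fillna(0) in ExcelReader, shown on invented values:

    import pandas as pd

    s = pd.Series(["1", "2.5", None])          # values as they might come out of Excel/CSV
    converted = pd.to_numeric(s)               # float64: [1.0, 2.5, NaN]; raises if a value is not numeric
    converted = converted.fillna(0)            # replace the NaN left by the missing cell with 0
    print(converted.tolist())                  # [1.0, 2.5, 0.0]

    # Unlike .astype(float), pd.to_numeric infers the narrowest numeric dtype
    # (for example int64 for integer-only input) instead of always producing floats.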
|
Loading…
Reference in New Issue
Block a user