feat(editor): ChatExcel
🔥ChatExcel Mode Operation Manual
BIN
assets/chat_excel/chat_excel_1.png
Normal file
After Width: | Height: | Size: 783 KiB |
BIN
assets/chat_excel/chat_excel_2.png
Normal file
After Width: | Height: | Size: 53 KiB |
BIN
assets/chat_excel/chat_excel_3.png
Normal file
After Width: | Height: | Size: 56 KiB |
BIN
assets/chat_excel/chat_excel_4.png
Normal file
After Width: | Height: | Size: 366 KiB |
BIN
assets/chat_excel/chat_excel_5.png
Normal file
After Width: | Height: | Size: 110 KiB |
BIN
assets/chat_excel/chat_excel_6.png
Normal file
After Width: | Height: | Size: 124 KiB |
BIN
assets/chat_excel/chat_excel_7.png
Normal file
After Width: | Height: | Size: 138 KiB |
26
docs/getting_started/application/chatexcel/chatexcel.md
Normal file
@ -0,0 +1,26 @@
|
||||
ChatExcel
|
||||
==================================
|
||||
ChatExcel uses natural language to analyze and query Excel data.
|
||||
|
||||
### 1.Select And Upload Excel or CSV File
|
||||
Select the Excel or CSV file you want to analyze, upload it, and start the conversation.
|
||||
```{tip}
|
||||
ChatExcel
|
||||
|
||||
ChatExcel supports Excel and CSV files; select the file you want to use.
|
||||
```
|
||||

|
||||

|
||||
|
||||
### 2.Wait for Data Processing
|
||||
After the file is uploaded, ChatExcel first learns and processes the data structure and the meaning of each field.
|
||||

|
||||
|
||||
### 3.Use Data Analysis Calculation
|
||||
Now you can use natural language to analyze and query data in the dialog box.
|
||||

|
||||

|
||||

|
||||
|
||||
|
||||
|
@ -3,38 +3,155 @@ import duckdb
|
||||
import pandas as pd
|
||||
import matplotlib
|
||||
import seaborn as sns
|
||||
import uuid
|
||||
from pandas import DataFrame
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.ticker as mtick
|
||||
from matplotlib import font_manager
|
||||
from matplotlib.font_manager import FontManager
|
||||
matplotlib.use("Agg")
|
||||
import time
|
||||
from fsspec import filesystem
|
||||
import spatial
|
||||
import spatial
|
||||
|
||||
from pilot.scene.chat_data.chat_excel.excel_reader import ExcelReader
|
||||
|
||||
|
||||
def data_pre_classification(df: DataFrame):
    """Pick the x, y and hue columns to use when charting ``df``.

    A column counts as numeric only when every value in it can be parsed by
    ``pd.to_numeric``; a column containing any missing or unparsable value is
    treated as non-numeric.

    * Only numeric columns: ``x`` is the first numeric column, ``hue`` is the
      numeric column with the fewest distinct values and ``y`` is the one with
      the second fewest.
    * Numeric and non-numeric columns: ``x`` is the first non-numeric column,
      ``y`` is the first numeric column, and ``hue`` is the remaining
      non-numeric column with the fewest distinct values, or ``None`` when
      there is no other non-numeric column.

    Args:
        df: The data that is going to be charted.

    Returns:
        A ``(x_column, y_column, hue_column)`` tuple of column labels.
        ``hue_column`` may be ``None``.

    Raises:
        ValueError: If ``df`` has no numeric column, or if it has only numeric
            columns but fewer than two of them.
    """
    number_columns = []
    non_numeric_colums = []
    # Distinct-value count per column, used to rank grouping candidates.
    unique_counts = {}

    for column_name in df.columns.tolist():
        series = df[column_name]
        unique_counts[column_name] = len(series.unique())
        if pd.to_numeric(series, errors="coerce").notna().all():
            number_columns.append(column_name)
        else:
            non_numeric_colums.append(column_name)

    if not number_columns:
        raise ValueError("Have No numeric Column!")

    if not non_numeric_colums:
        if len(number_columns) < 2:
            # Two distinct numeric columns are needed for y and hue.
            raise ValueError(
                "Need at least two numeric columns when there is no non-numeric column!"
            )
        # sorted() is stable, so columns with equal counts keep frame order.
        by_cardinality = sorted(number_columns, key=unique_counts.__getitem__)
        return number_columns[0], by_cardinality[1], by_cardinality[0]

    # Numeric and non-numeric columns both exist; extra numeric columns are
    # ignored.
    x_column = non_numeric_colums[0]
    y_column = number_columns[0]
    # hue_column used to be left unassigned on this path, which made the
    # return statement raise UnboundLocalError.
    other_categories = non_numeric_colums[1:]
    hue_column = (
        min(other_categories, key=unique_counts.__getitem__)
        if other_categories
        else None
    )
    return x_column, y_column, hue_column
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: query a workbook through ExcelReader, choose chart
    # columns, then draw a pie chart and a bar chart.
    # NOTE: the workbook path below is machine-specific; adjust it locally.
    excel_reader = ExcelReader("/Users/tuyang.yhj/Downloads/example.xlsx")

    # Keep the grouped sales frame under its own name: the bar chart must be
    # drawn from the same frame the x/y/hue columns were chosen from.
    sales_df = excel_reader.get_df_by_sql_ex(
        """ SELECT Segment, Country, SUM(Sales) AS Total_Sales, SUM(Profit) AS Total_Profit FROM example GROUP BY Segment, Country """
    )
    x, y, hue = data_pre_classification(sales_df)
    print(x, y, hue)

    colunms, datas = excel_reader.run(
        """ SELECT Year, SUM(Sales) AS Total_Sales FROM example GROUP BY Year ORDER BY Year; """
    )
    profit_df = excel_reader.get_df_by_sql_ex(
        "SELECT Country, SUM(Profit) AS Total_Profit FROM example GROUP BY Country;"
    )
    columns = profit_df.columns.tolist()

    # Candidate fonts for Chinese labels; keep only the ones matplotlib
    # actually finds on this machine.
    font_names = [
        "Heiti TC",
        "Songti SC",
        "STHeiti Light",
        "Microsoft YaHei",
        "SimSun",
        "SimHei",
        "KaiTi",
    ]
    mat_fonts = {f.name for f in FontManager().ttflist}
    can_use_fonts = [name for name in font_names if name in mat_fonts]

    # Work around minus signs that fail to render with these fonts.
    rc = {"axes.unicode_minus": False}
    if can_use_fonts:
        # Only override the sans-serif list when a candidate was found; an
        # empty list would replace matplotlib's default sans-serif fallbacks.
        rc["font.sans-serif"] = can_use_fonts

    # Apply the theme once. sns.set() resets the palette, so the colour theme
    # has to be selected after it, not before.
    sns.set(context="notebook", style="ticks", rc=rc)
    sns.set_palette("Set3")

    # Pie chart of the second column, labelled by the first column.
    fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
    plt.subplots_adjust(top=0.9)
    ax = profit_df.plot(
        kind="pie",
        y=columns[1],
        ax=ax,
        labels=profit_df[columns[0]].values,
        startangle=90,
        autopct="%1.1f%%",
    )
    # Place the legend manually and set its font size.
    ax.legend(loc="center left", bbox_to_anchor=(-1, 0.5, 0, 0), labels=None, fontsize=10)
    plt.axis("equal")  # draw the pie as a circle
    plt.show()

    # Bar chart drawn from the frame the columns were classified on.
    grid = sns.catplot(data=sales_df, x=x, y=y, hue=hue, kind="bar")
    # Show y ticks as plain numbers with thousands separators; this belongs on
    # the bar chart axes, not on the pie axes created above.
    plain_number = mtick.FuncFormatter(lambda value, _: "{:,.0f}".format(value))
    for bar_ax in grid.axes.flat:
        bar_ax.yaxis.set_major_formatter(plain_number)

    chart_name = "bar_" + str(uuid.uuid1()) + ".png"
    chart_path = chart_name
    # catplot opened a new current figure, so this saves the bar chart.
    plt.savefig(chart_path, bbox_inches="tight", dpi=100)
|
||||
|
||||
|
||||
|
||||
# sns.set(context="notebook", style="ticks", color_codes=True)
|
||||
# sns.set_palette("Set3") # 设置颜色主题
|
||||
#
|
||||
# # fig, ax = plt.pie(df[columns[1]], labels=df[columns[0]], autopct='%1.1f%%', startangle=90)
|
||||
# fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
|
||||
# plt.subplots_adjust(top=0.9)
|
||||
# ax = df.plot(kind='pie', y=columns[1], ax=ax, labels=df[columns[0]].values, startangle=90, autopct='%1.1f%%')
|
||||
# # 手动设置 labels 的位置和大小
|
||||
# ax.legend(loc='center left', bbox_to_anchor=(-1, 0.5, 0,0), labels=None, fontsize=10)
|
||||
# plt.axis('equal') # 使饼图为正圆形
|
||||
# plt.show()
|
||||
|
||||
#
|
||||
#
|
||||
# def csv_colunm_foramt(val):
|
||||
|