mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-08-16 23:17:11 +00:00
Tweaks for Excel to Markdown conversion (#3022)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
dc82f883f8
commit
b850e7c867
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -16,4 +16,4 @@
|
|||||||
url = https://github.com/nomic-ai/DuckX.git
|
url = https://github.com/nomic-ai/DuckX.git
|
||||||
[submodule "gpt4all-chat/deps/QXlsx"]
|
[submodule "gpt4all-chat/deps/QXlsx"]
|
||||||
path = gpt4all-chat/deps/QXlsx
|
path = gpt4all-chat/deps/QXlsx
|
||||||
url = https://github.com/QtExcel/QXlsx.git
|
url = https://github.com/nomic-ai/QXlsx.git
|
||||||
|
@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
|
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
|
||||||
- Simplify chatmodel to get rid of unnecessary field and bump chat version ([#3016](https://github.com/nomic-ai/gpt4all/pull/3016))
|
- Simplify chatmodel to get rid of unnecessary field and bump chat version ([#3016](https://github.com/nomic-ai/gpt4all/pull/3016))
|
||||||
- Allow ChatLLM to have direct access to ChatModel for restoring state from text ([#3018](https://github.com/nomic-ai/gpt4all/pull/3018))
|
- Allow ChatLLM to have direct access to ChatModel for restoring state from text ([#3018](https://github.com/nomic-ai/gpt4all/pull/3018))
|
||||||
|
- Improvements to XLSX conversion and UI fix ([#3022](https://github.com/nomic-ai/gpt4all/pull/3022))
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
|
- Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit fda6b806e2ceebd81c01cdded07ae84c94f5879c
|
Subproject commit 29e81b369128525749dcb6516195b6b062eda955
|
@ -939,6 +939,7 @@ Rectangle {
|
|||||||
|
|
||||||
Text {
|
Text {
|
||||||
id: attachmentFileText
|
id: attachmentFileText
|
||||||
|
width: 295
|
||||||
height: 40
|
height: 40
|
||||||
text: modelData.file
|
text: modelData.file
|
||||||
color: theme.textColor
|
color: theme.textColor
|
||||||
@ -947,6 +948,7 @@ Rectangle {
|
|||||||
font.pixelSize: theme.fontSizeMedium
|
font.pixelSize: theme.fontSizeMedium
|
||||||
font.bold: true
|
font.bold: true
|
||||||
wrapMode: Text.WrapAnywhere
|
wrapMode: Text.WrapAnywhere
|
||||||
|
elide: Qt.ElideRight
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1971,6 +1973,7 @@ Rectangle {
|
|||||||
|
|
||||||
Text {
|
Text {
|
||||||
id: attachmentFileText2
|
id: attachmentFileText2
|
||||||
|
width: 265
|
||||||
height: 40
|
height: 40
|
||||||
text: model.file
|
text: model.file
|
||||||
color: theme.textColor
|
color: theme.textColor
|
||||||
@ -1979,6 +1982,7 @@ Rectangle {
|
|||||||
font.pixelSize: theme.fontSizeMedium
|
font.pixelSize: theme.fontSizeMedium
|
||||||
font.bold: true
|
font.bold: true
|
||||||
wrapMode: Text.WrapAnywhere
|
wrapMode: Text.WrapAnywhere
|
||||||
|
elide: Qt.ElideRight
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,8 +10,10 @@
|
|||||||
#include <QDateTime>
|
#include <QDateTime>
|
||||||
#include <QDebug>
|
#include <QDebug>
|
||||||
#include <QList>
|
#include <QList>
|
||||||
|
#include <QRegularExpression>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
|
#include <QStringView>
|
||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
#include <QtGlobal>
|
#include <QtGlobal>
|
||||||
#include <QtLogging>
|
#include <QtLogging>
|
||||||
@ -33,7 +35,7 @@ static QString formatCellText(const QXlsx::Cell *cell)
|
|||||||
if (cell->isDateTime()) {
|
if (cell->isDateTime()) {
|
||||||
// Handle DateTime
|
// Handle DateTime
|
||||||
QDateTime dateTime = cell->dateTime().toDateTime();
|
QDateTime dateTime = cell->dateTime().toDateTime();
|
||||||
cellText = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd") : value.toString();
|
cellText = dateTime.isValid() ? dateTime.toString(QStringView(u"yyyy-MM-dd")) : value.toString();
|
||||||
} else {
|
} else {
|
||||||
cellText = value.toString();
|
cellText = value.toString();
|
||||||
}
|
}
|
||||||
@ -41,23 +43,32 @@ static QString formatCellText(const QXlsx::Cell *cell)
|
|||||||
if (cellText.isEmpty())
|
if (cellText.isEmpty())
|
||||||
return QString();
|
return QString();
|
||||||
|
|
||||||
// Apply Markdown and HTML formatting based on font styles
|
// Escape special characters
|
||||||
QString formattedText = cellText;
|
static QRegularExpression special(
|
||||||
|
QStringLiteral(
|
||||||
if (format.fontBold() && format.fontItalic())
|
R"(()([\\`*_[\]<>()!|])|)" // special characters
|
||||||
formattedText = "***" + formattedText + "***";
|
R"(^(\s*)(#+(?:\s|$))|)" // headings
|
||||||
else if (format.fontBold())
|
R"(^(\s*[0-9])(\.(?:\s|$))|)" // ordered lists ("1. a")
|
||||||
formattedText = "**" + formattedText + "**";
|
R"(^(\s*)([+-](?:\s|$)))" // unordered lists ("- a")
|
||||||
else if (format.fontItalic())
|
),
|
||||||
formattedText = "*" + formattedText + "*";
|
QRegularExpression::MultilineOption
|
||||||
|
);
|
||||||
|
cellText.replace(special, uR"(\1\\2)"_s);
|
||||||
|
cellText.replace(u'&', "&amp;"_L1);
|
||||||
|
cellText.replace(u'<', "&lt;"_L1);
|
||||||
|
cellText.replace(u'>', "&gt;"_L1);
|
||||||
|
|
||||||
|
// Apply Markdown formatting based on font styles
|
||||||
|
if (format.fontUnderline())
|
||||||
|
cellText = u"_%1_"_s.arg(cellText);
|
||||||
|
if (format.fontBold())
|
||||||
|
cellText = u"**%1**"_s.arg(cellText);
|
||||||
|
if (format.fontItalic())
|
||||||
|
cellText = u"*%1*"_s.arg(cellText);
|
||||||
if (format.fontStrikeOut())
|
if (format.fontStrikeOut())
|
||||||
formattedText = "~~" + formattedText + "~~";
|
cellText = u"~~%1~~"_s.arg(cellText);
|
||||||
|
|
||||||
// Escape pipe characters to prevent Markdown table issues
|
return cellText;
|
||||||
formattedText.replace("|", "\\|");
|
|
||||||
|
|
||||||
return formattedText;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static QString getCellValue(QXlsx::Worksheet *sheet, int row, int col)
|
static QString getCellValue(QXlsx::Worksheet *sheet, int row, int col)
|
||||||
@ -124,44 +135,35 @@ QString XLSXToMD::toMarkdown(QIODevice *xlsxDevice)
|
|||||||
|
|
||||||
if (firstRow > lastRow || firstCol > lastCol) {
|
if (firstRow > lastRow || firstCol > lastCol) {
|
||||||
qWarning() << "Sheet" << sheetName << "is empty.";
|
qWarning() << "Sheet" << sheetName << "is empty.";
|
||||||
markdown += "*No data available.*\n\n";
|
markdown += QStringView(u"*No data available.*\n\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assume the first row is the header
|
auto appendRow = [&markdown](auto &list) { markdown += u"|%1|\n"_s.arg(list.join(u'|')); };
|
||||||
int headerRow = firstRow;
|
|
||||||
|
|
||||||
// Collect headers
|
// Empty header
|
||||||
|
static QString header(u' ');
|
||||||
|
static QString separator(u'-');
|
||||||
QStringList headers;
|
QStringList headers;
|
||||||
for (int col = firstCol; col <= lastCol; ++col) {
|
|
||||||
QString header = getCellValue(sheet, headerRow, col);
|
|
||||||
headers << header;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create Markdown header row
|
|
||||||
QString headerRowMarkdown = "|" + headers.join("|") + "|";
|
|
||||||
markdown += headerRowMarkdown + "\n";
|
|
||||||
|
|
||||||
// Create Markdown separator row
|
|
||||||
QStringList separators;
|
QStringList separators;
|
||||||
for (int i = 0; i < headers.size(); ++i)
|
for (int col = firstCol; col <= lastCol; ++col) {
|
||||||
separators << "---";
|
headers << header;
|
||||||
QString separatorRow = "|" + separators.join("|") + "|";
|
separators << separator;
|
||||||
markdown += separatorRow + "\n";
|
}
|
||||||
|
appendRow(headers);
|
||||||
|
appendRow(separators);
|
||||||
|
|
||||||
// Iterate through data rows (starting from the row after header)
|
// Iterate through data rows
|
||||||
for (int row = headerRow + 1; row <= lastRow; ++row) {
|
for (int row = firstRow; row <= lastRow; ++row) {
|
||||||
QStringList rowData;
|
QStringList rowData;
|
||||||
for (int col = firstCol; col <= lastCol; ++col) {
|
for (int col = firstCol; col <= lastCol; ++col) {
|
||||||
QString cellText = getCellValue(sheet, row, col);
|
QString cellText = getCellValue(sheet, row, col);
|
||||||
rowData << cellText;
|
rowData << (cellText.isEmpty() ? u" "_s : cellText);
|
||||||
}
|
}
|
||||||
|
appendRow(rowData);
|
||||||
QString dataRow = "|" + rowData.join("|") + "|";
|
|
||||||
markdown += dataRow + "\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
markdown += "\n"; // Add an empty line between sheets
|
markdown += u'\n'; // Add an empty line between sheets
|
||||||
}
|
}
|
||||||
return markdown;
|
return markdown;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user