backend: rebase llama.cpp on upstream as of Sep 26th (#2998)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Authored by Jared Van Bortel on 2024-09-27 12:05:59 -04:00, committed by GitHub
parent 8bd937eb68
commit f9d6be8afb
16 changed files with 165 additions and 600 deletions

View File

@@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ### Added
 - Add bm25 hybrid search to localdocs ([#2969](https://github.com/nomic-ai/gpt4all/pull/2969))
 
+### Changed
+- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
+
 ### Fixed
 - Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
 - Fix the local server rejecting min\_p/top\_p less than 1 ([#2996](https://github.com/nomic-ai/gpt4all/pull/2996))

View File

@@ -71,19 +71,19 @@ bool ChatAPI::isModelLoaded() const
 // All three of the state virtual functions are handled custom inside of chatllm save/restore
 size_t ChatAPI::stateSize() const
 {
-    return 0;
+    throw std::logic_error("not implemented");
 }
 
-size_t ChatAPI::saveState(uint8_t *dest) const
+size_t ChatAPI::saveState(std::span<uint8_t> dest) const
 {
     Q_UNUSED(dest);
-    return 0;
+    throw std::logic_error("not implemented");
 }
 
-size_t ChatAPI::restoreState(const uint8_t *src)
+size_t ChatAPI::restoreState(std::span<const uint8_t> src)
 {
     Q_UNUSED(src);
-    return 0;
+    throw std::logic_error("not implemented");
 }
 
 void ChatAPI::prompt(const std::string &prompt,
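
The hunk above moves ChatAPI's state hooks from raw pointers to std::span and replaces the silent "return 0" stubs with a thrown logic_error. A minimal standalone sketch of what the span-based contract buys (illustrative names only, not the real LLModel interface):

// Sketch, not the real LLModel API: a std::span carries its own length,
// so the callee can bounds-check instead of trusting the caller to have
// allocated stateSize() bytes behind a bare pointer.
#include <algorithm>
#include <cstdint>
#include <span>
#include <vector>

struct ToyState {
    std::vector<uint8_t> blob; // hypothetical serialized model state

    // Returns bytes written, or 0 if dest is too small.
    size_t save(std::span<uint8_t> dest) const {
        if (dest.size() < blob.size())
            return 0;
        std::copy(blob.begin(), blob.end(), dest.begin());
        return blob.size();
    }

    // Returns bytes read, or 0 if src is not exactly one full state.
    size_t restore(std::span<const uint8_t> src) {
        if (src.size() != blob.size())
            return 0;
        std::copy(src.begin(), src.end(), blob.begin());
        return blob.size();
    }
};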

View File

@@ -64,8 +64,8 @@ public:
     bool isModelLoaded() const override;
     size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
     size_t stateSize() const override;
-    size_t saveState(uint8_t *dest) const override;
-    size_t restoreState(const uint8_t *src) override;
+    size_t saveState(std::span<uint8_t> dest) const override;
+    size_t restoreState(std::span<const uint8_t> src) override;
     void prompt(const std::string &prompt,
                 const std::string &promptTemplate,
                 std::function<bool(int32_t)> promptCallback,
@@ -118,12 +118,14 @@ protected:
throw std::logic_error("not implemented");
}
Token sampleToken(PromptContext &ctx) const override
void initSampler(PromptContext &ctx) override
{
(void)ctx;
throw std::logic_error("not implemented");
}
Token sampleToken() const override { throw std::logic_error("not implemented"); }
bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override
{
(void)ctx;
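
This hunk tracks an upstream interface change that splits sampling into a one-time initSampler(PromptContext &) step and a context-free sampleToken(). A rough sketch of that shape, with all names invented for illustration (this is not the LLModel API):

// Invented example: configure the sampler once from the prompt context,
// then draw tokens repeatedly without re-passing the context.
#include <cstdint>
#include <random>

using ToyToken = int32_t;

struct ToyPromptContext { float temperature = 0.8f; };

struct ToySampler {
    std::mt19937 rng{42};
    float temperature = 1.0f;

    // One-time setup from the prompt context.
    void init(const ToyPromptContext &ctx) { temperature = ctx.temperature; }
    // Repeated, context-free draws (here just a dummy uniform pick).
    ToyToken sample() { return std::uniform_int_distribution<ToyToken>(0, 31999)(rng); }
};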

View File

@@ -1174,7 +1174,13 @@ void ChatLLM::saveState()
 #if defined(DEBUG)
     qDebug() << "saveState" << m_llmThread.objectName() << "size:" << m_state.size();
 #endif
-    m_llModelInfo.model->saveState(static_cast<uint8_t*>(reinterpret_cast<void*>(m_state.data())));
+    bool ok = m_llModelInfo.model->saveState({reinterpret_cast<uint8_t *>(m_state.data()), size_t(m_state.size())});
+    if (!ok) {
+        // FIXME(jared): how badly does this situation break GPT4All?
+        qWarning() << "ChatLLM failed to save LLModel state";
+        m_state.clear();
+        m_state.squeeze();
+    }
 }
 
 void ChatLLM::restoreState()
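
Condensed sketch of the save path the hunk above implements: size a QByteArray from stateSize(), pass it to the model as a span, and drop the buffer if the model reports failure. ToyModel and snapshot() are illustrative stand-ins, not the real ChatLLM/LLModel types:

#include <QByteArray>
#include <cstdint>
#include <span>

struct ToyModel {
    size_t stateSize() const { return 64; }
    // Toy success check standing in for the real serialization.
    bool saveState(std::span<uint8_t> dest) const { return dest.size() >= stateSize(); }
};

QByteArray snapshot(const ToyModel &model)
{
    QByteArray state;
    state.resize(qsizetype(model.stateSize()));
    bool ok = model.saveState({reinterpret_cast<uint8_t *>(state.data()), size_t(state.size())});
    if (!ok) {
        state.clear();   // same recovery as the hunk: discard the partial state
        state.squeeze(); // and release the allocation
    }
    return state;
}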
@@ -1183,7 +1189,7 @@ void ChatLLM::restoreState()
         return;
 
     if (m_llModelType == LLModelType::API_) {
-        QDataStream stream(&m_state, QIODeviceBase::ReadOnly);
+        QDataStream stream(m_state);
         stream.setVersion(QDataStream::Qt_6_4);
         ChatAPI *chatAPI = static_cast<ChatAPI*>(m_llModelInfo.model.get());
         QList<QString> context;
@@ -1201,12 +1207,12 @@ void ChatLLM::restoreState()
     if (m_state.isEmpty())
         return;
 
-    if (m_llModelInfo.model->stateSize() == m_state.size()) {
-        m_llModelInfo.model->restoreState(static_cast<const uint8_t*>(reinterpret_cast<void*>(m_state.data())));
+    size_t bytesRead = m_llModelInfo.model->restoreState({reinterpret_cast<uint8_t *>(m_state.data()), size_t(m_state.size())});
+    if (bytesRead) {
         m_processedSystemPrompt = true;
         m_pristineLoadedState = true;
     } else {
-        qWarning() << "restoring state from text because" << m_llModelInfo.model->stateSize() << "!=" << m_state.size();
+        qWarning() << "restoring state from text because of error reading state (mismatch or corrupt data)";
         m_restoreStateFromText = true;
     }