Mirror of https://github.com/nomic-ai/gpt4all.git
chatllm: do not attempt to serialize incompatible state (#1742)
commit 0600f551b3
parent 9481762802
@@ -163,6 +163,10 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->ctx_params.seed = params.seed;
     d_ptr->ctx_params.f16_kv = params.memory_f16;
 
+    // The new batch API provides space for n_vocab*n_tokens logits. Tell llama.cpp early
+    // that we want this many logits so the state serializes consistently.
+    d_ptr->ctx_params.logits_all = true;
+
     d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     d_ptr->ctx_params.n_threads = d_ptr->n_threads;
     d_ptr->ctx_params.n_threads_batch = d_ptr->n_threads;
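
The added lines set logits_all on the llama.cpp context so that space for n_vocab*n_tokens logits is always reserved; without it, the size of the serialized context state depends on how many logits the last evaluation produced, and the exact-size check added to ChatLLM::restoreState further down would fail spuriously. Below is a minimal sketch of querying and copying that state, assuming the llama.cpp C API of this period (llama_get_state_size / llama_copy_state_data; newer releases renamed these to llama_state_get_size / llama_state_get_data) and a placeholder model path.

// Sketch: querying the size of llama.cpp's serialized context state.
// Assumes the late-2023 llama.cpp C API; "model.gguf" is a placeholder path.
#include <llama.h>
#include <cstdio>
#include <vector>

int main() {
    llama_backend_init(false);                       // numa = false

    llama_model_params mparams = llama_model_default_params();
    llama_model *model = llama_load_model_from_file("model.gguf", mparams);
    if (!model) return 1;

    llama_context_params cparams = llama_context_default_params();
    cparams.logits_all = true;                       // reserve logits for every token up front
    llama_context *ctx = llama_new_context_with_model(model, cparams);
    if (!ctx) return 1;

    // With logits_all set, this size is stable for a given model and context size,
    // so a saved blob can later be compared against it before it is restored.
    const size_t state_size = llama_get_state_size(ctx);
    std::printf("serialized state size: %zu bytes\n", state_size);

    std::vector<uint8_t> blob(state_size);
    llama_copy_state_data(ctx, blob.data());         // serialize KV cache, logits, RNG state

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}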
@@ -435,7 +435,6 @@ bool Chat::deserialize(QDataStream &stream, int version)
     if (!m_chatModel->deserialize(stream, version))
         return false;
 
-    if (!deserializeKV || discardKV)
-        m_llmodel->setStateFromText(m_chatModel->text());
+    m_llmodel->setStateFromText(m_chatModel->text());
 
     emit chatModelChanged();
@@ -863,11 +863,11 @@ bool ChatLLM::deserialize(QDataStream &stream, int version, bool deserializeKV,
         if (!discardKV)
             m_state = qUncompress(compressed);
     } else {
-        if (!discardKV)
+        if (!discardKV) {
             stream >> m_state;
-        else {
+        } else {
             QByteArray state;
-            stream >> m_state;
+            stream >> state;
         }
     }
 
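
The old else branch read the stored blob back into m_state even though it was meant to be discarded, so an incompatible saved state was kept around anyway; the fix reads it into a local QByteArray that simply goes out of scope. The field still has to be extracted either way, because QDataStream has no way to skip a record: consuming the bytes is what keeps the following fields aligned. A small self-contained Qt sketch of that read-and-discard pattern (the payload and the discardKV flag are stand-ins):

// Sketch of reading a serialized field and discarding it while keeping the stream aligned.
#include <QByteArray>
#include <QDataStream>
#include <QIODevice>
#include <QDebug>

int main() {
    // Serialize a fake state blob followed by another field, as operator<< would.
    QByteArray saved;
    {
        QDataStream out(&saved, QIODevice::WriteOnly);
        out << QByteArray(1024, 'x');   // pretend this is the model state
        out << QString("next field");   // something that follows the state in the stream
    }

    const bool discardKV = true;        // e.g. the saved state is incompatible with the model
    QByteArray m_state;

    QDataStream in(saved);
    if (!discardKV) {
        in >> m_state;                  // keep the blob
    } else {
        QByteArray state;
        in >> state;                    // bytes must still be consumed, but the result
                                        // is thrown away when the local goes out of scope
    }

    QString next;
    in >> next;                         // later fields now deserialize from the right offset
    qDebug() << "kept state bytes:" << m_state.size() << "next field:" << next;
    return 0;
}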
@@ -912,7 +912,7 @@ void ChatLLM::restoreState()
         stream >> context;
         chatGPT->setContext(context);
         m_state.clear();
-        m_state.resize(0);
+        m_state.squeeze();
         return;
     }
 
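
This hunk and the one below both swap resize(0) for squeeze(). Shrinking a QByteArray only changes its size; the heap block behind it, which for a saved model state can be very large, stays reserved until squeeze() releases it. A tiny sketch of the difference:

// Sketch: shrinking a QByteArray's size versus releasing its allocation.
// capacity() reports how many bytes the array still has allocated.
#include <QByteArray>
#include <QDebug>

int main() {
    QByteArray state(64 * 1024 * 1024, '\0');   // stand-in for a large saved model state
    qDebug() << "allocated capacity:" << state.capacity();

    state.resize(0);                             // size is now 0, but the allocation is kept
    qDebug() << "after resize(0):" << state.capacity();

    state.squeeze();                             // explicitly hand the memory back
    qDebug() << "after squeeze():" << state.capacity();
    return 0;
}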
@@ -923,10 +923,16 @@ void ChatLLM::restoreState()
     if (m_state.isEmpty())
         return;
 
-    m_processedSystemPrompt = true;
-    m_llModelInfo.model->restoreState(static_cast<const uint8_t*>(reinterpret_cast<void*>(m_state.data())));
+    if (m_llModelInfo.model->stateSize() == m_state.size()) {
+        m_llModelInfo.model->restoreState(static_cast<const uint8_t*>(reinterpret_cast<void*>(m_state.data())));
+        m_processedSystemPrompt = true;
+    } else {
+        qWarning() << "restoring state from text because" << m_llModelInfo.model->stateSize() << "!=" << m_state.size() << "\n";
+        m_restoreStateFromText = true;
+    }
+
     m_state.clear();
-    m_state.resize(0);
+    m_state.squeeze();
 }
 
 void ChatLLM::processSystemPrompt()
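
This is the heart of the commit: the saved blob is handed back to the model only when its length matches stateSize() exactly; otherwise restoreState() logs the mismatch and sets m_restoreStateFromText, so the chat is rebuilt by re-prompting from the stored transcript instead of feeding llama.cpp a buffer with an incompatible layout. A compact sketch of that guard with stand-in types (Model, stateSize, restoreState and rebuildFromText are hypothetical names; in gpt4all they map onto the LLModel interface and the chat's stored text):

// Sketch of the size-guard pattern: binary restore on exact match, text fallback otherwise.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct Model {
    size_t stateSize() const { return 4096; }            // size the current build expects
    void restoreState(const uint8_t *src) { (void)src; } // binary fast path
    void rebuildFromText(const std::string &chat) {      // slow path: re-prompt the model
        std::printf("re-evaluating %zu chars of chat text\n", chat.size());
    }
};

void restoreSavedState(Model &model, std::vector<uint8_t> &saved, const std::string &chatText) {
    if (saved.empty())
        return;

    if (model.stateSize() == saved.size()) {
        // The blob was produced by a compatible build/configuration: restore it directly.
        model.restoreState(saved.data());
    } else {
        // Incompatible layout (different model, context size, or serialization format):
        // fall back to rebuilding the KV cache from the chat transcript.
        std::fprintf(stderr, "restoring state from text because %zu != %zu\n",
                     model.stateSize(), saved.size());
        model.rebuildFromText(chatText);
    }

    saved.clear();
    saved.shrink_to_fit();   // the std::vector analogue of QByteArray::squeeze()
}

int main() {
    Model m;
    std::vector<uint8_t> blob(1234);   // deliberately the "wrong" size
    restoreSavedState(m, blob, "user: hi\nassistant: hello");
    return 0;
}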