server: do not process the system prompt twice for new models (#2924)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel
2024-08-30 12:30:24 -04:00
committed by GitHub
parent 2f02cd407f
commit 813ccaf5d1
5 changed files with 21 additions and 18 deletions

View File

@@ -249,9 +249,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
// and what the type and name of that model is. I've tried to comment extensively in this method
// to provide an overview of what we're doing here.
// We're already loaded with this model
if (isModelLoaded() && this->modelInfo() == modelInfo)
return true;
if (isModelLoaded() && this->modelInfo() == modelInfo) {
// already acquired -> keep it and reset
resetContext();
return true; // already loaded
}
// reset status
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
@@ -659,20 +661,25 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
emit modelInfoChanged(modelInfo);
}
// Stores the result of LLModelStore::globalInstance()->acquireModel() in m_llModelInfo,
// then emits loadedModelInfoChanged().
// NOTE(review): the signature appears twice below (brace on the same line, then brace on its
// own line). This listing looks like a diff whose +/- markers were stripped, so one of the two
// forms is presumably the pre-change line and the other its replacement -- confirm against
// the repository before treating this text as compilable source.
void ChatLLM::acquireModel() {
void ChatLLM::acquireModel()
{
m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
emit loadedModelInfoChanged();
}
// Replaces m_llModelInfo with an empty-braced ({}) value, then emits loadedModelInfoChanged().
// NOTE(review): the signature appears twice below (brace on the same line, then brace on its
// own line). This listing looks like a diff whose +/- markers were stripped, so one of the two
// forms is presumably the pre-change line and the other its replacement -- confirm against
// the repository before treating this text as compilable source.
void ChatLLM::resetModel() {
void ChatLLM::resetModel()
{
m_llModelInfo = {};
emit loadedModelInfoChanged();
}
// Sets m_shouldBeLoaded and calls loadModel(modelInfo).
//
// NOTE(review): as listed, the body performs those two statements unconditionally and then
// repeats them inside a guard that only passes when no model is loaded or the current
// modelInfo() differs from the requested one. Read literally, the unconditional pair would
// make the guard pointless. This listing looks like a diff with its +/- markers stripped:
// the first two statements are presumably the removed lines and the guarded block the added
// ones (the hunk header grows from 20 to 25 lines, which matches that reading) -- confirm
// against the repository.
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
{
m_shouldBeLoaded = true;
loadModel(modelInfo);
// ignore attempts to switch to the same model twice
if (!isModelLoaded() || this->modelInfo() != modelInfo) {
m_shouldBeLoaded = true;
loadModel(modelInfo);
}
}
bool ChatLLM::handlePrompt(int32_t token)