diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index d19c9d97..847006c8 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -93,6 +93,7 @@ struct LLamaPrivate {
     llama_context *ctx = nullptr;
     llama_context_params params;
     int64_t n_threads = 0;
+    std::vector<LLModel::Token> end_tokens;
 };

 LLamaModel::LLamaModel()
@@ -176,6 +177,8 @@ bool LLamaModel::loadModel(const std::string &modelPath)
         return false;
     }

+    d_ptr->end_tokens = {llama_token_eos(d_ptr->ctx)};
+
 #ifdef GGML_USE_KOMPUTE
     if (ggml_vk_has_device()) {
         std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
@@ -259,8 +262,7 @@ int32_t LLamaModel::contextLength() const

 const std::vector<LLModel::Token> &LLamaModel::endTokens() const
 {
-    static const std::vector<LLModel::Token> fres = {llama_token_eos(d_ptr->ctx)};
-    return fres;
+    return d_ptr->end_tokens;
 }

 #if defined(GGML_USE_KOMPUTE)
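
Reviewer note (not part of the patch): a function-local `static` is initialized exactly once, on the first call, so the EOS token cached from the first loaded model would be returned for every later `LLamaModel` instance, and calling `endTokens()` before `loadModel()` would read a null `ctx`. Moving the vector into `LLamaPrivate` and filling it in `loadModel()` gives each instance its own value. A minimal, self-contained C++ sketch of the bug under those assumptions (hypothetical `Model` type with `eos` standing in for `llama_token_eos(ctx)`; not gpt4all code):

    #include <iostream>
    #include <vector>

    struct Model {
        int eos;                      // stands in for llama_token_eos(ctx)
        std::vector<int> end_tokens;  // per-instance storage, as in the patch

        explicit Model(int e) : eos(e), end_tokens{e} {}

        // Buggy variant: `fres` is initialized on the FIRST call only and is
        // then shared by every Model instance.
        const std::vector<int> &endTokensStatic() const {
            static const std::vector<int> fres = {eos};
            return fres;
        }

        // Fixed variant: returns this instance's own tokens.
        const std::vector<int> &endTokensMember() const { return end_tokens; }
    };

    int main() {
        Model a(1), b(2);
        std::cout << a.endTokensStatic()[0] << " "
                  << b.endTokensStatic()[0] << "\n";  // prints "1 1" -- stale
        std::cout << a.endTokensMember()[0] << " "
                  << b.endTokensMember()[0] << "\n";  // prints "1 2" -- correct
    }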