diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index d19c9d97..847006c8 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -93,6 +93,7 @@ struct LLamaPrivate {
     llama_context *ctx = nullptr;
     llama_context_params params;
     int64_t n_threads = 0;
+    std::vector<LLModel::Token> end_tokens;
 };

 LLamaModel::LLamaModel()
@@ -176,6 +177,8 @@ bool LLamaModel::loadModel(const std::string &modelPath)
         return false;
     }

+    d_ptr->end_tokens = {llama_token_eos(d_ptr->ctx)};
+
 #ifdef GGML_USE_KOMPUTE
     if (ggml_vk_has_device()) {
         std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
@@ -259,8 +262,7 @@ int32_t LLamaModel::contextLength() const

 const std::vector<LLModel::Token> &LLamaModel::endTokens() const
 {
-    static const std::vector<LLModel::Token> fres = {llama_token_eos(d_ptr->ctx)};
-    return fres;
+    return d_ptr->end_tokens;
 }

 #if defined(GGML_USE_KOMPUTE)
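
Reviewer note (not part of the patch): a function-local `static` is initialized exactly once, on the first call, so the EOS token cached from the first loaded model would be returned for every later `LLamaModel` instance, and calling `endTokens()` before `loadModel()` would read a null `ctx`. Moving the vector into `LLamaPrivate` and filling it in `loadModel()` gives each instance its own value. A minimal, self-contained C++ sketch of the bug under those assumptions (hypothetical `Model` type with `eos` standing in for `llama_token_eos(ctx)`; not gpt4all code):

    #include <iostream>
    #include <vector>

    struct Model {
        int eos;                      // stands in for llama_token_eos(ctx)
        std::vector<int> end_tokens;  // per-instance storage, as in the patch

        explicit Model(int e) : eos(e), end_tokens{e} {}

        // Buggy variant: `fres` is initialized on the FIRST call only and is
        // then shared by every Model instance.
        const std::vector<int> &endTokensStatic() const {
            static const std::vector<int> fres = {eos};
            return fres;
        }

        // Fixed variant: returns this instance's own tokens.
        const std::vector<int> &endTokensMember() const { return end_tokens; }
    };

    int main() {
        Model a(1), b(2);
        std::cout << a.endTokensStatic()[0] << " "
                  << b.endTokensStatic()[0] << "\n";  // prints "1 1" -- stale
        std::cout << a.endTokensMember()[0] << " "
                  << b.endTokensMember()[0] << "\n";  // prints "1 2" -- correct
    }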