diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 795f1e7e..2e2d3792 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -302,8 +302,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     if (llama_verbose()) {
         std::cerr << "llama.cpp: using Metal" << std::endl;
-        d_ptr->backend_name = "metal";
     }
+    d_ptr->backend_name = "metal"; // always fully offload on Metal
 
     // TODO(cebtenzzre): use this parameter to allow using more than 53% of system RAM to load a model
diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py
index 4d4f6f8c..86e5e6f3 100644
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@@ -68,7 +68,7 @@ def get_long_description():
 
 setup(
     name=package_name,
-    version="2.5.0",
+    version="2.5.1",
     description="Python bindings for GPT4All",
     long_description=get_long_description(),
     long_description_content_type="text/markdown",
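
Note on the first hunk: it fixes a state change hidden inside a logging guard. Previously d_ptr->backend_name was assigned only when llama_verbose() returned true, so non-verbose runs never recorded that the Metal backend was in use. Below is a minimal standalone sketch of that pattern; `verbose`, `backend_name`, and the two function names are stand-ins for illustration, not the actual gpt4all symbols.

// before_after.cpp -- sketch of the bug pattern fixed above (hypothetical names)
#include <iostream>
#include <string>

static bool verbose = false;      // stand-in for llama_verbose()
static std::string backend_name;  // stand-in for d_ptr->backend_name

// Before: the assignment lives inside the logging branch, so the backend
// name is only recorded when verbose output happens to be enabled.
void select_backend_before() {
    if (verbose) {
        std::cerr << "llama.cpp: using Metal" << std::endl;
        backend_name = "metal";   // bug: skipped when not verbose
    }
}

// After: logging stays conditional, but the state change is unconditional.
void select_backend_after() {
    if (verbose) {
        std::cerr << "llama.cpp: using Metal" << std::endl;
    }
    backend_name = "metal";       // always recorded
}

int main() {
    select_backend_after();
    std::cout << "backend: " << backend_name << std::endl; // "metal" even with verbose off
}

The second hunk is just the accompanying Python bindings version bump from 2.5.0 to 2.5.1.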