From cff5a5371840e60f29d221e3d7e758db18cf72cf Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Thu, 30 May 2024 16:36:11 -0400
Subject: [PATCH] llamamodel: set batch size to known max to reduce mem usage

Signed-off-by: Jared Van Bortel
---
 gpt4all-backend/llamamodel.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index e32aa582..a9f89b1a 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -393,6 +393,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
             std::cerr << "warning: model was trained on only " << n_ctx_train << " context tokens ("
                       << n_ctx << " specified)\n";
         }
+
+        // GPT4All defaults to 128 tokens which is also the hardcoded maximum
+        d_ptr->ctx_params.n_batch = LLMODEL_MAX_PROMPT_BATCH;
+        d_ptr->ctx_params.n_ubatch = LLMODEL_MAX_PROMPT_BATCH;
     }
 
     d_ptr->ctx_params.n_ctx = n_ctx;
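
Note: the two fields touched above are llama.cpp's llama_context_params::n_batch
(logical batch size) and ::n_ubatch (physical micro-batch size); both influence
how large the compute buffers grow, which is why capping them reduces memory
usage. Below is a minimal standalone sketch of the same idea, assuming
LLMODEL_MAX_PROMPT_BATCH resolves to 128 as the in-diff comment states;
make_context and kMaxPromptBatch are illustrative names, not GPT4All
identifiers.

    #include <llama.h>
    #include <cstdint>

    // Assumption: mirrors GPT4All's LLMODEL_MAX_PROMPT_BATCH, documented as 128.
    static const uint32_t kMaxPromptBatch = 128;

    llama_context *make_context(llama_model *model, uint32_t n_ctx) {
        llama_context_params params = llama_context_default_params();
        params.n_ctx = n_ctx;
        // Cap both the logical batch (n_batch) and the physical micro-batch
        // (n_ubatch) at the largest prompt batch the caller will ever submit,
        // so llama.cpp allocates smaller compute buffers than the defaults.
        params.n_batch = kMaxPromptBatch;
        params.n_ubatch = kMaxPromptBatch;
        return llama_new_context_with_model(model, params);
    }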