From 7a5f6e4726bf24434d51f79c6bb35d3d6cde61d7 Mon Sep 17 00:00:00 2001
From: Aaron Miller
Date: Fri, 30 Jun 2023 16:13:25 -0700
Subject: [PATCH] limit prompt batch size to 128

---
 gpt4all-backend/llmodel.h          | 2 ++
 gpt4all-backend/llmodel_shared.cpp | 1 +
 2 files changed, 3 insertions(+)

diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/llmodel.h
index 8e3e5ea2..ce7a6f57 100644
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h
@@ -9,6 +9,8 @@
 #include
 #include
 
+#define LLMODEL_MAX_PROMPT_BATCH 128
+
 class Dlhandle;
 
 class LLModel {
diff --git a/gpt4all-backend/llmodel_shared.cpp b/gpt4all-backend/llmodel_shared.cpp
index dfc07b76..cd4ace04 100644
--- a/gpt4all-backend/llmodel_shared.cpp
+++ b/gpt4all-backend/llmodel_shared.cpp
@@ -52,6 +52,7 @@ void LLModel::prompt(const std::string &prompt,
 
     promptCtx.n_predict = std::min(promptCtx.n_predict, promptCtx.n_ctx - (int) embd_inp.size());
     promptCtx.n_past = std::min(promptCtx.n_past, promptCtx.n_ctx);
+    promptCtx.n_batch = std::min(promptCtx.n_batch, LLMODEL_MAX_PROMPT_BATCH);
 
     // process the prompt in batches
     size_t i = 0;
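
Note (not part of the patch): a minimal standalone sketch of what the new clamp does. It assumes a caller-requested n_batch and a dummy token buffer; the batch-printing loop here only mirrors the shape of the prompt loop that follows the clamp in llmodel_shared.cpp and is not the gpt4all code itself.

#include <algorithm>
#include <cstdio>
#include <vector>

#define LLMODEL_MAX_PROMPT_BATCH 128  // same cap the patch introduces

int main() {
    std::vector<int> embd_inp(1000, 0); // stand-in prompt of 1000 tokens
    int n_batch = 512;                  // caller-requested batch size

    // The patch clamps the requested batch size to the compile-time cap
    // before the prompt is processed in batches.
    n_batch = std::min(n_batch, LLMODEL_MAX_PROMPT_BATCH);

    // Walk the prompt in chunks of at most n_batch tokens.
    size_t i = 0;
    while (i < embd_inp.size()) {
        size_t batch_end = std::min(i + (size_t) n_batch, embd_inp.size());
        std::printf("batch [%zu, %zu)\n", i, batch_end);
        i = batch_end;
    }
    return 0;
}

With the clamp in place, a request for a 512-token batch is processed as 128-token chunks; requests at or below the cap are unaffected.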