Implement configurable context length (#1749)

2025-09-06 02:50:36 +00:00 · 2023-12-16 17:58:15 -05:00
parent 7aa0f779de
commit d1c56b8b28
31 changed files with 291 additions and 135 deletions
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -138,7 +138,7 @@ const LLModel::Implementation* LLModel::Implementation::implementation(const cha
    return nullptr;
 }

-LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant) {
+LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant, int n_ctx) {
    if (!has_at_least_minimal_hardware()) {
        std::cerr << "LLModel ERROR: CPU does not support AVX\n";
        return nullptr;
@@ -154,7 +154,11 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
            if(impl) {
                LLModel* metalimpl = impl->m_construct();
                metalimpl->m_implementation = impl;
-                size_t req_mem = metalimpl->requiredMem(modelPath);
+                /* TODO(cebtenzzre): After we fix requiredMem, we should change this to happen at
+                 * load time, not construct time. Right now n_ctx is incorrectly hardcoded to 2048
+                 * in most (all?) places where this is called, causing underestimation of required
+                 * memory. */
+                size_t req_mem = metalimpl->requiredMem(modelPath, n_ctx);
                float req_to_total = (float) req_mem / (float) total_mem;
                // on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
                if (req_to_total >= 0.53) {
@@ -165,6 +169,8 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
                }
            }
        }
+    #else
+        (void)n_ctx;
    #endif

    if (!impl) {