llamamodel: set batch size to known max to reduce mem usage
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
commit cff5a53718 (parent b48e33638e)
@@ -393,6 +393,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
             std::cerr << "warning: model was trained on only " << n_ctx_train << " context tokens ("
                       << n_ctx << " specified)\n";
         }
+
+        // GPT4All defaults to 128 tokens which is also the hardcoded maximum
+        d_ptr->ctx_params.n_batch = LLMODEL_MAX_PROMPT_BATCH;
+        d_ptr->ctx_params.n_ubatch = LLMODEL_MAX_PROMPT_BATCH;
     }
 
     d_ptr->ctx_params.n_ctx = n_ctx;
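Background on the change: in llama.cpp, n_batch (the logical batch size, i.e. the most tokens a single llama_decode call may submit) and n_ubatch (the physical micro-batch size) both factor into how large the context's compute buffers are allocated. Since GPT4All never submits more than LLMODEL_MAX_PROMPT_BATCH (128) tokens per decode, pinning both parameters to that maximum avoids reserving buffer space that would never be used. The sketch below is illustrative only, not the GPT4All implementation: the make_context helper and the standalone LLMODEL_MAX_PROMPT_BATCH constant are assumptions for the example, and the field names assume a llama.cpp revision recent enough to expose n_ubatch.

// Illustrative sketch (assumed helper, not GPT4All's actual code): how the
// parameters touched by this commit map onto the llama.cpp context API.
#include "llama.h"

static const uint32_t LLMODEL_MAX_PROMPT_BATCH = 128; // assumption: mirrors GPT4All's hardcoded maximum

llama_context *make_context(llama_model *model, uint32_t n_ctx)
{
    llama_context_params params = llama_context_default_params();

    params.n_ctx = n_ctx;

    // n_batch is the logical batch size (max tokens per llama_decode call),
    // n_ubatch the physical micro-batch size; both scale the compute buffers,
    // so capping them at the largest batch the app will ever submit saves memory.
    params.n_batch  = LLMODEL_MAX_PROMPT_BATCH;
    params.n_ubatch = LLMODEL_MAX_PROMPT_BATCH;

    return llama_new_context_with_model(model, params);
}

The same idea applies to any embedder or chat frontend built on llama.cpp: there is no benefit to a batch size larger than the biggest chunk of tokens the application actually feeds per call, and oversizing it only inflates per-context memory.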