Mirror of https://github.com/nomic-ai/gpt4all.git, synced 2025-09-06 11:00:48 +00:00
expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the
maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
@@ -672,8 +672,9 @@ GPTJ::GPTJ()
     d_ptr->modelLoaded = false;
 }
 
-size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx) {
+size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx, int ngl) {
     (void)n_ctx;
+    (void)ngl;
     gptj_model dummy_model;
     gpt_vocab dummy_vocab;
     size_t mem_req;
@@ -681,8 +682,9 @@ size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx) {
     return mem_req;
 }
 
-bool GPTJ::loadModel(const std::string &modelPath, int n_ctx) {
+bool GPTJ::loadModel(const std::string &modelPath, int n_ctx, int ngl) {
     (void)n_ctx;
+    (void)ngl;
     std::mt19937 rng(time(NULL));
     d_ptr->rng = rng;
 
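For context, here is a minimal sketch of the interface change above. It is
not the real backend class: FakeGptJ and the model file name are hypothetical
stand-ins, since the actual GPTJ class cannot be built outside
gpt4all-backend. It shows how a caller passes the new ngl argument and why
the GPT-J implementation discards it with the same (void)ngl; pattern as the
diff.

// Minimal sketch of the changed interface; FakeGptJ is a hypothetical
// stand-in for the real GPTJ backend class.
#include <cstddef>
#include <iostream>
#include <string>

struct FakeGptJ {
    // Both entry points now take an n_gpu_layers hint (ngl), matching the
    // llama.cpp-backed models; GPT-J runs on CPU, so it discards the hint.
    size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) {
        (void)modelPath; (void)n_ctx;
        (void)ngl; // same no-op pattern as the diff above
        return 0;
    }
    bool loadModel(const std::string &modelPath, int n_ctx, int ngl) {
        (void)modelPath; (void)n_ctx;
        (void)ngl; // accepted only for interface uniformity
        return true;
    }
};

int main() {
    FakeGptJ model;
    // A large ngl asks for as many layers as possible on the GPU; per the
    // commit message, the UI clamps this field to what the model supports.
    model.requiredMem("ggml-gpt4all-j.bin", 2048, 100);
    bool ok = model.loadModel("ggml-gpt4all-j.bin", 2048, 100);
    std::cout << (ok ? "loaded" : "failed") << '\n';
    return ok ? 0 : 1;
}

Keeping the unused parameters in the CPU-only backend lets every model type
share one loadModel/requiredMem signature, so callers need no per-backend
special cases.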