From 0866a984fef8e9346c143ff35a8210ac789afc75 Mon Sep 17 00:00:00 2001
From: Bob Lin
Date: Mon, 29 Jan 2024 08:46:50 +0800
Subject: [PATCH] Update `n_gpu_layers`'s description (#16685)

The `n_gpu_layers` parameter in `llama.cpp` supports `-1`, which offloads
all layers to the GPU, so the documentation has been updated accordingly.

Ref:
https://github.com/abetlen/llama-cpp-python/blob/35918873b4010a230a9aa478fd16f35127d7eb9a/llama_cpp/server/settings.py#L29C22-L29C117
https://github.com/abetlen/llama-cpp-python/blob/35918873b4010a230a9aa478fd16f35127d7eb9a/llama_cpp/llama.py#L125
---
 docs/docs/integrations/llms/llamacpp.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/docs/integrations/llms/llamacpp.ipynb b/docs/docs/integrations/llms/llamacpp.ipynb
index d624779b69e..58bb7f38d8d 100644
--- a/docs/docs/integrations/llms/llamacpp.ipynb
+++ b/docs/docs/integrations/llms/llamacpp.ipynb
@@ -415,7 +415,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "n_gpu_layers = 40 # Change this value based on your model and your GPU VRAM pool.\n",
+    "n_gpu_layers = -1 # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, use -1 to move them all to the GPU.\n",
     "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n",
     "\n",
     "# Make sure the model path is correct for your system!\n",
@@ -501,7 +501,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "n_gpu_layers = 1 # Change this value based on your model and your GPU VRAM pool.\n",
+    "n_gpu_layers = 1 # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, use -1 to move them all to the GPU.\n",
     "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
     "# Make sure the model path is correct for your system!\n",
     "llm = LlamaCpp(\n",
@@ -559,7 +559,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "n_gpu_layers = 1 # Metal set to 1 is enough.\n",
+    "n_gpu_layers = 1 # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, use -1 to move them all to the GPU.\n",
     "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
     "# Make sure the model path is correct for your system!\n",
     "llm = LlamaCpp(\n",
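
For context, a minimal sketch of the updated setting in use (not part of the patch; the model path is a hypothetical placeholder, and the `LlamaCpp` import matches the notebook this patch edits):

```python
# Minimal sketch, assuming llama-cpp-python is installed with GPU support
# (e.g. CUDA or Metal) and langchain_community is available, as in the notebook.
from langchain_community.llms import LlamaCpp

llm = LlamaCpp(
    model_path="/path/to/model.gguf",  # hypothetical placeholder path
    n_gpu_layers=-1,  # -1 offloads all layers to the GPU, no layer count needed
    n_batch=512,  # should be between 1 and n_ctx; constrained by available VRAM
    verbose=True,
)

print(llm.invoke("Q: Name the planets in the solar system. A:"))
```

With `-1` the user no longer needs to know the model's layer count up front, which is why the generic comment replaces the model-specific `40` and the Metal-specific advice.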