Mirror of https://github.com/hwchase17/langchain.git (synced 2025-07-16 17:53:37 +00:00)
llama-cpp: add gpu layers parameter (#4739)
Adds a gpu layers parameter to the llama.cpp wrapper.

Co-authored-by: andrew.khvalenski <andrew.khvalenski@behavox.com>
Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
parent 36c9fd1af7
commit 7d15669b41
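For context, a minimal usage sketch of the new parameter (not part of this commit; the model path and layer count are illustrative placeholders):

from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import LlamaCpp

# Offload 32 layers to GPU memory; requires a llama-cpp-python build with GPU support.
llm = LlamaCpp(model_path="./models/ggml-model-q4_0.bin", n_gpu_layers=32)
embeddings = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin", n_gpu_layers=32)

# Leaving n_gpu_layers unset (None) keeps the previous behaviour: the argument
# is not forwarded to llama-cpp-python at all.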
@@ -53,6 +53,9 @@ class LlamaCppEmbeddings(BaseModel, Embeddings):
     """Number of tokens to process in parallel.
     Should be a number between 1 and n_ctx."""
 
+    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    """Number of layers to be loaded into gpu memory. Default None."""
+
     class Config:
         """Configuration for this pydantic object."""
 
@@ -62,40 +65,37 @@ class LlamaCppEmbeddings(BaseModel, Embeddings):
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that llama-cpp-python library is installed."""
         model_path = values["model_path"]
-        n_ctx = values["n_ctx"]
-        n_parts = values["n_parts"]
-        seed = values["seed"]
-        f16_kv = values["f16_kv"]
-        logits_all = values["logits_all"]
-        vocab_only = values["vocab_only"]
-        use_mlock = values["use_mlock"]
-        n_threads = values["n_threads"]
-        n_batch = values["n_batch"]
+        model_param_names = [
+            "n_ctx",
+            "n_parts",
+            "seed",
+            "f16_kv",
+            "logits_all",
+            "vocab_only",
+            "use_mlock",
+            "n_threads",
+            "n_batch",
+        ]
+        model_params = {k: values[k] for k in model_param_names}
+        # For backwards compatibility, only include if non-null.
+        if values["n_gpu_layers"] is not None:
+            model_params["n_gpu_layers"] = values["n_gpu_layers"]
 
         try:
             from llama_cpp import Llama
 
-            values["client"] = Llama(
-                model_path=model_path,
-                n_ctx=n_ctx,
-                n_parts=n_parts,
-                seed=seed,
-                f16_kv=f16_kv,
-                logits_all=logits_all,
-                vocab_only=vocab_only,
-                use_mlock=use_mlock,
-                n_threads=n_threads,
-                n_batch=n_batch,
-                embedding=True,
-            )
+            values["client"] = Llama(model_path, embedding=True, **model_params)
         except ImportError:
             raise ModuleNotFoundError(
                 "Could not import llama-cpp-python library. "
                 "Please install the llama-cpp-python library to "
                 "use this embedding model: pip install llama-cpp-python"
             )
-        except Exception:
-            raise NameError(f"Could not load Llama model from path: {model_path}")
+        except Exception as e:
+            raise ValueError(
+                f"Could not load Llama model from path: {model_path}. "
+                f"Received error {e}"
+            )
 
         return values
 
@@ -64,6 +64,9 @@ class LlamaCpp(LLM):
     """Number of tokens to process in parallel.
     Should be a number between 1 and n_ctx."""
 
+    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    """Number of layers to be loaded into gpu memory. Default None."""
+
     suffix: Optional[str] = Field(None)
     """A suffix to append to the generated text. If None, no suffix is appended."""
 
@@ -104,47 +107,41 @@ class LlamaCpp(LLM):
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that llama-cpp-python library is installed."""
         model_path = values["model_path"]
-        lora_path = values["lora_path"]
-        lora_base = values["lora_base"]
-        n_ctx = values["n_ctx"]
-        n_parts = values["n_parts"]
-        seed = values["seed"]
-        f16_kv = values["f16_kv"]
-        logits_all = values["logits_all"]
-        vocab_only = values["vocab_only"]
-        use_mlock = values["use_mlock"]
-        n_threads = values["n_threads"]
-        n_batch = values["n_batch"]
-        use_mmap = values["use_mmap"]
-        last_n_tokens_size = values["last_n_tokens_size"]
+        model_param_names = [
+            "lora_path",
+            "lora_base",
+            "n_ctx",
+            "n_parts",
+            "seed",
+            "f16_kv",
+            "logits_all",
+            "vocab_only",
+            "use_mlock",
+            "n_threads",
+            "n_batch",
+            "use_mmap",
+            "last_n_tokens_size",
+        ]
+        model_params = {k: values[k] for k in model_param_names}
+        # For backwards compatibility, only include if non-null.
+        if values["n_gpu_layers"] is not None:
+            model_params["n_gpu_layers"] = values["n_gpu_layers"]
 
         try:
             from llama_cpp import Llama
 
-            values["client"] = Llama(
-                model_path=model_path,
-                lora_base=lora_base,
-                lora_path=lora_path,
-                n_ctx=n_ctx,
-                n_parts=n_parts,
-                seed=seed,
-                f16_kv=f16_kv,
-                logits_all=logits_all,
-                vocab_only=vocab_only,
-                use_mlock=use_mlock,
-                n_threads=n_threads,
-                n_batch=n_batch,
-                use_mmap=use_mmap,
-                last_n_tokens_size=last_n_tokens_size,
-            )
+            values["client"] = Llama(model_path, **model_params)
         except ImportError:
             raise ModuleNotFoundError(
                 "Could not import llama-cpp-python library. "
                 "Please install the llama-cpp-python library to "
                 "use this embedding model: pip install llama-cpp-python"
             )
-        except Exception:
-            raise NameError(f"Could not load Llama model from path: {model_path}")
+        except Exception as e:
+            raise ValueError(
+                f"Could not load Llama model from path: {model_path}. "
+                f"Received error {e}"
+            )
 
         return values
 
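Design note: both validators forward n_gpu_layers only when it was explicitly set, per the "For backwards compatibility, only include if non-null" comment, presumably so llama-cpp-python versions that do not accept the argument keep working. A standalone sketch of that kwarg-forwarding pattern (illustrative values, not taken from the commit):

values = {"n_ctx": 512, "seed": -1, "n_batch": 8, "n_gpu_layers": None}
model_param_names = ["n_ctx", "seed", "n_batch"]

# Collect the always-present parameters, then add the optional one only when set.
model_params = {k: values[k] for k in model_param_names}
if values["n_gpu_layers"] is not None:
    model_params["n_gpu_layers"] = values["n_gpu_layers"]

print(model_params)  # {'n_ctx': 512, 'seed': -1, 'n_batch': 8}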