Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-09-08 03:49:10 +00:00)
add min_p sampling parameter (#2014)
Signed-off-by: Christopher Barrera <cb@arda.tx.rr.com>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
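For context: min_p is a sampling filter. As the updated docstring below puts it, generation samples only from the most likely tokens whose probabilities are at least min_p. Here is a minimal NumPy sketch of that rule as an absolute probability floor; the helper name sample_min_p is hypothetical, the real sampler lives in the C++ backend, and llama.cpp-style min-p implementations typically scale the cutoff by the top token's probability rather than using a fixed floor.

```python
import numpy as np

def sample_min_p(logits: np.ndarray, min_p: float, rng: np.random.Generator) -> int:
    """Sample a token id, discarding tokens whose probability is below min_p."""
    # Softmax over raw logits (shifted by the max for numerical stability).
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    # min_p = 0.0 (the new default) keeps every token, i.e. the filter is off.
    keep = probs >= min_p
    if not keep.any():  # min_p above even the top token: fall back to greedy
        return int(probs.argmax())
    filtered = np.where(keep, probs, 0.0)
    filtered /= filtered.sum()  # renormalize over the surviving tokens
    return int(rng.choice(len(filtered), p=filtered))

rng = np.random.default_rng(0)
print(sample_min_p(np.array([2.0, 1.5, 0.3, -1.0]), min_p=0.1, rng=rng))
```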
@@ -49,6 +49,7 @@ class LLModelPromptContext(ctypes.Structure):
         ("n_predict", ctypes.c_int32),
         ("top_k", ctypes.c_int32),
         ("top_p", ctypes.c_float),
+        ("min_p", ctypes.c_float),
         ("temp", ctypes.c_float),
         ("n_batch", ctypes.c_int32),
         ("repeat_penalty", ctypes.c_float),
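A note on the struct change above: ctypes maps _fields_ to the underlying C struct purely by declaration order and size, so the new ("min_p", ctypes.c_float) entry must sit at the same offset as the matching field in the C prompt-context struct. A small illustrative sketch (the class name Ctx is hypothetical):

```python
import ctypes

class Ctx(ctypes.Structure):
    # Stand-in for LLModelPromptContext: field offsets follow declaration order.
    _fields_ = [
        ("top_p", ctypes.c_float),
        ("min_p", ctypes.c_float),  # inserted after top_p, as in this commit
        ("temp",  ctypes.c_float),
    ]

ctx = Ctx(top_p=0.4, min_p=0.05, temp=0.7)
print(Ctx.min_p.offset)  # 4: min_p starts right after the 4-byte top_p
```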
@@ -241,6 +242,7 @@ class LLModel:
         n_predict: int = 4096,
         top_k: int = 40,
         top_p: float = 0.9,
+        min_p: float = 0.0,
         temp: float = 0.1,
         n_batch: int = 8,
         repeat_penalty: float = 1.2,
@@ -257,6 +259,7 @@ class LLModel:
             n_predict=n_predict,
             top_k=top_k,
             top_p=top_p,
+            min_p=min_p,
             temp=temp,
             n_batch=n_batch,
             repeat_penalty=repeat_penalty,
@@ -272,6 +275,7 @@ class LLModel:
         self.context.n_predict = n_predict
         self.context.top_k = top_k
         self.context.top_p = top_p
+        self.context.min_p = min_p
         self.context.temp = temp
         self.context.n_batch = n_batch
         self.context.repeat_penalty = repeat_penalty
@@ -297,6 +301,7 @@ class LLModel:
         n_predict: int = 4096,
         top_k: int = 40,
         top_p: float = 0.9,
+        min_p: float = 0.0,
         temp: float = 0.1,
         n_batch: int = 8,
         repeat_penalty: float = 1.2,
@@ -334,6 +339,7 @@ class LLModel:
             n_predict=n_predict,
             top_k=top_k,
             top_p=top_p,
+            min_p=min_p,
             temp=temp,
             n_batch=n_batch,
             repeat_penalty=repeat_penalty,
@@ -289,6 +289,7 @@ class GPT4All:
         temp: float = 0.7,
         top_k: int = 40,
         top_p: float = 0.4,
+        min_p: float = 0.0,
         repeat_penalty: float = 1.18,
         repeat_last_n: int = 64,
         n_batch: int = 8,
@@ -305,6 +306,7 @@ class GPT4All:
             temp: The model temperature. Larger values increase creativity but decrease factuality.
             top_k: Randomly sample from the top_k most likely tokens at each generation step. Set this to 1 for greedy decoding.
             top_p: Randomly sample at each generation step from the top most likely tokens whose probabilities add up to top_p.
+            min_p: Randomly sample at each generation step from the top most likely tokens whose probabilities are at least min_p.
             repeat_penalty: Penalize the model for repetition. Higher values result in less repetition.
             repeat_last_n: How far in the models generation history to apply the repeat penalty.
             n_batch: Number of prompt tokens processed in parallel. Larger values decrease latency but increase resource requirements.
@@ -325,6 +327,7 @@ class GPT4All:
             temp=temp,
             top_k=top_k,
             top_p=top_p,
+            min_p=min_p,
             repeat_penalty=repeat_penalty,
             repeat_last_n=repeat_last_n,
             n_batch=n_batch,
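With both layers patched, min_p flows from the public API down to the native prompt context. A usage sketch, assuming the post-#2014 Python bindings and an already-downloaded model (the model filename below is illustrative, not prescribed by this commit):

```python
from gpt4all import GPT4All

# Illustrative model name; any model the bindings can load works here.
model = GPT4All("mistral-7b-instruct-v0.1.Q4_0.gguf")

# min_p=0.0 (the default) leaves behavior unchanged; a small positive
# value such as 0.05 drops very unlikely tokens at each step.
print(model.generate("Name three primary colors.", temp=0.7, top_p=0.4, min_p=0.05))
```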