add min_p sampling parameter (#2014)

Signed-off-by: Christopher Barrera <cb@arda.tx.rr.com> Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
2025-09-08 03:49:10 +00:00 · 2024-02-24 16:51:34 -06:00
parent a153cc5b25
commit f8b1069a1c
28 changed files with 176 additions and 14 deletions
--- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
@@ -49,6 +49,7 @@ class LLModelPromptContext(ctypes.Structure):
        ("n_predict", ctypes.c_int32),
        ("top_k", ctypes.c_int32),
        ("top_p", ctypes.c_float),
+        ("min_p", ctypes.c_float),
        ("temp", ctypes.c_float),
        ("n_batch", ctypes.c_int32),
        ("repeat_penalty", ctypes.c_float),
@@ -241,6 +242,7 @@ class LLModel:
        n_predict: int = 4096,
        top_k: int = 40,
        top_p: float = 0.9,
+        min_p: float = 0.0,
        temp: float = 0.1,
        n_batch: int = 8,
        repeat_penalty: float = 1.2,
@@ -257,6 +259,7 @@ class LLModel:
                n_predict=n_predict,
                top_k=top_k,
                top_p=top_p,
+                min_p=min_p,
                temp=temp,
                n_batch=n_batch,
                repeat_penalty=repeat_penalty,
@@ -272,6 +275,7 @@ class LLModel:
        self.context.n_predict = n_predict
        self.context.top_k = top_k
        self.context.top_p = top_p
+        self.context.min_p = min_p
        self.context.temp = temp
        self.context.n_batch = n_batch
        self.context.repeat_penalty = repeat_penalty
@@ -297,6 +301,7 @@ class LLModel:
        n_predict: int = 4096,
        top_k: int = 40,
        top_p: float = 0.9,
+        min_p: float = 0.0,
        temp: float = 0.1,
        n_batch: int = 8,
        repeat_penalty: float = 1.2,
@@ -334,6 +339,7 @@ class LLModel:
            n_predict=n_predict,
            top_k=top_k,
            top_p=top_p,
+            min_p=min_p,
            temp=temp,
            n_batch=n_batch,
            repeat_penalty=repeat_penalty,
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -289,6 +289,7 @@ class GPT4All:
        temp: float = 0.7,
        top_k: int = 40,
        top_p: float = 0.4,
+        min_p: float = 0.0,
        repeat_penalty: float = 1.18,
        repeat_last_n: int = 64,
        n_batch: int = 8,
@@ -305,6 +306,7 @@ class GPT4All:
            temp: The model temperature. Larger values increase creativity but decrease factuality.
            top_k: Randomly sample from the top_k most likely tokens at each generation step. Set this to 1 for greedy decoding.
            top_p: Randomly sample at each generation step from the top most likely tokens whose probabilities add up to top_p.
+            min_p: Randomly sample at each generation step from the tokens whose probabilities are at least min_p times the probability of the most likely token.
            repeat_penalty: Penalize the model for repetition. Higher values result in less repetition.
            repeat_last_n: How far in the models generation history to apply the repeat penalty.
            n_batch: Number of prompt tokens processed in parallel. Larger values decrease latency but increase resource requirements.
@@ -325,6 +327,7 @@ class GPT4All:
            temp=temp,
            top_k=top_k,
            top_p=top_p,
+            min_p=min_p,
            repeat_penalty=repeat_penalty,
            repeat_last_n=repeat_last_n,
            n_batch=n_batch,