add min_p sampling parameter (#2014)

Signed-off-by: Christopher Barrera <cb@arda.tx.rr.com>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
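min_p sampling adds a second probability cutoff alongside top_k and top_p: before a token is drawn, candidates whose probability falls below a threshold derived from min_p are discarded. The sketch below illustrates the idea in plain Python under the commonly used scaled-threshold formulation (threshold = min_p times the probability of the most likely token); it is illustrative only and is not the sampler code in the llmodel backend.

import random

def min_p_filter(probs, min_p):
    # Drop tokens whose probability is below min_p times the probability of
    # the most likely token, then renormalize what is left.
    # min_p = 0.0 (the default added here) disables the filter entirely.
    if min_p <= 0.0:
        return dict(probs)
    threshold = min_p * max(probs.values())
    kept = {tok: p for tok, p in probs.items() if p >= threshold}
    total = sum(kept.values())
    return {tok: p / total for tok, p in kept.items()}

def sample(probs):
    # Draw one token from a {token: probability} mapping.
    r = random.random()
    acc = 0.0
    for tok, p in probs.items():
        acc += p
        if r <= acc:
            return tok
    return tok  # guard against floating-point rounding at the tail

# With min_p=0.2 the threshold is 0.2 * 0.50 = 0.10, so "xyzzy" (0.05) is
# removed before sampling while the three plausible tokens remain.
dist = {"the": 0.50, "a": 0.30, "an": 0.15, "xyzzy": 0.05}
print(sample(min_p_filter(dist, 0.2)))

Because the default is 0.0, existing callers see no change in behavior unless they opt in. The diff below threads the parameter through every binding.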
@@ -120,6 +120,7 @@ def _old_loop(gpt4all_instance):
         n_predict=200,
         top_k=40,
         top_p=0.9,
+        min_p=0.0,
         temp=0.9,
         n_batch=9,
         repeat_penalty=1.1,
@@ -156,6 +157,7 @@ def _new_loop(gpt4all_instance):
         temp=0.9,
         top_k=40,
         top_p=0.9,
+        min_p=0.0,
         repeat_penalty=1.1,
         repeat_last_n=64,
         n_batch=9,
@@ -64,6 +64,15 @@ public unsafe class LLModelPromptContext
         set => _ctx.top_p = value;
     }
 
+    /// <summary>
+    /// min p sampling probability threshold
+    /// </summary>
+    public float MinP
+    {
+        get => _ctx.min_p;
+        set => _ctx.min_p = value;
+    }
+
     /// <summary>
     /// temperature to adjust model's output distribution
     /// </summary>
@@ -29,6 +29,8 @@ public unsafe partial struct llmodel_prompt_context
 
     public float top_p;
 
+    public float min_p;
+
     public float temp;
 
     [NativeTypeName("int32_t")]
@@ -16,6 +16,7 @@ internal static class LLPromptContextExtensions
         n_predict = {ctx.n_predict}
         top_k = {ctx.top_k}
         top_p = {ctx.top_p}
+        min_p = {ctx.min_p}
         temp = {ctx.temp}
         n_batch = {ctx.n_batch}
         repeat_penalty = {ctx.repeat_penalty}
@@ -12,6 +12,7 @@ public static class PredictRequestOptionsExtensions
         TokensSize = opts.TokensSize,
         TopK = opts.TopK,
         TopP = opts.TopP,
+        MinP = opts.MinP,
         PastNum = opts.PastConversationTokensNum,
         RepeatPenalty = opts.RepeatPenalty,
         Temperature = opts.Temperature,
@@ -16,6 +16,8 @@ public record PredictRequestOptions
 
     public float TopP { get; init; } = 0.9f;
 
+    public float MinP { get; init; } = 0.0f;
+
     public float Temperature { get; init; } = 0.1f;
 
     public int Batches { get; init; } = 8;
@@ -36,7 +36,7 @@ std::string res = "";
 void * mm;
 
 void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n, float repeat_penalty, int n_ctx, int tokens, int top_k,
-                   float top_p, float temp, int n_batch,float ctx_erase)
+                   float top_p, float min_p, float temp, int n_batch,float ctx_erase)
 {
     llmodel_model* model = (llmodel_model*) m;
 
@@ -69,6 +69,7 @@ void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n,
         .n_predict = 50,
         .top_k = 10,
         .top_p = 0.9,
+        .min_p = 0.0,
         .temp = 1.0,
         .n_batch = 1,
         .repeat_penalty = 1.2,
@@ -83,6 +84,7 @@ void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n,
     prompt_context->top_k = top_k;
     prompt_context->context_erase = ctx_erase;
     prompt_context->top_p = top_p;
+    prompt_context->min_p = min_p;
     prompt_context->temp = temp;
     prompt_context->n_batch = n_batch;
 
@@ -7,7 +7,7 @@ extern "C" {
 void* load_model(const char *fname, int n_threads);
 
 void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n, float repeat_penalty, int n_ctx, int tokens, int top_k,
-                   float top_p, float temp, int n_batch,float ctx_erase);
+                   float top_p, float min_p, float temp, int n_batch,float ctx_erase);
 
 void free_model(void *state_ptr);
 
@@ -15,4 +15,4 @@ extern unsigned char getTokenCallback(void *, char *);
 #ifdef __cplusplus
 }
 #endif
 #endif
@@ -7,7 +7,7 @@ package gpt4all
 // #cgo LDFLAGS: -lgpt4all -lm -lstdc++ -ldl
 // void* load_model(const char *fname, int n_threads);
 // void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n, float repeat_penalty, int n_ctx, int tokens, int top_k,
-// float top_p, float temp, int n_batch,float ctx_erase);
+// float top_p, float min_p, float temp, int n_batch,float ctx_erase);
 // void free_model(void *state_ptr);
 // extern unsigned char getTokenCallback(void *, char *);
 // void llmodel_set_implementation_search_path(const char *path);
@@ -58,7 +58,7 @@ func (l *Model) Predict(text string, opts ...PredictOption) (string, error) {
 	out := make([]byte, po.Tokens)
 
 	C.model_prompt(input, l.state, (*C.char)(unsafe.Pointer(&out[0])), C.int(po.RepeatLastN), C.float(po.RepeatPenalty), C.int(po.ContextSize),
-		C.int(po.Tokens), C.int(po.TopK), C.float(po.TopP), C.float(po.Temperature), C.int(po.Batch), C.float(po.ContextErase))
+		C.int(po.Tokens), C.int(po.TopK), C.float(po.TopP), C.float(po.MinP), C.float(po.Temperature), C.int(po.Batch), C.float(po.ContextErase))
 
 	res := C.GoString((*C.char)(unsafe.Pointer(&out[0])))
 	res = strings.TrimPrefix(res, " ")
@@ -2,7 +2,7 @@ package gpt4all
 
 type PredictOptions struct {
 	ContextSize, RepeatLastN, Tokens, TopK, Batch int
-	TopP, Temperature, ContextErase, RepeatPenalty float64
+	TopP, MinP, Temperature, ContextErase, RepeatPenalty float64
 }
 
 type PredictOption func(p *PredictOptions)
@@ -11,6 +11,7 @@ var DefaultOptions PredictOptions = PredictOptions{
 	Tokens:       200,
 	TopK:         10,
 	TopP:         0.90,
+	MinP:         0.0,
 	Temperature:  0.96,
 	Batch:        1,
 	ContextErase: 0.55,
@@ -50,6 +51,13 @@ func SetTopP(topp float64) PredictOption {
 	}
 }
 
+// SetMinP sets the value for min p sampling
+func SetMinP(minp float64) PredictOption {
+	return func(p *PredictOptions) {
+		p.MinP = minp
+	}
+}
+
 // SetRepeatPenalty sets the repeat penalty.
 func SetRepeatPenalty(ce float64) PredictOption {
 	return func(p *PredictOptions) {
@@ -32,6 +32,7 @@ public class LLModel implements AutoCloseable {
         n_predict.set(128);
         top_k.set(40);
         top_p.set(0.95);
+        min_p.set(0.0);
         temp.set(0.28);
         n_batch.set(8);
         repeat_penalty.set(1.1);
@@ -71,6 +72,11 @@ public class LLModel implements AutoCloseable {
             return this;
         }
 
+        public Builder withMinP(float min_p) {
+            configToBuild.min_p.set(min_p);
+            return this;
+        }
+
         public Builder withTemp(float temp) {
             configToBuild.temp.set(temp);
             return this;
@@ -48,6 +48,7 @@ public interface LLModelLibrary {
         public final int32_t n_predict = new int32_t();
         public final int32_t top_k = new int32_t();
         public final Float top_p = new Float();
+        public final Float min_p = new Float();
         public final Float temp = new Float();
         public final int32_t n_batch = new int32_t();
         public final Float repeat_penalty = new Float();
@@ -49,6 +49,7 @@ class LLModelPromptContext(ctypes.Structure):
         ("n_predict", ctypes.c_int32),
         ("top_k", ctypes.c_int32),
         ("top_p", ctypes.c_float),
+        ("min_p", ctypes.c_float),
         ("temp", ctypes.c_float),
         ("n_batch", ctypes.c_int32),
         ("repeat_penalty", ctypes.c_float),
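The placement of the new field in the ctypes mirror above matters: _fields_ order defines the in-memory layout, so min_p has to sit between top_p and temp exactly as it does in the native llmodel_prompt_context struct. A minimal, self-contained illustration (only a subset of the fields, for brevity):

import ctypes

class Ctx(ctypes.Structure):
    # Field order must match the C struct; appending min_p at the end instead
    # would silently shift every later field and corrupt the values passed to C.
    _fields_ = [
        ("top_p", ctypes.c_float),
        ("min_p", ctypes.c_float),
        ("temp", ctypes.c_float),
    ]

assert Ctx.min_p.offset == ctypes.sizeof(ctypes.c_float)
assert Ctx.temp.offset == 2 * ctypes.sizeof(ctypes.c_float)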
@@ -241,6 +242,7 @@ class LLModel:
         n_predict: int = 4096,
         top_k: int = 40,
         top_p: float = 0.9,
+        min_p: float = 0.0,
         temp: float = 0.1,
         n_batch: int = 8,
         repeat_penalty: float = 1.2,
@@ -257,6 +259,7 @@ class LLModel:
             n_predict=n_predict,
             top_k=top_k,
             top_p=top_p,
+            min_p=min_p,
             temp=temp,
             n_batch=n_batch,
             repeat_penalty=repeat_penalty,
@@ -272,6 +275,7 @@ class LLModel:
         self.context.n_predict = n_predict
         self.context.top_k = top_k
         self.context.top_p = top_p
+        self.context.min_p = min_p
         self.context.temp = temp
         self.context.n_batch = n_batch
         self.context.repeat_penalty = repeat_penalty
@@ -297,6 +301,7 @@ class LLModel:
         n_predict: int = 4096,
         top_k: int = 40,
         top_p: float = 0.9,
+        min_p: float = 0.0,
         temp: float = 0.1,
         n_batch: int = 8,
         repeat_penalty: float = 1.2,
@@ -334,6 +339,7 @@ class LLModel:
             n_predict=n_predict,
             top_k=top_k,
             top_p=top_p,
+            min_p=min_p,
             temp=temp,
             n_batch=n_batch,
             repeat_penalty=repeat_penalty,
@@ -289,6 +289,7 @@ class GPT4All:
         temp: float = 0.7,
         top_k: int = 40,
         top_p: float = 0.4,
+        min_p: float = 0.0,
         repeat_penalty: float = 1.18,
         repeat_last_n: int = 64,
         n_batch: int = 8,
@@ -305,6 +306,7 @@ class GPT4All:
             temp: The model temperature. Larger values increase creativity but decrease factuality.
             top_k: Randomly sample from the top_k most likely tokens at each generation step. Set this to 1 for greedy decoding.
             top_p: Randomly sample at each generation step from the top most likely tokens whose probabilities add up to top_p.
+            min_p: Randomly sample at each generation step from the top most likely tokens whose probabilities are at least min_p.
             repeat_penalty: Penalize the model for repetition. Higher values result in less repetition.
             repeat_last_n: How far in the models generation history to apply the repeat penalty.
             n_batch: Number of prompt tokens processed in parallel. Larger values decrease latency but increase resource requirements.
@@ -325,6 +327,7 @@ class GPT4All:
             temp=temp,
             top_k=top_k,
             top_p=top_p,
+            min_p=min_p,
             repeat_penalty=repeat_penalty,
             repeat_last_n=repeat_last_n,
             n_batch=n_batch,
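With the Python binding changes above, min_p can be passed to generate() like any other sampling parameter; it defaults to 0.0, so behavior is unchanged unless a caller opts in. A hypothetical usage example (the model file name is only illustrative):

from gpt4all import GPT4All

model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")  # any locally available model file works
with model.chat_session():
    # temp and top_p match the defaults shown above; min_p=0.05 additionally
    # filters out very unlikely tokens at each generation step.
    print(model.generate("Name three colors.", temp=0.7, top_p=0.4, min_p=0.05))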
@@ -248,6 +248,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
         .n_predict = 128,
         .top_k = 40,
         .top_p = 0.9f,
+        .min_p = 0.0f,
         .temp = 0.72f,
         .n_batch = 8,
         .repeat_penalty = 1.0f,
@@ -277,6 +278,8 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
         promptContext.top_k = inputObject.Get("top_k").As<Napi::Number>().Int32Value();
     if(inputObject.Has("top_p"))
         promptContext.top_p = inputObject.Get("top_p").As<Napi::Number>().FloatValue();
+    if(inputObject.Has("min_p"))
+        promptContext.min_p = inputObject.Get("min_p").As<Napi::Number>().FloatValue();
     if(inputObject.Has("temp"))
         promptContext.temp = inputObject.Get("temp").As<Napi::Number>().FloatValue();
     if(inputObject.Has("n_batch"))