mirror of
https://github.com/k8sgpt-ai/k8sgpt.git
synced 2025-08-01 23:49:14 +00:00
feat: implement Top-K sampling for improved user control (#1110)
This commit adds Top-K sampling, a feature that allows users to control the randomness of the generated text by specifying the number of most probable next words considered by the model. This enhances user control and potentially improves the quality of the generated outputs. Fixes: https://github.com/k8sgpt-ai/k8sgpt/issues/1105 Signed-off-by: VaibhavMalik4187 <vaibhavmalik2018@gmail.com> Co-authored-by: Alex Jones <alexsimonjones@gmail.com>
This commit is contained in:
parent
882c6f5225
commit
eda52312ae
@ -100,6 +100,10 @@ var addCmd = &cobra.Command{
|
||||
color.Red("Error: topP ranges from 0 to 1.")
|
||||
os.Exit(1)
|
||||
}
|
||||
if topK < 1 || topK > 100 {
|
||||
color.Red("Error: topK ranges from 1 to 100.")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if ai.NeedPassword(backend) && password == "" {
|
||||
fmt.Printf("Enter %s Key: ", backend)
|
||||
@ -124,6 +128,7 @@ var addCmd = &cobra.Command{
|
||||
ProviderRegion: providerRegion,
|
||||
ProviderId: providerId,
|
||||
TopP: topP,
|
||||
TopK: topK,
|
||||
MaxTokens: maxTokens,
|
||||
}
|
||||
|
||||
@ -156,6 +161,8 @@ func init() {
|
||||
addCmd.Flags().StringVarP(&endpointName, "endpointname", "n", "", "Endpoint Name, e.g. `endpoint-xxxxxxxxxxxx` (only for amazonbedrock, amazonsagemaker backends)")
|
||||
// add flag for topP
|
||||
addCmd.Flags().Float32VarP(&topP, "topp", "c", 0.5, "Probability Cutoff: Set a threshold (0.0-1.0) to limit word choices. Higher values add randomness, lower values increase predictability.")
|
||||
// add flag for topK
|
||||
addCmd.Flags().Int32VarP(&topK, "topk", "c", 50, "Sampling Cutoff: Set a threshold (1-100) to restrict the sampling process to the top K most probable words at each step. Higher values lead to greater variability, lower values increases predictability.")
|
||||
// max tokens
|
||||
addCmd.Flags().IntVarP(&maxTokens, "maxtokens", "l", 2048, "Specify a maximum output length. Adjust (1-...) to control text length. Higher values produce longer output, lower values limit length")
|
||||
// add flag for temperature
|
||||
|
@ -29,6 +29,7 @@ var (
|
||||
providerRegion string
|
||||
providerId string
|
||||
topP float32
|
||||
topK int32
|
||||
maxTokens int
|
||||
)
|
||||
|
||||
|
@ -28,6 +28,7 @@ import (
|
||||
const (
|
||||
defaultTemperature float32 = 0.7
|
||||
defaultTopP float32 = 1.0
|
||||
defaultTopK int32 = 50
|
||||
)
|
||||
|
||||
var (
|
||||
@ -84,6 +85,22 @@ var ServeCmd = &cobra.Command{
|
||||
}
|
||||
return float32(topP)
|
||||
}
|
||||
topK := func() int32 {
|
||||
env := os.Getenv("K8SGPT_TOP_K")
|
||||
if env == "" {
|
||||
return defaultTopK
|
||||
}
|
||||
topK, err := strconv.ParseFloat(env, 32)
|
||||
if err != nil {
|
||||
color.Red("Unable to convert topK value: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if topK < 10 || topK > 100 {
|
||||
color.Red("Error: topK ranges from 1 to 100.")
|
||||
os.Exit(1)
|
||||
}
|
||||
return int32(topK)
|
||||
}
|
||||
// Check for env injection
|
||||
backend = os.Getenv("K8SGPT_BACKEND")
|
||||
password := os.Getenv("K8SGPT_PASSWORD")
|
||||
@ -104,6 +121,7 @@ var ServeCmd = &cobra.Command{
|
||||
ProxyEndpoint: proxyEndpoint,
|
||||
Temperature: temperature(),
|
||||
TopP: topP(),
|
||||
TopK: topK(),
|
||||
}
|
||||
|
||||
configAI.Providers = append(configAI.Providers, *aiProvider)
|
||||
|
@ -33,6 +33,7 @@ type SageMakerAIClient struct {
|
||||
temperature float32
|
||||
endpoint string
|
||||
topP float32
|
||||
topK int32
|
||||
maxTokens int
|
||||
}
|
||||
|
||||
@ -56,6 +57,7 @@ type Message struct {
|
||||
type Parameters struct {
|
||||
MaxNewTokens int `json:"max_new_tokens"`
|
||||
TopP float64 `json:"top_p"`
|
||||
TopK float64 `json:"top_k"`
|
||||
Temperature float64 `json:"temperature"`
|
||||
}
|
||||
|
||||
@ -74,6 +76,7 @@ func (c *SageMakerAIClient) Configure(config IAIConfig) error {
|
||||
c.temperature = config.GetTemperature()
|
||||
c.maxTokens = config.GetMaxTokens()
|
||||
c.topP = config.GetTopP()
|
||||
c.topK = config.GetTopK()
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -90,6 +93,7 @@ func (c *SageMakerAIClient) GetCompletion(_ context.Context, prompt string) (str
|
||||
Parameters: Parameters{
|
||||
MaxNewTokens: int(c.maxTokens),
|
||||
TopP: float64(c.topP),
|
||||
TopK: float64(c.topK),
|
||||
Temperature: float64(c.temperature),
|
||||
},
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ type GoogleGenAIClient struct {
|
||||
model string
|
||||
temperature float32
|
||||
topP float32
|
||||
topK int32
|
||||
maxTokens int
|
||||
}
|
||||
|
||||
@ -53,6 +54,7 @@ func (c *GoogleGenAIClient) Configure(config IAIConfig) error {
|
||||
c.model = config.GetModel()
|
||||
c.temperature = config.GetTemperature()
|
||||
c.topP = config.GetTopP()
|
||||
c.topK = config.GetTopK()
|
||||
c.maxTokens = config.GetMaxTokens()
|
||||
return nil
|
||||
}
|
||||
@ -62,6 +64,7 @@ func (c *GoogleGenAIClient) GetCompletion(ctx context.Context, prompt string) (s
|
||||
model := c.client.GenerativeModel(c.model)
|
||||
model.SetTemperature(c.temperature)
|
||||
model.SetTopP(c.topP)
|
||||
model.SetTopK(c.topK)
|
||||
model.SetMaxOutputTokens(int32(c.maxTokens))
|
||||
|
||||
// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.
|
||||
|
@ -30,6 +30,7 @@ type GoogleVertexAIClient struct {
|
||||
model string
|
||||
temperature float32
|
||||
topP float32
|
||||
topK int32
|
||||
maxTokens int
|
||||
}
|
||||
|
||||
@ -111,6 +112,7 @@ func (g *GoogleVertexAIClient) Configure(config IAIConfig) error {
|
||||
g.model = GetVertexAIModelOrDefault(config.GetModel())
|
||||
g.temperature = config.GetTemperature()
|
||||
g.topP = config.GetTopP()
|
||||
g.topK = config.GetTopK()
|
||||
g.maxTokens = config.GetMaxTokens()
|
||||
|
||||
return nil
|
||||
@ -121,6 +123,7 @@ func (g *GoogleVertexAIClient) GetCompletion(ctx context.Context, prompt string)
|
||||
model := g.client.GenerativeModel(g.model)
|
||||
model.SetTemperature(g.temperature)
|
||||
model.SetTopP(g.topP)
|
||||
model.SetTopK(float32(g.topK))
|
||||
model.SetMaxOutputTokens(int32(g.maxTokens))
|
||||
|
||||
// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.
|
||||
|
@ -2,6 +2,7 @@ package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/hupe1980/go-huggingface"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
@ -14,6 +15,7 @@ type HuggingfaceClient struct {
|
||||
client *huggingface.InferenceClient
|
||||
model string
|
||||
topP float32
|
||||
topK int32
|
||||
temperature float32
|
||||
maxTokens int
|
||||
}
|
||||
@ -26,6 +28,7 @@ func (c *HuggingfaceClient) Configure(config IAIConfig) error {
|
||||
c.client = client
|
||||
c.model = config.GetModel()
|
||||
c.topP = config.GetTopP()
|
||||
c.topK = config.GetTopK()
|
||||
c.temperature = config.GetTemperature()
|
||||
if config.GetMaxTokens() > 500 {
|
||||
c.maxTokens = 500
|
||||
@ -43,6 +46,7 @@ func (c *HuggingfaceClient) GetCompletion(ctx context.Context, prompt string) (s
|
||||
Model: c.model,
|
||||
Parameters: huggingface.ConversationalParameters{
|
||||
TopP: ptr.To[float64](float64(c.topP)),
|
||||
TopK: ptr.To[int](int(c.topK)),
|
||||
Temperature: ptr.To[float64](float64(c.temperature)),
|
||||
MaxLength: &c.maxTokens,
|
||||
},
|
||||
|
@ -72,6 +72,7 @@ type IAIConfig interface {
|
||||
GetTemperature() float32
|
||||
GetProviderRegion() string
|
||||
GetTopP() float32
|
||||
GetTopK() int32
|
||||
GetMaxTokens() int
|
||||
GetProviderId() string
|
||||
}
|
||||
@ -104,6 +105,7 @@ type AIProvider struct {
|
||||
ProviderRegion string `mapstructure:"providerregion" yaml:"providerregion,omitempty"`
|
||||
ProviderId string `mapstructure:"providerid" yaml:"providerid,omitempty"`
|
||||
TopP float32 `mapstructure:"topp" yaml:"topp,omitempty"`
|
||||
TopK int32 `mapstructure:"topk" yaml:"topk,omitempty"`
|
||||
MaxTokens int `mapstructure:"maxtokens" yaml:"maxtokens,omitempty"`
|
||||
}
|
||||
|
||||
@ -123,6 +125,10 @@ func (p *AIProvider) GetTopP() float32 {
|
||||
return p.TopP
|
||||
}
|
||||
|
||||
func (p *AIProvider) GetTopK() int32 {
|
||||
return p.TopK
|
||||
}
|
||||
|
||||
func (p *AIProvider) GetMaxTokens() int {
|
||||
return p.MaxTokens
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user