feat: implement Top-K sampling for improved user control (#1110)

This commit adds Top-K sampling, a feature that lets users control the
randomness of generated text by limiting the model's choice of the next
token to the K most probable candidates at each step. This enhances
user control and can improve the quality of the generated output.
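For context, a minimal sketch of what Top-K sampling does conceptually.
This is illustrative only: the sampling itself happens inside each
backend's model, not in this repository, and the tokens and
probabilities below are made up.

package main

import (
	"fmt"
	"math/rand"
	"sort"
)

// topKSample keeps only the k highest-probability candidates,
// renormalizes their mass, and samples from the truncated distribution.
func topKSample(probs map[string]float64, k int, rng *rand.Rand) string {
	type cand struct {
		tok string
		p   float64
	}
	cands := make([]cand, 0, len(probs))
	for t, p := range probs {
		cands = append(cands, cand{t, p})
	}
	// Sort descending by probability and truncate to the top k.
	sort.Slice(cands, func(i, j int) bool { return cands[i].p > cands[j].p })
	if k < len(cands) {
		cands = cands[:k]
	}
	var total float64
	for _, c := range cands {
		total += c.p
	}
	// Draw proportionally from the renormalized top-k mass.
	r := rng.Float64() * total
	for _, c := range cands {
		r -= c.p
		if r <= 0 {
			return c.tok
		}
	}
	return cands[len(cands)-1].tok
}

func main() {
	rng := rand.New(rand.NewSource(1))
	probs := map[string]float64{"pod": 0.5, "node": 0.3, "service": 0.15, "deploy": 0.05}
	// With k=2, only "pod" or "node" can ever be chosen.
	fmt.Println(topKSample(probs, 2, rng))
}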

Fixes: https://github.com/k8sgpt-ai/k8sgpt/issues/1105

Signed-off-by: VaibhavMalik4187 <vaibhavmalik2018@gmail.com>
Co-authored-by: Alex Jones <alexsimonjones@gmail.com>
Vaibhav Malik 2024-05-16 20:11:07 +05:30 committed by GitHub
parent 882c6f5225
commit eda52312ae
8 changed files with 46 additions and 0 deletions

View File

@@ -100,6 +100,10 @@ var addCmd = &cobra.Command{
			color.Red("Error: topP ranges from 0 to 1.")
			os.Exit(1)
		}
+		if topK < 1 || topK > 100 {
+			color.Red("Error: topK ranges from 1 to 100.")
+			os.Exit(1)
+		}
		if ai.NeedPassword(backend) && password == "" {
			fmt.Printf("Enter %s Key: ", backend)
@@ -124,6 +128,7 @@ var addCmd = &cobra.Command{
			ProviderRegion: providerRegion,
			ProviderId:     providerId,
			TopP:           topP,
+			TopK:           topK,
			MaxTokens:      maxTokens,
		}
@@ -156,6 +161,8 @@ func init() {
	addCmd.Flags().StringVarP(&endpointName, "endpointname", "n", "", "Endpoint Name, e.g. `endpoint-xxxxxxxxxxxx` (only for amazonbedrock, amazonsagemaker backends)")
	// add flag for topP
	addCmd.Flags().Float32VarP(&topP, "topp", "c", 0.5, "Probability Cutoff: Set a threshold (0.0-1.0) to limit word choices. Higher values add randomness, lower values increase predictability.")
+	// add flag for topK; the shorthand must not reuse "c", which is already taken by --topp (pflag panics on a duplicate shorthand)
+	addCmd.Flags().Int32VarP(&topK, "topk", "k", 50, "Sampling Cutoff: Set a threshold (1-100) to restrict the sampling process to the top K most probable words at each step. Higher values lead to greater variability, lower values increase predictability.")
	// max tokens
	addCmd.Flags().IntVarP(&maxTokens, "maxtokens", "l", 2048, "Specify a maximum output length. Adjust (1-...) to control text length. Higher values produce longer output, lower values limit length")
	// add flag for temperature

View File

@@ -29,6 +29,7 @@ var (
	providerRegion string
	providerId     string
	topP           float32
+	topK           int32
	maxTokens      int
)

View File

@@ -28,6 +28,7 @@ import (
const (
	defaultTemperature float32 = 0.7
	defaultTopP        float32 = 1.0
+	defaultTopK        int32   = 50
)

var (
@@ -84,6 +85,22 @@ var ServeCmd = &cobra.Command{
			}
			return float32(topP)
		}
+		topK := func() int32 {
+			env := os.Getenv("K8SGPT_TOP_K")
+			if env == "" {
+				return defaultTopK
+			}
+			topK, err := strconv.ParseInt(env, 10, 32)
+			if err != nil {
+				color.Red("Unable to convert topK value: %v", err)
+				os.Exit(1)
+			}
+			if topK < 1 || topK > 100 {
+				color.Red("Error: topK ranges from 1 to 100.")
+				os.Exit(1)
+			}
+			return int32(topK)
+		}
		// Check for env injection
		backend = os.Getenv("K8SGPT_BACKEND")
		password := os.Getenv("K8SGPT_PASSWORD")
@@ -104,6 +121,7 @@ var ServeCmd = &cobra.Command{
			ProxyEndpoint: proxyEndpoint,
			Temperature:   temperature(),
			TopP:          topP(),
+			TopK:          topK(),
		}
		configAI.Providers = append(configAI.Providers, *aiProvider)
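The serve path reads the same knob from the environment instead, e.g.
K8SGPT_TOP_K=40 k8sgpt serve (example value; any integer in the 1-100
range passes the check above, and an unset variable falls back to
defaultTopK).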

View File

@@ -33,6 +33,7 @@ type SageMakerAIClient struct {
	temperature float32
	endpoint    string
	topP        float32
+	topK        int32
	maxTokens   int
}

@@ -56,6 +57,7 @@ type Message struct {
type Parameters struct {
	MaxNewTokens int     `json:"max_new_tokens"`
	TopP         float64 `json:"top_p"`
+	TopK         float64 `json:"top_k"`
	Temperature  float64 `json:"temperature"`
}

@@ -74,6 +76,7 @@ func (c *SageMakerAIClient) Configure(config IAIConfig) error {
	c.temperature = config.GetTemperature()
	c.maxTokens = config.GetMaxTokens()
	c.topP = config.GetTopP()
+	c.topK = config.GetTopK()
	return nil
}

@@ -90,6 +93,7 @@ func (c *SageMakerAIClient) GetCompletion(_ context.Context, prompt string) (str
		Parameters: Parameters{
			MaxNewTokens: int(c.maxTokens),
			TopP:         float64(c.topP),
+			TopK:         float64(c.topK),
			Temperature:  float64(c.temperature),
		},
	}
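For illustration, these parameters serialize into the request body as
follows. This is a standalone sketch mirroring the Parameters struct
above; the values are arbitrary examples, not defaults from this change.

package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the Parameters struct from the diff above.
type Parameters struct {
	MaxNewTokens int     `json:"max_new_tokens"`
	TopP         float64 `json:"top_p"`
	TopK         float64 `json:"top_k"`
	Temperature  float64 `json:"temperature"`
}

func main() {
	b, _ := json.Marshal(Parameters{MaxNewTokens: 2048, TopP: 0.5, TopK: 50, Temperature: 0.7})
	fmt.Println(string(b))
	// Output: {"max_new_tokens":2048,"top_p":0.5,"top_k":50,"temperature":0.7}
}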

View File

@@ -31,6 +31,7 @@ type GoogleGenAIClient struct {
	model       string
	temperature float32
	topP        float32
+	topK        int32
	maxTokens   int
}

@@ -53,6 +54,7 @@ func (c *GoogleGenAIClient) Configure(config IAIConfig) error {
	c.model = config.GetModel()
	c.temperature = config.GetTemperature()
	c.topP = config.GetTopP()
+	c.topK = config.GetTopK()
	c.maxTokens = config.GetMaxTokens()
	return nil
}

@@ -62,6 +64,7 @@ func (c *GoogleGenAIClient) GetCompletion(ctx context.Context, prompt string) (s
	model := c.client.GenerativeModel(c.model)
	model.SetTemperature(c.temperature)
	model.SetTopP(c.topP)
+	model.SetTopK(c.topK)
	model.SetMaxOutputTokens(int32(c.maxTokens))
	// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.

View File

@@ -30,6 +30,7 @@ type GoogleVertexAIClient struct {
	model       string
	temperature float32
	topP        float32
+	topK        int32
	maxTokens   int
}

@@ -111,6 +112,7 @@ func (g *GoogleVertexAIClient) Configure(config IAIConfig) error {
	g.model = GetVertexAIModelOrDefault(config.GetModel())
	g.temperature = config.GetTemperature()
	g.topP = config.GetTopP()
+	g.topK = config.GetTopK()
	g.maxTokens = config.GetMaxTokens()
	return nil

@@ -121,6 +123,7 @@ func (g *GoogleVertexAIClient) GetCompletion(ctx context.Context, prompt string)
	model := g.client.GenerativeModel(g.model)
	model.SetTemperature(g.temperature)
	model.SetTopP(g.topP)
+	model.SetTopK(float32(g.topK))
	model.SetMaxOutputTokens(int32(g.maxTokens))
	// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.

View File

@@ -2,6 +2,7 @@ package ai
import (
	"context"
+
	"github.com/hupe1980/go-huggingface"
	"k8s.io/utils/ptr"
)

@@ -14,6 +15,7 @@ type HuggingfaceClient struct {
	client      *huggingface.InferenceClient
	model       string
	topP        float32
+	topK        int32
	temperature float32
	maxTokens   int
}

@@ -26,6 +28,7 @@ func (c *HuggingfaceClient) Configure(config IAIConfig) error {
	c.client = client
	c.model = config.GetModel()
	c.topP = config.GetTopP()
+	c.topK = config.GetTopK()
	c.temperature = config.GetTemperature()
	if config.GetMaxTokens() > 500 {
		c.maxTokens = 500

@@ -43,6 +46,7 @@ func (c *HuggingfaceClient) GetCompletion(ctx context.Context, prompt string) (s
		Model: c.model,
		Parameters: huggingface.ConversationalParameters{
			TopP:        ptr.To[float64](float64(c.topP)),
+			TopK:        ptr.To[int](int(c.topK)),
			Temperature: ptr.To[float64](float64(c.temperature)),
			MaxLength:   &c.maxTokens,
		},

View File

@@ -72,6 +72,7 @@ type IAIConfig interface {
	GetTemperature() float32
	GetProviderRegion() string
	GetTopP() float32
+	GetTopK() int32
	GetMaxTokens() int
	GetProviderId() string
}

@@ -104,6 +105,7 @@ type AIProvider struct {
	ProviderRegion string  `mapstructure:"providerregion" yaml:"providerregion,omitempty"`
	ProviderId     string  `mapstructure:"providerid" yaml:"providerid,omitempty"`
	TopP           float32 `mapstructure:"topp" yaml:"topp,omitempty"`
+	TopK           int32   `mapstructure:"topk" yaml:"topk,omitempty"`
	MaxTokens      int     `mapstructure:"maxtokens" yaml:"maxtokens,omitempty"`
}

@@ -123,6 +125,10 @@ func (p *AIProvider) GetTopP() float32 {
	return p.TopP
}

+func (p *AIProvider) GetTopK() int32 {
+	return p.TopK
+}
+
func (p *AIProvider) GetMaxTokens() int {
	return p.MaxTokens
}
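End to end, the plumbing follows a single pattern: a flag, environment
variable, or config entry populates AIProvider.TopK; GetTopK() exposes
it through IAIConfig; and each backend client copies it during
Configure. A self-contained sketch of that pattern (names simplified;
not the actual package code):

package main

import "fmt"

// config is a pared-down stand-in for the IAIConfig interface above.
type config interface {
	GetTopK() int32
}

// provider is a pared-down stand-in for AIProvider.
type provider struct {
	TopK int32
}

func (p *provider) GetTopK() int32 { return p.TopK }

// client mimics a backend client pulling the knob in Configure.
type client struct {
	topK int32
}

func (c *client) Configure(cfg config) {
	c.topK = cfg.GetTopK()
}

func main() {
	c := &client{}
	c.Configure(&provider{TopK: 40})
	fmt.Println(c.topK) // 40
}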