feat: implement Top-K sampling for improved user control (#1110)

This commit adds Top-K sampling, a feature that lets users control the
randomness of generated text by limiting the model's choice of the next
token to the K most probable candidates at each step. This enhances
user control and can improve the quality of the generated output.
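For context, a minimal sketch of what Top-K sampling does conceptually.
This is illustrative only: the sampling itself happens inside each
backend's model, not in this repository, and the tokens and
probabilities below are made up.

package main

import (
	"fmt"
	"math/rand"
	"sort"
)

// topKSample keeps only the k highest-probability candidates,
// renormalizes their mass, and samples from the truncated distribution.
func topKSample(probs map[string]float64, k int, rng *rand.Rand) string {
	type cand struct {
		tok string
		p   float64
	}
	cands := make([]cand, 0, len(probs))
	for t, p := range probs {
		cands = append(cands, cand{t, p})
	}
	// Sort descending by probability and truncate to the top k.
	sort.Slice(cands, func(i, j int) bool { return cands[i].p > cands[j].p })
	if k < len(cands) {
		cands = cands[:k]
	}
	var total float64
	for _, c := range cands {
		total += c.p
	}
	// Draw proportionally from the renormalized top-k mass.
	r := rng.Float64() * total
	for _, c := range cands {
		r -= c.p
		if r <= 0 {
			return c.tok
		}
	}
	return cands[len(cands)-1].tok
}

func main() {
	rng := rand.New(rand.NewSource(1))
	probs := map[string]float64{"pod": 0.5, "node": 0.3, "service": 0.15, "deploy": 0.05}
	// With k=2, only "pod" or "node" can ever be chosen.
	fmt.Println(topKSample(probs, 2, rng))
}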

Fixes: https://github.com/k8sgpt-ai/k8sgpt/issues/1105

Signed-off-by: VaibhavMalik4187 <vaibhavmalik2018@gmail.com>
Co-authored-by: Alex Jones <alexsimonjones@gmail.com>
Vaibhav Malik 2024-05-16 20:11:07 +05:30 committed by GitHub
parent 882c6f5225
commit eda52312ae
8 changed files with 46 additions and 0 deletions

View File

@@ -100,6 +100,10 @@ var addCmd = &cobra.Command{
			color.Red("Error: topP ranges from 0 to 1.")
			os.Exit(1)
		}
+		if topK < 1 || topK > 100 {
+			color.Red("Error: topK ranges from 1 to 100.")
+			os.Exit(1)
+		}
		if ai.NeedPassword(backend) && password == "" {
			fmt.Printf("Enter %s Key: ", backend)
@@ -124,6 +128,7 @@ var addCmd = &cobra.Command{
			ProviderRegion: providerRegion,
			ProviderId:     providerId,
			TopP:           topP,
+			TopK:           topK,
			MaxTokens:      maxTokens,
		}
@@ -156,6 +161,8 @@ func init() {
	addCmd.Flags().StringVarP(&endpointName, "endpointname", "n", "", "Endpoint Name, e.g. `endpoint-xxxxxxxxxxxx` (only for amazonbedrock, amazonsagemaker backends)")
	// add flag for topP
	addCmd.Flags().Float32VarP(&topP, "topp", "c", 0.5, "Probability Cutoff: Set a threshold (0.0-1.0) to limit word choices. Higher values add randomness, lower values increase predictability.")
+	// add flag for topK; the shorthand must not reuse "c", which is already taken by --topp (pflag panics on a duplicate shorthand)
+	addCmd.Flags().Int32VarP(&topK, "topk", "k", 50, "Sampling Cutoff: Set a threshold (1-100) to restrict the sampling process to the top K most probable words at each step. Higher values lead to greater variability, lower values increase predictability.")
	// max tokens
	addCmd.Flags().IntVarP(&maxTokens, "maxtokens", "l", 2048, "Specify a maximum output length. Adjust (1-...) to control text length. Higher values produce longer output, lower values limit length")
	// add flag for temperature

View File

@@ -29,6 +29,7 @@ var (
	providerRegion string
	providerId     string
	topP           float32
+	topK           int32
	maxTokens      int
)

View File

@@ -28,6 +28,7 @@ import (
const (
	defaultTemperature float32 = 0.7
	defaultTopP        float32 = 1.0
+	defaultTopK        int32   = 50
)

var (
@@ -84,6 +85,22 @@ var ServeCmd = &cobra.Command{
			}
			return float32(topP)
		}
+		topK := func() int32 {
+			env := os.Getenv("K8SGPT_TOP_K")
+			if env == "" {
+				return defaultTopK
+			}
+			topK, err := strconv.ParseInt(env, 10, 32)
+			if err != nil {
+				color.Red("Unable to convert topK value: %v", err)
+				os.Exit(1)
+			}
+			if topK < 1 || topK > 100 {
+				color.Red("Error: topK ranges from 1 to 100.")
+				os.Exit(1)
+			}
+			return int32(topK)
+		}
		// Check for env injection
		backend = os.Getenv("K8SGPT_BACKEND")
		password := os.Getenv("K8SGPT_PASSWORD")
@@ -104,6 +121,7 @@ var ServeCmd = &cobra.Command{
			ProxyEndpoint: proxyEndpoint,
			Temperature:   temperature(),
			TopP:          topP(),
+			TopK:          topK(),
		}
		configAI.Providers = append(configAI.Providers, *aiProvider)
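The serve path reads the same knob from the environment instead, e.g.
K8SGPT_TOP_K=40 k8sgpt serve (example value; any integer in the 1-100
range passes the check above, and an unset variable falls back to
defaultTopK).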

View File

@@ -33,6 +33,7 @@ type SageMakerAIClient struct {
	temperature float32
	endpoint    string
	topP        float32
+	topK        int32
	maxTokens   int
}

@@ -56,6 +57,7 @@ type Message struct {
type Parameters struct {
	MaxNewTokens int     `json:"max_new_tokens"`
	TopP         float64 `json:"top_p"`
+	TopK         float64 `json:"top_k"`
	Temperature  float64 `json:"temperature"`
}

@@ -74,6 +76,7 @@ func (c *SageMakerAIClient) Configure(config IAIConfig) error {
	c.temperature = config.GetTemperature()
	c.maxTokens = config.GetMaxTokens()
	c.topP = config.GetTopP()
+	c.topK = config.GetTopK()
	return nil
}

@@ -90,6 +93,7 @@ func (c *SageMakerAIClient) GetCompletion(_ context.Context, prompt string) (str
		Parameters: Parameters{
			MaxNewTokens: int(c.maxTokens),
			TopP:         float64(c.topP),
+			TopK:         float64(c.topK),
			Temperature:  float64(c.temperature),
		},
	}
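For illustration, these parameters serialize into the request body as
follows. This is a standalone sketch mirroring the Parameters struct
above; the values are arbitrary examples, not defaults from this change.

package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the Parameters struct from the diff above.
type Parameters struct {
	MaxNewTokens int     `json:"max_new_tokens"`
	TopP         float64 `json:"top_p"`
	TopK         float64 `json:"top_k"`
	Temperature  float64 `json:"temperature"`
}

func main() {
	b, _ := json.Marshal(Parameters{MaxNewTokens: 2048, TopP: 0.5, TopK: 50, Temperature: 0.7})
	fmt.Println(string(b))
	// Output: {"max_new_tokens":2048,"top_p":0.5,"top_k":50,"temperature":0.7}
}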

View File

@@ -31,6 +31,7 @@ type GoogleGenAIClient struct {
	model       string
	temperature float32
	topP        float32
+	topK        int32
	maxTokens   int
}

@@ -53,6 +54,7 @@ func (c *GoogleGenAIClient) Configure(config IAIConfig) error {
	c.model = config.GetModel()
	c.temperature = config.GetTemperature()
	c.topP = config.GetTopP()
+	c.topK = config.GetTopK()
	c.maxTokens = config.GetMaxTokens()
	return nil
}

@@ -62,6 +64,7 @@ func (c *GoogleGenAIClient) GetCompletion(ctx context.Context, prompt string) (s
	model := c.client.GenerativeModel(c.model)
	model.SetTemperature(c.temperature)
	model.SetTopP(c.topP)
+	model.SetTopK(c.topK)
	model.SetMaxOutputTokens(int32(c.maxTokens))
	// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.

View File

@@ -30,6 +30,7 @@ type GoogleVertexAIClient struct {
	model       string
	temperature float32
	topP        float32
+	topK        int32
	maxTokens   int
}

@@ -111,6 +112,7 @@ func (g *GoogleVertexAIClient) Configure(config IAIConfig) error {
	g.model = GetVertexAIModelOrDefault(config.GetModel())
	g.temperature = config.GetTemperature()
	g.topP = config.GetTopP()
+	g.topK = config.GetTopK()
	g.maxTokens = config.GetMaxTokens()
	return nil

@@ -121,6 +123,7 @@ func (g *GoogleVertexAIClient) GetCompletion(ctx context.Context, prompt string)
	model := g.client.GenerativeModel(g.model)
	model.SetTemperature(g.temperature)
	model.SetTopP(g.topP)
+	model.SetTopK(float32(g.topK))
	model.SetMaxOutputTokens(int32(g.maxTokens))
	// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.

View File

@@ -2,6 +2,7 @@ package ai
import (
	"context"
+
	"github.com/hupe1980/go-huggingface"
	"k8s.io/utils/ptr"
)

@@ -14,6 +15,7 @@ type HuggingfaceClient struct {
	client      *huggingface.InferenceClient
	model       string
	topP        float32
+	topK        int32
	temperature float32
	maxTokens   int
}

@@ -26,6 +28,7 @@ func (c *HuggingfaceClient) Configure(config IAIConfig) error {
	c.client = client
	c.model = config.GetModel()
	c.topP = config.GetTopP()
+	c.topK = config.GetTopK()
	c.temperature = config.GetTemperature()
	if config.GetMaxTokens() > 500 {
		c.maxTokens = 500

@@ -43,6 +46,7 @@ func (c *HuggingfaceClient) GetCompletion(ctx context.Context, prompt string) (s
		Model: c.model,
		Parameters: huggingface.ConversationalParameters{
			TopP:        ptr.To[float64](float64(c.topP)),
+			TopK:        ptr.To[int](int(c.topK)),
			Temperature: ptr.To[float64](float64(c.temperature)),
			MaxLength:   &c.maxTokens,
		},

View File

@@ -72,6 +72,7 @@ type IAIConfig interface {
	GetTemperature() float32
	GetProviderRegion() string
	GetTopP() float32
+	GetTopK() int32
	GetMaxTokens() int
	GetProviderId() string
}

@@ -104,6 +105,7 @@ type AIProvider struct {
	ProviderRegion string  `mapstructure:"providerregion" yaml:"providerregion,omitempty"`
	ProviderId     string  `mapstructure:"providerid" yaml:"providerid,omitempty"`
	TopP           float32 `mapstructure:"topp" yaml:"topp,omitempty"`
+	TopK           int32   `mapstructure:"topk" yaml:"topk,omitempty"`
	MaxTokens      int     `mapstructure:"maxtokens" yaml:"maxtokens,omitempty"`
}

@@ -123,6 +125,10 @@ func (p *AIProvider) GetTopP() float32 {
	return p.TopP
}

+func (p *AIProvider) GetTopK() int32 {
+	return p.TopK
+}
+
func (p *AIProvider) GetMaxTokens() int {
	return p.MaxTokens
}
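End to end, the plumbing follows a single pattern: a flag, environment
variable, or config entry populates AIProvider.TopK; GetTopK() exposes
it through IAIConfig; and each backend client copies it during
Configure. A self-contained sketch of that pattern (names simplified;
not the actual package code):

package main

import "fmt"

// config is a pared-down stand-in for the IAIConfig interface above.
type config interface {
	GetTopK() int32
}

// provider is a pared-down stand-in for AIProvider.
type provider struct {
	TopK int32
}

func (p *provider) GetTopK() int32 { return p.TopK }

// client mimics a backend client pulling the knob in Configure.
type client struct {
	topK int32
}

func (c *client) Configure(cfg config) {
	c.topK = cfg.GetTopK()
}

func main() {
	c := &client{}
	c.Configure(&provider{TopK: 40})
	fmt.Println(c.topK) // 40
}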