From 2e767c8c28fac0b0de7fc1d75a06e956090f7fba Mon Sep 17 00:00:00 2001 From: Jiaqi Luo <6218999+jiaqiluo@users.noreply.github.com> Date: Wed, 13 Mar 2024 18:10:58 -0700 Subject: [PATCH] This PR fixes the issue that kube-apiserver does not restart on all CP nodes after changing the Pod Security Admission Configuration. A new environment variable, RKE_ADMISSION_CONFIG_CHECKSUM, whose value is the checksum of the content of the admission configuration file, is added to the env list that is set in the `kube-apiserver` container configuration, so any change in the admission configuration file results in a change in the container's configuration. RKE will detect the change during reconciliation and therefore restart the kube-apiserver container on all CP nodes. The upgrade cadence is bounded by the upgrade strategy configured in the cluster. This PR also drops the unnecessary appending of env vars to the cluster object, which appears in the cluster.rkestate file. Signed-off-by: Jiaqi Luo <6218999+jiaqiluo@users.noreply.github.com> --- cluster/plan.go | 34 ++++++++++++++++++++++------------ cluster/reconcile.go | 16 ---------------- cmd/up.go | 4 ---- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/cluster/plan.go b/cluster/plan.go index 3e1251e7..9d1846aa 100644 --- a/cluster/plan.go +++ b/cluster/plan.go @@ -40,10 +40,11 @@ const ( NetworkConfigurationEnv = "RKE_NETWORK_CONFIGURATION" - EtcdPathPrefix = "/registry" - CloudConfigSumEnv = "RKE_CLOUD_CONFIG_CHECKSUM" - CloudProviderNameEnv = "RKE_CLOUD_PROVIDER_NAME" - AuditLogConfigSumEnv = "RKE_AUDITLOG_CONFIG_CHECKSUM" + EtcdPathPrefix = "/registry" + CloudConfigSumEnv = "RKE_CLOUD_CONFIG_CHECKSUM" + CloudProviderNameEnv = "RKE_CLOUD_PROVIDER_NAME" + AuditLogConfigSumEnv = "RKE_AUDITLOG_CONFIG_CHECKSUM" + AdmissionConfigSumEnv = "RKE_ADMISSION_CONFIG_CHECKSUM" DefaultToolsEntrypoint = "/opt/rke-tools/entrypoint.sh" DefaultToolsEntrypointVersion = "0.1.13" @@ -202,6 +203,7 @@ func (c *Cluster) 
BuildKubeAPIProcess(host *hosts.Host, serviceOptions v3.Kubern "tls-private-key-file": pki.GetKeyPath(pki.KubeAPICertName), } CommandArrayArgs := make(map[string][]string, len(c.Services.KubeAPI.ExtraArgsArray)) + Env := make([]string, len(c.Services.KubeAPI.ExtraEnv)) if len(c.CloudProvider.Name) > 0 { CommandArgs["cloud-config"] = cloudConfigFileName @@ -211,9 +213,7 @@ func (c *Cluster) BuildKubeAPIProcess(host *hosts.Host, serviceOptions v3.Kubern CommandArgs["authentication-token-webhook-cache-ttl"] = c.Authentication.Webhook.CacheTimeout } if len(c.CloudProvider.Name) > 0 { - c.Services.KubeAPI.ExtraEnv = append( - c.Services.KubeAPI.ExtraEnv, - fmt.Sprintf("%s=%s", CloudConfigSumEnv, getStringChecksum(c.CloudConfigFile))) + Env = append(Env, fmt.Sprintf("%s=%s", CloudConfigSumEnv, getStringChecksum(c.CloudConfigFile))) } if c.EncryptionConfig.EncryptionProviderFile != "" { CommandArgs[EncryptionProviderConfigArgument] = EncryptionProviderFilePath @@ -286,16 +286,24 @@ func (c *Cluster) BuildKubeAPIProcess(host *hosts.Host, serviceOptions v3.Kubern fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(host.PrefixPath, "/etc/kubernetes")), } + if _, ok := c.Services.KubeAPI.ExtraArgs[KubeAPIArgAdmissionControlConfigFile]; !ok { + admissionConfig, err := c.getConsolidatedAdmissionConfiguration() + if err != nil { + logrus.Warnf("Error while getting consolidated admission configuration: %v", err) + } + bytes, err := yaml.Marshal(admissionConfig) + if err != nil { + logrus.Warnf("Error while marshalling admission configuration: %v", err) + } + Env = append(Env, fmt.Sprintf("%s=%s", AdmissionConfigSumEnv, getStringChecksum(string(bytes)))) + } if c.Services.KubeAPI.AuditLog != nil && c.Services.KubeAPI.AuditLog.Enabled { Binds = append(Binds, fmt.Sprintf("%s:/var/log/kube-audit", path.Join(host.PrefixPath, "/var/log/kube-audit"))) bytes, err := yaml.Marshal(c.Services.KubeAPI.AuditLog.Configuration.Policy) if err != nil { logrus.Warnf("Error while marshalling auditlog 
policy: %v", err) } - - c.Services.KubeAPI.ExtraEnv = append( - c.Services.KubeAPI.ExtraEnv, - fmt.Sprintf("%s=%s", AuditLogConfigSumEnv, getStringChecksum(string(bytes)))) + Env = append(Env, fmt.Sprintf("%s=%s", AuditLogConfigSumEnv, getStringChecksum(string(bytes)))) } matchedRange, err := util.SemVerMatchRange(c.Version, util.SemVerK8sVersion122OrHigher) @@ -328,12 +336,14 @@ func (c *Cluster) BuildKubeAPIProcess(host *hosts.Host, serviceOptions v3.Kubern } registryAuthConfig, _, _ := docker.GetImageRegistryConfig(c.Services.KubeAPI.Image, c.PrivateRegistriesMap) + Env = append(Env, c.Services.KubeAPI.ExtraEnv...) + return v3.Process{ Name: services.KubeAPIContainerName, Command: Command, VolumesFrom: VolumesFrom, Binds: getUniqStringList(Binds), - Env: getUniqStringList(c.Services.KubeAPI.ExtraEnv), + Env: getUniqStringList(Env), NetworkMode: "host", RestartPolicy: "always", Image: c.Services.KubeAPI.Image, diff --git a/cluster/reconcile.go b/cluster/reconcile.go index 543c12fc..12f44f08 100644 --- a/cluster/reconcile.go +++ b/cluster/reconcile.go @@ -3,7 +3,6 @@ package cluster import ( "context" "fmt" - "reflect" "time" "github.com/rancher/rke/docker" @@ -489,18 +488,3 @@ func getTaintKey(taint v3.RKETaint) string { func getTaintValue(taint v3.RKETaint) string { return fmt.Sprintf("%s=%s:%s", taint.Key, taint.Value, taint.Effect) } - -// RestartKubeAPIServerWhenConfigChanges restarts the kube-apiserver container on the control plane nodes -// when changes are detected on the to-be-applied kube-api configuration. 
This is needed to handle the case -// where changes happen on the generated admission-control-config-file but not on the kube-apiserver container -func RestartKubeAPIServerWhenConfigChanges(ctx context.Context, kubeCluster, currentCluster *Cluster) error { - if currentCluster == nil { - return nil - } - if !reflect.DeepEqual(currentCluster.Services.KubeAPI, kubeCluster.Services.KubeAPI) { - for _, host := range kubeCluster.ControlPlaneHosts { - return services.RestartKubeAPI(ctx, host) - } - } - return nil -} diff --git a/cmd/up.go b/cmd/up.go index 989bfb5f..55910d00 100644 --- a/cmd/up.go +++ b/cmd/up.go @@ -200,10 +200,6 @@ func ClusterUp(ctx context.Context, dialersOptions hosts.DialersOptions, flags c return APIURL, caCrt, clientCert, clientKey, nil, err } - if err := cluster.RestartKubeAPIServerWhenConfigChanges(ctx, kubeCluster, currentCluster); err != nil { - return APIURL, caCrt, clientCert, clientKey, nil, err - } - if err := kubeCluster.PrePullK8sImages(ctx); err != nil { return APIURL, caCrt, clientCert, clientKey, nil, err }