From f36f591be996b218aafd002044b3b8060c53337a Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 3 Mar 2026 22:36:59 -0500 Subject: [PATCH 1/2] k8sclient: use ServerVersion instead of pod list for kubeconfig validation Listing all pods across all namespaces during bootstrap is expensive in large clusters and unnecessary since the result is discarded. Use the lightweight /version endpoint to validate connectivity instead. Signed-off-by: Peng Liu --- pkg/k8sclient/kubeconfig.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/k8sclient/kubeconfig.go b/pkg/k8sclient/kubeconfig.go index b9ca1b402..d96fde1d2 100644 --- a/pkg/k8sclient/kubeconfig.go +++ b/pkg/k8sclient/kubeconfig.go @@ -27,7 +27,6 @@ import ( certificatesv1 "k8s.io/api/certificates/v1" v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" @@ -96,7 +95,7 @@ func PerNodeK8sClient(nodeName, bootstrapKubeconfigFile string, certDuration tim if err != nil { logging.Errorf("failed to read kubeconfig from cert manager: %v", err) } else { - _, err := tempClient.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{}) + _, err := tempClient.Discovery().ServerVersion() // tls unknown authority error is unrecoverable error with retry if err != nil { if strings.Contains(err.Error(), "x509: certificate signed by unknown authority") { @@ -105,7 +104,7 @@ func PerNodeK8sClient(nodeName, bootstrapKubeconfigFile string, certDuration tim newBootstrapKubeconfig, _ := clientcmd.BuildConfigFromFlags("", bootstrapKubeconfigFile) cfg = newBootstrapKubeconfig } else { - logging.Errorf("failed to list pods with new certs: %v", err) + logging.Errorf("failed to validate kubeconfig with new certs: %v", err) } } From ddd00fe48bda0bd13902d97ef02b3a2ec4a03c92 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 17 Mar 2026 15:06:12 +0800 Subject: [PATCH 2/2] k8sclient: fix per-node kubeconfig fallback Validate the per-node kubeconfig when a current certificate is available and fall back to the bootstrap kubeconfig only when the per-node config is no longer trusted. Also rebuild the derived per-node rest.Config from the reloaded bootstrap config so TLS settings are preserved and refreshed consistently. Signed-off-by: Peng Liu --- pkg/k8sclient/kubeconfig.go | 78 ++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/pkg/k8sclient/kubeconfig.go b/pkg/k8sclient/kubeconfig.go index d96fde1d2..d4e072c49 100644 --- a/pkg/k8sclient/kubeconfig.go +++ b/pkg/k8sclient/kubeconfig.go @@ -55,25 +55,29 @@ var ( // getPerNodeKubeconfig creates new kubeConfig, based on bootstrap, with new certDir func getPerNodeKubeconfig(bootstrap *rest.Config, certDir string) *rest.Config { - return &rest.Config{ - Host: bootstrap.Host, - APIPath: bootstrap.APIPath, - ContentConfig: rest.ContentConfig{ - AcceptContentTypes: "application/vnd.kubernetes.protobuf,application/json", - ContentType: "application/vnd.kubernetes.protobuf", - }, - TLSClientConfig: rest.TLSClientConfig{ - KeyFile: path.Join(certDir, certNamePrefix+"-current.pem"), - CertFile: path.Join(certDir, certNamePrefix+"-current.pem"), - CAData: bootstrap.TLSClientConfig.CAData, - }, - // Allow multus (especially in server mode) to make more concurrent requests - // to reduce client-side throttling - QPS: 50, - Burst: 50, - // Set the config timeout to one minute. - Timeout: time.Minute, - } + config := rest.CopyConfig(bootstrap) + config.TLSClientConfig.CertFile = path.Join(certDir, certNamePrefix+"-current.pem") + config.TLSClientConfig.KeyFile = path.Join(certDir, certNamePrefix+"-current.pem") + config.TLSClientConfig.CertData = nil + config.TLSClientConfig.KeyData = nil + + // Switch to the per-node client certificate instead of reusing bootstrap auth. + config.BearerToken = "" + config.BearerTokenFile = "" + config.Username = "" + config.Password = "" + config.ExecProvider = nil + config.AuthProvider = nil + + config.AcceptContentTypes = "application/vnd.kubernetes.protobuf,application/json" + config.ContentType = "application/vnd.kubernetes.protobuf" + // Allow multus (especially in server mode) to make more concurrent requests + // to reduce client-side throttling + config.QPS = 50 + config.Burst = 50 + // Set the config timeout to one minute. + config.Timeout = time.Minute + return config } // PerNodeK8sClient creates/reload new multus kubeconfig per-node. @@ -84,33 +88,35 @@ func PerNodeK8sClient(nodeName, bootstrapKubeconfigFile string, certDuration tim } config := getPerNodeKubeconfig(bootstrapKubeconfig, certDir) - // If we have a valid certificate, user that to fetch CSRs. + // If we have a valid certificate, use that to fetch CSRs. // Otherwise, use the bootstrap credentials from bootstrapKubeconfig // https://github.com/kubernetes/kubernetes/blob/068ee321bc7bfe1c2cefb87fb4d9e5deea84fbc8/cmd/kubelet/app/server.go#L953-L963 newClientsetFn := func(current *tls.Certificate) (kubernetes.Interface, error) { cfg := bootstrapKubeconfig - // validate the kubeconfig - tempClient, err := kubernetes.NewForConfig(cfg) - if err != nil { - logging.Errorf("failed to read kubeconfig from cert manager: %v", err) - } else { - _, err := tempClient.Discovery().ServerVersion() - // tls unknown authority error is unrecoverable error with retry + // When a current certificate exists, prefer it for CSR operations but fall back + // to bootstrap credentials if the stored per-node config is no longer trusted. + if current != nil { + cfg = config + tempClient, err := kubernetes.NewForConfig(cfg) + if err != nil { + return nil, logging.Errorf("failed to create client from per-node kubeconfig: %v", err) + } + _, err = tempClient.Discovery().ServerVersion() if err != nil { if strings.Contains(err.Error(), "x509: certificate signed by unknown authority") { - logging.Verbosef("cert mgr gets invalid config. rebuild from bootstrap kubeconfig") - // reload and use bootstrapKubeconfig again - newBootstrapKubeconfig, _ := clientcmd.BuildConfigFromFlags("", bootstrapKubeconfigFile) - cfg = newBootstrapKubeconfig + logging.Verbosef("cert mgr gets invalid per-node config. rebuild from bootstrap kubeconfig") + newBootstrapKubeconfig, reloadErr := clientcmd.BuildConfigFromFlags("", bootstrapKubeconfigFile) + if reloadErr != nil { + return nil, logging.Errorf("failed to reload bootstrap kubeconfig: %v", reloadErr) + } + bootstrapKubeconfig = newBootstrapKubeconfig + config = getPerNodeKubeconfig(bootstrapKubeconfig, certDir) + cfg = bootstrapKubeconfig } else { - logging.Errorf("failed to validate kubeconfig with new certs: %v", err) + logging.Errorf("failed to validate per-node kubeconfig: %v", err) } } - - if current != nil { - cfg = config - } } return kubernetes.NewForConfig(cfg) }