Merge pull request #108315 from Monokaix/print-getStaticPodSingleHash-err

kubeadm: improve getStaticPodSingleHash error messages
This commit is contained in:
Kubernetes Prow Robot 2022-03-02 09:17:14 -08:00 committed by GitHub
commit 4fcfc58d1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 10 deletions

View File

@@ -151,7 +151,6 @@ func runApply(flags *applyFlags, args []string) error {
waiter := getWaiter(flags.dryRun, client, upgrade.UpgradeManifestTimeout) waiter := getWaiter(flags.dryRun, client, upgrade.UpgradeManifestTimeout)
// Now; perform the upgrade procedure // Now; perform the upgrade procedure
klog.V(1).Infoln("[upgrade/apply] performing upgrade")
if err := PerformControlPlaneUpgrade(flags, client, waiter, cfg); err != nil { if err := PerformControlPlaneUpgrade(flags, client, waiter, cfg); err != nil {
return errors.Wrap(err, "[upgrade/apply] FATAL") return errors.Wrap(err, "[upgrade/apply] FATAL")
} }
@@ -222,7 +221,8 @@ func EnforceVersionPolicies(newK8sVersionStr string, newK8sVersion *version.Vers
func PerformControlPlaneUpgrade(flags *applyFlags, client clientset.Interface, waiter apiclient.Waiter, internalcfg *kubeadmapi.InitConfiguration) error { func PerformControlPlaneUpgrade(flags *applyFlags, client clientset.Interface, waiter apiclient.Waiter, internalcfg *kubeadmapi.InitConfiguration) error {
// OK, the cluster is hosted using static pods. Upgrade a static-pod hosted cluster // OK, the cluster is hosted using static pods. Upgrade a static-pod hosted cluster
fmt.Printf("[upgrade/apply] Upgrading your Static Pod-hosted control plane to version %q...\n", internalcfg.KubernetesVersion) fmt.Printf("[upgrade/apply] Upgrading your Static Pod-hosted control plane to version %q (timeout: %v)...\n",
internalcfg.KubernetesVersion, upgrade.UpgradeManifestTimeout)
if flags.dryRun { if flags.dryRun {
return upgrade.DryRunStaticPodUpgrade(flags.patchesDir, internalcfg) return upgrade.DryRunStaticPodUpgrade(flags.patchesDir, internalcfg)

View File

@@ -321,7 +321,7 @@ func performEtcdStaticPodUpgrade(certsRenewMgr *renewal.Manager, client clientse
beforeEtcdPodHash, err := waiter.WaitForStaticPodSingleHash(cfg.NodeRegistration.Name, constants.Etcd) beforeEtcdPodHash, err := waiter.WaitForStaticPodSingleHash(cfg.NodeRegistration.Name, constants.Etcd)
if err != nil { if err != nil {
return true, errors.Wrap(err, "failed to get etcd pod's hash") return true, err
} }
// Write the updated etcd static Pod manifest into the temporary directory, at this point no etcd change // Write the updated etcd static Pod manifest into the temporary directory, at this point no etcd change

View File

@@ -185,18 +185,19 @@ func (w *KubeWaiter) SetTimeout(timeout time.Duration) {
func (w *KubeWaiter) WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error) { func (w *KubeWaiter) WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error) {
componentHash := "" componentHash := ""
var err error var err, lastErr error
mirrorPodHashes := map[string]string{} mirrorPodHashes := map[string]string{}
for _, component := range kubeadmconstants.ControlPlaneComponents { for _, component := range kubeadmconstants.ControlPlaneComponents {
err = wait.PollImmediate(kubeadmconstants.APICallRetryInterval, w.timeout, func() (bool, error) { err = wait.PollImmediate(kubeadmconstants.APICallRetryInterval, w.timeout, func() (bool, error) {
componentHash, err = getStaticPodSingleHash(w.client, nodeName, component) componentHash, err = getStaticPodSingleHash(w.client, nodeName, component)
if err != nil { if err != nil {
lastErr = err
return false, nil return false, nil
} }
return true, nil return true, nil
}) })
if err != nil { if err != nil {
return nil, err return nil, lastErr
} }
mirrorPodHashes[component] = componentHash mirrorPodHashes[component] = componentHash
} }
@@ -208,27 +209,34 @@ func (w *KubeWaiter) WaitForStaticPodControlPlaneHashes(nodeName string) (map[st
func (w *KubeWaiter) WaitForStaticPodSingleHash(nodeName string, component string) (string, error) { func (w *KubeWaiter) WaitForStaticPodSingleHash(nodeName string, component string) (string, error) {
componentPodHash := "" componentPodHash := ""
var err error var err, lastErr error
err = wait.PollImmediate(kubeadmconstants.APICallRetryInterval, w.timeout, func() (bool, error) { err = wait.PollImmediate(kubeadmconstants.APICallRetryInterval, w.timeout, func() (bool, error) {
componentPodHash, err = getStaticPodSingleHash(w.client, nodeName, component) componentPodHash, err = getStaticPodSingleHash(w.client, nodeName, component)
if err != nil { if err != nil {
lastErr = err
return false, nil return false, nil
} }
return true, nil return true, nil
}) })
if err != nil {
err = lastErr
}
return componentPodHash, err return componentPodHash, err
} }
// WaitForStaticPodHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed // WaitForStaticPodHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed
// This implicitly means this function blocks until the kubelet has restarted the Static Pod in question // This implicitly means this function blocks until the kubelet has restarted the Static Pod in question
func (w *KubeWaiter) WaitForStaticPodHashChange(nodeName, component, previousHash string) error { func (w *KubeWaiter) WaitForStaticPodHashChange(nodeName, component, previousHash string) error {
return wait.PollImmediate(kubeadmconstants.APICallRetryInterval, w.timeout, func() (bool, error) { var err, lastErr error
err = wait.PollImmediate(kubeadmconstants.APICallRetryInterval, w.timeout, func() (bool, error) {
hash, err := getStaticPodSingleHash(w.client, nodeName, component) hash, err := getStaticPodSingleHash(w.client, nodeName, component)
if err != nil { if err != nil {
lastErr = err
return false, nil return false, nil
} }
// Set lastErr to nil to be able to later distinguish between getStaticPodSingleHash() and timeout errors
lastErr = nil
// We should continue polling until the UID changes // We should continue polling until the UID changes
if hash == previousHash { if hash == previousHash {
return false, nil return false, nil
@@ -236,6 +244,15 @@ func (w *KubeWaiter) WaitForStaticPodHashChange(nodeName, component, previousHas
return true, nil return true, nil
}) })
// If lastError is not nil, this must be a getStaticPodSingleHash() error, else if err is not nil there was a poll timeout
if lastErr != nil {
return lastErr
}
if err != nil {
return errors.Wrapf(err, "static Pod hash for component %s on Node %s did not change after %v", component, nodeName, w.timeout)
}
return nil
} }
// getStaticPodSingleHash computes hashes for a single Static Pod resource // getStaticPodSingleHash computes hashes for a single Static Pod resource
@@ -244,11 +261,10 @@ func getStaticPodSingleHash(client clientset.Interface, nodeName string, compone
staticPodName := fmt.Sprintf("%s-%s", component, nodeName) staticPodName := fmt.Sprintf("%s-%s", component, nodeName)
staticPod, err := client.CoreV1().Pods(metav1.NamespaceSystem).Get(context.TODO(), staticPodName, metav1.GetOptions{}) staticPod, err := client.CoreV1().Pods(metav1.NamespaceSystem).Get(context.TODO(), staticPodName, metav1.GetOptions{})
if err != nil { if err != nil {
return "", err return "", errors.Wrapf(err, "failed to obtain static Pod hash for component %s on Node %s", component, nodeName)
} }
staticPodHash := staticPod.Annotations["kubernetes.io/config.hash"] staticPodHash := staticPod.Annotations["kubernetes.io/config.hash"]
fmt.Printf("Static pod: %s hash: %s\n", staticPodName, staticPodHash)
return staticPodHash, nil return staticPodHash, nil
} }