mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-26 21:17:23 +00:00
[kubeadm] Modify the kubeadm upgrade DAG for the TLS Upgrade
- Calculate `beforePodHashMap` before the etcd upgrade in anticipation of KubeAPIServer downtime
- Detect if pre-upgrade etcd static pod cluster `HasTLS()==false` to switch on the Etcd TLS Upgrade
- If TLS Upgrade:
  - Skip L7 Etcd check (could implement a waiter for this)
  - Skip data rollback on etcd upgrade failure due to lack of L7 check (APIServer is already down and unable to serve new requests)
  - On APIServer upgrade failure, also roll back the etcd manifest to maintain protocol compatibility
- Add logging
This commit is contained in:
parent
4a37e05665
commit
8129480d44
@ -181,6 +181,25 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP
|
|||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("[upgrade/staticpods] Moved new manifest to %q and backed up old manifest to %q\n", currentManifestPath, backupManifestPath)
|
fmt.Printf("[upgrade/staticpods] Moved new manifest to %q and backed up old manifest to %q\n", currentManifestPath, backupManifestPath)
|
||||||
|
|
||||||
|
waitForComponentRestart := true
|
||||||
|
if isTLSUpgrade {
|
||||||
|
// We currently depend on getting the Etcd mirror Pod hash from the KubeAPIServer;
|
||||||
|
// Upgrading the Etcd protocol takes down the apiserver, so we can't verify component restarts if we restart Etcd independently.
|
||||||
|
// Skip waiting for Etcd to restart and immediately move on to updating the apiserver.
|
||||||
|
if component == constants.Etcd {
|
||||||
|
waitForComponentRestart = false
|
||||||
|
}
|
||||||
|
// Normally, if an Etcd upgrade is successful, but the apiserver upgrade fails, Etcd is not rolled back.
|
||||||
|
// In the case of a TLS upgrade, the old KubeAPIServer config is incompatible with the new Etcd config, so we roll back Etcd
|
||||||
|
// if the APIServer upgrade fails.
|
||||||
|
if component == constants.KubeAPIServer {
|
||||||
|
recoverEtcd = true
|
||||||
|
fmt.Printf("[upgrade/staticpods] The %s manifest will be restored if component %q fails to upgrade\n", constants.Etcd, component)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if waitForComponentRestart {
|
||||||
fmt.Println("[upgrade/staticpods] Waiting for the kubelet to restart the component")
|
fmt.Println("[upgrade/staticpods] Waiting for the kubelet to restart the component")
|
||||||
|
|
||||||
// Wait for the mirror Pod hash to change; otherwise we'll run into race conditions here when the kubelet hasn't had time to
|
// Wait for the mirror Pod hash to change; otherwise we'll run into race conditions here when the kubelet hasn't had time to
|
||||||
@ -197,6 +216,10 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP
|
|||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("[upgrade/staticpods] Component %q upgraded successfully!\n", component)
|
fmt.Printf("[upgrade/staticpods] Component %q upgraded successfully!\n", component)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("[upgrade/staticpods] Not waiting for pod-hash change for component %q\n", component)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,8 +299,11 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
|
|||||||
return true, fmt.Errorf("fatal error when trying to upgrade the etcd cluster: %v, rolled the state back to pre-upgrade state", err)
|
return true, fmt.Errorf("fatal error when trying to upgrade the etcd cluster: %v, rolled the state back to pre-upgrade state", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if isTLSUpgrade {
|
||||||
|
fmt.Printf("[upgrade/etcd] Skipping L7 health-check for %s (as well as data rollback on failure)\n", constants.Etcd)
|
||||||
|
} else {
|
||||||
// Checking health state of etcd after the upgrade
|
// Checking health state of etcd after the upgrade
|
||||||
if _, err = etcdCluster.GetEtcdClusterStatus(); err != nil {
|
if _, err = newEtcdCluster.GetStatus(); err != nil {
|
||||||
// Despite the fact that upgradeComponent was successful, there is something wrong with etcd cluster
|
// Despite the fact that upgradeComponent was successful, there is something wrong with etcd cluster
|
||||||
// First step is to restore back up of datastore
|
// First step is to restore back up of datastore
|
||||||
if err := rollbackEtcdData(cfg, fmt.Errorf("etcd cluster is not healthy after upgrade: %v rolling back", err), pathMgr); err != nil {
|
if err := rollbackEtcdData(cfg, fmt.Errorf("etcd cluster is not healthy after upgrade: %v rolling back", err), pathMgr); err != nil {
|
||||||
@ -290,13 +316,14 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
|
|||||||
return true, fmt.Errorf("fatal error upgrading local etcd cluster: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir)
|
return true, fmt.Errorf("fatal error upgrading local etcd cluster: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir)
|
||||||
}
|
}
|
||||||
// Since rollback of the old etcd manifest was successful, checking again the status of etcd cluster
|
// Since rollback of the old etcd manifest was successful, checking again the status of etcd cluster
|
||||||
if _, err := etcdCluster.GetEtcdClusterStatus(); err != nil {
|
if _, err := oldEtcdCluster.GetStatus(); err != nil {
|
||||||
// Nothing else left to try to recover etcd cluster
|
// Nothing else left to try to recover etcd cluster
|
||||||
return true, fmt.Errorf("fatal error upgrading local etcd cluster: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir)
|
return true, fmt.Errorf("fatal error upgrading local etcd cluster: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
return true, fmt.Errorf("fatal error upgrading local etcd cluster: %v, rolled the state back to pre-upgrade state", err)
|
return true, fmt.Errorf("fatal error upgrading local etcd cluster: %v, rolled the state back to pre-upgrade state", err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
@ -306,8 +333,24 @@ func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager
|
|||||||
recoverManifests := map[string]string{}
|
recoverManifests := map[string]string{}
|
||||||
var isTLSUpgrade bool
|
var isTLSUpgrade bool
|
||||||
|
|
||||||
|
beforePodHashMap, err := waiter.WaitForStaticPodControlPlaneHashes(cfg.NodeName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// etcd upgrade is done prior to other control plane components
|
// etcd upgrade is done prior to other control plane components
|
||||||
if etcdUpgrade {
|
if etcdUpgrade {
|
||||||
|
previousEtcdHasTLS, err := oldEtcdCluster.HasTLS()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to determine if previous etcd was using TLS: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// set the TLS upgrade flag for all components
|
||||||
|
isTLSUpgrade = !previousEtcdHasTLS
|
||||||
|
if isTLSUpgrade {
|
||||||
|
fmt.Printf("[upgrade/etcd] Upgrading to TLS for %s\n", constants.Etcd)
|
||||||
|
}
|
||||||
|
|
||||||
// Perform etcd upgrade using common to all control plane components function
|
// Perform etcd upgrade using common to all control plane components function
|
||||||
fatal, err := performEtcdStaticPodUpgrade(waiter, pathMgr, cfg, recoverManifests, isTLSUpgrade, oldEtcdCluster, newEtcdCluster)
|
fatal, err := performEtcdStaticPodUpgrade(waiter, pathMgr, cfg, recoverManifests, isTLSUpgrade, oldEtcdCluster, newEtcdCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -318,11 +361,6 @@ func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
beforePodHashMap, err := waiter.WaitForStaticPodControlPlaneHashes(cfg.NodeName)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write the updated static Pod manifests into the temporary directory
|
// Write the updated static Pod manifests into the temporary directory
|
||||||
fmt.Printf("[upgrade/staticpods] Writing new Static Pod manifests to %q\n", pathMgr.TempManifestDir())
|
fmt.Printf("[upgrade/staticpods] Writing new Static Pod manifests to %q\n", pathMgr.TempManifestDir())
|
||||||
err = controlplanephase.CreateInitStaticPodManifestFiles(pathMgr.TempManifestDir(), cfg)
|
err = controlplanephase.CreateInitStaticPodManifestFiles(pathMgr.TempManifestDir(), cfg)
|
||||||
|
Loading…
Reference in New Issue
Block a user