mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 14:37:00 +00:00
[kubeadm] fix mirror-pod hash race condition
- Update kubeadm static pod upgrades to use the kubetypes.ConfigHashAnnotationKey annotation on the mirror pod rather than generating a hash from the full object info. Previously, a status update for the pod would allow the upgrade to proceed before the new static pod manifest was actually deployed. Signed-off-by: Jason DeTiberus <detiber@gmail.com>
This commit is contained in:
parent
2bdca2b75f
commit
d55d1b6fbe
@ -186,7 +186,7 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP
|
||||
// notice the removal of the Static Pod, leading to a false positive below where we check that the API endpoint is healthy
|
||||
// If we don't do this, there is a case where we remove the Static Pod manifest, kubelet is slow to react, kubeadm checks the
|
||||
// API endpoint below of the OLD Static Pod component and proceeds quickly enough, which might lead to unexpected results.
|
||||
if err := waiter.WaitForStaticPodControlPlaneHashChange(cfg.NodeName, component, beforePodHash); err != nil {
|
||||
if err := waiter.WaitForStaticPodHashChange(cfg.NodeName, component, beforePodHash); err != nil {
|
||||
return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd)
|
||||
}
|
||||
|
||||
|
@ -117,8 +117,8 @@ func (w *fakeWaiter) WaitForStaticPodSingleHash(_ string, _ string) (string, err
|
||||
return "", w.errsToReturn[waitForHashes]
|
||||
}
|
||||
|
||||
// WaitForStaticPodControlPlaneHashChange returns an error if set from errsToReturn
|
||||
func (w *fakeWaiter) WaitForStaticPodControlPlaneHashChange(_, _, _ string) error {
|
||||
// WaitForStaticPodHashChange returns an error if set from errsToReturn
|
||||
func (w *fakeWaiter) WaitForStaticPodHashChange(_, _, _ string) error {
|
||||
return w.errsToReturn[waitForHashChange]
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,7 @@ go_library(
|
||||
deps = [
|
||||
"//cmd/kubeadm/app/constants:go_default_library",
|
||||
"//cmd/kubeadm/app/util:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//pkg/registry/core/service/ipallocator:go_default_library",
|
||||
"//vendor/k8s.io/api/apps/v1:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
|
@ -17,8 +17,6 @@ limitations under the License.
|
||||
package apiclient
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@ -31,6 +29,7 @@ import (
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
)
|
||||
|
||||
// Waiter is an interface for waiting for criteria in Kubernetes to happen
|
||||
@ -43,11 +42,11 @@ type Waiter interface {
|
||||
WaitForPodToDisappear(staticPodName string) error
|
||||
// WaitForStaticPodSingleHash fetches sha256 hash for the control plane static pod
|
||||
WaitForStaticPodSingleHash(nodeName string, component string) (string, error)
|
||||
// WaitForStaticPodHashChange waits for the given static pod component's static pod hash to get updated.
|
||||
// By doing that we can be sure that the kubelet has restarted the given Static Pod
|
||||
WaitForStaticPodHashChange(nodeName, component, previousHash string) error
|
||||
// WaitForStaticPodControlPlaneHashes fetches sha256 hashes for the control plane static pods
|
||||
WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error)
|
||||
// WaitForStaticPodControlPlaneHashChange waits for the given static pod component's static pod hash to get updated.
|
||||
// By doing that we can be sure that the kubelet has restarted the given Static Pod
|
||||
WaitForStaticPodControlPlaneHashChange(nodeName, component, previousHash string) error
|
||||
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
|
||||
WaitForHealthyKubelet(initalTimeout time.Duration, healthzEndpoint string) error
|
||||
// SetTimeout adjusts the timeout to the specified duration
|
||||
@ -194,17 +193,17 @@ func (w *KubeWaiter) WaitForStaticPodSingleHash(nodeName string, component strin
|
||||
return componentPodHash, err
|
||||
}
|
||||
|
||||
// WaitForStaticPodControlPlaneHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed
|
||||
// WaitForStaticPodHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed
|
||||
// This implicitly means this function blocks until the kubelet has restarted the Static Pod in question
|
||||
func (w *KubeWaiter) WaitForStaticPodControlPlaneHashChange(nodeName, component, previousHash string) error {
|
||||
func (w *KubeWaiter) WaitForStaticPodHashChange(nodeName, component, previousHash string) error {
|
||||
return wait.PollImmediate(constants.APICallRetryInterval, w.timeout, func() (bool, error) {
|
||||
|
||||
hashes, err := getStaticPodControlPlaneHashes(w.client, nodeName)
|
||||
hash, err := getStaticPodSingleHash(w.client, nodeName, component)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
// We should continue polling until the UID changes
|
||||
if hashes[component] == previousHash {
|
||||
if hash == previousHash {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
@ -235,12 +234,9 @@ func getStaticPodSingleHash(client clientset.Interface, nodeName string, compone
|
||||
return "", err
|
||||
}
|
||||
|
||||
podBytes, err := json.Marshal(staticPod)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%x", sha256.Sum256(podBytes)), nil
|
||||
staticPodHash := staticPod.Annotations[kubetypes.ConfigHashAnnotationKey]
|
||||
fmt.Printf("Static pod: %s hash: %s\n", staticPodName, staticPodHash)
|
||||
return staticPodHash, nil
|
||||
}
|
||||
|
||||
// TryRunCommand runs a function a maximum of failureThreshold times, and retries on error. If failureThreshold is hit; the last error is returned
|
||||
|
@ -106,8 +106,7 @@ func (w *Waiter) WaitForHealthyKubelet(_ time.Duration, healthzEndpoint string)
|
||||
// SetTimeout is a no-op; we don't wait in this implementation
|
||||
func (w *Waiter) SetTimeout(_ time.Duration) {}
|
||||
|
||||
// WaitForStaticPodControlPlaneHashes returns an empty hash for all control plane images; WaitForStaticPodControlPlaneHashChange won't block in any case
|
||||
// but the empty strings there are needed
|
||||
// WaitForStaticPodControlPlaneHashes returns an empty hash for all control plane images;
|
||||
func (w *Waiter) WaitForStaticPodControlPlaneHashes(_ string) (map[string]string, error) {
|
||||
return map[string]string{
|
||||
constants.KubeAPIServer: "",
|
||||
@ -122,7 +121,7 @@ func (w *Waiter) WaitForStaticPodSingleHash(_ string, _ string) (string, error)
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// WaitForStaticPodControlPlaneHashChange returns a dummy nil error in order for the flow to just continue as we're dryrunning
|
||||
func (w *Waiter) WaitForStaticPodControlPlaneHashChange(_, _, _ string) error {
|
||||
// WaitForStaticPodHashChange returns a dummy nil error in order for the flow to just continue as we're dryrunning
|
||||
func (w *Waiter) WaitForStaticPodHashChange(_, _, _ string) error {
|
||||
return nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user