[kubeadm] fix mirror-pod hash race condition

- Update kubeadm static pod upgrades to use the
  kubetypes.ConfigHashAnnotationKey annotation on the mirror pod rather
  than generating a hash from the full object info. Previously, a status
  update for the pod would allow the upgrade to proceed before the
  new static pod manifest was actually deployed.

Signed-off-by: Jason DeTiberus <detiber@gmail.com>
This commit is contained in:
Jason DeTiberus 2018-03-30 10:19:26 -04:00 committed by leigh schrandt
parent 2bdca2b75f
commit d55d1b6fbe
5 changed files with 18 additions and 22 deletions

View File

@ -186,7 +186,7 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP
// notice the removal of the Static Pod, leading to a false positive below where we check that the API endpoint is healthy
// If we don't do this, there is a case where we remove the Static Pod manifest, kubelet is slow to react, kubeadm checks the
// API endpoint below of the OLD Static Pod component and proceeds quickly enough, which might lead to unexpected results.
if err := waiter.WaitForStaticPodControlPlaneHashChange(cfg.NodeName, component, beforePodHash); err != nil {
if err := waiter.WaitForStaticPodHashChange(cfg.NodeName, component, beforePodHash); err != nil {
return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd)
}

View File

@ -117,8 +117,8 @@ func (w *fakeWaiter) WaitForStaticPodSingleHash(_ string, _ string) (string, err
return "", w.errsToReturn[waitForHashes]
}
// WaitForStaticPodControlPlaneHashChange returns an error if set from errsToReturn
func (w *fakeWaiter) WaitForStaticPodControlPlaneHashChange(_, _, _ string) error {
// WaitForStaticPodHashChange returns an error if set from errsToReturn
func (w *fakeWaiter) WaitForStaticPodHashChange(_, _, _ string) error {
return w.errsToReturn[waitForHashChange]
}

View File

@ -19,6 +19,7 @@ go_library(
deps = [
"//cmd/kubeadm/app/constants:go_default_library",
"//cmd/kubeadm/app/util:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/registry/core/service/ipallocator:go_default_library",
"//vendor/k8s.io/api/apps/v1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",

View File

@ -17,8 +17,6 @@ limitations under the License.
package apiclient
import (
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"net/http"
@ -31,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
// Waiter is an interface for waiting for criteria in Kubernetes to happen
@ -43,11 +42,11 @@ type Waiter interface {
WaitForPodToDisappear(staticPodName string) error
// WaitForStaticPodSingleHash fetches sha256 hash for the control plane static pod
WaitForStaticPodSingleHash(nodeName string, component string) (string, error)
// WaitForStaticPodHashChange waits for the given static pod component's static pod hash to get updated.
// By doing that we can be sure that the kubelet has restarted the given Static Pod
WaitForStaticPodHashChange(nodeName, component, previousHash string) error
// WaitForStaticPodControlPlaneHashes fetches sha256 hashes for the control plane static pods
WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error)
// WaitForStaticPodControlPlaneHashChange waits for the given static pod component's static pod hash to get updated.
// By doing that we can be sure that the kubelet has restarted the given Static Pod
WaitForStaticPodControlPlaneHashChange(nodeName, component, previousHash string) error
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
WaitForHealthyKubelet(initalTimeout time.Duration, healthzEndpoint string) error
// SetTimeout adjusts the timeout to the specified duration
@ -194,17 +193,17 @@ func (w *KubeWaiter) WaitForStaticPodSingleHash(nodeName string, component strin
return componentPodHash, err
}
// WaitForStaticPodControlPlaneHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed
// WaitForStaticPodHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed
// This implicitly means this function blocks until the kubelet has restarted the Static Pod in question
func (w *KubeWaiter) WaitForStaticPodControlPlaneHashChange(nodeName, component, previousHash string) error {
func (w *KubeWaiter) WaitForStaticPodHashChange(nodeName, component, previousHash string) error {
return wait.PollImmediate(constants.APICallRetryInterval, w.timeout, func() (bool, error) {
hashes, err := getStaticPodControlPlaneHashes(w.client, nodeName)
hash, err := getStaticPodSingleHash(w.client, nodeName, component)
if err != nil {
return false, nil
}
// We should continue polling until the UID changes
if hashes[component] == previousHash {
if hash == previousHash {
return false, nil
}
@ -235,12 +234,9 @@ func getStaticPodSingleHash(client clientset.Interface, nodeName string, compone
return "", err
}
podBytes, err := json.Marshal(staticPod)
if err != nil {
return "", err
}
return fmt.Sprintf("%x", sha256.Sum256(podBytes)), nil
staticPodHash := staticPod.Annotations[kubetypes.ConfigHashAnnotationKey]
fmt.Printf("Static pod: %s hash: %s\n", staticPodName, staticPodHash)
return staticPodHash, nil
}
// TryRunCommand runs a function a maximum of failureThreshold times, and retries on error. If failureThreshold is hit; the last error is returned

View File

@ -106,8 +106,7 @@ func (w *Waiter) WaitForHealthyKubelet(_ time.Duration, healthzEndpoint string)
// SetTimeout is a no-op; we don't wait in this implementation
func (w *Waiter) SetTimeout(_ time.Duration) {}
// WaitForStaticPodControlPlaneHashes returns an empty hash for all control plane images; WaitForStaticPodControlPlaneHashChange won't block in any case
// but the empty strings there are needed
// WaitForStaticPodControlPlaneHashes returns an empty hash for all control plane images;
func (w *Waiter) WaitForStaticPodControlPlaneHashes(_ string) (map[string]string, error) {
return map[string]string{
constants.KubeAPIServer: "",
@ -122,7 +121,7 @@ func (w *Waiter) WaitForStaticPodSingleHash(_ string, _ string) (string, error)
return "", nil
}
// WaitForStaticPodControlPlaneHashChange returns a dummy nil error in order for the flow to just continue as we're dryrunning
func (w *Waiter) WaitForStaticPodControlPlaneHashChange(_, _, _ string) error {
// WaitForStaticPodHashChange returns a dummy nil error in order for the flow to just continue as we're dryrunning
func (w *Waiter) WaitForStaticPodHashChange(_, _, _ string) error {
return nil
}