Merge pull request #128079 from pohly/e2e-daemonset-check-daemon-status-polling

e2e daemon set: better polling in CheckDaemonStatus
Kubernetes Prow Robot 2024-10-15 12:24:21 +01:00 committed by GitHub
commit 1cd8074b83
GPG Key ID: B5690EEEBB952194
5 changed files with 38 additions and 34 deletions
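In short: callers previously wrapped CheckDaemonStatus in wait.PollUntilContextTimeout; the helper now polls internally (a gomega Eventually bounded by f.Timeouts.PodStart) and returns a descriptive error, so every call site collapses to a plain call. Roughly, mirroring the hunks below:

// Before: each caller supplied its own poll interval and timeout.
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

// After: the helper polls internally and its error already describes the DaemonSet state.
err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err)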


@ -136,8 +136,8 @@ var _ = SIGDescribe("ControllerRevision", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.") ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset)) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
framework.ExpectNoError(err, "error waiting for daemon pod to start") framework.ExpectNoError(err, "error waiting for daemon pod to start")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
ginkgo.By(fmt.Sprintf("Confirm DaemonSet %q successfully created with %q label", dsName, dsLabelSelector)) ginkgo.By(fmt.Sprintf("Confirm DaemonSet %q successfully created with %q label", dsName, dsLabelSelector))
dsList, err := csAppsV1.DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: dsLabelSelector}) dsList, err := csAppsV1.DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: dsLabelSelector})


@ -184,8 +184,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.") ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
framework.ExpectNoError(err, "error waiting for daemon pod to start") framework.ExpectNoError(err, "error waiting for daemon pod to start")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
ginkgo.By("Stop a daemon pod, check that the daemon pod is revived.") ginkgo.By("Stop a daemon pod, check that the daemon pod is revived.")
podList := listDaemonPods(ctx, c, ns, label) podList := listDaemonPods(ctx, c, ns, label)
@ -224,8 +224,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1)) gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name})) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes") framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
ginkgo.By("Update the node label to green, and wait for daemons to be unscheduled") ginkgo.By("Update the node label to green, and wait for daemons to be unscheduled")
nodeSelector[daemonsetColorLabel] = "green" nodeSelector[daemonsetColorLabel] = "green"
@ -243,8 +243,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1)) gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{greenNode.Name})) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{greenNode.Name}))
framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes") framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
}) })
// We defer adding this test to conformance pending the disposition of moving DaemonSet scheduling logic to the // We defer adding this test to conformance pending the disposition of moving DaemonSet scheduling logic to the
@ -287,8 +287,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1)) gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name})) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes") framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
ginkgo.By("Remove the node label and wait for daemons to be unscheduled") ginkgo.By("Remove the node label and wait for daemons to be unscheduled")
_, err = setDaemonSetNodeLabels(ctx, c, node.Name, map[string]string{}) _, err = setDaemonSetNodeLabels(ctx, c, node.Name, map[string]string{})
@ -312,8 +312,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.") ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
framework.ExpectNoError(err, "error waiting for daemon pod to start") framework.ExpectNoError(err, "error waiting for daemon pod to start")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
ginkgo.By("Set a daemon pod's phase to 'Failed', check that the daemon pod is revived.") ginkgo.By("Set a daemon pod's phase to 'Failed', check that the daemon pod is revived.")
podList := listDaemonPods(ctx, c, ns, label) podList := listDaemonPods(ctx, c, ns, label)
@ -863,8 +863,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.") ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset)) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
framework.ExpectNoError(err, "error waiting for daemon pod to start") framework.ExpectNoError(err, "error waiting for daemon pod to start")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
ginkgo.By("listing all DaemonSets") ginkgo.By("listing all DaemonSets")
dsList, err := cs.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) dsList, err := cs.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
@ -911,8 +911,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.") ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset)) err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
framework.ExpectNoError(err, "error waiting for daemon pod to start") framework.ExpectNoError(err, "error waiting for daemon pod to start")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)) err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready") framework.ExpectNoError(err)
ginkgo.By("Getting /status") ginkgo.By("Getting /status")
dsResource := schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"} dsResource := schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"}


@@ -18,6 +18,7 @@ package daemonset
 import (
 	"context"
+	"fmt"

 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
@@ -26,6 +27,7 @@ import (
 	"k8s.io/kubectl/pkg/util/podutils"
 	"k8s.io/kubernetes/pkg/controller/daemon"
 	"k8s.io/kubernetes/test/e2e/framework"
+	"k8s.io/kubernetes/test/utils/format"
 )

 func NewDaemonSet(dsName, image string, labels map[string]string, volumes []v1.Volume, mounts []v1.VolumeMount, ports []v1.ContainerPort, args ...string) *appsv1.DaemonSet {
@@ -138,18 +140,20 @@ func checkDaemonPodStateOnNodes(ctx context.Context, c clientset.Interface, ds *
 	return len(nodesToPodCount) == len(nodeNames), nil
 }

-// CheckDaemonStatus returns false if not all desired pods are scheduled or not all of them are ready.
-func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) func(ctx context.Context) (bool, error) {
-	return func(ctx context.Context) (bool, error) {
-		ds, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get(ctx, dsName, metav1.GetOptions{})
-		if err != nil {
-			return false, err
-		}
-		desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
-		if desired == scheduled && scheduled == ready {
-			return true, nil
-		}
-		framework.Logf("error in daemon status. DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d", desired, scheduled, ready)
-		return false, nil
-	}
+// CheckDaemonStatus ensures that eventually the daemon set has the desired
+// number of pods scheduled and ready. It returns a descriptive error if that
+// state is not reached in the amount of time it takes to start
+// pods. f.Timeouts.PodStart can be changed to influence that timeout.
+func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) error {
+	return framework.Gomega().Eventually(ctx, framework.GetObject(f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get, dsName, metav1.GetOptions{})).
+		WithTimeout(f.Timeouts.PodStart).
+		Should(framework.MakeMatcher(func(ds *appsv1.DaemonSet) (failure func() string, err error) {
+			desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
+			if desired == scheduled && scheduled == ready {
+				return nil, nil
+			}
+			return func() string {
+				return fmt.Sprintf("Expected daemon set to reach state where all desired pods are scheduled and ready. Got instead DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d\n%s", desired, scheduled, ready, format.Object(ds, 1))
+			}, nil
+		}))
 }
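For context, the new helper is assembled from existing framework primitives: framework.GetObject turns the typed Get call into a pollable getter, framework.Gomega().Eventually polls it until f.Timeouts.PodStart expires, and framework.MakeMatcher adapts a plain condition function into a gomega matcher whose failure message (including the format.Object dump) becomes the returned error. A minimal sketch of the same pattern applied to a different resource; checkDeploymentAvailable and its availability condition are illustrative only, not part of this change:

package example

import (
	"context"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/utils/format"
)

// checkDeploymentAvailable is a hypothetical analogue of CheckDaemonStatus:
// it polls a Deployment until all desired replicas are available or the
// pod-start timeout expires, and dumps the object on failure.
func checkDeploymentAvailable(ctx context.Context, f *framework.Framework, name string) error {
	return framework.Gomega().Eventually(ctx, framework.GetObject(f.ClientSet.AppsV1().Deployments(f.Namespace.Name).Get, name, metav1.GetOptions{})).
		WithTimeout(f.Timeouts.PodStart).
		Should(framework.MakeMatcher(func(d *appsv1.Deployment) (failure func() string, err error) {
			if d.Spec.Replicas != nil && d.Status.AvailableReplicas == *d.Spec.Replicas {
				return nil, nil
			}
			return func() string {
				return fmt.Sprintf("Expected all replicas of Deployment %s to be available. Got instead AvailableReplicas: %d\n%s", d.Name, d.Status.AvailableReplicas, format.Object(d, 1))
			}, nil
		}))
}

The practical difference from the old closure-based approach is the failure mode: a timeout no longer surfaces as a bare wait.PollUntilContextTimeout deadline error with the details buried in framework.Logf output, but as the matcher's message with the last observed object attached.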


@@ -1307,8 +1307,8 @@ func testRollingUpdateLBConnectivityDisruption(ctx context.Context, f *framework
 	creationTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)
 	err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, nodeNames))
 	framework.ExpectNoError(err, "error waiting for daemon pods to start")
-	err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, name))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, name)
+	framework.ExpectNoError(err)

 	ginkgo.By(fmt.Sprintf("Creating a service %s with type=LoadBalancer externalTrafficPolicy=%s in namespace %s", name, externalTrafficPolicy, ns))
 	jig := e2eservice.NewTestJig(cs, ns, name)


@@ -95,7 +95,7 @@ func (t *DaemonSetUpgradeTest) validateRunningDaemonSet(ctx context.Context, f *

 	// DaemonSet resource itself should be good
 	ginkgo.By("confirming the DaemonSet resource is in a good state")
-	err = wait.PollUntilContextTimeout(ctx, framework.Poll, framework.PodStartTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name)
+	framework.ExpectNoError(err)
 }