Merge pull request #128079 from pohly/e2e-daemonset-check-daemon-status-polling

e2e daemon set: better polling in CheckDaemonStatus
Kubernetes Prow Robot authored 2024-10-15 12:24:21 +01:00, committed by GitHub
commit 1cd8074b83
5 changed files with 38 additions and 34 deletions
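In essence, CheckDaemonStatus changes from returning a poll condition to doing the polling itself and returning an error. A minimal sketch of the call-site migration that the hunks below apply (identifiers taken from the diffs; not runnable outside the e2e framework):

// Before: the caller supplied its own poll interval and timeout.
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

// After: CheckDaemonStatus polls internally, bounded by f.Timeouts.PodStart,
// and returns a descriptive error if the DaemonSet never becomes ready.
err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
framework.ExpectNoError(err)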


@@ -136,8 +136,8 @@ var _ = SIGDescribe("ControllerRevision", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
framework.ExpectNoError(err, "error waiting for daemon pod to start")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
ginkgo.By(fmt.Sprintf("Confirm DaemonSet %q successfully created with %q label", dsName, dsLabelSelector))
dsList, err := csAppsV1.DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: dsLabelSelector})


@@ -184,8 +184,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
framework.ExpectNoError(err, "error waiting for daemon pod to start")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
ginkgo.By("Stop a daemon pod, check that the daemon pod is revived.")
podList := listDaemonPods(ctx, c, ns, label)
@@ -224,8 +224,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
ginkgo.By("Update the node label to green, and wait for daemons to be unscheduled")
nodeSelector[daemonsetColorLabel] = "green"
@@ -243,8 +243,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{greenNode.Name}))
framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
})
// We defer adding this test to conformance pending the disposition of moving DaemonSet scheduling logic to the
@@ -287,8 +287,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
ginkgo.By("Remove the node label and wait for daemons to be unscheduled")
_, err = setDaemonSetNodeLabels(ctx, c, node.Name, map[string]string{})
@@ -312,8 +312,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
framework.ExpectNoError(err, "error waiting for daemon pod to start")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
ginkgo.By("Set a daemon pod's phase to 'Failed', check that the daemon pod is revived.")
podList := listDaemonPods(ctx, c, ns, label)
@@ -863,8 +863,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
framework.ExpectNoError(err, "error waiting for daemon pod to start")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
ginkgo.By("listing all DaemonSets")
dsList, err := cs.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
@@ -911,8 +911,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
ginkgo.By("Check that daemon pods launch on every node of the cluster.")
err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
framework.ExpectNoError(err, "error waiting for daemon pod to start")
-err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+framework.ExpectNoError(err)
ginkgo.By("Getting /status")
dsResource := schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"}


@@ -18,6 +18,7 @@ package daemonset
import (
"context"
"fmt"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
@@ -26,6 +27,7 @@ import (
"k8s.io/kubectl/pkg/util/podutils"
"k8s.io/kubernetes/pkg/controller/daemon"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/utils/format"
)
func NewDaemonSet(dsName, image string, labels map[string]string, volumes []v1.Volume, mounts []v1.VolumeMount, ports []v1.ContainerPort, args ...string) *appsv1.DaemonSet {
@@ -138,18 +140,20 @@ func checkDaemonPodStateOnNodes(ctx context.Context, c clientset.Interface, ds *
return len(nodesToPodCount) == len(nodeNames), nil
}
-// CheckDaemonStatus returns false if not all desired pods are scheduled or not all of them are ready.
-func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) func(ctx context.Context) (bool, error) {
-return func(ctx context.Context) (bool, error) {
-ds, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get(ctx, dsName, metav1.GetOptions{})
-if err != nil {
-return false, err
-}
-desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
-if desired == scheduled && scheduled == ready {
-return true, nil
-}
-framework.Logf("error in daemon status. DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d", desired, scheduled, ready)
-return false, nil
-}
+// CheckDaemonStatus ensures that eventually the daemon set has the desired
+// number of pods scheduled and ready. It returns a descriptive error if that
+// state is not reached in the amount of time it takes to start
+// pods. f.Timeouts.PodStart can be changed to influence that timeout.
+func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) error {
+return framework.Gomega().Eventually(ctx, framework.GetObject(f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get, dsName, metav1.GetOptions{})).
+WithTimeout(f.Timeouts.PodStart).
+Should(framework.MakeMatcher(func(ds *appsv1.DaemonSet) (failure func() string, err error) {
+desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
+if desired == scheduled && scheduled == ready {
+return nil, nil
+}
+return func() string {
+return fmt.Sprintf("Expected daemon set to reach state where all desired pods are scheduled and ready. Got instead DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d\n%s", desired, scheduled, ready, format.Object(ds, 1))
+}, nil
+}))
}
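Note that the poll timeout now comes from the shared framework configuration rather than from each caller. A hypothetical caller that needs a longer window could raise that timeout before the check; the field name is taken from the comment above, while the concrete value and surrounding lines are illustrative only:

// Hypothetical sketch: give a slow cluster more time before the check gives up.
// CheckDaemonStatus consults f.Timeouts.PodStart via WithTimeout above.
f.Timeouts.PodStart = 10 * time.Minute
framework.ExpectNoError(e2edaemonset.CheckDaemonStatus(ctx, f, dsName))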


@@ -1307,8 +1307,8 @@ func testRollingUpdateLBConnectivityDisruption(ctx context.Context, f *framework
creationTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)
err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, nodeNames))
framework.ExpectNoError(err, "error waiting for daemon pods to start")
-err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, name))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, name)
+framework.ExpectNoError(err)
ginkgo.By(fmt.Sprintf("Creating a service %s with type=LoadBalancer externalTrafficPolicy=%s in namespace %s", name, externalTrafficPolicy, ns))
jig := e2eservice.NewTestJig(cs, ns, name)


@@ -95,7 +95,7 @@ func (t *DaemonSetUpgradeTest) validateRunningDaemonSet(ctx context.Context, f *
// DaemonSet resource itself should be good
ginkgo.By("confirming the DaemonSet resource is in a good state")
-err = wait.PollUntilContextTimeout(ctx, framework.Poll, framework.PodStartTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name))
-framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+err = e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name)
+framework.ExpectNoError(err)
}