test: update graceful node shutdown e2e with watch

Use a watch to detect invalid pod status updates in graceful node
shutdown node e2e test. By using a watch, all pod updates will be
captured while the previous logic required polling the api-server which
could miss some intermediate updates.

Signed-off-by: David Porter <david@porter.me>
This commit is contained in:
David Porter 2022-06-06 18:12:59 -07:00
parent 7811d84fef
commit b4b338d4eb

View File

@ -28,7 +28,11 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/tools/cache"
watchtools "k8s.io/client-go/tools/watch"
"k8s.io/kubectl/pkg/util/podutils"
admissionapi "k8s.io/pod-security-admission/api"
"github.com/onsi/ginkgo"
@ -40,6 +44,7 @@ import (
v1 "k8s.io/api/core/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/features"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -102,6 +107,34 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
framework.ExpectNoError(err)
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
defer ginkgo.GinkgoRecover()
w := &cache.ListWatch{
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(context.TODO(), options)
},
}
// Setup watch to continuously monitor any pod events and detect invalid pod status updates
_, err = watchtools.Until(ctx, list.ResourceVersion, w, func(event watch.Event) (bool, error) {
if pod, ok := event.Object.(*v1.Pod); ok {
if isPodStatusAffectedByIssue108594(pod) {
return false, fmt.Errorf("failing test due to detecting invalid pod status")
}
// Watch will never terminate (only when the test ends due to context cancellation)
return false, nil
}
return false, nil
})
// Ignore timeout error since the context will be explicitly cancelled and the watch will never return true
if err != nil && err != wait.ErrWaitTimeout {
framework.Failf("watch for invalid pod status failed: %v", err.Error())
}
}()
ginkgo.By("Verifying batch pods are running")
for _, pod := range list.Items {
if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
@ -125,11 +158,6 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
for _, pod := range list.Items {
if isPodStatusAffectedByIssue108594(&pod) {
framework.Logf("Detected invalid pod state for pod %q: pod status: %+v", pod.Name, pod.Status)
framework.Failf("failing test due to detecting invalid pod status")
}
if kubelettypes.IsCriticalPod(&pod) {
if isPodShutdown(&pod) {
framework.Logf("Expecting critical pod to be running, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status)
@ -157,10 +185,6 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
for _, pod := range list.Items {
if isPodStatusAffectedByIssue108594(&pod) {
framework.Logf("Detected invalid pod state for pod %q: pod status: %+v", pod.Name, pod.Status)
framework.Failf("failing test due to detecting invalid pod status")
}
if !isPodShutdown(&pod) {
framework.Logf("Expecting pod to be shutdown, but it's not currently: Pod: %q, Pod Status %+v", pod.Name, pod.Status)
return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase)
@ -171,6 +195,7 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
// Critical pod starts shutdown after (nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods)
podStatusUpdateTimeout+(nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods),
pollInterval).Should(gomega.BeNil())
})
ginkgo.It("should be able to handle a cancelled shutdown", func() {