diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index 3f803c3db12..e62801f41f6 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -30,11 +30,13 @@ import (
 	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/dynamic-resource-allocation/controller"
 	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/test/e2e/dra/test-driver/app"
 	"k8s.io/kubernetes/test/e2e/framework"
+	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	admissionapi "k8s.io/pod-security-admission/api"
 	utilpointer "k8s.io/utils/pointer"
@@ -515,6 +517,52 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				framework.ExpectNoError(err, "start pod")
 			}
 		})
+
+		// This test covers aspects of non-graceful node shutdown handling by the DRA controller.
+		// More details about this can be found in the KEP:
+		// https://github.com/kubernetes/enhancements/tree/master/keps/sig-storage/2268-non-graceful-shutdown
+		// NOTE: this test depends on kind. It will only work with a kind cluster, as it shuts down
+		// one of the nodes by running `docker stop <node name>`, which is very kind-specific.
+		ginkgo.It("[Serial] [Disruptive] [Slow] must deallocate on non graceful node shutdown", func(ctx context.Context) {
+			ginkgo.By("create test pod")
+			parameters := b.parameters()
+			label := "app.kubernetes.io/instance"
+			instance := f.UniqueName + "-test-app"
+			pod := b.podExternal()
+			pod.Labels[label] = instance
+			claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+			b.create(ctx, parameters, claim, pod)
+
+			ginkgo.By("wait for test pod " + pod.Name + " to run")
+			labelSelector := labels.SelectorFromSet(labels.Set(pod.Labels))
+			pods, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, f.ClientSet, pod.Namespace, labelSelector, 1, framework.PodStartTimeout)
+			framework.ExpectNoError(err, "start pod")
+			runningPod := &pods.Items[0]
+
+			nodeName := runningPod.Spec.NodeName
+			// Prevent the builder's tearDown from failing while waiting for unprepared resources.
+			delete(b.driver.Nodes, nodeName)
+			ginkgo.By("stop node " + nodeName + " non gracefully")
+			_, stderr, err := framework.RunCmd("docker", "stop", nodeName)
+			gomega.Expect(stderr).To(gomega.BeEmpty())
+			framework.ExpectNoError(err)
+			ginkgo.DeferCleanup(framework.RunCmd, "docker", "start", nodeName)
+			if ok := e2enode.WaitForNodeToBeNotReady(ctx, f.ClientSet, nodeName, f.Timeouts.NodeNotReady); !ok {
+				framework.Failf("Node %s failed to enter NotReady state", nodeName)
+			}
+
+			ginkgo.By("apply out-of-service taint on node " + nodeName)
+			taint := v1.Taint{
+				Key:    v1.TaintNodeOutOfService,
+				Effect: v1.TaintEffectNoExecute,
+			}
+			e2enode.AddOrUpdateTaintOnNode(ctx, f.ClientSet, nodeName, taint)
+			e2enode.ExpectNodeHasTaint(ctx, f.ClientSet, nodeName, &taint)
+			ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, f.ClientSet, nodeName, taint)
+
+			ginkgo.By("waiting for claim to get deallocated")
+			gomega.Eventually(ctx, framework.GetObject(b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get, claim.Name, metav1.GetOptions{})).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", gomega.BeNil()))
+		})
 	})

 	ginkgo.Context("with node-local resources", func() {
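Reviewer note on the out-of-service taint used above: `e2enode.AddOrUpdateTaintOnNode` ultimately amounts to a read-modify-write of the Node object. The sketch below shows that flow with bare client-go, only to make the test's side effect explicit; the package name, the helper `applyOutOfServiceTaint`, and the plain Update call are illustrative assumptions, not the framework's actual implementation (which goes through the taint utilities and handles conflicts).

// Illustrative only: a minimal client-go equivalent of applying the
// out-of-service taint, assuming an already-constructed clientset.
package nodeutil

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

func applyOutOfServiceTaint(ctx context.Context, cs kubernetes.Interface, nodeName string) error {
	node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	taint := v1.Taint{
		Key:    v1.TaintNodeOutOfService, // "node.kubernetes.io/out-of-service"
		Effect: v1.TaintEffectNoExecute,
	}
	for i := range node.Spec.Taints {
		if node.Spec.Taints[i].MatchTaint(&taint) {
			return nil // already tainted, nothing to do
		}
	}
	node.Spec.Taints = append(node.Spec.Taints, taint)
	if _, err := cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil {
		return fmt.Errorf("tainting node %s: %w", nodeName, err)
	}
	return nil
}

Per KEP 2268, this taint lets the Pod GC controller force-delete the pods stuck on the NotReady node; once the consumer pod is gone, the DRA controller can deallocate the claim, which is what the final gomega.Eventually asserts by waiting for Status.Allocation to become nil.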
diff --git a/test/e2e/framework/timeouts.go b/test/e2e/framework/timeouts.go
index 5cfc29edba3..6bb89f67425 100644
--- a/test/e2e/framework/timeouts.go
+++ b/test/e2e/framework/timeouts.go
@@ -39,6 +39,7 @@ var defaultTimeouts = TimeoutContext{
 	SystemPodsStartup:      10 * time.Minute,
 	NodeSchedulable:        30 * time.Minute,
 	SystemDaemonsetStartup: 5 * time.Minute,
+	NodeNotReady:           3 * time.Minute,
 }

 // TimeoutContext contains timeout settings for several actions.
@@ -106,6 +107,9 @@ type TimeoutContext struct {

 	// SystemDaemonsetStartup is how long to wait for all system daemonsets to be ready.
 	SystemDaemonsetStartup time.Duration
+
+	// NodeNotReady is how long to wait for a node to be not ready.
+	NodeNotReady time.Duration
 }

 // NewTimeoutContext returns a TimeoutContext with all values set either to
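For context on how the new NodeNotReady timeout is consumed: `e2enode.WaitForNodeToBeNotReady` in the test above blocks until the node's Ready condition stops reporting True, or until the timeout elapses. The sketch below approximates those semantics with simple polling; `waitForNodeNotReady` is a hypothetical stand-in, not the framework helper, which is built on the shared e2e wait utilities.

// Illustrative only: approximate semantics of waiting for a node to
// leave the Ready state within a bounded timeout.
package nodeutil

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

func waitForNodeNotReady(ctx context.Context, cs kubernetes.Interface, nodeName string, timeout time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
		if err == nil {
			for _, cond := range node.Status.Conditions {
				// A stopped node eventually reports Ready=Unknown (or False)
				// once the node controller stops seeing kubelet heartbeats.
				if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
					return true
				}
			}
		}
		select {
		case <-ctx.Done():
			return false
		case <-time.After(2 * time.Second): // poll interval, chosen arbitrarily
		}
	}
	return false
}

The 3-minute default leaves headroom over the node controller's default grace period for marking a stopped node NotReady; tests that need more time can override the field on their TimeoutContext.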