Merge pull request #120965 from bart0sh/PR122-DRA-unexpected-node-shutdown
DRA: e2e: test non-graceful node shutdown
Commit: 7b9d244efd
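The test added below runs a pod with an externally allocated ResourceClaim, stops its kind node non-gracefully, applies the out-of-service taint, and expects the claim to be deallocated. For readers outside the e2e framework, the taint step can be reproduced with plain client-go; the sketch below is not part of this PR (the test uses the e2enode.AddOrUpdateTaintOnNode helper instead), and cs and nodeName are placeholders.

// Sketch only: apply the out-of-service taint (KEP-2268) with plain client-go.
// Not code from this PR; provided for illustration.
package example

import (
	"context"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// markNodeOutOfService appends the node.kubernetes.io/out-of-service taint so
// that pods and resources on a shut-down node can be cleaned up.
func markNodeOutOfService(ctx context.Context, cs kubernetes.Interface, nodeName string) error {
	node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	// Skip the update if the taint is already present.
	for _, t := range node.Spec.Taints {
		if t.Key == v1.TaintNodeOutOfService {
			return nil
		}
	}
	node.Spec.Taints = append(node.Spec.Taints, v1.Taint{
		Key:    v1.TaintNodeOutOfService,
		Effect: v1.TaintEffectNoExecute,
	})
	_, err = cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
	return err
}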
@@ -30,11 +30,13 @@ import (
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/client-go/kubernetes"
	"k8s.io/dynamic-resource-allocation/controller"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/test/e2e/dra/test-driver/app"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	admissionapi "k8s.io/pod-security-admission/api"
	utilpointer "k8s.io/utils/pointer"
@@ -515,6 +517,52 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
				framework.ExpectNoError(err, "start pod")
			}
		})

		// This test covers aspects of non-graceful node shutdown by the DRA controller.
		// More details can be found in the KEP:
		// https://github.com/kubernetes/enhancements/tree/master/keps/sig-storage/2268-non-graceful-shutdown
		// NOTE: this test depends on kind. It only works with a kind cluster, because it shuts down
		// one of the nodes by running `docker stop <node name>`, which is very kind-specific.
		ginkgo.It("[Serial] [Disruptive] [Slow] must deallocate on non graceful node shutdown", func(ctx context.Context) {
			ginkgo.By("create test pod")
			parameters := b.parameters()
			label := "app.kubernetes.io/instance"
			instance := f.UniqueName + "-test-app"
			pod := b.podExternal()
			pod.Labels[label] = instance
			claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
			b.create(ctx, parameters, claim, pod)

			ginkgo.By("wait for test pod " + pod.Name + " to run")
			labelSelector := labels.SelectorFromSet(labels.Set(pod.Labels))
			pods, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, f.ClientSet, pod.Namespace, labelSelector, 1, framework.PodStartTimeout)
			framework.ExpectNoError(err, "start pod")
			runningPod := &pods.Items[0]

			nodeName := runningPod.Spec.NodeName
			// Prevent the builder tearDown from failing while waiting for unprepared resources.
			delete(b.driver.Nodes, nodeName)
			ginkgo.By("stop node " + nodeName + " non gracefully")
			_, stderr, err := framework.RunCmd("docker", "stop", nodeName)
			gomega.Expect(stderr).To(gomega.BeEmpty())
			framework.ExpectNoError(err)
			ginkgo.DeferCleanup(framework.RunCmd, "docker", "start", nodeName)
			if ok := e2enode.WaitForNodeToBeNotReady(ctx, f.ClientSet, nodeName, f.Timeouts.NodeNotReady); !ok {
				framework.Failf("Node %s failed to enter NotReady state", nodeName)
			}

			ginkgo.By("apply out-of-service taint on node " + nodeName)
			taint := v1.Taint{
				Key:    v1.TaintNodeOutOfService,
				Effect: v1.TaintEffectNoExecute,
			}
			e2enode.AddOrUpdateTaintOnNode(ctx, f.ClientSet, nodeName, taint)
			e2enode.ExpectNodeHasTaint(ctx, f.ClientSet, nodeName, &taint)
			ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, f.ClientSet, nodeName, taint)

			ginkgo.By("waiting for claim to get deallocated")
			gomega.Eventually(ctx, framework.GetObject(b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get, claim.Name, metav1.GetOptions{})).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", gomega.BeNil()))
		})
	})

	ginkgo.Context("with node-local resources", func() {
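The final assertion above uses gomega's Eventually with framework.GetObject to wait until the claim's Status.Allocation is cleared. Outside the e2e framework, the same check is a small polling loop; the sketch below assumes a client-go version with the ResourceV1alpha2 client (matching the API group used in this PR), and cs, namespace, and claimName are placeholders.

// Sketch only: wait until a ResourceClaim has been deallocated, i.e. its
// Status.Allocation is nil. Not code from this PR; provided for illustration.
package example

import (
	"context"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

func waitForClaimDeallocation(ctx context.Context, cs kubernetes.Interface, namespace, claimName string, timeout time.Duration) error {
	return wait.PollUntilContextTimeout(ctx, 5*time.Second, timeout, true /* check immediately */, func(ctx context.Context) (bool, error) {
		claim, err := cs.ResourceV1alpha2().ResourceClaims(namespace).Get(ctx, claimName, metav1.GetOptions{})
		if err != nil {
			// For a sketch we treat any error as fatal; a real test might
			// tolerate transient errors here.
			return false, err
		}
		return claim.Status.Allocation == nil, nil
	})
}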
@@ -39,6 +39,7 @@ var defaultTimeouts = TimeoutContext{
	SystemPodsStartup:      10 * time.Minute,
	NodeSchedulable:        30 * time.Minute,
	SystemDaemonsetStartup: 5 * time.Minute,
	NodeNotReady:           3 * time.Minute,
}

// TimeoutContext contains timeout settings for several actions.
@@ -106,6 +107,9 @@ type TimeoutContext struct {

	// SystemDaemonsetStartup is how long to wait for all system daemonsets to be ready.
	SystemDaemonsetStartup time.Duration

	// NodeNotReady is how long to wait for a node to be not ready.
	NodeNotReady time.Duration
}

// NewTimeoutContext returns a TimeoutContext with all values set either to