DRA E2E: tests for device taints
commit 2499663b84
parent 5760a4f282
@@ -87,6 +87,9 @@ type Resources struct {
 	// Number of devices called "device-000", "device-001", ... on each node or in the cluster.
 	MaxAllocations int
+
+	// Tainted causes all devices to be published with a NoSchedule taint.
+	Tainted bool
 }
 
 //go:embed test-driver/deploy/example/plugin-permissions.yaml
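For reference, a test opts into tainted devices through the driver's resources callback; the snippet below is lifted from the new E2E context added further down in this commit:

	nodes := NewNodes(f, 1, 1)
	driver := NewDriver(f, nodes, func() Resources {
		return Resources{
			Tainted: true, // every device published by the test driver carries its NoSchedule taint
		}
	})
	b := newBuilder(f, driver)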
@@ -299,10 +302,18 @@ func (d *Driver) SetUp(nodes *Nodes, resources Resources, devicesPerNode ...map[
 	maxAllocations = 10
 }
 for i := 0; i < maxAllocations; i++ {
-	slice.Spec.Devices = append(slice.Spec.Devices, resourceapi.Device{
+	device := resourceapi.Device{
 		Name:  fmt.Sprintf("device-%d", i),
 		Basic: &resourceapi.BasicDevice{},
-	})
+	}
+	if resources.Tainted {
+		device.Basic.Taints = []resourceapi.DeviceTaint{{
+			Key:    "example.com/taint",
+			Value:  "tainted",
+			Effect: resourceapi.DeviceTaintEffectNoSchedule,
+		}}
+	}
+	slice.Spec.Devices = append(slice.Spec.Devices, device)
 }
 
 _, err := d.f.ClientSet.ResourceV1beta1().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
@@ -35,6 +35,7 @@ import (
 	admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
+	resourcealphaapi "k8s.io/api/resource/v1alpha3"
 	resourceapi "k8s.io/api/resource/v1beta1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -1280,6 +1281,88 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 		prioritizedListTests()
 	})
 
+	framework.Context("with device taints", feature.DRADeviceTaints, framework.WithFeatureGate(features.DRADeviceTaints), func() {
+		nodes := NewNodes(f, 1, 1)
+		driver := NewDriver(f, nodes, func() Resources {
+			return Resources{
+				Tainted: true,
+			}
+		})
+		b := newBuilder(f, driver)
+
+		f.It("DeviceTaint keeps pod pending", func(ctx context.Context) {
+			pod, template := b.podInline()
+			b.create(ctx, pod, template)
+			framework.ExpectNoError(e2epod.WaitForPodNameUnschedulableInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))
+		})
+
+		f.It("DeviceToleration enables pod scheduling", func(ctx context.Context) {
+			pod, template := b.podInline()
+			template.Spec.Spec.Devices.Requests[0].Tolerations = []resourceapi.DeviceToleration{{
+				Effect:   resourceapi.DeviceTaintEffectNoSchedule,
+				Operator: resourceapi.DeviceTolerationOpExists,
+				// No key: tolerate *all* taints with this effect.
+			}}
+			b.create(ctx, pod, template)
+			b.testPod(ctx, f, pod)
+		})
+
+		f.It("DeviceTaintRule evicts pod", func(ctx context.Context) {
+			pod, template := b.podInline()
+			template.Spec.Spec.Devices.Requests[0].Tolerations = []resourceapi.DeviceToleration{{
+				Effect:   resourceapi.DeviceTaintEffectNoSchedule,
+				Operator: resourceapi.DeviceTolerationOpExists,
+				// No key: tolerate *all* taints with this effect.
+			}}
+			// Add a finalizer to ensure that we get a chance to test the pod status after eviction (= deletion).
+			pod.Finalizers = []string{"e2e-test/dont-delete-me"}
+			b.create(ctx, pod, template)
+			b.testPod(ctx, f, pod)
+			ginkgo.DeferCleanup(func(ctx context.Context) {
+				// Unblock shutdown by removing the finalizer.
+				pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, pod.Name, metav1.GetOptions{})
+				framework.ExpectNoError(err, "get pod")
+				pod.Finalizers = nil
+				_, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Update(ctx, pod, metav1.UpdateOptions{})
+				framework.ExpectNoError(err, "remove finalizers from pod")
+			})
+
+			// Now evict it.
+			ginkgo.By("Evicting pod...")
+			taint := &resourcealphaapi.DeviceTaintRule{
+				ObjectMeta: metav1.ObjectMeta{
+					GenerateName: "device-taint-rule-" + f.UniqueName + "-",
+				},
+				Spec: resourcealphaapi.DeviceTaintRuleSpec{
+					// All devices of the current driver instance.
+					DeviceSelector: &resourcealphaapi.DeviceTaintSelector{
+						Driver: &driver.Name,
+					},
+					Taint: resourcealphaapi.DeviceTaint{
+						Effect: resourcealphaapi.DeviceTaintEffectNoExecute,
+						Key:    "test.example.com/evict",
+						Value:  "now",
+						// No TimeAdded, gets defaulted.
+					},
+				},
+			}
+			createdTaint := b.create(ctx, taint)
+			taint = createdTaint[0].(*resourcealphaapi.DeviceTaintRule)
+			gomega.Expect(*taint).Should(gomega.HaveField("Spec.Taint.TimeAdded.Time", gomega.BeTemporally("~", time.Now(), time.Minute /* allow for some clock drift and delays */)))
+
+			framework.ExpectNoError(e2epod.WaitForPodTerminatingInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, f.Timeouts.PodStart))
+			pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, pod.Name, metav1.GetOptions{})
+			framework.ExpectNoError(err, "get pod")
+			gomega.Expect(pod).Should(gomega.HaveField("Status.Conditions", gomega.ContainElement(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
+				// LastTransitionTime is unknown.
+				"Type":    gomega.Equal(v1.DisruptionTarget),
+				"Status":  gomega.Equal(v1.ConditionTrue),
+				"Reason":  gomega.Equal("DeletionByDeviceTaintManager"),
+				"Message": gomega.Equal("Device Taint manager: deleting due to NoExecute taint"),
+			}))))
+		})
+	})
 
 	// TODO (https://github.com/kubernetes/kubernetes/issues/123699): move most of the test below into `testDriver` so that they get
 	// executed with different parameters.
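The tests above tolerate by effect only (Operator: Exists, no key). As a variation, a request could name the exact taint the tainted test driver publishes. This is a minimal sketch, assuming the v1beta1 API also defines an Equal operator constant (resourceapi.DeviceTolerationOpEqual, analogous to core/v1 tolerations; not exercised by this commit):

	template.Spec.Spec.Devices.Requests[0].Tolerations = []resourceapi.DeviceToleration{{
		Key:      "example.com/taint", // the key the tainted test driver publishes
		Operator: resourceapi.DeviceTolerationOpEqual, // assumed constant, mirrors DeviceTolerationOpExists
		Value:    "tainted",
		Effect:   resourceapi.DeviceTaintEffectNoSchedule,
	}}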
@@ -2006,6 +2089,12 @@ func (b *builder) create(ctx context.Context, objs ...klog.KMetadata) []klog.KMe
 			err := b.f.ClientSet.ResourceV1beta1().ResourceSlices().Delete(ctx, createdObj.GetName(), metav1.DeleteOptions{})
 			framework.ExpectNoError(err, "delete node resource slice")
 		})
+	case *resourcealphaapi.DeviceTaintRule:
+		createdObj, err = b.f.ClientSet.ResourceV1alpha3().DeviceTaintRules().Create(ctx, obj, metav1.CreateOptions{})
+		ginkgo.DeferCleanup(func(ctx context.Context) {
+			err := b.f.ClientSet.ResourceV1alpha3().DeviceTaintRules().Delete(ctx, createdObj.GetName(), metav1.DeleteOptions{})
+			framework.ExpectNoError(err, "delete DeviceTaintRule")
+		})
 	case *appsv1.DaemonSet:
 		createdObj, err = b.f.ClientSet.AppsV1().DaemonSets(b.f.Namespace.Name).Create(ctx, obj, metav1.CreateOptions{})
 		// Cleanup not really needed, but speeds up namespace shutdown.
@@ -20,7 +20,7 @@ nodes:
       v: "5"
   apiServer:
     extraArgs:
-      runtime-config: "resource.k8s.io/v1beta1=true"
+      runtime-config: "resource.k8s.io/v1alpha3=true,resource.k8s.io/v1beta1=true"
 - |
   kind: InitConfiguration
   nodeRegistration:
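DeviceTaintRule is served from resource.k8s.io/v1alpha3, which is why the kind apiserver now enables that group version in addition to v1beta1. A minimal sketch (not part of this commit) of how a test could confirm the alpha group is actually served before relying on it:

	// Sketch only: verify that the apiserver serves resource.k8s.io/v1alpha3.
	gv := resourcealphaapi.SchemeGroupVersion.String() // "resource.k8s.io/v1alpha3"
	_, err := f.ClientSet.Discovery().ServerResourcesForGroupVersion(gv)
	framework.ExpectNoError(err, "API group version not served: "+gv)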
@@ -112,6 +112,16 @@ var (
 	// is enabled such that passing CDI device IDs through CRI fields is supported
 	DRAAdminAccess = framework.WithFeature(framework.ValidFeatures.Add("DRAAdminAccess"))
 
+	// owning-sig: sig-scheduling
+	// kep: https://kep.k8s.io/5055
+	// test-infra jobs:
+	// - "ci-kind-dra-all" in https://testgrid.k8s.io/sig-node-dynamic-resource-allocation
+	//
+	// This label is used for tests which need:
+	// - the DynamicResourceAllocation *and* DRADeviceTaints feature gates
+	// - the resource.k8s.io API group, including version v1alpha3
+	DRADeviceTaints = framework.WithFeature(framework.ValidFeatures.Add("DRADeviceTaints"))
+
 	// TODO: document the feature (owning SIG, when to use this feature for a test)
 	// OWNER: sig-node
 	// Testing downward API huge pages
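For usage, the new label is combined with the corresponding feature gate in the E2E context introduced earlier in this commit:

	framework.Context("with device taints", feature.DRADeviceTaints, framework.WithFeatureGate(features.DRADeviceTaints), func() {
		// ... device taint tests ...
	})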