diff --git a/pkg/controlplane/apiserver/options/validation.go b/pkg/controlplane/apiserver/options/validation.go index 4838d1f29ef..7e2b1782f71 100644 --- a/pkg/controlplane/apiserver/options/validation.go +++ b/pkg/controlplane/apiserver/options/validation.go @@ -77,6 +77,13 @@ func validateNodeSelectorAuthorizationFeature() []error { return nil } +func validateDRAControlPlaneControllerFeature() []error { + if utilfeature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController) && !utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + return []error{fmt.Errorf("DRAControlPlaneController feature requires DynamicResourceAllocation feature to be enabled")} + } + return nil +} + func validateUnknownVersionInteroperabilityProxyFeature() []error { if utilfeature.DefaultFeatureGate.Enabled(features.UnknownVersionInteroperabilityProxy) { if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.StorageVersionAPI) { @@ -121,6 +128,7 @@ func (s *Options) Validate() []error { errs = append(errs, validateUnknownVersionInteroperabilityProxyFeature()...) errs = append(errs, validateUnknownVersionInteroperabilityProxyFlags(s)...) errs = append(errs, validateNodeSelectorAuthorizationFeature()...) + errs = append(errs, validateDRAControlPlaneControllerFeature()...) return errs } diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 9768e372eaf..e77d63b43b4 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -235,7 +235,17 @@ const ( // alpha: v1.26 // // Enables support for resources with custom parameters and a lifecycle - // that is independent of a Pod. + // that is independent of a Pod. Resource allocation is done by a DRA driver's + // "control plane controller" in cooperation with the scheduler. + DRAControlPlaneController featuregate.Feature = "DRAControlPlaneController" + + // owner: @pohly + // kep: http://kep.k8s.io/4381 + // alpha: v1.29 + // + // Enables support for resources with custom parameters and a lifecycle + // that is independent of a Pod. Resource allocation is done by the scheduler + // based on "structured parameters". 
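The new validation helper above follows the existing pattern in this file (compare validateUnknownVersionInteroperabilityProxyFeature). A hypothetical unit test for it — not part of this PR, sketched here assuming the package's existing imports plus featuregatetesting — could look like:

    func TestValidateDRAControlPlaneControllerFeature(t *testing.T) {
        // Force the invalid combination: dependent gate on, required gate off.
        featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, true)
        featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DynamicResourceAllocation, false)

        if errs := validateDRAControlPlaneControllerFeature(); len(errs) != 1 {
            t.Errorf("expected exactly one validation error, got %v", errs)
        }
    }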
DynamicResourceAllocation featuregate.Feature = "DynamicResourceAllocation" // owner: @harche @@ -1061,6 +1071,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS DevicePluginCDIDevices: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.33 + DRAControlPlaneController: {Default: false, PreRelease: featuregate.Alpha}, + DynamicResourceAllocation: {Default: false, PreRelease: featuregate.Alpha}, EventedPLEG: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/pkg/registry/resource/deviceclass/strategy.go b/pkg/registry/resource/deviceclass/strategy.go index fc6d3a38b66..c42e29480d0 100644 --- a/pkg/registry/resource/deviceclass/strategy.go +++ b/pkg/registry/resource/deviceclass/strategy.go @@ -23,9 +23,11 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apiserver/pkg/storage/names" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/kubernetes/pkg/api/legacyscheme" "k8s.io/kubernetes/pkg/apis/resource" "k8s.io/kubernetes/pkg/apis/resource/validation" + "k8s.io/kubernetes/pkg/features" ) // deviceClassStrategy implements behavior for DeviceClass objects @@ -43,6 +45,7 @@ func (deviceClassStrategy) NamespaceScoped() bool { func (deviceClassStrategy) PrepareForCreate(ctx context.Context, obj runtime.Object) { class := obj.(*resource.DeviceClass) class.Generation = 1 + dropDisabledFields(class, nil) } func (deviceClassStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList { @@ -65,6 +68,8 @@ func (deviceClassStrategy) PrepareForUpdate(ctx context.Context, obj, old runtim class := obj.(*resource.DeviceClass) oldClass := old.(*resource.DeviceClass) + dropDisabledFields(class, oldClass) + // Any changes to the spec increment the generation number. if !apiequality.Semantic.DeepEqual(oldClass.Spec, class.Spec) { class.Generation = oldClass.Generation + 1 @@ -83,3 +88,22 @@ func (deviceClassStrategy) WarningsOnUpdate(ctx context.Context, obj, old runtim func (deviceClassStrategy) AllowUnconditionalUpdate() bool { return true } + +// dropDisabledFields removes fields which are covered by the optional DRAControlPlaneController feature gate. +func dropDisabledFields(newClass, oldClass *resource.DeviceClass) { + if utilfeature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController) { + // No need to drop anything. + return + } + + if oldClass == nil { + // Always drop on create. + newClass.Spec.SuitableNodes = nil + return + } + + // Drop on update only if not already set. 
+ if oldClass.Spec.SuitableNodes == nil { + newClass.Spec.SuitableNodes = nil + } +} diff --git a/pkg/registry/resource/deviceclass/strategy_test.go b/pkg/registry/resource/deviceclass/strategy_test.go index 468b11eac69..77fa7752e48 100644 --- a/pkg/registry/resource/deviceclass/strategy_test.go +++ b/pkg/registry/resource/deviceclass/strategy_test.go @@ -19,18 +19,42 @@ package deviceclass import ( "testing" + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" genericapirequest "k8s.io/apiserver/pkg/endpoints/request" + utilfeature "k8s.io/apiserver/pkg/util/feature" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/apis/resource" + "k8s.io/kubernetes/pkg/features" ) -var deviceClass = &resource.DeviceClass{ +var obj = &resource.DeviceClass{ ObjectMeta: metav1.ObjectMeta{ - Name: "valid-class", + Name: "valid-class", + Generation: 1, }, } -func TestClassStrategy(t *testing.T) { +var objWithGatedFields = &resource.DeviceClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-class", + Generation: 1, + }, + Spec: resource.DeviceClassSpec{ + SuitableNodes: &core.NodeSelector{ + NodeSelectorTerms: []core.NodeSelectorTerm{{ + MatchExpressions: []core.NodeSelectorRequirement{{ + Key: "foo", + Operator: core.NodeSelectorOpExists, + }}, + }}, + }, + }, +} + +func TestStrategy(t *testing.T) { if Strategy.NamespaceScoped() { t.Errorf("DeviceClass must not be namespace scoped") } @@ -39,42 +63,135 @@ func TestClassStrategy(t *testing.T) { } } -func TestClassStrategyCreate(t *testing.T) { +func TestStrategyCreate(t *testing.T) { ctx := genericapirequest.NewDefaultContext() - deviceClass := deviceClass.DeepCopy() - Strategy.PrepareForCreate(ctx, deviceClass) - errs := Strategy.Validate(ctx, deviceClass) - if len(errs) != 0 { - t.Errorf("unexpected error validating for create %v", errs) + testcases := map[string]struct { + obj *resource.DeviceClass + controlPlaneController bool + expectValidationError bool + expectObj *resource.DeviceClass + }{ + "simple": { + obj: obj, + expectObj: obj, + }, + "validation-error": { + obj: func() *resource.DeviceClass { + obj := obj.DeepCopy() + obj.Name = "%#@$%$" + return obj + }(), + expectValidationError: true, + }, + "drop-fields": { + obj: objWithGatedFields, + controlPlaneController: false, + expectObj: obj, + }, + "keep-fields": { + obj: objWithGatedFields, + controlPlaneController: true, + expectObj: objWithGatedFields, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tc.controlPlaneController) + + obj := tc.obj.DeepCopy() + Strategy.PrepareForCreate(ctx, obj) + if errs := Strategy.Validate(ctx, obj); len(errs) != 0 { + if !tc.expectValidationError { + t.Fatalf("unexpected validation errors: %q", errs) + } + return + } else if tc.expectValidationError { + t.Fatal("expected validation error(s), got none") + } + if warnings := Strategy.WarningsOnCreate(ctx, obj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + Strategy.Canonicalize(obj) + assert.Equal(t, tc.expectObj, obj) + }) } } -func TestClassStrategyUpdate(t *testing.T) { - t.Run("no-changes-okay", func(t *testing.T) { - ctx := genericapirequest.NewDefaultContext() - deviceClass := deviceClass.DeepCopy() - newClass := deviceClass.DeepCopy() - newClass.ResourceVersion = "4" +func TestStrategyUpdate(t *testing.T) { + ctx := 
genericapirequest.NewDefaultContext() - Strategy.PrepareForUpdate(ctx, newClass, deviceClass) - errs := Strategy.ValidateUpdate(ctx, newClass, deviceClass) - if len(errs) != 0 { - t.Errorf("unexpected validation errors: %v", errs) - } - }) + testcases := map[string]struct { + oldObj *resource.DeviceClass + newObj *resource.DeviceClass + controlPlaneController bool + expectValidationError bool + expectObj *resource.DeviceClass + }{ + "no-changes-okay": { + oldObj: obj, + newObj: obj, + expectObj: obj, + }, + "name-change-not-allowed": { + oldObj: obj, + newObj: func() *resource.DeviceClass { + obj := obj.DeepCopy() + obj.Name += "-2" + return obj + }(), + expectValidationError: true, + }, + "drop-fields": { + oldObj: obj, + newObj: objWithGatedFields, + controlPlaneController: false, + expectObj: obj, + }, + "keep-fields": { + oldObj: obj, + newObj: objWithGatedFields, + controlPlaneController: true, + expectObj: func() *resource.DeviceClass { + obj := objWithGatedFields.DeepCopy() + // Spec changes -> generation gets bumped. + obj.Generation++ + return obj + }(), + }, + "keep-existing-fields": { + oldObj: objWithGatedFields, + newObj: objWithGatedFields, + controlPlaneController: false, + expectObj: objWithGatedFields, + }, + } - t.Run("name-change-not-allowed", func(t *testing.T) { - ctx := genericapirequest.NewDefaultContext() - deviceClass := deviceClass.DeepCopy() - newClass := deviceClass.DeepCopy() - newClass.Name = "valid-class-2" - newClass.ResourceVersion = "4" + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tc.controlPlaneController) + oldObj := tc.oldObj.DeepCopy() + newObj := tc.newObj.DeepCopy() + newObj.ResourceVersion = "4" - Strategy.PrepareForUpdate(ctx, newClass, deviceClass) - errs := Strategy.ValidateUpdate(ctx, newClass, deviceClass) - if len(errs) == 0 { - t.Errorf("expected a validation error") - } - }) + Strategy.PrepareForUpdate(ctx, newObj, oldObj) + if errs := Strategy.ValidateUpdate(ctx, newObj, oldObj); len(errs) != 0 { + if !tc.expectValidationError { + t.Fatalf("unexpected validation errors: %q", errs) + } + return + } else if tc.expectValidationError { + t.Fatal("expected validation error(s), got none") + } + if warnings := Strategy.WarningsOnUpdate(ctx, newObj, oldObj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + Strategy.Canonicalize(newObj) + + expectObj := tc.expectObj.DeepCopy() + expectObj.ResourceVersion = "4" + assert.Equal(t, expectObj, newObj) + }) + } } diff --git a/pkg/registry/resource/resourceclaim/strategy.go b/pkg/registry/resource/resourceclaim/strategy.go index f2a9eb82184..2e0f1e7dc3a 100644 --- a/pkg/registry/resource/resourceclaim/strategy.go +++ b/pkg/registry/resource/resourceclaim/strategy.go @@ -28,9 +28,11 @@ import ( "k8s.io/apiserver/pkg/registry/generic" "k8s.io/apiserver/pkg/storage" "k8s.io/apiserver/pkg/storage/names" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/kubernetes/pkg/api/legacyscheme" "k8s.io/kubernetes/pkg/apis/resource" "k8s.io/kubernetes/pkg/apis/resource/validation" + "k8s.io/kubernetes/pkg/features" "sigs.k8s.io/structured-merge-diff/v4/fieldpath" ) @@ -65,6 +67,8 @@ func (resourceclaimStrategy) PrepareForCreate(ctx context.Context, obj runtime.O claim := obj.(*resource.ResourceClaim) // Status must not be set by user on create. 
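Both dropDisabledFields helpers in this PR (the DeviceClass one above and the ResourceClaim one that follows) implement the same ratchet: drop gated fields on create, but keep them on update if the stored object already uses them, so that turning the gate off never mangles persisted objects. A generic distillation of that rule — dropGatedField is an illustrative name, not code from this PR:

    // dropGatedField clears *newField unless the gate is enabled or the
    // stored object (oldField, nil on create) already set the field.
    func dropGatedField[T any](gateEnabled bool, newField **T, oldField *T) {
        if gateEnabled {
            return // gate on: keep whatever the client sent
        }
        if oldField == nil {
            *newField = nil // create, or update of an object that never used the field
        }
        // otherwise: keep the field so existing data survives a gate rollback
    }

For DeviceClass this would be instantiated, in spirit, as dropGatedField(enabled, &newClass.Spec.SuitableNodes, oldClass.Spec.SuitableNodes); the hand-written versions in this PR stay concrete because each type gates different fields.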
claim.Status = resource.ResourceClaimStatus{} + + dropDisabledFields(claim, nil) } func (resourceclaimStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList { @@ -87,6 +91,8 @@ func (resourceclaimStrategy) PrepareForUpdate(ctx context.Context, obj, old runt newClaim := obj.(*resource.ResourceClaim) oldClaim := old.(*resource.ResourceClaim) newClaim.Status = oldClaim.Status + + dropDisabledFields(newClaim, oldClaim) } func (resourceclaimStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList { @@ -127,6 +133,8 @@ func (resourceclaimStatusStrategy) PrepareForUpdate(ctx context.Context, obj, ol oldClaim := old.(*resource.ResourceClaim) newClaim.Spec = oldClaim.Spec metav1.ResetObjectMetaForStatus(&newClaim.ObjectMeta, &oldClaim.ObjectMeta) + + dropDisabledFields(newClaim, oldClaim) } func (resourceclaimStatusStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList { @@ -163,3 +171,35 @@ func toSelectableFields(claim *resource.ResourceClaim) fields.Set { fields := generic.ObjectMetaFieldsSet(&claim.ObjectMeta, true) return fields } + +// dropDisabledFields removes fields which are covered by the optional DRAControlPlaneController feature gate. +func dropDisabledFields(newClaim, oldClaim *resource.ResourceClaim) { + if utilfeature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController) { + // No need to drop anything. + return + } + + if oldClaim == nil { + // Always drop on create. There's no status yet, so nothing to do there. + newClaim.Spec.Controller = "" + return + } + + // Drop on (status) update only if not already set. + if oldClaim.Spec.Controller == "" { + newClaim.Spec.Controller = "" + } + // If the claim is handled by a control plane controller, allow + // setting it also in the status. Stripping that field would be bad. + if oldClaim.Spec.Controller == "" && + newClaim.Status.Allocation != nil && + (oldClaim.Status.Allocation == nil || oldClaim.Status.Allocation.Controller == "") { + newClaim.Status.Allocation.Controller = "" + } + // If there is an existing allocation which used a control plane controller, then + // allow requesting its deallocation. 
+ if !oldClaim.Status.DeallocationRequested && + (newClaim.Status.Allocation == nil || newClaim.Status.Allocation.Controller == "") { + newClaim.Status.DeallocationRequested = false + } +} diff --git a/pkg/registry/resource/resourceclaim/strategy_test.go b/pkg/registry/resource/resourceclaim/strategy_test.go index 8b2d8c3e713..29651180a85 100644 --- a/pkg/registry/resource/resourceclaim/strategy_test.go +++ b/pkg/registry/resource/resourceclaim/strategy_test.go @@ -19,19 +19,60 @@ package resourceclaim import ( "testing" + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" genericapirequest "k8s.io/apiserver/pkg/endpoints/request" + utilfeature "k8s.io/apiserver/pkg/util/feature" + featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/kubernetes/pkg/apis/resource" + "k8s.io/kubernetes/pkg/features" ) -var resourceClaim = &resource.ResourceClaim{ +var obj = &resource.ResourceClaim{ ObjectMeta: metav1.ObjectMeta{ Name: "valid-claim", Namespace: "default", }, } -func TestClaimStrategy(t *testing.T) { +var objWithStatus = &resource.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-claim", + Namespace: "default", + }, + Status: resource.ResourceClaimStatus{ + Allocation: &resource.AllocationResult{}, + }, +} + +var objWithGatedFields = &resource.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-claim", + Namespace: "default", + }, + Spec: resource.ResourceClaimSpec{ + Controller: "dra.example.com", + }, +} + +var objWithGatedStatusFields = &resource.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-claim", + Namespace: "default", + }, + Spec: resource.ResourceClaimSpec{ + Controller: "dra.example.com", + }, + Status: resource.ResourceClaimStatus{ + Allocation: &resource.AllocationResult{ + Controller: "dra.example.com", + }, + DeallocationRequested: true, + }, +} + +func TestStrategy(t *testing.T) { if !Strategy.NamespaceScoped() { t.Errorf("ResourceClaim must be namespace scoped") } @@ -40,42 +81,236 @@ func TestClaimStrategy(t *testing.T) { } } -func TestClaimStrategyCreate(t *testing.T) { +func TestStrategyCreate(t *testing.T) { ctx := genericapirequest.NewDefaultContext() - resourceClaim := resourceClaim.DeepCopy() - Strategy.PrepareForCreate(ctx, resourceClaim) - errs := Strategy.Validate(ctx, resourceClaim) - if len(errs) != 0 { - t.Errorf("unexpected error validating for create %v", errs) + testcases := map[string]struct { + obj *resource.ResourceClaim + controlPlaneController bool + expectValidationError bool + expectObj *resource.ResourceClaim + }{ + "simple": { + obj: obj, + expectObj: obj, + }, + "validation-error": { + obj: func() *resource.ResourceClaim { + obj := obj.DeepCopy() + obj.Name = "%#@$%$" + return obj + }(), + expectValidationError: true, + }, + "drop-fields": { + obj: objWithGatedFields, + controlPlaneController: false, + expectObj: obj, + }, + "keep-fields": { + obj: objWithGatedFields, + controlPlaneController: true, + expectObj: objWithGatedFields, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tc.controlPlaneController) + + obj := tc.obj.DeepCopy() + Strategy.PrepareForCreate(ctx, obj) + if errs := Strategy.Validate(ctx, obj); len(errs) != 0 { + if !tc.expectValidationError { + t.Fatalf("unexpected validation errors: %q", errs) + } + return + } else if tc.expectValidationError { + t.Fatal("expected validation error(s), got 
none") + } + if warnings := Strategy.WarningsOnCreate(ctx, obj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + Strategy.Canonicalize(obj) + assert.Equal(t, tc.expectObj, obj) + }) } } -func TestClaimStrategyUpdate(t *testing.T) { - t.Run("no-changes-okay", func(t *testing.T) { - ctx := genericapirequest.NewDefaultContext() - resourceClaim := resourceClaim.DeepCopy() - newClaim := resourceClaim.DeepCopy() - newClaim.ResourceVersion = "4" +func TestStrategyUpdate(t *testing.T) { + ctx := genericapirequest.NewDefaultContext() - Strategy.PrepareForUpdate(ctx, newClaim, resourceClaim) - errs := Strategy.ValidateUpdate(ctx, newClaim, resourceClaim) - if len(errs) != 0 { - t.Errorf("unexpected validation errors: %v", errs) - } - }) + testcases := map[string]struct { + oldObj *resource.ResourceClaim + newObj *resource.ResourceClaim + controlPlaneController bool + expectValidationError bool + expectObj *resource.ResourceClaim + }{ + "no-changes-okay": { + oldObj: obj, + newObj: obj, + expectObj: obj, + }, + "name-change-not-allowed": { + oldObj: obj, + newObj: func() *resource.ResourceClaim { + obj := obj.DeepCopy() + obj.Name += "-2" + return obj + }(), + expectValidationError: true, + }, + "drop-fields": { + oldObj: obj, + newObj: objWithGatedFields, + controlPlaneController: false, + expectObj: obj, + }, + "keep-fields": { + oldObj: obj, + newObj: objWithGatedFields, + controlPlaneController: true, + expectValidationError: true, // Spec is immutable. + }, + "keep-existing-fields": { + oldObj: objWithGatedFields, + newObj: objWithGatedFields, + controlPlaneController: false, + expectObj: objWithGatedFields, + }, + } - t.Run("name-change-not-allowed", func(t *testing.T) { - ctx := genericapirequest.NewDefaultContext() - resourceClaim := resourceClaim.DeepCopy() - newClaim := resourceClaim.DeepCopy() - newClaim.Name = "valid-claim-2" - newClaim.ResourceVersion = "4" + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tc.controlPlaneController) + oldObj := tc.oldObj.DeepCopy() + newObj := tc.newObj.DeepCopy() + newObj.ResourceVersion = "4" - Strategy.PrepareForUpdate(ctx, newClaim, resourceClaim) - errs := Strategy.ValidateUpdate(ctx, newClaim, resourceClaim) - if len(errs) == 0 { - t.Errorf("expected a validation error") - } - }) + Strategy.PrepareForUpdate(ctx, newObj, oldObj) + if errs := Strategy.ValidateUpdate(ctx, newObj, oldObj); len(errs) != 0 { + if !tc.expectValidationError { + t.Fatalf("unexpected validation errors: %q", errs) + } + return + } else if tc.expectValidationError { + t.Fatal("expected validation error(s), got none") + } + if warnings := Strategy.WarningsOnUpdate(ctx, newObj, oldObj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + Strategy.Canonicalize(newObj) + + expectObj := tc.expectObj.DeepCopy() + expectObj.ResourceVersion = "4" + assert.Equal(t, expectObj, newObj) + }) + } +} + +func TestStatusStrategyUpdate(t *testing.T) { + ctx := genericapirequest.NewDefaultContext() + + testcases := map[string]struct { + oldObj *resource.ResourceClaim + newObj *resource.ResourceClaim + controlPlaneController bool + expectValidationError bool + expectObj *resource.ResourceClaim + }{ + "no-changes-okay": { + oldObj: obj, + newObj: obj, + expectObj: obj, + }, + "name-change-not-allowed": { + oldObj: obj, + newObj: func() *resource.ResourceClaim { + obj := obj.DeepCopy() + obj.Name += "-2" + return 
obj + }(), + expectValidationError: true, + }, + // Cannot add finalizers, annotations and labels during status update. + "drop-meta-changes": { + oldObj: obj, + newObj: func() *resource.ResourceClaim { + obj := obj.DeepCopy() + obj.Finalizers = []string{"foo"} + obj.Annotations = map[string]string{"foo": "bar"} + obj.Labels = map[string]string{"foo": "bar"} + return obj + }(), + expectObj: obj, + }, + "drop-fields": { + oldObj: obj, + newObj: objWithGatedStatusFields, + controlPlaneController: false, + expectObj: objWithStatus, + }, + "keep-fields": { + oldObj: obj, + newObj: objWithGatedStatusFields, + controlPlaneController: true, + expectObj: func() *resource.ResourceClaim { + expectObj := objWithGatedStatusFields.DeepCopy() + // Spec remains unchanged. + expectObj.Spec = obj.Spec + return expectObj + }(), + }, + "keep-fields-because-of-spec": { + oldObj: objWithGatedFields, + newObj: objWithGatedStatusFields, + controlPlaneController: false, + expectObj: objWithGatedStatusFields, + }, + // Normally a claim without a controller in the spec shouldn't + // have one in the status either, but it's not invalid and thus + // let's test this. + "keep-fields-because-of-status": { + oldObj: func() *resource.ResourceClaim { + oldObj := objWithGatedStatusFields.DeepCopy() + oldObj.Spec.Controller = "" + return oldObj + }(), + newObj: objWithGatedStatusFields, + controlPlaneController: false, + expectObj: func() *resource.ResourceClaim { + oldObj := objWithGatedStatusFields.DeepCopy() + oldObj.Spec.Controller = "" + return oldObj + }(), + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tc.controlPlaneController) + oldObj := tc.oldObj.DeepCopy() + newObj := tc.newObj.DeepCopy() + newObj.ResourceVersion = "4" + + StatusStrategy.PrepareForUpdate(ctx, newObj, oldObj) + if errs := StatusStrategy.ValidateUpdate(ctx, newObj, oldObj); len(errs) != 0 { + if !tc.expectValidationError { + t.Fatalf("unexpected validation errors: %q", errs) + } + return + } else if tc.expectValidationError { + t.Fatal("expected validation error(s), got none") + } + if warnings := StatusStrategy.WarningsOnUpdate(ctx, newObj, oldObj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + StatusStrategy.Canonicalize(newObj) + + expectObj := tc.expectObj.DeepCopy() + expectObj.ResourceVersion = "4" + assert.Equal(t, expectObj, newObj) + }) + } } diff --git a/pkg/registry/resource/rest/storage_resource.go b/pkg/registry/resource/rest/storage_resource.go index 3be89acf499..5d5bab1d18a 100644 --- a/pkg/registry/resource/rest/storage_resource.go +++ b/pkg/registry/resource/rest/storage_resource.go @@ -75,6 +75,10 @@ func (p RESTStorageProvider) v1alpha3Storage(apiResourceConfigSource serverstora storage[resource] = resourceClaimTemplateStorage } + // Registered also without the corresponding DRAControlPlaneController feature gate for the + // same reasons as registering the other types without a feature gate check: it might be + // useful to provide access to these resources while their feature is off to allow cleaning + // them up. 
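The create-path behavior verified in the strategy tests above is visible to any API client: with the gate off, a claim that asks for a control plane controller is persisted without one. A hedged client-go sketch (assumes a cluster serving resource.k8s.io/v1alpha3 with DRAControlPlaneController disabled; error handling and any device requests that validation may require are elided):

    package main

    import (
        "context"
        "fmt"

        resourcev1alpha3 "k8s.io/api/resource/v1alpha3"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/client-go/kubernetes"
        "k8s.io/client-go/tools/clientcmd"
    )

    func main() {
        config, _ := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
        clientset, _ := kubernetes.NewForConfig(config)

        claim := &resourcev1alpha3.ResourceClaim{
            ObjectMeta: metav1.ObjectMeta{Name: "example", Namespace: "default"},
            // Gated field: dropDisabledFields strips it server-side on create.
            Spec: resourcev1alpha3.ResourceClaimSpec{Controller: "dra.example.com"},
        }
        created, _ := clientset.ResourceV1alpha3().ResourceClaims("default").Create(context.TODO(), claim, metav1.CreateOptions{})
        fmt.Printf("controller after create: %q\n", created.Spec.Controller) // "" when the gate is off
    }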
if resource := "podschedulingcontexts"; apiResourceConfigSource.ResourceEnabled(resourcev1alpha3.SchemeGroupVersion.WithResource(resource)) { podSchedulingStorage, podSchedulingStatusStorage, err := podschedulingcontextsstore.NewREST(restOptionsGetter) if err != nil { diff --git a/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go b/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go index d683e30cbb2..cc63a921693 100644 --- a/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go +++ b/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go @@ -146,6 +146,9 @@ func (p *podSchedulingState) isDirty() bool { // init checks whether there is already a PodSchedulingContext object. // Must not be called concurrently, func (p *podSchedulingState) init(ctx context.Context, pod *v1.Pod, podSchedulingContextLister resourcelisters.PodSchedulingContextLister) error { + if podSchedulingContextLister == nil { + return nil + } schedulingCtx, err := podSchedulingContextLister.PodSchedulingContexts(pod.Namespace).Get(pod.Name) switch { case apierrors.IsNotFound(err): @@ -267,11 +270,13 @@ func statusForClaim(schedulingCtx *resourceapi.PodSchedulingContext, podClaimNam // dynamicResources is a plugin that ensures that ResourceClaims are allocated. type dynamicResources struct { - enabled bool + enabled bool + controlPlaneControllerEnabled bool + fh framework.Handle clientset kubernetes.Interface classLister resourcelisters.DeviceClassLister - podSchedulingContextLister resourcelisters.PodSchedulingContextLister + podSchedulingContextLister resourcelisters.PodSchedulingContextLister // nil if and only if DRAControlPlaneController is disabled sliceLister resourcelisters.ResourceSliceLister // claimAssumeCache enables temporarily storing a newer claim object @@ -338,13 +343,17 @@ func New(ctx context.Context, plArgs runtime.Object, fh framework.Handle, fts fe } pl := &dynamicResources{ - enabled: true, - fh: fh, - clientset: fh.ClientSet(), - classLister: fh.SharedInformerFactory().Resource().V1alpha3().DeviceClasses().Lister(), - podSchedulingContextLister: fh.SharedInformerFactory().Resource().V1alpha3().PodSchedulingContexts().Lister(), - sliceLister: fh.SharedInformerFactory().Resource().V1alpha3().ResourceSlices().Lister(), - claimAssumeCache: fh.ResourceClaimCache(), + enabled: true, + controlPlaneControllerEnabled: fts.EnableDRAControlPlaneController, + + fh: fh, + clientset: fh.ClientSet(), + classLister: fh.SharedInformerFactory().Resource().V1alpha3().DeviceClasses().Lister(), + sliceLister: fh.SharedInformerFactory().Resource().V1alpha3().ResourceSlices().Lister(), + claimAssumeCache: fh.ResourceClaimCache(), + } + if pl.controlPlaneControllerEnabled { + pl.podSchedulingContextLister = fh.SharedInformerFactory().Resource().V1alpha3().PodSchedulingContexts().Lister() } return pl, nil @@ -375,9 +384,6 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu events := []framework.ClusterEventWithHint{ // Allocation is tracked in ResourceClaims, so any changes may make the pods schedulable. {Event: framework.ClusterEvent{Resource: framework.ResourceClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClaimChange}, - // When a driver has provided additional information, a pod waiting for that information - // may be schedulable. 
- {Event: framework.ClusterEvent{Resource: framework.PodSchedulingContext, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPodSchedulingContextChange}, // A resource might depend on node labels for topology filtering. // A new or updated node may make pods schedulable. // @@ -393,6 +399,15 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu // A pod might be waiting for a class to get created or modified. {Event: framework.ClusterEvent{Resource: framework.DeviceClass, ActionType: framework.Add | framework.Update}}, } + + if pl.podSchedulingContextLister != nil { + events = append(events, + // When a driver has provided additional information, a pod waiting for that information + // may be schedulable. + framework.ClusterEventWithHint{Event: framework.ClusterEvent{Resource: framework.PodSchedulingContext, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPodSchedulingContextChange}, + ) + } + return events, nil } @@ -400,6 +415,10 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu // scheduled. When this fails, one of the registered events can trigger another // attempt. func (pl *dynamicResources) PreEnqueue(ctx context.Context, pod *v1.Pod) (status *framework.Status) { + if !pl.enabled { + return nil + } + if err := pl.foreachPodResourceClaim(pod, nil); err != nil { return statusUnschedulable(klog.FromContext(ctx), err.Error()) } @@ -679,6 +698,7 @@ func (pl *dynamicResources) PreFilter(ctx context.Context, state *framework.Cycl } // Fetch PodSchedulingContext, it's going to be needed when checking claims. + // Doesn't do anything when DRAControlPlaneController is disabled. if err := s.podSchedulingState.init(ctx, pod, pl.podSchedulingContextLister); err != nil { return nil, statusError(logger, err) } @@ -688,6 +708,16 @@ func (pl *dynamicResources) PreFilter(ctx context.Context, state *framework.Cycl s.informationsForClaim = make([]informationForClaim, len(claims)) for index, claim := range claims { + if claim.Spec.Controller != "" && + !pl.controlPlaneControllerEnabled { + // This keeps the pod as unschedulable until the + // scheduler gets restarted with "classic DRA" enabled + // or the claim gets replaced with one which doesn't + // need the feature. That is a cluster event that + // re-enqueues the pod. + return nil, statusUnschedulable(logger, "resourceclaim depends on disabled DRAControlPlaneController feature", "pod", klog.KObj(pod), "resourceclaim", klog.KObj(claim)) + } + if claim.Status.DeallocationRequested { // This will get resolved by the resource driver. return nil, statusUnschedulable(logger, "resourceclaim must be reallocated", "pod", klog.KObj(pod), "resourceclaim", klog.KObj(claim)) diff --git a/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources_test.go b/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources_test.go index 6b136300e94..a48700f0f3e 100644 --- a/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources_test.go +++ b/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources_test.go @@ -316,7 +316,11 @@ func TestPlugin(t *testing.T) { prepare prepare want want - disable bool + + // Feature gates. False is chosen so that the uncommon case + // doesn't need to be set. 
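The PreFilter guard added above is why a scheduler running without the gate leaves "classic" claims pending rather than failing hard. A hypothetical focused test in the style of this file's table tests — claimWithClassicController stands for an assumed fixture claim whose Spec.Controller is set and which matches podWithClaimName; setup and workerNode are the file's existing helpers:

    func TestPreFilterClassicDRADisabled(t *testing.T) {
        features := feature.Features{
            EnableDynamicResourceAllocation: true,
            EnableDRAControlPlaneController: false,
        }
        testCtx := setup(t, []*v1.Node{workerNode}, []*resourceapi.ResourceClaim{claimWithClassicController}, nil, nil, nil, features)

        _, status := testCtx.p.PreFilter(testCtx.ctx, framework.NewCycleState(), podWithClaimName)
        if !status.IsRejected() {
            t.Errorf("expected the pod to be rejected as unschedulable, got %v", status)
        }
    }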
+ disableDRA bool + disableClassicDRA bool }{ "empty": { pod: st.MakePod().Name("foo").Namespace("default").Obj(), @@ -912,7 +916,7 @@ func TestPlugin(t *testing.T) { pod: podWithClaimName, claims: []*resourceapi.ResourceClaim{inUseClaim}, }, - "disable": { + "DRA-disabled": { pod: podWithClaimName, claims: []*resourceapi.ResourceClaim{inUseClaim}, want: want{ @@ -920,7 +924,7 @@ func TestPlugin(t *testing.T) { status: framework.NewStatus(framework.Skip), }, }, - disable: true, + disableDRA: true, }, } @@ -933,8 +937,11 @@ func TestPlugin(t *testing.T) { if nodes == nil { nodes = []*v1.Node{workerNode} } - testCtx := setup(t, nodes, tc.claims, tc.classes, tc.schedulings, tc.objs) - testCtx.p.enabled = !tc.disable + features := feature.Features{ + EnableDynamicResourceAllocation: !tc.disableDRA, + EnableDRAControlPlaneController: !tc.disableClassicDRA, + } + testCtx := setup(t, nodes, tc.claims, tc.classes, tc.schedulings, tc.objs, features) initialObjects := testCtx.listAll(t) status := testCtx.p.PreEnqueue(testCtx.ctx, tc.pod) @@ -1136,6 +1143,9 @@ func (tc *testContext) listAll(t *testing.T) (objects []metav1.Object) { } func (tc *testContext) listAssumedClaims() []metav1.Object { + if tc.p.claimAssumeCache == nil { + return nil + } var assumedClaims []metav1.Object for _, obj := range tc.p.claimAssumeCache.List(nil) { claim := obj.(*resourceapi.ResourceClaim) @@ -1219,7 +1229,7 @@ func update(t *testing.T, objects []metav1.Object, updates change) []metav1.Obje return updated } -func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object) (result *testContext) { +func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object, features feature.Features) (result *testContext) { t.Helper() tc := &testContext{} @@ -1242,7 +1252,7 @@ func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, t.Fatal(err) } - pl, err := New(tCtx, nil, fh, feature.Features{EnableDynamicResourceAllocation: true}) + pl, err := New(tCtx, nil, fh, features) if err != nil { t.Fatal(err) } @@ -1436,7 +1446,10 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { logger, tCtx := ktesting.NewTestContext(t) - testCtx := setup(t, nil, tc.claims, nil, nil, nil) + features := feature.Features{ + EnableDynamicResourceAllocation: true, + } + testCtx := setup(t, nil, tc.claims, nil, nil, nil, features) oldObj := tc.oldObj newObj := tc.newObj if claim, ok := tc.newObj.(*resourceapi.ResourceClaim); ok { @@ -1604,7 +1617,11 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() logger, _ := ktesting.NewTestContext(t) - testCtx := setup(t, nil, tc.claims, nil, tc.schedulings, nil) + features := feature.Features{ + EnableDynamicResourceAllocation: true, + EnableDRAControlPlaneController: true, + } + testCtx := setup(t, nil, tc.claims, nil, tc.schedulings, nil, features) actualHint, err := testCtx.p.isSchedulableAfterPodSchedulingContextChange(logger, tc.pod, tc.oldObj, tc.newObj) if tc.expectedErr { require.Error(t, err) diff --git a/pkg/scheduler/framework/plugins/feature/feature.go b/pkg/scheduler/framework/plugins/feature/feature.go index af28abfc901..6fd2bb82c63 100644 --- 
a/pkg/scheduler/framework/plugins/feature/feature.go +++ b/pkg/scheduler/framework/plugins/feature/feature.go @@ -20,6 +20,7 @@ package feature // This struct allows us to break the dependency of the plugins on // the internal k8s features pkg. type Features struct { + EnableDRAControlPlaneController bool EnableDynamicResourceAllocation bool EnableVolumeCapacityPriority bool EnableNodeInclusionPolicyInPodTopologySpread bool diff --git a/pkg/scheduler/framework/plugins/registry.go b/pkg/scheduler/framework/plugins/registry.go index 3edbdde5c46..044d8f3f6f0 100644 --- a/pkg/scheduler/framework/plugins/registry.go +++ b/pkg/scheduler/framework/plugins/registry.go @@ -46,6 +46,7 @@ import ( // through the WithFrameworkOutOfTreeRegistry option. func NewInTreeRegistry() runtime.Registry { fts := plfeature.Features{ + EnableDRAControlPlaneController: feature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController), EnableDynamicResourceAllocation: feature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation), EnableVolumeCapacityPriority: feature.DefaultFeatureGate.Enabled(features.VolumeCapacityPriority), EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread), diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go index 70bc6aa6fab..548c8578d94 100644 --- a/test/e2e/dra/dra.go +++ b/test/e2e/dra/dra.go @@ -973,8 +973,8 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, }) } - ginkgo.Context("with classic DRA", func() { tests(parameterModeClassicDRA) }) - ginkgo.Context("with structured parameters", func() { tests(parameterModeStructured) }) + framework.Context("with classic DRA", feature.DRAControlPlaneController, func() { tests(parameterModeClassicDRA) }) + framework.Context("with structured parameters", func() { tests(parameterModeStructured) }) // TODO (https://github.com/kubernetes/kubernetes/issues/123699): move most of the test below into `testDriver` so that they get // executed with different parameters. @@ -1100,7 +1100,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, // The following tests are all about behavior in combination with a // control-plane DRA driver controller. 
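The e2e changes below mirror this split as test labels: classic-DRA tests get the new Feature:DRAControlPlaneController label, while structured-parameters tests keep only Feature:DynamicResourceAllocation. Presumably a local run of the classic suite starts from the new kind config (kind create cluster --config test/e2e/dra/kind-classic-dra.yaml, which enables both gates) and then selects tests with a ginkgo v2 label filter matching the new label; the exact invocation depends on the e2e harness.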
- ginkgo.Context("cluster with DRA driver controller", func() { + framework.Context("cluster with classic DRA", feature.DRAControlPlaneController, func() { nodes := NewNodes(f, 1, 4) // kube-controller-manager can trigger delayed allocation for pods where the diff --git a/test/e2e/dra/kind-classic-dra.yaml b/test/e2e/dra/kind-classic-dra.yaml new file mode 100644 index 00000000000..0656acd5a66 --- /dev/null +++ b/test/e2e/dra/kind-classic-dra.yaml @@ -0,0 +1,45 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +containerdConfigPatches: +# Enable CDI as described in +# https://github.com/container-orchestrated-devices/container-device-interface#containerd-configuration +- |- + [plugins."io.containerd.grpc.v1.cri"] + enable_cdi = true +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: ClusterConfiguration + scheduler: + extraArgs: + v: "5" + vmodule: "allocator=6,dynamicresources=6" # structured/allocator.go, DRA scheduler plugin + controllerManager: + extraArgs: + v: "5" + apiServer: + extraArgs: + runtime-config: "resource.k8s.io/v1alpha3=true" + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + v: "5" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + v: "5" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + v: "5" +featureGates: + DynamicResourceAllocation: true + DRAControlPlaneController: true diff --git a/test/e2e/feature/feature.go b/test/e2e/feature/feature.go index 9474c38be1e..7ddcb82d343 100644 --- a/test/e2e/feature/feature.go +++ b/test/e2e/feature/feature.go @@ -94,7 +94,28 @@ var ( // TODO: document the feature (owning SIG, when to use this feature for a test) Downgrade = framework.WithFeature(framework.ValidFeatures.Add("Downgrade")) - // TODO: document the feature (owning SIG, when to use this feature for a test) + // owning-sig: sig-node + // kep: https://kep.k8s.io/3063 + // test-infra jobs: + // - "classic-dra" in https://testgrid.k8s.io/sig-node-dynamic-resource-allocation + // + // This label is used for tests which need: + // - the DynamicResourceAllocation *and* DRAControlPlaneController feature gates + // - the resource.k8s.io API group + // - a container runtime where support for CDI (https://github.com/cncf-tags/container-device-interface) + // is enabled such that passing CDI device IDs through CRI fields is supported + DRAControlPlaneController = framework.WithFeature(framework.ValidFeatures.Add("DRAControlPlaneController")) + + // owning-sig: sig-node + // kep: https://kep.k8s.io/4381 + // test-infra jobs: + // - the non-"classic-dra" jobs in https://testgrid.k8s.io/sig-node-dynamic-resource-allocation + // + // This label is used for tests which need: + // - *only* the DynamicResourceAllocation feature gate + // - the resource.k8s.io API group + // - a container runtime where support for CDI (https://github.com/cncf-tags/container-device-interface) + // is enabled such that passing CDI device IDs through CRI fields is supported DynamicResourceAllocation = framework.WithFeature(framework.ValidFeatures.Add("DynamicResourceAllocation")) // TODO: document the feature (owning SIG, when to use this feature for a test) diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go index 2e86191f4af..0b00a4bb7ce 100644 --- a/test/integration/scheduler/scheduler_test.go +++ b/test/integration/scheduler/scheduler_test.go @@ -658,6 +658,7 @@ func TestNodeEvents(t 
*testing.T) { // no standard API for those). func TestPodSchedulingContextSSA(t *testing.T) { featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DynamicResourceAllocation, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, true) testCtx := testutils.InitTestAPIServer(t, "podschedulingcontext-ssa", nil) testCtx.DisableEventSink = true diff --git a/test/integration/scheduler_perf/config/performance-config.yaml b/test/integration/scheduler_perf/config/performance-config.yaml index 4b6905a70ff..9d5183d4666 100644 --- a/test/integration/scheduler_perf/config/performance-config.yaml +++ b/test/integration/scheduler_perf/config/performance-config.yaml @@ -745,6 +745,7 @@ # and dynamically created ResourceClaim instances for each pod. - name: SchedulingWithResourceClaimTemplate featureGates: + DRAControlPlaneController: true DynamicResourceAllocation: true # SchedulerQueueingHints: true workloadTemplate: @@ -812,6 +813,7 @@ # scheduling via PodSchedulingContext. - name: SchedulingWithMultipleResourceClaims featureGates: + DRAControlPlaneController: true DynamicResourceAllocation: true # SchedulerQueueingHints: true workloadTemplate: @@ -882,7 +884,7 @@ measurePods: 1000 maxClaimsPerNode: 20 -# SchedulingWithResourceClaimTemplate uses a ResourceClaimTemplate +# SchedulingWithResourceClaimTemplateStructured uses a ResourceClaimTemplate # and dynamically creates ResourceClaim instances for each pod. # The driver uses structured parameters. - name: SchedulingWithResourceClaimTemplateStructured
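Integration and performance tests that exercise classic DRA now have to enable both gates, as the hunks above show. A small hypothetical helper could bundle the two overrides for reuse (the name is illustrative; the PR sets the gates inline):

    func enableClassicDRA(t *testing.T) {
        t.Helper()
        featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DynamicResourceAllocation, true)
        featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, true)
    }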