diff --git a/pkg/controlplane/apiserver/samples/generic/server/admission.go b/pkg/controlplane/apiserver/samples/generic/server/admission.go index 998fd4286bc..eba588534c7 100644 --- a/pkg/controlplane/apiserver/samples/generic/server/admission.go +++ b/pkg/controlplane/apiserver/samples/generic/server/admission.go @@ -30,7 +30,6 @@ import ( certsigning "k8s.io/kubernetes/plugin/pkg/admission/certificates/signing" certsubjectrestriction "k8s.io/kubernetes/plugin/pkg/admission/certificates/subjectrestriction" "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds" - "k8s.io/kubernetes/plugin/pkg/admission/podtopologylabels" "k8s.io/kubernetes/plugin/pkg/admission/serviceaccount" ) @@ -49,8 +48,6 @@ func DefaultOffAdmissionPlugins() sets.Set[string] { certsubjectrestriction.PluginName, // CertificateSubjectRestriction validatingadmissionpolicy.PluginName, // ValidatingAdmissionPolicy mutatingadmissionpolicy.PluginName, // MutatingAdmissionPolicy - podtopologylabels.PluginName, // PodTopologyLabels, only active when feature gate PodTopologyLabelsAdmission is enabled. - validatingadmissionpolicy.PluginName, // ValidatingAdmissionPolicy, only active when feature gate ValidatingAdmissionPolicy is enabled ) return sets.New(options.AllOrderedPlugins...).Difference(defaultOnPlugins) diff --git a/pkg/controlplane/apiserver/samples/generic/server/admission_test.go b/pkg/controlplane/apiserver/samples/generic/server/admission_test.go index b85423d7c7f..a16a257840a 100644 --- a/pkg/controlplane/apiserver/samples/generic/server/admission_test.go +++ b/pkg/controlplane/apiserver/samples/generic/server/admission_test.go @@ -24,6 +24,7 @@ import ( "k8s.io/kubernetes/plugin/pkg/admission/limitranger" "k8s.io/kubernetes/plugin/pkg/admission/network/defaultingressclass" "k8s.io/kubernetes/plugin/pkg/admission/nodetaint" + "k8s.io/kubernetes/plugin/pkg/admission/podtopologylabels" podpriority "k8s.io/kubernetes/plugin/pkg/admission/priority" "k8s.io/kubernetes/plugin/pkg/admission/runtimeclass" "k8s.io/kubernetes/plugin/pkg/admission/security/podsecurity" @@ -42,6 +43,7 @@ var intentionallyOffPlugins = sets.New[string]( runtimeclass.PluginName, // RuntimeClass defaultingressclass.PluginName, // DefaultIngressClass podsecurity.PluginName, // PodSecurity + podtopologylabels.PluginName, // PodTopologyLabels ) func TestDefaultOffAdmissionPlugins(t *testing.T) { diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 9e00230d383..11dd7106c83 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -969,10 +969,14 @@ const ( // kep: https://kep.k8s.io/4742 // alpha: v1.33 // - // Enables the PodTopologyLabelsAdmission admission plugin to automatically set node topology labels - // (i.e. those with the 'topology.k8s.io/' prefix on Node objects) onto Pod objects when they are - // bound/scheduled to a node. + // Enables the PodTopologyLabelsAdmission admission plugin that mutates `pod/binding` + // requests by copying the `topology.k8s.io/{zone,region}` and `kubernetes.io/hostname` + // labels from the assigned Node object (in the Binding being admitted) onto the Binding + // so that it can be persisted onto the Pod object when the Pod is being scheduled. // This allows workloads running in pods to understand the topology information of their assigned node. + // Enabling this feature also permits external schedulers to set labels on pods in an atomic + // operation when scheduling a Pod by setting the `metadata.labels` field on the submitted Binding, + // similar to how `metadata.annotations` behaves. PodTopologyLabelsAdmission featuregate.Feature = "PodTopologyLabelsAdmission" ) diff --git a/pkg/registry/core/pod/storage/storage.go b/pkg/registry/core/pod/storage/storage.go index 0066e6bcfa7..27f7c7fcfff 100644 --- a/pkg/registry/core/pod/storage/storage.go +++ b/pkg/registry/core/pod/storage/storage.go @@ -247,20 +247,12 @@ func (r *BindingREST) setPodHostAndAnnotations(ctx context.Context, podUID types for k, v := range annotations { pod.Annotations[k] = v } + // Copy all labels from the Binding over to the Pod object, but do not + // overwrite any existing labels that have been previously set, to avoid + // changing any existing behaviour for pods that may be defined with a + // template by users and created by higher-level controllers. if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.PodTopologyLabelsAdmission) { - // If any labels are present on the Binding, copy them across to the Pod. - if len(labels) > 0 { - if pod.Labels == nil { - pod.Labels = make(map[string]string) - } - for k, v := range labels { - if _, ok := pod.Labels[k]; ok { - // don't overwrite labels that are already present on the Pod - continue - } - pod.Labels[k] = v - } - } + copyLabelsWithoutOverwriting(pod, labels) } podutil.UpdatePodCondition(&pod.Status, &api.PodCondition{ Type: api.PodScheduled, @@ -272,6 +264,24 @@ func (r *BindingREST) setPodHostAndAnnotations(ctx context.Context, podUID types return finalPod, err } +func copyLabelsWithoutOverwriting(pod *api.Pod, labels map[string]string) { + if len(labels) == 0 { + // nothing to do + return + } + if pod.Labels == nil { + pod.Labels = make(map[string]string) + } + // Iterate over the binding's labels and copy them across to the Pod. + for k, v := range labels { + if _, ok := pod.Labels[k]; ok { + // don't overwrite labels that are already present on the Pod + continue + } + pod.Labels[k] = v + } +} + // assignPod assigns the given pod to the given machine. func (r *BindingREST) assignPod(ctx context.Context, podUID types.UID, podResourceVersion, podID string, machine string, annotations, labels map[string]string, dryRun bool) (err error) { if _, err = r.setPodHostAndAnnotations(ctx, podUID, podResourceVersion, podID, machine, annotations, labels, dryRun); err != nil { diff --git a/pkg/registry/core/pod/storage/storage_test.go b/pkg/registry/core/pod/storage/storage_test.go index 17e0de0a633..753bb970b8a 100644 --- a/pkg/registry/core/pod/storage/storage_test.go +++ b/pkg/registry/core/pod/storage/storage_test.go @@ -680,8 +680,8 @@ func TestEtcdCreateWithContainersNotFound(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, version.MustParse("1.32")) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, kubefeatures.SetPodTopologyLabels, true) + featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, version.MustParse("1.33")) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, kubefeatures.PodTopologyLabelsAdmission, true) // Suddenly, a wild scheduler appears: _, err = bindingStorage.Create(ctx, "foo", &api.Binding{ ObjectMeta: metav1.ObjectMeta{ diff --git a/plugin/pkg/admission/podtopologylabels/admission.go b/plugin/pkg/admission/podtopologylabels/admission.go index 8467f1614ef..c4189269592 100644 --- a/plugin/pkg/admission/podtopologylabels/admission.go +++ b/plugin/pkg/admission/podtopologylabels/admission.go @@ -38,24 +38,49 @@ import ( // PluginName is a string with the name of the plugin const PluginName = "PodTopologyLabels" +// defaultConfig is the configuration used for the default instantiation of the plugin. +// This is the configured used by kube-apiserver. +// It is not exported to avoid any chance of accidentally mutating the variable. +var defaultConfig = Config{ + Labels: []string{"topology.k8s.io/zone", "topology.k8s.io/region", "kubernetes.io/hostname"}, +} + // Register registers a plugin func Register(plugins *admission.Plugins) { plugins.Register(PluginName, func(_ io.Reader) (admission.Interface, error) { - plugin := NewPodTopologyPlugin() + plugin := NewPodTopologyPlugin(defaultConfig) return plugin, nil }) } +// Config contains configuration for instances of the podtopologylabels admission plugin. +// This is not exposed as user-facing APIServer configuration, however can be used by +// platform operators when building custom topology label plugins. +type Config struct { + // Labels is set of explicit label keys to be copied from the Node object onto + // pod Binding objects during admission. + Labels []string + // Domains is a set of label key prefixes used to copy across label values + // for all labels with a given domain prefix. + // For example, `example.com` would match all labels matching `example.com/*`. + // Keys without a domain portion (i.e. those not containing a /) will never match. + Domains []string + // Suffixes is a set of label key domain suffixes used to copy label values for + // all labels of a given subdomain. + // This acts as a suffix match on the domain portion of label keys. + // If a suffix does not have a leading '.', one will be prepended to avoid + // programmer errors with values like `example.com` matching `notexample.com`. + // Keys without a domain portion (i.e. those not containing a /) will never match. + Suffixes []string +} + // NewPodTopologyPlugin initializes a Plugin -func NewPodTopologyPlugin() *Plugin { +func NewPodTopologyPlugin(c Config) *Plugin { return &Plugin{ - Handler: admission.NewHandler(admission.Create), - // Always copy zone and region labels. - labels: sets.New("topology.k8s.io/zone", "topology.k8s.io/region"), - // Also support copying arbitrary custom topology labels. - domains: sets.New("topology.k8s.io"), - // Copy any sub-domains of topology.k8s.io as well. - suffixes: sets.New(".topology.k8s.io"), + Handler: admission.NewHandler(admission.Create), + labels: sets.New(c.Labels...), + domains: sets.New(c.Domains...), + suffixes: sets.New(c.Suffixes...), } } diff --git a/plugin/pkg/admission/podtopologylabels/admission_test.go b/plugin/pkg/admission/podtopologylabels/admission_test.go index ca0ecefb62e..1b79203bbf3 100644 --- a/plugin/pkg/admission/podtopologylabels/admission_test.go +++ b/plugin/pkg/admission/podtopologylabels/admission_test.go @@ -54,6 +54,7 @@ func TestPodTopology(t *testing.T) { targetNodeLabels: map[string]string{ "topology.k8s.io/zone": "zone1", "topology.k8s.io/region": "region1", + "topology.k8s.io/arbitrary": "something", "non-topology.k8s.io/label": "something", // verify we don't unexpectedly copy non topology.k8s.io labels. }, expectedBindingLabels: map[string]string{ @@ -62,21 +63,23 @@ func TestPodTopology(t *testing.T) { }, }, { - name: "copies arbitrary topology labels", + name: "does not copy arbitrary topology labels", targetNodeLabels: map[string]string{ + "topology.k8s.io/zone": "zone1", "topology.k8s.io/arbitrary": "something", }, expectedBindingLabels: map[string]string{ - "topology.k8s.io/arbitrary": "something", + "topology.k8s.io/zone": "zone1", }, }, { - name: "copies topology labels that use a subdomain", + name: "does not copy topology labels that use a subdomain", targetNodeLabels: map[string]string{ - "something.topology.k8s.io/a-thing": "value", + "topology.k8s.io/region": "region1", + "sub.topology.k8s.io/zone": "value", }, expectedBindingLabels: map[string]string{ - "something.topology.k8s.io/a-thing": "value", + "topology.k8s.io/region": "region1", }, }, { @@ -167,7 +170,7 @@ func TestPodTopology(t *testing.T) { // newHandlerForTest returns the admission controller configured for testing. func newHandlerForTest(c kubernetes.Interface) (*Plugin, informers.SharedInformerFactory, error) { factory := informers.NewSharedInformerFactory(c, 5*time.Minute) - handler := NewPodTopologyPlugin() + handler := NewPodTopologyPlugin(defaultConfig) // todo: write additional test cases with non-default config. pluginInitializer := genericadmissioninitializer.New(c, nil, factory, nil, feature.DefaultFeatureGate, nil, nil) pluginInitializer.Initialize(handler) return handler, factory, admission.ValidateInitialization(handler) diff --git a/plugin/pkg/admission/podtopologylabels/doc.go b/plugin/pkg/admission/podtopologylabels/doc.go index 99271759ddf..2ae51a8e03f 100644 --- a/plugin/pkg/admission/podtopologylabels/doc.go +++ b/plugin/pkg/admission/podtopologylabels/doc.go @@ -15,9 +15,10 @@ limitations under the License. */ // Package podtopologylabels is a plugin that mutates `pod/binding` requests -// to set `topology.k8s.io` labels (including subdomains) from the Node object -// referenced in the Binding to the Binding, which causes the Pod to also -// have these values set. +// by copying the `topology.k8s.io/{zone,region}` and `kubernetes.io/hostname` +// labels from the assigned Node object (in the Binding being admitted) onto +// the Binding so that it can be persisted onto the Pod object when the Pod +// is being scheduled. // If the binding target is NOT a Node object, no action is taken. // If the referenced Node object does not exist, no action is taken. package podtopologylabels // import "k8s.io/kubernetes/plugin/pkg/admission/podtopologylabels" diff --git a/test/integration/pods/pods_test.go b/test/integration/pods/pods_test.go index 33b23235f85..c1c88dc2b71 100644 --- a/test/integration/pods/pods_test.go +++ b/test/integration/pods/pods_test.go @@ -57,6 +57,28 @@ func TestPodTopologyLabels(t *testing.T) { "topology.k8s.io/region": "region", }, }, + { + name: "subdomains of topology.k8s.io are not copied", + targetNodeLabels: map[string]string{ + "sub.topology.k8s.io/zone": "zone", + "topology.k8s.io/region": "region", + }, + expectedPodLabels: map[string]string{ + "topology.k8s.io/region": "region", + }, + }, + { + name: "custom topology.k8s.io labels are not copied", + targetNodeLabels: map[string]string{ + "topology.k8s.io/custom": "thing", + "topology.k8s.io/zone": "zone", + "topology.k8s.io/region": "region", + }, + expectedPodLabels: map[string]string{ + "topology.k8s.io/zone": "zone", + "topology.k8s.io/region": "region", + }, + }, } // Enable the feature BEFORE starting the test server, as the admission plugin only checks feature gates // on start up and not on each invocation at runtime. @@ -70,8 +92,10 @@ func TestPodTopologyLabels_FeatureDisabled(t *testing.T) { { name: "does nothing when the feature is not enabled", targetNodeLabels: map[string]string{ - "topology.k8s.io/zone": "zone", - "topology.k8s.io/region": "region", + "topology.k8s.io/zone": "zone", + "topology.k8s.io/region": "region", + "topology.k8s.io/custom": "thing", + "sub.topology.k8s.io/zone": "zone", }, expectedPodLabels: map[string]string{}, },