From b7014561c1f5d0a3955cc586e04b68d8030799e3 Mon Sep 17 00:00:00 2001 From: Kevin Date: Tue, 26 Apr 2016 12:44:11 +0000 Subject: [PATCH] Add admission controller to limit pod anti affinity topologykey to node level --- cmd/kube-apiserver/app/plugins.go | 1 + docs/admin/kube-apiserver.md | 4 +- .../pkg/admission/antiaffinity/admission.go | 80 ++++++ .../admission/antiaffinity/admission_test.go | 230 ++++++++++++++++++ plugin/pkg/admission/antiaffinity/doc.go | 28 +++ 5 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 plugin/pkg/admission/antiaffinity/admission.go create mode 100644 plugin/pkg/admission/antiaffinity/admission_test.go create mode 100644 plugin/pkg/admission/antiaffinity/doc.go diff --git a/cmd/kube-apiserver/app/plugins.go b/cmd/kube-apiserver/app/plugins.go index f4742fc46ae..c7fd8d60742 100644 --- a/cmd/kube-apiserver/app/plugins.go +++ b/cmd/kube-apiserver/app/plugins.go @@ -26,6 +26,7 @@ import ( // Admission policies _ "k8s.io/kubernetes/plugin/pkg/admission/admit" _ "k8s.io/kubernetes/plugin/pkg/admission/alwayspullimages" + _ "k8s.io/kubernetes/plugin/pkg/admission/antiaffinity" _ "k8s.io/kubernetes/plugin/pkg/admission/deny" _ "k8s.io/kubernetes/plugin/pkg/admission/exec" _ "k8s.io/kubernetes/plugin/pkg/admission/initialresources" diff --git a/docs/admin/kube-apiserver.md b/docs/admin/kube-apiserver.md index 79918fe90a4..4ac64d0df6e 100644 --- a/docs/admin/kube-apiserver.md +++ b/docs/admin/kube-apiserver.md @@ -51,7 +51,7 @@ kube-apiserver ### Options ``` - --admission-control="AlwaysAdmit": Ordered list of plug-ins to do admission control of resources into cluster. 
Comma-delimited list of: AlwaysAdmit, AlwaysDeny, AlwaysPullImages, DenyEscalatingExec, DenyExecOnPrivileged, InitialResources, LimitRanger, NamespaceAutoProvision, NamespaceExists, NamespaceLifecycle, PersistentVolumeLabel, ResourceQuota, SecurityContextDeny, ServiceAccount + --admission-control="AlwaysAdmit": Ordered list of plug-ins to do admission control of resources into cluster. Comma-delimited list of: AlwaysAdmit, AlwaysDeny, AlwaysPullImages, DenyEscalatingExec, DenyExecOnPrivileged, InitialResources, LimitPodHardAntiAffinityTopology, LimitRanger, NamespaceAutoProvision, NamespaceExists, NamespaceLifecycle, PersistentVolumeLabel, ResourceQuota, SecurityContextDeny, ServiceAccount --admission-control-config-file="": File with admission control configuration. --advertise-address=: The IP address on which to advertise the apiserver to members of the cluster. This address must be reachable by the rest of the cluster. If blank, the --bind-address will be used. If --bind-address is unspecified, the host's default interface will be used. --allow-privileged[=false]: If true, allow privileged containers. @@ -119,7 +119,7 @@ kube-apiserver --watch-cache-sizes=[]: List of watch cache sizes for every resource (pods, nodes, etc.), comma separated. The individual override format: resource#size, where size is a number. It takes effect when watch-cache is enabled. ``` -###### Auto generated by spf13/cobra on 5-May-2016 +###### Auto generated by spf13/cobra on 8-May-2016 diff --git a/plugin/pkg/admission/antiaffinity/admission.go b/plugin/pkg/admission/antiaffinity/admission.go new file mode 100644 index 00000000000..832351c3974 --- /dev/null +++ b/plugin/pkg/admission/antiaffinity/admission.go @@ -0,0 +1,80 @@ +/* +Copyright 2016 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package antiaffinity
+
+import (
+	"fmt"
+	"io"
+
+	"k8s.io/kubernetes/pkg/admission"
+	"k8s.io/kubernetes/pkg/api"
+	apierrors "k8s.io/kubernetes/pkg/api/errors"
+	"k8s.io/kubernetes/pkg/api/unversioned"
+	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
+)
+
+func init() {
+	admission.RegisterPlugin("LimitPodHardAntiAffinityTopology", func(client clientset.Interface, config io.Reader) (admission.Interface, error) {
+		return NewInterPodAntiAffinity(client), nil
+	})
+}
+
+// plugin is the LimitPodHardAntiAffinityTopology admission handler; it embeds the operation filter and keeps the client it was built with.
+type plugin struct {
+	*admission.Handler
+	client clientset.Interface
+}
+
+// NewInterPodAntiAffinity creates a LimitPodHardAntiAffinityTopology admission controller that handles Create and Update operations.
+func NewInterPodAntiAffinity(client clientset.Interface) admission.Interface {
+	return &plugin{
+		Handler: admission.NewHandler(admission.Create, admission.Update),
+		client:  client,
+	}
+}
+
+// Admit rejects (Forbidden) a pod whose hard anti-affinity, i.e. requiredDuringSchedulingIgnoredDuringExecution, contains a term with a
+// TopologyKey other than unversioned.LabelHostname ("kubernetes.io/hostname"); non-pod resources and pod subresources are admitted as-is.
+func (p *plugin) Admit(attributes admission.Attributes) (err error) {
+	// Skip subresource requests: the object of a subresource call need not be an *api.Pod and must not hit the BadRequest below.
+	if len(attributes.GetSubresource()) != 0 || attributes.GetResource().GroupResource() != api.Resource("pods") {
+		return nil
+	}
+	pod, ok := attributes.GetObject().(*api.Pod)
+	if !ok {
+		return apierrors.NewBadRequest("Resource was marked with kind Pod but was unable to be converted")
+	}
+	// The affinity is read from the pod annotations; a parse error is returned to the caller unchanged.
+	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
+	if err != nil {
+		return err
+	}
+	if affinity.PodAntiAffinity != nil {
+		// Ranging over an empty or nil list is a no-op, so no explicit length check is needed.
+		podAntiAffinityTerms := affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
+		// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
+		//if len(affinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
+		//	podAntiAffinityTerms = append(podAntiAffinityTerms, affinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
+		//}
+		for _, v := range podAntiAffinityTerms {
+			if v.TopologyKey != unversioned.LabelHostname {
+				return apierrors.NewForbidden(attributes.GetResource().GroupResource(), pod.Name, fmt.Errorf("affinity.PodAntiAffinity.RequiredDuringScheduling has TopologyKey %v but only key %v is allowed", v.TopologyKey, unversioned.LabelHostname))
+			}
+		}
+	}
+	return nil
+}
diff --git a/plugin/pkg/admission/antiaffinity/admission_test.go b/plugin/pkg/admission/antiaffinity/admission_test.go
new file mode 100644
index 00000000000..74e864aaaf0
--- /dev/null
+++ b/plugin/pkg/admission/antiaffinity/admission_test.go
@@ -0,0 +1,230 @@
+/*
+Copyright 2016 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package antiaffinity + +import ( + "k8s.io/kubernetes/pkg/admission" + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/unversioned" + "testing" +) + +// TestInterPodAffinityAdmission ensures that hard PodAntiAffinity is denied if it defines a TopologyKey other than kubernetes.io/hostname. +func TestInterPodAffinityAdmission(t *testing.T) { + handler := NewInterPodAntiAffinity(nil) + pod := api.Pod{ + Spec: api.PodSpec{}, + } + tests := []struct { + affinity map[string]string + errorExpected bool + }{ + // empty affinity: admission succeeds. + { + affinity: map[string]string{}, + errorExpected: false, + }, + // whatever the topologyKey in preferredDuringSchedulingIgnoredDuringExecution, the admission should succeed. + { + affinity: map[string]string{ + api.AffinityAnnotationKey: ` + {"podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [{ + "weight": 5, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces": [], + "topologyKey": "az" + } + }] + }}`, + }, + errorExpected: false, + }, + // valid topologyKey in requiredDuringSchedulingIgnoredDuringExecution, + // plus any topologyKey in preferredDuringSchedulingIgnoredDuringExecution, then admission succeeds. 
+ { + affinity: map[string]string{ + api.AffinityAnnotationKey: ` + {"podAntiAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [{ + "weight": 5, + "podAffinityTerm": { + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces": [], + "topologyKey": "az" + } + }], + "requiredDuringSchedulingIgnoredDuringExecution": [{ + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces": [], + "topologyKey": "` + unversioned.LabelHostname + `" + }] + }}`, + }, + errorExpected: false, + }, + // valid topologyKey in requiredDuringSchedulingIgnoredDuringExecution, then admission succeeds. + { + affinity: map[string]string{ + api.AffinityAnnotationKey: ` + {"podAntiAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": [{ + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces":[], + "topologyKey": "` + unversioned.LabelHostname + `" + }] + }}`, + }, + errorExpected: false, + }, + // invalid topologyKey in requiredDuringSchedulingIgnoredDuringExecution, then admission fails. + { + affinity: map[string]string{ + api.AffinityAnnotationKey: ` + {"podAntiAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": [{ + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces":[], + "topologyKey": " zone " + }] + }}`, + }, + errorExpected: true, + }, + // invalid topologyKey in requiredDuringSchedulingRequiredDuringExecution, then admission fails. + // TODO: Uncomment this block when RequiredDuringSchedulingRequiredDuringExecution is implemented. 
+ // { + // affinity: map[string]string{ + // api.AffinityAnnotationKey: ` + // {"podAntiAffinity": { + // "requiredDuringSchedulingRequiredDuringExecution": [{ + // "labelSelector": { + // "matchExpressions": [{ + // "key": "security", + // "operator": "In", + // "values":["S2"] + // }] + // }, + // "namespaces":[], + // "topologyKey": " zone " + // }] + // }}`, + // }, + // errorExpected: true, + // } + // the middle element of the requiredDuringSchedulingIgnoredDuringExecution list has an invalid topologyKey, so admission fails. + { + affinity: map[string]string{ + api.AffinityAnnotationKey: ` + {"podAntiAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": [{ + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces":[], + "topologyKey": "` + unversioned.LabelHostname + `" + }, + { + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces":[], + "topologyKey": " zone " + }, + { + "labelSelector": { + "matchExpressions": [{ + "key": "security", + "operator": "In", + "values":["S2"] + }] + }, + "namespaces": [], + "topologyKey": "` + unversioned.LabelHostname + `" + }] + }}`, + }, + errorExpected: true, + }, + } + for _, test := range tests { + pod.ObjectMeta.Annotations = test.affinity + err := handler.Admit(admission.NewAttributesRecord(&pod, api.Kind("Pod").WithVersion("version"), "foo", "name", api.Resource("pods").WithVersion("version"), "", "ignored", nil)) + + if test.errorExpected && err == nil { + t.Errorf("Expected error for Anti Affinity %+v but did not get an error", test.affinity) + } + + if !test.errorExpected && err != nil { + t.Errorf("Unexpected error %v for AntiAffinity %+v", err, test.affinity) + } + } +} +func TestHandles(t *testing.T) { + handler := NewInterPodAntiAffinity(nil) + tests := map[admission.Operation]bool{ + admission.Update: true, + admission.Create: true, + admission.Delete: false, + admission.Connect: 
false, + } + for op, expected := range tests { + result := handler.Handles(op) + if result != expected { + t.Errorf("Unexpected result for operation %s: %v\n", op, result) + } + } +} diff --git a/plugin/pkg/admission/antiaffinity/doc.go b/plugin/pkg/admission/antiaffinity/doc.go new file mode 100644 index 00000000000..a2a1acd0502 --- /dev/null +++ b/plugin/pkg/admission/antiaffinity/doc.go @@ -0,0 +1,28 @@ +/* +Copyright 2016 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package antiaffinity provides the LimitPodHardAntiAffinityTopology admission controller, which rejects any pod +// that specifies "hard" (RequiredDuringScheduling) anti-affinity +// with a TopologyKey other than unversioned.LabelHostname. +// Because anti-affinity is symmetric, without this admission controller, +// a user could maliciously or accidentally specify that their pod (once it has been scheduled) +// should block other pods from scheduling into the same zone or some other large topology, +// essentially DoSing the cluster. +// In the future we will address this problem more fully by using quota and priority, +// but for now this admission controller provides a simple protection, +// on the assumption that the only legitimate use of hard pod anti-affinity +// is to exclude other pods from the same node. +package antiaffinity