Add admission controller to limit pod anti affinity topologykey to node level

This commit is contained in:
Kevin 2016-04-26 12:44:11 +00:00
parent fe135fc251
commit b7014561c1
5 changed files with 341 additions and 2 deletions

View File

@ -26,6 +26,7 @@ import (
// Admission policies // Admission policies
_ "k8s.io/kubernetes/plugin/pkg/admission/admit" _ "k8s.io/kubernetes/plugin/pkg/admission/admit"
_ "k8s.io/kubernetes/plugin/pkg/admission/alwayspullimages" _ "k8s.io/kubernetes/plugin/pkg/admission/alwayspullimages"
_ "k8s.io/kubernetes/plugin/pkg/admission/antiaffinity"
_ "k8s.io/kubernetes/plugin/pkg/admission/deny" _ "k8s.io/kubernetes/plugin/pkg/admission/deny"
_ "k8s.io/kubernetes/plugin/pkg/admission/exec" _ "k8s.io/kubernetes/plugin/pkg/admission/exec"
_ "k8s.io/kubernetes/plugin/pkg/admission/initialresources" _ "k8s.io/kubernetes/plugin/pkg/admission/initialresources"

View File

@ -51,7 +51,7 @@ kube-apiserver
### Options ### Options
``` ```
--admission-control="AlwaysAdmit": Ordered list of plug-ins to do admission control of resources into cluster. Comma-delimited list of: AlwaysAdmit, AlwaysDeny, AlwaysPullImages, DenyEscalatingExec, DenyExecOnPrivileged, InitialResources, LimitRanger, NamespaceAutoProvision, NamespaceExists, NamespaceLifecycle, PersistentVolumeLabel, ResourceQuota, SecurityContextDeny, ServiceAccount --admission-control="AlwaysAdmit": Ordered list of plug-ins to do admission control of resources into cluster. Comma-delimited list of: AlwaysAdmit, AlwaysDeny, AlwaysPullImages, DenyEscalatingExec, DenyExecOnPrivileged, InitialResources, LimitPodHardAntiAffinityTopology, LimitRanger, NamespaceAutoProvision, NamespaceExists, NamespaceLifecycle, PersistentVolumeLabel, ResourceQuota, SecurityContextDeny, ServiceAccount
--admission-control-config-file="": File with admission control configuration. --admission-control-config-file="": File with admission control configuration.
--advertise-address=<nil>: The IP address on which to advertise the apiserver to members of the cluster. This address must be reachable by the rest of the cluster. If blank, the --bind-address will be used. If --bind-address is unspecified, the host's default interface will be used. --advertise-address=<nil>: The IP address on which to advertise the apiserver to members of the cluster. This address must be reachable by the rest of the cluster. If blank, the --bind-address will be used. If --bind-address is unspecified, the host's default interface will be used.
--allow-privileged[=false]: If true, allow privileged containers. --allow-privileged[=false]: If true, allow privileged containers.
@ -119,7 +119,7 @@ kube-apiserver
--watch-cache-sizes=[]: List of watch cache sizes for every resource (pods, nodes, etc.), comma separated. The individual override format: resource#size, where size is a number. It takes effect when watch-cache is enabled. --watch-cache-sizes=[]: List of watch cache sizes for every resource (pods, nodes, etc.), comma separated. The individual override format: resource#size, where size is a number. It takes effect when watch-cache is enabled.
``` ```
###### Auto generated by spf13/cobra on 5-May-2016 ###### Auto generated by spf13/cobra on 8-May-2016
<!-- BEGIN MUNGE: GENERATED_ANALYTICS --> <!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

View File

@ -0,0 +1,80 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package antiaffinity
import (
"fmt"
"io"
"k8s.io/kubernetes/pkg/admission"
"k8s.io/kubernetes/pkg/api"
apierrors "k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/api/unversioned"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
)
func init() {
admission.RegisterPlugin("LimitPodHardAntiAffinityTopology", func(client clientset.Interface, config io.Reader) (admission.Interface, error) {
return NewInterPodAntiAffinity(client), nil
})
}
// plugin contains the client used by the admission controller
type plugin struct {
*admission.Handler
client clientset.Interface
}
// NewInterPodAntiAffinity creates a new instance of the LimitPodHardAntiAffinityTopology admission controller
func NewInterPodAntiAffinity(client clientset.Interface) admission.Interface {
return &plugin{
Handler: admission.NewHandler(admission.Create, admission.Update),
client: client,
}
}
// Admit will deny any pod that defines AntiAffinity topology key other than unversioned.LabelHostname i.e. "kubernetes.io/hostname"
// in requiredDuringSchedulingRequiredDuringExecution and requiredDuringSchedulingIgnoredDuringExecution.
func (p *plugin) Admit(attributes admission.Attributes) (err error) {
if attributes.GetResource().GroupResource() != api.Resource("pods") {
return nil
}
pod, ok := attributes.GetObject().(*api.Pod)
if !ok {
return apierrors.NewBadRequest("Resource was marked with kind Pod but was unable to be converted")
}
affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil {
return err
}
if affinity.PodAntiAffinity != nil {
var podAntiAffinityTerms []api.PodAffinityTerm
if len(affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
podAntiAffinityTerms = affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
//if len(affinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// podAntiAffinityTerms = append(podAntiAffinityTerms, affinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
for _, v := range podAntiAffinityTerms {
if v.TopologyKey != unversioned.LabelHostname {
return apierrors.NewForbidden(attributes.GetResource().GroupResource(), pod.Name, fmt.Errorf("affinity.PodAntiAffinity.RequiredDuringScheduling has TopologyKey %v but only key %v is allowed", v.TopologyKey, unversioned.LabelHostname))
}
}
}
return nil
}

View File

@ -0,0 +1,230 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package antiaffinity
import (
"k8s.io/kubernetes/pkg/admission"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/unversioned"
"testing"
)
// ensures the hard PodAntiAffinity is denied if it defines TopologyKey other than kubernetes.io/hostname.
func TestInterPodAffinityAdmission(t *testing.T) {
handler := NewInterPodAntiAffinity(nil)
pod := api.Pod{
Spec: api.PodSpec{},
}
tests := []struct {
affinity map[string]string
errorExpected bool
}{
// empty affinity its success.
{
affinity: map[string]string{},
errorExpected: false,
},
// what ever topologyKey in preferredDuringSchedulingIgnoredDuringExecution, the admission should success.
{
affinity: map[string]string{
api.AffinityAnnotationKey: `
{"podAntiAffinity": {
"preferredDuringSchedulingIgnoredDuringExecution": [{
"weight": 5,
"podAffinityTerm": {
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces": [],
"topologyKey": "az"
}
}]
}}`,
},
errorExpected: false,
},
// valid topologyKey in requiredDuringSchedulingIgnoredDuringExecution,
// plus any topologyKey in preferredDuringSchedulingIgnoredDuringExecution, then admission success.
{
affinity: map[string]string{
api.AffinityAnnotationKey: `
{"podAntiAffinity": {
"preferredDuringSchedulingIgnoredDuringExecution": [{
"weight": 5,
"podAffinityTerm": {
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces": [],
"topologyKey": "az"
}
}],
"requiredDuringSchedulingIgnoredDuringExecution": [{
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces": [],
"topologyKey": "` + unversioned.LabelHostname + `"
}]
}}`,
},
errorExpected: false,
},
// valid topologyKey in requiredDuringSchedulingIgnoredDuringExecution then admission success.
{
affinity: map[string]string{
api.AffinityAnnotationKey: `
{"podAntiAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": [{
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces":[],
"topologyKey": "` + unversioned.LabelHostname + `"
}]
}}`,
},
errorExpected: false,
},
// invalid topologyKey in requiredDuringSchedulingIgnoredDuringExecution then admission fails.
{
affinity: map[string]string{
api.AffinityAnnotationKey: `
{"podAntiAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": [{
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces":[],
"topologyKey": " zone "
}]
}}`,
},
errorExpected: true,
},
// invalid topologyKey in requiredDuringSchedulingRequiredDuringExecution then admission fails.
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
// {
// affinity: map[string]string{
// api.AffinityAnnotationKey: `
// {"podAntiAffinity": {
// "requiredDuringSchedulingRequiredDuringExecution": [{
// "labelSelector": {
// "matchExpressions": [{
// "key": "security",
// "operator": "In",
// "values":["S2"]
// }]
// },
// "namespaces":[],
// "topologyKey": " zone "
// }]
// }}`,
// },
// errorExpected: true,
// }
// list of requiredDuringSchedulingIgnoredDuringExecution middle element topologyKey is not valid.
{
affinity: map[string]string{
api.AffinityAnnotationKey: `
{"podAntiAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": [{
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces":[],
"topologyKey": "` + unversioned.LabelHostname + `"
},
{
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces":[],
"topologyKey": " zone "
},
{
"labelSelector": {
"matchExpressions": [{
"key": "security",
"operator": "In",
"values":["S2"]
}]
},
"namespaces": [],
"topologyKey": "` + unversioned.LabelHostname + `"
}]
}}`,
},
errorExpected: true,
},
}
for _, test := range tests {
pod.ObjectMeta.Annotations = test.affinity
err := handler.Admit(admission.NewAttributesRecord(&pod, api.Kind("Pod").WithVersion("version"), "foo", "name", api.Resource("pods").WithVersion("version"), "", "ignored", nil))
if test.errorExpected && err == nil {
t.Errorf("Expected error for Anti Affinity %+v but did not get an error", test.affinity)
}
if !test.errorExpected && err != nil {
t.Errorf("Unexpected error %v for AntiAffinity %+v", err, test.affinity)
}
}
}
func TestHandles(t *testing.T) {
handler := NewInterPodAntiAffinity(nil)
tests := map[admission.Operation]bool{
admission.Update: true,
admission.Create: true,
admission.Delete: false,
admission.Connect: false,
}
for op, expected := range tests {
result := handler.Handles(op)
if result != expected {
t.Errorf("Unexpected result for operation %s: %v\n", op, result)
}
}
}

View File

@ -0,0 +1,28 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// LimitPodHardAntiAffinityTopology admission controller rejects any pod
// that specifies "hard" (RequiredDuringScheduling) anti-affinity
// with a TopologyKey other than unversioned.LabelHostname.
// Because anti-affinity is symmetric, without this admission controller,
// a user could maliciously or accidentally specify that their pod (once it has scheduled)
// should block other pods from scheduling into the same zone or some other large topology,
// essentially DoSing the cluster.
// In the future we will address this problem more fully by using quota and priority,
// but for now this admission controller provides a simple protection,
// on the assumption that the only legitimate use of hard pod anti-affinity
// is to exclude other pods from the same node.
package antiaffinity