From ab616a88b9d7681e56eae3204227e2674ec20068 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Fri, 11 May 2018 15:58:29 +0200 Subject: [PATCH] Promote sysctl annotations to API fields --- pkg/apis/core/annotation_key_constants.go | 14 -- pkg/apis/core/helper/helpers.go | 48 ------ pkg/apis/core/helper/helpers_test.go | 47 ----- pkg/apis/core/types.go | 4 + pkg/apis/core/v1/conversion.go | 17 ++ pkg/apis/core/v1/helper/helpers.go | 48 ------ pkg/apis/core/v1/helper/helpers_test.go | 47 ----- pkg/apis/core/validation/validation.go | 39 +---- pkg/apis/core/validation/validation_test.go | 93 ++++------ pkg/apis/policy/helpers.go | 37 ---- pkg/apis/policy/helpers_test.go | 62 ------- pkg/apis/policy/types.go | 26 ++- pkg/apis/policy/validation/validation.go | 64 +++++-- pkg/apis/policy/validation/validation_test.go | 48 +++++- pkg/kubelet/kubelet.go | 15 +- pkg/kubelet/kuberuntime/helpers.go | 19 --- pkg/kubelet/kuberuntime/helpers_test.go | 40 ----- .../kuberuntime/kuberuntime_sandbox.go | 9 +- pkg/kubelet/sysctl/runtime.go | 15 +- pkg/kubelet/sysctl/whitelist.go | 42 +---- pkg/kubelet/sysctl/whitelist_test.go | 6 +- pkg/security/podsecuritypolicy/factory.go | 16 +- .../podsecuritypolicy/provider_test.go | 109 +++++++----- .../sysctl/mustmatchpatterns.go | 113 ++++++++----- .../sysctl/mustmatchpatterns_test.go | 82 ++++----- .../podsecuritypolicy/admission_test.go | 160 +++++++----------- .../api/core/v1/annotation_key_constants.go | 14 -- staging/src/k8s.io/api/core/v1/types.go | 8 +- .../k8s.io/api/extensions/v1beta1/types.go | 19 +++ .../src/k8s.io/api/policy/v1beta1/types.go | 19 +++ test/e2e/common/sysctl.go | 86 +++++----- test/e2e/framework/psp_util.go | 1 + test/e2e/upgrades/sysctl.go | 7 +- 33 files changed, 536 insertions(+), 838 deletions(-) delete mode 100644 pkg/apis/policy/helpers.go delete mode 100644 pkg/apis/policy/helpers_test.go diff --git a/pkg/apis/core/annotation_key_constants.go b/pkg/apis/core/annotation_key_constants.go index cfebd86a4b1..a1e6daae496 100644 --- a/pkg/apis/core/annotation_key_constants.go +++ b/pkg/apis/core/annotation_key_constants.go @@ -56,20 +56,6 @@ const ( // in the Annotations of a Node. PreferAvoidPodsAnnotationKey string = "scheduler.alpha.kubernetes.io/preferAvoidPods" - // SysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure - // container of a pod. The annotation value is a comma separated list of sysctl_name=value - // key-value pairs. Only a limited set of whitelisted and isolated sysctls is supported by - // the kubelet. Pods with other sysctls will fail to launch. - SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls" - - // UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure - // container of a pod. The annotation value is a comma separated list of sysctl_name=value - // key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly - // namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use - // is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet - // will fail to launch. - UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls" - // ObjectTTLAnnotations represents a suggestion for kubelet for how long it can cache // an object (e.g. secret, config map) before fetching it again from apiserver. // This annotation can be attached to node. diff --git a/pkg/apis/core/helper/helpers.go b/pkg/apis/core/helper/helpers.go index 69947788334..7be34a8578b 100644 --- a/pkg/apis/core/helper/helpers.go +++ b/pkg/apis/core/helper/helpers.go @@ -499,54 +499,6 @@ func GetTaintsFromNodeAnnotations(annotations map[string]string) ([]core.Taint, return taints, nil } -// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls -// and a slice of unsafe Sysctls. This is only a convenience wrapper around -// SysctlsFromPodAnnotation. -func SysctlsFromPodAnnotations(a map[string]string) ([]core.Sysctl, []core.Sysctl, error) { - safe, err := SysctlsFromPodAnnotation(a[core.SysctlsPodAnnotationKey]) - if err != nil { - return nil, nil, err - } - unsafe, err := SysctlsFromPodAnnotation(a[core.UnsafeSysctlsPodAnnotationKey]) - if err != nil { - return nil, nil, err - } - - return safe, unsafe, nil -} - -// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls. -func SysctlsFromPodAnnotation(annotation string) ([]core.Sysctl, error) { - if len(annotation) == 0 { - return nil, nil - } - - kvs := strings.Split(annotation, ",") - sysctls := make([]core.Sysctl, len(kvs)) - for i, kv := range kvs { - cs := strings.Split(kv, "=") - if len(cs) != 2 || len(cs[0]) == 0 { - return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv) - } - sysctls[i].Name = cs[0] - sysctls[i].Value = cs[1] - } - return sysctls, nil -} - -// PodAnnotationsFromSysctls creates an annotation value for a slice of Sysctls. -func PodAnnotationsFromSysctls(sysctls []core.Sysctl) string { - if len(sysctls) == 0 { - return "" - } - - kvs := make([]string, len(sysctls)) - for i := range sysctls { - kvs[i] = fmt.Sprintf("%s=%s", sysctls[i].Name, sysctls[i].Value) - } - return strings.Join(kvs, ",") -} - // GetPersistentVolumeClass returns StorageClassName. func GetPersistentVolumeClass(volume *core.PersistentVolume) string { // Use beta annotation first diff --git a/pkg/apis/core/helper/helpers_test.go b/pkg/apis/core/helper/helpers_test.go index c63ba46c295..2d1700b155c 100644 --- a/pkg/apis/core/helper/helpers_test.go +++ b/pkg/apis/core/helper/helpers_test.go @@ -239,53 +239,6 @@ func TestNodeSelectorRequirementsAsSelector(t *testing.T) { } } -func TestSysctlsFromPodAnnotation(t *testing.T) { - type Test struct { - annotation string - expectValue []core.Sysctl - expectErr bool - } - for i, test := range []Test{ - { - annotation: "", - expectValue: nil, - }, - { - annotation: "foo.bar", - expectErr: true, - }, - { - annotation: "=123", - expectErr: true, - }, - { - annotation: "foo.bar=", - expectValue: []core.Sysctl{{Name: "foo.bar", Value: ""}}, - }, - { - annotation: "foo.bar=42", - expectValue: []core.Sysctl{{Name: "foo.bar", Value: "42"}}, - }, - { - annotation: "foo.bar=42,", - expectErr: true, - }, - { - annotation: "foo.bar=42,abc.def=1", - expectValue: []core.Sysctl{{Name: "foo.bar", Value: "42"}, {Name: "abc.def", Value: "1"}}, - }, - } { - sysctls, err := SysctlsFromPodAnnotation(test.annotation) - if test.expectErr && err == nil { - t.Errorf("[%v]expected error but got none", i) - } else if !test.expectErr && err != nil { - t.Errorf("[%v]did not expect error but got: %v", i, err) - } else if !reflect.DeepEqual(sysctls, test.expectValue) { - t.Errorf("[%v]expect value %v but got %v", i, test.expectValue, sysctls) - } - } -} - func TestIsHugePageResourceName(t *testing.T) { testCases := []struct { name core.ResourceName diff --git a/pkg/apis/core/types.go b/pkg/apis/core/types.go index d2593cac0cc..993ff8c2d79 100644 --- a/pkg/apis/core/types.go +++ b/pkg/apis/core/types.go @@ -2657,6 +2657,10 @@ type PodSecurityContext struct { // If unset, the Kubelet will not modify the ownership and permissions of any volume. // +optional FSGroup *int64 + // Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + // sysctls (by the container runtime) might fail to launch. + // +optional + Sysctls []Sysctl } // PodQOSClass defines the supported qos classes of Pods. diff --git a/pkg/apis/core/v1/conversion.go b/pkg/apis/core/v1/conversion.go index d5bce756c71..bc431d1478d 100644 --- a/pkg/apis/core/v1/conversion.go +++ b/pkg/apis/core/v1/conversion.go @@ -495,6 +495,14 @@ func Convert_core_PodSecurityContext_To_v1_PodSecurityContext(in *core.PodSecuri out.RunAsGroup = in.RunAsGroup out.RunAsNonRoot = in.RunAsNonRoot out.FSGroup = in.FSGroup + if in.Sysctls != nil { + out.Sysctls = make([]v1.Sysctl, len(in.Sysctls)) + for i, sysctl := range in.Sysctls { + if err := Convert_core_Sysctl_To_v1_Sysctl(&sysctl, &out.Sysctls[i], s); err != nil { + return err + } + } + } return nil } @@ -512,6 +520,15 @@ func Convert_v1_PodSecurityContext_To_core_PodSecurityContext(in *v1.PodSecurity out.RunAsGroup = in.RunAsGroup out.RunAsNonRoot = in.RunAsNonRoot out.FSGroup = in.FSGroup + if in.Sysctls != nil { + out.Sysctls = make([]core.Sysctl, len(in.Sysctls)) + for i, sysctl := range in.Sysctls { + if err := Convert_v1_Sysctl_To_core_Sysctl(&sysctl, &out.Sysctls[i], s); err != nil { + return err + } + } + } + return nil } diff --git a/pkg/apis/core/v1/helper/helpers.go b/pkg/apis/core/v1/helper/helpers.go index 931ff76613d..c8dbbdce92b 100644 --- a/pkg/apis/core/v1/helper/helpers.go +++ b/pkg/apis/core/v1/helper/helpers.go @@ -462,54 +462,6 @@ func GetAvoidPodsFromNodeAnnotations(annotations map[string]string) (v1.AvoidPod return avoidPods, nil } -// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls -// and a slice of unsafe Sysctls. This is only a convenience wrapper around -// SysctlsFromPodAnnotation. -func SysctlsFromPodAnnotations(a map[string]string) ([]v1.Sysctl, []v1.Sysctl, error) { - safe, err := SysctlsFromPodAnnotation(a[v1.SysctlsPodAnnotationKey]) - if err != nil { - return nil, nil, err - } - unsafe, err := SysctlsFromPodAnnotation(a[v1.UnsafeSysctlsPodAnnotationKey]) - if err != nil { - return nil, nil, err - } - - return safe, unsafe, nil -} - -// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls. -func SysctlsFromPodAnnotation(annotation string) ([]v1.Sysctl, error) { - if len(annotation) == 0 { - return nil, nil - } - - kvs := strings.Split(annotation, ",") - sysctls := make([]v1.Sysctl, len(kvs)) - for i, kv := range kvs { - cs := strings.Split(kv, "=") - if len(cs) != 2 || len(cs[0]) == 0 { - return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv) - } - sysctls[i].Name = cs[0] - sysctls[i].Value = cs[1] - } - return sysctls, nil -} - -// PodAnnotationsFromSysctls creates an annotation value for a slice of Sysctls. -func PodAnnotationsFromSysctls(sysctls []v1.Sysctl) string { - if len(sysctls) == 0 { - return "" - } - - kvs := make([]string, len(sysctls)) - for i := range sysctls { - kvs[i] = fmt.Sprintf("%s=%s", sysctls[i].Name, sysctls[i].Value) - } - return strings.Join(kvs, ",") -} - // GetPersistentVolumeClass returns StorageClassName. func GetPersistentVolumeClass(volume *v1.PersistentVolume) string { // Use beta annotation first diff --git a/pkg/apis/core/v1/helper/helpers_test.go b/pkg/apis/core/v1/helper/helpers_test.go index 4721cb0c01d..58992ab0a04 100644 --- a/pkg/apis/core/v1/helper/helpers_test.go +++ b/pkg/apis/core/v1/helper/helpers_test.go @@ -582,53 +582,6 @@ func TestGetAvoidPodsFromNode(t *testing.T) { } } -func TestSysctlsFromPodAnnotation(t *testing.T) { - type Test struct { - annotation string - expectValue []v1.Sysctl - expectErr bool - } - for i, test := range []Test{ - { - annotation: "", - expectValue: nil, - }, - { - annotation: "foo.bar", - expectErr: true, - }, - { - annotation: "=123", - expectErr: true, - }, - { - annotation: "foo.bar=", - expectValue: []v1.Sysctl{{Name: "foo.bar", Value: ""}}, - }, - { - annotation: "foo.bar=42", - expectValue: []v1.Sysctl{{Name: "foo.bar", Value: "42"}}, - }, - { - annotation: "foo.bar=42,", - expectErr: true, - }, - { - annotation: "foo.bar=42,abc.def=1", - expectValue: []v1.Sysctl{{Name: "foo.bar", Value: "42"}, {Name: "abc.def", Value: "1"}}, - }, - } { - sysctls, err := SysctlsFromPodAnnotation(test.annotation) - if test.expectErr && err == nil { - t.Errorf("[%v]expected error but got none", i) - } else if !test.expectErr && err != nil { - t.Errorf("[%v]did not expect error but got: %v", i, err) - } else if !reflect.DeepEqual(sysctls, test.expectValue) { - t.Errorf("[%v]expect value %v but got %v", i, test.expectValue, sysctls) - } - } -} - func TestMatchNodeSelectorTerms(t *testing.T) { type args struct { nodeSelectorTerms []v1.NodeSelectorTerm diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index 67c5ceeda7a..495e27264ba 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -129,23 +129,6 @@ func ValidatePodSpecificAnnotations(annotations map[string]string, spec *core.Po allErrs = append(allErrs, ValidateSeccompPodAnnotations(annotations, fldPath)...) allErrs = append(allErrs, ValidateAppArmorPodAnnotations(annotations, spec, fldPath)...) - sysctls, err := helper.SysctlsFromPodAnnotation(annotations[core.SysctlsPodAnnotationKey]) - if err != nil { - allErrs = append(allErrs, field.Invalid(fldPath.Key(core.SysctlsPodAnnotationKey), annotations[core.SysctlsPodAnnotationKey], err.Error())) - } else { - allErrs = append(allErrs, validateSysctls(sysctls, fldPath.Key(core.SysctlsPodAnnotationKey))...) - } - unsafeSysctls, err := helper.SysctlsFromPodAnnotation(annotations[core.UnsafeSysctlsPodAnnotationKey]) - if err != nil { - allErrs = append(allErrs, field.Invalid(fldPath.Key(core.UnsafeSysctlsPodAnnotationKey), annotations[core.UnsafeSysctlsPodAnnotationKey], err.Error())) - } else { - allErrs = append(allErrs, validateSysctls(unsafeSysctls, fldPath.Key(core.UnsafeSysctlsPodAnnotationKey))...) - } - inBoth := sysctlIntersection(sysctls, unsafeSysctls) - if len(inBoth) > 0 { - allErrs = append(allErrs, field.Invalid(fldPath.Key(core.UnsafeSysctlsPodAnnotationKey), strings.Join(inBoth, ", "), "can not be safe and unsafe")) - } - return allErrs } @@ -3364,12 +3347,16 @@ func IsValidSysctlName(name string) bool { func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path) field.ErrorList { allErrs := field.ErrorList{} + names := make(map[string]struct{}) for i, s := range sysctls { if len(s.Name) == 0 { allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), "")) } else if !IsValidSysctlName(s.Name) { allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, fmt.Sprintf("must have at most %d characters and match regex %s", SysctlMaxLength, SysctlFmt))) + } else if _, ok := names[s.Name]; ok { + allErrs = append(allErrs, field.Duplicate(fldPath.Index(i).Child("name"), s.Name)) } + names[s.Name] = struct{}{} } return allErrs } @@ -3408,6 +3395,10 @@ func ValidatePodSecurityContext(securityContext *core.PodSecurityContext, spec * allErrs = append(allErrs, field.Invalid(fldPath.Child("shareProcessNamespace"), *securityContext.ShareProcessNamespace, "ShareProcessNamespace and HostPID cannot both be enabled")) } } + + if len(securityContext.Sysctls) != 0 { + allErrs = append(allErrs, validateSysctls(securityContext.Sysctls, fldPath.Child("sysctls"))...) + } } return allErrs @@ -5279,20 +5270,6 @@ func ValidateLoadBalancerStatus(status *core.LoadBalancerStatus, fldPath *field. return allErrs } -func sysctlIntersection(a []core.Sysctl, b []core.Sysctl) []string { - lookup := make(map[string]struct{}, len(a)) - result := []string{} - for i := range a { - lookup[a[i].Name] = struct{}{} - } - for i := range b { - if _, found := lookup[b[i].Name]; found { - result = append(result, b[i].Name) - } - } - return result -} - // validateVolumeNodeAffinity tests that the PersistentVolume.NodeAffinity has valid data // returns: // - true if volumeNodeAffinity is set diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 301d523883f..6929ff2e519 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -6778,12 +6778,28 @@ func TestValidatePod(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "123", Namespace: "ns", - Annotations: map[string]string{ - core.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000", - core.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000", + }, + Spec: core.PodSpec{ + Containers: []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}}, + RestartPolicy: core.RestartPolicyAlways, + DNSPolicy: core.DNSClusterFirst, + SecurityContext: &core.PodSecurityContext{ + Sysctls: []core.Sysctl{ + { + Name: "kernel.shmmni", + Value: "32768", + }, + { + Name: "kernel.shmmax", + Value: "1000000000", + }, + { + Name: "knet.ipv4.route.min_pmtu", + Value: "1000", + }, + }, }, }, - Spec: validPodSpec(nil), }, { // valid extended resources for init container ObjectMeta: metav1.ObjectMeta{Name: "valid-extended", Namespace: "ns"}, @@ -7464,59 +7480,6 @@ func TestValidatePod(t *testing.T) { Spec: validPodSpec(nil), }, }, - "invalid sysctl annotation": { - expectedError: "metadata.annotations[security.alpha.kubernetes.io/sysctls]", - spec: core.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "123", - Namespace: "ns", - Annotations: map[string]string{ - core.SysctlsPodAnnotationKey: "foo:", - }, - }, - Spec: validPodSpec(nil), - }, - }, - "invalid comma-separated sysctl annotation": { - expectedError: "not of the format sysctl_name=value", - spec: core.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "123", - Namespace: "ns", - Annotations: map[string]string{ - core.SysctlsPodAnnotationKey: "kernel.msgmax,", - }, - }, - Spec: validPodSpec(nil), - }, - }, - "invalid unsafe sysctl annotation": { - expectedError: "metadata.annotations[security.alpha.kubernetes.io/sysctls]", - spec: core.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "123", - Namespace: "ns", - Annotations: map[string]string{ - core.SysctlsPodAnnotationKey: "foo:", - }, - }, - Spec: validPodSpec(nil), - }, - }, - "intersecting safe sysctls and unsafe sysctls annotations": { - expectedError: "can not be safe and unsafe", - spec: core.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "123", - Namespace: "ns", - Annotations: map[string]string{ - core.SysctlsPodAnnotationKey: "kernel.shmmax=10000000", - core.UnsafeSysctlsPodAnnotationKey: "kernel.shmmax=10000000", - }, - }, - Spec: validPodSpec(nil), - }, - }, "invalid extended resource requirement: request must be == limit": { expectedError: "must be equal to example.com/a", spec: core.Pod{ @@ -12805,6 +12768,11 @@ func TestValidateSysctls(t *testing.T) { "_invalid", } + duplicates := []string{ + "kernel.shmmax", + "kernel.shmmax", + } + sysctls := make([]core.Sysctl, len(valid)) for i, sysctl := range valid { sysctls[i].Name = sysctl @@ -12829,6 +12797,17 @@ func TestValidateSysctls(t *testing.T) { t.Errorf("unexpected errors: expected=%q, got=%q", expected, got) } } + + sysctls = make([]core.Sysctl, len(duplicates)) + for i, sysctl := range duplicates { + sysctls[i].Name = sysctl + } + errs = validateSysctls(sysctls, field.NewPath("foo")) + if len(errs) != 1 { + t.Errorf("unexpected validation errors: %v", errs) + } else if errs[0].Type != field.ErrorTypeDuplicate { + t.Errorf("expected error type %v, got %v", field.ErrorTypeDuplicate, errs[0].Type) + } } func newNodeNameEndpoint(nodeName string) *core.Endpoints { diff --git a/pkg/apis/policy/helpers.go b/pkg/apis/policy/helpers.go deleted file mode 100644 index 9c1ebd15235..00000000000 --- a/pkg/apis/policy/helpers.go +++ /dev/null @@ -1,37 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package policy - -import ( - "strings" -) - -// SysctlsFromPodSecurityPolicyAnnotation parses an annotation value of the key -// SysctlsSecurityPolicyAnnotationKey into a slice of sysctls. An empty slice -// is returned if annotation is the empty string. -func SysctlsFromPodSecurityPolicyAnnotation(annotation string) ([]string, error) { - if len(annotation) == 0 { - return []string{}, nil - } - - return strings.Split(annotation, ","), nil -} - -// PodAnnotationsFromSysctls creates an annotation value for a slice of Sysctls. -func PodAnnotationsFromSysctls(sysctls []string) string { - return strings.Join(sysctls, ",") -} diff --git a/pkg/apis/policy/helpers_test.go b/pkg/apis/policy/helpers_test.go deleted file mode 100644 index b07c8e30f58..00000000000 --- a/pkg/apis/policy/helpers_test.go +++ /dev/null @@ -1,62 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package policy - -import ( - "reflect" - "testing" -) - -func TestPodAnnotationsFromSysctls(t *testing.T) { - type Test struct { - sysctls []string - expectedValue string - } - for _, test := range []Test{ - {sysctls: []string{"a.b"}, expectedValue: "a.b"}, - {sysctls: []string{"a.b", "c.d"}, expectedValue: "a.b,c.d"}, - {sysctls: []string{"a.b", "a.b"}, expectedValue: "a.b,a.b"}, - {sysctls: []string{}, expectedValue: ""}, - {sysctls: nil, expectedValue: ""}, - } { - a := PodAnnotationsFromSysctls(test.sysctls) - if a != test.expectedValue { - t.Errorf("wrong value for %v: got=%q wanted=%q", test.sysctls, a, test.expectedValue) - } - } -} - -func TestSysctlsFromPodSecurityPolicyAnnotation(t *testing.T) { - type Test struct { - expectedValue []string - annotation string - } - for _, test := range []Test{ - {annotation: "a.b", expectedValue: []string{"a.b"}}, - {annotation: "a.b,c.d", expectedValue: []string{"a.b", "c.d"}}, - {annotation: "a.b,a.b", expectedValue: []string{"a.b", "a.b"}}, - {annotation: "", expectedValue: []string{}}, - } { - sysctls, err := SysctlsFromPodSecurityPolicyAnnotation(test.annotation) - if err != nil { - t.Errorf("error for %q: %v", test.annotation, err) - } - if !reflect.DeepEqual(sysctls, test.expectedValue) { - t.Errorf("wrong value for %q: got=%v wanted=%v", test.annotation, sysctls, test.expectedValue) - } - } -} diff --git a/pkg/apis/policy/types.go b/pkg/apis/policy/types.go index 7d28caddcb4..298fcd0e4df 100644 --- a/pkg/apis/policy/types.go +++ b/pkg/apis/policy/types.go @@ -22,13 +22,6 @@ import ( api "k8s.io/kubernetes/pkg/apis/core" ) -const ( - // SysctlsPodSecurityPolicyAnnotationKey represents the key of a whitelist of - // allowed safe and unsafe sysctls in a pod spec. It's a comma-separated list of plain sysctl - // names or sysctl patterns (which end in *). The string "*" matches all sysctls. - SysctlsPodSecurityPolicyAnnotationKey string = "security.alpha.kubernetes.io/sysctls" -) - // PodDisruptionBudgetSpec is a description of a PodDisruptionBudget. type PodDisruptionBudgetSpec struct { // An eviction is allowed if at least "minAvailable" pods selected by @@ -215,6 +208,25 @@ type PodSecurityPolicySpec struct { // is allowed in the "Volumes" field. // +optional AllowedFlexVolumes []AllowedFlexVolume + // AllowedUnsafeSysctls is a list of explicitly allowed unsafe sysctls, defaults to none. + // Each entry is either a plain sysctl name or ends in "*" in which case it is considered + // as a prefix of allowed sysctls. Single * means all unsafe sysctls are allowed. + // Kubelet has to whitelist all allowed unsafe sysctls explicitly to avoid rejection. + // + // Examples: + // e.g. "foo/*" allows "foo/bar", "foo/baz", etc. + // e.g. "foo.*" allows "foo.bar", "foo.baz", etc. + // +optional + AllowedUnsafeSysctls []string + // ForbiddenSysctls is a list of explicitly forbidden sysctls, defaults to none. + // Each entry is either a plain sysctl name or ends in "*" in which case it is considered + // as a prefix of forbidden sysctls. Single * means all sysctls are forbidden. + // + // Examples: + // e.g. "foo/*" forbids "foo/bar", "foo/baz", etc. + // e.g. "foo.*" forbids "foo.bar", "foo.baz", etc. + // +optional + ForbiddenSysctls []string } // AllowedHostPath defines the host volume conditions that will be enabled by a policy diff --git a/pkg/apis/policy/validation/validation.go b/pkg/apis/policy/validation/validation.go index c1b592a858f..3ba54867208 100644 --- a/pkg/apis/policy/validation/validation.go +++ b/pkg/apis/policy/validation/validation.go @@ -118,6 +118,9 @@ func ValidatePodSecurityPolicySpec(spec *policy.PodSecurityPolicySpec, fldPath * allErrs = append(allErrs, validatePSPDefaultAllowPrivilegeEscalation(fldPath.Child("defaultAllowPrivilegeEscalation"), spec.DefaultAllowPrivilegeEscalation, spec.AllowPrivilegeEscalation)...) allErrs = append(allErrs, validatePSPAllowedHostPaths(fldPath.Child("allowedHostPaths"), spec.AllowedHostPaths)...) allErrs = append(allErrs, validatePSPAllowedFlexVolumes(fldPath.Child("allowedFlexVolumes"), spec.AllowedFlexVolumes)...) + allErrs = append(allErrs, validatePodSecurityPolicySysctls(fldPath.Child("allowedUnsafeSysctls"), spec.AllowedUnsafeSysctls)...) + allErrs = append(allErrs, validatePodSecurityPolicySysctls(fldPath.Child("forbiddenSysctls"), spec.ForbiddenSysctls)...) + allErrs = append(allErrs, validatePodSecurityPolicySysctlListsDoNotOverlap(fldPath.Child("allowedUnsafeSysctls"), fldPath.Child("forbiddenSysctls"), spec.AllowedUnsafeSysctls, spec.ForbiddenSysctls)...) return allErrs } @@ -138,15 +141,6 @@ func ValidatePodSecurityPolicySpecificAnnotations(annotations map[string]string, } } - sysctlAnnotation := annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] - sysctlFldPath := fldPath.Key(policy.SysctlsPodSecurityPolicyAnnotationKey) - sysctls, err := policy.SysctlsFromPodSecurityPolicyAnnotation(sysctlAnnotation) - if err != nil { - allErrs = append(allErrs, field.Invalid(sysctlFldPath, sysctlAnnotation, err.Error())) - } else { - allErrs = append(allErrs, validatePodSecurityPolicySysctls(sysctlFldPath, sysctls)...) - } - if p := annotations[seccomp.DefaultProfileAnnotationKey]; p != "" { allErrs = append(allErrs, apivalidation.ValidateSeccompProfile(p, fldPath.Key(seccomp.DefaultProfileAnnotationKey))...) } @@ -307,11 +301,55 @@ func IsValidSysctlPattern(name string) bool { return sysctlPatternRegexp.MatchString(name) } +func validatePodSecurityPolicySysctlListsDoNotOverlap(allowedSysctlsFldPath, forbiddenSysctlsFldPath *field.Path, allowedUnsafeSysctls, forbiddenSysctls []string) field.ErrorList { + allErrs := field.ErrorList{} + for i, allowedSysctl := range allowedUnsafeSysctls { + isAllowedSysctlPattern := false + allowedSysctlPrefix := "" + if strings.HasSuffix(allowedSysctl, "*") { + isAllowedSysctlPattern = true + allowedSysctlPrefix = strings.TrimSuffix(allowedSysctl, "*") + } + for j, forbiddenSysctl := range forbiddenSysctls { + isForbiddenSysctlPattern := false + forbiddenSysctlPrefix := "" + if strings.HasSuffix(forbiddenSysctl, "*") { + isForbiddenSysctlPattern = true + forbiddenSysctlPrefix = strings.TrimSuffix(forbiddenSysctl, "*") + } + switch { + case isAllowedSysctlPattern && isForbiddenSysctlPattern: + if strings.HasPrefix(allowedSysctlPrefix, forbiddenSysctlPrefix) { + allErrs = append(allErrs, field.Invalid(allowedSysctlsFldPath.Index(i), allowedUnsafeSysctls[i], fmt.Sprintf("sysctl overlaps with %v", forbiddenSysctl))) + } else if strings.HasPrefix(forbiddenSysctlPrefix, allowedSysctlPrefix) { + allErrs = append(allErrs, field.Invalid(forbiddenSysctlsFldPath.Index(j), forbiddenSysctls[j], fmt.Sprintf("sysctl overlaps with %v", allowedSysctl))) + } + case isAllowedSysctlPattern: + if strings.HasPrefix(forbiddenSysctl, allowedSysctlPrefix) { + allErrs = append(allErrs, field.Invalid(forbiddenSysctlsFldPath.Index(j), forbiddenSysctls[j], fmt.Sprintf("sysctl overlaps with %v", allowedSysctl))) + } + case isForbiddenSysctlPattern: + if strings.HasPrefix(allowedSysctl, forbiddenSysctlPrefix) { + allErrs = append(allErrs, field.Invalid(allowedSysctlsFldPath.Index(i), allowedUnsafeSysctls[i], fmt.Sprintf("sysctl overlaps with %v", forbiddenSysctl))) + } + default: + if allowedSysctl == forbiddenSysctl { + allErrs = append(allErrs, field.Invalid(allowedSysctlsFldPath.Index(i), allowedUnsafeSysctls[i], fmt.Sprintf("sysctl overlaps with %v", forbiddenSysctl))) + } + } + } + } + return allErrs +} + // validatePodSecurityPolicySysctls validates the sysctls fields of PodSecurityPolicy. func validatePodSecurityPolicySysctls(fldPath *field.Path, sysctls []string) field.ErrorList { allErrs := field.ErrorList{} + coversAll := false for i, s := range sysctls { - if !IsValidSysctlPattern(string(s)) { + if len(s) == 0 { + allErrs = append(allErrs, field.Invalid(fldPath.Index(i), sysctls[i], fmt.Sprintf("empty sysctl not allowed"))) + } else if !IsValidSysctlPattern(string(s)) { allErrs = append( allErrs, field.Invalid(fldPath.Index(i), sysctls[i], fmt.Sprintf("must have at most %d characters and match regex %s", @@ -319,9 +357,15 @@ func validatePodSecurityPolicySysctls(fldPath *field.Path, sysctls []string) fie SysctlPatternFmt, )), ) + } else if s[0] == '*' { + coversAll = true } } + if coversAll && len(sysctls) > 1 { + allErrs = append(allErrs, field.Forbidden(fldPath.Child("items"), fmt.Sprintf("if '*' is present, must not specify other sysctls"))) + } + return allErrs } diff --git a/pkg/apis/policy/validation/validation_test.go b/pkg/apis/policy/validation/validation_test.go index 681812a63cc..552fd8cecab 100644 --- a/pkg/apis/policy/validation/validation_test.go +++ b/pkg/apis/policy/validation/validation_test.go @@ -323,8 +323,19 @@ func TestValidatePodSecurityPolicy(t *testing.T) { apparmor.AllowedProfilesAnnotationKey: apparmor.ProfileRuntimeDefault + ",not-good", } - invalidSysctlPattern := validPSP() - invalidSysctlPattern.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "a.*.b" + invalidAllowedUnsafeSysctlPattern := validPSP() + invalidAllowedUnsafeSysctlPattern.Spec.AllowedUnsafeSysctls = []string{"a.*.b"} + + invalidForbiddenSysctlPattern := validPSP() + invalidForbiddenSysctlPattern.Spec.ForbiddenSysctls = []string{"a.*.b"} + + invalidOverlappingSysctls := validPSP() + invalidOverlappingSysctls.Spec.ForbiddenSysctls = []string{"kernel.*", "net.ipv4.ip_local_port_range"} + invalidOverlappingSysctls.Spec.AllowedUnsafeSysctls = []string{"kernel.shmmax", "net.ipv4.ip_local_port_range"} + + invalidDuplicatedSysctls := validPSP() + invalidDuplicatedSysctls.Spec.ForbiddenSysctls = []string{"net.ipv4.ip_local_port_range"} + invalidDuplicatedSysctls.Spec.AllowedUnsafeSysctls = []string{"net.ipv4.ip_local_port_range"} invalidSeccompDefault := validPSP() invalidSeccompDefault.Annotations = map[string]string{ @@ -456,11 +467,26 @@ func TestValidatePodSecurityPolicy(t *testing.T) { errorType: field.ErrorTypeInvalid, errorDetail: "invalid AppArmor profile name: \"not-good\"", }, - "invalid sysctl pattern": { - psp: invalidSysctlPattern, + "invalid allowed unsafe sysctl pattern": { + psp: invalidAllowedUnsafeSysctlPattern, errorType: field.ErrorTypeInvalid, errorDetail: fmt.Sprintf("must have at most 253 characters and match regex %s", SysctlPatternFmt), }, + "invalid forbidden sysctl pattern": { + psp: invalidForbiddenSysctlPattern, + errorType: field.ErrorTypeInvalid, + errorDetail: fmt.Sprintf("must have at most 253 characters and match regex %s", SysctlPatternFmt), + }, + "invalid overlapping sysctl pattern": { + psp: invalidOverlappingSysctls, + errorType: field.ErrorTypeInvalid, + errorDetail: fmt.Sprintf("sysctl overlaps with %s", invalidOverlappingSysctls.Spec.ForbiddenSysctls[0]), + }, + "invalid duplicated sysctls": { + psp: invalidDuplicatedSysctls, + errorType: field.ErrorTypeInvalid, + errorDetail: fmt.Sprintf("sysctl overlaps with %s", invalidDuplicatedSysctls.Spec.AllowedUnsafeSysctls[0]), + }, "invalid seccomp default profile": { psp: invalidSeccompDefault, errorType: field.ErrorTypeInvalid, @@ -561,8 +587,11 @@ func TestValidatePodSecurityPolicy(t *testing.T) { apparmor.AllowedProfilesAnnotationKey: apparmor.ProfileRuntimeDefault + "," + apparmor.ProfileNamePrefix + "foo", } - withSysctl := validPSP() - withSysctl.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "net.*" + withForbiddenSysctl := validPSP() + withForbiddenSysctl.Spec.ForbiddenSysctls = []string{"net.*"} + + withAllowedUnsafeSysctl := validPSP() + withAllowedUnsafeSysctl.Spec.AllowedUnsafeSysctls = []string{"net.ipv4.tcp_max_syn_backlog"} validSeccomp := validPSP() validSeccomp.Annotations = map[string]string{ @@ -607,8 +636,11 @@ func TestValidatePodSecurityPolicy(t *testing.T) { "valid AppArmor annotations": { psp: validAppArmor, }, - "with network sysctls": { - psp: withSysctl, + "with network sysctls forbidden": { + psp: withForbiddenSysctl, + }, + "with unsafe net.ipv4.tcp_max_syn_backlog sysctl allowed": { + psp: withAllowedUnsafeSysctl, }, "valid seccomp annotations": { psp: validSeccomp, diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 0207a88eddc..b448a00839a 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -100,6 +100,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/volumemanager" "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates" "k8s.io/kubernetes/pkg/security/apparmor" + sysctlwhitelist "k8s.io/kubernetes/pkg/security/podsecuritypolicy/sysctl" utildbus "k8s.io/kubernetes/pkg/util/dbus" kubeio "k8s.io/kubernetes/pkg/util/io" utilipt "k8s.io/kubernetes/pkg/util/iptables" @@ -837,20 +838,16 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, if err != nil { return nil, err } - safeWhitelist, err := sysctl.NewWhitelist(sysctl.SafeSysctlWhitelist(), v1.SysctlsPodAnnotationKey) - if err != nil { - return nil, err - } - // Safe, whitelisted sysctls can always be used as unsafe sysctls in the spec + + // Safe, whitelisted sysctls can always be used as unsafe sysctls in the spec. // Hence, we concatenate those two lists. - safeAndUnsafeSysctls := append(sysctl.SafeSysctlWhitelist(), allowedUnsafeSysctls...) - unsafeWhitelist, err := sysctl.NewWhitelist(safeAndUnsafeSysctls, v1.UnsafeSysctlsPodAnnotationKey) + safeAndUnsafeSysctls := append(sysctlwhitelist.SafeSysctlWhitelist(), allowedUnsafeSysctls...) + sysctlsWhitelist, err := sysctl.NewWhitelist(safeAndUnsafeSysctls) if err != nil { return nil, err } klet.admitHandlers.AddPodAdmitHandler(runtimeSupport) - klet.admitHandlers.AddPodAdmitHandler(safeWhitelist) - klet.admitHandlers.AddPodAdmitHandler(unsafeWhitelist) + klet.admitHandlers.AddPodAdmitHandler(sysctlsWhitelist) // enable active deadline handler activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, kubeDeps.Recorder, klet.clock) diff --git a/pkg/kubelet/kuberuntime/helpers.go b/pkg/kubelet/kuberuntime/helpers.go index a1e6119a2a9..d52faa06407 100644 --- a/pkg/kubelet/kuberuntime/helpers.go +++ b/pkg/kubelet/kuberuntime/helpers.go @@ -26,7 +26,6 @@ import ( "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" utilfeature "k8s.io/apiserver/pkg/util/feature" - v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/features" runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -191,24 +190,6 @@ func toKubeRuntimeStatus(status *runtimeapi.RuntimeStatus) *kubecontainer.Runtim return &kubecontainer.RuntimeStatus{Conditions: conditions} } -// getSysctlsFromAnnotations gets sysctls and unsafeSysctls from annotations. -func getSysctlsFromAnnotations(annotations map[string]string) (map[string]string, error) { - apiSysctls, apiUnsafeSysctls, err := v1helper.SysctlsFromPodAnnotations(annotations) - if err != nil { - return nil, err - } - - sysctls := make(map[string]string) - for _, c := range apiSysctls { - sysctls[c.Name] = c.Value - } - for _, c := range apiUnsafeSysctls { - sysctls[c.Name] = c.Value - } - - return sysctls, nil -} - // getSeccompProfileFromAnnotations gets seccomp profile from annotations. // It gets pod's profile if containerName is empty. func (m *kubeGenericRuntimeManager) getSeccompProfileFromAnnotations(annotations map[string]string, containerName string) string { diff --git a/pkg/kubelet/kuberuntime/helpers_test.go b/pkg/kubelet/kuberuntime/helpers_test.go index f10fc04a236..0db2ad4d7ed 100644 --- a/pkg/kubelet/kuberuntime/helpers_test.go +++ b/pkg/kubelet/kuberuntime/helpers_test.go @@ -56,46 +56,6 @@ func TestStableKey(t *testing.T) { assert.NotEqual(t, oldKey, newKey) } -// TestGetSystclsFromAnnotations tests the logic of getting sysctls from annotations. -func TestGetSystclsFromAnnotations(t *testing.T) { - tests := []struct { - annotations map[string]string - expectedSysctls map[string]string - }{{ - annotations: map[string]string{ - v1.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000", - v1.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000", - }, - expectedSysctls: map[string]string{ - "kernel.shmmni": "32768", - "kernel.shmmax": "1000000000", - "knet.ipv4.route.min_pmtu": "1000", - }, - }, { - annotations: map[string]string{ - v1.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000", - }, - expectedSysctls: map[string]string{ - "kernel.shmmni": "32768", - "kernel.shmmax": "1000000000", - }, - }, { - annotations: map[string]string{ - v1.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000", - }, - expectedSysctls: map[string]string{ - "knet.ipv4.route.min_pmtu": "1000", - }, - }} - - for i, test := range tests { - actualSysctls, err := getSysctlsFromAnnotations(test.annotations) - assert.NoError(t, err, "TestCase[%d]", i) - assert.Len(t, actualSysctls, len(test.expectedSysctls), "TestCase[%d]", i) - assert.Equal(t, test.expectedSysctls, actualSysctls, "TestCase[%d]", i) - } -} - func TestToKubeContainer(t *testing.T) { c := &runtimeapi.Container{ Id: "test-id", diff --git a/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go b/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go index cddf3385d90..1579b768372 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go @@ -134,10 +134,13 @@ func (m *kubeGenericRuntimeManager) generatePodSandboxLinuxConfig(pod *v1.Pod) ( }, } - sysctls, err := getSysctlsFromAnnotations(pod.Annotations) - if err != nil { - return nil, fmt.Errorf("failed to get sysctls from annotations %v for pod %q: %v", pod.Annotations, format.Pod(pod), err) + sysctls := make(map[string]string) + if pod.Spec.SecurityContext != nil { + for _, c := range pod.Spec.SecurityContext.Sysctls { + sysctls[c.Name] = c.Value + } } + lc.Sysctls = sysctls if pod.Spec.SecurityContext != nil { diff --git a/pkg/kubelet/sysctl/runtime.go b/pkg/kubelet/sysctl/runtime.go index a7aa49e9ba5..8b37c65c55a 100644 --- a/pkg/kubelet/sysctl/runtime.go +++ b/pkg/kubelet/sysctl/runtime.go @@ -19,7 +19,6 @@ package sysctl import ( "fmt" - v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/lifecycle" ) @@ -83,17 +82,11 @@ func NewRuntimeAdmitHandler(runtime container.Runtime) (*runtimeAdmitHandler, er // Admit checks whether the runtime supports sysctls. func (w *runtimeAdmitHandler) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult { - sysctls, unsafeSysctls, err := v1helper.SysctlsFromPodAnnotations(attrs.Pod.Annotations) - if err != nil { - return lifecycle.PodAdmitResult{ - Admit: false, - Reason: AnnotationInvalidReason, - Message: fmt.Sprintf("invalid sysctl annotation: %v", err), - } - } + if attrs.Pod.Spec.SecurityContext != nil { - if len(sysctls)+len(unsafeSysctls) > 0 { - return w.result + if len(attrs.Pod.Spec.SecurityContext.Sysctls) > 0 { + return w.result + } } return lifecycle.PodAdmitResult{ diff --git a/pkg/kubelet/sysctl/whitelist.go b/pkg/kubelet/sysctl/whitelist.go index 8269e94bb37..067409a7e8f 100644 --- a/pkg/kubelet/sysctl/whitelist.go +++ b/pkg/kubelet/sysctl/whitelist.go @@ -20,7 +20,6 @@ import ( "fmt" "strings" - v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/apis/core/validation" policyvalidation "k8s.io/kubernetes/pkg/apis/policy/validation" "k8s.io/kubernetes/pkg/kubelet/lifecycle" @@ -31,36 +30,21 @@ const ( ForbiddenReason = "SysctlForbidden" ) -// SafeSysctlWhitelist returns the whitelist of safe sysctls and safe sysctl patterns (ending in *). -// -// A sysctl is called safe iff -// - it is namespaced in the container or the pod -// - it is isolated, i.e. has no influence on any other pod on the same node. -func SafeSysctlWhitelist() []string { - return []string{ - "kernel.shm_rmid_forced", - "net.ipv4.ip_local_port_range", - "net.ipv4.tcp_syncookies", - } -} - // patternWhitelist takes a list of sysctls or sysctl patterns (ending in *) and // checks validity via a sysctl and prefix map, rejecting those which are not known // to be namespaced. type patternWhitelist struct { - sysctls map[string]Namespace - prefixes map[string]Namespace - annotationKey string + sysctls map[string]Namespace + prefixes map[string]Namespace } var _ lifecycle.PodAdmitHandler = &patternWhitelist{} // NewWhitelist creates a new Whitelist from a list of sysctls and sysctl pattern (ending in *). -func NewWhitelist(patterns []string, annotationKey string) (*patternWhitelist, error) { +func NewWhitelist(patterns []string) (*patternWhitelist, error) { w := &patternWhitelist{ - sysctls: map[string]Namespace{}, - prefixes: map[string]Namespace{}, - annotationKey: annotationKey, + sysctls: map[string]Namespace{}, + prefixes: map[string]Namespace{}, } for _, s := range patterns { @@ -121,32 +105,22 @@ func (w *patternWhitelist) validateSysctl(sysctl string, hostNet, hostIPC bool) return fmt.Errorf("%q not whitelisted", sysctl) } -// Admit checks that all sysctls given in annotations v1.SysctlsPodAnnotationKey and v1.UnsafeSysctlsPodAnnotationKey +// Admit checks that all sysctls given in pod's security context // are valid according to the whitelist. func (w *patternWhitelist) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult { pod := attrs.Pod - a := pod.Annotations[w.annotationKey] - if a == "" { + if pod.Spec.SecurityContext == nil || len(pod.Spec.SecurityContext.Sysctls) == 0 { return lifecycle.PodAdmitResult{ Admit: true, } } - sysctls, err := v1helper.SysctlsFromPodAnnotation(a) - if err != nil { - return lifecycle.PodAdmitResult{ - Admit: false, - Reason: AnnotationInvalidReason, - Message: fmt.Sprintf("invalid %s annotation: %v", w.annotationKey, err), - } - } - var hostNet, hostIPC bool if pod.Spec.SecurityContext != nil { hostNet = pod.Spec.HostNetwork hostIPC = pod.Spec.HostIPC } - for _, s := range sysctls { + for _, s := range pod.Spec.SecurityContext.Sysctls { if err := w.validateSysctl(s.Name, hostNet, hostIPC); err != nil { return lifecycle.PodAdmitResult{ Admit: false, diff --git a/pkg/kubelet/sysctl/whitelist_test.go b/pkg/kubelet/sysctl/whitelist_test.go index 77f253337f9..0b6401e3423 100644 --- a/pkg/kubelet/sysctl/whitelist_test.go +++ b/pkg/kubelet/sysctl/whitelist_test.go @@ -19,7 +19,7 @@ package sysctl import ( "testing" - "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/security/podsecuritypolicy/sysctl" ) func TestNewWhitelist(t *testing.T) { @@ -35,7 +35,7 @@ func TestNewWhitelist(t *testing.T) { {sysctls: []string{"net.*.foo"}, err: true}, {sysctls: []string{"foo"}, err: true}, } { - _, err := NewWhitelist(append(SafeSysctlWhitelist(), test.sysctls...), v1.SysctlsPodAnnotationKey) + _, err := NewWhitelist(append(sysctl.SafeSysctlWhitelist(), test.sysctls...)) if test.err && err == nil { t.Errorf("expected an error creating a whitelist for %v", test.sysctls) } else if !test.err && err != nil { @@ -65,7 +65,7 @@ func TestWhitelist(t *testing.T) { {sysctl: "kernel.sem", hostIPC: true}, } - w, err := NewWhitelist(append(SafeSysctlWhitelist(), "kernel.msg*", "kernel.sem"), v1.SysctlsPodAnnotationKey) + w, err := NewWhitelist(append(sysctl.SafeSysctlWhitelist(), "kernel.msg*", "kernel.sem")) if err != nil { t.Fatalf("failed to create whitelist: %v", err) } diff --git a/pkg/security/podsecuritypolicy/factory.go b/pkg/security/podsecuritypolicy/factory.go index a198058e6eb..05c7f0d8bb3 100644 --- a/pkg/security/podsecuritypolicy/factory.go +++ b/pkg/security/podsecuritypolicy/factory.go @@ -77,15 +77,7 @@ func (f *simpleStrategyFactory) CreateStrategies(psp *policy.PodSecurityPolicy, errs = append(errs, err) } - var unsafeSysctls []string - if ann, found := psp.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey]; found { - var err error - unsafeSysctls, err = policy.SysctlsFromPodSecurityPolicyAnnotation(ann) - if err != nil { - errs = append(errs, err) - } - } - sysctlsStrat := createSysctlsStrategy(unsafeSysctls) + sysctlsStrat := createSysctlsStrategy(sysctl.SafeSysctlWhitelist(), psp.Spec.AllowedUnsafeSysctls, psp.Spec.ForbiddenSysctls) if len(errs) > 0 { return nil, errors.NewAggregate(errs) @@ -170,7 +162,7 @@ func createCapabilitiesStrategy(defaultAddCaps, requiredDropCaps, allowedCaps [] return capabilities.NewDefaultCapabilities(defaultAddCaps, requiredDropCaps, allowedCaps) } -// createSysctlsStrategy creates a new unsafe sysctls strategy. -func createSysctlsStrategy(sysctlsPatterns []string) sysctl.SysctlsStrategy { - return sysctl.NewMustMatchPatterns(sysctlsPatterns) +// createSysctlsStrategy creates a new sysctls strategy. +func createSysctlsStrategy(safeWhitelist, allowedUnsafeSysctls, forbiddenSysctls []string) sysctl.SysctlsStrategy { + return sysctl.NewMustMatchPatterns(safeWhitelist, allowedUnsafeSysctls, forbiddenSysctls) } diff --git a/pkg/security/podsecuritypolicy/provider_test.go b/pkg/security/podsecuritypolicy/provider_test.go index 1fd278a8b47..a7b8174a967 100644 --- a/pkg/security/podsecuritypolicy/provider_test.go +++ b/pkg/security/podsecuritypolicy/provider_test.go @@ -267,17 +267,34 @@ func TestValidatePodSecurityContextFailures(t *testing.T) { }, } - failOtherSysctlsAllowedPSP := defaultPSP() - failOtherSysctlsAllowedPSP.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "bar,abc" + failSysctlDisallowedPSP := defaultPSP() + failSysctlDisallowedPSP.Spec.ForbiddenSysctls = []string{"kernel.shm_rmid_forced"} - failNoSysctlAllowedPSP := defaultPSP() - failNoSysctlAllowedPSP.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "" + failNoSafeSysctlAllowedPSP := defaultPSP() + failNoSafeSysctlAllowedPSP.Spec.ForbiddenSysctls = []string{"*"} - failSafeSysctlFooPod := defaultPod() - failSafeSysctlFooPod.Annotations[api.SysctlsPodAnnotationKey] = "foo=1" + failAllUnsafeSysctlsPSP := defaultPSP() + failAllUnsafeSysctlsPSP.Spec.AllowedUnsafeSysctls = []string{} - failUnsafeSysctlFooPod := defaultPod() - failUnsafeSysctlFooPod.Annotations[api.UnsafeSysctlsPodAnnotationKey] = "foo=1" + failSafeSysctlKernelPod := defaultPod() + failSafeSysctlKernelPod.Spec.SecurityContext = &api.PodSecurityContext{ + Sysctls: []api.Sysctl{ + { + Name: "kernel.shm_rmid_forced", + Value: "1", + }, + }, + } + + failUnsafeSysctlPod := defaultPod() + failUnsafeSysctlPod.Spec.SecurityContext = &api.PodSecurityContext{ + Sysctls: []api.Sysctl{ + { + Name: "kernel.sem", + Value: "32000", + }, + }, + } failSeccompProfilePod := defaultPod() failSeccompProfilePod.Annotations = map[string]string{api.SeccompPodAnnotationKey: "foo"} @@ -359,25 +376,20 @@ func TestValidatePodSecurityContextFailures(t *testing.T) { psp: failHostPathReadOnlyPSP, expectedError: "must be read-only", }, - "failSafeSysctlFooPod with failNoSysctlAllowedSCC": { - pod: failSafeSysctlFooPod, - psp: failNoSysctlAllowedPSP, - expectedError: "sysctls are not allowed", + "failSafeSysctlKernelPod with failNoSafeSysctlAllowedPSP": { + pod: failSafeSysctlKernelPod, + psp: failNoSafeSysctlAllowedPSP, + expectedError: "sysctl \"kernel.shm_rmid_forced\" is not allowed", }, - "failUnsafeSysctlFooPod with failNoSysctlAllowedSCC": { - pod: failUnsafeSysctlFooPod, - psp: failNoSysctlAllowedPSP, - expectedError: "sysctls are not allowed", + "failSafeSysctlKernelPod with failSysctlDisallowedPSP": { + pod: failSafeSysctlKernelPod, + psp: failSysctlDisallowedPSP, + expectedError: "sysctl \"kernel.shm_rmid_forced\" is not allowed", }, - "failSafeSysctlFooPod with failOtherSysctlsAllowedSCC": { - pod: failSafeSysctlFooPod, - psp: failOtherSysctlsAllowedPSP, - expectedError: "sysctl \"foo\" is not allowed", - }, - "failUnsafeSysctlFooPod with failOtherSysctlsAllowedSCC": { - pod: failUnsafeSysctlFooPod, - psp: failOtherSysctlsAllowedPSP, - expectedError: "sysctl \"foo\" is not allowed", + "failUnsafeSysctlPod with failAllUnsafeSysctlsPSP": { + pod: failUnsafeSysctlPod, + psp: failAllUnsafeSysctlsPSP, + expectedError: "unsafe sysctl \"kernel.sem\" is not allowed", }, "failInvalidSeccomp": { pod: failSeccompProfilePod, @@ -707,14 +719,29 @@ func TestValidatePodSecurityContextSuccess(t *testing.T) { {PathPrefix: "/foo"}, } - sysctlAllowFooPSP := defaultPSP() - sysctlAllowFooPSP.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "foo" + sysctlAllowAllPSP := defaultPSP() + sysctlAllowAllPSP.Spec.ForbiddenSysctls = []string{} + sysctlAllowAllPSP.Spec.AllowedUnsafeSysctls = []string{"*"} - safeSysctlFooPod := defaultPod() - safeSysctlFooPod.Annotations[api.SysctlsPodAnnotationKey] = "foo=1" + safeSysctlKernelPod := defaultPod() + safeSysctlKernelPod.Spec.SecurityContext = &api.PodSecurityContext{ + Sysctls: []api.Sysctl{ + { + Name: "kernel.shm_rmid_forced", + Value: "1", + }, + }, + } - unsafeSysctlFooPod := defaultPod() - unsafeSysctlFooPod.Annotations[api.UnsafeSysctlsPodAnnotationKey] = "foo=1" + unsafeSysctlKernelPod := defaultPod() + unsafeSysctlKernelPod.Spec.SecurityContext = &api.PodSecurityContext{ + Sysctls: []api.Sysctl{ + { + Name: "kernel.sem", + Value: "32000", + }, + }, + } seccompPSP := defaultPSP() seccompPSP.Annotations = map[string]string{ @@ -766,21 +793,13 @@ func TestValidatePodSecurityContextSuccess(t *testing.T) { pod: seLinuxPod, psp: seLinuxPSP, }, - "pass sysctl specific profile with safe sysctl": { - pod: safeSysctlFooPod, - psp: sysctlAllowFooPSP, + "pass sysctl specific profile with safe kernel sysctl": { + pod: safeSysctlKernelPod, + psp: sysctlAllowAllPSP, }, - "pass sysctl specific profile with unsafe sysctl": { - pod: unsafeSysctlFooPod, - psp: sysctlAllowFooPSP, - }, - "pass empty profile with safe sysctl": { - pod: safeSysctlFooPod, - psp: defaultPSP(), - }, - "pass empty profile with unsafe sysctl": { - pod: unsafeSysctlFooPod, - psp: defaultPSP(), + "pass sysctl specific profile with unsafe kernel sysctl": { + pod: unsafeSysctlKernelPod, + psp: sysctlAllowAllPSP, }, "pass hostDir allowed directory validating PSP": { pod: hostPathDirPod, diff --git a/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns.go b/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns.go index b59dd9d3fff..6c33276c563 100644 --- a/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns.go +++ b/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns.go @@ -22,12 +22,26 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" api "k8s.io/kubernetes/pkg/apis/core" - "k8s.io/kubernetes/pkg/apis/core/helper" ) +// SafeSysctlWhitelist returns the whitelist of safe sysctls and safe sysctl patterns (ending in *). +// +// A sysctl is called safe iff +// - it is namespaced in the container or the pod +// - it is isolated, i.e. has no influence on any other pod on the same node. +func SafeSysctlWhitelist() []string { + return []string{ + "kernel.shm_rmid_forced", + "net.ipv4.ip_local_port_range", + "net.ipv4.tcp_syncookies", + } +} + // mustMatchPatterns implements the SysctlsStrategy interface type mustMatchPatterns struct { - patterns []string + safeWhitelist []string + allowedUnsafeSysctls []string + forbiddenSysctls []string } var ( @@ -38,56 +52,75 @@ var ( // NewMustMatchPatterns creates a new mustMatchPatterns strategy that will provide validation. // Passing nil means the default pattern, passing an empty list means to disallow all sysctls. -func NewMustMatchPatterns(patterns []string) SysctlsStrategy { - if patterns == nil { - patterns = defaultSysctlsPatterns - } +func NewMustMatchPatterns(safeWhitelist, allowedUnsafeSysctls, forbiddenSysctls []string) SysctlsStrategy { return &mustMatchPatterns{ - patterns: patterns, + safeWhitelist: safeWhitelist, + allowedUnsafeSysctls: allowedUnsafeSysctls, + forbiddenSysctls: forbiddenSysctls, } } +func (s *mustMatchPatterns) isForbidden(sysctlName string) bool { + // Is the sysctl forbidden? + for _, s := range s.forbiddenSysctls { + if strings.HasSuffix(s, "*") { + prefix := strings.TrimSuffix(s, "*") + if strings.HasPrefix(sysctlName, prefix) { + return true + } + } else if sysctlName == s { + return true + } + } + return false +} + +func (s *mustMatchPatterns) isSafe(sysctlName string) bool { + for _, ws := range s.safeWhitelist { + if sysctlName == ws { + return true + } + } + return false +} + +func (s *mustMatchPatterns) isAllowedUnsafe(sysctlName string) bool { + for _, s := range s.allowedUnsafeSysctls { + if strings.HasSuffix(s, "*") { + prefix := strings.TrimSuffix(s, "*") + if strings.HasPrefix(sysctlName, prefix) { + return true + } + } else if sysctlName == s { + return true + } + } + return false +} + // Validate ensures that the specified values fall within the range of the strategy. func (s *mustMatchPatterns) Validate(pod *api.Pod) field.ErrorList { allErrs := field.ErrorList{} - allErrs = append(allErrs, s.validateAnnotation(pod, api.SysctlsPodAnnotationKey)...) - allErrs = append(allErrs, s.validateAnnotation(pod, api.UnsafeSysctlsPodAnnotationKey)...) - return allErrs -} -func (s *mustMatchPatterns) validateAnnotation(pod *api.Pod, key string) field.ErrorList { - allErrs := field.ErrorList{} - - fieldPath := field.NewPath("pod", "metadata", "annotations").Key(key) - - sysctls, err := helper.SysctlsFromPodAnnotation(pod.Annotations[key]) - if err != nil { - allErrs = append(allErrs, field.Invalid(fieldPath, pod.Annotations[key], err.Error())) + var sysctls []api.Sysctl + if pod.Spec.SecurityContext != nil { + sysctls = pod.Spec.SecurityContext.Sysctls } - if len(sysctls) > 0 { - if len(s.patterns) == 0 { - allErrs = append(allErrs, field.Invalid(fieldPath, pod.Annotations[key], "sysctls are not allowed")) - } else { - for i, sysctl := range sysctls { - allErrs = append(allErrs, s.ValidateSysctl(sysctl.Name, fieldPath.Index(i))...) - } + fieldPath := field.NewPath("pod", "spec", "securityContext").Child("sysctls") + + for i, sysctl := range sysctls { + switch { + case s.isForbidden(sysctl.Name): + allErrs = append(allErrs, field.ErrorList{field.Forbidden(fieldPath.Index(i), fmt.Sprintf("sysctl %q is not allowed", sysctl.Name))}...) + case s.isSafe(sysctl.Name): + continue + case s.isAllowedUnsafe(sysctl.Name): + continue + default: + allErrs = append(allErrs, field.ErrorList{field.Forbidden(fieldPath.Index(i), fmt.Sprintf("unsafe sysctl %q is not allowed", sysctl.Name))}...) } } return allErrs } - -func (s *mustMatchPatterns) ValidateSysctl(sysctlName string, fldPath *field.Path) field.ErrorList { - for _, s := range s.patterns { - if s[len(s)-1] == '*' { - prefix := s[:len(s)-1] - if strings.HasPrefix(sysctlName, string(prefix)) { - return nil - } - } else if sysctlName == s { - return nil - } - } - return field.ErrorList{field.Forbidden(fldPath, fmt.Sprintf("sysctl %q is not allowed", sysctlName))} -} diff --git a/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns_test.go b/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns_test.go index 7622c826695..9bc7b0c6aff 100644 --- a/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns_test.go +++ b/pkg/security/podsecuritypolicy/sysctl/mustmatchpatterns_test.go @@ -17,48 +17,47 @@ limitations under the License. package sysctl import ( - api "k8s.io/kubernetes/pkg/apis/core" - "k8s.io/kubernetes/pkg/apis/core/helper" "testing" + + api "k8s.io/kubernetes/pkg/apis/core" ) func TestValidate(t *testing.T) { tests := map[string]struct { - patterns []string - allowed []string - disallowed []string + whitelist []string + forbiddenSafe []string + allowedUnsafe []string + allowed []string + disallowed []string }{ // no container requests - "nil": { - patterns: nil, - allowed: []string{"foo"}, + "with allow all": { + whitelist: []string{"foo"}, + allowed: []string{"foo"}, }, "empty": { - patterns: []string{}, - disallowed: []string{"foo"}, + whitelist: []string{"foo"}, + forbiddenSafe: []string{"*"}, + disallowed: []string{"foo"}, }, "without wildcard": { - patterns: []string{"a", "a.b"}, + whitelist: []string{"a", "a.b"}, allowed: []string{"a", "a.b"}, disallowed: []string{"b"}, }, - "with catch-all wildcard": { - patterns: []string{"*"}, - allowed: []string{"a", "a.b"}, - }, "with catch-all wildcard and non-wildcard": { - patterns: []string{"a.b.c", "*"}, - allowed: []string{"a", "a.b", "a.b.c", "b"}, + allowedUnsafe: []string{"a.b.c", "*"}, + allowed: []string{"a", "a.b", "a.b.c", "b"}, }, "without catch-all wildcard": { - patterns: []string{"a.*", "b.*", "c.d.e", "d.e.f.*"}, - allowed: []string{"a.b", "b.c", "c.d.e", "d.e.f.g.h"}, - disallowed: []string{"a", "b", "c", "c.d", "d.e", "d.e.f"}, + allowedUnsafe: []string{"a.*", "b.*", "c.d.e", "d.e.f.*"}, + allowed: []string{"a.b", "b.c", "c.d.e", "d.e.f.g.h"}, + disallowed: []string{"a", "b", "c", "c.d", "d.e", "d.e.f"}, }, } for k, v := range tests { - strategy := NewMustMatchPatterns(v.patterns) + strategy := NewMustMatchPatterns(v.whitelist, v.allowedUnsafe, v.forbiddenSafe) pod := &api.Pod{} errs := strategy.Validate(pod) @@ -66,37 +65,40 @@ func TestValidate(t *testing.T) { t.Errorf("%s: unexpected validaton errors for empty sysctls: %v", k, errs) } - sysctls := []api.Sysctl{} - for _, s := range v.allowed { - sysctls = append(sysctls, api.Sysctl{ - Name: s, - Value: "dummy", - }) - } - testAllowed := func(key string, category string) { - pod.Annotations = map[string]string{ - key: helper.PodAnnotationsFromSysctls(sysctls), + testAllowed := func() { + sysctls := []api.Sysctl{} + for _, s := range v.allowed { + sysctls = append(sysctls, api.Sysctl{ + Name: s, + Value: "dummy", + }) + } + pod.Spec.SecurityContext = &api.PodSecurityContext{ + Sysctls: sysctls, } errs = strategy.Validate(pod) if len(errs) != 0 { - t.Errorf("%s: unexpected validaton errors for %s sysctls: %v", k, category, errs) + t.Errorf("%s: unexpected validaton errors for sysctls: %v", k, errs) } } - testDisallowed := func(key string, category string) { + testDisallowed := func() { for _, s := range v.disallowed { - pod.Annotations = map[string]string{ - key: helper.PodAnnotationsFromSysctls([]api.Sysctl{{Name: s, Value: "dummy"}}), + pod.Spec.SecurityContext = &api.PodSecurityContext{ + Sysctls: []api.Sysctl{ + { + Name: s, + Value: "dummy", + }, + }, } errs = strategy.Validate(pod) if len(errs) == 0 { - t.Errorf("%s: expected error for %s sysctl %q", k, category, s) + t.Errorf("%s: expected error for sysctl %q", k, s) } } } - testAllowed(api.SysctlsPodAnnotationKey, "safe") - testAllowed(api.UnsafeSysctlsPodAnnotationKey, "unsafe") - testDisallowed(api.SysctlsPodAnnotationKey, "safe") - testDisallowed(api.UnsafeSysctlsPodAnnotationKey, "unsafe") + testAllowed() + testDisallowed() } } diff --git a/plugin/pkg/admission/security/podsecuritypolicy/admission_test.go b/plugin/pkg/admission/security/podsecuritypolicy/admission_test.go index e4644558d3d..92d4a36da5a 100644 --- a/plugin/pkg/admission/security/podsecuritypolicy/admission_test.go +++ b/plugin/pkg/admission/security/podsecuritypolicy/admission_test.go @@ -35,7 +35,6 @@ import ( "k8s.io/apiserver/pkg/authorization/authorizerfactory" "k8s.io/kubernetes/pkg/api/legacyscheme" kapi "k8s.io/kubernetes/pkg/apis/core" - "k8s.io/kubernetes/pkg/apis/core/helper" "k8s.io/kubernetes/pkg/apis/policy" informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion" "k8s.io/kubernetes/pkg/controller" @@ -1608,37 +1607,40 @@ func TestAdmitSysctls(t *testing.T) { } return sysctls } - pod.Annotations[kapi.SysctlsPodAnnotationKey] = helper.PodAnnotationsFromSysctls(dummySysctls(safeSysctls)) - pod.Annotations[kapi.UnsafeSysctlsPodAnnotationKey] = helper.PodAnnotationsFromSysctls(dummySysctls(unsafeSysctls)) + pod.Spec.SecurityContext = &kapi.PodSecurityContext{ + Sysctls: dummySysctls(append(safeSysctls, unsafeSysctls...)), + } + return pod } - noSysctls := restrictivePSP() - noSysctls.Name = "no sysctls" + safeSysctls := restrictivePSP() + safeSysctls.Name = "no sysctls" - emptySysctls := restrictivePSP() - emptySysctls.Name = "empty sysctls" - emptySysctls.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "" + noSysctls := restrictivePSP() + noSysctls.Name = "empty sysctls" + noSysctls.Spec.ForbiddenSysctls = []string{"*"} mixedSysctls := restrictivePSP() mixedSysctls.Name = "wildcard sysctls" - mixedSysctls.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "a.*,b.*,c,d.e.f" + mixedSysctls.Spec.ForbiddenSysctls = []string{"net.*"} + mixedSysctls.Spec.AllowedUnsafeSysctls = []string{"a.*", "b.*"} - aSysctl := restrictivePSP() - aSysctl.Name = "a sysctl" - aSysctl.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "a" + aUnsafeSysctl := restrictivePSP() + aUnsafeSysctl.Name = "a sysctl" + aUnsafeSysctl.Spec.AllowedUnsafeSysctls = []string{"a"} - bSysctl := restrictivePSP() - bSysctl.Name = "b sysctl" - bSysctl.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "b" + bUnsafeSysctl := restrictivePSP() + bUnsafeSysctl.Name = "b sysctl" + bUnsafeSysctl.Spec.AllowedUnsafeSysctls = []string{"b"} - cSysctl := restrictivePSP() - cSysctl.Name = "c sysctl" - cSysctl.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "c" + cUnsafeSysctl := restrictivePSP() + cUnsafeSysctl.Name = "c sysctl" + cUnsafeSysctl.Spec.AllowedUnsafeSysctls = []string{"c"} catchallSysctls := restrictivePSP() catchallSysctls.Name = "catchall sysctl" - catchallSysctls.Annotations[policy.SysctlsPodSecurityPolicyAnnotationKey] = "*" + catchallSysctls.Spec.AllowedUnsafeSysctls = []string{"*"} tests := map[string]struct { pod *kapi.Pod @@ -1647,148 +1649,102 @@ func TestAdmitSysctls(t *testing.T) { shouldPassValidate bool expectedPSP string }{ - "pod without unsafe sysctls request allowed under noSysctls PSP": { + "pod without any sysctls request allowed under safeSysctls PSP": { + pod: goodPod(), + psps: []*policy.PodSecurityPolicy{safeSysctls}, + shouldPassAdmit: true, + shouldPassValidate: true, + expectedPSP: safeSysctls.Name, + }, + "pod without any sysctls request allowed under noSysctls PSP": { pod: goodPod(), psps: []*policy.PodSecurityPolicy{noSysctls}, shouldPassAdmit: true, shouldPassValidate: true, expectedPSP: noSysctls.Name, }, - "pod without any sysctls request allowed under emptySysctls PSP": { - pod: goodPod(), - psps: []*policy.PodSecurityPolicy{emptySysctls}, + "pod with safe sysctls request allowed under safeSysctls PSP": { + pod: podWithSysctls([]string{"kernel.shm_rmid_forced", "net.ipv4.tcp_syncookies"}, []string{}), + psps: []*policy.PodSecurityPolicy{safeSysctls}, shouldPassAdmit: true, shouldPassValidate: true, - expectedPSP: emptySysctls.Name, + expectedPSP: safeSysctls.Name, }, - "pod with safe sysctls request allowed under noSysctls PSP": { - pod: podWithSysctls([]string{"a", "b"}, []string{}), - psps: []*policy.PodSecurityPolicy{noSysctls}, - shouldPassAdmit: true, - shouldPassValidate: true, - expectedPSP: noSysctls.Name, - }, - "pod with unsafe sysctls request allowed under noSysctls PSP": { + "pod with unsafe sysctls request disallowed under noSysctls PSP": { pod: podWithSysctls([]string{}, []string{"a", "b"}), psps: []*policy.PodSecurityPolicy{noSysctls}, - shouldPassAdmit: true, - shouldPassValidate: true, + shouldPassAdmit: false, + shouldPassValidate: false, expectedPSP: noSysctls.Name, }, - "pod with safe sysctls request disallowed under emptySysctls PSP": { - pod: podWithSysctls([]string{"a", "b"}, []string{}), - psps: []*policy.PodSecurityPolicy{emptySysctls}, + "pod with unsafe sysctls a, b request disallowed under aUnsafeSysctl SCC": { + pod: podWithSysctls([]string{}, []string{"a", "b"}), + psps: []*policy.PodSecurityPolicy{aUnsafeSysctl}, shouldPassAdmit: false, shouldPassValidate: false, }, - "pod with unsafe sysctls a, b request disallowed under aSysctls SCC": { - pod: podWithSysctls([]string{}, []string{"a", "b"}), - psps: []*policy.PodSecurityPolicy{aSysctl}, - shouldPassAdmit: false, - shouldPassValidate: false, - }, - "pod with unsafe sysctls b request disallowed under aSysctls SCC": { + "pod with unsafe sysctls b request disallowed under aUnsafeSysctl SCC": { pod: podWithSysctls([]string{}, []string{"b"}), - psps: []*policy.PodSecurityPolicy{aSysctl}, + psps: []*policy.PodSecurityPolicy{aUnsafeSysctl}, shouldPassAdmit: false, shouldPassValidate: false, }, - "pod with unsafe sysctls a request allowed under aSysctls SCC": { + "pod with unsafe sysctls a request allowed under aUnsafeSysctl SCC": { pod: podWithSysctls([]string{}, []string{"a"}), - psps: []*policy.PodSecurityPolicy{aSysctl}, + psps: []*policy.PodSecurityPolicy{aUnsafeSysctl}, shouldPassAdmit: true, shouldPassValidate: true, - expectedPSP: aSysctl.Name, + expectedPSP: aUnsafeSysctl.Name, }, - "pod with safe sysctls a, b request disallowed under aSysctls SCC": { - pod: podWithSysctls([]string{"a", "b"}, []string{}), - psps: []*policy.PodSecurityPolicy{aSysctl}, - shouldPassAdmit: false, - shouldPassValidate: false, - }, - "pod with safe sysctls b request disallowed under aSysctls SCC": { - pod: podWithSysctls([]string{"b"}, []string{}), - psps: []*policy.PodSecurityPolicy{aSysctl}, - shouldPassAdmit: false, - shouldPassValidate: false, - }, - "pod with safe sysctls a request allowed under aSysctls SCC": { - pod: podWithSysctls([]string{"a"}, []string{}), - psps: []*policy.PodSecurityPolicy{aSysctl}, + "pod with safe net sysctl request allowed under aUnsafeSysctl SCC": { + pod: podWithSysctls([]string{"net.ipv4.ip_local_port_range"}, []string{}), + psps: []*policy.PodSecurityPolicy{aUnsafeSysctl}, shouldPassAdmit: true, shouldPassValidate: true, - expectedPSP: aSysctl.Name, + expectedPSP: aUnsafeSysctl.Name, }, - "pod with unsafe sysctls request disallowed under emptySysctls PSP": { - pod: podWithSysctls([]string{}, []string{"a", "b"}), - psps: []*policy.PodSecurityPolicy{emptySysctls}, + "pod with safe sysctls request disallowed under noSysctls PSP": { + pod: podWithSysctls([]string{"net.ipv4.ip_local_port_range"}, []string{}), + psps: []*policy.PodSecurityPolicy{noSysctls}, shouldPassAdmit: false, shouldPassValidate: false, }, "pod with matching sysctls request allowed under mixedSysctls PSP": { - pod: podWithSysctls([]string{"a.b", "b.c"}, []string{"c", "d.e.f"}), + pod: podWithSysctls([]string{"kernel.shm_rmid_forced"}, []string{"a.b", "b.a"}), psps: []*policy.PodSecurityPolicy{mixedSysctls}, shouldPassAdmit: true, shouldPassValidate: true, expectedPSP: mixedSysctls.Name, }, "pod with not-matching unsafe sysctls request disallowed under mixedSysctls PSP": { - pod: podWithSysctls([]string{"a.b", "b.c", "c", "d.e.f"}, []string{"e"}), + pod: podWithSysctls([]string{}, []string{"e"}), psps: []*policy.PodSecurityPolicy{mixedSysctls}, shouldPassAdmit: false, shouldPassValidate: false, }, "pod with not-matching safe sysctls request disallowed under mixedSysctls PSP": { - pod: podWithSysctls([]string{"a.b", "b.c", "c", "d.e.f", "e"}, []string{}), + pod: podWithSysctls([]string{"net.ipv4.ip_local_port_range"}, []string{}), psps: []*policy.PodSecurityPolicy{mixedSysctls}, shouldPassAdmit: false, shouldPassValidate: false, }, "pod with sysctls request allowed under catchallSysctls PSP": { - pod: podWithSysctls([]string{"e"}, []string{"f"}), + pod: podWithSysctls([]string{"net.ipv4.ip_local_port_range"}, []string{"f"}), psps: []*policy.PodSecurityPolicy{catchallSysctls}, shouldPassAdmit: true, shouldPassValidate: true, expectedPSP: catchallSysctls.Name, }, - "pod with sysctls request allowed under catchallSysctls PSP, not under mixedSysctls or emptySysctls PSP": { - pod: podWithSysctls([]string{"e"}, []string{"f"}), - psps: []*policy.PodSecurityPolicy{mixedSysctls, catchallSysctls, emptySysctls}, - shouldPassAdmit: true, - shouldPassValidate: true, - expectedPSP: catchallSysctls.Name, - }, - "pod with safe c sysctl request allowed under cSysctl PSP, not under aSysctl or bSysctl PSP": { - pod: podWithSysctls([]string{}, []string{"c"}), - psps: []*policy.PodSecurityPolicy{aSysctl, bSysctl, cSysctl}, - shouldPassAdmit: true, - shouldPassValidate: true, - expectedPSP: cSysctl.Name, - }, - "pod with unsafe c sysctl request allowed under cSysctl PSP, not under aSysctl or bSysctl PSP": { - pod: podWithSysctls([]string{"c"}, []string{}), - psps: []*policy.PodSecurityPolicy{aSysctl, bSysctl, cSysctl}, - shouldPassAdmit: true, - shouldPassValidate: true, - expectedPSP: cSysctl.Name, - }, } for k, v := range tests { - origSafeSysctls, origUnsafeSysctls, err := helper.SysctlsFromPodAnnotations(v.pod.Annotations) - if err != nil { - t.Fatalf("invalid sysctl annotation: %v", err) - } + origSysctl := v.pod.Spec.SecurityContext.Sysctls testPSPAdmit(k, v.psps, v.pod, v.shouldPassAdmit, v.shouldPassValidate, v.expectedPSP, t) if v.shouldPassAdmit { - safeSysctls, unsafeSysctls, _ := helper.SysctlsFromPodAnnotations(v.pod.Annotations) - if !reflect.DeepEqual(safeSysctls, origSafeSysctls) { - t.Errorf("%s: wrong safe sysctls: expected=%v, got=%v", k, origSafeSysctls, safeSysctls) - } - if !reflect.DeepEqual(unsafeSysctls, origUnsafeSysctls) { - t.Errorf("%s: wrong unsafe sysctls: expected=%v, got=%v", k, origSafeSysctls, safeSysctls) + if !reflect.DeepEqual(v.pod.Spec.SecurityContext.Sysctls, origSysctl) { + t.Errorf("%s: wrong sysctls: expected=%v, got=%v", k, origSysctl, v.pod.Spec.SecurityContext.Sysctls) } } } diff --git a/staging/src/k8s.io/api/core/v1/annotation_key_constants.go b/staging/src/k8s.io/api/core/v1/annotation_key_constants.go index 95fff364283..16a0cfced13 100644 --- a/staging/src/k8s.io/api/core/v1/annotation_key_constants.go +++ b/staging/src/k8s.io/api/core/v1/annotation_key_constants.go @@ -56,20 +56,6 @@ const ( // in the Annotations of a Node. PreferAvoidPodsAnnotationKey string = "scheduler.alpha.kubernetes.io/preferAvoidPods" - // SysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure - // container of a pod. The annotation value is a comma separated list of sysctl_name=value - // key-value pairs. Only a limited set of whitelisted and isolated sysctls is supported by - // the kubelet. Pods with other sysctls will fail to launch. - SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls" - - // UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure - // container of a pod. The annotation value is a comma separated list of sysctl_name=value - // key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly - // namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use - // is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet - // will fail to launch. - UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls" - // ObjectTTLAnnotations represents a suggestion for kubelet for how long it can cache // an object (e.g. secret, config map) before fetching it again from apiserver. // This annotation can be attached to node. diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go index c8ad4371c79..76e51852467 100644 --- a/staging/src/k8s.io/api/core/v1/types.go +++ b/staging/src/k8s.io/api/core/v1/types.go @@ -2919,6 +2919,10 @@ type PodSecurityContext struct { // If unset, the Kubelet will not modify the ownership and permissions of any volume. // +optional FSGroup *int64 `json:"fsGroup,omitempty" protobuf:"varint,5,opt,name=fsGroup"` + // Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + // sysctls (by the container runtime) might fail to launch. + // +optional + Sysctls []Sysctl `json:"sysctls,omitempty" protobuf:"bytes,7,rep,name=sysctls"` } // PodQOSClass defines the supported qos classes of Pods. @@ -5203,9 +5207,9 @@ const ( // Sysctl defines a kernel parameter to be set type Sysctl struct { // Name of a property to set - Name string `protobuf:"bytes,1,opt,name=name"` + Name string `json:"name" protobuf:"bytes,1,opt,name=name"` // Value of a property to set - Value string `protobuf:"bytes,2,opt,name=value"` + Value string `json:"value" protobuf:"bytes,2,opt,name=value"` } // NodeResources is an object for conveying resource information about a node. diff --git a/staging/src/k8s.io/api/extensions/v1beta1/types.go b/staging/src/k8s.io/api/extensions/v1beta1/types.go index 90f69e7b79c..3a86ef43a07 100644 --- a/staging/src/k8s.io/api/extensions/v1beta1/types.go +++ b/staging/src/k8s.io/api/extensions/v1beta1/types.go @@ -946,6 +946,25 @@ type PodSecurityPolicySpec struct { // is allowed in the "volumes" field. // +optional AllowedFlexVolumes []AllowedFlexVolume `json:"allowedFlexVolumes,omitempty" protobuf:"bytes,18,rep,name=allowedFlexVolumes"` + // allowedUnsafeSysctls is a list of explicitly allowed unsafe sysctls, defaults to none. + // Each entry is either a plain sysctl name or ends in "*" in which case it is considered + // as a prefix of allowed sysctls. Single * means all unsafe sysctls are allowed. + // Kubelet has to whitelist all allowed unsafe sysctls explicitly to avoid rejection. + // + // Examples: + // e.g. "foo/*" allows "foo/bar", "foo/baz", etc. + // e.g. "foo.*" allows "foo.bar", "foo.baz", etc. + // +optional + AllowedUnsafeSysctls []string `json:"allowedUnsafeSysctls,omitempty" protobuf:"bytes,19,rep,name=allowedUnsafeSysctls"` + // forbiddenSysctls is a list of explicitly forbidden sysctls, defaults to none. + // Each entry is either a plain sysctl name or ends in "*" in which case it is considered + // as a prefix of forbidden sysctls. Single * means all sysctls are forbidden. + // + // Examples: + // e.g. "foo/*" forbids "foo/bar", "foo/baz", etc. + // e.g. "foo.*" forbids "foo.bar", "foo.baz", etc. + // +optional + ForbiddenSysctls []string `json:"forbiddenSysctls,omitempty" protobuf:"bytes,20,rep,name=forbiddenSysctls"` } // AllowedHostPath defines the host volume conditions that will be enabled by a policy diff --git a/staging/src/k8s.io/api/policy/v1beta1/types.go b/staging/src/k8s.io/api/policy/v1beta1/types.go index 4fbe028990e..ba1e4ff314f 100644 --- a/staging/src/k8s.io/api/policy/v1beta1/types.go +++ b/staging/src/k8s.io/api/policy/v1beta1/types.go @@ -201,6 +201,25 @@ type PodSecurityPolicySpec struct { // is allowed in the "volumes" field. // +optional AllowedFlexVolumes []AllowedFlexVolume `json:"allowedFlexVolumes,omitempty" protobuf:"bytes,18,rep,name=allowedFlexVolumes"` + // allowedUnsafeSysctls is a list of explicitly allowed unsafe sysctls, defaults to none. + // Each entry is either a plain sysctl name or ends in "*" in which case it is considered + // as a prefix of allowed sysctls. Single * means all unsafe sysctls are allowed. + // Kubelet has to whitelist all allowed unsafe sysctls explicitly to avoid rejection. + // + // Examples: + // e.g. "foo/*" allows "foo/bar", "foo/baz", etc. + // e.g. "foo.*" allows "foo.bar", "foo.baz", etc. + // +optional + AllowedUnsafeSysctls []string `json:"allowedUnsafeSysctls,omitempty" protobuf:"bytes,19,rep,name=allowedUnsafeSysctls"` + // forbiddenSysctls is a list of explicitly forbidden sysctls, defaults to none. + // Each entry is either a plain sysctl name or ends in "*" in which case it is considered + // as a prefix of forbidden sysctls. Single * means all sysctls are forbidden. + // + // Examples: + // e.g. "foo/*" forbids "foo/bar", "foo/baz", etc. + // e.g. "foo.*" forbids "foo.bar", "foo.baz", etc. + // +optional + ForbiddenSysctls []string `json:"forbiddenSysctls,omitempty" protobuf:"bytes,20,rep,name=forbiddenSysctls"` } // AllowedHostPath defines the host volume conditions that will be enabled by a policy diff --git a/test/e2e/common/sysctl.go b/test/e2e/common/sysctl.go index 5f07371d02e..6ae2c5a0e85 100644 --- a/test/e2e/common/sysctl.go +++ b/test/e2e/common/sysctl.go @@ -20,7 +20,6 @@ import ( "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" - v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/kubelet/sysctl" "k8s.io/kubernetes/test/e2e/framework" @@ -59,12 +58,14 @@ var _ = framework.KubeDescribe("Sysctls [NodeFeature:Sysctls]", func() { It("should support sysctls", func() { pod := testPod() - pod.Annotations[v1.SysctlsPodAnnotationKey] = v1helper.PodAnnotationsFromSysctls([]v1.Sysctl{ - { - Name: "kernel.shm_rmid_forced", - Value: "1", + pod.Spec.SecurityContext = &v1.PodSecurityContext{ + Sysctls: []v1.Sysctl{ + { + Name: "kernel.shm_rmid_forced", + Value: "1", + }, }, - }) + } pod.Spec.Containers[0].Command = []string{"/bin/sysctl", "kernel.shm_rmid_forced"} By("Creating a pod with the kernel.shm_rmid_forced sysctl") @@ -100,12 +101,14 @@ var _ = framework.KubeDescribe("Sysctls [NodeFeature:Sysctls]", func() { It("should support unsafe sysctls which are actually whitelisted", func() { pod := testPod() - pod.Annotations[v1.UnsafeSysctlsPodAnnotationKey] = v1helper.PodAnnotationsFromSysctls([]v1.Sysctl{ - { - Name: "kernel.shm_rmid_forced", - Value: "1", + pod.Spec.SecurityContext = &v1.PodSecurityContext{ + Sysctls: []v1.Sysctl{ + { + Name: "kernel.shm_rmid_forced", + Value: "1", + }, }, - }) + } pod.Spec.Containers[0].Command = []string{"/bin/sysctl", "kernel.shm_rmid_forced"} By("Creating a pod with the kernel.shm_rmid_forced sysctl") @@ -141,34 +144,27 @@ var _ = framework.KubeDescribe("Sysctls [NodeFeature:Sysctls]", func() { It("should reject invalid sysctls", func() { pod := testPod() - pod.Annotations[v1.SysctlsPodAnnotationKey] = v1helper.PodAnnotationsFromSysctls([]v1.Sysctl{ - { - Name: "foo-", - Value: "bar", + pod.Spec.SecurityContext = &v1.PodSecurityContext{ + Sysctls: []v1.Sysctl{ + // Safe parameters + { + Name: "foo-", + Value: "bar", + }, + { + Name: "kernel.shmmax", + Value: "100000000", + }, + { + Name: "safe-and-unsafe", + Value: "100000000", + }, + { + Name: "bar..", + Value: "42", + }, }, - { - Name: "kernel.shmmax", - Value: "100000000", - }, - { - Name: "safe-and-unsafe", - Value: "100000000", - }, - }) - pod.Annotations[v1.UnsafeSysctlsPodAnnotationKey] = v1helper.PodAnnotationsFromSysctls([]v1.Sysctl{ - { - Name: "kernel.shmall", - Value: "100000000", - }, - { - Name: "bar..", - Value: "42", - }, - { - Name: "safe-and-unsafe", - Value: "100000000", - }, - }) + } By("Creating a pod with one valid and two invalid sysctls") client := f.ClientSet.CoreV1().Pods(f.Namespace.Name) @@ -177,18 +173,20 @@ var _ = framework.KubeDescribe("Sysctls [NodeFeature:Sysctls]", func() { Expect(err).NotTo(BeNil()) Expect(err.Error()).To(ContainSubstring(`Invalid value: "foo-"`)) Expect(err.Error()).To(ContainSubstring(`Invalid value: "bar.."`)) - Expect(err.Error()).To(ContainSubstring(`safe-and-unsafe`)) + Expect(err.Error()).NotTo(ContainSubstring(`safe-and-unsafe`)) Expect(err.Error()).NotTo(ContainSubstring("kernel.shmmax")) }) It("should not launch unsafe, but not explicitly enabled sysctls on the node", func() { pod := testPod() - pod.Annotations[v1.SysctlsPodAnnotationKey] = v1helper.PodAnnotationsFromSysctls([]v1.Sysctl{ - { - Name: "kernel.msgmax", - Value: "10000000000", + pod.Spec.SecurityContext = &v1.PodSecurityContext{ + Sysctls: []v1.Sysctl{ + { + Name: "kernel.msgmax", + Value: "10000000000", + }, }, - }) + } By("Creating a pod with a greylisted, but not whitelisted sysctl on the node") pod = podClient.Create(pod) diff --git a/test/e2e/framework/psp_util.go b/test/e2e/framework/psp_util.go index 4e6e4f8a701..cd281f00d54 100644 --- a/test/e2e/framework/psp_util.go +++ b/test/e2e/framework/psp_util.go @@ -71,6 +71,7 @@ func PrivilegedPSP(name string) *extensionsv1beta1.PodSecurityPolicy { Rule: extensionsv1beta1.FSGroupStrategyRunAsAny, }, ReadOnlyRootFilesystem: false, + AllowedUnsafeSysctls: []string{"*"}, }, } } diff --git a/test/e2e/upgrades/sysctl.go b/test/e2e/upgrades/sysctl.go index 1b9a0a71a7f..8ba3fdaefff 100644 --- a/test/e2e/upgrades/sysctl.go +++ b/test/e2e/upgrades/sysctl.go @@ -26,7 +26,6 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" - v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/kubelet/sysctl" "k8s.io/kubernetes/test/e2e/framework" @@ -123,9 +122,6 @@ func sysctlTestPod(name string, sysctls map[string]string) *v1.Pod { return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Annotations: map[string]string{ - v1.SysctlsPodAnnotationKey: v1helper.PodAnnotationsFromSysctls(sysctlList), - }, }, Spec: v1.PodSpec{ Containers: []v1.Container{ @@ -136,6 +132,9 @@ func sysctlTestPod(name string, sysctls map[string]string) *v1.Pod { }, }, RestartPolicy: v1.RestartPolicyNever, + SecurityContext: &v1.PodSecurityContext{ + Sysctls: sysctlList, + }, }, } }