valid error for creation and update from valid to invalid only

- using an option AllowNamespacedSysctlsForHostNetAndHostIPC

Signed-off-by: Paco Xu <paco.xu@daocloud.io>
This commit is contained in:
Paco Xu 2023-07-17 13:59:24 +08:00
parent 9a8ccdebc5
commit 36d6917ae1
10 changed files with 154 additions and 82 deletions

View File

@ -365,6 +365,7 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
AllowInvalidLabelValueInSelector: false,
AllowInvalidTopologySpreadConstraintLabelSelector: false,
AllowMutableNodeSelectorAndNodeAffinity: utilfeature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
AllowNamespacedSysctlsForHostNetAndHostIPC: false,
}
if oldPodSpec != nil {
@ -377,6 +378,17 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
opts.AllowInvalidLabelValueInSelector = hasInvalidLabelValueInAffinitySelector(oldPodSpec)
// if old spec has invalid labelSelector in topologySpreadConstraint, we must allow it
opts.AllowInvalidTopologySpreadConstraintLabelSelector = hasInvalidTopologySpreadConstraintLabelSelector(oldPodSpec)
// if old spec has invalid sysctl with hostNet or hostIPC, we must allow it when update
if oldPodSpec.SecurityContext != nil && len(oldPodSpec.SecurityContext.Sysctls) != 0 {
for _, s := range oldPodSpec.SecurityContext.Sysctls {
err := apivalidation.ValidateHostSysctl(s.Name, oldPodSpec.SecurityContext, nil)
if err != nil {
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
break
}
}
}
}
if oldPodMeta != nil && !opts.AllowInvalidPodDeletionCost {
// This is an update, so validate only if the existing object was valid.

View File

@ -3813,6 +3813,8 @@ type PodValidationOptions struct {
AllowInvalidTopologySpreadConstraintLabelSelector bool
// Allow node selector additions for gated pods.
AllowMutableNodeSelectorAndNodeAffinity bool
// Allow namespaced sysctls in hostNet and hostIPC pods
AllowNamespacedSysctlsForHostNetAndHostIPC bool
// The top-level resource being validated is a Pod, not just a PodSpec
// embedded in some other resource.
ResourceIsPod bool
@ -4563,10 +4565,10 @@ func IsValidSysctlName(name string) bool {
return sysctlContainSlashRegexp.MatchString(name)
}
func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path, hostNetwork, hostIPC bool) field.ErrorList {
func validateSysctls(securityContext *core.PodSecurityContext, fldPath *field.Path, opts PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{}
names := make(map[string]struct{})
for i, s := range sysctls {
for i, s := range securityContext.Sysctls {
if len(s.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), ""))
} else if !IsValidSysctlName(s.Name) {
@ -4574,41 +4576,27 @@ func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path, hostNetwork, ho
} else if _, ok := names[s.Name]; ok {
allErrs = append(allErrs, field.Duplicate(fldPath.Index(i).Child("name"), s.Name))
}
// The parameters hostNet and hostIPC are used to forbid sysctls for pod sharing the
// respective namespaces with the host.
if !isValidSysctlWithHostNetIPC(s.Name, hostNetwork, hostIPC) {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, "sysctl not allowed with host net/ipc enabled"))
if !opts.AllowNamespacedSysctlsForHostNetAndHostIPC {
err := ValidateHostSysctl(s.Name, securityContext, fldPath.Index(i).Child("name"))
if err != nil {
allErrs = append(allErrs, err)
}
}
names[s.Name] = struct{}{}
}
return allErrs
}
func isValidSysctlWithHostNetIPC(sysctl string, hostNet, hostIPC bool) bool {
sysctl = utilsysctl.ConvertSysctlVariableToDotsSeparator(sysctl)
var ns utilsysctl.Namespace
if strings.HasSuffix(sysctl, "*") {
prefix := sysctl[:len(sysctl)-1]
ns = utilsysctl.NamespacedBy(prefix)
if ns == utilsysctl.UnknownNamespace {
// don't handle unknown namespace here
return true
}
} else {
ns = utilsysctl.NamespacedBy(sysctl)
if ns == utilsysctl.UnknownNamespace {
// don't handle unknown namespace here
return true
}
// ValidateHostSysctl will return error if namespaced sysctls is applied to pod sharing the respective namespaces with the host.
func ValidateHostSysctl(sysctl string, securityContext *core.PodSecurityContext, fldPath *field.Path) *field.Error {
ns, _, _ := utilsysctl.GetNamespace(sysctl)
switch {
case securityContext.HostNetwork && ns == utilsysctl.NetNamespace:
return field.Invalid(fldPath, sysctl, "may not be specified when 'hostNetwork' is true")
case securityContext.HostIPC && ns == utilsysctl.IPCNamespace:
return field.Invalid(fldPath, sysctl, "may not be specified when 'hostIPC' is true")
}
if ns == utilsysctl.IpcNamespace && hostIPC {
return false
}
if ns == utilsysctl.NetNamespace && hostNet {
return false
}
return true
return nil
}
// validatePodSpecSecurityContext verifies the SecurityContext of a PodSpec,
@ -4643,13 +4631,7 @@ func validatePodSpecSecurityContext(securityContext *core.PodSecurityContext, sp
}
if len(securityContext.Sysctls) != 0 {
var hostNetwork, hostIPC bool
if spec.SecurityContext != nil {
hostNetwork = spec.SecurityContext.HostNetwork
hostIPC = spec.SecurityContext.HostIPC
}
allErrs = append(allErrs, validateSysctls(securityContext.Sysctls, fldPath.Child("sysctls"), hostNetwork, hostIPC)...)
allErrs = append(allErrs, validateSysctls(securityContext, fldPath.Child("sysctls"), opts)...)
}
if securityContext.FSGroupChangePolicy != nil {

View File

@ -21505,12 +21505,18 @@ func TestValidateSysctls(t *testing.T) {
"kernel.shmmax",
"kernel.shmmax",
}
opts := PodValidationOptions{
AllowNamespacedSysctlsForHostNetAndHostIPC: false,
}
sysctls := make([]core.Sysctl, len(valid))
validSecurityContext := &core.PodSecurityContext{
Sysctls: sysctls,
}
for i, sysctl := range valid {
sysctls[i].Name = sysctl
}
errs := validateSysctls(sysctls, field.NewPath("foo"), false, false)
errs := validateSysctls(validSecurityContext, field.NewPath("foo"), opts)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
@ -21519,7 +21525,10 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range invalid {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"), false, false)
inValidSecurityContext := &core.PodSecurityContext{
Sysctls: sysctls,
}
errs = validateSysctls(inValidSecurityContext, field.NewPath("foo"), opts)
if len(errs) != 2 {
t.Errorf("expected 2 validation errors. Got: %v", errs)
} else {
@ -21535,7 +21544,10 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range duplicates {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"), false, false)
securityContextWithDup := &core.PodSecurityContext{
Sysctls: sysctls,
}
errs = validateSysctls(securityContextWithDup, field.NewPath("foo"), opts)
if len(errs) != 1 {
t.Errorf("unexpected validation errors: %v", errs)
} else if errs[0].Type != field.ErrorTypeDuplicate {
@ -21546,19 +21558,40 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range invalidWithHostNet {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"), true, false)
invalidSecurityContextWithHostNet := &core.PodSecurityContext{
Sysctls: sysctls,
HostIPC: false,
HostNetwork: true,
}
errs = validateSysctls(invalidSecurityContextWithHostNet, field.NewPath("foo"), opts)
if len(errs) != 2 {
t.Errorf("unexpected validation errors: %v", errs)
}
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
errs = validateSysctls(invalidSecurityContextWithHostNet, field.NewPath("foo"), opts)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
sysctls = make([]core.Sysctl, len(invalidWithHostIPC))
for i, sysctl := range invalidWithHostIPC {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"), false, true)
invalidSecurityContextWithHostIPC := &core.PodSecurityContext{
Sysctls: sysctls,
HostIPC: true,
HostNetwork: false,
}
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = false
errs = validateSysctls(invalidSecurityContextWithHostIPC, field.NewPath("foo"), opts)
if len(errs) != 2 {
t.Errorf("unexpected validation errors: %v", errs)
}
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
errs = validateSysctls(invalidSecurityContextWithHostIPC, field.NewPath("foo"), opts)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
}
func newNodeNameEndpoint(nodeName string) *core.Endpoints {

View File

@ -55,20 +55,14 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
policyvalidation.SysctlContainSlashPatternFmt,
)
}
s = utilsysctl.ConvertSysctlVariableToDotsSeparator(s)
if strings.HasSuffix(s, "*") {
prefix := s[:len(s)-1]
ns := utilsysctl.NamespacedBy(prefix)
if ns == utilsysctl.UnknownNamespace {
return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", s)
}
w.prefixes[prefix] = ns
ns, sysctlOrPrefix, prefixed := utilsysctl.GetNamespace(s)
if ns == utilsysctl.UnknownNamespace {
return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", sysctlOrPrefix)
}
if prefixed {
w.prefixes[sysctlOrPrefix] = ns
} else {
ns := utilsysctl.NamespacedBy(s)
if ns == utilsysctl.UnknownNamespace {
return nil, fmt.Errorf("the sysctl %q are not known to be namespaced", s)
}
w.sysctls[s] = ns
w.sysctls[sysctlOrPrefix] = ns
}
}
return w, nil
@ -82,10 +76,10 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
// respective namespaces with the host. This check is only possible for sysctls on
// the static default allowlist, not those on the custom allowlist provided by the admin.
func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
sysctl = utilsysctl.ConvertSysctlVariableToDotsSeparator(sysctl)
sysctl = utilsysctl.NormalizeName(sysctl)
nsErrorFmt := "%q not allowed with host %s enabled"
if ns, found := w.sysctls[sysctl]; found {
if ns == utilsysctl.IpcNamespace && hostIPC {
if ns == utilsysctl.IPCNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
if ns == utilsysctl.NetNamespace && hostNet {
@ -95,7 +89,7 @@ func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool)
}
for p, ns := range w.prefixes {
if strings.HasPrefix(sysctl, p) {
if ns == utilsysctl.IpcNamespace && hostIPC {
if ns == utilsysctl.IPCNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
if ns == utilsysctl.NetNamespace && hostNet {

View File

@ -1,3 +1,6 @@
//go:build linux
// +build linux
/*
Copyright 2016 The Kubernetes Authors.
@ -17,9 +20,10 @@ limitations under the License.
package sysctl
import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)
func TestNewAllowlist(t *testing.T) {

View File

@ -29,7 +29,7 @@ func ConvertPodSysctlsVariableToDotsSeparator(securityContext *v1.PodSecurityCon
return
}
for i, sysctl := range securityContext.Sysctls {
securityContext.Sysctls[i].Name = utilsysctl.ConvertSysctlVariableToDotsSeparator(sysctl.Name)
securityContext.Sysctls[i].Name = utilsysctl.NormalizeName(sysctl.Name)
}
return
}

View File

@ -24,37 +24,81 @@ import (
type Namespace string
const (
// refer to https://man7.org/linux/man-pages/man7/ipc_namespaces.7.html
// the Linux IPC namespace
IpcNamespace = Namespace("ipc")
IPCNamespace = Namespace("IPC")
// refer to https://man7.org/linux/man-pages/man7/network_namespaces.7.html
// the network namespace
NetNamespace = Namespace("net")
NetNamespace = Namespace("Net")
// the zero value if no namespace is known
UnknownNamespace = Namespace("")
)
var namespaces = map[string]Namespace{
"kernel.sem": IpcNamespace,
var nameToNamespace = map[string]Namespace{
// kernel semaphore parameters: SEMMSL, SEMMNS, SEMOPM, and SEMMNI.
"kernel.sem": IPCNamespace,
// kernel shared memory limits include shmall, shmmax, shmmni, and shm_rmid_forced.
"kernel.shmall": IPCNamespace,
"kernel.shmmax": IPCNamespace,
"kernel.shmmni": IPCNamespace,
"kernel.shm_rmid_forced": IPCNamespace,
// make backward compatibility to know the namespace of kernel.shm*
"kernel.shm": IPCNamespace,
// kernel messages include msgmni, msgmax and msgmnb.
"kernel.msgmax": IPCNamespace,
"kernel.msgmnb": IPCNamespace,
"kernel.msgmni": IPCNamespace,
// make backward compatibility to know the namespace of kernel.msg*
"kernel.msg": IPCNamespace,
}
var prefixNamespaces = map[string]Namespace{
"kernel.shm": IpcNamespace,
"kernel.msg": IpcNamespace,
"fs.mqueue.": IpcNamespace,
"net.": NetNamespace,
var prefixToNamespace = map[string]Namespace{
"net": NetNamespace,
// mqueue filesystem provides the necessary kernel features to enable the creation
// of a user space library that implements the POSIX message queues API.
"fs.mqueue": IPCNamespace,
}
// NamespacedBy returns the namespace of the Linux kernel for a sysctl, or
// namespaceOf returns the namespace of the Linux kernel for a sysctl, or
// unknownNamespace if the sysctl is not known to be namespaced.
func NamespacedBy(val string) Namespace {
if ns, found := namespaces[val]; found {
// The second return is prefixed bool.
// It returns true if the key is prefixed with a key in the prefix map
func namespaceOf(val string) Namespace {
if ns, found := nameToNamespace[val]; found {
return ns
}
for p, ns := range prefixNamespaces {
if strings.HasPrefix(val, p) {
for p, ns := range prefixToNamespace {
if strings.HasPrefix(val, p+".") {
return ns
}
}
return UnknownNamespace
}
// GetNamespace extracts information from a sysctl string. It returns:
// 1. The sysctl namespace, which can be one of the following: IPC, Net, or unknown.
// 2. sysctlOrPrefix: the prefix of the sysctl parameter until the first '*'.
// If there is no '*', it will be the original string.
// 3. 'prefixed' is set to true if the sysctl parameter contains '*' or it is in the prefixToNamespace key list, in most cases, it is a suffix *.
//
// For example, if the input sysctl is 'net.ipv6.neigh.*', GetNamespace will return:
// - The Net namespace
// - The sysctlOrPrefix as 'net.ipv6.neigh'
// - 'prefixed' set to true
//
// For the input sysctl 'net.ipv6.conf.all.disable_ipv6', GetNamespace will return:
// - The Net namespace
// - The sysctlOrPrefix as 'net.ipv6.conf.all.disable_ipv6'
// - 'prefixed' set to false.
func GetNamespace(sysctl string) (ns Namespace, sysctlOrPrefix string, prefixed bool) {
sysctlOrPrefix = NormalizeName(sysctl)
firstIndex := strings.IndexAny(sysctlOrPrefix, "*")
if firstIndex != -1 {
sysctlOrPrefix = sysctlOrPrefix[:firstIndex]
prefixed = true
}
ns = namespaceOf(sysctlOrPrefix)
return
}

View File

@ -20,16 +20,16 @@ import (
"testing"
)
func TestNamespacedBy(t *testing.T) {
func TestNamespacedOf(t *testing.T) {
tests := map[string]Namespace{
"kernel.shm_rmid_forced": IpcNamespace,
"kernel.shm_rmid_forced": IPCNamespace,
"net.a.b.c": NetNamespace,
"fs.mqueue.a.b.c": IpcNamespace,
"fs.mqueue.a.b.c": IPCNamespace,
"foo": UnknownNamespace,
}
for sysctl, ns := range tests {
if got := NamespacedBy(sysctl); got != ns {
if got := namespaceOf(sysctl); got != ns {
t.Errorf("wrong namespace for %q: got=%s want=%s", sysctl, got, ns)
}
}

View File

@ -99,22 +99,25 @@ func (*procSysctl) SetSysctl(sysctl string, newVal int) error {
return os.WriteFile(path.Join(sysctlBase, sysctl), []byte(strconv.Itoa(newVal)), 0640)
}
// ConvertSysctlVariableToDotsSeparator can return sysctl variables in dots separator format.
// NormalizeName can return sysctl variables in dots separator format.
// The '/' separator is also accepted in place of a '.'.
// Convert the sysctl variables to dots separator format for validation.
// More info:
//
// https://man7.org/linux/man-pages/man8/sysctl.8.html
// https://man7.org/linux/man-pages/man5/sysctl.d.5.html
func ConvertSysctlVariableToDotsSeparator(val string) string {
func NormalizeName(val string) string {
if val == "" {
return val
}
firstSepIndex := strings.IndexAny(val, "./")
// if the first found is `.` like `net.ipv4.conf.eno2/100.rp_filter`
if firstSepIndex == -1 || val[firstSepIndex] == '.' {
return val
}
// for `net/ipv4/conf/eno2.100/rp_filter`, swap the use of `.` and `/`
// to `net.ipv4.conf.eno2/100.rp_filter`
f := func(r rune) rune {
switch r {
case '.':

View File

@ -40,7 +40,7 @@ func TestConvertSysctlVariableToDotsSeparator(t *testing.T) {
}
for _, test := range valid {
convertSysctlVal := ConvertSysctlVariableToDotsSeparator(test.in)
convertSysctlVal := NormalizeName(test.in)
assert.Equalf(t, test.out, convertSysctlVal, "The sysctl variable was not converted correctly. got: %s, want: %s", convertSysctlVal, test.out)
}
}