diff --git a/pkg/kubelet/sysctl/safe_sysctls.go b/pkg/kubelet/sysctl/safe_sysctls.go index b69ebd57407..17b4ef83fab 100644 --- a/pkg/kubelet/sysctl/safe_sysctls.go +++ b/pkg/kubelet/sysctl/safe_sysctls.go @@ -61,10 +61,12 @@ var safeSysctls = []sysctl{ kernel: utilkernel.TCPKeepAliveProbesNamespacedKernelVersion, }, { - name: "net.ipv4.tcp_rmem", + name: "net.ipv4.tcp_rmem", + kernel: utilkernel.TCPReceiveMemoryNamespacedKernelVersion, }, { - name: "net.ipv4.tcp_wmem", + name: "net.ipv4.tcp_wmem", + kernel: utilkernel.TCPTransmitMemoryNamespacedKernelVersion, }, } diff --git a/pkg/kubelet/sysctl/safe_sysctls_test.go b/pkg/kubelet/sysctl/safe_sysctls_test.go index 133740f549a..06d75260279 100644 --- a/pkg/kubelet/sysctl/safe_sysctls_test.go +++ b/pkg/kubelet/sysctl/safe_sysctls_test.go @@ -41,8 +41,6 @@ func Test_getSafeSysctlAllowlist(t *testing.T) { "net.ipv4.tcp_syncookies", "net.ipv4.ping_group_range", "net.ipv4.ip_unprivileged_port_start", - "net.ipv4.tcp_rmem", - "net.ipv4.tcp_wmem", }, }, { @@ -58,8 +56,6 @@ func Test_getSafeSysctlAllowlist(t *testing.T) { "net.ipv4.ping_group_range", "net.ipv4.ip_unprivileged_port_start", "net.ipv4.ip_local_reserved_ports", - "net.ipv4.tcp_rmem", - "net.ipv4.tcp_wmem", }, }, { diff --git a/pkg/util/kernel/constants.go b/pkg/util/kernel/constants.go index 6775027e7a0..ea46d46cbe7 100644 --- a/pkg/util/kernel/constants.go +++ b/pkg/util/kernel/constants.go @@ -54,3 +54,11 @@ const TmpfsNoswapSupportKernelVersion = "6.4" // nftables mode with by default. This is not directly related to any specific kernel // commit; see https://issues.k8s.io/122743#issuecomment-1893922424 const NFTablesKubeProxyKernelVersion = "5.13" + +// TCPReceiveMemoryNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_rmem was namespaced(netns).
+// (ref: https://github.com/torvalds/linux/commit/356d1833b638bd465672aefeb71def3ab93fc17d) +const TCPReceiveMemoryNamespacedKernelVersion = "4.15" + +// TCPTransmitMemoryNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_wmem was namespaced(netns). +// (ref: https://github.com/torvalds/linux/commit/356d1833b638bd465672aefeb71def3ab93fc17d) +const TCPTransmitMemoryNamespacedKernelVersion = "4.15" diff --git a/staging/src/k8s.io/pod-security-admission/policy/check_sysctls.go b/staging/src/k8s.io/pod-security-admission/policy/check_sysctls.go index 78950c3b70b..4bbb165f681 100644 --- a/staging/src/k8s.io/pod-security-admission/policy/check_sysctls.go +++ b/staging/src/k8s.io/pod-security-admission/policy/check_sysctls.go @@ -47,6 +47,8 @@ spec.securityContext.sysctls[*].name 'net.ipv4.tcp_fin_timeout' 'net.ipv4.tcp_keepalive_intvl' 'net.ipv4.tcp_keepalive_probes' +'net.ipv4.tcp_rmem' +'net.ipv4.tcp_wmem' */ @@ -104,6 +106,10 @@ var ( "net.ipv4.tcp_keepalive_intvl", "net.ipv4.tcp_keepalive_probes", ) + sysctlsAllowedV1Dot30 = sysctlsAllowedV1Dot29.Union(sets.NewString( + "net.ipv4.tcp_rmem", + "net.ipv4.tcp_wmem", + )) ) func sysctlsV1Dot0(podMetadata *metav1.ObjectMeta, podSpec *corev1.PodSpec) CheckResult { @@ -118,6 +124,10 @@ func sysctlsV1Dot29(podMetadata *metav1.ObjectMeta, podSpec *corev1.PodSpec) Che return sysctls(podMetadata, podSpec, sysctlsAllowedV1Dot29) } +func sysctlsV1Dot30(podMetadata *metav1.ObjectMeta, podSpec *corev1.PodSpec) CheckResult { + return sysctls(podMetadata, podSpec, sysctlsAllowedV1Dot30) +} + func sysctls(podMetadata *metav1.ObjectMeta, podSpec *corev1.PodSpec, sysctls_allowed_set sets.String) CheckResult { var forbiddenSysctls []string diff --git a/staging/src/k8s.io/pod-security-admission/policy/check_sysctls_test.go b/staging/src/k8s.io/pod-security-admission/policy/check_sysctls_test.go index b09af170a38..ee2afd7d2f6 100644 --- a/staging/src/k8s.io/pod-security-admission/policy/check_sysctls_test.go +++
b/staging/src/k8s.io/pod-security-admission/policy/check_sysctls_test.go @@ -96,6 +96,28 @@ func TestSysctls(t *testing.T) { expectReason: `forbidden sysctls`, expectDetail: `net.ipv4.tcp_keepalive_probes`, }, + { + name: "new supported sysctls not supported: net.ipv4.tcp_rmem", + pod: &corev1.Pod{Spec: corev1.PodSpec{ + SecurityContext: &corev1.PodSecurityContext{ + Sysctls: []corev1.Sysctl{{Name: "net.ipv4.tcp_rmem", Value: "4096 87380 16777216"}}, + }, + }}, + allowed: false, + expectReason: `forbidden sysctls`, + expectDetail: `net.ipv4.tcp_rmem`, + }, + { + name: "new supported sysctls not supported: net.ipv4.tcp_wmem", + pod: &corev1.Pod{Spec: corev1.PodSpec{ + SecurityContext: &corev1.PodSecurityContext{ + Sysctls: []corev1.Sysctl{{Name: "net.ipv4.tcp_wmem", Value: "4096 87380 16777216"}}, + }, + }}, + allowed: false, + expectReason: `forbidden sysctls`, + expectDetail: `net.ipv4.tcp_wmem`, + }, } for _, tc := range tests { @@ -244,3 +266,62 @@ func TestSysctls_1_29(t *testing.T) { }) } } + +func TestSysctls_1_30(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + allowed bool + expectReason string + expectDetail string + }{ + { + name: "forbidden sysctls", + pod: &corev1.Pod{Spec: corev1.PodSpec{ + SecurityContext: &corev1.PodSecurityContext{ + Sysctls: []corev1.Sysctl{{Name: "a"}, {Name: "b"}}, + }, + }}, + allowed: false, + expectReason: `forbidden sysctls`, + expectDetail: `a, b`, + }, + { + name: "new supported sysctls: net.ipv4.tcp_rmem", + pod: &corev1.Pod{Spec: corev1.PodSpec{ + SecurityContext: &corev1.PodSecurityContext{ + Sysctls: []corev1.Sysctl{{Name: "net.ipv4.tcp_rmem", Value: "4096 87380 16777216"}}, + }, + }}, + allowed: true, + }, + { + name: "new supported sysctls: net.ipv4.tcp_wmem", + pod: &corev1.Pod{Spec: corev1.PodSpec{ + SecurityContext: &corev1.PodSecurityContext{ + Sysctls: []corev1.Sysctl{{Name: "net.ipv4.tcp_wmem", Value: "4096 65536 16777216"}}, + }, + }}, + allowed: true, + }, + } + + for _, tc := range
tests { + t.Run(tc.name, func(t *testing.T) { + result := sysctlsV1Dot30(&tc.pod.ObjectMeta, &tc.pod.Spec) + if !tc.allowed { + if result.Allowed { + t.Fatal("expected disallowed") + } + if e, a := tc.expectReason, result.ForbiddenReason; e != a { + t.Errorf("expected\n%s\ngot\n%s", e, a) + } + if e, a := tc.expectDetail, result.ForbiddenDetail; e != a { + t.Errorf("expected\n%s\ngot\n%s", e, a) + } + } else if !result.Allowed { + t.Fatal("expected allowed") + } + }) + } +} diff --git a/staging/src/k8s.io/pod-security-admission/test/fixtures_sysctls.go b/staging/src/k8s.io/pod-security-admission/test/fixtures_sysctls.go index 9ef1a0feb1b..d150fc83d2a 100644 --- a/staging/src/k8s.io/pod-security-admission/test/fixtures_sysctls.go +++ b/staging/src/k8s.io/pod-security-admission/test/fixtures_sysctls.go @@ -156,4 +156,39 @@ func init() { fixtureKey{level: api.LevelBaseline, version: api.MajorMinorVersion(1, 29), check: "sysctls"}, fixtureDataV1Dot29, ) + + fixtureDataV1Dot30 := fixtureGenerator{ + expectErrorSubstring: "forbidden sysctl", + generatePass: func(p *corev1.Pod) []*corev1.Pod { + if p.Spec.SecurityContext == nil { + p.Spec.SecurityContext = &corev1.PodSecurityContext{} + } + return []*corev1.Pod{ + // security context with no sysctls + tweak(p, func(p *corev1.Pod) { p.Spec.SecurityContext.Sysctls = nil }), + // sysctls with name="net.ipv4.tcp_rmem", "net.ipv4.tcp_wmem" + tweak(p, func(p *corev1.Pod) { + p.Spec.SecurityContext.Sysctls = []corev1.Sysctl{ + {Name: "net.ipv4.tcp_rmem", Value: "4096 87380 16777216"}, + {Name: "net.ipv4.tcp_wmem", Value: "4096 65536 16777216"}, + } + }), + } + }, + generateFail: func(p *corev1.Pod) []*corev1.Pod { + if p.Spec.SecurityContext == nil { + p.Spec.SecurityContext = &corev1.PodSecurityContext{} + } + return []*corev1.Pod{ + // sysctls with out of allowed name + tweak(p, func(p *corev1.Pod) { + p.Spec.SecurityContext.Sysctls = []corev1.Sysctl{{Name: "othersysctl", Value: "other"}} + }), + } + }, + } +
registerFixtureGenerator( + fixtureKey{level: api.LevelBaseline, version: api.MajorMinorVersion(1, 30), check: "sysctls"}, + fixtureDataV1Dot30, + ) }