diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index 926bd5a4caa..cabb16b34fa 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -485,6 +485,8 @@ func AddKubeletConfigFlags(fs *pflag.FlagSet, c *kubeletconfig.KubeletConfigurat fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.") fs.StringVar(&c.PodCIDR, "pod-cidr", c.PodCIDR, "The CIDR to use for pod IP addresses, only used in standalone mode. In cluster mode, this is obtained from the master.") + fs.Int64Var(c.PodPidsLimit, "pod-max-pids", *c.PodPidsLimit, "Set the maximum number of processes per pod. Alpha feature: only enforced when the SupportPodPidsLimit feature gate is enabled. A value of 0 or less (default -1) means the kubelet sets no limit.") + fs.StringVar(&c.ResolverConfig, "resolv-conf", c.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.") fs.BoolVar(&c.CPUCFSQuota, "cpu-cfs-quota", c.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits") fs.BoolVar(&c.EnableControllerAttachDetach, "enable-controller-attach-detach", c.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations") diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index b3219092c62..87e53c00141 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -525,6 +525,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) { ExperimentalQOSReserved: *experimentalQOSReserved, ExperimentalCPUManagerPolicy: s.CPUManagerPolicy, ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration, + ExperimentalPodPidsLimit: *s.PodPidsLimit, }, s.FailSwapOn, devicePluginEnabled, diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index d72fd5e92fe..9c82f4d7213 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -223,6 +223,12
@@ const ( // // Implement IPVS-based in-cluster service load balancing SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode" + + // owner: @dims + // alpha: v1.10 + // + // Implement support for limiting the number of pids (processes) in a pod; see the kubelet --pod-max-pids flag + SupportPodPidsLimit utilfeature.Feature = "SupportPodPidsLimit" ) func init() { @@ -263,6 +269,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS PVCProtection: {Default: false, PreRelease: utilfeature.Alpha}, ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha}, SupportIPVSProxyMode: {Default: false, PreRelease: utilfeature.Beta}, + SupportPodPidsLimit: {Default: false, PreRelease: utilfeature.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go b/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go index ef1ff425067..23fb8f8fe84 100644 --- a/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go +++ b/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go @@ -62,6 +62,8 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} { obj.ImageGCLowThresholdPercent = 80 obj.MaxOpenFiles = 1000000 obj.MaxPods = 110 + temp := int64(-1) + obj.PodPidsLimit = &temp obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second} obj.CPUManagerPolicy = "none" obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency diff --git a/pkg/kubelet/apis/kubeletconfig/helpers_test.go b/pkg/kubelet/apis/kubeletconfig/helpers_test.go index 9af810a84d3..c58a043b451 100644 --- a/pkg/kubelet/apis/kubeletconfig/helpers_test.go +++ b/pkg/kubelet/apis/kubeletconfig/helpers_test.go @@ -195,6 +195,7 @@ var ( "NodeStatusUpdateFrequency.Duration", "OOMScoreAdj", "PodCIDR", + "PodPidsLimit", "PodsPerCore", "Port", "ProtectKernelDefaults", diff --git a/pkg/kubelet/apis/kubeletconfig/types.go b/pkg/kubelet/apis/kubeletconfig/types.go index
970f3b5907d..f25f7a65c73 100644 --- a/pkg/kubelet/apis/kubeletconfig/types.go +++ b/pkg/kubelet/apis/kubeletconfig/types.go @@ -193,6 +193,8 @@ type KubeletConfiguration struct { // The CIDR to use for pod IP addresses, only used in standalone mode. // In cluster mode, this is obtained from the master. PodCIDR string + // PodPidsLimit is the maximum number of pids in any pod. It is only enforced when the SupportPodPidsLimit feature gate is enabled and the value is greater than zero; the v1alpha1 default is -1. + PodPidsLimit *int64 // ResolverConfig is the resolver configuration file used as the basis // for the container DNS resolution configuration. ResolverConfig string diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go index fe46fb6d8a3..59cdcabf7b5 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go @@ -129,6 +129,10 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) { if obj.MaxPods == 0 { obj.MaxPods = 110 } + if obj.PodPidsLimit == nil { + temp := int64(-1) + obj.PodPidsLimit = &temp + } if obj.NodeStatusUpdateFrequency == zeroDuration { obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second} } diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go index 0f762d0d0d4..6b16661dc26 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go @@ -190,6 +190,8 @@ type KubeletConfiguration struct { // The CIDR to use for pod IP addresses, only used in standalone mode. // In cluster mode, this is obtained from the master. PodCIDR string `json:"podCIDR"` + // PodPidsLimit is the maximum number of pids in any pod. A nil value is defaulted to -1; the limit is only enforced when the SupportPodPidsLimit feature gate is enabled and the value is greater than zero. + PodPidsLimit *int64 `json:"podPidsLimit"` // ResolverConfig is the resolver configuration file used as the basis // for the container DNS resolution configuration. 
ResolverConfig string `json:"resolvConf"` diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go index 9ca7162f5eb..94165deabdc 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go @@ -213,6 +213,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_kubeletconfig_KubeletConfigura out.HairpinMode = in.HairpinMode out.MaxPods = in.MaxPods out.PodCIDR = in.PodCIDR + out.PodPidsLimit = (*int64)(unsafe.Pointer(in.PodPidsLimit)) out.ResolverConfig = in.ResolverConfig if err := v1.Convert_Pointer_bool_To_bool(&in.CPUCFSQuota, &out.CPUCFSQuota, s); err != nil { return err @@ -336,6 +337,7 @@ func autoConvert_kubeletconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigura out.HairpinMode = in.HairpinMode out.MaxPods = in.MaxPods out.PodCIDR = in.PodCIDR + out.PodPidsLimit = (*int64)(unsafe.Pointer(in.PodPidsLimit)) out.ResolverConfig = in.ResolverConfig if err := v1.Convert_bool_To_Pointer_bool(&in.CPUCFSQuota, &out.CPUCFSQuota, s); err != nil { return err diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go index 301aa272d33..8566133c1c0 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go @@ -231,6 +231,15 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { } out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod out.RuntimeRequestTimeout = in.RuntimeRequestTimeout + if in.PodPidsLimit != nil { + in, out := &in.PodPidsLimit, &out.PodPidsLimit + if *in == nil { + *out = nil + } else { + *out = new(int64) + **out = **in + } + } if in.CPUCFSQuota != nil { in, out := &in.CPUCFSQuota, &out.CPUCFSQuota if *in == nil { diff --git 
a/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go b/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go index b150467d4b2..da9ad76e209 100644 --- a/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go @@ -123,6 +123,15 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { out.VolumeStatsAggPeriod = in.VolumeStatsAggPeriod out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod out.RuntimeRequestTimeout = in.RuntimeRequestTimeout + if in.PodPidsLimit != nil { + in, out := &in.PodPidsLimit, &out.PodPidsLimit + if *in == nil { + *out = nil + } else { + *out = new(int64) + **out = **in + } + } if in.EvictionHard != nil { in, out := &in.EvictionHard, &out.EvictionHard *out = make(map[string]string, len(*in)) diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index d1d99713429..5a80a0e62eb 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -328,6 +328,9 @@ func getSupportedSubsystems() []subsystem { if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) { supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{}) } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) { + supportedSubsystems = append(supportedSubsystems, &cgroupfs.PidsGroup{}) + } return supportedSubsystems } @@ -430,6 +433,10 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { Paths: cgroupPaths, } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PodPidsLimit != nil { + libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit + } + if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil { return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err) } @@ -463,6 +470,10 @@ func (m 
*cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { Resources: resources, } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PodPidsLimit != nil { + libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit + } + // get the manager with the specified cgroup configuration manager, err := m.adapter.newManager(libcontainerCgroupConfig, nil) if err != nil { diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index 5dbe6d9b1b5..3754f8028e9 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -107,6 +107,7 @@ type NodeConfig struct { ExperimentalQOSReserved map[v1.ResourceName]int64 ExperimentalCPUManagerPolicy string ExperimentalCPUManagerReconcilePeriod time.Duration + ExperimentalPodPidsLimit int64 } type NodeAllocatableConfig struct { diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 2a55f3c8437..e279a8afdaf 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -300,6 +300,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { qosContainersInfo: cm.GetQOSContainersInfo(), subsystems: cm.subsystems, cgroupManager: cm.cgroupManager, + podPidsLimit: cm.ExperimentalPodPidsLimit, } } return &podContainerManagerNoop{ diff --git a/pkg/kubelet/cm/pod_container_manager_linux.go b/pkg/kubelet/cm/pod_container_manager_linux.go index e62d192891d..2b0cbf83018 100644 --- a/pkg/kubelet/cm/pod_container_manager_linux.go +++ b/pkg/kubelet/cm/pod_container_manager_linux.go @@ -27,7 +27,9 @@ import ( "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" + utilfeature "k8s.io/apiserver/pkg/util/feature" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" + kubefeatures "k8s.io/kubernetes/pkg/features" ) const ( @@ -45,6 +47,8 @@ 
type podContainerManagerImpl struct { // cgroupManager is the cgroup Manager Object responsible for managing all // pod cgroups. cgroupManager CgroupManager + // podPidsLimit is the maximum number of pids in a pod; EnsureExists applies it to the pod cgroup config only when it is greater than zero and the SupportPodPidsLimit feature gate is enabled. + podPidsLimit int64 } // Make sure that podContainerManagerImpl implements the PodContainerManager interface @@ -77,6 +81,9 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error { Name: podContainerName, ResourceParameters: ResourceConfigForPod(pod), } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && m.podPidsLimit > 0 { + containerConfig.ResourceParameters.PodPidsLimit = &m.podPidsLimit + } if err := m.cgroupManager.Create(containerConfig); err != nil { return fmt.Errorf("failed to create container for %v : %v", podContainerName, err) } diff --git a/pkg/kubelet/cm/types.go b/pkg/kubelet/cm/types.go index 1ee48f9c8eb..ce2cc2c826f 100644 --- a/pkg/kubelet/cm/types.go +++ b/pkg/kubelet/cm/types.go @@ -33,6 +33,8 @@ type ResourceConfig struct { CpuPeriod *uint64 // HugePageLimit map from page size (in bytes) to limit (in bytes) HugePageLimit map[int64]int64 + // PodPidsLimit is the maximum number of pids; nil means no pids limit is requested. + PodPidsLimit *int64 } // CgroupName is the abstract name of a cgroup prior to any driver specific conversion.