diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index 250da221f3b..875bd065af1 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -478,6 +478,8 @@ func AddKubeletConfigFlags(fs *pflag.FlagSet, c *kubeletconfig.KubeletConfigurat fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.") fs.StringVar(&c.PodCIDR, "pod-cidr", c.PodCIDR, "The CIDR to use for pod IP addresses, only used in standalone mode. In cluster mode, this is obtained from the master.") + fs.Int64Var(c.PodPidsLimit, "pod-max-pids", *c.PodPidsLimit, " Set the maximum number of processes per pod.") + fs.StringVar(&c.ResolverConfig, "resolv-conf", c.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.") fs.BoolVar(&c.CPUCFSQuota, "cpu-cfs-quota", c.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits") fs.BoolVar(&c.EnableControllerAttachDetach, "enable-controller-attach-detach", c.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations") diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index a3eb633f980..867ed43678b 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -471,6 +471,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) { ExperimentalQOSReserved: *experimentalQOSReserved, ExperimentalCPUManagerPolicy: s.CPUManagerPolicy, ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration, + ExperimentalPodPidsLimit: *s.PodPidsLimit, }, s.FailSwapOn, devicePluginEnabled, diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index c6cbc6cd586..e06d125252c 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -220,6 +220,12 @@ const ( // // Implement IPVS-based in-cluster service load balancing SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode" + + // owner: @dims + // alpha: v1.10 + // + // Implement support for limiting pids in pods + SupportPodPidsLimit utilfeature.Feature = "SupportPodPidsLimit" ) func init() { @@ -261,6 +267,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS PVCProtection: {Default: false, PreRelease: utilfeature.Alpha}, ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha}, SupportIPVSProxyMode: {Default: false, PreRelease: utilfeature.Beta}, + SupportPodPidsLimit: {Default: false, PreRelease: utilfeature.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go b/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go index ef1ff425067..23fb8f8fe84 100644 --- a/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go +++ b/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go @@ -62,6 +62,8 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} { obj.ImageGCLowThresholdPercent = 80 obj.MaxOpenFiles = 1000000 obj.MaxPods = 110 + temp := int64(-1) + obj.PodPidsLimit = &temp obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second} obj.CPUManagerPolicy = "none" obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency diff --git a/pkg/kubelet/apis/kubeletconfig/helpers_test.go b/pkg/kubelet/apis/kubeletconfig/helpers_test.go index 59c3b5d05b6..1edbf38d48e 100644 --- a/pkg/kubelet/apis/kubeletconfig/helpers_test.go +++ b/pkg/kubelet/apis/kubeletconfig/helpers_test.go @@ -193,6 +193,7 @@ var ( "NodeStatusUpdateFrequency.Duration", "OOMScoreAdj", "PodCIDR", + "PodPidsLimit", "PodsPerCore", "Port", "ProtectKernelDefaults", diff --git a/pkg/kubelet/apis/kubeletconfig/types.go b/pkg/kubelet/apis/kubeletconfig/types.go index aee0077d47f..9a3e8555f1b 100644 --- a/pkg/kubelet/apis/kubeletconfig/types.go +++ b/pkg/kubelet/apis/kubeletconfig/types.go @@ -187,6 +187,8 @@ type KubeletConfiguration struct { // The CIDR to use for pod IP addresses, only used in standalone mode. // In cluster mode, this is obtained from the master. PodCIDR string + // PodPidsLimit is the maximum number of pids in any pod. + PodPidsLimit *int64 // ResolverConfig is the resolver configuration file used as the basis // for the container DNS resolution configuration. ResolverConfig string diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go index fe46fb6d8a3..59cdcabf7b5 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go @@ -129,6 +129,10 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) { if obj.MaxPods == 0 { obj.MaxPods = 110 } + if obj.PodPidsLimit == nil { + temp := int64(-1) + obj.PodPidsLimit = &temp + } if obj.NodeStatusUpdateFrequency == zeroDuration { obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second} } diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go index 8d4ffe4a615..068d860c299 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go @@ -184,6 +184,8 @@ type KubeletConfiguration struct { // The CIDR to use for pod IP addresses, only used in standalone mode. // In cluster mode, this is obtained from the master. PodCIDR string `json:"podCIDR"` + // PodPidsLimit is the maximum number of pids in any pod. + PodPidsLimit *int64 `json:"podPidsLimit"` // ResolverConfig is the resolver configuration file used as the basis // for the container DNS resolution configuration. ResolverConfig string `json:"resolvConf"` diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index d1d99713429..5a80a0e62eb 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -328,6 +328,9 @@ func getSupportedSubsystems() []subsystem { if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) { supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{}) } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) { + supportedSubsystems = append(supportedSubsystems, &cgroupfs.PidsGroup{}) + } return supportedSubsystems } @@ -430,6 +433,10 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { Paths: cgroupPaths, } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { + libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit + } + if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil { return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err) } @@ -463,6 +470,10 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { Resources: resources, } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { + libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit + } + // get the manager with the specified cgroup configuration manager, err := m.adapter.newManager(libcontainerCgroupConfig, nil) if err != nil { diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index fd61f2a751b..5bbd3512320 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -106,6 +106,7 @@ type NodeConfig struct { ExperimentalQOSReserved map[v1.ResourceName]int64 ExperimentalCPUManagerPolicy string ExperimentalCPUManagerReconcilePeriod time.Duration + ExperimentalPodPidsLimit int64 } type NodeAllocatableConfig struct { diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index f72ec699c96..abd962a88d2 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -300,6 +300,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { qosContainersInfo: cm.GetQOSContainersInfo(), subsystems: cm.subsystems, cgroupManager: cm.cgroupManager, + podPidsLimit: cm.ExperimentalPodPidsLimit, } } return &podContainerManagerNoop{ diff --git a/pkg/kubelet/cm/pod_container_manager_linux.go b/pkg/kubelet/cm/pod_container_manager_linux.go index e62d192891d..2b0cbf83018 100644 --- a/pkg/kubelet/cm/pod_container_manager_linux.go +++ b/pkg/kubelet/cm/pod_container_manager_linux.go @@ -27,7 +27,9 @@ import ( "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" + utilfeature "k8s.io/apiserver/pkg/util/feature" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" + kubefeatures "k8s.io/kubernetes/pkg/features" ) const ( @@ -45,6 +47,8 @@ type podContainerManagerImpl struct { // cgroupManager is the cgroup Manager Object responsible for managing all // pod cgroups. cgroupManager CgroupManager + // Maximum number of pids in a pod + podPidsLimit int64 } // Make sure that podContainerManagerImpl implements the PodContainerManager interface @@ -77,6 +81,9 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error { Name: podContainerName, ResourceParameters: ResourceConfigForPod(pod), } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && m.podPidsLimit > 0 { + containerConfig.ResourceParameters.PodPidsLimit = &m.podPidsLimit + } if err := m.cgroupManager.Create(containerConfig); err != nil { return fmt.Errorf("failed to create container for %v : %v", podContainerName, err) } diff --git a/pkg/kubelet/cm/types.go b/pkg/kubelet/cm/types.go index 1ee48f9c8eb..ce2cc2c826f 100644 --- a/pkg/kubelet/cm/types.go +++ b/pkg/kubelet/cm/types.go @@ -33,6 +33,8 @@ type ResourceConfig struct { CpuPeriod *uint64 // HugePageLimit map from page size (in bytes) to limit (in bytes) HugePageLimit map[int64]int64 + // Maximum number of pids + PodPidsLimit *int64 } // CgroupName is the abstract name of a cgroup prior to any driver specific conversion.