From ecd6361ff0e8421332a50e55fcba17b823d5d338 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Mon, 8 Jan 2018 12:32:34 -0500 Subject: [PATCH 1/2] Set pids limit at pod level Add a new Alpha Feature to set a maximum number of pids per Pod. This is to allow the use case where cluster administrators wish to limit the pids consumed per pod (for example, when running a CI system). By default, we do not set any maximum limit. If an administrator wants to enable this, they should enable `SupportPodPidsLimit=true` in the `--feature-gates=` parameter to kubelet and specify the limit using the `--pod-max-pids` parameter. The limit set is the total count of all processes running in all containers in the pod. --- cmd/kubelet/app/options/options.go | 2 ++ cmd/kubelet/app/server.go | 1 + pkg/features/kube_features.go | 7 +++++++ pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go | 2 ++ pkg/kubelet/apis/kubeletconfig/helpers_test.go | 1 + pkg/kubelet/apis/kubeletconfig/types.go | 2 ++ pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go | 4 ++++ pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go | 2 ++ pkg/kubelet/cm/cgroup_manager_linux.go | 11 +++++++++++ pkg/kubelet/cm/container_manager.go | 1 + pkg/kubelet/cm/container_manager_linux.go | 1 + pkg/kubelet/cm/pod_container_manager_linux.go | 7 +++++++ pkg/kubelet/cm/types.go | 2 ++ 13 files changed, 43 insertions(+) diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index 250da221f3b..875bd065af1 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -478,6 +478,8 @@ func AddKubeletConfigFlags(fs *pflag.FlagSet, c *kubeletconfig.KubeletConfigurat fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.") fs.StringVar(&c.PodCIDR, "pod-cidr", c.PodCIDR, "The CIDR to use for pod IP addresses, only used in standalone mode. 
In cluster mode, this is obtained from the master.") + fs.Int64Var(c.PodPidsLimit, "pod-max-pids", *c.PodPidsLimit, " Set the maximum number of processes per pod.") + fs.StringVar(&c.ResolverConfig, "resolv-conf", c.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.") fs.BoolVar(&c.CPUCFSQuota, "cpu-cfs-quota", c.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits") fs.BoolVar(&c.EnableControllerAttachDetach, "enable-controller-attach-detach", c.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations") diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index a3eb633f980..867ed43678b 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -471,6 +471,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) { ExperimentalQOSReserved: *experimentalQOSReserved, ExperimentalCPUManagerPolicy: s.CPUManagerPolicy, ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration, + ExperimentalPodPidsLimit: *s.PodPidsLimit, }, s.FailSwapOn, devicePluginEnabled, diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index c6cbc6cd586..e06d125252c 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -220,6 +220,12 @@ const ( // // Implement IPVS-based in-cluster service load balancing SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode" + + // owner: @dims + // alpha: v1.10 + // + // Implement support for limiting pids in pods + SupportPodPidsLimit utilfeature.Feature = "SupportPodPidsLimit" ) func init() { @@ -261,6 +267,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS PVCProtection: {Default: false, PreRelease: utilfeature.Alpha}, ResourceLimitsPriorityFunction: {Default: 
false, PreRelease: utilfeature.Alpha}, SupportIPVSProxyMode: {Default: false, PreRelease: utilfeature.Beta}, + SupportPodPidsLimit: {Default: false, PreRelease: utilfeature.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go b/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go index ef1ff425067..23fb8f8fe84 100644 --- a/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go +++ b/pkg/kubelet/apis/kubeletconfig/fuzzer/fuzzer.go @@ -62,6 +62,8 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} { obj.ImageGCLowThresholdPercent = 80 obj.MaxOpenFiles = 1000000 obj.MaxPods = 110 + temp := int64(-1) + obj.PodPidsLimit = &temp obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second} obj.CPUManagerPolicy = "none" obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency diff --git a/pkg/kubelet/apis/kubeletconfig/helpers_test.go b/pkg/kubelet/apis/kubeletconfig/helpers_test.go index 59c3b5d05b6..1edbf38d48e 100644 --- a/pkg/kubelet/apis/kubeletconfig/helpers_test.go +++ b/pkg/kubelet/apis/kubeletconfig/helpers_test.go @@ -193,6 +193,7 @@ var ( "NodeStatusUpdateFrequency.Duration", "OOMScoreAdj", "PodCIDR", + "PodPidsLimit", "PodsPerCore", "Port", "ProtectKernelDefaults", diff --git a/pkg/kubelet/apis/kubeletconfig/types.go b/pkg/kubelet/apis/kubeletconfig/types.go index aee0077d47f..9a3e8555f1b 100644 --- a/pkg/kubelet/apis/kubeletconfig/types.go +++ b/pkg/kubelet/apis/kubeletconfig/types.go @@ -187,6 +187,8 @@ type KubeletConfiguration struct { // The CIDR to use for pod IP addresses, only used in standalone mode. // In cluster mode, this is obtained from the master. PodCIDR string + // PodPidsLimit is the maximum number of pids in any pod. + PodPidsLimit *int64 // ResolverConfig is the resolver configuration file used as the basis // for the container DNS resolution configuration. 
ResolverConfig string diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go index fe46fb6d8a3..59cdcabf7b5 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go @@ -129,6 +129,10 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) { if obj.MaxPods == 0 { obj.MaxPods = 110 } + if obj.PodPidsLimit == nil { + temp := int64(-1) + obj.PodPidsLimit = &temp + } if obj.NodeStatusUpdateFrequency == zeroDuration { obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second} } diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go index 8d4ffe4a615..068d860c299 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go @@ -184,6 +184,8 @@ type KubeletConfiguration struct { // The CIDR to use for pod IP addresses, only used in standalone mode. // In cluster mode, this is obtained from the master. PodCIDR string `json:"podCIDR"` + // PodPidsLimit is the maximum number of pids in any pod. + PodPidsLimit *int64 `json:"podPidsLimit"` // ResolverConfig is the resolver configuration file used as the basis // for the container DNS resolution configuration. 
ResolverConfig string `json:"resolvConf"` diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index d1d99713429..5a80a0e62eb 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -328,6 +328,9 @@ func getSupportedSubsystems() []subsystem { if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) { supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{}) } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) { + supportedSubsystems = append(supportedSubsystems, &cgroupfs.PidsGroup{}) + } return supportedSubsystems } @@ -430,6 +433,10 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { Paths: cgroupPaths, } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { + libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit + } + if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil { return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err) } @@ -463,6 +470,10 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { Resources: resources, } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { + libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit + } + // get the manager with the specified cgroup configuration manager, err := m.adapter.newManager(libcontainerCgroupConfig, nil) if err != nil { diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index fd61f2a751b..5bbd3512320 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -106,6 +106,7 @@ type NodeConfig struct { ExperimentalQOSReserved map[v1.ResourceName]int64 ExperimentalCPUManagerPolicy 
string ExperimentalCPUManagerReconcilePeriod time.Duration + ExperimentalPodPidsLimit int64 } type NodeAllocatableConfig struct { diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index f72ec699c96..abd962a88d2 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -300,6 +300,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { qosContainersInfo: cm.GetQOSContainersInfo(), subsystems: cm.subsystems, cgroupManager: cm.cgroupManager, + podPidsLimit: cm.ExperimentalPodPidsLimit, } } return &podContainerManagerNoop{ diff --git a/pkg/kubelet/cm/pod_container_manager_linux.go b/pkg/kubelet/cm/pod_container_manager_linux.go index e62d192891d..2b0cbf83018 100644 --- a/pkg/kubelet/cm/pod_container_manager_linux.go +++ b/pkg/kubelet/cm/pod_container_manager_linux.go @@ -27,7 +27,9 @@ import ( "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" + utilfeature "k8s.io/apiserver/pkg/util/feature" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" + kubefeatures "k8s.io/kubernetes/pkg/features" ) const ( @@ -45,6 +47,8 @@ type podContainerManagerImpl struct { // cgroupManager is the cgroup Manager Object responsible for managing all // pod cgroups. 
cgroupManager CgroupManager + // Maximum number of pids in a pod + podPidsLimit int64 } // Make sure that podContainerManagerImpl implements the PodContainerManager interface @@ -77,6 +81,9 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error { Name: podContainerName, ResourceParameters: ResourceConfigForPod(pod), } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && m.podPidsLimit > 0 { + containerConfig.ResourceParameters.PodPidsLimit = &m.podPidsLimit + } if err := m.cgroupManager.Create(containerConfig); err != nil { return fmt.Errorf("failed to create container for %v : %v", podContainerName, err) } diff --git a/pkg/kubelet/cm/types.go b/pkg/kubelet/cm/types.go index 1ee48f9c8eb..ce2cc2c826f 100644 --- a/pkg/kubelet/cm/types.go +++ b/pkg/kubelet/cm/types.go @@ -33,6 +33,8 @@ type ResourceConfig struct { CpuPeriod *uint64 // HugePageLimit map from page size (in bytes) to limit (in bytes) HugePageLimit map[int64]int64 + // Maximum number of pids + PodPidsLimit *int64 } // CgroupName is the abstract name of a cgroup prior to any driver specific conversion. 
From 3df1ce53fb528f7bf8a0ce89ea08116b0b998833 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Mon, 8 Jan 2018 16:21:10 -0500 Subject: [PATCH 2/2] update generated code --- .../kubeletconfig/v1alpha1/zz_generated.conversion.go | 2 ++ .../apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go | 9 +++++++++ pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go | 9 +++++++++ 3 files changed, 20 insertions(+) diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go index 9368d7c0a22..a860541885f 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.conversion.go @@ -211,6 +211,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_kubeletconfig_KubeletConfigura out.HairpinMode = in.HairpinMode out.MaxPods = in.MaxPods out.PodCIDR = in.PodCIDR + out.PodPidsLimit = (*int64)(unsafe.Pointer(in.PodPidsLimit)) out.ResolverConfig = in.ResolverConfig if err := v1.Convert_Pointer_bool_To_bool(&in.CPUCFSQuota, &out.CPUCFSQuota, s); err != nil { return err @@ -332,6 +333,7 @@ func autoConvert_kubeletconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigura out.HairpinMode = in.HairpinMode out.MaxPods = in.MaxPods out.PodCIDR = in.PodCIDR + out.PodPidsLimit = (*int64)(unsafe.Pointer(in.PodPidsLimit)) out.ResolverConfig = in.ResolverConfig if err := v1.Convert_bool_To_Pointer_bool(&in.CPUCFSQuota, &out.CPUCFSQuota, s); err != nil { return err diff --git a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go index 700a997a931..fc03c66ee0f 100644 --- a/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/kubeletconfig/v1alpha1/zz_generated.deepcopy.go @@ -226,6 +226,15 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { } out.CPUManagerReconcilePeriod = 
in.CPUManagerReconcilePeriod out.RuntimeRequestTimeout = in.RuntimeRequestTimeout + if in.PodPidsLimit != nil { + in, out := &in.PodPidsLimit, &out.PodPidsLimit + if *in == nil { + *out = nil + } else { + *out = new(int64) + **out = **in + } + } if in.CPUCFSQuota != nil { in, out := &in.CPUCFSQuota, &out.CPUCFSQuota if *in == nil { diff --git a/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go b/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go index eb788f3b4c2..e3027beaef0 100644 --- a/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go @@ -118,6 +118,15 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { out.VolumeStatsAggPeriod = in.VolumeStatsAggPeriod out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod out.RuntimeRequestTimeout = in.RuntimeRequestTimeout + if in.PodPidsLimit != nil { + in, out := &in.PodPidsLimit, &out.PodPidsLimit + if *in == nil { + *out = nil + } else { + *out = new(int64) + **out = **in + } + } if in.EvictionHard != nil { in, out := &in.EvictionHard, &out.EvictionHard *out = make(map[string]string, len(*in))