Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-25 12:43:23 +00:00
Merge pull request #57973 from dims/set-pids-limit-at-pod-level
Automatic merge from submit-queue (batch tested with PRs 57973, 57990). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md

Set pids limit at pod level

**What this PR does / why we need it**: Adds a new alpha feature that sets a maximum number of pids per pod, for cluster administrators who wish to limit the pids consumed per pod (for example, when running a CI system). By default no maximum limit is set. To enable this, an administrator should set `SupportPodPidsLimit=true` in the kubelet's `--feature-gates=` parameter and specify the limit with the `--pod-max-pids` parameter. The limit is the total count of all processes running in all containers in the pod.

**Which issue(s) this PR fixes**: Fixes #43783

**Release note**:

```release-note
New alpha feature to limit the number of processes running in a pod. Cluster administrators can place limits by using the new kubelet command line parameter --pod-max-pids. Note that since this is an alpha feature, they will need to enable the "SupportPodPidsLimit" feature gate.
```
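By way of illustration, enabling the feature on a node could look like the following kubelet invocation. The two flags are the ones introduced by this PR; the limit value of 1024 and the rest of the command line are placeholders:

```
kubelet --feature-gates=SupportPodPidsLimit=true --pod-max-pids=1024 ...
```

Because the limit is applied at the pod level, those 1024 pids are shared across all containers in the pod rather than granted to each container individually.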
This commit is contained in commit bf111161b7.
```diff
@@ -485,6 +485,8 @@ func AddKubeletConfigFlags(fs *pflag.FlagSet, c *kubeletconfig.KubeletConfigurat
 	fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.")
 
 	fs.StringVar(&c.PodCIDR, "pod-cidr", c.PodCIDR, "The CIDR to use for pod IP addresses, only used in standalone mode. In cluster mode, this is obtained from the master.")
+	fs.Int64Var(c.PodPidsLimit, "pod-max-pids", *c.PodPidsLimit, "<Warning: Alpha feature> Set the maximum number of processes per pod.")
+
 	fs.StringVar(&c.ResolverConfig, "resolv-conf", c.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.")
 	fs.BoolVar(&c.CPUCFSQuota, "cpu-cfs-quota", c.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits")
 	fs.BoolVar(&c.EnableControllerAttachDetach, "enable-controller-attach-detach", c.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations")
```
```diff
@@ -525,6 +525,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
 			ExperimentalQOSReserved:               *experimentalQOSReserved,
 			ExperimentalCPUManagerPolicy:          s.CPUManagerPolicy,
 			ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
+			ExperimentalPodPidsLimit:              *s.PodPidsLimit,
 		},
 		s.FailSwapOn,
 		devicePluginEnabled,
```
```diff
@@ -223,6 +223,12 @@ const (
 	//
 	// Implement IPVS-based in-cluster service load balancing
 	SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode"
+
+	// owner: @dims
+	// alpha: v1.10
+	//
+	// Implement support for limiting pids in pods
+	SupportPodPidsLimit utilfeature.Feature = "SupportPodPidsLimit"
 )
 
 func init() {
```
```diff
@@ -263,6 +269,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
 	PVCProtection:                  {Default: false, PreRelease: utilfeature.Alpha},
 	ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha},
 	SupportIPVSProxyMode:           {Default: false, PreRelease: utilfeature.Beta},
+	SupportPodPidsLimit:            {Default: false, PreRelease: utilfeature.Alpha},
 
 	// inherited features from generic apiserver, relisted here to get a conflict if it is changed
 	// unintentionally on either side:
```
```diff
@@ -62,6 +62,8 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
 			obj.ImageGCLowThresholdPercent = 80
 			obj.MaxOpenFiles = 1000000
 			obj.MaxPods = 110
+			temp := int64(-1)
+			obj.PodPidsLimit = &temp
 			obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second}
 			obj.CPUManagerPolicy = "none"
 			obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency
```
```diff
@@ -195,6 +195,7 @@ var (
 		"NodeStatusUpdateFrequency.Duration",
 		"OOMScoreAdj",
 		"PodCIDR",
+		"PodPidsLimit",
 		"PodsPerCore",
 		"Port",
 		"ProtectKernelDefaults",
```
```diff
@@ -193,6 +193,8 @@ type KubeletConfiguration struct {
 	// The CIDR to use for pod IP addresses, only used in standalone mode.
 	// In cluster mode, this is obtained from the master.
 	PodCIDR string
+	// PodPidsLimit is the maximum number of pids in any pod.
+	PodPidsLimit *int64
 	// ResolverConfig is the resolver configuration file used as the basis
 	// for the container DNS resolution configuration.
 	ResolverConfig string
```
```diff
@@ -129,6 +129,10 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
 	if obj.MaxPods == 0 {
 		obj.MaxPods = 110
 	}
+	if obj.PodPidsLimit == nil {
+		temp := int64(-1)
+		obj.PodPidsLimit = &temp
+	}
 	if obj.NodeStatusUpdateFrequency == zeroDuration {
 		obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second}
 	}
```
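The defaulting above establishes `-1` as the "unset" sentinel: a nil `PodPidsLimit` becomes a pointer to `-1`, and the pod container manager later applies a limit only when the value is positive (see the `m.podPidsLimit > 0` check further down). A minimal standalone sketch of that contract, using a hypothetical helper that is not part of the PR:

```go
package main

import "fmt"

// effectivePidsLimit is a hypothetical helper (not part of the PR) that
// mirrors the contract set up by SetDefaults_KubeletConfiguration: a nil
// PodPidsLimit defaults to -1, and any value <= 0 means "do not constrain
// pids for the pod".
func effectivePidsLimit(podPidsLimit *int64) (int64, bool) {
	if podPidsLimit == nil || *podPidsLimit <= 0 {
		return 0, false
	}
	return *podPidsLimit, true
}

func main() {
	var unset *int64
	if _, ok := effectivePidsLimit(unset); !ok {
		fmt.Println("no pod pids limit configured")
	}
	limit := int64(1024)
	if v, ok := effectivePidsLimit(&limit); ok {
		fmt.Printf("pod cgroups would get a pids limit of %d\n", v)
	}
}
```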
```diff
@@ -190,6 +190,8 @@ type KubeletConfiguration struct {
 	// The CIDR to use for pod IP addresses, only used in standalone mode.
 	// In cluster mode, this is obtained from the master.
 	PodCIDR string `json:"podCIDR"`
+	// PodPidsLimit is the maximum number of pids in any pod.
+	PodPidsLimit *int64 `json:"podPidsLimit"`
 	// ResolverConfig is the resolver configuration file used as the basis
 	// for the container DNS resolution configuration.
 	ResolverConfig string `json:"resolvConf"`
```
```diff
@@ -213,6 +213,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_kubeletconfig_KubeletConfigura
 	out.HairpinMode = in.HairpinMode
 	out.MaxPods = in.MaxPods
 	out.PodCIDR = in.PodCIDR
+	out.PodPidsLimit = (*int64)(unsafe.Pointer(in.PodPidsLimit))
 	out.ResolverConfig = in.ResolverConfig
 	if err := v1.Convert_Pointer_bool_To_bool(&in.CPUCFSQuota, &out.CPUCFSQuota, s); err != nil {
 		return err
```
```diff
@@ -336,6 +337,7 @@ func autoConvert_kubeletconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigura
 	out.HairpinMode = in.HairpinMode
 	out.MaxPods = in.MaxPods
 	out.PodCIDR = in.PodCIDR
+	out.PodPidsLimit = (*int64)(unsafe.Pointer(in.PodPidsLimit))
 	out.ResolverConfig = in.ResolverConfig
 	if err := v1.Convert_bool_To_Pointer_bool(&in.CPUCFSQuota, &out.CPUCFSQuota, s); err != nil {
 		return err
```
```diff
@@ -231,6 +231,15 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
 	}
 	out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
 	out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
+	if in.PodPidsLimit != nil {
+		in, out := &in.PodPidsLimit, &out.PodPidsLimit
+		if *in == nil {
+			*out = nil
+		} else {
+			*out = new(int64)
+			**out = **in
+		}
+	}
 	if in.CPUCFSQuota != nil {
 		in, out := &in.CPUCFSQuota, &out.CPUCFSQuota
 		if *in == nil {
```
```diff
@@ -123,6 +123,15 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
 	out.VolumeStatsAggPeriod = in.VolumeStatsAggPeriod
 	out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
 	out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
+	if in.PodPidsLimit != nil {
+		in, out := &in.PodPidsLimit, &out.PodPidsLimit
+		if *in == nil {
+			*out = nil
+		} else {
+			*out = new(int64)
+			**out = **in
+		}
+	}
 	if in.EvictionHard != nil {
 		in, out := &in.EvictionHard, &out.EvictionHard
 		*out = make(map[string]string, len(*in))
```
```diff
@@ -328,6 +328,9 @@ func getSupportedSubsystems() []subsystem {
 	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
 		supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{})
 	}
+	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) {
+		supportedSubsystems = append(supportedSubsystems, &cgroupfs.PidsGroup{})
+	}
 	return supportedSubsystems
 }
 
```
```diff
@@ -430,6 +433,10 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
 		Paths: cgroupPaths,
 	}
 
+	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
+		libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
+	}
+
 	if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil {
 		return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err)
 	}
```
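With `PidsLimit` set on the libcontainer cgroup config, the pids cgroup controller enforces the cap; on Linux that ultimately comes down to writing the value into the pod cgroup's `pids.max` file, after which the kernel refuses fork/clone calls beyond the limit. A rough sketch of that end state, assuming cgroup v1 paths; this is illustrative and not libcontainer's actual code:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

// writePidsMax sketches what the pids cgroup subsystem ultimately does with
// the limit: write it into pids.max under the pod's cgroup directory. The
// function name and path handling are illustrative, not libcontainer's API.
func writePidsMax(cgroupPath string, pidsLimit int64) error {
	value := "max" // non-positive limits are treated as unlimited
	if pidsLimit > 0 {
		value = strconv.FormatInt(pidsLimit, 10)
	}
	return os.WriteFile(filepath.Join(cgroupPath, "pids.max"), []byte(value), 0644)
}

func main() {
	// The path is a placeholder; real pod cgroup paths include QoS class and UID.
	if err := writePidsMax("/sys/fs/cgroup/pids/kubepods/podexample", 1024); err != nil {
		fmt.Println("write failed (expected outside a real pod cgroup):", err)
	}
}
```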
```diff
@@ -463,6 +470,10 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 		Resources: resources,
 	}
 
+	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
+		libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
+	}
+
 	// get the manager with the specified cgroup configuration
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, nil)
 	if err != nil {
```
```diff
@@ -107,6 +107,7 @@ type NodeConfig struct {
 	ExperimentalQOSReserved               map[v1.ResourceName]int64
 	ExperimentalCPUManagerPolicy          string
 	ExperimentalCPUManagerReconcilePeriod time.Duration
+	ExperimentalPodPidsLimit              int64
 }
 
 type NodeAllocatableConfig struct {
```
```diff
@@ -300,6 +300,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
 			qosContainersInfo: cm.GetQOSContainersInfo(),
 			subsystems:        cm.subsystems,
 			cgroupManager:     cm.cgroupManager,
+			podPidsLimit:      cm.ExperimentalPodPidsLimit,
 		}
 	}
 	return &podContainerManagerNoop{
```
```diff
@@ -27,7 +27,9 @@ import (
 	"k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/types"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
+	kubefeatures "k8s.io/kubernetes/pkg/features"
 )
 
 const (
```
```diff
@@ -45,6 +47,8 @@ type podContainerManagerImpl struct {
 	// cgroupManager is the cgroup Manager Object responsible for managing all
 	// pod cgroups.
 	cgroupManager CgroupManager
+	// Maximum number of pids in a pod
+	podPidsLimit int64
 }
 
 // Make sure that podContainerManagerImpl implements the PodContainerManager interface
```
```diff
@@ -77,6 +81,9 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
 		Name:               podContainerName,
 		ResourceParameters: ResourceConfigForPod(pod),
 	}
+	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && m.podPidsLimit > 0 {
+		containerConfig.ResourceParameters.PodPidsLimit = &m.podPidsLimit
+	}
 	if err := m.cgroupManager.Create(containerConfig); err != nil {
 		return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
 	}
```
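Since `EnsureExists` attaches the limit when the pod-level cgroup is created, one way to observe the result on a node is to read `pids.max` and `pids.current` under the pod's cgroup directory. A small sketch, assuming the cgroupfs driver; the `podexample` directory name is a placeholder, since the real path includes the pod's UID and QoS class:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	// Placeholder path: the real directory name includes the pod's UID and
	// QoS class, and differs under the systemd cgroup driver.
	dir := "/sys/fs/cgroup/pids/kubepods/podexample"
	for _, name := range []string{"pids.max", "pids.current"} {
		data, err := os.ReadFile(filepath.Join(dir, name))
		if err != nil {
			fmt.Printf("%s: read failed: %v\n", name, err)
			continue
		}
		fmt.Printf("%s = %s", name, data)
	}
}
```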
```diff
@@ -33,6 +33,8 @@ type ResourceConfig struct {
 	CpuPeriod *uint64
 	// HugePageLimit map from page size (in bytes) to limit (in bytes)
 	HugePageLimit map[int64]int64
+	// Maximum number of pids
+	PodPidsLimit *int64
 }
 
 // CgroupName is the abstract name of a cgroup prior to any driver specific conversion.
```