Set pids limit at pod level

Add a new Alpha Feature to set a maximum number of pids per Pod.
This is to allow the use case where cluster administrators wish
to limit the pids consumed per pod (example when running a CI system).

By default, we do not set any maximum limit, If an administrator wants
to enable this, they should enable `SupportPodPidsLimit=true` in the
`--feature-gates=` parameter to kubelet and specify the limit using the
`--pod-max-pids` parameter.

The limit set is the total count of all processes running in all
containers in the pod.
This commit is contained in:
Davanum Srinivas 2018-01-08 12:32:34 -05:00
parent 09cf6c9776
commit ecd6361ff0
13 changed files with 43 additions and 0 deletions

View File

@ -478,6 +478,8 @@ func AddKubeletConfigFlags(fs *pflag.FlagSet, c *kubeletconfig.KubeletConfigurat
fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.")
fs.StringVar(&c.PodCIDR, "pod-cidr", c.PodCIDR, "The CIDR to use for pod IP addresses, only used in standalone mode. In cluster mode, this is obtained from the master.")
fs.Int64Var(c.PodPidsLimit, "pod-max-pids", *c.PodPidsLimit, "<Warning: Alpha feature> Set the maximum number of processes per pod.")
fs.StringVar(&c.ResolverConfig, "resolv-conf", c.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.")
fs.BoolVar(&c.CPUCFSQuota, "cpu-cfs-quota", c.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits")
fs.BoolVar(&c.EnableControllerAttachDetach, "enable-controller-attach-detach", c.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations")

View File

@ -471,6 +471,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
ExperimentalQOSReserved: *experimentalQOSReserved,
ExperimentalCPUManagerPolicy: s.CPUManagerPolicy,
ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
ExperimentalPodPidsLimit: *s.PodPidsLimit,
},
s.FailSwapOn,
devicePluginEnabled,

View File

@ -220,6 +220,12 @@ const (
//
// Implement IPVS-based in-cluster service load balancing
SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode"
// owner: @dims
// alpha: v1.10
//
// Implement support for limiting pids in pods
SupportPodPidsLimit utilfeature.Feature = "SupportPodPidsLimit"
)
func init() {
@ -261,6 +267,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
PVCProtection: {Default: false, PreRelease: utilfeature.Alpha},
ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha},
SupportIPVSProxyMode: {Default: false, PreRelease: utilfeature.Beta},
SupportPodPidsLimit: {Default: false, PreRelease: utilfeature.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:

View File

@ -62,6 +62,8 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
obj.ImageGCLowThresholdPercent = 80
obj.MaxOpenFiles = 1000000
obj.MaxPods = 110
temp := int64(-1)
obj.PodPidsLimit = &temp
obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second}
obj.CPUManagerPolicy = "none"
obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency

View File

@ -193,6 +193,7 @@ var (
"NodeStatusUpdateFrequency.Duration",
"OOMScoreAdj",
"PodCIDR",
"PodPidsLimit",
"PodsPerCore",
"Port",
"ProtectKernelDefaults",

View File

@ -187,6 +187,8 @@ type KubeletConfiguration struct {
// The CIDR to use for pod IP addresses, only used in standalone mode.
// In cluster mode, this is obtained from the master.
PodCIDR string
// PodPidsLimit is the maximum number of pids in any pod.
PodPidsLimit *int64
// ResolverConfig is the resolver configuration file used as the basis
// for the container DNS resolution configuration.
ResolverConfig string

View File

@ -129,6 +129,10 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.MaxPods == 0 {
obj.MaxPods = 110
}
if obj.PodPidsLimit == nil {
temp := int64(-1)
obj.PodPidsLimit = &temp
}
if obj.NodeStatusUpdateFrequency == zeroDuration {
obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second}
}

View File

@ -184,6 +184,8 @@ type KubeletConfiguration struct {
// The CIDR to use for pod IP addresses, only used in standalone mode.
// In cluster mode, this is obtained from the master.
PodCIDR string `json:"podCIDR"`
// PodPidsLimit is the maximum number of pids in any pod.
PodPidsLimit *int64 `json:"podPidsLimit"`
// ResolverConfig is the resolver configuration file used as the basis
// for the container DNS resolution configuration.
ResolverConfig string `json:"resolvConf"`

View File

@ -328,6 +328,9 @@ func getSupportedSubsystems() []subsystem {
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) {
supportedSubsystems = append(supportedSubsystems, &cgroupfs.HugetlbGroup{})
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) {
supportedSubsystems = append(supportedSubsystems, &cgroupfs.PidsGroup{})
}
return supportedSubsystems
}
@ -430,6 +433,10 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
Paths: cgroupPaths,
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
}
if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil {
return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err)
}
@ -463,6 +470,10 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
Resources: resources,
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
}
// get the manager with the specified cgroup configuration
manager, err := m.adapter.newManager(libcontainerCgroupConfig, nil)
if err != nil {

View File

@ -106,6 +106,7 @@ type NodeConfig struct {
ExperimentalQOSReserved map[v1.ResourceName]int64
ExperimentalCPUManagerPolicy string
ExperimentalCPUManagerReconcilePeriod time.Duration
ExperimentalPodPidsLimit int64
}
type NodeAllocatableConfig struct {

View File

@ -300,6 +300,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
qosContainersInfo: cm.GetQOSContainersInfo(),
subsystems: cm.subsystems,
cgroupManager: cm.cgroupManager,
podPidsLimit: cm.ExperimentalPodPidsLimit,
}
}
return &podContainerManagerNoop{

View File

@ -27,7 +27,9 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
kubefeatures "k8s.io/kubernetes/pkg/features"
)
const (
@ -45,6 +47,8 @@ type podContainerManagerImpl struct {
// cgroupManager is the cgroup Manager Object responsible for managing all
// pod cgroups.
cgroupManager CgroupManager
// Maximum number of pids in a pod
podPidsLimit int64
}
// Make sure that podContainerManagerImpl implements the PodContainerManager interface
@ -77,6 +81,9 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
Name: podContainerName,
ResourceParameters: ResourceConfigForPod(pod),
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && m.podPidsLimit > 0 {
containerConfig.ResourceParameters.PodPidsLimit = &m.podPidsLimit
}
if err := m.cgroupManager.Create(containerConfig); err != nil {
return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
}

View File

@ -33,6 +33,8 @@ type ResourceConfig struct {
CpuPeriod *uint64
// HugePageLimit map from page size (in bytes) to limit (in bytes)
HugePageLimit map[int64]int64
// Maximum number of pids
PodPidsLimit *int64
}
// CgroupName is the abstract name of a cgroup prior to any driver specific conversion.