Fix PidPressure, make it evict by priority, and add fork-bomb node e2e test
commit 8b440c6424 (parent 3d9c6eb9e6)
@@ -63,6 +63,7 @@ var OpForSignal = map[Signal]ThresholdOperator{
     SignalNodeFsInodesFree:  OpLessThan,
     SignalImageFsAvailable:  OpLessThan,
     SignalImageFsInodesFree: OpLessThan,
+    SignalPIDAvailable:      OpLessThan,
 }

 // ThresholdValue is a value holder that abstracts literal versus percentage based quantity
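This entry means pid pressure uses the same less-than comparison as the other signals: the threshold is crossed once the observed available-pid count drops below the configured value. A minimal standalone sketch of that semantics (illustrative numbers, not kubelet code):

    package main

    import "fmt"

    // thresholdMet mirrors the semantics of OpLessThan: a signal with this
    // operator triggers once the observed value falls below the configured threshold.
    func thresholdMet(observed, threshold int64) bool {
        return observed < threshold
    }

    func main() {
        // Illustrative numbers only: 20000 pids still available on the node,
        // with an eviction threshold configured at 22268 available pids.
        fmt.Println(thresholdMet(20000, 22268)) // true -> the pid threshold is met
    }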
@@ -157,7 +157,7 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
     return lifecycle.PodAdmitResult{
         Admit:   false,
         Reason:  Reason,
-        Message: fmt.Sprintf(nodeLowMessageFmt, m.nodeConditions),
+        Message: fmt.Sprintf(nodeConditionMessageFmt, m.nodeConditions),
     }
 }

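Because PID pressure has no single resource quantity to report, admission rejections now describe the node condition rather than a low resource. A standalone sketch of the two formats (the condition value is illustrative; in the kubelet, m.nodeConditions is a slice of node condition types, not strings):

    package main

    import "fmt"

    // Format strings copied from the constants kept/added by this commit.
    const (
        nodeLowMessageFmt       = "The node was low on resource: %v. "
        nodeConditionMessageFmt = "The node had condition: %v. "
    )

    func main() {
        // A plain string slice stands in for m.nodeConditions here.
        nodeConditions := []string{"PIDPressure"}
        fmt.Printf(nodeLowMessageFmt, nodeConditions)       // The node was low on resource: [PIDPressure].
        fmt.Printf(nodeConditionMessageFmt, nodeConditions) // The node had condition: [PIDPressure].
    }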
@@ -40,6 +40,8 @@ const (
     Reason = "Evicted"
     // nodeLowMessageFmt is the message for evictions due to resource pressure.
     nodeLowMessageFmt = "The node was low on resource: %v. "
+    // nodeConditionMessageFmt is the message for evictions due to resource pressure.
+    nodeConditionMessageFmt = "The node had condition: %v. "
     // containerMessageFmt provides additional information for containers exceeding requests
     containerMessageFmt = "Container %s was using %s, which exceeds its request of %s. "
     // containerEphemeralStorageMessageFmt provides additional information for containers which have exceeded their ES limit
@@ -50,6 +52,8 @@ const (
     emptyDirMessageFmt = "Usage of EmptyDir volume %q exceeds the limit %q. "
     // inodes, number. internal to this module, used to account for local disk inode consumption.
     resourceInodes v1.ResourceName = "inodes"
+    // resourcePids, number. internal to this module, used to account for local pid consumption.
+    resourcePids v1.ResourceName = "pids"
     // OffendingContainersKey is the key in eviction event annotations for the list of container names which exceeded their requests
     OffendingContainersKey = "offending_containers"
     // OffendingContainersUsageKey is the key in eviction event annotations for the list of usage of containers which exceeded their requests
@@ -84,6 +88,7 @@ func init() {
     signalToResource[evictionapi.SignalImageFsInodesFree] = resourceInodes
     signalToResource[evictionapi.SignalNodeFsAvailable] = v1.ResourceEphemeralStorage
     signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceInodes
+    signalToResource[evictionapi.SignalPIDAvailable] = resourcePids
 }

 // validSignal returns true if the signal is supported.
@@ -674,6 +679,11 @@ func rankMemoryPressure(pods []*v1.Pod, stats statsFunc) {
     orderedBy(exceedMemoryRequests(stats), priority, memory(stats)).Sort(pods)
 }

+// rankPIDPressure orders the input pods by priority in response to PID pressure.
+func rankPIDPressure(pods []*v1.Pod, stats statsFunc) {
+    orderedBy(priority).Sort(pods)
+}
+
 // rankDiskPressureFunc returns a rankFunc that measures the specified fs stats.
 func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) rankFunc {
     return func(pods []*v1.Pod, stats statsFunc) {
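rankPIDPressure ranks candidate pods by pod priority alone; per-pod usage statistics play no role (the stats argument is unused). The sketch below is not the kubelet's multiSorter, just a minimal illustration of the resulting order using a simplified pod type:

    package main

    import (
        "fmt"
        "sort"
    )

    type pod struct {
        name     string
        priority int32 // stands in for pod.Spec.Priority; higher means more important
    }

    // rankByPriority mimics the effect of orderedBy(priority).Sort(pods) under PID
    // pressure: lower-priority pods sort first and are therefore evicted first.
    func rankByPriority(pods []pod) {
        sort.SliceStable(pods, func(i, j int) bool {
            return pods[i].priority < pods[j].priority
        })
    }

    func main() {
        pods := []pod{
            {name: "innocent-high-priority", priority: 999999999},
            {name: "fork-bomb", priority: 0},
        }
        rankByPriority(pods)
        fmt.Println(pods[0].name) // fork-bomb -> first candidate for eviction
    }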
@@ -987,6 +997,7 @@ func buildSignalToRankFunc(withImageFs bool) map[evictionapi.Signal]rankFunc {
     signalToRankFunc := map[evictionapi.Signal]rankFunc{
         evictionapi.SignalMemoryAvailable:            rankMemoryPressure,
         evictionapi.SignalAllocatableMemoryAvailable: rankMemoryPressure,
+        evictionapi.SignalPIDAvailable:               rankPIDPressure,
     }
     // usage of an imagefs is optional
     if withImageFs {
@@ -943,13 +943,13 @@ func TestSortByEvictionPriority(t *testing.T) {
             expected:   []evictionapi.Threshold{},
         },
         {
-            name: "memory first, PID last",
+            name: "memory first",
             thresholds: []evictionapi.Threshold{
                 {
-                    Signal: evictionapi.SignalPIDAvailable,
+                    Signal: evictionapi.SignalNodeFsAvailable,
                 },
                 {
-                    Signal: evictionapi.SignalNodeFsAvailable,
+                    Signal: evictionapi.SignalPIDAvailable,
                 },
                 {
                     Signal: evictionapi.SignalMemoryAvailable,
@@ -968,13 +968,13 @@ func TestSortByEvictionPriority(t *testing.T) {
             },
         },
         {
-            name: "allocatable memory first, PID last",
+            name: "allocatable memory first",
            thresholds: []evictionapi.Threshold{
                 {
-                    Signal: evictionapi.SignalPIDAvailable,
+                    Signal: evictionapi.SignalNodeFsAvailable,
                 },
                 {
-                    Signal: evictionapi.SignalNodeFsAvailable,
+                    Signal: evictionapi.SignalPIDAvailable,
                 },
                 {
                     Signal: evictionapi.SignalAllocatableMemoryAvailable,
@@ -122,6 +122,7 @@ go_test(
         "//pkg/kubelet/cm/cpuset:go_default_library",
         "//pkg/kubelet/container:go_default_library",
         "//pkg/kubelet/eviction:go_default_library",
+        "//pkg/kubelet/eviction/api:go_default_library",
         "//pkg/kubelet/images:go_default_library",
         "//pkg/kubelet/kubeletconfig:go_default_library",
         "//pkg/kubelet/kubeletconfig/status:go_default_library",
@@ -33,6 +33,7 @@ import (
     kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
     stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
     "k8s.io/kubernetes/pkg/kubelet/eviction"
+    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
     kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
     kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
     "k8s.io/kubernetes/test/e2e/framework"
@@ -78,7 +79,7 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive][NodeF
     if inodesFree <= inodesConsumed {
         framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
     }
-    initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
+    initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
     initialConfig.EvictionMinimumReclaim = map[string]string{}
 })
 runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
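This and the following test hunks swap hard-coded signal strings for the evictionapi constants; the constants evaluate to the same strings (e.g. SignalNodeFsInodesFree is "nodefs.inodesFree"), so the generated EvictionHard keys are unchanged. A standalone sketch, with the constant re-declared locally and illustrative numbers:

    package main

    import "fmt"

    // Signal stands in for evictionapi.Signal; the constant value below is the
    // same string the test previously hard-coded.
    type Signal string

    const SignalNodeFsInodesFree Signal = "nodefs.inodesFree"

    func main() {
        // Illustrative values for inodesFree and inodesConsumed.
        inodesFree, inodesConsumed := uint64(4000000), uint64(200000)
        evictionHard := map[string]string{
            string(SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed),
        }
        fmt.Println(evictionHard) // map[nodefs.inodesFree:3800000]
    }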
@@ -114,7 +115,7 @@ var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive][N
     if inodesFree <= inodesConsumed {
         framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
     }
-    initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
+    initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
     initialConfig.EvictionMinimumReclaim = map[string]string{}
 })
 // Consume enough inodes to induce disk pressure,
@@ -173,7 +174,7 @@ var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive
     diskConsumed := resource.MustParse("100Mi")
     summary := eventuallyGetSummary()
     availableBytes := *(summary.Node.Fs.AvailableBytes)
-    initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+    initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
     initialConfig.EvictionMinimumReclaim = map[string]string{}
 })
 runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
@@ -205,14 +206,14 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
     if availableBytes <= uint64(diskConsumed.Value()) {
         framework.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
     }
-    initialConfig.EvictionSoft = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
-    initialConfig.EvictionSoftGracePeriod = map[string]string{"nodefs.available": "1m"}
+    initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+    initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
     // Defer to the pod default grace period
     initialConfig.EvictionMaxPodGracePeriod = 30
     initialConfig.EvictionMinimumReclaim = map[string]string{}
     // Ensure that pods are not evicted because of the eviction-hard threshold
     // setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-    initialConfig.EvictionHard = map[string]string{"memory.available": "0%"}
+    initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
 })
 runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
     {
@@ -234,7 +235,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
     tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
         // setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-        initialConfig.EvictionHard = map[string]string{"memory.available": "0%"}
+        initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
     })
     sizeLimit := resource.MustParse("100Mi")
     useOverLimit := 101 /* Mb */
@@ -297,7 +298,7 @@ var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [
     if availableBytes <= uint64(memoryConsumed.Value()) {
         framework.Skipf("Too little memory free on the host for the PriorityMemoryEvictionOrdering test to run")
     }
-    initialConfig.EvictionHard = map[string]string{"memory.available": fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
+    initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
     initialConfig.EvictionMinimumReclaim = map[string]string{}
 })
 BeforeEach(func() {
@@ -354,7 +355,7 @@ var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Ser
     if availableBytes <= uint64(diskConsumed.Value()) {
         framework.Skipf("Too little disk free on the host for the PriorityLocalStorageEvictionOrdering test to run")
     }
-    initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+    initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
     initialConfig.EvictionMinimumReclaim = map[string]string{}
 })
 BeforeEach(func() {
@@ -392,6 +393,47 @@ var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Ser
     })
 })

+// PriorityPidEvictionOrdering tests that the node emits pid pressure in response to a fork bomb, and evicts pods by priority
+var _ = framework.KubeDescribe("PriorityPidEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
+    f := framework.NewDefaultFramework("pidpressure-eviction-test")
+    pressureTimeout := 2 * time.Minute
+    expectedNodeCondition := v1.NodePIDPressure
+    expectedStarvedResource := noStarvedResource
+
+    highPriorityClassName := f.BaseName + "-high-priority"
+    highPriority := int32(999999999)
+
+    Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
+            pidsConsumed := int64(10000)
+            summary := eventuallyGetSummary()
+            availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
+            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
+            initialConfig.EvictionMinimumReclaim = map[string]string{}
+        })
+        BeforeEach(func() {
+            _, err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
+            Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
+        })
+        AfterEach(func() {
+            err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
+            Expect(err).NotTo(HaveOccurred())
+        })
+        specs := []podEvictSpec{
+            {
+                evictionPriority: 1,
+                pod:              pidConsumingPod("fork-bomb-container", 12000),
+            },
+            {
+                evictionPriority: 0,
+                pod:              innocentPod(),
+            },
+        }
+        specs[1].pod.Spec.PriorityClassName = highPriorityClassName
+        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
+    })
+})
+
 // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
 type podEvictSpec struct {
     // P0 should never be evicted, P1 shouldn't evict before P2, etc.
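For context on the threshold arithmetic in this new test: the kubelet is configured so that pid pressure fires once roughly 10000 more pids are consumed, which the 12000-process fork bomb comfortably exceeds while the high-priority innocent pod is spared. A standalone sketch with assumed numbers (a pid_max of 32768 is only an illustration; the real values come from the node summary):

    package main

    import "fmt"

    func main() {
        // Assumed numbers for illustration: a common Linux pid_max and a node
        // summary reporting 500 running processes.
        maxPID := int64(32768)         // summary.Node.Rlimit.MaxPID
        runningProcesses := int64(500) // summary.Node.Rlimit.NumOfRunningProcesses
        pidsConsumed := int64(10000)   // slack the test leaves below the available pids

        availablePids := maxPID - runningProcesses
        threshold := availablePids - pidsConsumed
        fmt.Printf("pid eviction hard threshold: %d\n", threshold) // 22268

        // The fork-bomb pod creates roughly 12000 processes, pushing available
        // pids below the threshold and putting the node under PID pressure.
        fmt.Println(availablePids-12000 < threshold) // true
    }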
@@ -722,6 +764,17 @@ func logMemoryMetrics() {
     }
 }

+func logPidMetrics() {
+    summary, err := getNodeSummary()
+    if err != nil {
+        framework.Logf("Error getting summary: %v", err)
+        return
+    }
+    if summary.Node.Rlimit != nil && summary.Node.Rlimit.MaxPID != nil && summary.Node.Rlimit.NumOfRunningProcesses != nil {
+        framework.Logf("Node.Rlimit.MaxPID: %d, Node.Rlimit.RunningProcesses: %d", *summary.Node.Rlimit.MaxPID, *summary.Node.Rlimit.NumOfRunningProcesses)
+    }
+}
+
 func eventuallyGetSummary() (s *stats.Summary) {
     Eventually(func() error {
         summary, err := getNodeSummary()
@@ -764,23 +817,33 @@ const (
 )

 func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
+    path := ""
+    if volumeSource != nil {
+        path = volumeMountPath
+    }
     // Each iteration creates an empty file
-    return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, "touch %s${i}.txt; sleep 0.001")
+    return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, fmt.Sprintf("touch %s${i}.txt; sleep 0.001;", filepath.Join(path, "file")))
 }

 func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
+    path := ""
+    if volumeSource != nil {
+        path = volumeMountPath
+    }
     // Each iteration writes 1 Mb, so do diskConsumedMB iterations.
-    return podWithCommand(volumeSource, resources, diskConsumedMB, name, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null")
+    return podWithCommand(volumeSource, resources, diskConsumedMB, name, fmt.Sprintf("dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null;", filepath.Join(path, "file")))
 }

+func pidConsumingPod(name string, numProcesses int) *v1.Pod {
+    // Each iteration forks once, but creates two processes
+    return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do sleep 5; done)&")
+}
+
 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
-// If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
 func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
-    path := ""
     volumeMounts := []v1.VolumeMount{}
     volumes := []v1.Volume{}
     if volumeSource != nil {
-        path = volumeMountPath
         volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
         volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
     }
@@ -795,7 +858,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
     Command: []string{
         "sh",
         "-c",
-        fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, fmt.Sprintf(command, filepath.Join(path, "file"))),
+        fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command),
     },
     Resources:    resources,
     VolumeMounts: volumeMounts,
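After this change, podWithCommand splices the caller's command directly into the loop body without adding a separator or joining in a file path, which is why the touch and dd commands above gained a trailing ";" and the fork-bomb command ends with "&". A standalone sketch (plain Go, not part of the diff) of the script that pidConsumingPod("fork-bomb-container", 12000) would produce:

    package main

    import "fmt"

    func main() {
        // pidConsumingPod halves numProcesses because each iteration forks once
        // but creates two processes (the backgrounded subshell and its sleep).
        iterations := 12000 / 2
        command := "(while true; do sleep 5; done)&"

        // Composition used by podWithCommand after this change: the wrapper no
        // longer appends a separator, so the injected command must end in ";" or "&".
        script := fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command)
        fmt.Println(script)
        // i=0; while [ $i -lt 6000 ]; do (while true; do sleep 5; done)& i=$(($i+1)); done; while true; do sleep 5; done
    }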