Merge pull request #59841 from dashpole/metrics_after_reclaim

Automatic merge from submit-queue (batch tested with PRs 59683, 59964, 59841, 59936, 59686). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Reevaluate eviction thresholds after reclaim functions

**What this PR does / why we need it**:
When the node comes under `DiskPressure` due to inodes or disk space, the eviction manager runs garbage collection functions to clean up dead containers and unused images.
Currently, we try to measure the disk space and inodes freed by garbage collection. However, as #46789 and #56573 point out, there are gaps in the implementation that can cause extra evictions even when none are required. Furthermore, on nodes that frequently cycle through images, this strategy results in a large number of evictions, since running out of inodes always triggers an eviction.

This PR changes that strategy: the eviction manager now calls the garbage collection functions and ignores the amount they report as reclaimed. It then triggers another collection of node-level metrics and checks whether the node is still under `DiskPressure`.
This way, we simply observe the decrease in disk or inode usage, rather than trying to measure how much was freed.
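For illustration, here is a minimal, self-contained sketch of that flow; the types and helper names (`observation`, `threshold`, `reclaimNodeLevelResources`) are simplified stand-ins, not the eviction manager's actual API:

```go
package main

import "fmt"

// observation and threshold are simplified stand-ins for the eviction
// manager's signal observations and configured eviction thresholds.
type observation struct {
	inodesFree uint64
	diskFree   uint64 // bytes
}

type threshold struct {
	minInodesFree uint64
	minDiskFree   uint64
}

func (t threshold) met(o observation) bool {
	return o.inodesFree < t.minInodesFree || o.diskFree < t.minDiskFree
}

// reclaimNodeLevelResources runs the garbage-collection functions, ignores
// whatever they report as reclaimed, collects a fresh node-level observation,
// and reports whether the threshold is no longer exceeded (i.e. eviction can
// be skipped).
func reclaimNodeLevelResources(reclaimFuncs []func() error, observe func() observation, t threshold) bool {
	for _, reclaim := range reclaimFuncs {
		if err := reclaim(); err != nil {
			fmt.Println("reclaim failed, continuing:", err) // reclaim is best-effort
		}
	}
	return !t.met(observe()) // re-evaluate against fresh metrics, not a freed-bytes estimate
}

func main() {
	gc := []func() error{
		func() error { return nil }, // e.g. delete dead containers
		func() error { return nil }, // e.g. delete unused images
	}
	// Pretend the post-GC measurement shows plenty of free inodes and disk.
	observe := func() observation { return observation{inodesFree: 2000000, diskFree: 50 << 30} }
	relieved := reclaimNodeLevelResources(gc, observe, threshold{minInodesFree: 1000000, minDiskFree: 10 << 30})
	fmt.Println("pressure relieved without eviction:", relieved)
}
```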

**Which issue(s) this PR fixes**:
Fixes #46789
Fixes #56573
Related PR #56575

**Special notes for your reviewer**:
This will look cleaner after #57802 removes arguments from [makeSignalObservations](https://github.com/kubernetes/kubernetes/pull/57802/files#diff-9e5246d8c78d50ce4ba440f98663f3e9R719).

**Release note**:
```release-note
NONE
```

/sig node
/kind bug
/priority important-soon
cc @kubernetes/sig-node-pr-reviews
Committed by Kubernetes Submit Queue on 2018-02-16 16:31:33 -08:00 (merged via GitHub).
8 changed files with 105 additions and 83 deletions.

@@ -52,7 +52,8 @@ const (
   pressureDelay  = 20 * time.Second
   testContextFmt = "when we run containers that should cause %s"
   noPressure     = v1.NodeConditionType("NoPressure")
-  lotsOfDisk     = 10240 // 10 Gb in Mb
+  lotsOfDisk     = 10240      // 10 Gb in Mb
+  lotsOfFiles    = 1000000000 // 1 billion
 )

 // InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
@@ -76,11 +77,11 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
     runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
       {
         evictionPriority: 1,
-        pod:              inodeConsumingPod("container-inode-hog", nil),
+        pod:              inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
       },
       {
         evictionPriority: 1,
-        pod:              inodeConsumingPod("volume-inode-hog", &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
+        pod:              inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
       },
       {
         evictionPriority: 0,
@@ -90,6 +91,35 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
   })
 })
+
+// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images
+// Disk pressure is induced by pulling large images
+var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]", func() {
+  f := framework.NewDefaultFramework("image-gc-eviction-test")
+  pressureTimeout := 10 * time.Minute
+  expectedNodeCondition := v1.NodeDiskPressure
+  inodesConsumed := uint64(100000)
+  Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+    tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
+      // Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
+      summary := eventuallyGetSummary()
+      inodesFree := *summary.Node.Fs.InodesFree
+      if inodesFree <= inodesConsumed {
+        framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
+      }
+      initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
+      initialConfig.EvictionMinimumReclaim = map[string]string{}
+    })
+    // Consume enough inodes to induce disk pressure,
+    // but expect that image garbage collection can reduce it enough to avoid an eviction
+    runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+      {
+        evictionPriority: 0,
+        pod:              inodeConsumingPod("container-inode", 110000, nil),
+      },
+    })
+  })
+})

 // MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
 // Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
 var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
@@ -630,19 +660,19 @@ const (
   volumeName = "test-volume"
 )

-func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod {
+func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
   // Each iteration creates an empty file
-  return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;")
+  return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, "touch %s${i}.txt; sleep 0.001")
 }

 func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
   // Each iteration writes 1 Mb, so do diskConsumedMB iterations.
-  return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB)+" do dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null ; i=$(($i+1)); done; while true; do sleep 5; done")
+  return podWithCommand(volumeSource, resources, diskConsumedMB, name, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null")
 }

 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
 // If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
-func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, name, command string) *v1.Pod {
+func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
   path := ""
   volumeMounts := []v1.VolumeMount{}
   volumes := []v1.Volume{}
@@ -662,7 +692,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
         Command: []string{
           "sh",
           "-c",
-          fmt.Sprintf(command, filepath.Join(path, "file")),
+          fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, fmt.Sprintf(command, filepath.Join(path, "file"))),
         },
         Resources:    resources,
         VolumeMounts: volumeMounts,
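For reference, the loop-wrapping refactor in `podWithCommand` composes the final container command roughly as in the standalone sketch below; the `/test-volume` mount path is an assumption for this example, not taken from the diff above:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Compose the command the same way the updated podWithCommand does, for an
	// inode-consuming pod writing into a volume mounted at /test-volume
	// (mount path assumed for this example).
	iterations := 1000000000 // lotsOfFiles
	command := "touch %s${i}.txt; sleep 0.001"
	path := "/test-volume"
	full := fmt.Sprintf(
		"i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done",
		iterations,
		fmt.Sprintf(command, filepath.Join(path, "file")),
	)
	fmt.Println(full)
	// Prints:
	// i=0; while [ $i -lt 1000000000 ]; do touch /test-volume/file${i}.txt; sleep 0.001; i=$(($i+1)); done; while true; do sleep 5; done
}
```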