mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-11-03 15:25:19 +00:00
Merge pull request #60900 from dashpole/eviction_test_no_pressure
Automatic merge from submit-queue (batch tested with PRs 60900, 62215, 62196). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. [Flaky test fix] Use memory.force_empty before and after eviction tests **What this PR does / why we need it**: (copied from https://github.com/kubernetes/kubernetes/pull/60720): MemoryAllocatableEviction tests have been somewhat flaky: https://k8s-testgrid.appspot.com/sig-node-kubelet#kubelet-serial-gce-e2e&include-filter-by-regex=MemoryAllocatable The failure on the flakes is ["Pod ran to completion"](https://k8s-gubernator.appspot.com/build/kubernetes-jenkins/logs/ci-kubernetes-node-kubelet-serial/3785#k8sio-memoryallocatableeviction-slow-serial-disruptive-when-we-run-containers-that-should-cause-memorypressure-should-eventually-evict-all-of-the-correct-pods). Looking at [an example log](https://storage.googleapis.com/kubernetes-jenkins/logs/ci-kubernetes-node-kubelet-serial/3785/artifacts/tmp-node-e2e-6070a774-cos-stable-63-10032-71-0/kubelet.log) (and searching for memory-hog-pod), we can see that this pod fails admission because the allocatable memory threshold has already been crossed. `eviction manager: thresholds - ignoring grace period: threshold [signal=allocatableMemory.available, quantity=250Mi] observed 242404Ki` https://github.com/kubernetes/kubernetes/pull/60720 wasn't effective. To clean up after each eviction test, and prepare for the next, use memory.force_empty to make the kernel reclaim memory in the allocatable cgroup before and after eviction tests. **Special notes for your reviewer**: I tested to make sure this doesn't break Cgroup Manager tests. It should work on both cgroupfs and systemd based systems, although I have only tested it on cgroupfs. 
**Release note**: ```release-note NONE ``` /assign @yujuhong @Random-Liu /sig node /priority important-soon /kind bug It's getting a little late in the release cycle, so we can probably wait until after code freeze is lifted for this.
This commit is contained in:
@@ -41,6 +41,7 @@ import (
|
||||
kubeletscheme "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/scheme"
|
||||
kubeletconfigv1beta1 "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/v1beta1"
|
||||
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/kubernetes/pkg/kubelet/remote"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
@@ -57,6 +58,9 @@ var startServices = flag.Bool("start-services", true, "If true, start local node
|
||||
var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
|
||||
var busyboxImage = "busybox"
|
||||
|
||||
// Kubelet internal cgroup name for node allocatable cgroup.
|
||||
const defaultNodeAllocatableCgroup = "kubepods"
|
||||
|
||||
func getNodeSummary() (*stats.Summary, error) {
|
||||
req, err := http.NewRequest("GET", *kubeletAddress+"/stats/summary", nil)
|
||||
if err != nil {
|
||||
@@ -407,3 +411,19 @@ func restartKubelet() {
|
||||
stdout, err = exec.Command("sudo", "systemctl", "restart", kube).CombinedOutput()
|
||||
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
|
||||
}
|
||||
|
||||
func toCgroupFsName(cgroup string) string {
|
||||
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
|
||||
return cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroup), true)
|
||||
}
|
||||
return cgroup
|
||||
}
|
||||
|
||||
// reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/)
|
||||
// to make the kernel reclaim memory in the allocatable cgroup
|
||||
// the time to reduce pressure may be unbounded, but usually finishes within a second
|
||||
func reduceAllocatableMemoryUsage() {
|
||||
cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(defaultNodeAllocatableCgroup))
|
||||
_, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
|
||||
framework.ExpectNoError(err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user