Merge pull request #32163 from mtaufen/more-eviction-logging

Automatic merge from submit-queue

Log pressure condition, memory usage, events in memory eviction test

I want to log this to help us debug some of the latest memory eviction test flakes, where we are seeing the burstable pod "fail" before the besteffort pod. In the logs I saw the eviction manager attempt to evict the besteffort a while before the burstable pod's phase changed to "Failed", yet the besteffort's phase appeared to remain "Running". Logging the pressure condition interleaved with the pod phases should give us a sense of the eviction manager's knowledge versus the pod phases the test observes.
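For readers skimming the diff, here is a minimal standalone sketch of the interleaved logging described above. The helper name logPressureAndPhases is hypothetical and not part of this commit; the real change wires the equivalent calls directly into the test's Eventually poll below, and the import paths are assumed to match the 1.4-era tree this diff touches.

// Hypothetical sketch, not part of this commit: log the node's MemoryPressure condition
// next to the phases of a set of pods, so the eviction manager's view and the observed
// pod phases appear interleaved in the test log.
import (
	"github.com/golang/glog"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/test/e2e/framework"
)

func logPressureAndPhases(f *framework.Framework, podNames ...string) {
	nodeList, err := f.Client.Nodes().List(api.ListOptions{})
	if err != nil {
		glog.Errorf("tried to get node list but got error: %v", err)
		return
	}
	// Node e2e tests run against a single node.
	if len(nodeList.Items) != 1 {
		glog.Errorf("expected 1 node, but see %d", len(nodeList.Items))
		return
	}
	_, pressure := api.GetNodeCondition(&nodeList.Items[0].Status, api.NodeMemoryPressure)
	glog.Infof("node pressure condition: %s", pressure)
	for _, name := range podNames {
		pod, err := f.Client.Pods(f.Namespace.Name).Get(name)
		if err != nil {
			glog.Errorf("getting pod %s: %v", name, err)
			continue
		}
		glog.Infof("pod %s phase: %v", name, pod.Status.Phase)
	}
}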
Kubernetes Submit Queue 2016-09-09 18:37:55 -07:00 committed by GitHub
commit 09efe0457d
2 changed files with 188 additions and 127 deletions

View File

@@ -42,6 +42,7 @@ import (
	"text/tabwriter"
	"time"

+	"github.com/golang/glog"
	"k8s.io/kubernetes/federation/client/clientset_generated/federation_release_1_4"
	"k8s.io/kubernetes/pkg/api"
	apierrs "k8s.io/kubernetes/pkg/api/errors"
@@ -5170,3 +5171,14 @@ func CreateFileForGoBinData(gobindataPath, outputFilename string) error {
	}
	return nil
}
+
+func ListNamespaceEvents(c *client.Client, ns string) error {
+	ls, err := c.Events(ns).List(api.ListOptions{})
+	if err != nil {
+		return err
+	}
+	for _, event := range ls.Items {
+		glog.Infof("Event(%#v): type: '%v' reason: '%v' %v", event.InvolvedObject, event.Type, event.Reason, event.Message)
+	}
+	return nil
+}
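
A quick usage sketch for the new helper (not itself part of this file's change): inside a node e2e test that already has f := framework.NewDefaultFramework(...), it can be wired into cleanup like this. Passing an empty namespace should list events across all namespaces. The eviction test below adds essentially this.

// Usage sketch only; see the AfterEach added in the test file below for the real version.
AfterEach(func() {
	glog.Infof("Summary of events in the test namespace:")
	framework.ExpectNoError(framework.ListNamespaceEvents(f.Client, f.Namespace.Name))
	glog.Infof("Summary of events across all namespaces:")
	framework.ExpectNoError(framework.ListNamespaceEvents(f.Client, ""))
})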

View File

@@ -36,142 +36,191 @@ import (
var _ = framework.KubeDescribe("MemoryEviction [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("eviction-test")

+	// This is a dummy context to wrap the outer AfterEach, which will run after the inner AfterEach.
+	// We want to list all of the node and pod events, including any that occur while waiting for
+	// memory pressure reduction, even if we time out while waiting.
+	Context("", func() {
+
+		AfterEach(func() {
+			glog.Infof("Summary of node events during the memory eviction test:")
+			err := framework.ListNamespaceEvents(f.Client, f.Namespace.Name)
+			framework.ExpectNoError(err)
+			glog.Infof("Summary of pod events during the memory eviction test:")
+			err = framework.ListNamespaceEvents(f.Client, "")
+			framework.ExpectNoError(err)
+		})
+
		Context("when there is memory pressure", func() {
			AfterEach(func() {
				// Wait for the memory pressure condition to disappear from the node status before continuing.
				By("waiting for the memory pressure condition on the node to disappear before ending the test.")
				Eventually(func() error {
					nodeList, err := f.Client.Nodes().List(api.ListOptions{})
					if err != nil {
						return fmt.Errorf("tried to get node list but got error: %v", err)
					}
					// Assuming that there is only one node, because this is a node e2e test.
					if len(nodeList.Items) != 1 {
						return fmt.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
					}
					node := nodeList.Items[0]
					_, pressure := api.GetNodeCondition(&node.Status, api.NodeMemoryPressure)
					if pressure != nil && pressure.Status == api.ConditionTrue {
						return fmt.Errorf("node is still reporting memory pressure condition: %s", pressure)
					}
					return nil
				}, 5*time.Minute, 15*time.Second).Should(BeNil())

				// Check available memory after condition disappears, just in case:
				// Wait for available memory to decrease to a reasonable level before ending the test.
				// This helps prevent interference with tests that start immediately after this one.
				By("waiting for available memory to decrease to a reasonable level before ending the test.")
				Eventually(func() error {
					summary, err := getNodeSummary()
					if err != nil {
						return err
					}
					if summary.Node.Memory.AvailableBytes == nil {
						return fmt.Errorf("summary.Node.Memory.AvailableBytes was nil, cannot get memory stats.")
					}
					if summary.Node.Memory.WorkingSetBytes == nil {
						return fmt.Errorf("summary.Node.Memory.WorkingSetBytes was nil, cannot get memory stats.")
					}
					avail := *summary.Node.Memory.AvailableBytes
					wset := *summary.Node.Memory.WorkingSetBytes

					// memory limit = avail + wset
					limit := avail + wset
					halflimit := limit / 2

					// Wait for at least half of memory limit to be available
					if avail >= halflimit {
						return nil
					}
					return fmt.Errorf("current available memory is: %d bytes. Expected at least %d bytes available.", avail, halflimit)
				}, 5*time.Minute, 15*time.Second).Should(BeNil())

				// TODO(mtaufen): 5 minute wait to stop flaky test bleeding while we figure out what is actually going on.
				// If related to pressure transition period in eviction manager, probably only need to wait
				// just over 30s becasue that is the transition period set for node e2e tests. But since we
				// know 5 min works and we don't know if transition period is the problem, wait 5 min for now.
				time.Sleep(5 * time.Minute)

				// Finally, try starting a new pod and wait for it to be scheduled and running.
				// This is the final check to try to prevent interference with subsequent tests.
				podName := "admit-best-effort-pod"
				f.PodClient().CreateSync(&api.Pod{
					ObjectMeta: api.ObjectMeta{
						Name: podName,
					},
					Spec: api.PodSpec{
						RestartPolicy: api.RestartPolicyNever,
						Containers: []api.Container{
							{
								Image: ImageRegistry[pauseImage],
								Name:  podName,
							},
						},
					},
				})
			})

			It("should evict pods in the correct order (besteffort first, then burstable, then guaranteed)", func() {
				By("creating a guaranteed pod, a burstable pod, and a besteffort pod.")

				// A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
				guaranteed := createMemhogPod(f, "guaranteed-", "guaranteed", api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("100m"),
						"memory": resource.MustParse("100Mi"),
					},
					Limits: api.ResourceList{
						"cpu":    resource.MustParse("100m"),
						"memory": resource.MustParse("100Mi"),
					}})

				// A pod is burstable if limits and requests do not match across all containers.
				burstable := createMemhogPod(f, "burstable-", "burstable", api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu":    resource.MustParse("100m"),
						"memory": resource.MustParse("100Mi"),
					}})

				// A pod is besteffort if none of its containers have specified any requests or limits.
				besteffort := createMemhogPod(f, "besteffort-", "besteffort", api.ResourceRequirements{})

				// We poll until timeout or all pods are killed.
				// Inside the func, we check that all pods are in a valid phase with
				// respect to the eviction order of best effort, then burstable, then guaranteed.
				By("polling the Status.Phase of each pod and checking for violations of the eviction order.")
				Eventually(func() error {
					gteed, gtErr := f.Client.Pods(f.Namespace.Name).Get(guaranteed.Name)
					framework.ExpectNoError(gtErr, fmt.Sprintf("getting pod %s", guaranteed.Name))
					gteedPh := gteed.Status.Phase

					burst, buErr := f.Client.Pods(f.Namespace.Name).Get(burstable.Name)
					framework.ExpectNoError(buErr, fmt.Sprintf("getting pod %s", burstable.Name))
					burstPh := burst.Status.Phase

					best, beErr := f.Client.Pods(f.Namespace.Name).Get(besteffort.Name)
					framework.ExpectNoError(beErr, fmt.Sprintf("getting pod %s", besteffort.Name))
					bestPh := best.Status.Phase

					glog.Infof("pod phase: guaranteed: %v, burstable: %v, besteffort: %v", gteedPh, burstPh, bestPh)

+					// NOTE/TODO(mtaufen): This should help us debug why burstable appears to fail before besteffort in some
+					// scenarios. We have seen some evidence that the eviction manager has in fact done the
+					// right thing and evicted the besteffort first, and attempted to change the besteffort phase
+					// to "Failed" when it evicts it, but that for some reason the test isn't seeing the updated
+					// phase. I'm trying to confirm or deny this.
+					// The eviction manager starts trying to evict things when the node comes under memory
+					// pressure, and the eviction manager reports this information in the pressure condition. If we
+					// see the eviction manager reporting a pressure condition for a while without the besteffort failing,
+					// and we see that the manager did in fact evict the besteffort (this should be in the Kubelet log), we
+					// will have more reason to believe the phase is out of date.
+					nodeList, err := f.Client.Nodes().List(api.ListOptions{})
+					if err != nil {
+						glog.Errorf("tried to get node list but got error: %v", err)
+					}
+					if len(nodeList.Items) != 1 {
+						glog.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
+					}
+					node := nodeList.Items[0]
+					_, pressure := api.GetNodeCondition(&node.Status, api.NodeMemoryPressure)
+					glog.Infof("node pressure condition: %s", pressure)
+
+					// NOTE/TODO(mtaufen): Also log (at least temporarily) the actual memory consumption on the node.
+					// I used this to plot memory usage from a successful test run and it looks the
+					// way I would expect. I want to see what the plot from a flake looks like.
+					summary, err := getNodeSummary()
+					if err != nil {
+						return err
+					}
+					if summary.Node.Memory.WorkingSetBytes != nil {
+						wset := *summary.Node.Memory.WorkingSetBytes
+						glog.Infof("Node's working set is (bytes): %v", wset)
+					}
+
					if bestPh == api.PodRunning {
						Expect(burstPh).NotTo(Equal(api.PodFailed), "burstable pod failed before best effort pod")
						Expect(gteedPh).NotTo(Equal(api.PodFailed), "guaranteed pod failed before best effort pod")
					} else if burstPh == api.PodRunning {
						Expect(gteedPh).NotTo(Equal(api.PodFailed), "guaranteed pod failed before burstable pod")
					}

					// When both besteffort and burstable have been evicted, the test has completed.
					if bestPh == api.PodFailed && burstPh == api.PodFailed {
						return nil
					}
					return fmt.Errorf("besteffort and burstable have not yet both been evicted.")
				}, 60*time.Minute, 5*time.Second).Should(BeNil())
			})
		})
+	})
})

func createMemhogPod(f *framework.Framework, genName string, ctnName string, res api.ResourceRequirements) *api.Pod {
@@ -213,7 +262,7 @@ func createMemhogPod(f *framework.Framework, genName string, ctnName string, res api.ResourceRequirements) *api.Pod {
					// 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
					// to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
					// We might see flakes due to timeout if the total memory on the nodes increases.
-					Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
+					Args:      []string{"-mem-alloc-size", "120Mi", "-mem-alloc-sleep", "5s", "-mem-total", memLimit},
					Resources: res,
				},
			},
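
The fill-rate comment above does a bit of arithmetic; the small self-contained program below re-checks those numbers and adds what the new 120Mi-every-5s setting implies. The ~4Gi of memory to fill (4000Mi here, matching the comment's ballpark) is an assumption taken from the comment, not a measured value.

// Back-of-the-envelope check of the fill-rate math in the comment above.
package main

import "fmt"

func main() {
	const (
		timeoutSec = 60 * 60 // the Eventually poll above gives up after 60 minutes
		totalMi    = 4000.0  // ~4Gi to fill, the ballpark used in the comment (assumed, not measured)
	)
	oldTicks := timeoutSec / 10.0 // one 12Mi allocation every 10s -> 360 ticks before timeout
	fmt.Printf("minimum alloc per tick to fill %.0fMi in %.0f ticks: %.2fMi\n", totalMi, oldTicks, totalMi/oldTicks) // ~11.11Mi
	fmt.Printf("time to fill at 12Mi per 10s: %.0fs (~%.0f min)\n", totalMi/12.0*10.0, totalMi/12.0*10.0/60.0)       // ~56 min
	fmt.Printf("time to fill at 120Mi per 5s: %.0fs (~%.0f min)\n", totalMi/120.0*5.0, totalMi/120.0*5.0/60.0)       // ~3 min
}

At the old rate the allocation alone takes roughly 56 of the 60 available minutes, which is consistent with the comment's worry about timeout flakes; the new 120Mi/5s setting fills the same amount in about 3 minutes.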