Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #32163 from mtaufen/more-eviction-logging
Automatic merge from submit-queue.

Log pressure condition, memory usage, events in memory eviction test

I want to log this to help us debug some of the latest memory eviction test flakes, where we see the burstable pod "fail" before the besteffort pod. In the logs I saw the eviction manager attempt to evict the besteffort pod a while before the burstable pod's phase changed to "Failed", yet the besteffort pod's phase appeared to remain "Running". I want to see the pressure condition interleaved with the pod phases to get a sense of the eviction manager's knowledge versus the pod phases the test observes.
Commit 09efe0457d
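Condensed, for readers skimming the hunks below: the debugging pattern the description asks for is to put the node's MemoryPressure condition and the pod phases on adjacent log lines. The sketch below is illustrative only and is not code from this commit; it assumes the 1.4-era unversioned client and helpers that the diff itself uses (client.Client, api.GetNodeCondition, glog), the import path for the unversioned client is an assumption, and logPressureAndPhase is a hypothetical name.

package evictiondebug

import (
  "fmt"

  "github.com/golang/glog"
  "k8s.io/kubernetes/pkg/api"
  client "k8s.io/kubernetes/pkg/client/unversioned"
)

// logPressureAndPhase emits one log line carrying both the node's MemoryPressure
// condition and a pod's phase, so the eviction manager's view and the observed pod
// phase interleave in the test log. Hypothetical helper, not part of this commit.
func logPressureAndPhase(c *client.Client, ns, podName string) error {
  pod, err := c.Pods(ns).Get(podName)
  if err != nil {
    return err
  }
  nodes, err := c.Nodes().List(api.ListOptions{})
  if err != nil {
    return err
  }
  if len(nodes.Items) != 1 {
    return fmt.Errorf("expected 1 node, got %d", len(nodes.Items))
  }
  _, pressure := api.GetNodeCondition(&nodes.Items[0].Status, api.NodeMemoryPressure)
  glog.Infof("pod %s phase: %v, node pressure condition: %s", podName, pod.Status.Phase, pressure)
  return nil
}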
@@ -42,6 +42,7 @@ import (
   "text/tabwriter"
   "time"
 
+  "github.com/golang/glog"
   "k8s.io/kubernetes/federation/client/clientset_generated/federation_release_1_4"
   "k8s.io/kubernetes/pkg/api"
   apierrs "k8s.io/kubernetes/pkg/api/errors"
@@ -5170,3 +5171,14 @@ func CreateFileForGoBinData(gobindataPath, outputFilename string) error {
   }
   return nil
 }
+
+func ListNamespaceEvents(c *client.Client, ns string) error {
+  ls, err := c.Events(ns).List(api.ListOptions{})
+  if err != nil {
+    return err
+  }
+  for _, event := range ls.Items {
+    glog.Infof("Event(%#v): type: '%v' reason: '%v' %v", event.InvolvedObject, event.Type, event.Reason, event.Message)
+  }
+  return nil
+}
@@ -36,142 +36,191 @@ import (
 var _ = framework.KubeDescribe("MemoryEviction [Slow] [Serial] [Disruptive]", func() {
   f := framework.NewDefaultFramework("eviction-test")
 
-  Context("when there is memory pressure", func() {
-    AfterEach(func() {
-      // Wait for the memory pressure condition to disappear from the node status before continuing.
-      By("waiting for the memory pressure condition on the node to disappear before ending the test.")
-      Eventually(func() error {
-        nodeList, err := f.Client.Nodes().List(api.ListOptions{})
-        if err != nil {
-          return fmt.Errorf("tried to get node list but got error: %v", err)
-        }
-        // Assuming that there is only one node, because this is a node e2e test.
-        if len(nodeList.Items) != 1 {
-          return fmt.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
-        }
-        node := nodeList.Items[0]
-        _, pressure := api.GetNodeCondition(&node.Status, api.NodeMemoryPressure)
-        if pressure != nil && pressure.Status == api.ConditionTrue {
-          return fmt.Errorf("node is still reporting memory pressure condition: %s", pressure)
-        }
-        return nil
-      }, 5*time.Minute, 15*time.Second).Should(BeNil())
-
-      // Check available memory after condition disappears, just in case:
-      // Wait for available memory to decrease to a reasonable level before ending the test.
-      // This helps prevent interference with tests that start immediately after this one.
-      By("waiting for available memory to decrease to a reasonable level before ending the test.")
-      Eventually(func() error {
-        summary, err := getNodeSummary()
-        if err != nil {
-          return err
-        }
-        if summary.Node.Memory.AvailableBytes == nil {
-          return fmt.Errorf("summary.Node.Memory.AvailableBytes was nil, cannot get memory stats.")
-        }
-        if summary.Node.Memory.WorkingSetBytes == nil {
-          return fmt.Errorf("summary.Node.Memory.WorkingSetBytes was nil, cannot get memory stats.")
-        }
-        avail := *summary.Node.Memory.AvailableBytes
-        wset := *summary.Node.Memory.WorkingSetBytes
-
-        // memory limit = avail + wset
-        limit := avail + wset
-        halflimit := limit / 2
-
-        // Wait for at least half of memory limit to be available
-        if avail >= halflimit {
-          return nil
-        }
-        return fmt.Errorf("current available memory is: %d bytes. Expected at least %d bytes available.", avail, halflimit)
-      }, 5*time.Minute, 15*time.Second).Should(BeNil())
-
-      // TODO(mtaufen): 5 minute wait to stop flaky test bleeding while we figure out what is actually going on.
-      // If related to pressure transition period in eviction manager, probably only need to wait
-      // just over 30s becasue that is the transition period set for node e2e tests. But since we
-      // know 5 min works and we don't know if transition period is the problem, wait 5 min for now.
-      time.Sleep(5 * time.Minute)
-
-      // Finally, try starting a new pod and wait for it to be scheduled and running.
-      // This is the final check to try to prevent interference with subsequent tests.
-      podName := "admit-best-effort-pod"
-      f.PodClient().CreateSync(&api.Pod{
-        ObjectMeta: api.ObjectMeta{
-          Name: podName,
-        },
-        Spec: api.PodSpec{
-          RestartPolicy: api.RestartPolicyNever,
-          Containers: []api.Container{
-            {
-              Image: ImageRegistry[pauseImage],
-              Name:  podName,
-            },
-          },
-        },
-      })
-    })
-
-    It("should evict pods in the correct order (besteffort first, then burstable, then guaranteed)", func() {
-      By("creating a guaranteed pod, a burstable pod, and a besteffort pod.")
-
-      // A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
-      guaranteed := createMemhogPod(f, "guaranteed-", "guaranteed", api.ResourceRequirements{
-        Requests: api.ResourceList{
-          "cpu":    resource.MustParse("100m"),
-          "memory": resource.MustParse("100Mi"),
-        },
-        Limits: api.ResourceList{
-          "cpu":    resource.MustParse("100m"),
-          "memory": resource.MustParse("100Mi"),
-        }})
-
-      // A pod is burstable if limits and requests do not match across all containers.
-      burstable := createMemhogPod(f, "burstable-", "burstable", api.ResourceRequirements{
-        Requests: api.ResourceList{
-          "cpu":    resource.MustParse("100m"),
-          "memory": resource.MustParse("100Mi"),
-        }})
-
-      // A pod is besteffort if none of its containers have specified any requests or limits.
-      besteffort := createMemhogPod(f, "besteffort-", "besteffort", api.ResourceRequirements{})
-
-      // We poll until timeout or all pods are killed.
-      // Inside the func, we check that all pods are in a valid phase with
-      // respect to the eviction order of best effort, then burstable, then guaranteed.
-      By("polling the Status.Phase of each pod and checking for violations of the eviction order.")
-      Eventually(func() error {
-
-        gteed, gtErr := f.Client.Pods(f.Namespace.Name).Get(guaranteed.Name)
-        framework.ExpectNoError(gtErr, fmt.Sprintf("getting pod %s", guaranteed.Name))
-        gteedPh := gteed.Status.Phase
-
-        burst, buErr := f.Client.Pods(f.Namespace.Name).Get(burstable.Name)
-        framework.ExpectNoError(buErr, fmt.Sprintf("getting pod %s", burstable.Name))
-        burstPh := burst.Status.Phase
-
-        best, beErr := f.Client.Pods(f.Namespace.Name).Get(besteffort.Name)
-        framework.ExpectNoError(beErr, fmt.Sprintf("getting pod %s", besteffort.Name))
-        bestPh := best.Status.Phase
-
-        glog.Infof("pod phase: guaranteed: %v, burstable: %v, besteffort: %v", gteedPh, burstPh, bestPh)
-
-        if bestPh == api.PodRunning {
-          Expect(burstPh).NotTo(Equal(api.PodFailed), "burstable pod failed before best effort pod")
-          Expect(gteedPh).NotTo(Equal(api.PodFailed), "guaranteed pod failed before best effort pod")
-        } else if burstPh == api.PodRunning {
-          Expect(gteedPh).NotTo(Equal(api.PodFailed), "guaranteed pod failed before burstable pod")
-        }
-
-        // When both besteffort and burstable have been evicted, the test has completed.
-        if bestPh == api.PodFailed && burstPh == api.PodFailed {
-          return nil
-        }
-        return fmt.Errorf("besteffort and burstable have not yet both been evicted.")
-
-      }, 60*time.Minute, 5*time.Second).Should(BeNil())
-
-    })
-  })
+  // This is a dummy context to wrap the outer AfterEach, which will run after the inner AfterEach.
+  // We want to list all of the node and pod events, including any that occur while waiting for
+  // memory pressure reduction, even if we time out while waiting.
+  Context("", func() {
+    AfterEach(func() {
+      glog.Infof("Summary of node events during the memory eviction test:")
+      err := framework.ListNamespaceEvents(f.Client, f.Namespace.Name)
+      framework.ExpectNoError(err)
+      glog.Infof("Summary of pod events during the memory eviction test:")
+      err = framework.ListNamespaceEvents(f.Client, "")
+      framework.ExpectNoError(err)
+    })
+
+    Context("when there is memory pressure", func() {
+      AfterEach(func() {
+        // Wait for the memory pressure condition to disappear from the node status before continuing.
+        By("waiting for the memory pressure condition on the node to disappear before ending the test.")
+        Eventually(func() error {
+          nodeList, err := f.Client.Nodes().List(api.ListOptions{})
+          if err != nil {
+            return fmt.Errorf("tried to get node list but got error: %v", err)
+          }
+          // Assuming that there is only one node, because this is a node e2e test.
+          if len(nodeList.Items) != 1 {
+            return fmt.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
+          }
+          node := nodeList.Items[0]
+          _, pressure := api.GetNodeCondition(&node.Status, api.NodeMemoryPressure)
+          if pressure != nil && pressure.Status == api.ConditionTrue {
+            return fmt.Errorf("node is still reporting memory pressure condition: %s", pressure)
+          }
+          return nil
+        }, 5*time.Minute, 15*time.Second).Should(BeNil())
+
+        // Check available memory after condition disappears, just in case:
+        // Wait for available memory to decrease to a reasonable level before ending the test.
+        // This helps prevent interference with tests that start immediately after this one.
+        By("waiting for available memory to decrease to a reasonable level before ending the test.")
+        Eventually(func() error {
+          summary, err := getNodeSummary()
+          if err != nil {
+            return err
+          }
+          if summary.Node.Memory.AvailableBytes == nil {
+            return fmt.Errorf("summary.Node.Memory.AvailableBytes was nil, cannot get memory stats.")
+          }
+          if summary.Node.Memory.WorkingSetBytes == nil {
+            return fmt.Errorf("summary.Node.Memory.WorkingSetBytes was nil, cannot get memory stats.")
+          }
+          avail := *summary.Node.Memory.AvailableBytes
+          wset := *summary.Node.Memory.WorkingSetBytes
+
+          // memory limit = avail + wset
+          limit := avail + wset
+          halflimit := limit / 2
+
+          // Wait for at least half of memory limit to be available
+          if avail >= halflimit {
+            return nil
+          }
+          return fmt.Errorf("current available memory is: %d bytes. Expected at least %d bytes available.", avail, halflimit)
+        }, 5*time.Minute, 15*time.Second).Should(BeNil())
+
+        // TODO(mtaufen): 5 minute wait to stop flaky test bleeding while we figure out what is actually going on.
+        // If related to pressure transition period in eviction manager, probably only need to wait
+        // just over 30s becasue that is the transition period set for node e2e tests. But since we
+        // know 5 min works and we don't know if transition period is the problem, wait 5 min for now.
+        time.Sleep(5 * time.Minute)
+
+        // Finally, try starting a new pod and wait for it to be scheduled and running.
+        // This is the final check to try to prevent interference with subsequent tests.
+        podName := "admit-best-effort-pod"
+        f.PodClient().CreateSync(&api.Pod{
+          ObjectMeta: api.ObjectMeta{
+            Name: podName,
+          },
+          Spec: api.PodSpec{
+            RestartPolicy: api.RestartPolicyNever,
+            Containers: []api.Container{
+              {
+                Image: ImageRegistry[pauseImage],
+                Name:  podName,
+              },
+            },
+          },
+        })
+      })
+
+      It("should evict pods in the correct order (besteffort first, then burstable, then guaranteed)", func() {
+        By("creating a guaranteed pod, a burstable pod, and a besteffort pod.")
+
+        // A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
+        guaranteed := createMemhogPod(f, "guaranteed-", "guaranteed", api.ResourceRequirements{
+          Requests: api.ResourceList{
+            "cpu":    resource.MustParse("100m"),
+            "memory": resource.MustParse("100Mi"),
+          },
+          Limits: api.ResourceList{
+            "cpu":    resource.MustParse("100m"),
+            "memory": resource.MustParse("100Mi"),
+          }})
+
+        // A pod is burstable if limits and requests do not match across all containers.
+        burstable := createMemhogPod(f, "burstable-", "burstable", api.ResourceRequirements{
+          Requests: api.ResourceList{
+            "cpu":    resource.MustParse("100m"),
+            "memory": resource.MustParse("100Mi"),
+          }})
+
+        // A pod is besteffort if none of its containers have specified any requests or limits.
+        besteffort := createMemhogPod(f, "besteffort-", "besteffort", api.ResourceRequirements{})
+
+        // We poll until timeout or all pods are killed.
+        // Inside the func, we check that all pods are in a valid phase with
+        // respect to the eviction order of best effort, then burstable, then guaranteed.
+        By("polling the Status.Phase of each pod and checking for violations of the eviction order.")
+        Eventually(func() error {
+
+          gteed, gtErr := f.Client.Pods(f.Namespace.Name).Get(guaranteed.Name)
+          framework.ExpectNoError(gtErr, fmt.Sprintf("getting pod %s", guaranteed.Name))
+          gteedPh := gteed.Status.Phase
+
+          burst, buErr := f.Client.Pods(f.Namespace.Name).Get(burstable.Name)
+          framework.ExpectNoError(buErr, fmt.Sprintf("getting pod %s", burstable.Name))
+          burstPh := burst.Status.Phase
+
+          best, beErr := f.Client.Pods(f.Namespace.Name).Get(besteffort.Name)
+          framework.ExpectNoError(beErr, fmt.Sprintf("getting pod %s", besteffort.Name))
+          bestPh := best.Status.Phase
+
+          glog.Infof("pod phase: guaranteed: %v, burstable: %v, besteffort: %v", gteedPh, burstPh, bestPh)
+
+          // NOTE/TODO(mtaufen): This should help us debug why burstable appears to fail before besteffort in some
+          // scenarios. We have seen some evidence that the eviction manager has in fact done the
+          // right thing and evicted the besteffort first, and attempted to change the besteffort phase
+          // to "Failed" when it evicts it, but that for some reason the test isn't seeing the updated
+          // phase. I'm trying to confirm or deny this.
+          // The eviction manager starts trying to evict things when the node comes under memory
+          // pressure, and the eviction manager reports this information in the pressure condition. If we
+          // see the eviction manager reporting a pressure condition for a while without the besteffort failing,
+          // and we see that the manager did in fact evict the besteffort (this should be in the Kubelet log), we
+          // will have more reason to believe the phase is out of date.
+          nodeList, err := f.Client.Nodes().List(api.ListOptions{})
+          if err != nil {
+            glog.Errorf("tried to get node list but got error: %v", err)
+          }
+          if len(nodeList.Items) != 1 {
+            glog.Errorf("expected 1 node, but see %d. List: %v", len(nodeList.Items), nodeList.Items)
+          }
+          node := nodeList.Items[0]
+          _, pressure := api.GetNodeCondition(&node.Status, api.NodeMemoryPressure)
+          glog.Infof("node pressure condition: %s", pressure)
+
+          // NOTE/TODO(mtaufen): Also log (at least temporarily) the actual memory consumption on the node.
+          // I used this to plot memory usage from a successful test run and it looks the
+          // way I would expect. I want to see what the plot from a flake looks like.
+          summary, err := getNodeSummary()
+          if err != nil {
+            return err
+          }
+          if summary.Node.Memory.WorkingSetBytes != nil {
+            wset := *summary.Node.Memory.WorkingSetBytes
+            glog.Infof("Node's working set is (bytes): %v", wset)
+
+          }
+
+          if bestPh == api.PodRunning {
+            Expect(burstPh).NotTo(Equal(api.PodFailed), "burstable pod failed before best effort pod")
+            Expect(gteedPh).NotTo(Equal(api.PodFailed), "guaranteed pod failed before best effort pod")
+          } else if burstPh == api.PodRunning {
+            Expect(gteedPh).NotTo(Equal(api.PodFailed), "guaranteed pod failed before burstable pod")
+          }
+
+          // When both besteffort and burstable have been evicted, the test has completed.
+          if bestPh == api.PodFailed && burstPh == api.PodFailed {
+            return nil
+          }
+          return fmt.Errorf("besteffort and burstable have not yet both been evicted.")
+
+        }, 60*time.Minute, 5*time.Second).Should(BeNil())
+
+      })
+    })
+  })
 
 })
 
 func createMemhogPod(f *framework.Framework, genName string, ctnName string, res api.ResourceRequirements) *api.Pod {
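The eviction-order check inside that polling loop can be restated as a small pure function. A minimal sketch, assuming only the 1.4-era "k8s.io/kubernetes/pkg/api" package that the test already imports; evictionOrderViolated is a hypothetical name, not a helper added by this commit:

package evictionorder

import "k8s.io/kubernetes/pkg/api"

// evictionOrderViolated restates the invariant the gomega assertions above enforce:
// while the besteffort pod is still Running, neither the burstable nor the guaranteed
// pod may already have Failed, and while the burstable pod is still Running, the
// guaranteed pod may not have Failed.
func evictionOrderViolated(gteedPh, burstPh, bestPh api.PodPhase) bool {
  if bestPh == api.PodRunning && (burstPh == api.PodFailed || gteedPh == api.PodFailed) {
    return true
  }
  if burstPh == api.PodRunning && gteedPh == api.PodFailed {
    return true
  }
  return false
}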
@@ -213,7 +262,7 @@ func createMemhogPod(f *framework.Framework, genName string, ctnName string, res
         // 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
         // to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
         // We might see flakes due to timeout if the total memory on the nodes increases.
-        Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
+        Args:      []string{"-mem-alloc-size", "120Mi", "-mem-alloc-sleep", "5s", "-mem-total", memLimit},
         Resources: res,
       },
     },
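For the pacing change above, a quick back-of-the-envelope check. This is illustrative only and not part of the commit; the ~4Gi fill target is taken from the comment in the hunk, and the program below just redoes that arithmetic for both allocation rates.

package main

import "fmt"

func main() {
  const memTotalMi = 4096.0 // ~4Gi, the fill target assumed by the comment above

  // Old pacing: 12Mi every 10s under a 60 min Eventually timeout => 360 ticks,
  // i.e. roughly 4.3Gi allocated right at the deadline, hence the flake risk.
  oldTicks := 60.0 * 60.0 / 10.0
  fmt.Printf("old: %.0f ticks x 12Mi = %.0fMi by timeout\n", oldTicks, oldTicks*12)

  // New pacing: 120Mi every 5s => ~35 ticks, or under 3 minutes, to reach ~4Gi.
  newTicks := memTotalMi / 120.0
  fmt.Printf("new: %.1f ticks x 5s = %.0fs to fill ~4Gi\n", newTicks, newTicks*5)
}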