From 00be6c438cb322787f98907485d4a8d9e9cd1385 Mon Sep 17 00:00:00 2001 From: bindata-mockuser Date: Tue, 9 Aug 2016 15:37:23 -0700 Subject: [PATCH 1/2] Delete all dead containers only after pod syncing is done. --- pkg/kubelet/kubelet.go | 15 ++- pkg/kubelet/pod_container_deletor.go | 47 +++++---- pkg/kubelet/pod_container_deletor_test.go | 122 +++++++++++++++++++++- 3 files changed, 157 insertions(+), 27 deletions(-) diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 03f2ca5eabe..ba64265ac32 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -2409,6 +2409,13 @@ func (kl *Kubelet) HandlePodReconcile(pods []*api.Pod) { // Update the pod in pod manager, status manager will do periodically reconcile according // to the pod manager. kl.podManager.UpdatePod(pod) + + // After an evicted pod is synced, all dead containers in the pod can be removed. + if eviction.PodIsEvicted(pod.Status) { + if podStatus, err := kl.podCache.Get(pod.UID); err == nil { + kl.containerDeletor.deleteContainersInPod("", podStatus, true) + } + } } } @@ -2989,10 +2996,12 @@ func (kl *Kubelet) ListenAndServeReadOnly(address net.IP, port uint) { // Delete the eligible dead container instances in a pod. Depending on the configuration, the latest dead containers may be kept around. func (kl *Kubelet) cleanUpContainersInPod(podId types.UID, exitedContainerID string) { if podStatus, err := kl.podCache.Get(podId); err == nil { - if status, ok := kl.statusManager.GetPodStatus(podId); ok { - // If a pod is evicted, we can delete all the dead containers. - kl.containerDeletor.deleteContainersInPod(exitedContainerID, podStatus, eviction.PodIsEvicted(status)) + removeAll := false + if syncedPod, ok := kl.podManager.GetPodByUID(podId); ok { + // When an evicted pod has already synced, all containers can be removed. + removeAll = eviction.PodIsEvicted(syncedPod.Status) } + kl.containerDeletor.deleteContainersInPod(exitedContainerID, podStatus, removeAll) } } diff --git a/pkg/kubelet/pod_container_deletor.go b/pkg/kubelet/pod_container_deletor.go index 122170409a3..4daf1a5021b 100644 --- a/pkg/kubelet/pod_container_deletor.go +++ b/pkg/kubelet/pod_container_deletor.go @@ -58,29 +58,33 @@ func newPodContainerDeletor(runtime kubecontainer.Runtime, containersToKeep int) } } -// getContainersToDeleteInPod returns the exited containers in a pod whose name matches the name inferred from exitedContainerID, ordered by the creation time from the latest to the earliest. -func getContainersToDeleteInPod(exitedContainerID string, podStatus *kubecontainer.PodStatus, containersToKeep int) containerStatusbyCreatedList { - var matchedContainer *kubecontainer.ContainerStatus - var exitedContainers []*kubecontainer.ContainerStatus - // Find all exited containers in the pod +// getContainersToDeleteInPod returns the exited containers in a pod whose name matches the name inferred from filterContainerId (if not empty), ordered by the creation time from the latest to the earliest. +// If filterContainerId is empty, all dead containers in the pod are returned. 
+func getContainersToDeleteInPod(filterContainerId string, podStatus *kubecontainer.PodStatus, containersToKeep int) containerStatusbyCreatedList { + matchedContainer := func(filterContainerId string, podStatus *kubecontainer.PodStatus) *kubecontainer.ContainerStatus { + if filterContainerId == "" { + return nil + } + for _, containerStatus := range podStatus.ContainerStatuses { + if containerStatus.ID.ID == filterContainerId { + return containerStatus + } + } + return nil + }(filterContainerId, podStatus) + + if filterContainerId != "" && matchedContainer == nil { + glog.Warningf("Container %q not found in pod's containers", filterContainerId) + return containerStatusbyCreatedList{} + } + + // Find the exited containers whose name matches the name of the container with id being filterContainerId + var candidates containerStatusbyCreatedList for _, containerStatus := range podStatus.ContainerStatuses { if containerStatus.State != kubecontainer.ContainerStateExited { continue } - if containerStatus.ID.ID == exitedContainerID { - matchedContainer = containerStatus - } - exitedContainers = append(exitedContainers, containerStatus) - } - if matchedContainer == nil { - glog.Warningf("Container %q not found in pod's exited containers", exitedContainerID) - return containerStatusbyCreatedList{} - } - - // Find the exited containers whose name matches the name of the container with id being exitedContainerID - var candidates containerStatusbyCreatedList - for _, containerStatus := range exitedContainers { - if matchedContainer.Name == containerStatus.Name { + if matchedContainer == nil || matchedContainer.Name == containerStatus.Name { candidates = append(candidates, containerStatus) } } @@ -93,12 +97,13 @@ func getContainersToDeleteInPod(exitedContainerID string, podStatus *kubecontain } // deleteContainersInPod issues container deletion requests for containers selected by getContainersToDeleteInPod. 
-func (p *podContainerDeletor) deleteContainersInPod(exitedContainerID string, podStatus *kubecontainer.PodStatus, removeAll bool) { +func (p *podContainerDeletor) deleteContainersInPod(filterContainerId string, podStatus *kubecontainer.PodStatus, removeAll bool) { containersToKeep := p.containersToKeep if removeAll { containersToKeep = 0 } - for _, candidate := range getContainersToDeleteInPod(exitedContainerID, podStatus, containersToKeep) { + + for _, candidate := range getContainersToDeleteInPod(filterContainerId, podStatus, containersToKeep) { select { case p.worker <- candidate.ID: default: diff --git a/pkg/kubelet/pod_container_deletor_test.go b/pkg/kubelet/pod_container_deletor_test.go index 51797d14c3e..29c66f45156 100644 --- a/pkg/kubelet/pod_container_deletor_test.go +++ b/pkg/kubelet/pod_container_deletor_test.go @@ -24,7 +24,7 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) -func testGetContainersToDeleteInPod(t *testing.T) { +func TestGetContainersToDeleteInPodWithFilter(t *testing.T) { pod := kubecontainer.PodStatus{ ContainerStatuses: []*kubecontainer.ContainerStatus{ { @@ -62,7 +62,7 @@ func testGetContainersToDeleteInPod(t *testing.T) { testCases := []struct { containersToKeep int - expectedContainersToDelete []*kubecontainer.ContainerStatus + expectedContainersToDelete containerStatusbyCreatedList }{ { 0, @@ -80,7 +80,123 @@ func testGetContainersToDeleteInPod(t *testing.T) { for _, test := range testCases { candidates := getContainersToDeleteInPod("4", &pod, test.containersToKeep) - if !reflect.DeepEqual(getContainersToDeleteInPod("4", &pod, test.containersToKeep), test.expectedContainersToDelete) { + if !reflect.DeepEqual(candidates, test.expectedContainersToDelete) { + t.Errorf("expected %v got %v", test.expectedContainersToDelete, candidates) + } + } +} + +func TestGetContainersToDeleteInPod(t *testing.T) { + pod := kubecontainer.PodStatus{ + ContainerStatuses: []*kubecontainer.ContainerStatus{ + { + ID: kubecontainer.ContainerID{Type: "test", ID: "1"}, + Name: "foo", + CreatedAt: time.Now(), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "2"}, + Name: "bar", + CreatedAt: time.Now().Add(time.Second), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "3"}, + Name: "bar", + CreatedAt: time.Now().Add(2 * time.Second), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "4"}, + Name: "bar", + CreatedAt: time.Now().Add(3 * time.Second), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "5"}, + Name: "bar", + CreatedAt: time.Now().Add(4 * time.Second), + State: kubecontainer.ContainerStateRunning, + }, + }, + } + + testCases := []struct { + containersToKeep int + expectedContainersToDelete containerStatusbyCreatedList + }{ + { + 0, + []*kubecontainer.ContainerStatus{pod.ContainerStatuses[3], pod.ContainerStatuses[2], pod.ContainerStatuses[1], pod.ContainerStatuses[0]}, + }, + { + 1, + []*kubecontainer.ContainerStatus{pod.ContainerStatuses[2], pod.ContainerStatuses[1], pod.ContainerStatuses[0]}, + }, + { + 2, + []*kubecontainer.ContainerStatus{pod.ContainerStatuses[1], pod.ContainerStatuses[0]}, + }, + } + + for _, test := range testCases { + candidates := getContainersToDeleteInPod("", &pod, test.containersToKeep) + if !reflect.DeepEqual(candidates, test.expectedContainersToDelete) { + t.Errorf("expected %v got %v", 
test.expectedContainersToDelete, candidates) + } + } +} + +func TestGetContainersToDeleteInPodWithNoMatch(t *testing.T) { + pod := kubecontainer.PodStatus{ + ContainerStatuses: []*kubecontainer.ContainerStatus{ + { + ID: kubecontainer.ContainerID{Type: "test", ID: "1"}, + Name: "foo", + CreatedAt: time.Now(), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "2"}, + Name: "bar", + CreatedAt: time.Now().Add(time.Second), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "3"}, + Name: "bar", + CreatedAt: time.Now().Add(2 * time.Second), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "4"}, + Name: "bar", + CreatedAt: time.Now().Add(3 * time.Second), + State: kubecontainer.ContainerStateExited, + }, + { + ID: kubecontainer.ContainerID{Type: "test", ID: "5"}, + Name: "bar", + CreatedAt: time.Now().Add(4 * time.Second), + State: kubecontainer.ContainerStateRunning, + }, + }, + } + + testCases := []struct { + filterId string + expectedContainersToDelete containerStatusbyCreatedList + }{ + { + "abc", + []*kubecontainer.ContainerStatus{}, + }, + } + + for _, test := range testCases { + candidates := getContainersToDeleteInPod(test.filterId, &pod, len(pod.ContainerStatuses)) + if !reflect.DeepEqual(candidates, test.expectedContainersToDelete) { t.Errorf("expected %v got %v", test.expectedContainersToDelete, candidates) } } From 1c47d9ddd016f3fd198fd8972e5300988913c201 Mon Sep 17 00:00:00 2001 From: bindata-mockuser Date: Tue, 16 Aug 2016 15:34:01 -0700 Subject: [PATCH 2/2] Adding disk eviciton test to node e2e tests --- test/e2e/framework/test_context.go | 2 +- test/e2e_node/disk_eviction_test.go | 93 +++++++++++++++++++---------- 2 files changed, 62 insertions(+), 33 deletions(-) diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 0b23da49f27..c99c80c246d 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -169,6 +169,6 @@ func RegisterNodeFlags() { flag.BoolVar(&TestContext.DisableKubenet, "disable-kubenet", false, "If true, start kubelet without kubenet. (default false)") // TODO: uncomment this when the flag is re-enabled in kubelet //flag.BoolVar(&TestContext.CgroupsPerQOS, "cgroups-per-qos", false, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.") - flag.StringVar(&TestContext.EvictionHard, "eviction-hard", "memory.available<250Mi", "The hard eviction thresholds. If set, pods get evicted when the specified resources drop below the thresholds.") + flag.StringVar(&TestContext.EvictionHard, "eviction-hard", "memory.available<250Mi,imagefs.available<10%%", "The hard eviction thresholds. If set, pods get evicted when the specified resources drop below the thresholds.") flag.StringVar(&TestContext.ManifestPath, "manifest-path", "", "The path to the static pod manifest file.") } diff --git a/test/e2e_node/disk_eviction_test.go b/test/e2e_node/disk_eviction_test.go index 67fdec03bc2..4de87537449 100644 --- a/test/e2e_node/disk_eviction_test.go +++ b/test/e2e_node/disk_eviction_test.go @@ -40,29 +40,25 @@ const ( ) // TODO: Leverage dynamic Kubelet settings when it's implemented to only modify the kubelet eviction option in this test. 
-// To manually trigger the test on a node with disk space just over 15Gi : -// make test-e2e-node FOCUS="hard eviction test" TEST_ARGS="--eviction-hard=nodefs.available<15Gi" -var _ = framework.KubeDescribe("Kubelet Eviction Manager [Flaky] [Serial] [Disruptive]", func() { +var _ = framework.KubeDescribe("Kubelet Eviction Manager [Serial] [Disruptive]", func() { f := framework.NewDefaultFramework("kubelet-eviction-manager") var podClient *framework.PodClient var c *client.Client - var n *api.Node BeforeEach(func() { podClient = f.PodClient() c = f.Client - nodeList := framework.GetReadySchedulableNodesOrDie(c) - n = &nodeList.Items[0] }) Describe("hard eviction test", func() { Context("pod using the most disk space gets evicted when the node disk usage is above the eviction hard threshold", func() { - var busyPodName, idlePodName string + var busyPodName, idlePodName, verifyPodName string var containersToCleanUp map[string]bool AfterEach(func() { podClient.Delete(busyPodName, &api.DeleteOptions{}) podClient.Delete(idlePodName, &api.DeleteOptions{}) + podClient.Delete(verifyPodName, &api.DeleteOptions{}) for container := range containersToCleanUp { // TODO: to be container implementation agnostic cmd := exec.Command("docker", "rm", "-f", strings.Trim(container, dockertools.DockerPrefix)) @@ -77,21 +73,9 @@ var _ = framework.KubeDescribe("Kubelet Eviction Manager [Flaky] [Serial] [Disru busyPodName = "to-evict" + string(uuid.NewUUID()) idlePodName = "idle" + string(uuid.NewUUID()) + verifyPodName = "verify" + string(uuid.NewUUID()) containersToCleanUp = make(map[string]bool) - podClient.Create(&api.Pod{ - ObjectMeta: api.ObjectMeta{ - Name: idlePodName, - }, - Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyNever, - Containers: []api.Container{ - { - Image: ImageRegistry[pauseImage], - Name: idlePodName, - }, - }, - }, - }) + createIdlePod(idlePodName, podClient) podClient.Create(&api.Pod{ ObjectMeta: api.ObjectMeta{ Name: busyPodName, @@ -104,7 +88,7 @@ var _ = framework.KubeDescribe("Kubelet Eviction Manager [Flaky] [Serial] [Disru Name: busyPodName, // Filling the disk Command: []string{"sh", "-c", - fmt.Sprintf("for NUM in `seq 1 1 1000`; do dd if=/dev/urandom of=%s.$NUM bs=4000000 count=10; sleep 3; done", + fmt.Sprintf("for NUM in `seq 1 1 100000`; do dd if=/dev/urandom of=%s.$NUM bs=50000000 count=10; sleep 0.5; done", dummyFile)}, }, }, @@ -112,14 +96,17 @@ var _ = framework.KubeDescribe("Kubelet Eviction Manager [Flaky] [Serial] [Disru }) }) - It("should evict the pod using the most disk space", func() { + It("should evict the pod using the most disk space [Slow]", func() { if !evictionOptionIsSet() { framework.Logf("test skipped because eviction option is not set") return } evictionOccurred := false + nodeDiskPressureCondition := false + podRescheduleable := false Eventually(func() error { + // The pod should be evicted. if !evictionOccurred { podData, err := podClient.Get(busyPodName) if err != nil { @@ -131,9 +118,6 @@ var _ = framework.KubeDescribe("Kubelet Eviction Manager [Flaky] [Serial] [Disru if err != nil { return err } - if !nodeHasDiskPressure(f.Client) { - return fmt.Errorf("expected disk pressure condition is not set") - } podData, err = podClient.Get(idlePodName) if err != nil { @@ -142,23 +126,68 @@ var _ = framework.KubeDescribe("Kubelet Eviction Manager [Flaky] [Serial] [Disru recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses) if podData.Status.Phase != api.PodRunning { - return fmt.Errorf("expected phase to be running. 
got %+v", podData.Status.Phase) + err = verifyPodEviction(podData) + if err != nil { + return err + } } - evictionOccurred = true + return fmt.Errorf("waiting for node disk pressure condition to be set") } - // After eviction happens the pod is evicted so eventually the node disk pressure should be gone. - if nodeHasDiskPressure(f.Client) { - return fmt.Errorf("expected disk pressure condition relief has not happened") + // The node should have disk pressure condition after the pods are evicted. + if !nodeDiskPressureCondition { + if !nodeHasDiskPressure(f.Client) { + return fmt.Errorf("expected disk pressure condition is not set") + } + nodeDiskPressureCondition = true + return fmt.Errorf("waiting for node disk pressure condition to be cleared") } + + // After eviction happens the pod is evicted so eventually the node disk pressure should be relieved. + if !podRescheduleable { + if nodeHasDiskPressure(f.Client) { + return fmt.Errorf("expected disk pressure condition relief has not happened") + } + createIdlePod(verifyPodName, podClient) + podRescheduleable = true + return fmt.Errorf("waiting for the node to accept a new pod") + } + + // The new pod should be able to be scheduled and run after the disk pressure is relieved. + podData, err := podClient.Get(verifyPodName) + if err != nil { + return err + } + recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses) + if podData.Status.Phase != api.PodRunning { + return fmt.Errorf("waiting for the new pod to be running") + } + return nil - }, time.Minute*5, podCheckInterval).Should(BeNil()) + }, time.Minute*15 /* based on n1-standard-1 machine type */, podCheckInterval).Should(BeNil()) }) }) }) }) +func createIdlePod(podName string, podClient *framework.PodClient) { + podClient.Create(&api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: podName, + }, + Spec: api.PodSpec{ + RestartPolicy: api.RestartPolicyNever, + Containers: []api.Container{ + { + Image: ImageRegistry[pauseImage], + Name: podName, + }, + }, + }, + }) +} + func verifyPodEviction(podData *api.Pod) error { if podData.Status.Phase != api.PodFailed { return fmt.Errorf("expected phase to be failed. got %+v", podData.Status.Phase)