From 2478d9d29f10c89b988490c367dd77c27c71efa8 Mon Sep 17 00:00:00 2001
From: Jeff Vance
Date: Mon, 23 Jan 2017 15:39:17 -0800
Subject: [PATCH] test host cleanup in kubelet

---
 test/e2e/kubelet.go | 326 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 286 insertions(+), 40 deletions(-)

diff --git a/test/e2e/kubelet.go b/test/e2e/kubelet.go
index 1a2b4f7fdee..c7dd03d03ca 100644
--- a/test/e2e/kubelet.go
+++ b/test/e2e/kubelet.go
@@ -18,12 +18,15 @@ package e2e
 
 import (
 	"fmt"
+	"path/filepath"
 	"strings"
 	"time"
 
+	apierrs "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/api/v1"
 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
 	"k8s.io/kubernetes/pkg/util/uuid"
@@ -128,59 +131,229 @@ func updateNodeLabels(c clientset.Interface, nodeNames sets.String, toAdd, toRem
 	}
 }
 
+// Calls startVolumeServer to create and run an nfs-server pod. Returns the server pod and its
+// IP address.
+// Note: startVolumeServer() waits for the nfs-server pod to be Running and then sleeps briefly
+// so that the NFS server has time to start up.
+func createNfsServerPod(c clientset.Interface, config VolumeTestConfig) (*v1.Pod, string) {
+
+	pod := startVolumeServer(c, config)
+	Expect(pod).NotTo(BeNil())
+	ip := pod.Status.PodIP
+	Expect(len(ip)).NotTo(BeZero())
+	framework.Logf("NFS server IP address: %v", ip)
+
+	return pod, ip
+}
+
+// Creates a pod that mounts an NFS volume served by the nfs-server pod. The container
+// executes the passed-in shell command. Waits for the pod to start.
+// Note: the NFS plugin is defined inline; no PV or PVC is used.
+func createPodUsingNfs(f *framework.Framework, c clientset.Interface, ns, nfsIP, cmd string) *v1.Pod {
+
+	By("create pod using nfs volume")
+
+	isPrivileged := true
+	cmdLine := []string{"-c", cmd}
+	pod := &v1.Pod{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: api.Registry.GroupOrDie(v1.GroupName).GroupVersion.String(),
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "pod-nfs-vol-",
+			Namespace:    ns,
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name:    "pod-nfs-vol",
+					Image:   "gcr.io/google_containers/busybox:1.24",
+					Command: []string{"/bin/sh"},
+					Args:    cmdLine,
+					VolumeMounts: []v1.VolumeMount{
+						{
+							Name:      "nfs-vol",
+							MountPath: "/mnt",
+						},
+					},
+					SecurityContext: &v1.SecurityContext{
+						Privileged: &isPrivileged,
+					},
+				},
+			},
+			RestartPolicy: v1.RestartPolicyNever, // don't restart pod
+			Volumes: []v1.Volume{
+				{
+					Name: "nfs-vol",
+					VolumeSource: v1.VolumeSource{
+						NFS: &v1.NFSVolumeSource{
+							Server:   nfsIP,
+							Path:     "/exports",
+							ReadOnly: false,
+						},
+					},
+				},
+			},
+		},
+	}
+	rtnPod, err := c.Core().Pods(ns).Create(pod)
+	Expect(err).NotTo(HaveOccurred())
+
+	err = f.WaitForPodReady(rtnPod.Name) // running & ready
+	Expect(err).NotTo(HaveOccurred())
+
+	rtnPod, err = c.Core().Pods(ns).Get(rtnPod.Name, metav1.GetOptions{}) // return fresh pod
+	Expect(err).NotTo(HaveOccurred())
+
+	return rtnPod
+}
+
+// Deletes the passed-in pod and waits for the pod to be terminated. Resilient to the pod
+// not existing.
+func deletePodwithWait(f *framework.Framework, c clientset.Interface, pod *v1.Pod) {
+
+	if pod == nil {
+		return
+	}
+	framework.Logf("Deleting pod %v", pod.Name)
+	err := c.Core().Pods(pod.Namespace).Delete(pod.Name, nil)
+	if err != nil {
+		if apierrs.IsNotFound(err) {
+			return // assume pod was deleted already
+		}
+		Expect(err).NotTo(HaveOccurred())
+	}
+
+	// wait for the pod to terminate; expect an apierrs NotFound error
+	err = f.WaitForPodTerminated(pod.Name, "")
+	Expect(err).To(HaveOccurred())
+	if !apierrs.IsNotFound(err) {
+		framework.Logf("Error! Expected IsNotFound error deleting pod %q, instead got: %v", pod.Name, err)
+		Expect(apierrs.IsNotFound(err)).To(BeTrue())
+	}
+	framework.Logf("Pod %v successfully deleted", pod.Name)
+}
+
+// Checks for a lingering NFS mount and/or pod UID directory on the pod's host. The host IP is
+// used so that this test also runs on GCE, where SSH appears unable to resolve hostnames.
+// If expectClean is true then we expect the node to have been cleaned up and thus commands like
+// `ls <uid-dir>` should fail (since that dir was removed). If expectClean is false then we expect
+// the node has not been cleaned up, and thus commands like `ls <uid-dir>` should succeed. We wait
+// (up to a timeout) for the expected condition, after which an error is reported.
+func checkPodCleanup(c clientset.Interface, pod *v1.Pod, expectClean bool) {
+
+	timeout := 5 * time.Minute
+	poll := 20 * time.Second
+	podUID := string(pod.UID)
+	podDir := filepath.Join("/var/lib/kubelet/pods", podUID)
+	mountDir := filepath.Join(podDir, "volumes", "kubernetes.io~nfs")
+	// use ip rather than hostname in GCE
+	nodeIP, err := framework.GetHostExternalAddress(c, pod)
+	Expect(err).NotTo(HaveOccurred())
+
+	condMsg := map[bool]string{
+		true:  "deleted",
+		false: "present",
+	}
+
+	// table of host tests to perform
+	tests := map[string]string{ // ["what-to-test"] "remote-command"
+		"pod UID directory": fmt.Sprintf("sudo ls %v", podDir),
+		"pod nfs mount":     fmt.Sprintf("sudo mount | grep %v", mountDir),
+	}
+
+	for test, cmd := range tests {
+		framework.Logf("Wait up to %v for host's (%v) %q to be %v", timeout, nodeIP, test, condMsg[expectClean])
+		err = wait.Poll(poll, timeout, func() (bool, error) {
+			result, _ := nodeExec(nodeIP, cmd)
+			framework.LogSSHResult(result)
+			sawFiles := result.Code == 0
+			if expectClean && sawFiles { // keep trying
+				return false, nil
+			}
+			if !expectClean && !sawFiles { // stop wait loop
+				return true, fmt.Errorf("%v is gone but expected to exist", test)
+			}
+			return true, nil // done, host is as expected
+		})
+		if err != nil {
+			framework.Logf("Host (%v) cleanup error: %v. Expected %q to be %v", nodeIP, err, test, condMsg[expectClean])
+			Expect(err).NotTo(HaveOccurred())
+		}
+	}
+
+	if expectClean {
+		framework.Logf("Pod's host has been cleaned up")
+	} else {
+		framework.Logf("Pod's host has not been cleaned up (per expectation)")
+	}
+}
+
 var _ = framework.KubeDescribe("kubelet", func() {
-	var c clientset.Interface
-	var numNodes int
-	var nodeNames sets.String
-	var nodeLabels map[string]string
+	var (
+		c  clientset.Interface
+		ns string
+	)
 	f := framework.NewDefaultFramework("kubelet")
-	var resourceMonitor *framework.ResourceMonitor
 
 	BeforeEach(func() {
 		c = f.ClientSet
-		// Use node labels to restrict the pods to be assigned only to the
-		// nodes we observe initially.
-		nodeLabels = make(map[string]string)
-		nodeLabels["kubelet_cleanup"] = "true"
-
-		nodes := framework.GetReadySchedulableNodesOrDie(c)
-		numNodes = len(nodes.Items)
-		nodeNames = sets.NewString()
-		// If there are a lot of nodes, we don't want to use all of them
-		// (if there are 1000 nodes in the cluster, starting 10 pods/node
-		// will take ~10 minutes today). And there is also deletion phase.
-		// Instead, we choose at most 10 nodes.
-		if numNodes > maxNodesToCheck {
-			numNodes = maxNodesToCheck
-		}
-		for i := 0; i < numNodes; i++ {
-			nodeNames.Insert(nodes.Items[i].Name)
-		}
-		updateNodeLabels(c, nodeNames, nodeLabels, nil)
-
-		// Start resourceMonitor only in small clusters.
-		if len(nodes.Items) <= maxNodesToCheck {
-			resourceMonitor = framework.NewResourceMonitor(f.ClientSet, framework.TargetContainers(), containerStatsPollingInterval)
-			resourceMonitor.Start()
-		}
-	})
-
-	AfterEach(func() {
-		if resourceMonitor != nil {
-			resourceMonitor.Stop()
-		}
-		// If we added labels to nodes in this test, remove them now.
-		updateNodeLabels(c, nodeNames, nil, nodeLabels)
+		ns = f.Namespace.Name
 	})
 
 	framework.KubeDescribe("Clean up pods on node", func() {
+		var (
+			numNodes        int
+			nodeNames       sets.String
+			nodeLabels      map[string]string
+			resourceMonitor *framework.ResourceMonitor
+		)
 		type DeleteTest struct {
 			podsPerNode int
 			timeout     time.Duration
 		}
+
 		deleteTests := []DeleteTest{
 			{podsPerNode: 10, timeout: 1 * time.Minute},
 		}
+
+		BeforeEach(func() {
+			// Use node labels to restrict the pods to be assigned only to the
+			// nodes we observe initially.
+			nodeLabels = make(map[string]string)
+			nodeLabels["kubelet_cleanup"] = "true"
+			nodes := framework.GetReadySchedulableNodesOrDie(c)
+			numNodes = len(nodes.Items)
+			Expect(numNodes).NotTo(BeZero())
+			nodeNames = sets.NewString()
+			// If there are a lot of nodes, we don't want to use all of them
+			// (if there are 1000 nodes in the cluster, starting 10 pods/node
+			// will take ~10 minutes today). And there is also deletion phase.
+			// Instead, we choose at most 10 nodes.
+			if numNodes > maxNodesToCheck {
+				numNodes = maxNodesToCheck
+			}
+			for i := 0; i < numNodes; i++ {
+				nodeNames.Insert(nodes.Items[i].Name)
+			}
+			updateNodeLabels(c, nodeNames, nodeLabels, nil)
+
+			// Start resourceMonitor only in small clusters.
+			if len(nodes.Items) <= maxNodesToCheck {
+				resourceMonitor = framework.NewResourceMonitor(f.ClientSet, framework.TargetContainers(), containerStatsPollingInterval)
+				resourceMonitor.Start()
+			}
+		})
+
+		AfterEach(func() {
+			if resourceMonitor != nil {
+				resourceMonitor.Stop()
+			}
+			// If we added labels to nodes in this test, remove them now.
+			updateNodeLabels(c, nodeNames, nil, nodeLabels)
+		})
+
 		for _, itArg := range deleteTests {
 			name := fmt.Sprintf(
 				"kubelet should be able to delete %d pods per node in %v.", itArg.podsPerNode, itArg.timeout)
@@ -202,7 +375,7 @@ var _ = framework.KubeDescribe("kubelet", func() {
 			// running on the nodes according to kubelet. The timeout is set to
 			// only 30 seconds here because framework.RunRC already waited for all pods to
 			// transition to the running status.
-				Expect(waitTillNPodsRunningOnNodes(f.ClientSet, nodeNames, rcName, f.Namespace.Name, totalPods,
+				Expect(waitTillNPodsRunningOnNodes(f.ClientSet, nodeNames, rcName, ns, totalPods,
 					time.Second*30)).NotTo(HaveOccurred())
 				if resourceMonitor != nil {
 					resourceMonitor.LogLatest()
@@ -218,7 +391,7 @@ var _ = framework.KubeDescribe("kubelet", func() {
 				// - a bug in graceful termination (if it is enabled)
 				// - docker slow to delete pods (or resource problems causing slowness)
 				start := time.Now()
-				Expect(waitTillNPodsRunningOnNodes(f.ClientSet, nodeNames, rcName, f.Namespace.Name, 0,
+				Expect(waitTillNPodsRunningOnNodes(f.ClientSet, nodeNames, rcName, ns, 0,
 					itArg.timeout)).NotTo(HaveOccurred())
 				framework.Logf("Deleting %d pods on %d nodes completed in %v after the RC was deleted", totalPods, len(nodeNames),
 					time.Since(start))
@@ -228,4 +401,77 @@ var _ = framework.KubeDescribe("kubelet", func() {
 			})
 		}
 	})
+
+	// Delete the nfs-server pod after another pod accesses the mounted nfs volume.
+	framework.KubeDescribe("host cleanup with volume mounts [HostCleanup]", func() {
+		type hostCleanupTest struct {
+			itDescr string
+			podCmd  string
+		}
+
+		Context("Host cleanup after pod using NFS mount is deleted [Volume][NFS]", func() {
+			// issue #31272
+			var (
+				nfsServerPod *v1.Pod
+				nfsIP        string
+				NFSconfig    VolumeTestConfig
+				pod          *v1.Pod // client pod
+			)
+
+			// fill in the test slice for this context
+			testTbl := []hostCleanupTest{
+				{
+					itDescr: "after deleting the nfs-server, the host should be cleaned up when deleting a sleeping pod which mounts an NFS vol",
+					podCmd:  "sleep 6000",
+				},
+				{
+					itDescr: "after deleting the nfs-server, the host should be cleaned up when deleting a pod accessing the NFS vol",
+					podCmd:  "while true; do echo FeFieFoFum >>/mnt/SUCCESS; cat /mnt/SUCCESS; done",
+				},
+			}
+
+			BeforeEach(func() {
+				NFSconfig = VolumeTestConfig{
+					namespace:   ns,
+					prefix:      "nfs",
+					serverImage: NfsServerImage,
+					serverPorts: []int{2049},
+					serverArgs:  []string{"-G", "777", "/exports"},
+				}
+				nfsServerPod, nfsIP = createNfsServerPod(c, NFSconfig)
+			})
+
+			AfterEach(func() {
+				deletePodwithWait(f, c, pod)
+				deletePodwithWait(f, c, nfsServerPod)
+			})
+
+			// execute the It blocks defined in the table of tests above
+			for _, test := range testTbl {
+				t := test // local copy for closure
+				It(fmt.Sprintf("%v [Serial]", t.itDescr), func() {
+					// create a pod which uses the nfs server's volume
+					pod = createPodUsingNfs(f, c, ns, nfsIP, t.podCmd)
+
+					By("Delete the NFS server pod")
+					deletePodwithWait(f, c, nfsServerPod)
+					nfsServerPod = nil
+
+					By("Delete the pod mounted to the NFS volume")
+					deletePodwithWait(f, c, pod)
+					// pod object is now stale, but is intentionally not nil
+
+					By("Check if host running deleted pod has been cleaned up -- expect not")
+					// expect the pod's host *not* to be cleaned up
+					checkPodCleanup(c, pod, false)
+
+					By("Recreate the nfs server pod")
+					nfsServerPod, nfsIP = createNfsServerPod(c, NFSconfig)
+					By("Verify host running the deleted pod is now cleaned up")
+					// expect the pod's host to be cleaned up
+					checkPodCleanup(c, pod, true)
+				})
+			}
+		})
+	})
 })
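
Note (not part of the patch): checkPodCleanup() calls a nodeExec() helper that this diff does not add, so it is assumed to already exist among the e2e volume test helpers. A minimal sketch of what such a helper could look like, assuming the framework.SSH(cmd, host, provider) API and the framework.SSHResult type of that era, is:

    // nodeExec: hypothetical sketch, not the actual helper. Runs a bash command on the
    // node at nodeIP over SSH and returns the framework.SSHResult whose Code field
    // checkPodCleanup() inspects (0 means the command succeeded, i.e. files were seen).
    func nodeExec(nodeIP, bashExec string) (framework.SSHResult, error) {
    	// Port 22 and the configured cloud provider are assumed here.
    	return framework.SSH(bashExec, nodeIP+":22", framework.TestContext.Provider)
    }

This SSH-based check is also why framework.GetHostExternalAddress() is used above: the connection needs a reachable node IP rather than a cluster-internal hostname.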