Revert "fix flaky host cleanup test"

Jerzy Szczepkowski 2017-02-14 12:50:42 +01:00 committed by GitHub
parent 739f4ffe0e
commit e6cdf20c5c
2 changed files with 47 additions and 69 deletions


@@ -130,7 +130,10 @@ func updateNodeLabels(c clientset.Interface, nodeNames sets.String, toAdd, toRem
 	}
 }
-// Wraps startVolumeServer to create and run a nfs-server pod. Returns server pod and its ip address.
+// Calls startVolumeServer to create and run a nfs-server pod. Returns server pod and its
+// ip address.
+// Note: startVolumeServer() waits for the nfs-server pod to be Running and sleeps some
+// so that the nfs server can start up.
 func createNfsServerPod(c clientset.Interface, config VolumeTestConfig) (*v1.Pod, string) {
 	pod := startVolumeServer(c, config)
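
The restored comment above matters for the rest of this diff: startVolumeServer blocks until the nfs-server pod is Running and then sleeps a grace period so the nfs server has actually started before any client pod mounts it. A standard-library sketch of that wait-then-grace shape (podIsRunning is a hypothetical probe standing in for the framework's pod wait, not its API):

package main

import "time"

// waitThenGrace polls a readiness probe until it reports true, then sleeps a
// fixed grace period, mirroring the "wait for Running, then sleep so the nfs
// server can start up" behavior described above. Sketch only.
func waitThenGrace(podIsRunning func() bool, poll, grace time.Duration) {
	for !podIsRunning() {
		time.Sleep(poll)
	}
	time.Sleep(grace)
}

func main() {
	start := time.Now()
	waitThenGrace(func() bool { return time.Since(start) > time.Second },
		100*time.Millisecond, 2*time.Second)
}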
@@ -142,26 +145,6 @@ func createNfsServerPod(c clientset.Interface, config VolumeTestConfig) (*v1.Pod
 	return pod, ip
 }
-
-// Restart the passed-in nfs-server by issuing a `/usr/sbin/rpc.nfsd 1` command in the
-// pod's (only) container. This command changes the number of nfs server threads from
-// (presumably) zero back to 1, and therefore allows nfs to open connections again.
-func restartNfsServer(serverPod *v1.Pod) {
-	const startcmd = "/usr/sbin/rpc.nfsd 1"
-	ns := fmt.Sprintf("--namespace=%v", serverPod.Namespace)
-	framework.RunKubectlOrDie("exec", ns, serverPod.Name, "--", "/bin/sh", "-c", startcmd)
-}
-
-// Stop the passed-in nfs-server by issuing a `/usr/sbin/rpc.nfsd 0` command in the
-// pod's (only) container. This command changes the number of nfs server threads to 0,
-// thus closing all open nfs connections.
-func stopNfsServer(serverPod *v1.Pod) {
-	const stopcmd = "/usr/sbin/rpc.nfsd 0"
-	ns := fmt.Sprintf("--namespace=%v", serverPod.Namespace)
-	framework.RunKubectlOrDie("exec", ns, serverPod.Name, "--", "/bin/sh", "-c", stopcmd)
-}
-
 // Creates a pod that mounts an nfs volume that is served by the nfs-server pod. The container
 // will execute the passed in shell cmd. Waits for the pod to start.
 // Note: the nfs plugin is defined inline, no PV or PVC.
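
The two helpers deleted above differ only in the thread count handed to rpc.nfsd: 0 closes all open nfs connections, 1 lets the server accept them again, and in both cases the server pod, and therefore its ip, stays alive. A sketch folding both into one function (the name setNfsServerThreads is hypothetical; the types and framework calls are exactly the ones visible in the removed code, so this assumes the same imports as that file):

// Sketch only, not part of the commit: both removed helpers as one function.
// n == 0 stops the nfs server's threads, n == 1 resumes them; the pod itself
// is never deleted, which is what keeps client mounts unmountable-by-ip.
func setNfsServerThreads(serverPod *v1.Pod, n int) {
	cmd := fmt.Sprintf("/usr/sbin/rpc.nfsd %d", n)
	ns := fmt.Sprintf("--namespace=%v", serverPod.Namespace)
	framework.RunKubectlOrDie("exec", ns, serverPod.Name, "--", "/bin/sh", "-c", cmd)
}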
@@ -235,48 +218,42 @@ func checkPodCleanup(c clientset.Interface, pod *v1.Pod, expectClean bool) {
 	timeout := 5 * time.Minute
 	poll := 20 * time.Second
-	podDir := filepath.Join("/var/lib/kubelet/pods", string(pod.UID))
+	podUID := string(pod.UID)
+	podDir := filepath.Join("/var/lib/kubelet/pods", podUID)
 	mountDir := filepath.Join(podDir, "volumes", "kubernetes.io~nfs")
 	// use ip rather than hostname in GCE
 	nodeIP, err := framework.GetHostExternalAddress(c, pod)
 	Expect(err).NotTo(HaveOccurred())
-	condMsg := "deleted"
-	if !expectClean {
-		condMsg = "present"
-	}
-	// table of host tests to perform (order may matter so not using a map)
-	type testT struct {
-		feature string // feature to test
-		cmd     string // remote command to execute on node
-	}
-	tests := []testT{
-		{
-			feature: "pod UID directory",
-			cmd:     fmt.Sprintf("sudo ls %v", podDir),
-		},
-		{
-			feature: "pod nfs mount",
-			cmd:     fmt.Sprintf("sudo mount | grep %v", mountDir),
-		},
-	}
-	for _, test := range tests {
-		framework.Logf("Wait up to %v for host's (%v) %q to be %v", timeout, nodeIP, test.feature, condMsg)
+	condMsg := map[bool]string{
+		true:  "deleted",
+		false: "present",
+	}
+	// table of host tests to perform
+	tests := map[string]string{ //["what-to-test"] "remote-command"
+		"pod UID directory": fmt.Sprintf("sudo ls %v", podDir),
+		"pod nfs mount":     fmt.Sprintf("sudo mount | grep %v", mountDir),
+	}
+	for test, cmd := range tests {
+		framework.Logf("Wait up to %v for host's (%v) %q to be %v", timeout, nodeIP, test, condMsg[expectClean])
 		err = wait.Poll(poll, timeout, func() (bool, error) {
-			result, _ := nodeExec(nodeIP, test.cmd)
+			result, _ := nodeExec(nodeIP, cmd)
 			framework.LogSSHResult(result)
-			ok := (result.Code == 0 && len(result.Stdout) > 0 && len(result.Stderr) == 0)
-			if expectClean && ok { // keep trying
+			sawFiles := result.Code == 0
+			if expectClean && sawFiles { // keep trying
 				return false, nil
 			}
-			if !expectClean && !ok { // stop wait loop
-				return true, fmt.Errorf("%v is gone but expected to exist", test.feature)
+			if !expectClean && !sawFiles { // stop wait loop
+				return true, fmt.Errorf("%v is gone but expected to exist", test)
 			}
 			return true, nil // done, host is as expected
 		})
-		Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Host (%v) cleanup error: %v. Expected %q to be %v", nodeIP, err, test.feature, condMsg))
+		if err != nil {
+			framework.Logf("Host (%v) cleanup error: %v. Expected %q to be %v", nodeIP, err, test, condMsg[expectClean])
+			Expect(err).NotTo(HaveOccurred())
+		}
 	}
 	if expectClean {
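
A side effect of this part of the revert worth noting: the deleted slice-based table carried the caveat "order may matter so not using a map", while the restored code ranges over a Go map, whose iteration order is deliberately randomized from run to run. A standalone illustration using the same two check names (the command values are placeholders):

package main

import (
	"fmt"
	"sort"
)

func main() {
	// Same shape as the restored `tests` table above; values are stand-ins.
	tests := map[string]string{
		"pod UID directory": "sudo ls <podDir>",
		"pod nfs mount":     "sudo mount | grep <mountDir>",
	}
	// Ranging over the map directly may visit entries in either order on
	// each run; sorting the keys recovers the determinism the slice had.
	keys := make([]string, 0, len(tests))
	for k := range tests {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	for _, k := range keys {
		fmt.Println(k, "->", tests[k])
	}
}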
@@ -398,20 +375,14 @@ var _ = framework.KubeDescribe("kubelet", func() {
 		}
 	})
-	// Test host cleanup when disrupting the volume server.
-	framework.KubeDescribe("host cleanup with volume mounts [HostCleanup][Slow]", func() {
+	// Delete nfs server pod after another pod accesses the mounted nfs volume.
+	framework.KubeDescribe("host cleanup with volume mounts [HostCleanup][Flaky]", func() {
 		type hostCleanupTest struct {
 			itDescr string
 			podCmd  string
 		}
-		// Disrupt the nfs server pod after a client pod accesses the nfs volume.
-		// Note: the nfs-server is stopped NOT deleted. This is done to preserve its ip addr.
-		// If the nfs-server pod is deleted the client pod's mount can not be unmounted.
-		// If the nfs-server pod is deleted and re-created, due to having a different ip
-		// addr, the client pod's mount still cannot be unmounted.
-		Context("Host cleanup after disrupting NFS volume [NFS]", func() {
+		Context("Host cleanup after pod using NFS mount is deleted [Volume][NFS]", func() {
 			// issue #31272
 			var (
 				nfsServerPod *v1.Pod
@@ -424,11 +395,11 @@ var _ = framework.KubeDescribe("kubelet", func() {
 			testTbl := []hostCleanupTest{
 				{
 					itDescr: "after deleting the nfs-server, the host should be cleaned-up when deleting sleeping pod which mounts an NFS vol",
-					podCmd:  "sleep 6000", // keep pod running
+					podCmd:  "sleep 6000",
 				},
 				{
 					itDescr: "after deleting the nfs-server, the host should be cleaned-up when deleting a pod accessing the NFS vol",
-					podCmd:  "while true; do echo FeFieFoFum >>/mnt/SUCCESS; sleep 1; cat /mnt/SUCCESS; done",
+					podCmd:  "while true; do echo FeFieFoFum >>/mnt/SUCCESS; cat /mnt/SUCCESS; done",
 				},
 			}
@@ -449,24 +420,28 @@ var _ = framework.KubeDescribe("kubelet", func() {
 			})
 			// execute It blocks from above table of tests
-			for _, t := range testTbl {
-				It(t.itDescr, func() {
+			for _, test := range testTbl {
+				t := test // local copy for closure
+				It(fmt.Sprintf("%v [Serial]", t.itDescr), func() {
+					// create a pod which uses the nfs server's volume
 					pod = createPodUsingNfs(f, c, ns, nfsIP, t.podCmd)
-					By("Stop the NFS server")
-					stopNfsServer(nfsServerPod)
+					By("Delete the NFS server pod")
+					deletePodWithWait(f, c, nfsServerPod)
+					nfsServerPod = nil
 					By("Delete the pod mounted to the NFS volume")
 					deletePodWithWait(f, c, pod)
 					// pod object is now stale, but is intentionally not nil
-					By("Check if pod's host has been cleaned up -- expect not")
-					// expect the pod's host *not* to be cleaned up
+					By("Check if host running deleted pod has been cleaned up -- expect not")
 					checkPodCleanup(c, pod, false)
-					By("Restart the nfs server")
-					restartNfsServer(nfsServerPod)
+					By("Recreate the nfs server pod")
+					nfsServerPod, nfsIP = createNfsServerPod(c, NFSconfig)
 					By("Verify host running the deleted pod is now cleaned up")
-					// expect the pod's host to be cleaned up
 					checkPodCleanup(c, pod, true)
 				})
 			}
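
The added `t := test` line is the standard guard against Go's loop-variable capture: It only registers the closure, which runs after the range loop has finished, so without a per-iteration copy every registered body would observe the final table entry (Go versions before 1.22 share one loop variable across iterations). A standalone demonstration:

package main

import "fmt"

func main() {
	var registered []func()
	for _, test := range []string{"sleeping pod", "writing pod"} {
		t := test // local copy for closure; delete this line to see the bug
		registered = append(registered, func() { fmt.Println("running:", t) })
	}
	// Like Ginkgo invoking It bodies after registration: with the copy this
	// prints both names; without it, pre-Go 1.22, both print "writing pod".
	for _, fn := range registered {
		fn()
	}
}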


@@ -408,10 +408,13 @@ func deletePodWithWait(f *framework.Framework, c clientset.Interface, pod *v1.Po
 		Expect(err).NotTo(HaveOccurred())
 	}
-	// wait for pod to terminate
+	// wait for pod to terminate. Expect apierr NotFound
 	err = f.WaitForPodTerminated(pod.Name, "")
-	Expect(apierrs.IsNotFound(err)).To(BeTrue(), fmt.Sprintf("Expected IsNotFound error deleting pod %q, instead got: %v", pod.Name, err))
+	Expect(err).To(HaveOccurred())
+	if !apierrs.IsNotFound(err) {
+		framework.Logf("Error! Expected IsNotFound error deleting pod %q, instead got: %v", pod.Name, err)
+		Expect(apierrs.IsNotFound(err)).To(BeTrue())
+	}
 	framework.Logf("Ignore \"not found\" error above. Pod %v successfully deleted", pod.Name)
 }