From baeea3272a48dd2177ed0b9c7129e745a6a0966c Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Fri, 20 Nov 2015 13:54:34 -0800 Subject: [PATCH] make SSH logging in e2e more consistent --- test/e2e/core.go | 4 +- test/e2e/daemon_restart.go | 14 +++---- test/e2e/etcd_failure.go | 7 ++-- test/e2e/reboot.go | 5 ++- test/e2e/resize_nodes.go | 19 +++++----- test/e2e/service.go | 21 ++++++----- test/e2e/ssh.go | 10 ++--- test/e2e/util.go | 76 +++++++++++++++++++++----------------- 8 files changed, 85 insertions(+), 71 deletions(-) diff --git a/test/e2e/core.go b/test/e2e/core.go index 19f4c857386..857bbf6e36b 100644 --- a/test/e2e/core.go +++ b/test/e2e/core.go @@ -85,11 +85,11 @@ func logCore(cmds []command, hosts []string, dir, provider string) { defer wg.Done() logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component) fmt.Printf("Writing to %s.\n", logfile) - stdout, stderr, _, err := SSH(cmd.cmd, host, provider) + result, err := SSH(cmd.cmd, host, provider) if err != nil { fmt.Printf("Error running command: %v\n", err) } - if err := ioutil.WriteFile(logfile, []byte(stdout+stderr), 0777); err != nil { + if err := ioutil.WriteFile(logfile, []byte(result.Stdout+result.Stderr), 0777); err != nil { fmt.Printf("Error writing logfile: %v\n", err) } }(cmd, host) diff --git a/test/e2e/daemon_restart.go b/test/e2e/daemon_restart.go index d3ef557dd0c..38309b05b2f 100644 --- a/test/e2e/daemon_restart.go +++ b/test/e2e/daemon_restart.go @@ -58,10 +58,10 @@ const ( // nodeExec execs the given cmd on node via SSH. Note that the nodeName is an sshable name, // eg: the name returned by getMasterHost(). This is also not guaranteed to work across // cloud providers since it involves ssh. -func nodeExec(nodeName, cmd string) (string, string, int, error) { - stdout, stderr, code, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider) +func nodeExec(nodeName, cmd string) (SSHResult, error) { + result, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider) Expect(err).NotTo(HaveOccurred()) - return stdout, stderr, code, err + return result, err } // restartDaemonConfig is a config to restart a running daemon on a node, and wait till @@ -99,10 +99,10 @@ func (r *restartDaemonConfig) waitUp() { "curl -s -o /dev/null -I -w \"%%{http_code}\" http://localhost:%v/healthz", r.healthzPort) err := wait.Poll(r.pollInterval, r.pollTimeout, func() (bool, error) { - stdout, stderr, code, err := nodeExec(r.nodeName, healthzCheck) + result, err := nodeExec(r.nodeName, healthzCheck) expectNoError(err) - if code == 0 { - httpCode, err := strconv.Atoi(stdout) + if result.Code == 0 { + httpCode, err := strconv.Atoi(result.Stdout) if err != nil { Logf("Unable to parse healthz http return code: %v", err) } else if httpCode == 200 { @@ -110,7 +110,7 @@ func (r *restartDaemonConfig) waitUp() { } } Logf("node %v exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", - r.nodeName, healthzCheck, code, stdout, stderr) + r.nodeName, healthzCheck, result.Code, result.Stdout, result.Stderr) return false, nil }) expectNoError(err, "%v did not respond with a 200 via %v within %v", r, healthzCheck, r.pollTimeout) diff --git a/test/e2e/etcd_failure.go b/test/e2e/etcd_failure.go index 7581c46b0f3..f3517b4163b 100644 --- a/test/e2e/etcd_failure.go +++ b/test/e2e/etcd_failure.go @@ -93,10 +93,11 @@ func doEtcdFailure(failCommand, fixCommand string) { } func masterExec(cmd string) { - stdout, stderr, code, err := SSH(cmd, getMasterHost()+":22", testContext.Provider) + result, err := SSH(cmd, getMasterHost()+":22", testContext.Provider) Expect(err).NotTo(HaveOccurred()) - if code != 0 { - Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr) + if result.Code != 0 { + LogSSHResult(result) + Failf("master exec command returned non-zero") } } diff --git a/test/e2e/reboot.go b/test/e2e/reboot.go index ee44fe6992d..f1fb4d8cc0d 100644 --- a/test/e2e/reboot.go +++ b/test/e2e/reboot.go @@ -162,8 +162,9 @@ func issueSSHCommand(node *api.Node, provider, cmd string) error { return fmt.Errorf("couldn't find external IP address for node %s", node.Name) } Logf("Calling %s on %s", cmd, node.Name) - if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil { - return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err) + if result, err := SSH(cmd, host, provider); result.Code != 0 || err != nil { + LogSSHResult(result) + return fmt.Errorf("failed running %q: %v (exit code %d)", cmd, err, result.Code) } return nil } diff --git a/test/e2e/resize_nodes.go b/test/e2e/resize_nodes.go index 06cc68ec254..6a2e7084249 100644 --- a/test/e2e/resize_nodes.go +++ b/test/e2e/resize_nodes.go @@ -338,14 +338,15 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica // may fail). Manual intervention is required in such case (recreating the // cluster solves the problem too). err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) { - _, _, code, err := SSHVerbose(undropCmd, host, testContext.Provider) - if code == 0 && err == nil { + result, err := SSH(undropCmd, host, testContext.Provider) + if result.Code == 0 && err == nil { return true, nil - } else { - Logf("Expected 0 exit code and nil error when running '%s' on %s, got %d and %v", - undropCmd, node.Name, code, err) - return false, nil } + LogSSHResult(result) + if err != nil { + Logf("Unexpected error: %v", err) + } + return false, nil }) if err != nil { Failf("Failed to remove the iptable REJECT rule. Manual intervention is "+ @@ -364,9 +365,9 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica // We could also block network traffic from the master(s) to this node, // but blocking it one way is sufficient for this test. dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule) - if _, _, code, err := SSHVerbose(dropCmd, host, testContext.Provider); code != 0 || err != nil { - Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v", - dropCmd, node.Name, code, err) + if result, err := SSH(dropCmd, host, testContext.Provider); result.Code != 0 || err != nil { + LogSSHResult(result) + Failf("Unexpected error: %v", err) } Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name) diff --git a/test/e2e/service.go b/test/e2e/service.go index 73871b7f001..b43433f9dee 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -298,12 +298,13 @@ var _ = Describe("Services", func() { expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort)) By("Removing iptable rules") - _, _, code, err := SSH(` + result, err := SSH(` sudo iptables -t nat -F KUBE-SERVICES || true; sudo iptables -t nat -F KUBE-PORTALS-HOST || true; sudo iptables -t nat -F KUBE-PORTALS-CONTAINER || true`, host, testContext.Provider) - if err != nil || code != 0 { - Failf("couldn't remove iptable rules: %v (code %v)", err, code) + if err != nil || result.Code != 0 { + LogSSHResult(result) + Failf("couldn't remove iptable rules: %v", err) } expectNoError(verifyServeHostnameServiceUp(c, host, podNames1, svc1IP, servicePort)) expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort)) @@ -1304,11 +1305,12 @@ func verifyServeHostnameServiceUp(c *client.Client, host string, expectedPods [] for _, cmd := range commands { passed := false for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5) { - stdout, _, code, err := SSH(cmd, host, testContext.Provider) - if err != nil || code != 0 { - Logf("error while SSH-ing to node: %v (code %v)", err, code) + result, err := SSH(cmd, host, testContext.Provider) + if err != nil || result.Code != 0 { + LogSSHResult(result) + Logf("error while SSH-ing to node: %v", err) } - pods := strings.Split(strings.TrimSpace(stdout), "\n") + pods := strings.Split(strings.TrimSpace(result.Stdout), "\n") sort.StringSlice(pods).Sort() if api.Semantic.DeepEqual(pods, expectedPods) { passed = true @@ -1328,11 +1330,12 @@ func verifyServeHostnameServiceDown(c *client.Client, host string, serviceIP str "curl -s --connect-timeout 2 http://%s:%d && exit 99", serviceIP, servicePort) for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5 * time.Second) { - _, _, code, err := SSH(command, host, testContext.Provider) + result, err := SSH(command, host, testContext.Provider) if err != nil { + LogSSHResult(result) Logf("error while SSH-ing to node: %v", err) } - if code != 99 { + if result.Code != 99 { return nil } Logf("service still alive - still waiting") diff --git a/test/e2e/ssh.go b/test/e2e/ssh.go index 2efdf36af18..6697a4041b5 100644 --- a/test/e2e/ssh.go +++ b/test/e2e/ssh.go @@ -65,8 +65,8 @@ var _ = Describe("SSH", func() { for _, testCase := range testCases { By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd)) for _, host := range hosts { - stdout, stderr, code, err := SSH(testCase.cmd, host, testContext.Provider) - stdout, stderr = strings.TrimSpace(stdout), strings.TrimSpace(stderr) + result, err := SSH(testCase.cmd, host, testContext.Provider) + stdout, stderr := strings.TrimSpace(result.Stdout), strings.TrimSpace(result.Stderr) if err != testCase.expectedError { Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError) } @@ -76,8 +76,8 @@ var _ = Describe("SSH", func() { if stderr != testCase.expectedStderr { Failf("Ran %s on %s, got stderr '%s', expected '%s'", testCase.cmd, host, stderr, testCase.expectedStderr) } - if code != testCase.expectedCode { - Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, code, testCase.expectedCode) + if result.Code != testCase.expectedCode { + Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, result.Code, testCase.expectedCode) } // Show stdout, stderr for logging purposes. if len(stdout) > 0 { @@ -91,7 +91,7 @@ var _ = Describe("SSH", func() { // Quickly test that SSH itself errors correctly. By("SSH'ing to a nonexistent host") - if _, _, _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil { + if _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil { Failf("Expected error trying to SSH to nonexistent host.") } }) diff --git a/test/e2e/util.go b/test/e2e/util.go index 0cec9824cf3..509cd542bc7 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1932,43 +1932,48 @@ func NodeSSHHosts(c *client.Client) ([]string, error) { return sshHosts, nil } +type SSHResult struct { + User string + Host string + Cmd string + Stdout string + Stderr string + Code int +} + // SSH synchronously SSHs to a node running on provider and runs cmd. If there // is no error performing the SSH, the stdout, stderr, and exit code are // returned. -func SSH(cmd, host, provider string) (string, string, int, error) { - return sshCore(cmd, host, provider, false) -} +func SSH(cmd, host, provider string) (SSHResult, error) { + result := SSHResult{Host: host, Cmd: cmd} -// SSHVerbose is just like SSH, but it logs the command, user, host, stdout, -// stderr, exit code, and error. -func SSHVerbose(cmd, host, provider string) (string, string, int, error) { - return sshCore(cmd, host, provider, true) -} - -func sshCore(cmd, host, provider string, verbose bool) (string, string, int, error) { // Get a signer for the provider. signer, err := getSigner(provider) if err != nil { - return "", "", 0, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err) + return result, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err) } // RunSSHCommand will default to Getenv("USER") if user == "", but we're // defaulting here as well for logging clarity. - user := os.Getenv("KUBE_SSH_USER") - if user == "" { - user = os.Getenv("USER") + result.User = os.Getenv("KUBE_SSH_USER") + if result.User == "" { + result.User = os.Getenv("USER") } - stdout, stderr, code, err := util.RunSSHCommand(cmd, user, host, signer) - if verbose { - remote := fmt.Sprintf("%s@%s", user, host) - Logf("[%s] Running `%s`", remote, cmd) - Logf("[%s] stdout: %q", remote, stdout) - Logf("[%s] stderr: %q", remote, stderr) - Logf("[%s] exit code: %d", remote, code) - Logf("[%s] error: %v", remote, err) - } - return stdout, stderr, code, err + stdout, stderr, code, err := util.RunSSHCommand(cmd, result.User, host, signer) + result.Stdout = stdout + result.Stderr = stderr + result.Code = code + + return result, err +} + +func LogSSHResult(result SSHResult) { + remote := fmt.Sprintf("%s@%s", result.User, result.Host) + Logf("ssh %s: command: %s", remote, result.Cmd) + Logf("ssh %s: stdout: %q", remote, result.Stdout) + Logf("ssh %s: stderr: %q", remote, result.Stderr) + Logf("ssh %s: exit code: %d", remote, result.Code) } // NewHostExecPodSpec returns the pod spec of hostexec pod @@ -2194,20 +2199,22 @@ func restartKubeProxy(host string) error { return fmt.Errorf("unsupported provider: %s", testContext.Provider) } // kubelet will restart the kube-proxy since it's running in a static pod - _, _, code, err := SSH("sudo pkill kube-proxy", host, testContext.Provider) - if err != nil || code != 0 { - return fmt.Errorf("couldn't restart kube-proxy: %v (code %v)", err, code) + result, err := SSH("sudo pkill kube-proxy", host, testContext.Provider) + if err != nil || result.Code != 0 { + LogSSHResult(result) + return fmt.Errorf("couldn't restart kube-proxy: %v", err) } // wait for kube-proxy to come back up err = wait.Poll(5*time.Second, 60*time.Second, func() (bool, error) { - stdout, stderr, code, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider) + result, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider) if err != nil { return false, err } - if code != 0 { - return false, fmt.Errorf("failed to run command, exited %v: %v", code, stderr) + if result.Code != 0 { + LogSSHResult(result) + return false, fmt.Errorf("failed to run command, exited %d", result.Code) } - if stdout == "0\n" { + if result.Stdout == "0\n" { return false, nil } Logf("kube-proxy is back up.") @@ -2230,9 +2237,10 @@ func restartApiserver() error { } else { command = "sudo /etc/init.d/kube-apiserver restart" } - _, _, code, err := SSH(command, getMasterHost()+":22", testContext.Provider) - if err != nil || code != 0 { - return fmt.Errorf("couldn't restart apiserver: %v (code %v)", err, code) + result, err := SSH(command, getMasterHost()+":22", testContext.Provider) + if err != nil || result.Code != 0 { + LogSSHResult(result) + return fmt.Errorf("couldn't restart apiserver: %v", err) } return nil }