make SSH logging in e2e more consistent

This commit is contained in:
Tim Hockin 2015-11-20 13:54:34 -08:00
parent 80569e8866
commit baeea3272a
8 changed files with 85 additions and 71 deletions

View File

@@ -85,11 +85,11 @@ func logCore(cmds []command, hosts []string, dir, provider string) {
defer wg.Done()
logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component)
fmt.Printf("Writing to %s.\n", logfile)
stdout, stderr, _, err := SSH(cmd.cmd, host, provider)
result, err := SSH(cmd.cmd, host, provider)
if err != nil {
fmt.Printf("Error running command: %v\n", err)
}
if err := ioutil.WriteFile(logfile, []byte(stdout+stderr), 0777); err != nil {
if err := ioutil.WriteFile(logfile, []byte(result.Stdout+result.Stderr), 0777); err != nil {
fmt.Printf("Error writing logfile: %v\n", err)
}
}(cmd, host)

View File

@@ -58,10 +58,10 @@ const (
// nodeExec execs the given cmd on node via SSH. Note that the nodeName is an sshable name,
// eg: the name returned by getMasterHost(). This is also not guaranteed to work across
// cloud providers since it involves ssh.
func nodeExec(nodeName, cmd string) (string, string, int, error) {
stdout, stderr, code, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider)
func nodeExec(nodeName, cmd string) (SSHResult, error) {
result, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider)
Expect(err).NotTo(HaveOccurred())
return stdout, stderr, code, err
return result, err
}
// restartDaemonConfig is a config to restart a running daemon on a node, and wait till
@@ -99,10 +99,10 @@ func (r *restartDaemonConfig) waitUp() {
"curl -s -o /dev/null -I -w \"%%{http_code}\" http://localhost:%v/healthz", r.healthzPort)
err := wait.Poll(r.pollInterval, r.pollTimeout, func() (bool, error) {
stdout, stderr, code, err := nodeExec(r.nodeName, healthzCheck)
result, err := nodeExec(r.nodeName, healthzCheck)
expectNoError(err)
if code == 0 {
httpCode, err := strconv.Atoi(stdout)
if result.Code == 0 {
httpCode, err := strconv.Atoi(result.Stdout)
if err != nil {
Logf("Unable to parse healthz http return code: %v", err)
} else if httpCode == 200 {
@@ -110,7 +110,7 @@ func (r *restartDaemonConfig) waitUp() {
}
}
Logf("node %v exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v",
r.nodeName, healthzCheck, code, stdout, stderr)
r.nodeName, healthzCheck, result.Code, result.Stdout, result.Stderr)
return false, nil
})
expectNoError(err, "%v did not respond with a 200 via %v within %v", r, healthzCheck, r.pollTimeout)

View File

@@ -93,10 +93,11 @@ func doEtcdFailure(failCommand, fixCommand string) {
}
func masterExec(cmd string) {
stdout, stderr, code, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
result, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
Expect(err).NotTo(HaveOccurred())
if code != 0 {
Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr)
if result.Code != 0 {
LogSSHResult(result)
Failf("master exec command returned non-zero")
}
}

View File

@@ -162,8 +162,9 @@ func issueSSHCommand(node *api.Node, provider, cmd string) error {
return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
}
Logf("Calling %s on %s", cmd, node.Name)
if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
if result, err := SSH(cmd, host, provider); result.Code != 0 || err != nil {
LogSSHResult(result)
return fmt.Errorf("failed running %q: %v (exit code %d)", cmd, err, result.Code)
}
return nil
}

View File

@@ -338,14 +338,15 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
// may fail). Manual intervention is required in such case (recreating the
// cluster solves the problem too).
err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) {
_, _, code, err := SSHVerbose(undropCmd, host, testContext.Provider)
if code == 0 && err == nil {
result, err := SSH(undropCmd, host, testContext.Provider)
if result.Code == 0 && err == nil {
return true, nil
} else {
Logf("Expected 0 exit code and nil error when running '%s' on %s, got %d and %v",
undropCmd, node.Name, code, err)
return false, nil
}
LogSSHResult(result)
if err != nil {
Logf("Unexpected error: %v", err)
}
return false, nil
})
if err != nil {
Failf("Failed to remove the iptable REJECT rule. Manual intervention is "+
@@ -364,9 +365,9 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
// We could also block network traffic from the master(s) to this node,
// but blocking it one way is sufficient for this test.
dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)
if _, _, code, err := SSHVerbose(dropCmd, host, testContext.Provider); code != 0 || err != nil {
Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
dropCmd, node.Name, code, err)
if result, err := SSH(dropCmd, host, testContext.Provider); result.Code != 0 || err != nil {
LogSSHResult(result)
Failf("Unexpected error: %v", err)
}
Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)

View File

@@ -298,12 +298,13 @@ var _ = Describe("Services", func() {
expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
By("Removing iptable rules")
_, _, code, err := SSH(`
result, err := SSH(`
sudo iptables -t nat -F KUBE-SERVICES || true;
sudo iptables -t nat -F KUBE-PORTALS-HOST || true;
sudo iptables -t nat -F KUBE-PORTALS-CONTAINER || true`, host, testContext.Provider)
if err != nil || code != 0 {
Failf("couldn't remove iptable rules: %v (code %v)", err, code)
if err != nil || result.Code != 0 {
LogSSHResult(result)
Failf("couldn't remove iptable rules: %v", err)
}
expectNoError(verifyServeHostnameServiceUp(c, host, podNames1, svc1IP, servicePort))
expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
@@ -1304,11 +1305,12 @@ func verifyServeHostnameServiceUp(c *client.Client, host string, expectedPods []
for _, cmd := range commands {
passed := false
for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5) {
stdout, _, code, err := SSH(cmd, host, testContext.Provider)
if err != nil || code != 0 {
Logf("error while SSH-ing to node: %v (code %v)", err, code)
result, err := SSH(cmd, host, testContext.Provider)
if err != nil || result.Code != 0 {
LogSSHResult(result)
Logf("error while SSH-ing to node: %v", err)
}
pods := strings.Split(strings.TrimSpace(stdout), "\n")
pods := strings.Split(strings.TrimSpace(result.Stdout), "\n")
sort.StringSlice(pods).Sort()
if api.Semantic.DeepEqual(pods, expectedPods) {
passed = true
@@ -1328,11 +1330,12 @@ func verifyServeHostnameServiceDown(c *client.Client, host string, serviceIP str
"curl -s --connect-timeout 2 http://%s:%d && exit 99", serviceIP, servicePort)
for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5 * time.Second) {
_, _, code, err := SSH(command, host, testContext.Provider)
result, err := SSH(command, host, testContext.Provider)
if err != nil {
LogSSHResult(result)
Logf("error while SSH-ing to node: %v", err)
}
if code != 99 {
if result.Code != 99 {
return nil
}
Logf("service still alive - still waiting")

View File

@@ -65,8 +65,8 @@ var _ = Describe("SSH", func() {
for _, testCase := range testCases {
By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd))
for _, host := range hosts {
stdout, stderr, code, err := SSH(testCase.cmd, host, testContext.Provider)
stdout, stderr = strings.TrimSpace(stdout), strings.TrimSpace(stderr)
result, err := SSH(testCase.cmd, host, testContext.Provider)
stdout, stderr := strings.TrimSpace(result.Stdout), strings.TrimSpace(result.Stderr)
if err != testCase.expectedError {
Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError)
}
@@ -76,8 +76,8 @@ var _ = Describe("SSH", func() {
if stderr != testCase.expectedStderr {
Failf("Ran %s on %s, got stderr '%s', expected '%s'", testCase.cmd, host, stderr, testCase.expectedStderr)
}
if code != testCase.expectedCode {
Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, code, testCase.expectedCode)
if result.Code != testCase.expectedCode {
Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, result.Code, testCase.expectedCode)
}
// Show stdout, stderr for logging purposes.
if len(stdout) > 0 {
@@ -91,7 +91,7 @@ var _ = Describe("SSH", func() {
// Quickly test that SSH itself errors correctly.
By("SSH'ing to a nonexistent host")
if _, _, _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
if _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
Failf("Expected error trying to SSH to nonexistent host.")
}
})

View File

@@ -1932,43 +1932,48 @@ func NodeSSHHosts(c *client.Client) ([]string, error) {
return sshHosts, nil
}
type SSHResult struct {
User string
Host string
Cmd string
Stdout string
Stderr string
Code int
}
// SSH synchronously SSHs to a node running on provider and runs cmd. If there
// is no error performing the SSH, the stdout, stderr, and exit code are
// returned.
func SSH(cmd, host, provider string) (string, string, int, error) {
return sshCore(cmd, host, provider, false)
}
func SSH(cmd, host, provider string) (SSHResult, error) {
result := SSHResult{Host: host, Cmd: cmd}
// SSHVerbose is just like SSH, but it logs the command, user, host, stdout,
// stderr, exit code, and error.
func SSHVerbose(cmd, host, provider string) (string, string, int, error) {
return sshCore(cmd, host, provider, true)
}
func sshCore(cmd, host, provider string, verbose bool) (string, string, int, error) {
// Get a signer for the provider.
signer, err := getSigner(provider)
if err != nil {
return "", "", 0, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
return result, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
}
// RunSSHCommand will default to Getenv("USER") if user == "", but we're
// defaulting here as well for logging clarity.
user := os.Getenv("KUBE_SSH_USER")
if user == "" {
user = os.Getenv("USER")
result.User = os.Getenv("KUBE_SSH_USER")
if result.User == "" {
result.User = os.Getenv("USER")
}
stdout, stderr, code, err := util.RunSSHCommand(cmd, user, host, signer)
if verbose {
remote := fmt.Sprintf("%s@%s", user, host)
Logf("[%s] Running `%s`", remote, cmd)
Logf("[%s] stdout: %q", remote, stdout)
Logf("[%s] stderr: %q", remote, stderr)
Logf("[%s] exit code: %d", remote, code)
Logf("[%s] error: %v", remote, err)
}
return stdout, stderr, code, err
stdout, stderr, code, err := util.RunSSHCommand(cmd, result.User, host, signer)
result.Stdout = stdout
result.Stderr = stderr
result.Code = code
return result, err
}
func LogSSHResult(result SSHResult) {
remote := fmt.Sprintf("%s@%s", result.User, result.Host)
Logf("ssh %s: command: %s", remote, result.Cmd)
Logf("ssh %s: stdout: %q", remote, result.Stdout)
Logf("ssh %s: stderr: %q", remote, result.Stderr)
Logf("ssh %s: exit code: %d", remote, result.Code)
}
// NewHostExecPodSpec returns the pod spec of hostexec pod
@@ -2194,20 +2199,22 @@ func restartKubeProxy(host string) error {
return fmt.Errorf("unsupported provider: %s", testContext.Provider)
}
// kubelet will restart the kube-proxy since it's running in a static pod
_, _, code, err := SSH("sudo pkill kube-proxy", host, testContext.Provider)
if err != nil || code != 0 {
return fmt.Errorf("couldn't restart kube-proxy: %v (code %v)", err, code)
result, err := SSH("sudo pkill kube-proxy", host, testContext.Provider)
if err != nil || result.Code != 0 {
LogSSHResult(result)
return fmt.Errorf("couldn't restart kube-proxy: %v", err)
}
// wait for kube-proxy to come back up
err = wait.Poll(5*time.Second, 60*time.Second, func() (bool, error) {
stdout, stderr, code, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider)
result, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider)
if err != nil {
return false, err
}
if code != 0 {
return false, fmt.Errorf("failed to run command, exited %v: %v", code, stderr)
if result.Code != 0 {
LogSSHResult(result)
return false, fmt.Errorf("failed to run command, exited %d", result.Code)
}
if stdout == "0\n" {
if result.Stdout == "0\n" {
return false, nil
}
Logf("kube-proxy is back up.")
@@ -2230,9 +2237,10 @@ func restartApiserver() error {
} else {
command = "sudo /etc/init.d/kube-apiserver restart"
}
_, _, code, err := SSH(command, getMasterHost()+":22", testContext.Provider)
if err != nil || code != 0 {
return fmt.Errorf("couldn't restart apiserver: %v (code %v)", err, code)
result, err := SSH(command, getMasterHost()+":22", testContext.Provider)
if err != nil || result.Code != 0 {
LogSSHResult(result)
return fmt.Errorf("couldn't restart apiserver: %v", err)
}
return nil
}