mirror of https://github.com/k3s-io/kubernetes.git
make SSH logging in e2e more consistent
commit baeea3272a
parent 80569e8866
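In short: the four-value SSH(cmd, host, provider) helper now returns a single SSHResult (user, host, command, stdout, stderr, exit code) plus an error, SSHVerbose is folded into SSH, and call sites log failures through the shared LogSSHResult helper. A minimal sketch of the call-site pattern this commit converges on; SSH, SSHResult, LogSSHResult, Failf, and testContext are the e2e framework helpers as defined in this diff, while the command and failure message below are illustrative, not taken from the diff:

	// Run a command over SSH and log full context on failure.
	result, err := SSH("echo hello", host, testContext.Provider)
	if err != nil || result.Code != 0 {
		// One shared helper logs user@host, command, stdout, stderr, and exit code.
		LogSSHResult(result)
		Failf("SSH command failed: %v", err)
	}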
@@ -85,11 +85,11 @@ func logCore(cmds []command, hosts []string, dir, provider string) {
 			defer wg.Done()
 			logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component)
 			fmt.Printf("Writing to %s.\n", logfile)
-			stdout, stderr, _, err := SSH(cmd.cmd, host, provider)
+			result, err := SSH(cmd.cmd, host, provider)
 			if err != nil {
 				fmt.Printf("Error running command: %v\n", err)
 			}
-			if err := ioutil.WriteFile(logfile, []byte(stdout+stderr), 0777); err != nil {
+			if err := ioutil.WriteFile(logfile, []byte(result.Stdout+result.Stderr), 0777); err != nil {
 				fmt.Printf("Error writing logfile: %v\n", err)
 			}
 		}(cmd, host)
@@ -58,10 +58,10 @@ const (
 // nodeExec execs the given cmd on node via SSH. Note that the nodeName is an sshable name,
 // eg: the name returned by getMasterHost(). This is also not guaranteed to work across
 // cloud providers since it involves ssh.
-func nodeExec(nodeName, cmd string) (string, string, int, error) {
-	stdout, stderr, code, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider)
+func nodeExec(nodeName, cmd string) (SSHResult, error) {
+	result, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider)
 	Expect(err).NotTo(HaveOccurred())
-	return stdout, stderr, code, err
+	return result, err
 }
 
 // restartDaemonConfig is a config to restart a running daemon on a node, and wait till
@@ -99,10 +99,10 @@ func (r *restartDaemonConfig) waitUp() {
 		"curl -s -o /dev/null -I -w \"%%{http_code}\" http://localhost:%v/healthz", r.healthzPort)
 
 	err := wait.Poll(r.pollInterval, r.pollTimeout, func() (bool, error) {
-		stdout, stderr, code, err := nodeExec(r.nodeName, healthzCheck)
+		result, err := nodeExec(r.nodeName, healthzCheck)
 		expectNoError(err)
-		if code == 0 {
-			httpCode, err := strconv.Atoi(stdout)
+		if result.Code == 0 {
+			httpCode, err := strconv.Atoi(result.Stdout)
 			if err != nil {
 				Logf("Unable to parse healthz http return code: %v", err)
 			} else if httpCode == 200 {
@@ -110,7 +110,7 @@ func (r *restartDaemonConfig) waitUp() {
 			}
 		}
 		Logf("node %v exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v",
-			r.nodeName, healthzCheck, code, stdout, stderr)
+			r.nodeName, healthzCheck, result.Code, result.Stdout, result.Stderr)
 		return false, nil
 	})
 	expectNoError(err, "%v did not respond with a 200 via %v within %v", r, healthzCheck, r.pollTimeout)
@@ -93,10 +93,11 @@ func doEtcdFailure(failCommand, fixCommand string) {
 }
 
 func masterExec(cmd string) {
-	stdout, stderr, code, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
+	result, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
 	Expect(err).NotTo(HaveOccurred())
-	if code != 0 {
-		Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr)
+	if result.Code != 0 {
+		LogSSHResult(result)
+		Failf("master exec command returned non-zero")
 	}
 }
 
@@ -162,8 +162,9 @@ func issueSSHCommand(node *api.Node, provider, cmd string) error {
 		return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
 	}
 	Logf("Calling %s on %s", cmd, node.Name)
-	if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
-		return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
+	if result, err := SSH(cmd, host, provider); result.Code != 0 || err != nil {
+		LogSSHResult(result)
+		return fmt.Errorf("failed running %q: %v (exit code %d)", cmd, err, result.Code)
 	}
 	return nil
 }
@@ -338,14 +338,15 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
 	// may fail). Manual intervention is required in such case (recreating the
 	// cluster solves the problem too).
 	err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) {
-		_, _, code, err := SSHVerbose(undropCmd, host, testContext.Provider)
-		if code == 0 && err == nil {
+		result, err := SSH(undropCmd, host, testContext.Provider)
+		if result.Code == 0 && err == nil {
 			return true, nil
-		} else {
-			Logf("Expected 0 exit code and nil error when running '%s' on %s, got %d and %v",
-				undropCmd, node.Name, code, err)
-			return false, nil
 		}
+		LogSSHResult(result)
+		if err != nil {
+			Logf("Unexpected error: %v", err)
+		}
+		return false, nil
 	})
 	if err != nil {
 		Failf("Failed to remove the iptable REJECT rule. Manual intervention is "+
@@ -364,9 +365,9 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
 	// We could also block network traffic from the master(s) to this node,
 	// but blocking it one way is sufficient for this test.
 	dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)
-	if _, _, code, err := SSHVerbose(dropCmd, host, testContext.Provider); code != 0 || err != nil {
-		Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
-			dropCmd, node.Name, code, err)
+	if result, err := SSH(dropCmd, host, testContext.Provider); result.Code != 0 || err != nil {
+		LogSSHResult(result)
+		Failf("Unexpected error: %v", err)
 	}
 
 	Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
@@ -298,12 +298,13 @@ var _ = Describe("Services", func() {
 		expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
 
 		By("Removing iptable rules")
-		_, _, code, err := SSH(`
+		result, err := SSH(`
 			sudo iptables -t nat -F KUBE-SERVICES || true;
 			sudo iptables -t nat -F KUBE-PORTALS-HOST || true;
 			sudo iptables -t nat -F KUBE-PORTALS-CONTAINER || true`, host, testContext.Provider)
-		if err != nil || code != 0 {
-			Failf("couldn't remove iptable rules: %v (code %v)", err, code)
+		if err != nil || result.Code != 0 {
+			LogSSHResult(result)
+			Failf("couldn't remove iptable rules: %v", err)
 		}
 		expectNoError(verifyServeHostnameServiceUp(c, host, podNames1, svc1IP, servicePort))
 		expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
@@ -1304,11 +1305,12 @@ func verifyServeHostnameServiceUp(c *client.Client, host string, expectedPods []
 	for _, cmd := range commands {
 		passed := false
 		for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5) {
-			stdout, _, code, err := SSH(cmd, host, testContext.Provider)
-			if err != nil || code != 0 {
-				Logf("error while SSH-ing to node: %v (code %v)", err, code)
+			result, err := SSH(cmd, host, testContext.Provider)
+			if err != nil || result.Code != 0 {
+				LogSSHResult(result)
+				Logf("error while SSH-ing to node: %v", err)
 			}
-			pods := strings.Split(strings.TrimSpace(stdout), "\n")
+			pods := strings.Split(strings.TrimSpace(result.Stdout), "\n")
 			sort.StringSlice(pods).Sort()
 			if api.Semantic.DeepEqual(pods, expectedPods) {
 				passed = true
@@ -1328,11 +1330,12 @@ func verifyServeHostnameServiceDown(c *client.Client, host string, serviceIP str
 		"curl -s --connect-timeout 2 http://%s:%d && exit 99", serviceIP, servicePort)
 
 	for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5 * time.Second) {
-		_, _, code, err := SSH(command, host, testContext.Provider)
+		result, err := SSH(command, host, testContext.Provider)
 		if err != nil {
+			LogSSHResult(result)
 			Logf("error while SSH-ing to node: %v", err)
 		}
-		if code != 99 {
+		if result.Code != 99 {
 			return nil
 		}
 		Logf("service still alive - still waiting")
@@ -65,8 +65,8 @@ var _ = Describe("SSH", func() {
 		for _, testCase := range testCases {
 			By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd))
 			for _, host := range hosts {
-				stdout, stderr, code, err := SSH(testCase.cmd, host, testContext.Provider)
-				stdout, stderr = strings.TrimSpace(stdout), strings.TrimSpace(stderr)
+				result, err := SSH(testCase.cmd, host, testContext.Provider)
+				stdout, stderr := strings.TrimSpace(result.Stdout), strings.TrimSpace(result.Stderr)
 				if err != testCase.expectedError {
 					Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError)
 				}
@@ -76,8 +76,8 @@ var _ = Describe("SSH", func() {
 			if stderr != testCase.expectedStderr {
 				Failf("Ran %s on %s, got stderr '%s', expected '%s'", testCase.cmd, host, stderr, testCase.expectedStderr)
 			}
-			if code != testCase.expectedCode {
-				Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, code, testCase.expectedCode)
+			if result.Code != testCase.expectedCode {
+				Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, result.Code, testCase.expectedCode)
 			}
 			// Show stdout, stderr for logging purposes.
 			if len(stdout) > 0 {
@@ -91,7 +91,7 @@ var _ = Describe("SSH", func() {
 
 		// Quickly test that SSH itself errors correctly.
 		By("SSH'ing to a nonexistent host")
-		if _, _, _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
+		if _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
 			Failf("Expected error trying to SSH to nonexistent host.")
 		}
 	})
@@ -1932,43 +1932,48 @@ func NodeSSHHosts(c *client.Client) ([]string, error) {
 	return sshHosts, nil
 }
 
+type SSHResult struct {
+	User   string
+	Host   string
+	Cmd    string
+	Stdout string
+	Stderr string
+	Code   int
+}
+
 // SSH synchronously SSHs to a node running on provider and runs cmd. If there
 // is no error performing the SSH, the stdout, stderr, and exit code are
 // returned.
-func SSH(cmd, host, provider string) (string, string, int, error) {
-	return sshCore(cmd, host, provider, false)
-}
+func SSH(cmd, host, provider string) (SSHResult, error) {
+	result := SSHResult{Host: host, Cmd: cmd}
 
-// SSHVerbose is just like SSH, but it logs the command, user, host, stdout,
-// stderr, exit code, and error.
-func SSHVerbose(cmd, host, provider string) (string, string, int, error) {
-	return sshCore(cmd, host, provider, true)
-}
-
-func sshCore(cmd, host, provider string, verbose bool) (string, string, int, error) {
 	// Get a signer for the provider.
 	signer, err := getSigner(provider)
 	if err != nil {
-		return "", "", 0, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
+		return result, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
 	}
 
 	// RunSSHCommand will default to Getenv("USER") if user == "", but we're
 	// defaulting here as well for logging clarity.
-	user := os.Getenv("KUBE_SSH_USER")
-	if user == "" {
-		user = os.Getenv("USER")
+	result.User = os.Getenv("KUBE_SSH_USER")
+	if result.User == "" {
+		result.User = os.Getenv("USER")
 	}
 
-	stdout, stderr, code, err := util.RunSSHCommand(cmd, user, host, signer)
-	if verbose {
-		remote := fmt.Sprintf("%s@%s", user, host)
-		Logf("[%s] Running `%s`", remote, cmd)
-		Logf("[%s] stdout: %q", remote, stdout)
-		Logf("[%s] stderr: %q", remote, stderr)
-		Logf("[%s] exit code: %d", remote, code)
-		Logf("[%s] error: %v", remote, err)
-	}
-	return stdout, stderr, code, err
+	stdout, stderr, code, err := util.RunSSHCommand(cmd, result.User, host, signer)
+	result.Stdout = stdout
+	result.Stderr = stderr
+	result.Code = code
+
+	return result, err
 }
 
+func LogSSHResult(result SSHResult) {
+	remote := fmt.Sprintf("%s@%s", result.User, result.Host)
+	Logf("ssh %s: command: %s", remote, result.Cmd)
+	Logf("ssh %s: stdout: %q", remote, result.Stdout)
+	Logf("ssh %s: stderr: %q", remote, result.Stderr)
+	Logf("ssh %s: exit code: %d", remote, result.Code)
+}
+
 // NewHostExecPodSpec returns the pod spec of hostexec pod
@@ -2194,20 +2199,22 @@ func restartKubeProxy(host string) error {
 		return fmt.Errorf("unsupported provider: %s", testContext.Provider)
 	}
 	// kubelet will restart the kube-proxy since it's running in a static pod
-	_, _, code, err := SSH("sudo pkill kube-proxy", host, testContext.Provider)
-	if err != nil || code != 0 {
-		return fmt.Errorf("couldn't restart kube-proxy: %v (code %v)", err, code)
+	result, err := SSH("sudo pkill kube-proxy", host, testContext.Provider)
+	if err != nil || result.Code != 0 {
+		LogSSHResult(result)
+		return fmt.Errorf("couldn't restart kube-proxy: %v", err)
 	}
 	// wait for kube-proxy to come back up
 	err = wait.Poll(5*time.Second, 60*time.Second, func() (bool, error) {
-		stdout, stderr, code, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider)
+		result, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider)
 		if err != nil {
 			return false, err
 		}
-		if code != 0 {
-			return false, fmt.Errorf("failed to run command, exited %v: %v", code, stderr)
+		if result.Code != 0 {
+			LogSSHResult(result)
+			return false, fmt.Errorf("failed to run command, exited %d", result.Code)
 		}
-		if stdout == "0\n" {
+		if result.Stdout == "0\n" {
 			return false, nil
 		}
 		Logf("kube-proxy is back up.")
@@ -2230,9 +2237,10 @@ func restartApiserver() error {
 	} else {
 		command = "sudo /etc/init.d/kube-apiserver restart"
 	}
-	_, _, code, err := SSH(command, getMasterHost()+":22", testContext.Provider)
-	if err != nil || code != 0 {
-		return fmt.Errorf("couldn't restart apiserver: %v (code %v)", err, code)
+	result, err := SSH(command, getMasterHost()+":22", testContext.Provider)
+	if err != nil || result.Code != 0 {
+		LogSSHResult(result)
+		return fmt.Errorf("couldn't restart apiserver: %v", err)
 	}
 	return nil
 }