mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 22:46:12 +00:00
make SSH logging in e2e more consistent
This commit is contained in:
parent
80569e8866
commit
baeea3272a
@ -85,11 +85,11 @@ func logCore(cmds []command, hosts []string, dir, provider string) {
|
|||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component)
|
logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component)
|
||||||
fmt.Printf("Writing to %s.\n", logfile)
|
fmt.Printf("Writing to %s.\n", logfile)
|
||||||
stdout, stderr, _, err := SSH(cmd.cmd, host, provider)
|
result, err := SSH(cmd.cmd, host, provider)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error running command: %v\n", err)
|
fmt.Printf("Error running command: %v\n", err)
|
||||||
}
|
}
|
||||||
if err := ioutil.WriteFile(logfile, []byte(stdout+stderr), 0777); err != nil {
|
if err := ioutil.WriteFile(logfile, []byte(result.Stdout+result.Stderr), 0777); err != nil {
|
||||||
fmt.Printf("Error writing logfile: %v\n", err)
|
fmt.Printf("Error writing logfile: %v\n", err)
|
||||||
}
|
}
|
||||||
}(cmd, host)
|
}(cmd, host)
|
||||||
|
@ -58,10 +58,10 @@ const (
|
|||||||
// nodeExec execs the given cmd on node via SSH. Note that the nodeName is an sshable name,
|
// nodeExec execs the given cmd on node via SSH. Note that the nodeName is an sshable name,
|
||||||
// eg: the name returned by getMasterHost(). This is also not guaranteed to work across
|
// eg: the name returned by getMasterHost(). This is also not guaranteed to work across
|
||||||
// cloud providers since it involves ssh.
|
// cloud providers since it involves ssh.
|
||||||
func nodeExec(nodeName, cmd string) (string, string, int, error) {
|
func nodeExec(nodeName, cmd string) (SSHResult, error) {
|
||||||
stdout, stderr, code, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider)
|
result, err := SSH(cmd, fmt.Sprintf("%v:%v", nodeName, sshPort), testContext.Provider)
|
||||||
Expect(err).NotTo(HaveOccurred())
|
Expect(err).NotTo(HaveOccurred())
|
||||||
return stdout, stderr, code, err
|
return result, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// restartDaemonConfig is a config to restart a running daemon on a node, and wait till
|
// restartDaemonConfig is a config to restart a running daemon on a node, and wait till
|
||||||
@ -99,10 +99,10 @@ func (r *restartDaemonConfig) waitUp() {
|
|||||||
"curl -s -o /dev/null -I -w \"%%{http_code}\" http://localhost:%v/healthz", r.healthzPort)
|
"curl -s -o /dev/null -I -w \"%%{http_code}\" http://localhost:%v/healthz", r.healthzPort)
|
||||||
|
|
||||||
err := wait.Poll(r.pollInterval, r.pollTimeout, func() (bool, error) {
|
err := wait.Poll(r.pollInterval, r.pollTimeout, func() (bool, error) {
|
||||||
stdout, stderr, code, err := nodeExec(r.nodeName, healthzCheck)
|
result, err := nodeExec(r.nodeName, healthzCheck)
|
||||||
expectNoError(err)
|
expectNoError(err)
|
||||||
if code == 0 {
|
if result.Code == 0 {
|
||||||
httpCode, err := strconv.Atoi(stdout)
|
httpCode, err := strconv.Atoi(result.Stdout)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Logf("Unable to parse healthz http return code: %v", err)
|
Logf("Unable to parse healthz http return code: %v", err)
|
||||||
} else if httpCode == 200 {
|
} else if httpCode == 200 {
|
||||||
@ -110,7 +110,7 @@ func (r *restartDaemonConfig) waitUp() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Logf("node %v exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v",
|
Logf("node %v exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v",
|
||||||
r.nodeName, healthzCheck, code, stdout, stderr)
|
r.nodeName, healthzCheck, result.Code, result.Stdout, result.Stderr)
|
||||||
return false, nil
|
return false, nil
|
||||||
})
|
})
|
||||||
expectNoError(err, "%v did not respond with a 200 via %v within %v", r, healthzCheck, r.pollTimeout)
|
expectNoError(err, "%v did not respond with a 200 via %v within %v", r, healthzCheck, r.pollTimeout)
|
||||||
|
@ -93,10 +93,11 @@ func doEtcdFailure(failCommand, fixCommand string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func masterExec(cmd string) {
|
func masterExec(cmd string) {
|
||||||
stdout, stderr, code, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
|
result, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
|
||||||
Expect(err).NotTo(HaveOccurred())
|
Expect(err).NotTo(HaveOccurred())
|
||||||
if code != 0 {
|
if result.Code != 0 {
|
||||||
Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr)
|
LogSSHResult(result)
|
||||||
|
Failf("master exec command returned non-zero")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,8 +162,9 @@ func issueSSHCommand(node *api.Node, provider, cmd string) error {
|
|||||||
return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
|
return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
|
||||||
}
|
}
|
||||||
Logf("Calling %s on %s", cmd, node.Name)
|
Logf("Calling %s on %s", cmd, node.Name)
|
||||||
if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
|
if result, err := SSH(cmd, host, provider); result.Code != 0 || err != nil {
|
||||||
return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
|
LogSSHResult(result)
|
||||||
|
return fmt.Errorf("failed running %q: %v (exit code %d)", cmd, err, result.Code)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -338,14 +338,15 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
|
|||||||
// may fail). Manual intervention is required in such case (recreating the
|
// may fail). Manual intervention is required in such case (recreating the
|
||||||
// cluster solves the problem too).
|
// cluster solves the problem too).
|
||||||
err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) {
|
err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) {
|
||||||
_, _, code, err := SSHVerbose(undropCmd, host, testContext.Provider)
|
result, err := SSH(undropCmd, host, testContext.Provider)
|
||||||
if code == 0 && err == nil {
|
if result.Code == 0 && err == nil {
|
||||||
return true, nil
|
return true, nil
|
||||||
} else {
|
|
||||||
Logf("Expected 0 exit code and nil error when running '%s' on %s, got %d and %v",
|
|
||||||
undropCmd, node.Name, code, err)
|
|
||||||
return false, nil
|
|
||||||
}
|
}
|
||||||
|
LogSSHResult(result)
|
||||||
|
if err != nil {
|
||||||
|
Logf("Unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Failf("Failed to remove the iptable REJECT rule. Manual intervention is "+
|
Failf("Failed to remove the iptable REJECT rule. Manual intervention is "+
|
||||||
@ -364,9 +365,9 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
|
|||||||
// We could also block network traffic from the master(s) to this node,
|
// We could also block network traffic from the master(s) to this node,
|
||||||
// but blocking it one way is sufficient for this test.
|
// but blocking it one way is sufficient for this test.
|
||||||
dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)
|
dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)
|
||||||
if _, _, code, err := SSHVerbose(dropCmd, host, testContext.Provider); code != 0 || err != nil {
|
if result, err := SSH(dropCmd, host, testContext.Provider); result.Code != 0 || err != nil {
|
||||||
Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
|
LogSSHResult(result)
|
||||||
dropCmd, node.Name, code, err)
|
Failf("Unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
|
Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
|
||||||
|
@ -298,12 +298,13 @@ var _ = Describe("Services", func() {
|
|||||||
expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
|
expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
|
||||||
|
|
||||||
By("Removing iptable rules")
|
By("Removing iptable rules")
|
||||||
_, _, code, err := SSH(`
|
result, err := SSH(`
|
||||||
sudo iptables -t nat -F KUBE-SERVICES || true;
|
sudo iptables -t nat -F KUBE-SERVICES || true;
|
||||||
sudo iptables -t nat -F KUBE-PORTALS-HOST || true;
|
sudo iptables -t nat -F KUBE-PORTALS-HOST || true;
|
||||||
sudo iptables -t nat -F KUBE-PORTALS-CONTAINER || true`, host, testContext.Provider)
|
sudo iptables -t nat -F KUBE-PORTALS-CONTAINER || true`, host, testContext.Provider)
|
||||||
if err != nil || code != 0 {
|
if err != nil || result.Code != 0 {
|
||||||
Failf("couldn't remove iptable rules: %v (code %v)", err, code)
|
LogSSHResult(result)
|
||||||
|
Failf("couldn't remove iptable rules: %v", err)
|
||||||
}
|
}
|
||||||
expectNoError(verifyServeHostnameServiceUp(c, host, podNames1, svc1IP, servicePort))
|
expectNoError(verifyServeHostnameServiceUp(c, host, podNames1, svc1IP, servicePort))
|
||||||
expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
|
expectNoError(verifyServeHostnameServiceUp(c, host, podNames2, svc2IP, servicePort))
|
||||||
@ -1304,11 +1305,12 @@ func verifyServeHostnameServiceUp(c *client.Client, host string, expectedPods []
|
|||||||
for _, cmd := range commands {
|
for _, cmd := range commands {
|
||||||
passed := false
|
passed := false
|
||||||
for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5) {
|
for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5) {
|
||||||
stdout, _, code, err := SSH(cmd, host, testContext.Provider)
|
result, err := SSH(cmd, host, testContext.Provider)
|
||||||
if err != nil || code != 0 {
|
if err != nil || result.Code != 0 {
|
||||||
Logf("error while SSH-ing to node: %v (code %v)", err, code)
|
LogSSHResult(result)
|
||||||
|
Logf("error while SSH-ing to node: %v", err)
|
||||||
}
|
}
|
||||||
pods := strings.Split(strings.TrimSpace(stdout), "\n")
|
pods := strings.Split(strings.TrimSpace(result.Stdout), "\n")
|
||||||
sort.StringSlice(pods).Sort()
|
sort.StringSlice(pods).Sort()
|
||||||
if api.Semantic.DeepEqual(pods, expectedPods) {
|
if api.Semantic.DeepEqual(pods, expectedPods) {
|
||||||
passed = true
|
passed = true
|
||||||
@ -1328,11 +1330,12 @@ func verifyServeHostnameServiceDown(c *client.Client, host string, serviceIP str
|
|||||||
"curl -s --connect-timeout 2 http://%s:%d && exit 99", serviceIP, servicePort)
|
"curl -s --connect-timeout 2 http://%s:%d && exit 99", serviceIP, servicePort)
|
||||||
|
|
||||||
for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5 * time.Second) {
|
for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5 * time.Second) {
|
||||||
_, _, code, err := SSH(command, host, testContext.Provider)
|
result, err := SSH(command, host, testContext.Provider)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
LogSSHResult(result)
|
||||||
Logf("error while SSH-ing to node: %v", err)
|
Logf("error while SSH-ing to node: %v", err)
|
||||||
}
|
}
|
||||||
if code != 99 {
|
if result.Code != 99 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
Logf("service still alive - still waiting")
|
Logf("service still alive - still waiting")
|
||||||
|
@ -65,8 +65,8 @@ var _ = Describe("SSH", func() {
|
|||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd))
|
By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd))
|
||||||
for _, host := range hosts {
|
for _, host := range hosts {
|
||||||
stdout, stderr, code, err := SSH(testCase.cmd, host, testContext.Provider)
|
result, err := SSH(testCase.cmd, host, testContext.Provider)
|
||||||
stdout, stderr = strings.TrimSpace(stdout), strings.TrimSpace(stderr)
|
stdout, stderr := strings.TrimSpace(result.Stdout), strings.TrimSpace(result.Stderr)
|
||||||
if err != testCase.expectedError {
|
if err != testCase.expectedError {
|
||||||
Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError)
|
Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError)
|
||||||
}
|
}
|
||||||
@ -76,8 +76,8 @@ var _ = Describe("SSH", func() {
|
|||||||
if stderr != testCase.expectedStderr {
|
if stderr != testCase.expectedStderr {
|
||||||
Failf("Ran %s on %s, got stderr '%s', expected '%s'", testCase.cmd, host, stderr, testCase.expectedStderr)
|
Failf("Ran %s on %s, got stderr '%s', expected '%s'", testCase.cmd, host, stderr, testCase.expectedStderr)
|
||||||
}
|
}
|
||||||
if code != testCase.expectedCode {
|
if result.Code != testCase.expectedCode {
|
||||||
Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, code, testCase.expectedCode)
|
Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, result.Code, testCase.expectedCode)
|
||||||
}
|
}
|
||||||
// Show stdout, stderr for logging purposes.
|
// Show stdout, stderr for logging purposes.
|
||||||
if len(stdout) > 0 {
|
if len(stdout) > 0 {
|
||||||
@ -91,7 +91,7 @@ var _ = Describe("SSH", func() {
|
|||||||
|
|
||||||
// Quickly test that SSH itself errors correctly.
|
// Quickly test that SSH itself errors correctly.
|
||||||
By("SSH'ing to a nonexistent host")
|
By("SSH'ing to a nonexistent host")
|
||||||
if _, _, _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
|
if _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
|
||||||
Failf("Expected error trying to SSH to nonexistent host.")
|
Failf("Expected error trying to SSH to nonexistent host.")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -1932,43 +1932,48 @@ func NodeSSHHosts(c *client.Client) ([]string, error) {
|
|||||||
return sshHosts, nil
|
return sshHosts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type SSHResult struct {
|
||||||
|
User string
|
||||||
|
Host string
|
||||||
|
Cmd string
|
||||||
|
Stdout string
|
||||||
|
Stderr string
|
||||||
|
Code int
|
||||||
|
}
|
||||||
|
|
||||||
// SSH synchronously SSHs to a node running on provider and runs cmd. If there
|
// SSH synchronously SSHs to a node running on provider and runs cmd. If there
|
||||||
// is no error performing the SSH, the stdout, stderr, and exit code are
|
// is no error performing the SSH, the stdout, stderr, and exit code are
|
||||||
// returned.
|
// returned.
|
||||||
func SSH(cmd, host, provider string) (string, string, int, error) {
|
func SSH(cmd, host, provider string) (SSHResult, error) {
|
||||||
return sshCore(cmd, host, provider, false)
|
result := SSHResult{Host: host, Cmd: cmd}
|
||||||
}
|
|
||||||
|
|
||||||
// SSHVerbose is just like SSH, but it logs the command, user, host, stdout,
|
|
||||||
// stderr, exit code, and error.
|
|
||||||
func SSHVerbose(cmd, host, provider string) (string, string, int, error) {
|
|
||||||
return sshCore(cmd, host, provider, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
func sshCore(cmd, host, provider string, verbose bool) (string, string, int, error) {
|
|
||||||
// Get a signer for the provider.
|
// Get a signer for the provider.
|
||||||
signer, err := getSigner(provider)
|
signer, err := getSigner(provider)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", "", 0, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
|
return result, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunSSHCommand will default to Getenv("USER") if user == "", but we're
|
// RunSSHCommand will default to Getenv("USER") if user == "", but we're
|
||||||
// defaulting here as well for logging clarity.
|
// defaulting here as well for logging clarity.
|
||||||
user := os.Getenv("KUBE_SSH_USER")
|
result.User = os.Getenv("KUBE_SSH_USER")
|
||||||
if user == "" {
|
if result.User == "" {
|
||||||
user = os.Getenv("USER")
|
result.User = os.Getenv("USER")
|
||||||
}
|
}
|
||||||
|
|
||||||
stdout, stderr, code, err := util.RunSSHCommand(cmd, user, host, signer)
|
stdout, stderr, code, err := util.RunSSHCommand(cmd, result.User, host, signer)
|
||||||
if verbose {
|
result.Stdout = stdout
|
||||||
remote := fmt.Sprintf("%s@%s", user, host)
|
result.Stderr = stderr
|
||||||
Logf("[%s] Running `%s`", remote, cmd)
|
result.Code = code
|
||||||
Logf("[%s] stdout: %q", remote, stdout)
|
|
||||||
Logf("[%s] stderr: %q", remote, stderr)
|
return result, err
|
||||||
Logf("[%s] exit code: %d", remote, code)
|
}
|
||||||
Logf("[%s] error: %v", remote, err)
|
|
||||||
}
|
func LogSSHResult(result SSHResult) {
|
||||||
return stdout, stderr, code, err
|
remote := fmt.Sprintf("%s@%s", result.User, result.Host)
|
||||||
|
Logf("ssh %s: command: %s", remote, result.Cmd)
|
||||||
|
Logf("ssh %s: stdout: %q", remote, result.Stdout)
|
||||||
|
Logf("ssh %s: stderr: %q", remote, result.Stderr)
|
||||||
|
Logf("ssh %s: exit code: %d", remote, result.Code)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewHostExecPodSpec returns the pod spec of hostexec pod
|
// NewHostExecPodSpec returns the pod spec of hostexec pod
|
||||||
@ -2194,20 +2199,22 @@ func restartKubeProxy(host string) error {
|
|||||||
return fmt.Errorf("unsupported provider: %s", testContext.Provider)
|
return fmt.Errorf("unsupported provider: %s", testContext.Provider)
|
||||||
}
|
}
|
||||||
// kubelet will restart the kube-proxy since it's running in a static pod
|
// kubelet will restart the kube-proxy since it's running in a static pod
|
||||||
_, _, code, err := SSH("sudo pkill kube-proxy", host, testContext.Provider)
|
result, err := SSH("sudo pkill kube-proxy", host, testContext.Provider)
|
||||||
if err != nil || code != 0 {
|
if err != nil || result.Code != 0 {
|
||||||
return fmt.Errorf("couldn't restart kube-proxy: %v (code %v)", err, code)
|
LogSSHResult(result)
|
||||||
|
return fmt.Errorf("couldn't restart kube-proxy: %v", err)
|
||||||
}
|
}
|
||||||
// wait for kube-proxy to come back up
|
// wait for kube-proxy to come back up
|
||||||
err = wait.Poll(5*time.Second, 60*time.Second, func() (bool, error) {
|
err = wait.Poll(5*time.Second, 60*time.Second, func() (bool, error) {
|
||||||
stdout, stderr, code, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider)
|
result, err := SSH("sudo /bin/sh -c 'pgrep kube-proxy | wc -l'", host, testContext.Provider)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
if code != 0 {
|
if result.Code != 0 {
|
||||||
return false, fmt.Errorf("failed to run command, exited %v: %v", code, stderr)
|
LogSSHResult(result)
|
||||||
|
return false, fmt.Errorf("failed to run command, exited %d", result.Code)
|
||||||
}
|
}
|
||||||
if stdout == "0\n" {
|
if result.Stdout == "0\n" {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
Logf("kube-proxy is back up.")
|
Logf("kube-proxy is back up.")
|
||||||
@ -2230,9 +2237,10 @@ func restartApiserver() error {
|
|||||||
} else {
|
} else {
|
||||||
command = "sudo /etc/init.d/kube-apiserver restart"
|
command = "sudo /etc/init.d/kube-apiserver restart"
|
||||||
}
|
}
|
||||||
_, _, code, err := SSH(command, getMasterHost()+":22", testContext.Provider)
|
result, err := SSH(command, getMasterHost()+":22", testContext.Provider)
|
||||||
if err != nil || code != 0 {
|
if err != nil || result.Code != 0 {
|
||||||
return fmt.Errorf("couldn't restart apiserver: %v (code %v)", err, code)
|
LogSSHResult(result)
|
||||||
|
return fmt.Errorf("couldn't restart apiserver: %v", err)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user