Merge pull request #52352 from enisoc/sts-deflake

Automatic merge from submit-queue (batch tested with PRs 48226, 52046, 52231, 52344, 52352)

StatefulSet: Deflake e2e RunHostCmd more.

It turns out that at some points while the Node is recovering from a reboot, we get a different kind of error ("unable to upgrade connection") that does not contain the "Error from server" text we previously treated as retryable. Since we can't distinguish these transient errors from an error encountered after successfully executing the remote command, let's just retry all errors for 5 minutes. If this doesn't fix the flake, the remaining failures are most likely a Node-level problem to hand off to sig-node.

ref #48031
This commit is contained in:
Kubernetes Submit Queue 2017-09-12 19:40:06 -07:00 committed by GitHub
commit 9636522137

View File

@@ -3311,7 +3311,7 @@ func RunHostCmdOrDie(ns, name, cmd string) string {
return stdout return stdout
} }
// RunHostCmdWithRetries calls RunHostCmd and retries errors it thinks may be transient // RunHostCmdWithRetries calls RunHostCmd and retries all errors
// until it succeeds or the specified timeout expires. // until it succeeds or the specified timeout expires.
// This can be used with idempotent commands to deflake transient Node issues. // This can be used with idempotent commands to deflake transient Node issues.
func RunHostCmdWithRetries(ns, name, cmd string, interval, timeout time.Duration) (string, error) { func RunHostCmdWithRetries(ns, name, cmd string, interval, timeout time.Duration) (string, error) {
@@ -3324,9 +3324,6 @@ func RunHostCmdWithRetries(ns, name, cmd string, interval, timeout time.Duration
if elapsed := time.Since(start); elapsed > timeout { if elapsed := time.Since(start); elapsed > timeout {
return out, fmt.Errorf("RunHostCmd still failed after %v: %v", elapsed, err) return out, fmt.Errorf("RunHostCmd still failed after %v: %v", elapsed, err)
} }
if !strings.Contains(err.Error(), "Error from server") {
return out, fmt.Errorf("Non-retryable RunHostCmd error: %v", err)
}
Logf("Waiting %v to retry failed RunHostCmd: %v", interval, err) Logf("Waiting %v to retry failed RunHostCmd: %v", interval, err)
time.Sleep(interval) time.Sleep(interval)
} }