test/e2e: Allow test invokers to skip test waits before and after

A number of e2e tests are useful to run after the system has been
disrupted or is in the progress of being disrupted, but the current
suite and test logic blocks progress waiting for all nodes to be
healthy.

By passing -1 to --minStartupPods or --allowed-not-ready-nodes flags
the caller can bypass wait logic before and after test suites that
would prevent running e2e during disruption. This allows use of parts
of the e2e suite during cluster duress to verify that controllers or
components still function.
This commit is contained in:
Clayton Coleman 2021-02-04 20:14:05 -05:00
parent 10e11baa05
commit 286d989d8d
No known key found for this signature in database
GPG Key ID: 3D16906B4F1C5CB3
5 changed files with 28 additions and 4 deletions

View File

@ -184,7 +184,13 @@ func getDefaultClusterIPFamily(c clientset.Interface) string {
// waitForDaemonSets for all daemonsets in the given namespace to be ready
// (defined as all but 'allowedNotReadyNodes' pods associated with that
// daemonset are ready).
//
// If allowedNotReadyNodes is -1, this method returns immediately without waiting.
func waitForDaemonSets(c clientset.Interface, ns string, allowedNotReadyNodes int32, timeout time.Duration) error {
if allowedNotReadyNodes == -1 {
return nil
}
start := time.Now()
framework.Logf("Waiting up to %v for all daemonsets in namespace '%s' to start",
timeout, ns)

View File

@ -202,6 +202,9 @@ func checkWaitListSchedulableNodes(c clientset.Interface) (*v1.NodeList, error)
func CheckReadyForTests(c clientset.Interface, nonblockingTaints string, allowedNotReadyNodes, largeClusterThreshold int) func() (bool, error) {
attempt := 0
return func() (bool, error) {
if allowedNotReadyNodes == -1 {
return true, nil
}
attempt++
var nodesNotReadyYet []v1.Node
opts := metav1.ListOptions{

View File

@ -102,7 +102,14 @@ func errorBadPodsStates(badPods []v1.Pod, desiredPods int, ns, desiredState stri
// waiting. All pods that are in SUCCESS state are not counted.
//
// If ignoreLabels is not empty, pods matching this selector are ignored.
//
// If minPods or allowedNotReadyPods are -1, this method returns immediately
// without waiting.
func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration, ignoreLabels map[string]string) error {
if minPods == -1 || allowedNotReadyPods == -1 {
return nil
}
ignoreSelector := labels.SelectorFromSet(map[string]string{})
start := time.Now()
e2elog.Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",

View File

@ -289,7 +289,7 @@ func RegisterCommonFlags(flags *flag.FlagSet) {
flags.StringVar(&TestContext.LogexporterGCSPath, "logexporter-gcs-path", "", "Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty.")
flags.BoolVar(&TestContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
flags.BoolVar(&TestContext.DeleteNamespaceOnFailure, "delete-namespace-on-failure", true, "If true, framework will delete test namespace on failure. Used only during test debugging.")
flags.IntVar(&TestContext.AllowedNotReadyNodes, "allowed-not-ready-nodes", 0, "If non-zero, framework will allow for that many non-ready nodes when checking for all ready nodes.")
flags.IntVar(&TestContext.AllowedNotReadyNodes, "allowed-not-ready-nodes", 0, "If greater than zero, framework will allow for that many non-ready nodes when checking for all ready nodes. If -1, no waiting will be performed for ready nodes or daemonset pods.")
flags.StringVar(&TestContext.Host, "host", "", fmt.Sprintf("The host, or apiserver, to connect to. Will default to %s if this argument and --kubeconfig are not set.", defaultHost))
flags.StringVar(&TestContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
@ -352,7 +352,7 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
flags.StringVar(&cloudConfig.ClusterTag, "cluster-tag", "", "Tag used to identify resources. Only required if provider is aws.")
flags.StringVar(&cloudConfig.ConfigFile, "cloud-config-file", "", "Cloud config file. Only required if provider is azure or vsphere.")
flags.IntVar(&TestContext.MinStartupPods, "minStartupPods", 0, "The number of pods which we need to see in 'Running' state with a 'Ready' condition of true, before we try running tests. This is useful in any cluster which needs some base pod-based services running before it can be used.")
flags.IntVar(&TestContext.MinStartupPods, "minStartupPods", 0, "The number of pods which we need to see in 'Running' state with a 'Ready' condition of true, before we try running tests. This is useful in any cluster which needs some base pod-based services running before it can be used. If set to -1, no pods are checked and tests run straight away.")
flags.DurationVar(&TestContext.SystemPodsStartupTimeout, "system-pods-startup-timeout", 10*time.Minute, "Timeout for waiting for all system pods to be running before starting tests.")
flags.DurationVar(&TestContext.NodeSchedulableTimeout, "node-schedulable-timeout", 30*time.Minute, "Timeout for waiting for all nodes to be schedulable.")
flags.DurationVar(&TestContext.SystemDaemonsetStartupTimeout, "system-daemonsets-startup-timeout", 5*time.Minute, "Timeout for waiting for all system daemonsets to be ready.")

View File

@ -1024,8 +1024,11 @@ func getNodeEvents(c clientset.Interface, nodeName string) []v1.Event {
// WaitForAllNodesSchedulable waits up to timeout for all
// (but TestContext.AllowedNotReadyNodes) to become schedulable.
func WaitForAllNodesSchedulable(c clientset.Interface, timeout time.Duration) error {
Logf("Waiting up to %v for all (but %d) nodes to be schedulable", timeout, TestContext.AllowedNotReadyNodes)
if TestContext.AllowedNotReadyNodes == -1 {
return nil
}
Logf("Waiting up to %v for all (but %d) nodes to be schedulable", timeout, TestContext.AllowedNotReadyNodes)
return wait.PollImmediate(
30*time.Second,
timeout,
@ -1118,11 +1121,16 @@ func RunHostCmdWithRetries(ns, name, cmd string, interval, timeout time.Duration
}
}
// AllNodesReady checks whether all registered nodes are ready.
// AllNodesReady checks whether all registered nodes are ready. Setting -1 on
// TestContext.AllowedNotReadyNodes will bypass the post test node readiness check.
// TODO: we should change the AllNodesReady call in AfterEach to WaitForAllNodesHealthy,
// and figure out how to do it in a configurable way, as we can't expect all setups to run
// default test add-ons.
func AllNodesReady(c clientset.Interface, timeout time.Duration) error {
if TestContext.AllowedNotReadyNodes == -1 {
return nil
}
Logf("Waiting up to %v for all (but %d) nodes to be ready", timeout, TestContext.AllowedNotReadyNodes)
var notReady []*v1.Node