Merge pull request #67373 from mborsz/waitfordaemonsets

Automatic merge from submit-queue (batch tested with PRs 67137, 67372, 67505, 67373, 67357). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

In e2e framework, wait for kube-system daemonsets to be ready.

**What this PR does / why we need it**:
In https://github.com/kubernetes/kubernetes/issues/66672 we saw that starting fluentd daemonset tend to influence pod startup latency measurements.
I would like to add additional step of waiting up to 5 minutes to make sure that all daemonsets are properly booted before we start latency measurements.

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
This commit is contained in:
Kubernetes Submit Queue 2018-08-16 10:34:21 -07:00 committed by GitHub
commit d68fb1e012
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 0 deletions

View File

@ -190,6 +190,10 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
framework.Failf("Error waiting for all pods to be running and ready: %v", err)
}
if err := framework.WaitForDaemonSets(c, metav1.NamespaceSystem, int32(framework.TestContext.AllowedNotReadyNodes), framework.TestContext.SystemDaemonsetStartupTimeout); err != nil {
framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
}
if err := framework.WaitForPodsSuccess(c, metav1.NamespaceSystem, framework.ImagePullerLabels, framework.ImagePrePullingTimeout); err != nil {
// There is no guarantee that the image pulling will succeed in 3 minutes
// and we don't even run the image puller on all platforms (including GKE).

View File

@ -99,6 +99,8 @@ type TestContextType struct {
OutputPrintType string
// NodeSchedulableTimeout is the timeout for waiting for all nodes to be schedulable.
NodeSchedulableTimeout time.Duration
// SystemDaemonsetStartupTimeout is the timeout for waiting for all system daemonsets to be ready.
SystemDaemonsetStartupTimeout time.Duration
// CreateTestingNS is responsible for creating namespace used for executing e2e tests.
// It accepts namespace base name, which will be prepended with e2e prefix, kube client
// and labels to be applied to a namespace.
@ -277,6 +279,7 @@ func RegisterClusterFlags() {
flag.IntVar(&TestContext.MinStartupPods, "minStartupPods", 0, "The number of pods which we need to see in 'Running' state with a 'Ready' condition of true, before we try running tests. This is useful in any cluster which needs some base pod-based services running before it can be used.")
flag.DurationVar(&TestContext.SystemPodsStartupTimeout, "system-pods-startup-timeout", 10*time.Minute, "Timeout for waiting for all system pods to be running before starting tests.")
flag.DurationVar(&TestContext.NodeSchedulableTimeout, "node-schedulable-timeout", 30*time.Minute, "Timeout for waiting for all nodes to be schedulable.")
flag.DurationVar(&TestContext.SystemDaemonsetStartupTimeout, "system-daemonsets-startup-timeout", 5*time.Minute, "Timeout for waiting for all system daemonsets to be ready.")
flag.StringVar(&TestContext.UpgradeTarget, "upgrade-target", "ci/latest", "Version to upgrade to (e.g. 'release/stable', 'release/latest', 'ci/latest', '0.19.1', '0.19.1-669-gabac8c8') if doing an upgrade test.")
flag.StringVar(&TestContext.EtcdUpgradeStorage, "etcd-upgrade-storage", "", "The storage version to upgrade to (either 'etcdv2' or 'etcdv3') if doing an etcd upgrade test.")
flag.StringVar(&TestContext.EtcdUpgradeVersion, "etcd-upgrade-version", "", "The etcd binary version to upgrade to (e.g., '3.0.14', '2.3.7') if doing an etcd upgrade test.")

View File

@ -733,6 +733,39 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN
return nil
}
// WaitForDaemonSets for all daemonsets in the given namespace to be ready
// (defined as all but 'allowedNotReadyNodes' pods associated with that
// daemonset are ready).
func WaitForDaemonSets(c clientset.Interface, ns string, allowedNotReadyNodes int32, timeout time.Duration) error {
start := time.Now()
Logf("Waiting up to %v for all daemonsets in namespace '%s' to start",
timeout, ns)
return wait.PollImmediate(Poll, timeout, func() (bool, error) {
dsList, err := c.AppsV1().DaemonSets(ns).List(metav1.ListOptions{})
if err != nil {
Logf("Error getting daemonsets in namespace: '%s': %v", ns, err)
if testutils.IsRetryableAPIError(err) {
return false, nil
}
return false, err
}
var notReadyDaemonSets []string
for _, ds := range dsList.Items {
Logf("%d / %d pods ready in namespace '%s' in daemonset '%s' (%d seconds elapsed)", ds.Status.DesiredNumberScheduled, ds.Status.NumberReady, ns, ds.ObjectMeta.Name, int(time.Since(start).Seconds()))
if ds.Status.DesiredNumberScheduled-ds.Status.NumberReady > allowedNotReadyNodes {
notReadyDaemonSets = append(notReadyDaemonSets, ds.ObjectMeta.Name)
}
}
if len(notReadyDaemonSets) > 0 {
return false, fmt.Errorf("there are not ready daemonsets: %v", notReadyDaemonSets)
}
return true, nil
})
}
func kubectlLogPod(c clientset.Interface, pod v1.Pod, containerNameSubstr string, logFunc func(ftm string, args ...interface{})) {
for _, container := range pod.Spec.Containers {
if strings.Contains(container.Name, containerNameSubstr) {