diff --git a/hack/ginkgo-e2e.sh b/hack/ginkgo-e2e.sh
index 5a919c8914e..521193827f4 100755
--- a/hack/ginkgo-e2e.sh
+++ b/hack/ginkgo-e2e.sh
@@ -189,6 +189,7 @@ fi
   --master-tag="${MASTER_TAG:-}" \
   --docker-config-file="${DOCKER_CONFIG_FILE:-}" \
   --dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
+  --prepull-images="${PREPULL_IMAGES:-false}" \
   --ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
   ${CONTAINER_RUNTIME:+"--container-runtime=${CONTAINER_RUNTIME}"} \
   ${MASTER_OS_DISTRIBUTION:+"--master-os-distro=${MASTER_OS_DISTRIBUTION}"} \
diff --git a/test/e2e/common/util.go b/test/e2e/common/util.go
index 7f83ad5e745..06ab88439ad 100644
--- a/test/e2e/common/util.go
+++ b/test/e2e/common/util.go
@@ -53,7 +53,7 @@ var CurrentSuite Suite
 
 // PrePulledImages are a list of images used in e2e/common tests. These images should be prepulled
 // before tests starts, so that the tests won't fail due image pulling flakes.
-// Currently, this is only used by node e2e test.
+// Currently, this is used by node e2e tests and cluster E2E tests.
 // See also updateImageAllowList() in ../../e2e_node/image_list.go
 // TODO(random-liu): Change the image puller pod to use similar mechanism.
 var PrePulledImages = sets.NewString(
@@ -67,6 +67,16 @@ var PrePulledImages = sets.NewString(
 	imageutils.GetE2EImage(imageutils.NonRoot),
 )
 
+// WindowsPrePulledImages are a list of images used in e2e/common tests that also have Windows support.
+// These images should be prepulled before tests start, so that the tests won't fail due to image-pulling
+// flakes. Currently, this list is only used by cluster E2E tests.
+var WindowsPrePulledImages = sets.NewString(
+	imageutils.GetE2EImage(imageutils.Agnhost),
+	imageutils.GetE2EImage(imageutils.BusyBox),
+	imageutils.GetE2EImage(imageutils.Nginx),
+	imageutils.GetE2EImage(imageutils.Httpd),
+)
+
 type testImagesStruct struct {
 	AgnhostImage string
 	BusyBoxImage string
diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go
index 92660febc4a..b387e05983c 100644
--- a/test/e2e/e2e.go
+++ b/test/e2e/e2e.go
@@ -36,6 +36,7 @@ import (
 	"github.com/onsi/ginkgo/reporters"
 	"github.com/onsi/gomega"
 
+	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
@@ -44,6 +45,7 @@ import (
 	"k8s.io/component-base/version"
 	commontest "k8s.io/kubernetes/test/e2e/common"
 	"k8s.io/kubernetes/test/e2e/framework"
+	"k8s.io/kubernetes/test/e2e/framework/daemonset"
 	e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -257,6 +259,11 @@ func setupSuite() {
 		framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
 	}
 
+	if framework.TestContext.PrepullImages {
+		framework.Logf("Pre-pulling images so that they are cached for the tests.")
+		prepullImages(c)
+	}
+
 	// Log the version of the server and this client.
framework.Logf("e2e test version: %s", version.Get().GitVersion) @@ -400,3 +407,42 @@ func setupSuitePerGinkgoNode() { framework.TestContext.IPFamily = getDefaultClusterIPFamily(c) framework.Logf("Cluster IP family: %s", framework.TestContext.IPFamily) } + +func prepullImages(c clientset.Interface) { + namespace, err := framework.CreateTestingNS("img-puller", c, map[string]string{ + "e2e-framework": "img-puller", + }) + framework.ExpectNoError(err) + ns := namespace.Name + defer c.CoreV1().Namespaces().Delete(context.TODO(), ns, metav1.DeleteOptions{}) + + images := commontest.PrePulledImages + if framework.NodeOSDistroIs("windows") { + images = commontest.WindowsPrePulledImages + } + + label := map[string]string{"app": "prepull-daemonset"} + var imgPullers []*appsv1.DaemonSet + for _, img := range images.List() { + dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-")) + + dsSpec := daemonset.NewDaemonSet(dsName, img, label, nil, nil, nil) + ds, err := c.AppsV1().DaemonSets(ns).Create(context.TODO(), dsSpec, metav1.CreateOptions{}) + framework.ExpectNoError(err) + imgPullers = append(imgPullers, ds) + } + + // this should not be a multiple of 5, because node status updates + // every 5 seconds. See https://github.com/kubernetes/kubernetes/pull/14915. + dsRetryPeriod := 9 * time.Second + dsRetryTimeout := 5 * time.Minute + + for _, imgPuller := range imgPullers { + checkDaemonset := func() (bool, error) { + return daemonset.CheckPresentOnNodes(c, imgPuller, ns, framework.TestContext.CloudConfig.NumNodes) + } + framework.Logf("Waiting for %s", imgPuller.Name) + err := wait.PollImmediate(dsRetryPeriod, dsRetryTimeout, checkDaemonset) + framework.ExpectNoError(err, "error waiting for image to be pulled") + } +} diff --git a/test/e2e/framework/daemonset/fixtures.go b/test/e2e/framework/daemonset/fixtures.go index 1d765cf5991..1fc81fba325 100644 --- a/test/e2e/framework/daemonset/fixtures.go +++ b/test/e2e/framework/daemonset/fixtures.go @@ -67,6 +67,18 @@ func CheckRunningOnAllNodes(f *framework.Framework, ds *appsv1.DaemonSet) (bool, return CheckDaemonPodOnNodes(f, ds, nodeNames)() } +// CheckPresentOnNodes will check that the daemonset will be present on at least the given number of +// schedulable nodes. 
+func CheckPresentOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, numNodes int) (bool, error) {
+	nodeNames := SchedulableNodes(c, ds)
+	if len(nodeNames) < numNodes {
+		return false, nil
+	}
+	return checkDaemonPodStateOnNodes(c, ds, ns, nodeNames, func(pod *v1.Pod) bool {
+		return pod.Status.Phase != v1.PodPending
+	})
+}
+
 func SchedulableNodes(c clientset.Interface, ds *appsv1.DaemonSet) []string {
 	nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
 	framework.ExpectNoError(err)
@@ -90,41 +102,47 @@ func canScheduleOnNode(node v1.Node, ds *appsv1.DaemonSet) bool {
 
 func CheckDaemonPodOnNodes(f *framework.Framework, ds *appsv1.DaemonSet, nodeNames []string) func() (bool, error) {
 	return func() (bool, error) {
-		podList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{})
-		if err != nil {
-			framework.Logf("could not get the pod list: %v", err)
+		return checkDaemonPodStateOnNodes(f.ClientSet, ds, f.Namespace.Name, nodeNames, func(pod *v1.Pod) bool {
+			return podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Now())
+		})
+	}
+}
+
+func checkDaemonPodStateOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, nodeNames []string, stateChecker func(*v1.Pod) bool) (bool, error) {
+	podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
+	if err != nil {
+		framework.Logf("could not get the pod list: %v", err)
+		return false, nil
+	}
+	pods := podList.Items
+
+	nodesToPodCount := make(map[string]int)
+	for _, pod := range pods {
+		if !metav1.IsControlledBy(&pod, ds) {
+			continue
+		}
+		if pod.DeletionTimestamp != nil {
+			continue
+		}
+		if stateChecker(&pod) {
+			nodesToPodCount[pod.Spec.NodeName]++
+		}
+	}
+	framework.Logf("Number of nodes with available pods controlled by daemonset %s: %d", ds.Name, len(nodesToPodCount))
+
+	// Ensure that exactly 1 pod is running on all nodes in nodeNames.
+	for _, nodeName := range nodeNames {
+		if nodesToPodCount[nodeName] != 1 {
+			framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
 			return false, nil
 		}
-		pods := podList.Items
-
-		nodesToPodCount := make(map[string]int)
-		for _, pod := range pods {
-			if !metav1.IsControlledBy(&pod, ds) {
-				continue
-			}
-			if pod.DeletionTimestamp != nil {
-				continue
-			}
-			if podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) {
-				nodesToPodCount[pod.Spec.NodeName]++
-			}
-		}
-		framework.Logf("Number of nodes with available pods: %d", len(nodesToPodCount))
-
-		// Ensure that exactly 1 pod is running on all nodes in nodeNames.
-		for _, nodeName := range nodeNames {
-			if nodesToPodCount[nodeName] != 1 {
-				framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
-				return false, nil
-			}
-		}
-
-		framework.Logf("Number of running nodes: %d, number of available pods: %d", len(nodeNames), len(nodesToPodCount))
-		// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
-		// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
-		// other nodes.
-		return len(nodesToPodCount) == len(nodeNames), nil
 	}
+
+	framework.Logf("Number of running nodes: %d, number of available pods: %d in daemonset %s", len(nodeNames), len(nodesToPodCount), ds.Name)
+	// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
+	// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
+	// other nodes.
+	return len(nodesToPodCount) == len(nodeNames), nil
 }
 
 func CheckDaemonStatus(f *framework.Framework, dsName string) error {
diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go
index db8cbea44ce..43508106511 100644
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@@ -340,6 +340,9 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
 	flags.StringVar(&TestContext.KubeVolumeDir, "volume-dir", "/var/lib/kubelet", "Path to the directory containing the kubelet volumes.")
 	flags.StringVar(&TestContext.CertDir, "cert-dir", "", "Path to the directory containing the certs. Default is empty, which doesn't use certs.")
 	flags.StringVar(&TestContext.RepoRoot, "repo-root", "../../", "Root directory of kubernetes repository, for finding test files.")
+	// NOTE: Node E2E tests define this flag as well, but default it to true.
+	// If this default also becomes true, the flag should be moved into RegisterCommonFlags.
+	flags.BoolVar(&TestContext.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")
 	flags.StringVar(&TestContext.Provider, "provider", "", "The name of the Kubernetes provider (gce, gke, local, skeleton (the fallback if not set), etc.)")
 	flags.StringVar(&TestContext.Tooling, "tooling", "", "The tooling in use (kops, gke, etc.)")
 	flags.StringVar(&TestContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")
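
Usage note: with this patch, a cluster E2E run opts in to prepulling either by exporting PREPULL_IMAGES=true before calling hack/ginkgo-e2e.sh (the script forwards it as --prepull-images), or by passing --prepull-images=true directly to the e2e.test binary. The default stays false, so existing jobs keep their current behaviour.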
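
For readers who have not seen the image-puller pattern before, the sketch below shows roughly the kind of DaemonSet the prepull step creates for each image. It is an illustration only, not the framework's code: the real object is built by the existing daemonset.NewDaemonSet helper, and the container name and label values here are assumptions.

package sketch

import (
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newImagePullerDaemonSet is a hypothetical stand-in for the daemonset.NewDaemonSet call in
// prepullImages: one pod per schedulable node, running the image that should be cached.
// A pod leaves the Pending phase only after its image has been pulled and its container has
// started, which is the condition CheckPresentOnNodes polls for.
func newImagePullerDaemonSet(name, image string, labels map[string]string) *appsv1.DaemonSet {
	return &appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{Name: name, Labels: labels},
		Spec: appsv1.DaemonSetSpec{
			Selector: &metav1.LabelSelector{MatchLabels: labels},
			Template: v1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{Labels: labels},
				Spec: v1.PodSpec{
					Containers: []v1.Container{{
						Name:  "img-pull", // assumed name, for illustration only
						Image: image,      // the image to pre-pull onto every node
					}},
				},
			},
		},
	}
}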