tests: Prepull commonly used test images

Some tests have a short timeout for starting their pods (1 minute), but if
those tests happen to be the first ones to run and the images still have to
be pulled, the tests can time out, especially with larger images. This commit
allows commonly used E2E test images to be prepulled, so this issue can be
avoided.
Claudiu Belu 2021-09-27 19:38:27 +03:00
parent 37efc5feec
commit 35e23afa50
5 changed files with 111 additions and 33 deletions
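To make the failure mode concrete, the following is a minimal sketch (not code from this commit) of the kind of pod-start wait these tests perform. The helper name is hypothetical; only the one-minute budget comes from the commit message. On a node that still has to pull a large image, the pull alone can exhaust that budget.

package example

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
)

// waitForPodRunning is a hypothetical stand-in for the framework's pod-start
// waits: poll until the pod reaches Running, giving up after one minute.
func waitForPodRunning(c clientset.Interface, ns, name string) error {
	return wait.PollImmediate(2*time.Second, 1*time.Minute, func() (bool, error) {
		pod, err := c.CoreV1().Pods(ns).Get(context.TODO(), name, metav1.GetOptions{})
		if err != nil {
			return false, nil
		}
		// While the node is still pulling the image, the pod stays Pending,
		// and that time counts against the one-minute budget.
		return pod.Status.Phase == v1.PodRunning, nil
	})
}

Prepulling the images via DaemonSets, as added below, takes the pull time out of this window.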


@@ -189,6 +189,7 @@ fi
--master-tag="${MASTER_TAG:-}" \
--docker-config-file="${DOCKER_CONFIG_FILE:-}" \
--dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
--prepull-images="${PREPULL_IMAGES:-false}" \
--ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
${CONTAINER_RUNTIME:+"--container-runtime=${CONTAINER_RUNTIME}"} \
${MASTER_OS_DISTRIBUTION:+"--master-os-distro=${MASTER_OS_DISTRIBUTION}"} \


@@ -53,7 +53,7 @@ var CurrentSuite Suite
// PrePulledImages is the list of images used in e2e/common tests. These images should be prepulled
// before tests start, so that the tests won't fail due to image pulling flakes.
// Currently, this is only used by node e2e test.
// Currently, this is only used by node e2e test and E2E tests.
// See also updateImageAllowList() in ../../e2e_node/image_list.go
// TODO(random-liu): Change the image puller pod to use similar mechanism.
var PrePulledImages = sets.NewString(
@@ -67,6 +67,16 @@ var PrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.NonRoot),
)
// WindowsPrePulledImages is the list of images used in e2e/common tests that also have Windows
// support. These images should be prepulled before tests start, so that the tests won't fail
// due to image pulling flakes. Currently, this is only used by E2E tests.
var WindowsPrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.Agnhost),
imageutils.GetE2EImage(imageutils.BusyBox),
imageutils.GetE2EImage(imageutils.Nginx),
imageutils.GetE2EImage(imageutils.Httpd),
)
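As a usage sketch for the two lists (illustrative only, not part of the commit; the real selection logic lives in prepullImages further below): Windows nodes get the Windows-compatible list, everything else gets the default list.

package example

import (
	"k8s.io/apimachinery/pkg/util/sets"
	commontest "k8s.io/kubernetes/test/e2e/common"
	"k8s.io/kubernetes/test/e2e/framework"
)

// prePullList is a hypothetical helper mirroring the selection that
// prepullImages performs in this commit.
func prePullList() sets.String {
	if framework.NodeOSDistroIs("windows") {
		return commontest.WindowsPrePulledImages
	}
	return commontest.PrePulledImages
}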
type testImagesStruct struct {
AgnhostImage string
BusyBoxImage string


@@ -36,6 +36,7 @@ import (
"github.com/onsi/ginkgo/reporters"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
@@ -44,6 +45,7 @@ import (
"k8s.io/component-base/version"
commontest "k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/daemonset"
e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -257,6 +259,11 @@ func setupSuite() {
framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
}
if framework.TestContext.PrepullImages {
framework.Logf("Pre-pulling images so that they are cached for the tests.")
prepullImages(c)
}
// Log the version of the server and this client.
framework.Logf("e2e test version: %s", version.Get().GitVersion)
@@ -400,3 +407,42 @@ func setupSuitePerGinkgoNode() {
framework.TestContext.IPFamily = getDefaultClusterIPFamily(c)
framework.Logf("Cluster IP family: %s", framework.TestContext.IPFamily)
}
func prepullImages(c clientset.Interface) {
namespace, err := framework.CreateTestingNS("img-puller", c, map[string]string{
"e2e-framework": "img-puller",
})
framework.ExpectNoError(err)
ns := namespace.Name
defer c.CoreV1().Namespaces().Delete(context.TODO(), ns, metav1.DeleteOptions{})
images := commontest.PrePulledImages
if framework.NodeOSDistroIs("windows") {
images = commontest.WindowsPrePulledImages
}
label := map[string]string{"app": "prepull-daemonset"}
var imgPullers []*appsv1.DaemonSet
for _, img := range images.List() {
dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
dsSpec := daemonset.NewDaemonSet(dsName, img, label, nil, nil, nil)
ds, err := c.AppsV1().DaemonSets(ns).Create(context.TODO(), dsSpec, metav1.CreateOptions{})
framework.ExpectNoError(err)
imgPullers = append(imgPullers, ds)
}
// this should not be a multiple of 5, because node status updates
// every 5 seconds. See https://github.com/kubernetes/kubernetes/pull/14915.
dsRetryPeriod := 9 * time.Second
dsRetryTimeout := 5 * time.Minute
for _, imgPuller := range imgPullers {
checkDaemonset := func() (bool, error) {
return daemonset.CheckPresentOnNodes(c, imgPuller, ns, framework.TestContext.CloudConfig.NumNodes)
}
framework.Logf("Waiting for %s", imgPuller.Name)
err := wait.PollImmediate(dsRetryPeriod, dsRetryTimeout, checkDaemonset)
framework.ExpectNoError(err, "error waiting for image to be pulled")
}
}
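A worked example of the DaemonSet name mangling used above, since '/' and ':' are not allowed in object names while '.' and '-' are (the image reference below is hypothetical; the real values come from the prepull lists):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical image reference; real ones come from the image lists above.
	img := "k8s.gcr.io/e2e-test-images/busybox:1.29-1"
	// Same mangling as prepullImages: replace '/' and ':' with '-'.
	dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
	fmt.Println(dsName) // img-pull-k8s.gcr.io-e2e-test-images-busybox-1.29-1
}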


@@ -67,6 +67,18 @@ func CheckRunningOnAllNodes(f *framework.Framework, ds *appsv1.DaemonSet) (bool,
return CheckDaemonPodOnNodes(f, ds, nodeNames)()
}
// CheckPresentOnNodes checks that the daemonset's pods are present on at least the given number of
// schedulable nodes.
func CheckPresentOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, numNodes int) (bool, error) {
nodeNames := SchedulableNodes(c, ds)
if len(nodeNames) < numNodes {
return false, nil
}
return checkDaemonPodStateOnNodes(c, ds, ns, nodeNames, func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodPending
})
}
func SchedulableNodes(c clientset.Interface, ds *appsv1.DaemonSet) []string {
nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
framework.ExpectNoError(err)
@@ -90,41 +102,47 @@ func canScheduleOnNode(node v1.Node, ds *appsv1.DaemonSet) bool {
func CheckDaemonPodOnNodes(f *framework.Framework, ds *appsv1.DaemonSet, nodeNames []string) func() (bool, error) {
return func() (bool, error) {
podList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{})
if err != nil {
framework.Logf("could not get the pod list: %v", err)
return checkDaemonPodStateOnNodes(f.ClientSet, ds, f.Namespace.Name, nodeNames, func(pod *v1.Pod) bool {
return podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Now())
})
}
}
func checkDaemonPodStateOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, nodeNames []string, stateChecker func(*v1.Pod) bool) (bool, error) {
podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
if err != nil {
framework.Logf("could not get the pod list: %v", err)
return false, nil
}
pods := podList.Items
nodesToPodCount := make(map[string]int)
for _, pod := range pods {
if !metav1.IsControlledBy(&pod, ds) {
continue
}
if pod.DeletionTimestamp != nil {
continue
}
if stateChecker(&pod) {
nodesToPodCount[pod.Spec.NodeName]++
}
}
framework.Logf("Number of nodes with available pods controlled by daemonset %s: %d", ds.Name, len(nodesToPodCount))
// Ensure that exactly 1 pod is running on all nodes in nodeNames.
for _, nodeName := range nodeNames {
if nodesToPodCount[nodeName] != 1 {
framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
return false, nil
}
pods := podList.Items
nodesToPodCount := make(map[string]int)
for _, pod := range pods {
if !metav1.IsControlledBy(&pod, ds) {
continue
}
if pod.DeletionTimestamp != nil {
continue
}
if podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) {
nodesToPodCount[pod.Spec.NodeName]++
}
}
framework.Logf("Number of nodes with available pods: %d", len(nodesToPodCount))
// Ensure that exactly 1 pod is running on all nodes in nodeNames.
for _, nodeName := range nodeNames {
if nodesToPodCount[nodeName] != 1 {
framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
return false, nil
}
}
framework.Logf("Number of running nodes: %d, number of available pods: %d", len(nodeNames), len(nodesToPodCount))
// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
// other nodes.
return len(nodesToPodCount) == len(nodeNames), nil
}
framework.Logf("Number of running nodes: %d, number of available pods: %d in daemonset %s", len(nodeNames), len(nodesToPodCount), ds.Name)
// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
// other nodes.
return len(nodesToPodCount) == len(nodeNames), nil
}
func CheckDaemonStatus(f *framework.Framework, dsName string) error {
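To spell out the two callers of checkDaemonPodStateOnNodes above (a sketch: the function names below are illustrative, the predicate bodies are the ones from the diff, and the podutil import path is assumed from this file's imports): CheckPresentOnNodes only needs the image to have been pulled, so it accepts any phase other than Pending, while CheckDaemonPodOnNodes keeps the stricter availability check.

package example

import (
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

// pastPending mirrors the CheckPresentOnNodes predicate: the daemon pod has been
// scheduled and is past Pending, which in practice means its image has been
// pulled, even if the container has since exited.
func pastPending(pod *v1.Pod) bool {
	return pod.Status.Phase != v1.PodPending
}

// available mirrors the CheckDaemonPodOnNodes predicate: the pod must have been
// Ready for at least the daemonset's MinReadySeconds.
func available(ds *appsv1.DaemonSet) func(*v1.Pod) bool {
	return func(pod *v1.Pod) bool {
		return podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Now())
	}
}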


@@ -340,6 +340,9 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
flags.StringVar(&TestContext.KubeVolumeDir, "volume-dir", "/var/lib/kubelet", "Path to the directory containing the kubelet volumes.")
flags.StringVar(&TestContext.CertDir, "cert-dir", "", "Path to the directory containing the certs. Default is empty, which doesn't use certs.")
flags.StringVar(&TestContext.RepoRoot, "repo-root", "../../", "Root directory of kubernetes repository, for finding test files.")
// NOTE: Node E2E tests define this flag as well, but with a default of true.
// If the default here becomes true as well, the flag should be moved into RegisterCommonFlags.
flags.BoolVar(&TestContext.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")
flags.StringVar(&TestContext.Provider, "provider", "", "The name of the Kubernetes provider (gce, gke, local, skeleton (the fallback if not set), etc.)")
flags.StringVar(&TestContext.Tooling, "tooling", "", "The tooling in use (kops, gke, etc.)")
flags.StringVar(&TestContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")