Merge pull request #105481 from claudiubelu/tests/e2e-prepull-images

tests: Prepull images
Kubernetes Prow Robot, 2021-11-18 17:22:51 -08:00 (committed by GitHub)
commit 9b180d8913
5 changed files with 111 additions and 33 deletions


@@ -189,6 +189,7 @@ fi
--master-tag="${MASTER_TAG:-}" \
--docker-config-file="${DOCKER_CONFIG_FILE:-}" \
--dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
--prepull-images="${PREPULL_IMAGES:-false}" \
--ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
${CONTAINER_RUNTIME:+"--container-runtime=${CONTAINER_RUNTIME}"} \
${MASTER_OS_DISTRIBUTION:+"--master-os-distro=${MASTER_OS_DISTRIBUTION}"} \


@@ -53,7 +53,7 @@ var CurrentSuite Suite
// PrePulledImages are a list of images used in e2e/common tests. These images should be prepulled
// before tests start, so that the tests won't fail due to image pulling flakes.
-// Currently, this is only used by node e2e test.
+// Currently, this is only used by node e2e tests and E2E tests.
// See also updateImageAllowList() in ../../e2e_node/image_list.go
// TODO(random-liu): Change the image puller pod to use similar mechanism.
var PrePulledImages = sets.NewString(
@@ -67,6 +67,16 @@ var PrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.NonRoot),
)
// WindowsPrePulledImages are a list of images used in e2e/common tests. These images should be prepulled
// before tests start, so that the tests won't fail due to image pulling flakes. These images also have
// Windows support. Currently, this is only used by E2E tests.
var WindowsPrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.Agnhost),
imageutils.GetE2EImage(imageutils.BusyBox),
imageutils.GetE2EImage(imageutils.Nginx),
imageutils.GetE2EImage(imageutils.Httpd),
)
type testImagesStruct struct {
AgnhostImage string
BusyBoxImage string

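For context, here is a small, self-contained sketch (not part of the change) of how a caller can pick between the two prepull lists defined above and iterate the result. The registry paths are made-up placeholders; the real lists are built via imageutils.GetE2EImage, and the OS-based selection mirrors the prepullImages() helper added later in this diff.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// Placeholder image references, for illustration only.
var (
	prePulledImages        = sets.NewString("registry.example/agnhost:2.32", "registry.example/busybox:1.29")
	windowsPrePulledImages = sets.NewString("registry.example/agnhost:2.32", "registry.example/httpd:2.4")
)

// imagesToPrepull mirrors the selection made in prepullImages(): Windows nodes
// get the Windows-capable list, everything else gets the default list.
func imagesToPrepull(nodeOSDistro string) sets.String {
	if nodeOSDistro == "windows" {
		return windowsPrePulledImages
	}
	return prePulledImages
}

func main() {
	for _, img := range imagesToPrepull("windows").List() {
		fmt.Println("would prepull:", img)
	}
}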

@@ -36,6 +36,7 @@ import (
"github.com/onsi/ginkgo/reporters"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
@@ -44,6 +45,7 @@ import (
"k8s.io/component-base/version"
commontest "k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/daemonset"
e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -257,6 +259,11 @@ func setupSuite() {
framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
}
if framework.TestContext.PrepullImages {
framework.Logf("Pre-pulling images so that they are cached for the tests.")
prepullImages(c)
}
// Log the version of the server and this client.
framework.Logf("e2e test version: %s", version.Get().GitVersion)
@@ -400,3 +407,42 @@ func setupSuitePerGinkgoNode() {
framework.TestContext.IPFamily = getDefaultClusterIPFamily(c)
framework.Logf("Cluster IP family: %s", framework.TestContext.IPFamily)
}
func prepullImages(c clientset.Interface) {
namespace, err := framework.CreateTestingNS("img-puller", c, map[string]string{
"e2e-framework": "img-puller",
})
framework.ExpectNoError(err)
ns := namespace.Name
defer c.CoreV1().Namespaces().Delete(context.TODO(), ns, metav1.DeleteOptions{})
images := commontest.PrePulledImages
if framework.NodeOSDistroIs("windows") {
images = commontest.WindowsPrePulledImages
}
label := map[string]string{"app": "prepull-daemonset"}
var imgPullers []*appsv1.DaemonSet
for _, img := range images.List() {
dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
dsSpec := daemonset.NewDaemonSet(dsName, img, label, nil, nil, nil)
ds, err := c.AppsV1().DaemonSets(ns).Create(context.TODO(), dsSpec, metav1.CreateOptions{})
framework.ExpectNoError(err)
imgPullers = append(imgPullers, ds)
}
// this should not be a multiple of 5, because node status updates
// every 5 seconds. See https://github.com/kubernetes/kubernetes/pull/14915.
dsRetryPeriod := 9 * time.Second
dsRetryTimeout := 5 * time.Minute
for _, imgPuller := range imgPullers {
checkDaemonset := func() (bool, error) {
return daemonset.CheckPresentOnNodes(c, imgPuller, ns, framework.TestContext.CloudConfig.NumNodes)
}
framework.Logf("Waiting for %s", imgPuller.Name)
err := wait.PollImmediate(dsRetryPeriod, dsRetryTimeout, checkDaemonset)
framework.ExpectNoError(err, "error waiting for image to be pulled")
}
}

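One detail worth noting in the prepullImages() helper above: each image reference is flattened into a DaemonSet name by replacing the characters that are not allowed in object names (slashes and colons). A minimal sketch of just that transformation, using a made-up image reference:

package main

import (
	"fmt"
	"strings"
)

// imgToDaemonSetName mirrors the dsName expression in prepullImages():
// "/" and ":" are not valid in an object name, so they become dashes.
func imgToDaemonSetName(img string) string {
	return fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
}

func main() {
	// Hypothetical image reference, for illustration only.
	fmt.Println(imgToDaemonSetName("registry.example/e2e/agnhost:2.32"))
	// Output: img-pull-registry.example-e2e-agnhost-2.32
}

This gives each image its own uniquely named puller DaemonSet, which the code then waits on one by one.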

@@ -67,6 +67,18 @@ func CheckRunningOnAllNodes(f *framework.Framework, ds *appsv1.DaemonSet) (bool,
return CheckDaemonPodOnNodes(f, ds, nodeNames)()
}
// CheckPresentOnNodes will check that the daemonset's pods are present (scheduled and past the
// Pending phase) on at least the given number of schedulable nodes.
func CheckPresentOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, numNodes int) (bool, error) {
nodeNames := SchedulableNodes(c, ds)
if len(nodeNames) < numNodes {
return false, nil
}
return checkDaemonPodStateOnNodes(c, ds, ns, nodeNames, func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodPending
})
}
func SchedulableNodes(c clientset.Interface, ds *appsv1.DaemonSet) []string {
nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
framework.ExpectNoError(err)
@@ -90,41 +102,47 @@ func canScheduleOnNode(node v1.Node, ds *appsv1.DaemonSet) bool {
func CheckDaemonPodOnNodes(f *framework.Framework, ds *appsv1.DaemonSet, nodeNames []string) func() (bool, error) {
return func() (bool, error) {
-podList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{})
-if err != nil {
-framework.Logf("could not get the pod list: %v", err)
+return checkDaemonPodStateOnNodes(f.ClientSet, ds, f.Namespace.Name, nodeNames, func(pod *v1.Pod) bool {
+return podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Now())
+})
+}
+}
+func checkDaemonPodStateOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, nodeNames []string, stateChecker func(*v1.Pod) bool) (bool, error) {
+podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
+if err != nil {
+framework.Logf("could not get the pod list: %v", err)
return false, nil
}
+pods := podList.Items
+nodesToPodCount := make(map[string]int)
+for _, pod := range pods {
+if !metav1.IsControlledBy(&pod, ds) {
+continue
+}
+if pod.DeletionTimestamp != nil {
+continue
+}
+if stateChecker(&pod) {
+nodesToPodCount[pod.Spec.NodeName]++
+}
+}
+framework.Logf("Number of nodes with available pods controlled by daemonset %s: %d", ds.Name, len(nodesToPodCount))
+// Ensure that exactly 1 pod is running on all nodes in nodeNames.
+for _, nodeName := range nodeNames {
+if nodesToPodCount[nodeName] != 1 {
+framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
+return false, nil
+}
-pods := podList.Items
-nodesToPodCount := make(map[string]int)
-for _, pod := range pods {
-if !metav1.IsControlledBy(&pod, ds) {
-continue
-}
-if pod.DeletionTimestamp != nil {
-continue
-}
-if podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) {
-nodesToPodCount[pod.Spec.NodeName]++
-}
-}
-framework.Logf("Number of nodes with available pods: %d", len(nodesToPodCount))
-// Ensure that exactly 1 pod is running on all nodes in nodeNames.
-for _, nodeName := range nodeNames {
-if nodesToPodCount[nodeName] != 1 {
-framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
-return false, nil
-}
-}
-framework.Logf("Number of running nodes: %d, number of available pods: %d", len(nodeNames), len(nodesToPodCount))
-// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
-// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
-// other nodes.
-return len(nodesToPodCount) == len(nodeNames), nil
-}
+framework.Logf("Number of running nodes: %d, number of available pods: %d in daemonset %s", len(nodeNames), len(nodesToPodCount), ds.Name)
+// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
+// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
+// other nodes.
+return len(nodesToPodCount) == len(nodeNames), nil
}
func CheckDaemonStatus(f *framework.Framework, dsName string) error {

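The refactor above extracts the shared per-node counting into checkDaemonPodStateOnNodes and injects the per-pod condition as a stateChecker callback: CheckDaemonPodOnNodes passes an "available" check, while CheckPresentOnNodes only requires that the pod has left the Pending phase. Below is a stripped-down sketch of that predicate-injection pattern; the pod struct is a stub standing in for *v1.Pod, for illustration only.

package main

import "fmt"

// pod is a stub standing in for *v1.Pod.
type pod struct {
	node  string
	phase string
	ready bool
}

// countPerNode mirrors the core of checkDaemonPodStateOnNodes: it counts, per node,
// the pods that satisfy the injected stateChecker predicate.
func countPerNode(pods []pod, stateChecker func(pod) bool) map[string]int {
	counts := make(map[string]int)
	for _, p := range pods {
		if stateChecker(p) {
			counts[p.node]++
		}
	}
	return counts
}

func main() {
	pods := []pod{
		{node: "node-1", phase: "Running", ready: true},
		{node: "node-2", phase: "Pending"},
	}
	// Roughly the condition CheckDaemonPodOnNodes cares about: the pod is available.
	available := func(p pod) bool { return p.ready }
	// The condition CheckPresentOnNodes cares about: the pod has left the Pending phase.
	present := func(p pod) bool { return p.phase != "Pending" }

	fmt.Println(countPerNode(pods, available)) // map[node-1:1]
	fmt.Println(countPerNode(pods, present))   // map[node-1:1]
}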

@@ -340,6 +340,9 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
flags.StringVar(&TestContext.KubeVolumeDir, "volume-dir", "/var/lib/kubelet", "Path to the directory containing the kubelet volumes.")
flags.StringVar(&TestContext.CertDir, "cert-dir", "", "Path to the directory containing the certs. Default is empty, which doesn't use certs.")
flags.StringVar(&TestContext.RepoRoot, "repo-root", "../../", "Root directory of kubernetes repository, for finding test files.")
// NOTE: Node E2E tests define this flag as well, but with a default of true.
// If this default becomes true here as well, the two definitions should be refactored into RegisterCommonFlags.
flags.BoolVar(&TestContext.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")
flags.StringVar(&TestContext.Provider, "provider", "", "The name of the Kubernetes provider (gce, gke, local, skeleton (the fallback if not set), etc.)")
flags.StringVar(&TestContext.Tooling, "tooling", "", "The tooling in use (kops, gke, etc.)")
flags.StringVar(&TestContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")
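
Putting the pieces together: the flag registered here defaults to false, hack/ginkgo-e2e.sh forwards PREPULL_IMAGES into it, and setupSuite() checks TestContext.PrepullImages before calling prepullImages(). The following is a minimal, self-contained sketch of that wiring with a simplified stand-in for TestContext; only the flag name and default come from the diff above.

package main

import (
	"flag"
	"fmt"
)

// testContext is a simplified stand-in for the framework's TestContext.
type testContext struct {
	PrepullImages bool
}

func main() {
	var ctx testContext
	fs := flag.NewFlagSet("e2e", flag.ExitOnError)
	// Same shape as the registration in RegisterClusterFlags: default false, opt in explicitly.
	fs.BoolVar(&ctx.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")

	// e.g. the value forwarded by hack/ginkgo-e2e.sh as --prepull-images="${PREPULL_IMAGES:-false}"
	_ = fs.Parse([]string{"--prepull-images=true"})

	if ctx.PrepullImages {
		fmt.Println("would pre-pull images before running the suite")
	}
}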