tests: Prepull commonly used test images

Some tests have a short timeout for starting their pods (1 minute), but if
those tests happen to be the first ones to run and the images still have to
be pulled, the tests can time out, especially with larger images. This commit
allows commonly used E2E test images to be prepulled, so this issue can be
avoided.
Claudiu Belu 2021-09-27 19:38:27 +03:00
parent 37efc5feec
commit 35e23afa50
5 changed files with 111 additions and 33 deletions
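To make the failure mode concrete, the following is a minimal sketch (not code from this commit) of the kind of pod-start wait these tests perform. The helper name is hypothetical; only the one-minute budget comes from the commit message. On a node that still has to pull a large image, the pull alone can exhaust that budget.

package example

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
)

// waitForPodRunning is a hypothetical stand-in for the framework's pod-start
// waits: poll until the pod reaches Running, giving up after one minute.
func waitForPodRunning(c clientset.Interface, ns, name string) error {
	return wait.PollImmediate(2*time.Second, 1*time.Minute, func() (bool, error) {
		pod, err := c.CoreV1().Pods(ns).Get(context.TODO(), name, metav1.GetOptions{})
		if err != nil {
			return false, nil
		}
		// While the node is still pulling the image, the pod stays Pending,
		// and that time counts against the one-minute budget.
		return pod.Status.Phase == v1.PodRunning, nil
	})
}

Prepulling the images via DaemonSets, as added below, takes the pull time out of this window.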


@@ -189,6 +189,7 @@ fi
--master-tag="${MASTER_TAG:-}" \
--docker-config-file="${DOCKER_CONFIG_FILE:-}" \
--dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
--prepull-images="${PREPULL_IMAGES:-false}" \
--ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
${CONTAINER_RUNTIME:+"--container-runtime=${CONTAINER_RUNTIME}"} \
${MASTER_OS_DISTRIBUTION:+"--master-os-distro=${MASTER_OS_DISTRIBUTION}"} \


@@ -53,7 +53,7 @@ var CurrentSuite Suite
// PrePulledImages is the list of images used in e2e/common tests. These images should be prepulled
// before tests start, so that the tests won't fail due to image pulling flakes.
// Currently, this is only used by node e2e test.
// Currently, this is only used by node e2e test and E2E tests.
// See also updateImageAllowList() in ../../e2e_node/image_list.go
// TODO(random-liu): Change the image puller pod to use similar mechanism.
var PrePulledImages = sets.NewString(
@@ -67,6 +67,16 @@ var PrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.NonRoot),
)
// WindowsPrePulledImages is the list of images used in e2e/common tests that also have Windows
// support. These images should be prepulled before tests start, so that the tests won't fail
// due to image pulling flakes. Currently, this is only used by E2E tests.
var WindowsPrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.Agnhost),
imageutils.GetE2EImage(imageutils.BusyBox),
imageutils.GetE2EImage(imageutils.Nginx),
imageutils.GetE2EImage(imageutils.Httpd),
)
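As a usage sketch for the two lists (illustrative only, not part of the commit; the real selection logic lives in prepullImages further below): Windows nodes get the Windows-compatible list, everything else gets the default list.

package example

import (
	"k8s.io/apimachinery/pkg/util/sets"
	commontest "k8s.io/kubernetes/test/e2e/common"
	"k8s.io/kubernetes/test/e2e/framework"
)

// prePullList is a hypothetical helper mirroring the selection that
// prepullImages performs in this commit.
func prePullList() sets.String {
	if framework.NodeOSDistroIs("windows") {
		return commontest.WindowsPrePulledImages
	}
	return commontest.PrePulledImages
}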
type testImagesStruct struct {
AgnhostImage string
BusyBoxImage string


@@ -36,6 +36,7 @@ import (
"github.com/onsi/ginkgo/reporters"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
@@ -44,6 +45,7 @@ import (
"k8s.io/component-base/version"
commontest "k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/daemonset"
e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -257,6 +259,11 @@ func setupSuite() {
framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
}
if framework.TestContext.PrepullImages {
framework.Logf("Pre-pulling images so that they are cached for the tests.")
prepullImages(c)
}
// Log the version of the server and this client.
framework.Logf("e2e test version: %s", version.Get().GitVersion)
@@ -400,3 +407,42 @@ func setupSuitePerGinkgoNode() {
framework.TestContext.IPFamily = getDefaultClusterIPFamily(c)
framework.Logf("Cluster IP family: %s", framework.TestContext.IPFamily)
}
func prepullImages(c clientset.Interface) {
namespace, err := framework.CreateTestingNS("img-puller", c, map[string]string{
"e2e-framework": "img-puller",
})
framework.ExpectNoError(err)
ns := namespace.Name
defer c.CoreV1().Namespaces().Delete(context.TODO(), ns, metav1.DeleteOptions{})
images := commontest.PrePulledImages
if framework.NodeOSDistroIs("windows") {
images = commontest.WindowsPrePulledImages
}
label := map[string]string{"app": "prepull-daemonset"}
var imgPullers []*appsv1.DaemonSet
for _, img := range images.List() {
dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
dsSpec := daemonset.NewDaemonSet(dsName, img, label, nil, nil, nil)
ds, err := c.AppsV1().DaemonSets(ns).Create(context.TODO(), dsSpec, metav1.CreateOptions{})
framework.ExpectNoError(err)
imgPullers = append(imgPullers, ds)
}
// this should not be a multiple of 5, because node status updates
// every 5 seconds. See https://github.com/kubernetes/kubernetes/pull/14915.
dsRetryPeriod := 9 * time.Second
dsRetryTimeout := 5 * time.Minute
for _, imgPuller := range imgPullers {
checkDaemonset := func() (bool, error) {
return daemonset.CheckPresentOnNodes(c, imgPuller, ns, framework.TestContext.CloudConfig.NumNodes)
}
framework.Logf("Waiting for %s", imgPuller.Name)
err := wait.PollImmediate(dsRetryPeriod, dsRetryTimeout, checkDaemonset)
framework.ExpectNoError(err, "error waiting for image to be pulled")
}
}
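A worked example of the DaemonSet name mangling used above, since '/' and ':' are not allowed in object names while '.' and '-' are (the image reference below is hypothetical; the real values come from the prepull lists):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical image reference; real ones come from the image lists above.
	img := "k8s.gcr.io/e2e-test-images/busybox:1.29-1"
	// Same mangling as prepullImages: replace '/' and ':' with '-'.
	dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
	fmt.Println(dsName) // img-pull-k8s.gcr.io-e2e-test-images-busybox-1.29-1
}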


@@ -67,6 +67,18 @@ func CheckRunningOnAllNodes(f *framework.Framework, ds *appsv1.DaemonSet) (bool,
return CheckDaemonPodOnNodes(f, ds, nodeNames)()
}
// CheckPresentOnNodes checks that the daemonset's pods are present on at least the given number of
// schedulable nodes.
func CheckPresentOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, numNodes int) (bool, error) {
nodeNames := SchedulableNodes(c, ds)
if len(nodeNames) < numNodes {
return false, nil
}
return checkDaemonPodStateOnNodes(c, ds, ns, nodeNames, func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodPending
})
}
func SchedulableNodes(c clientset.Interface, ds *appsv1.DaemonSet) []string {
nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
framework.ExpectNoError(err)
@@ -90,41 +102,47 @@ func canScheduleOnNode(node v1.Node, ds *appsv1.DaemonSet) bool {
func CheckDaemonPodOnNodes(f *framework.Framework, ds *appsv1.DaemonSet, nodeNames []string) func() (bool, error) {
return func() (bool, error) {
podList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{})
if err != nil {
framework.Logf("could not get the pod list: %v", err)
return checkDaemonPodStateOnNodes(f.ClientSet, ds, f.Namespace.Name, nodeNames, func(pod *v1.Pod) bool {
return podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Now())
})
}
}
func checkDaemonPodStateOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, nodeNames []string, stateChecker func(*v1.Pod) bool) (bool, error) {
podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
if err != nil {
framework.Logf("could not get the pod list: %v", err)
return false, nil
}
pods := podList.Items
nodesToPodCount := make(map[string]int)
for _, pod := range pods {
if !metav1.IsControlledBy(&pod, ds) {
continue
}
if pod.DeletionTimestamp != nil {
continue
}
if stateChecker(&pod) {
nodesToPodCount[pod.Spec.NodeName]++
}
}
framework.Logf("Number of nodes with available pods controlled by daemonset %s: %d", ds.Name, len(nodesToPodCount))
// Ensure that exactly 1 pod is running on all nodes in nodeNames.
for _, nodeName := range nodeNames {
if nodesToPodCount[nodeName] != 1 {
framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
return false, nil
}
pods := podList.Items
nodesToPodCount := make(map[string]int)
for _, pod := range pods {
if !metav1.IsControlledBy(&pod, ds) {
continue
}
if pod.DeletionTimestamp != nil {
continue
}
if podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) {
nodesToPodCount[pod.Spec.NodeName]++
}
}
framework.Logf("Number of nodes with available pods: %d", len(nodesToPodCount))
// Ensure that exactly 1 pod is running on all nodes in nodeNames.
for _, nodeName := range nodeNames {
if nodesToPodCount[nodeName] != 1 {
framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
return false, nil
}
}
framework.Logf("Number of running nodes: %d, number of available pods: %d", len(nodeNames), len(nodesToPodCount))
// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
// other nodes.
return len(nodesToPodCount) == len(nodeNames), nil
}
framework.Logf("Number of running nodes: %d, number of available pods: %d in daemonset %s", len(nodeNames), len(nodesToPodCount), ds.Name)
// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
// other nodes.
return len(nodesToPodCount) == len(nodeNames), nil
}
func CheckDaemonStatus(f *framework.Framework, dsName string) error {
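To spell out the two callers of checkDaemonPodStateOnNodes above (a sketch: the function names below are illustrative, the predicate bodies are the ones from the diff, and the podutil import path is assumed from this file's imports): CheckPresentOnNodes only needs the image to have been pulled, so it accepts any phase other than Pending, while CheckDaemonPodOnNodes keeps the stricter availability check.

package example

import (
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

// pastPending mirrors the CheckPresentOnNodes predicate: the daemon pod has been
// scheduled and is past Pending, which in practice means its image has been
// pulled, even if the container has since exited.
func pastPending(pod *v1.Pod) bool {
	return pod.Status.Phase != v1.PodPending
}

// available mirrors the CheckDaemonPodOnNodes predicate: the pod must have been
// Ready for at least the daemonset's MinReadySeconds.
func available(ds *appsv1.DaemonSet) func(*v1.Pod) bool {
	return func(pod *v1.Pod) bool {
		return podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Now())
	}
}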


@@ -340,6 +340,9 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
flags.StringVar(&TestContext.KubeVolumeDir, "volume-dir", "/var/lib/kubelet", "Path to the directory containing the kubelet volumes.")
flags.StringVar(&TestContext.CertDir, "cert-dir", "", "Path to the directory containing the certs. Default is empty, which doesn't use certs.")
flags.StringVar(&TestContext.RepoRoot, "repo-root", "../../", "Root directory of kubernetes repository, for finding test files.")
// NOTE: Node E2E tests define this flag as well, but with a default of true.
// If the default here becomes true as well, the flag should be moved into RegisterCommonFlags.
flags.BoolVar(&TestContext.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")
flags.StringVar(&TestContext.Provider, "provider", "", "The name of the Kubernetes provider (gce, gke, local, skeleton (the fallback if not set), etc.)")
flags.StringVar(&TestContext.Tooling, "tooling", "", "The tooling in use (kops, gke, etc.)")
flags.StringVar(&TestContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")