Merge pull request #105481 from claudiubelu/tests/e2e-prepull-images

tests: Prepull images
Kubernetes Prow Robot, 2021-11-18 17:22:51 -08:00 (committed by GitHub)
commit 9b180d8913
5 changed files with 111 additions and 33 deletions


@@ -189,6 +189,7 @@ fi
--master-tag="${MASTER_TAG:-}" \
--docker-config-file="${DOCKER_CONFIG_FILE:-}" \
--dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
--prepull-images="${PREPULL_IMAGES:-false}" \
--ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
${CONTAINER_RUNTIME:+"--container-runtime=${CONTAINER_RUNTIME}"} \
${MASTER_OS_DISTRIBUTION:+"--master-os-distro=${MASTER_OS_DISTRIBUTION}"} \


@@ -53,7 +53,7 @@ var CurrentSuite Suite
// PrePulledImages are a list of images used in e2e/common tests. These images should be prepulled
// before tests start, so that the tests won't fail due to image pulling flakes.
-// Currently, this is only used by node e2e test.
+// Currently, this is only used by node e2e tests and E2E tests.
// See also updateImageAllowList() in ../../e2e_node/image_list.go
// TODO(random-liu): Change the image puller pod to use similar mechanism.
var PrePulledImages = sets.NewString(
@@ -67,6 +67,16 @@ var PrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.NonRoot),
)
// WindowsPrePulledImages are a list of images used in e2e/common tests. These images should be prepulled
// before tests start, so that the tests won't fail due to image pulling flakes. These images also have
// Windows support. Currently, this is only used by E2E tests.
var WindowsPrePulledImages = sets.NewString(
imageutils.GetE2EImage(imageutils.Agnhost),
imageutils.GetE2EImage(imageutils.BusyBox),
imageutils.GetE2EImage(imageutils.Nginx),
imageutils.GetE2EImage(imageutils.Httpd),
)
type testImagesStruct struct {
AgnhostImage string
BusyBoxImage string

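For context, here is a small, self-contained sketch (not part of the change) of how a caller can pick between the two prepull lists defined above and iterate the result. The registry paths are made-up placeholders; the real lists are built via imageutils.GetE2EImage, and the OS-based selection mirrors the prepullImages() helper added later in this diff.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// Placeholder image references, for illustration only.
var (
	prePulledImages        = sets.NewString("registry.example/agnhost:2.32", "registry.example/busybox:1.29")
	windowsPrePulledImages = sets.NewString("registry.example/agnhost:2.32", "registry.example/httpd:2.4")
)

// imagesToPrepull mirrors the selection made in prepullImages(): Windows nodes
// get the Windows-capable list, everything else gets the default list.
func imagesToPrepull(nodeOSDistro string) sets.String {
	if nodeOSDistro == "windows" {
		return windowsPrePulledImages
	}
	return prePulledImages
}

func main() {
	for _, img := range imagesToPrepull("windows").List() {
		fmt.Println("would prepull:", img)
	}
}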

@@ -36,6 +36,7 @@ import (
"github.com/onsi/ginkgo/reporters"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
@@ -44,6 +45,7 @@ import (
"k8s.io/component-base/version"
commontest "k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/daemonset"
e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -257,6 +259,11 @@ func setupSuite() {
framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
}
if framework.TestContext.PrepullImages {
framework.Logf("Pre-pulling images so that they are cached for the tests.")
prepullImages(c)
}
// Log the version of the server and this client.
framework.Logf("e2e test version: %s", version.Get().GitVersion)
@@ -400,3 +407,42 @@ func setupSuitePerGinkgoNode() {
framework.TestContext.IPFamily = getDefaultClusterIPFamily(c)
framework.Logf("Cluster IP family: %s", framework.TestContext.IPFamily)
}
func prepullImages(c clientset.Interface) {
namespace, err := framework.CreateTestingNS("img-puller", c, map[string]string{
"e2e-framework": "img-puller",
})
framework.ExpectNoError(err)
ns := namespace.Name
defer c.CoreV1().Namespaces().Delete(context.TODO(), ns, metav1.DeleteOptions{})
images := commontest.PrePulledImages
if framework.NodeOSDistroIs("windows") {
images = commontest.WindowsPrePulledImages
}
label := map[string]string{"app": "prepull-daemonset"}
var imgPullers []*appsv1.DaemonSet
for _, img := range images.List() {
dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
dsSpec := daemonset.NewDaemonSet(dsName, img, label, nil, nil, nil)
ds, err := c.AppsV1().DaemonSets(ns).Create(context.TODO(), dsSpec, metav1.CreateOptions{})
framework.ExpectNoError(err)
imgPullers = append(imgPullers, ds)
}
// this should not be a multiple of 5, because node status updates
// every 5 seconds. See https://github.com/kubernetes/kubernetes/pull/14915.
dsRetryPeriod := 9 * time.Second
dsRetryTimeout := 5 * time.Minute
for _, imgPuller := range imgPullers {
checkDaemonset := func() (bool, error) {
return daemonset.CheckPresentOnNodes(c, imgPuller, ns, framework.TestContext.CloudConfig.NumNodes)
}
framework.Logf("Waiting for %s", imgPuller.Name)
err := wait.PollImmediate(dsRetryPeriod, dsRetryTimeout, checkDaemonset)
framework.ExpectNoError(err, "error waiting for image to be pulled")
}
}

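One detail worth noting in the prepullImages() helper above: each image reference is flattened into a DaemonSet name by replacing the characters that are not allowed in object names (slashes and colons). A minimal sketch of just that transformation, using a made-up image reference:

package main

import (
	"fmt"
	"strings"
)

// imgToDaemonSetName mirrors the dsName expression in prepullImages():
// "/" and ":" are not valid in an object name, so they become dashes.
func imgToDaemonSetName(img string) string {
	return fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))
}

func main() {
	// Hypothetical image reference, for illustration only.
	fmt.Println(imgToDaemonSetName("registry.example/e2e/agnhost:2.32"))
	// Output: img-pull-registry.example-e2e-agnhost-2.32
}

This gives each image its own uniquely named puller DaemonSet, which the code then waits on one by one.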

@@ -67,6 +67,18 @@ func CheckRunningOnAllNodes(f *framework.Framework, ds *appsv1.DaemonSet) (bool,
return CheckDaemonPodOnNodes(f, ds, nodeNames)()
}
// CheckPresentOnNodes will check that the daemonset's pods are present (scheduled and past the
// Pending phase) on at least the given number of schedulable nodes.
func CheckPresentOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, numNodes int) (bool, error) {
nodeNames := SchedulableNodes(c, ds)
if len(nodeNames) < numNodes {
return false, nil
}
return checkDaemonPodStateOnNodes(c, ds, ns, nodeNames, func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodPending
})
}
func SchedulableNodes(c clientset.Interface, ds *appsv1.DaemonSet) []string {
nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
framework.ExpectNoError(err)
@@ -90,41 +102,47 @@ func canScheduleOnNode(node v1.Node, ds *appsv1.DaemonSet) bool {
func CheckDaemonPodOnNodes(f *framework.Framework, ds *appsv1.DaemonSet, nodeNames []string) func() (bool, error) {
return func() (bool, error) {
-podList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{})
-if err != nil {
-framework.Logf("could not get the pod list: %v", err)
+return checkDaemonPodStateOnNodes(f.ClientSet, ds, f.Namespace.Name, nodeNames, func(pod *v1.Pod) bool {
+return podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Now())
+})
+}
+}
+func checkDaemonPodStateOnNodes(c clientset.Interface, ds *appsv1.DaemonSet, ns string, nodeNames []string, stateChecker func(*v1.Pod) bool) (bool, error) {
+podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
+if err != nil {
+framework.Logf("could not get the pod list: %v", err)
return false, nil
}
+pods := podList.Items
+nodesToPodCount := make(map[string]int)
+for _, pod := range pods {
+if !metav1.IsControlledBy(&pod, ds) {
+continue
+}
+if pod.DeletionTimestamp != nil {
+continue
+}
+if stateChecker(&pod) {
+nodesToPodCount[pod.Spec.NodeName]++
+}
+}
+framework.Logf("Number of nodes with available pods controlled by daemonset %s: %d", ds.Name, len(nodesToPodCount))
+// Ensure that exactly 1 pod is running on all nodes in nodeNames.
+for _, nodeName := range nodeNames {
+if nodesToPodCount[nodeName] != 1 {
+framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
+return false, nil
+}
-pods := podList.Items
-nodesToPodCount := make(map[string]int)
-for _, pod := range pods {
-if !metav1.IsControlledBy(&pod, ds) {
-continue
-}
-if pod.DeletionTimestamp != nil {
-continue
-}
-if podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) {
-nodesToPodCount[pod.Spec.NodeName]++
-}
-}
-framework.Logf("Number of nodes with available pods: %d", len(nodesToPodCount))
-// Ensure that exactly 1 pod is running on all nodes in nodeNames.
-for _, nodeName := range nodeNames {
-if nodesToPodCount[nodeName] != 1 {
-framework.Logf("Node %s is running %d daemon pod, expected 1", nodeName, nodesToPodCount[nodeName])
-return false, nil
-}
-}
-framework.Logf("Number of running nodes: %d, number of available pods: %d", len(nodeNames), len(nodesToPodCount))
-// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
-// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
-// other nodes.
-return len(nodesToPodCount) == len(nodeNames), nil
-}
+framework.Logf("Number of running nodes: %d, number of available pods: %d in daemonset %s", len(nodeNames), len(nodesToPodCount), ds.Name)
+// Ensure that sizes of the lists are the same. We've verified that every element of nodeNames is in
+// nodesToPodCount, so verifying the lengths are equal ensures that there aren't pods running on any
+// other nodes.
+return len(nodesToPodCount) == len(nodeNames), nil
}
func CheckDaemonStatus(f *framework.Framework, dsName string) error {

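The refactor above extracts the shared per-node counting into checkDaemonPodStateOnNodes and injects the per-pod condition as a stateChecker callback: CheckDaemonPodOnNodes passes an "available" check, while CheckPresentOnNodes only requires that the pod has left the Pending phase. Below is a stripped-down sketch of that predicate-injection pattern; the pod struct is a stub standing in for *v1.Pod, for illustration only.

package main

import "fmt"

// pod is a stub standing in for *v1.Pod.
type pod struct {
	node  string
	phase string
	ready bool
}

// countPerNode mirrors the core of checkDaemonPodStateOnNodes: it counts, per node,
// the pods that satisfy the injected stateChecker predicate.
func countPerNode(pods []pod, stateChecker func(pod) bool) map[string]int {
	counts := make(map[string]int)
	for _, p := range pods {
		if stateChecker(p) {
			counts[p.node]++
		}
	}
	return counts
}

func main() {
	pods := []pod{
		{node: "node-1", phase: "Running", ready: true},
		{node: "node-2", phase: "Pending"},
	}
	// Roughly the condition CheckDaemonPodOnNodes cares about: the pod is available.
	available := func(p pod) bool { return p.ready }
	// The condition CheckPresentOnNodes cares about: the pod has left the Pending phase.
	present := func(p pod) bool { return p.phase != "Pending" }

	fmt.Println(countPerNode(pods, available)) // map[node-1:1]
	fmt.Println(countPerNode(pods, present))   // map[node-1:1]
}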

@@ -340,6 +340,9 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
flags.StringVar(&TestContext.KubeVolumeDir, "volume-dir", "/var/lib/kubelet", "Path to the directory containing the kubelet volumes.")
flags.StringVar(&TestContext.CertDir, "cert-dir", "", "Path to the directory containing the certs. Default is empty, which doesn't use certs.")
flags.StringVar(&TestContext.RepoRoot, "repo-root", "../../", "Root directory of kubernetes repository, for finding test files.")
// NOTE: Node E2E tests define this flag as well, but with a default of true.
// If this default becomes true here as well, the two definitions should be refactored into RegisterCommonFlags.
flags.BoolVar(&TestContext.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")
flags.StringVar(&TestContext.Provider, "provider", "", "The name of the Kubernetes provider (gce, gke, local, skeleton (the fallback if not set), etc.)")
flags.StringVar(&TestContext.Tooling, "tooling", "", "The tooling in use (kops, gke, etc.)")
flags.StringVar(&TestContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")
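
Putting the pieces together: the flag registered here defaults to false, hack/ginkgo-e2e.sh forwards PREPULL_IMAGES into it, and setupSuite() checks TestContext.PrepullImages before calling prepullImages(). The following is a minimal, self-contained sketch of that wiring with a simplified stand-in for TestContext; only the flag name and default come from the diff above.

package main

import (
	"flag"
	"fmt"
)

// testContext is a simplified stand-in for the framework's TestContext.
type testContext struct {
	PrepullImages bool
}

func main() {
	var ctx testContext
	fs := flag.NewFlagSet("e2e", flag.ExitOnError)
	// Same shape as the registration in RegisterClusterFlags: default false, opt in explicitly.
	fs.BoolVar(&ctx.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")

	// e.g. the value forwarded by hack/ginkgo-e2e.sh as --prepull-images="${PREPULL_IMAGES:-false}"
	_ = fs.Parse([]string{"--prepull-images=true"})

	if ctx.PrepullImages {
		fmt.Println("would pre-pull images before running the suite")
	}
}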