/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"k8s.io/klog/v2"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/component-base/logs"
	"k8s.io/component-base/version"
	commontest "k8s.io/kubernetes/test/e2e/common"
	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/e2e/framework/daemonset"
	e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
	e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2ereporters "k8s.io/kubernetes/test/e2e/reporters"
	utilnet "k8s.io/utils/net"

	clientset "k8s.io/client-go/kubernetes"
	// ensure auth plugins are loaded
	_ "k8s.io/client-go/plugin/pkg/client/auth"

	// ensure that cloud providers are loaded
	_ "k8s.io/kubernetes/test/e2e/framework/providers/aws"
	_ "k8s.io/kubernetes/test/e2e/framework/providers/azure"
	_ "k8s.io/kubernetes/test/e2e/framework/providers/gce"
	_ "k8s.io/kubernetes/test/e2e/framework/providers/kubemark"
	_ "k8s.io/kubernetes/test/e2e/framework/providers/openstack"
	_ "k8s.io/kubernetes/test/e2e/framework/providers/vsphere"

	// Ensure that logging flags are part of the command line.
	_ "k8s.io/component-base/logs/testinit"
)

const (
	// namespaceCleanupTimeout is how long to wait for the namespace to be deleted.
	// If there are any orphaned namespaces to clean up, this test is running
	// on a long-lived cluster. A long wait here is preferable to spurious test
	// failures caused by leaked resources from a previous test run.
	namespaceCleanupTimeout = 15 * time.Minute
)

var progressReporter = &e2ereporters.ProgressReporter{}

var _ = ginkgo.SynchronizedBeforeSuite(func(ctx context.Context) []byte {
	// Reference common test to make the import valid.
	commontest.CurrentSuite = commontest.E2E
	progressReporter.SetStartMsg()
	setupSuite(ctx)
	return nil
}, func(ctx context.Context, data []byte) {
	// Run on all Ginkgo nodes
	setupSuitePerGinkgoNode(ctx)
})

var _ = ginkgo.SynchronizedAfterSuite(func() {
	progressReporter.SetEndMsg()
}, func(ctx context.Context) {
	AfterSuiteActions(ctx)
})

// RunE2ETests checks configuration parameters (specified through flags) and then runs
// E2E tests using the Ginkgo runner.
// If a "report directory" is specified, one or more JUnit test reports will be
// generated in this directory, and cluster logs will also be saved.
// This function is called on each Ginkgo node in parallel mode.
func RunE2ETests(t *testing.T) {
	// InitLogs disables contextual logging, without a way to enable it again
	// in the E2E test suite because it has no feature gates. It used to have a
	// misleading --feature-gates parameter, but that didn't do what users
	// and developers expected (define which features the cluster supports)
	// and therefore got removed.
	//
	// Because contextual logging is useful and should get tested, it gets
	// re-enabled here unconditionally.
	logs.InitLogs()
	defer logs.FlushLogs()
	klog.EnableContextualLogging(true)

	progressReporter = e2ereporters.NewProgressReporter(framework.TestContext.ProgressReportURL)
	gomega.RegisterFailHandler(framework.Fail)

	// Run tests through the Ginkgo runner with output to console + JUnit for Jenkins
	suiteConfig, reporterConfig := framework.CreateGinkgoConfig()
	klog.Infof("Starting e2e run %q on Ginkgo node %d", framework.RunID, suiteConfig.ParallelProcess)
	ginkgo.RunSpecs(t, "Kubernetes e2e suite", suiteConfig, reporterConfig)
}
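
// A typical wiring for this entry point (a sketch, not the exact contents of
// the suite's e2e_test.go, which also handles flag registration and test
// context setup) is a standard Go test function that delegates to it:
//
//	func TestE2E(t *testing.T) {
//		RunE2ETests(t)
//	}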

// getDefaultClusterIPFamily obtains the default IP family of the cluster
// using the ClusterIP address of the kubernetes service created in the default namespace.
// This unequivocally identifies the default IP family because services are single-family.
// TODO: dual-stack may support multiple families per service,
// but we can detect if a cluster is dual-stack because pods have two addresses (one per family).
func getDefaultClusterIPFamily(ctx context.Context, c clientset.Interface) string {
	// Get the ClusterIP of the kubernetes service created in the default namespace
	svc, err := c.CoreV1().Services(metav1.NamespaceDefault).Get(ctx, "kubernetes", metav1.GetOptions{})
	if err != nil {
		framework.Failf("Failed to get kubernetes service ClusterIP: %v", err)
	}

	if utilnet.IsIPv6String(svc.Spec.ClusterIP) {
		return "ipv6"
	}
	return "ipv4"
}
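
// For example, utilnet.IsIPv6String("2001:db8::1") is true while
// utilnet.IsIPv6String("10.0.0.1") is false, so a cluster whose default
// "kubernetes" Service has an IPv6 ClusterIP is reported as "ipv6".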

// waitForDaemonSets waits for all daemonsets in the given namespace to be ready
// (defined as all but 'allowedNotReadyNodes' pods associated with that
// daemonset are ready).
//
// If allowedNotReadyNodes is -1, this method returns immediately without waiting.
func waitForDaemonSets(ctx context.Context, c clientset.Interface, ns string, allowedNotReadyNodes int32, timeout time.Duration) error {
	if allowedNotReadyNodes == -1 {
		return nil
	}

	start := time.Now()
	framework.Logf("Waiting up to %v for all daemonsets in namespace '%s' to start",
		timeout, ns)

	return wait.PollImmediateWithContext(ctx, framework.Poll, timeout, func(ctx context.Context) (bool, error) {
		dsList, err := c.AppsV1().DaemonSets(ns).List(ctx, metav1.ListOptions{})
		if err != nil {
			framework.Logf("Error getting daemonsets in namespace: '%s': %v", ns, err)
			return false, err
		}
		var notReadyDaemonSets []string
		for _, ds := range dsList.Items {
			framework.Logf("%d / %d pods ready in namespace '%s' in daemonset '%s' (%d seconds elapsed)", ds.Status.NumberReady, ds.Status.DesiredNumberScheduled, ns, ds.ObjectMeta.Name, int(time.Since(start).Seconds()))
			if ds.Status.DesiredNumberScheduled-ds.Status.NumberReady > allowedNotReadyNodes {
				notReadyDaemonSets = append(notReadyDaemonSets, ds.ObjectMeta.Name)
			}
		}

		if len(notReadyDaemonSets) > 0 {
			framework.Logf("there are not ready daemonsets: %v", notReadyDaemonSets)
			return false, nil
		}

		return true, nil
	})
}
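
// As a worked example of the threshold above: with allowedNotReadyNodes=1, a
// daemonset with DesiredNumberScheduled=10 and NumberReady=9 counts as ready
// (10-9 <= 1), while NumberReady=8 keeps the poll waiting (10-8 > 1).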

// setupSuite is the boilerplate that can be used to setup ginkgo test suites, on the SynchronizedBeforeSuite step.
// There are certain operations we only want to run once per overall test invocation
// (such as deleting old namespaces, or verifying that all system pods are running).
// Because of the way Ginkgo runs tests in parallel, we must use SynchronizedBeforeSuite
// to ensure that these operations only run on the first parallel Ginkgo node.
//
// This function takes two parameters: one function which runs on only the first Ginkgo node,
// returning an opaque byte array, and then a second function which runs on all Ginkgo nodes,
// accepting the byte array.
func setupSuite(ctx context.Context) {
	// Run only on Ginkgo node 1

	switch framework.TestContext.Provider {
	case "gce", "gke":
		logClusterImageSources()
	}

	c, err := framework.LoadClientset()
	if err != nil {
		klog.Fatal("Error loading client: ", err)
	}

	// Delete any namespaces except those created by the system. This ensures no
	// lingering resources are left over from a previous test run.
	if framework.TestContext.CleanStart {
		deleted, err := framework.DeleteNamespaces(ctx, c, nil, /* deleteFilter */
			[]string{
				metav1.NamespaceSystem,
				metav1.NamespaceDefault,
				metav1.NamespacePublic,
				v1.NamespaceNodeLease,
			})
		if err != nil {
			framework.Failf("Error deleting orphaned namespaces: %v", err)
		}
		if err := framework.WaitForNamespacesDeleted(ctx, c, deleted, namespaceCleanupTimeout); err != nil {
			framework.Failf("Failed to delete orphaned namespaces %v: %v", deleted, err)
		}
	}

	timeouts := framework.NewTimeoutContext()

	// In large clusters we may get to this point but still have a bunch
	// of nodes without Routes created. Since this would make a node
	// unschedulable, we need to wait until all of them are schedulable.
	framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, c, timeouts.NodeSchedulable))

	// If NumNodes is not specified then auto-detect how many are schedulable and not tainted
	if framework.TestContext.CloudConfig.NumNodes == framework.DefaultNumNodes {
		nodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
		framework.ExpectNoError(err)
		framework.TestContext.CloudConfig.NumNodes = len(nodes.Items)
	}

	// Ensure all pods are running and ready before starting tests (otherwise,
	// cluster infrastructure pods that are being pulled or started can block
	// test pods from running, and tests that ensure all pods are running and
	// ready will fail).
	//
	// TODO: In large clusters, we often observe non-starting pods due to
	// #41007. To avoid those pods preventing the whole test run (and just
	// wasting the run), we allow for some not-ready pods (with the
	// number equal to the number of allowed not-ready nodes).
	if err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), timeouts.SystemPodsStartup); err != nil {
		e2edebug.DumpAllNamespaceInfo(ctx, c, metav1.NamespaceSystem)
		e2ekubectl.LogFailedContainers(ctx, c, metav1.NamespaceSystem, framework.Logf)
		framework.Failf("Error waiting for all pods to be running and ready: %v", err)
	}

	if err := waitForDaemonSets(ctx, c, metav1.NamespaceSystem, int32(framework.TestContext.AllowedNotReadyNodes), timeouts.SystemDaemonsetStartup); err != nil {
		framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
	}

	if framework.TestContext.PrepullImages {
		framework.Logf("Pre-pulling images so that they are cached for the tests.")
		prepullImages(ctx, c)
	}

	// Log the version of the server and this client.
	framework.Logf("e2e test version: %s", version.Get().GitVersion)

	dc := c.DiscoveryClient

	serverVersion, serverErr := dc.ServerVersion()
	if serverErr != nil {
		framework.Logf("Unexpected server error retrieving version: %v", serverErr)
	}
	if serverVersion != nil {
		framework.Logf("kube-apiserver version: %s", serverVersion.GitVersion)
	}

	if framework.TestContext.NodeKiller.Enabled {
		nodeKiller := e2enode.NewNodeKiller(framework.TestContext.NodeKiller, c, framework.TestContext.Provider)
		go nodeKiller.Run(framework.TestContext.NodeKiller.NodeKillerStopCtx)
	}
}

// logClusterImageSources writes out cluster image sources.
func logClusterImageSources() {
	controlPlaneNodeImg, workerNodeImg, err := lookupClusterImageSources()
	if err != nil {
		framework.Logf("Cluster image sources lookup failed: %v", err)
		return
	}
	framework.Logf("cluster-control-plane-node-image: %s", controlPlaneNodeImg)
	framework.Logf("cluster-worker-node-image: %s", workerNodeImg)

	images := map[string]string{
		"control_plane_node_os_image": controlPlaneNodeImg,
		"worker_node_os_image":        workerNodeImg,
	}

	outputBytes, _ := json.MarshalIndent(images, "", " ")
	filePath := filepath.Join(framework.TestContext.ReportDir, "images.json")
	if err := os.WriteFile(filePath, outputBytes, 0644); err != nil {
		framework.Logf("cluster image sources: could not write to %q: %v", filePath, err)
	}
}

// TODO: These should really just use the GCE API client library or at least use
// better formatted output from the --format flag.

// Returns control plane node & worker node image string, or error
func lookupClusterImageSources() (string, string, error) {
	// Given args for a gcloud compute command, run it with other args, and return the values,
	// whether separated by newlines, commas or semicolons.
	gcloudf := func(argv ...string) ([]string, error) {
		args := []string{"compute"}
		args = append(args, argv...)
		args = append(args, "--project", framework.TestContext.CloudConfig.ProjectID)
		if framework.TestContext.CloudConfig.MultiMaster {
			args = append(args, "--region", framework.TestContext.CloudConfig.Region)
		} else {
			args = append(args, "--zone", framework.TestContext.CloudConfig.Zone)
		}
		outputBytes, err := exec.Command("gcloud", args...).CombinedOutput()
		str := strings.Replace(string(outputBytes), ",", "\n", -1)
		str = strings.Replace(str, ";", "\n", -1)
		lines := strings.Split(str, "\n")
		if err != nil {
			framework.Logf("lookupDiskImageSources: gcloud error with [%#v]; err:%v", argv, err)
			for _, l := range lines {
				framework.Logf(" > %s", l)
			}
		}
		return lines, err
	}

	// Given a GCE instance, look through its disks, finding one that has a sourceImage
	host2image := func(instance string) (string, error) {
		// gcloud compute instances describe {INSTANCE} --format="get(disks[].source)"
		// gcloud compute disks describe {DISKURL} --format="get(sourceImage)"
		disks, err := gcloudf("instances", "describe", instance, "--format=get(disks[].source)")
		if err != nil {
			return "", err
		} else if len(disks) == 0 {
			return "", fmt.Errorf("instance %q had no findable disks", instance)
		}
		// Loop over disks, looking for the boot disk
		for _, disk := range disks {
			lines, err := gcloudf("disks", "describe", disk, "--format=get(sourceImage)")
			if err != nil {
				return "", err
			} else if len(lines) > 0 && lines[0] != "" {
				return lines[0], nil // break, we're done
			}
		}
		return "", fmt.Errorf("instance %q had no disk with a sourceImage", instance)
	}

	// gcloud compute instance-groups list-instances {GROUPNAME} --format="get(instance)"
	workerNodeName := ""
	instGroupName := strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",")[0]
	if lines, err := gcloudf("instance-groups", "list-instances", instGroupName, "--format=get(instance)"); err != nil {
		return "", "", err
	} else if len(lines) == 0 {
		return "", "", fmt.Errorf("no instances inside instance-group %q", instGroupName)
	} else {
		workerNodeName = lines[0]
	}

	workerNodeImg, err := host2image(workerNodeName)
	if err != nil {
		return "", "", err
	}
	frags := strings.Split(workerNodeImg, "/")
	workerNodeImg = frags[len(frags)-1]

	// For GKE clusters, controlPlaneNodeName will not be defined; we just leave controlPlaneNodeImg blank.
	controlPlaneNodeImg := ""
	if controlPlaneNodeName := framework.TestContext.CloudConfig.MasterName; controlPlaneNodeName != "" {
		img, err := host2image(controlPlaneNodeName)
		if err != nil {
			return "", "", err
		}
		frags = strings.Split(img, "/")
		controlPlaneNodeImg = frags[len(frags)-1]
	}

	return controlPlaneNodeImg, workerNodeImg, nil
}

// setupSuitePerGinkgoNode is the boilerplate that can be used to setup ginkgo test suites, on the SynchronizedBeforeSuite step.
// There are certain operations we only want to run once per overall test invocation on each Ginkgo node,
// such as making some global variables accessible to all parallel executions.
// Because of the way Ginkgo runs tests in parallel, we must use SynchronizedBeforeSuite.
// Ref: https://onsi.github.io/ginkgo/#parallel-specs
func setupSuitePerGinkgoNode(ctx context.Context) {
	// Obtain the default IP family of the cluster.
	// Some e2e tests are designed to work on IPv4 only; this global variable
	// allows those tests to be adapted to work on both IPv4 and IPv6.
	// TODO: dual-stack
	// dual-stack clusters can be ipv4-ipv6 or ipv6-ipv4; order matters,
	// and services use the primary IP family by default
	c, err := framework.LoadClientset()
	if err != nil {
		klog.Fatal("Error loading client: ", err)
	}
	framework.TestContext.IPFamily = getDefaultClusterIPFamily(ctx, c)
	framework.Logf("Cluster IP family: %s", framework.TestContext.IPFamily)
}

// prepullImages pre-pulls the configured test images onto all nodes by
// creating one image-puller DaemonSet per image and waiting for its pods
// to be present on the expected number of nodes.
func prepullImages(ctx context.Context, c clientset.Interface) {
	namespace, err := framework.CreateTestingNS(ctx, "img-puller", c, map[string]string{
		"e2e-framework": "img-puller",
	})
	framework.ExpectNoError(err)
	ns := namespace.Name
	ginkgo.DeferCleanup(c.CoreV1().Namespaces().Delete, ns, metav1.DeleteOptions{})

	images := commontest.PrePulledImages
	if framework.NodeOSDistroIs("windows") {
		images = commontest.WindowsPrePulledImages
	}

	label := map[string]string{"app": "prepull-daemonset"}
	var imgPullers []*appsv1.DaemonSet
	for _, img := range images.List() {
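		// Derive a DNS-compatible DaemonSet name from the image reference by
		// replacing "/" and ":" with "-"; e.g. a hypothetical image
		// "registry.k8s.io/pause:3.9" becomes "img-pull-registry.k8s.io-pause-3.9".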
		dsName := fmt.Sprintf("img-pull-%s", strings.ReplaceAll(strings.ReplaceAll(img, "/", "-"), ":", "-"))

		dsSpec := daemonset.NewDaemonSet(dsName, img, label, nil, nil, nil)
		ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, dsSpec, metav1.CreateOptions{})
		framework.ExpectNoError(err)
		imgPullers = append(imgPullers, ds)
	}

	// this should not be a multiple of 5, because node status updates
	// every 5 seconds. See https://github.com/kubernetes/kubernetes/pull/14915.
	dsRetryPeriod := 9 * time.Second
	dsRetryTimeout := 5 * time.Minute

	for _, imgPuller := range imgPullers {
		checkDaemonset := func(ctx context.Context) (bool, error) {
			return daemonset.CheckPresentOnNodes(ctx, c, imgPuller, ns, framework.TestContext.CloudConfig.NumNodes)
		}
		framework.Logf("Waiting for %s", imgPuller.Name)
		err := wait.PollImmediateWithContext(ctx, dsRetryPeriod, dsRetryTimeout, checkDaemonset)
		framework.ExpectNoError(err, "error waiting for image to be pulled")
	}
}