We were treating multiple nodes as a failure; instead we can return all the node (internal) IPs we find.
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"net"
	"net/url"
	"os"
	"os/exec"
	"path"
	"sort"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"
	gomegatypes "github.com/onsi/gomega/types"

	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/dynamic"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
	watchtools "k8s.io/client-go/tools/watch"
	"k8s.io/component-base/featuregate"
	testutils "k8s.io/kubernetes/test/utils"
	imageutils "k8s.io/kubernetes/test/utils/image"
	uexec "k8s.io/utils/exec"

	// TODO: Remove the following imports (ref: https://github.com/kubernetes/kubernetes/issues/81245)
	e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)

const (
	// Minimal number of nodes for the cluster to be considered large.
	largeClusterThreshold = 100

	// TODO(justinsb): Avoid hardcoding this.
	awsMasterIP = "172.20.0.9"

	// AllContainers specifies that all containers be visited.
	// Copied from pkg/api/v1/pod to avoid pulling extra dependencies
	AllContainers = InitContainers | Containers | EphemeralContainers
)

// DEPRECATED constants. Use the timeouts in framework.Framework instead.
const (
	// PodListTimeout is how long to wait for the pod to be listable.
	PodListTimeout = time.Minute

	// PodStartTimeout is how long to wait for the pod to be started.
	PodStartTimeout = 5 * time.Minute

	// PodStartShortTimeout is the same as `PodStartTimeout` to wait for the pod to be started, but shorter.
	// Use it case by case when we are sure pod start will not be delayed for
	// minutes by slow docker pulls or something else.
	PodStartShortTimeout = 2 * time.Minute

	// PodDeleteTimeout is how long to wait for a pod to be deleted.
	PodDeleteTimeout = 5 * time.Minute

	// PodGetTimeout is how long to wait for a pod to be retrieved.
	PodGetTimeout = 2 * time.Minute

	// PodEventTimeout is how much we wait for a pod event to occur.
	PodEventTimeout = 2 * time.Minute

	// ServiceStartTimeout is how long to wait for a service endpoint to be resolvable.
	ServiceStartTimeout = 3 * time.Minute

	// Poll is how often to poll pods, nodes and claims.
	Poll = 2 * time.Second

	// PollShortTimeout is the short timeout value in polling.
	PollShortTimeout = 1 * time.Minute

	// ServiceAccountProvisionTimeout is how long to wait for a service account to be provisioned.
	// Service accounts are provisioned after namespace creation.
	// A service account is required to support pod creation in a namespace as part of admission control.
	ServiceAccountProvisionTimeout = 2 * time.Minute

	// SingleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	SingleCallTimeout = 5 * time.Minute

	// NodeReadyInitialTimeout is how long nodes have to be "ready" when a test begins. They should already
	// be "ready" before the test starts, so this is small.
	NodeReadyInitialTimeout = 20 * time.Second

	// PodReadyBeforeTimeout is how long pods have to be "ready" when a test begins.
	PodReadyBeforeTimeout = 5 * time.Minute

	// ClaimProvisionShortTimeout is the same as `ClaimProvisionTimeout` to wait for a claim to be dynamically provisioned, but shorter.
	// Use it case by case when we are sure this timeout is enough.
	ClaimProvisionShortTimeout = 1 * time.Minute

	// ClaimProvisionTimeout is how long claims have to become dynamically provisioned.
	ClaimProvisionTimeout = 5 * time.Minute

	// RestartNodeReadyAgainTimeout is how long a node is allowed to become "Ready" after it is restarted before
	// the test is considered failed.
	RestartNodeReadyAgainTimeout = 5 * time.Minute

	// RestartPodReadyAgainTimeout is how long a pod is allowed to become "running" and "ready" after a node
	// restart before the test is considered failed.
	RestartPodReadyAgainTimeout = 5 * time.Minute

	// SnapshotCreateTimeout is how long for a snapshot to create its snapshotContent.
	SnapshotCreateTimeout = 5 * time.Minute

	// SnapshotDeleteTimeout is how long for a snapshot to delete its snapshotContent.
	SnapshotDeleteTimeout = 5 * time.Minute
)

var (
	// BusyBoxImage is the image URI of BusyBox.
	BusyBoxImage = imageutils.GetE2EImage(imageutils.BusyBox)

	// ProvidersWithSSH are those providers where each node is accessible with SSH
	ProvidersWithSSH = []string{"gce", "gke", "aws", "local"}

	// ServeHostnameImage is a serve hostname image name.
	ServeHostnameImage = imageutils.GetE2EImage(imageutils.Agnhost)
)

// RunID is a unique identifier of the e2e run.
// Beware that this ID is not the same for all tests in the e2e run, because each Ginkgo node creates it separately.
var RunID = uuid.NewUUID()

// CreateTestingNSFn is a func that is responsible for creating the namespace used for executing e2e tests.
type CreateTestingNSFn func(baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error)

// APIAddress returns an address of an instance.
func APIAddress() string {
	instanceURL, err := url.Parse(TestContext.Host)
	ExpectNoError(err)
	return instanceURL.Hostname()
}

// ProviderIs returns true if the provider is included in the providers. Otherwise false.
func ProviderIs(providers ...string) bool {
	for _, provider := range providers {
		if strings.EqualFold(provider, TestContext.Provider) {
			return true
		}
	}
	return false
}

// MasterOSDistroIs returns true if the master OS distro is included in the supportedMasterOsDistros. Otherwise false.
func MasterOSDistroIs(supportedMasterOsDistros ...string) bool {
	for _, distro := range supportedMasterOsDistros {
		if strings.EqualFold(distro, TestContext.MasterOSDistro) {
			return true
		}
	}
	return false
}

// NodeOSDistroIs returns true if the node OS distro is included in the supportedNodeOsDistros. Otherwise false.
func NodeOSDistroIs(supportedNodeOsDistros ...string) bool {
	for _, distro := range supportedNodeOsDistros {
		if strings.EqualFold(distro, TestContext.NodeOSDistro) {
			return true
		}
	}
	return false
}

// NodeOSArchIs returns true if the node OS arch is included in the supportedNodeOsArchs. Otherwise false.
func NodeOSArchIs(supportedNodeOsArchs ...string) bool {
	for _, arch := range supportedNodeOsArchs {
		if strings.EqualFold(arch, TestContext.NodeOSArch) {
			return true
		}
	}
	return false
}

// DeleteNamespaces deletes all namespaces that match the given delete and skip filters.
// Filtering is by simple strings.Contains; the skip filter is applied first, then the delete filter.
// Returns the list of deleted namespaces or an error.
func DeleteNamespaces(c clientset.Interface, deleteFilter, skipFilter []string) ([]string, error) {
	ginkgo.By("Deleting namespaces")
	nsList, err := c.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
	ExpectNoError(err, "Failed to get namespace list")
	var deleted []string
	var wg sync.WaitGroup
OUTER:
	for _, item := range nsList.Items {
		for _, pattern := range skipFilter {
			if strings.Contains(item.Name, pattern) {
				continue OUTER
			}
		}
		if deleteFilter != nil {
			var shouldDelete bool
			for _, pattern := range deleteFilter {
				if strings.Contains(item.Name, pattern) {
					shouldDelete = true
					break
				}
			}
			if !shouldDelete {
				continue OUTER
			}
		}
		wg.Add(1)
		deleted = append(deleted, item.Name)
		go func(nsName string) {
			defer wg.Done()
			defer ginkgo.GinkgoRecover()
			gomega.Expect(c.CoreV1().Namespaces().Delete(context.TODO(), nsName, metav1.DeleteOptions{})).To(gomega.Succeed())
			Logf("namespace : %v api call to delete is complete ", nsName)
		}(item.Name)
	}
	wg.Wait()
	return deleted, nil
}

// WaitForNamespacesDeleted waits for the namespaces to be deleted.
func WaitForNamespacesDeleted(c clientset.Interface, namespaces []string, timeout time.Duration) error {
	ginkgo.By(fmt.Sprintf("Waiting for namespaces %+v to vanish", namespaces))
	nsMap := map[string]bool{}
	for _, ns := range namespaces {
		nsMap[ns] = true
	}
	// Now poll until all namespaces have been eradicated.
	return wait.Poll(2*time.Second, timeout,
		func() (bool, error) {
			nsList, err := c.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
			if err != nil {
				return false, err
			}
			for _, item := range nsList.Items {
				if _, ok := nsMap[item.Name]; ok {
					return false, nil
				}
			}
			return true, nil
		})
}

func waitForServiceAccountInNamespace(c clientset.Interface, ns, serviceAccountName string, timeout time.Duration) error {
	fieldSelector := fields.OneTermEqualSelector("metadata.name", serviceAccountName).String()
	lw := &cache.ListWatch{
		ListFunc: func(options metav1.ListOptions) (object runtime.Object, e error) {
			options.FieldSelector = fieldSelector
			return c.CoreV1().ServiceAccounts(ns).List(context.TODO(), options)
		},
		WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) {
			options.FieldSelector = fieldSelector
			return c.CoreV1().ServiceAccounts(ns).Watch(context.TODO(), options)
		},
	}
	ctx, cancel := watchtools.ContextWithOptionalTimeout(context.Background(), timeout)
	defer cancel()
	_, err := watchtools.UntilWithSync(ctx, lw, &v1.ServiceAccount{}, nil, serviceAccountHasSecrets)
	return err
}

// serviceAccountHasSecrets returns true if the service account has at least one secret,
// false if it does not, or an error.
func serviceAccountHasSecrets(event watch.Event) (bool, error) {
	switch event.Type {
	case watch.Deleted:
		return false, apierrors.NewNotFound(schema.GroupResource{Resource: "serviceaccounts"}, "")
	}
	switch t := event.Object.(type) {
	case *v1.ServiceAccount:
		return len(t.Secrets) > 0, nil
	}
	return false, nil
}

// WaitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned.
// The default service account is what is associated with pods when they do not specify a service account.
// As a result, pods are not able to be provisioned in a namespace until the service account is provisioned.
func WaitForDefaultServiceAccountInNamespace(c clientset.Interface, namespace string) error {
	return waitForServiceAccountInNamespace(c, namespace, "default", ServiceAccountProvisionTimeout)
}

// CreateTestingNS should be used by every test; note that we append a common prefix to the provided test name.
// Please see NewFramework instead of using this directly.
func CreateTestingNS(baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error) {
	if labels == nil {
		labels = map[string]string{}
	}
	labels["e2e-run"] = string(RunID)

	// We don't use the ObjectMeta.GenerateName feature, as in case of API call
	// failure we don't know whether the namespace was created and what is its
	// name.
	name := fmt.Sprintf("%v-%v", baseName, RandomSuffix())

	namespaceObj := &v1.Namespace{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: "",
			Labels:    labels,
		},
		Status: v1.NamespaceStatus{},
	}
	// Be robust about making the namespace creation call.
	var got *v1.Namespace
	if err := wait.PollImmediate(Poll, 30*time.Second, func() (bool, error) {
		var err error
		got, err = c.CoreV1().Namespaces().Create(context.TODO(), namespaceObj, metav1.CreateOptions{})
		if err != nil {
			if apierrors.IsAlreadyExists(err) {
				// regenerate on conflict
				Logf("Namespace name %q was already taken, generate a new name and retry", namespaceObj.Name)
				namespaceObj.Name = fmt.Sprintf("%v-%v", baseName, RandomSuffix())
			} else {
				Logf("Unexpected error while creating namespace: %v", err)
			}
			return false, nil
		}
		return true, nil
	}); err != nil {
		return nil, err
	}

	if TestContext.VerifyServiceAccount {
		if err := WaitForDefaultServiceAccountInNamespace(c, got.Name); err != nil {
			// Even if we fail to create a serviceAccount in the namespace,
			// we have successfully created a namespace.
			// So, return the created namespace.
			return got, err
		}
	}
	return got, nil
}

// CheckTestingNSDeletedExcept checks whether all e2e based existing namespaces are in the Terminating state
// and waits until they are finally deleted. It ignores the namespace given by skip.
func CheckTestingNSDeletedExcept(c clientset.Interface, skip string) error {
	// TODO: Since we don't have support for bulk resource deletion in the API,
	// while deleting a namespace we are deleting all objects from that namespace
	// one by one (one deletion == one API call). This basically exposes us to
	// throttling - currently controller-manager has a limit of max 20 QPS.
	// Once #10217 is implemented and used in namespace-controller, deleting all
	// object from a given namespace should be much faster and we will be able
	// to lower this timeout.
	// However, now Density test is producing ~26000 events and Load capacity test
	// is producing ~35000 events, thus assuming there are no other requests it will
	// take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60
	// minutes to avoid any timeouts here.
	timeout := 60 * time.Minute

	Logf("Waiting for terminating namespaces to be deleted...")
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) {
		namespaces, err := c.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
		if err != nil {
			Logf("Listing namespaces failed: %v", err)
			continue
		}
		terminating := 0
		for _, ns := range namespaces.Items {
			if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip {
				if ns.Status.Phase == v1.NamespaceActive {
					return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name)
				}
				terminating++
			}
		}
		if terminating == 0 {
			return nil
		}
	}
	return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
}

// WaitForServiceEndpointsNum waits until the number of endpoints that implement the service reaches expectNum.
func WaitForServiceEndpointsNum(c clientset.Interface, namespace, serviceName string, expectNum int, interval, timeout time.Duration) error {
	return wait.Poll(interval, timeout, func() (bool, error) {
		Logf("Waiting for amount of service:%s endpoints to be %d", serviceName, expectNum)
		list, err := c.CoreV1().Endpoints(namespace).List(context.TODO(), metav1.ListOptions{})
		if err != nil {
			return false, err
		}

		for _, e := range list.Items {
			if e.Name == serviceName && countEndpointsNum(&e) == expectNum {
				return true, nil
			}
		}
		return false, nil
	})
}

func countEndpointsNum(e *v1.Endpoints) int {
	num := 0
	for _, sub := range e.Subsets {
		num += len(sub.Addresses)
	}
	return num
}

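// Illustrative usage sketch (not part of the original file): a test that scales a
// workload typically pairs it with WaitForServiceEndpointsNum to block until the
// backing Endpoints object reflects the new replica count. The client `cs`, the
// namespace `ns`, the service name "my-svc", and the replica count 2 are hypothetical;
// Poll and ServiceStartTimeout are the constants defined above, qualified here as a
// caller outside this package would see them.
//
//	if err := framework.WaitForServiceEndpointsNum(cs, ns, "my-svc", 2, framework.Poll, framework.ServiceStartTimeout); err != nil {
//		framework.Failf("service %q never reached 2 ready endpoints: %v", "my-svc", err)
//	}
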

// restclientConfig returns a config that holds the information needed to build a connection to Kubernetes clusters.
func restclientConfig(kubeContext string) (*clientcmdapi.Config, error) {
	Logf(">>> kubeConfig: %s", TestContext.KubeConfig)
	if TestContext.KubeConfig == "" {
		return nil, fmt.Errorf("KubeConfig must be specified to load client config")
	}
	c, err := clientcmd.LoadFromFile(TestContext.KubeConfig)
	if err != nil {
		return nil, fmt.Errorf("error loading KubeConfig: %v", err.Error())
	}
	if kubeContext != "" {
		Logf(">>> kubeContext: %s", kubeContext)
		c.CurrentContext = kubeContext
	}
	return c, nil
}

// ClientConfigGetter is a func that returns a getter to return a config.
type ClientConfigGetter func() (*restclient.Config, error)

// LoadConfig returns a config for a rest client with the UserAgent set to include the current test name.
func LoadConfig() (config *restclient.Config, err error) {
	defer func() {
		if err == nil && config != nil {
			testDesc := ginkgo.CurrentGinkgoTestDescription()
			if len(testDesc.ComponentTexts) > 0 {
				componentTexts := strings.Join(testDesc.ComponentTexts, " ")
				config.UserAgent = fmt.Sprintf("%s -- %s", rest.DefaultKubernetesUserAgent(), componentTexts)
			}
		}
	}()

	if TestContext.NodeE2E {
		// This is a node e2e test, apply the node e2e configuration
		return &restclient.Config{
			Host:        TestContext.Host,
			BearerToken: TestContext.BearerToken,
			TLSClientConfig: restclient.TLSClientConfig{
				Insecure: true,
			},
		}, nil
	}
	c, err := restclientConfig(TestContext.KubeContext)
	if err != nil {
		if TestContext.KubeConfig == "" {
			return restclient.InClusterConfig()
		}
		return nil, err
	}
	// In case Host is not set in TestContext, set it to the
	// CurrentContext Server for the k8s API client to connect to.
	if TestContext.Host == "" && c.Clusters != nil {
		currentContext, ok := c.Clusters[c.CurrentContext]
		if ok {
			TestContext.Host = currentContext.Server
		}
	}

	return clientcmd.NewDefaultClientConfig(*c, &clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: TestContext.Host}}).ClientConfig()
}

// LoadClientset returns a clientset for connecting to kubernetes clusters.
func LoadClientset() (*clientset.Clientset, error) {
	config, err := LoadConfig()
	if err != nil {
		return nil, fmt.Errorf("error creating client: %v", err.Error())
	}
	return clientset.NewForConfig(config)
}

// RandomSuffix provides a random sequence to append to pods, services, rcs.
func RandomSuffix() string {
	return strconv.Itoa(rand.Intn(10000))
}

// LookForStringInPodExec looks for the given string in the output of a command
// executed in the first container of the specified pod.
// TODO(alejandrox1): move to pod/ subpkg once kubectl methods are refactored.
func LookForStringInPodExec(ns, podName string, command []string, expectedString string, timeout time.Duration) (result string, err error) {
	return LookForStringInPodExecToContainer(ns, podName, "", command, expectedString, timeout)
}

// LookForStringInPodExecToContainer looks for the given string in the output of a
// command executed in the specified pod container, or the first container if not specified.
func LookForStringInPodExecToContainer(ns, podName, containerName string, command []string, expectedString string, timeout time.Duration) (result string, err error) {
	return lookForString(expectedString, timeout, func() string {
		args := []string{"exec", podName, fmt.Sprintf("--namespace=%v", ns)}
		if len(containerName) > 0 {
			args = append(args, fmt.Sprintf("--container=%s", containerName))
		}
		args = append(args, "--")
		args = append(args, command...)
		return RunKubectlOrDie(ns, args...)
	})
}

// lookForString looks for the given string in the output of fn, repeatedly calling fn until
// the timeout is reached or the string is found. Returns the last output and possibly an
// error if the string was not found.
// TODO(alejandrox1): move to pod/ subpkg once kubectl methods are refactored.
func lookForString(expectedString string, timeout time.Duration, fn func() string) (result string, err error) {
	for t := time.Now(); time.Since(t) < timeout; time.Sleep(Poll) {
		result = fn()
		if strings.Contains(result, expectedString) {
			return
		}
	}
	err = fmt.Errorf("Failed to find \"%s\", last result: \"%s\"", expectedString, result)
	return
}

// KubectlBuilder is used to build, customize and execute a kubectl Command.
// Add more functions to customize the builder as needed.
type KubectlBuilder struct {
	cmd     *exec.Cmd
	timeout <-chan time.Time
}

// NewKubectlCommand returns a KubectlBuilder for running kubectl.
func NewKubectlCommand(namespace string, args ...string) *KubectlBuilder {
	b := new(KubectlBuilder)
	tk := e2ekubectl.NewTestKubeconfig(TestContext.CertDir, TestContext.Host, TestContext.KubeConfig, TestContext.KubeContext, TestContext.KubectlPath, namespace)
	b.cmd = tk.KubectlCmd(args...)
	return b
}

// WithEnv sets the given environment and returns itself.
func (b *KubectlBuilder) WithEnv(env []string) *KubectlBuilder {
	b.cmd.Env = env
	return b
}

// WithTimeout sets the given timeout and returns itself.
func (b *KubectlBuilder) WithTimeout(t <-chan time.Time) *KubectlBuilder {
	b.timeout = t
	return b
}

// WithStdinData sets the given data to stdin and returns itself.
func (b KubectlBuilder) WithStdinData(data string) *KubectlBuilder {
	b.cmd.Stdin = strings.NewReader(data)
	return &b
}

// WithStdinReader sets the given reader and returns itself.
func (b KubectlBuilder) WithStdinReader(reader io.Reader) *KubectlBuilder {
	b.cmd.Stdin = reader
	return &b
}

// ExecOrDie runs the kubectl executable or dies if an error occurs.
func (b KubectlBuilder) ExecOrDie(namespace string) string {
	str, err := b.Exec()
	// In case of i/o timeout error, try talking to the apiserver again after 2s before dying.
	// Note that we're still dying after retrying so that we can get visibility to triage it further.
	if isTimeout(err) {
		Logf("Hit i/o timeout error, talking to the server 2s later to see if it's temporary.")
		time.Sleep(2 * time.Second)
		retryStr, retryErr := RunKubectl(namespace, "version")
		Logf("stdout: %q", retryStr)
		Logf("err: %v", retryErr)
	}
	ExpectNoError(err)
	return str
}

func isTimeout(err error) bool {
	switch err := err.(type) {
	case *url.Error:
		if err, ok := err.Err.(net.Error); ok && err.Timeout() {
			return true
		}
	case net.Error:
		if err.Timeout() {
			return true
		}
	}
	return false
}

// Exec runs the kubectl executable.
func (b KubectlBuilder) Exec() (string, error) {
	stdout, _, err := b.ExecWithFullOutput()
	return stdout, err
}

// ExecWithFullOutput runs the kubectl executable, and returns the stdout and stderr.
func (b KubectlBuilder) ExecWithFullOutput() (string, string, error) {
	var stdout, stderr bytes.Buffer
	cmd := b.cmd
	cmd.Stdout, cmd.Stderr = &stdout, &stderr

	Logf("Running '%s %s'", cmd.Path, strings.Join(cmd.Args[1:], " ")) // skip arg[0] as it is printed separately
	if err := cmd.Start(); err != nil {
		return "", "", fmt.Errorf("error starting %v:\nCommand stdout:\n%v\nstderr:\n%v\nerror:\n%v", cmd, cmd.Stdout, cmd.Stderr, err)
	}
	errCh := make(chan error, 1)
	go func() {
		errCh <- cmd.Wait()
	}()
	select {
	case err := <-errCh:
		if err != nil {
			var rc = 127
			if ee, ok := err.(*exec.ExitError); ok {
				rc = int(ee.Sys().(syscall.WaitStatus).ExitStatus())
				Logf("rc: %d", rc)
			}
			return stdout.String(), stderr.String(), uexec.CodeExitError{
				Err:  fmt.Errorf("error running %v:\nCommand stdout:\n%v\nstderr:\n%v\nerror:\n%v", cmd, cmd.Stdout, cmd.Stderr, err),
				Code: rc,
			}
		}
	case <-b.timeout:
		b.cmd.Process.Kill()
		return "", "", fmt.Errorf("timed out waiting for command %v:\nCommand stdout:\n%v\nstderr:\n%v", cmd, cmd.Stdout, cmd.Stderr)
	}
	Logf("stderr: %q", stderr.String())
	Logf("stdout: %q", stdout.String())
	return stdout.String(), stderr.String(), nil
}

// RunKubectlOrDie is a convenience wrapper over KubectlBuilder.
func RunKubectlOrDie(namespace string, args ...string) string {
	return NewKubectlCommand(namespace, args...).ExecOrDie(namespace)
}

// RunKubectl is a convenience wrapper over KubectlBuilder.
func RunKubectl(namespace string, args ...string) (string, error) {
	return NewKubectlCommand(namespace, args...).Exec()
}

// RunKubectlWithFullOutput is a convenience wrapper over KubectlBuilder.
// It will also return the command's stderr.
func RunKubectlWithFullOutput(namespace string, args ...string) (string, string, error) {
	return NewKubectlCommand(namespace, args...).ExecWithFullOutput()
}

// RunKubectlOrDieInput is a convenience wrapper over KubectlBuilder that takes input to stdin.
func RunKubectlOrDieInput(namespace string, data string, args ...string) string {
	return NewKubectlCommand(namespace, args...).WithStdinData(data).ExecOrDie(namespace)
}

// RunKubectlInput is a convenience wrapper over KubectlBuilder that takes input to stdin.
func RunKubectlInput(namespace string, data string, args ...string) (string, error) {
	return NewKubectlCommand(namespace, args...).WithStdinData(data).Exec()
}

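// Illustrative usage sketch (not part of the original file): the convenience wrappers
// above cover the common cases, while NewKubectlCommand can be chained for finer
// control. The namespace "my-ns" and the manifestYAML variable are hypothetical.
//
//	// One-shot apply from stdin, failing the test on error:
//	RunKubectlOrDieInput("my-ns", manifestYAML, "apply", "-f", "-")
//
//	// Equivalent explicit builder chain with a timeout channel:
//	out := NewKubectlCommand("my-ns", "apply", "-f", "-").
//		WithStdinData(manifestYAML).
//		WithTimeout(time.After(30 * time.Second)).
//		ExecOrDie("my-ns")
//	_ = out
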

// RunKubemciWithKubeconfig is a convenience wrapper over RunKubemciCmd
func RunKubemciWithKubeconfig(args ...string) (string, error) {
	if TestContext.KubeConfig != "" {
		args = append(args, "--"+clientcmd.RecommendedConfigPathFlag+"="+TestContext.KubeConfig)
	}
	return RunKubemciCmd(args...)
}

// RunKubemciCmd is a convenience wrapper over kubectlBuilder to run kubemci.
// It assumes that kubemci exists in PATH.
func RunKubemciCmd(args ...string) (string, error) {
	// kubemci is assumed to be in PATH.
	kubemci := "kubemci"
	b := new(KubectlBuilder)
	args = append(args, "--gcp-project="+TestContext.CloudConfig.ProjectID)

	b.cmd = exec.Command(kubemci, args...)
	return b.Exec()
}

// StartCmdAndStreamOutput returns stdout and stderr after starting the given cmd.
func StartCmdAndStreamOutput(cmd *exec.Cmd) (stdout, stderr io.ReadCloser, err error) {
	stdout, err = cmd.StdoutPipe()
	if err != nil {
		return
	}
	stderr, err = cmd.StderrPipe()
	if err != nil {
		return
	}
	Logf("Asynchronously running '%s %s'", cmd.Path, strings.Join(cmd.Args, " "))
	err = cmd.Start()
	return
}

// TryKill is rough equivalent of ctrl+c for cleaning up processes. Intended to be run in defer.
func TryKill(cmd *exec.Cmd) {
	if err := cmd.Process.Kill(); err != nil {
		Logf("ERROR failed to kill command %v! The process may leak", cmd)
	}
}

// testContainerOutputMatcher runs the given pod in the given namespace and waits
// for all of the containers in the podSpec to move into the 'Success' status, and tests
// the specified container log against the given expected output using the given matcher.
func (f *Framework) testContainerOutputMatcher(scenarioName string,
	pod *v1.Pod,
	containerIndex int,
	expectedOutput []string,
	matcher func(string, ...interface{}) gomegatypes.GomegaMatcher) {
	ginkgo.By(fmt.Sprintf("Creating a pod to test %v", scenarioName))
	if containerIndex < 0 || containerIndex >= len(pod.Spec.Containers) {
		Failf("Invalid container index: %d", containerIndex)
	}
	ExpectNoError(f.MatchContainerOutput(pod, pod.Spec.Containers[containerIndex].Name, expectedOutput, matcher))
}

// ContainerType signifies container type
type ContainerType int

const (
	// FeatureEphemeralContainers allows running an ephemeral container in pod namespaces to troubleshoot a running pod
	FeatureEphemeralContainers featuregate.Feature = "EphemeralContainers"
	// Containers is for normal containers
	Containers ContainerType = 1 << iota
	// InitContainers is for init containers
	InitContainers
	// EphemeralContainers is for ephemeral containers
	EphemeralContainers
)

// allFeatureEnabledContainers returns a ContainerType mask which includes all container
// types except for the ones guarded by feature gate.
// Copied from pkg/api/v1/pod to avoid pulling extra dependencies
func allFeatureEnabledContainers() ContainerType {
	containerType := AllContainers
	if !utilfeature.DefaultFeatureGate.Enabled(FeatureEphemeralContainers) {
		containerType &= ^EphemeralContainers
	}
	return containerType
}

// ContainerVisitor is called with each container spec, and returns true
// if visiting should continue.
// Copied from pkg/api/v1/pod to avoid pulling extra dependencies
type ContainerVisitor func(container *v1.Container, containerType ContainerType) (shouldContinue bool)

// visitContainers invokes the visitor function with a pointer to every container
// spec in the given pod spec with type set in mask. If visitor returns false,
// visiting is short-circuited. visitContainers returns true if visiting completes,
// false if visiting was short-circuited.
// Copied from pkg/api/v1/pod to avoid pulling extra dependencies
func visitContainers(podSpec *v1.PodSpec, mask ContainerType, visitor ContainerVisitor) bool {
	if mask&InitContainers != 0 {
		for i := range podSpec.InitContainers {
			if !visitor(&podSpec.InitContainers[i], InitContainers) {
				return false
			}
		}
	}
	if mask&Containers != 0 {
		for i := range podSpec.Containers {
			if !visitor(&podSpec.Containers[i], Containers) {
				return false
			}
		}
	}
	if mask&EphemeralContainers != 0 {
		for i := range podSpec.EphemeralContainers {
			if !visitor((*v1.Container)(&podSpec.EphemeralContainers[i].EphemeralContainerCommon), EphemeralContainers) {
				return false
			}
		}
	}
	return true
}

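// Illustrative usage sketch (not part of the original file): the mask is a bit set, so
// callers pick which container classes to walk. For example, collecting the image of
// every init and regular container (ephemeral containers excluded) could look like the
// following, where `pod` is a hypothetical *v1.Pod.
//
//	var images []string
//	visitContainers(&pod.Spec, InitContainers|Containers, func(c *v1.Container, t ContainerType) bool {
//		images = append(images, c.Image)
//		return true // keep visiting; returning false would stop early
//	})
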

// MatchContainerOutput creates a pod and waits for all of its containers to exit with success.
// It then tests that the matcher with each expectedOutput matches the output of the specified container.
func (f *Framework) MatchContainerOutput(
	pod *v1.Pod,
	containerName string,
	expectedOutput []string,
	matcher func(string, ...interface{}) gomegatypes.GomegaMatcher) error {
	ns := pod.ObjectMeta.Namespace
	if ns == "" {
		ns = f.Namespace.Name
	}
	podClient := f.PodClientNS(ns)

	createdPod := podClient.Create(pod)
	defer func() {
		ginkgo.By("delete the pod")
		podClient.DeleteSync(createdPod.Name, metav1.DeleteOptions{}, DefaultPodDeletionTimeout)
	}()

	// Wait for client pod to complete.
	podErr := e2epod.WaitForPodSuccessInNamespaceTimeout(f.ClientSet, createdPod.Name, ns, f.Timeouts.PodStart)

	// Grab its logs. Get host first.
	podStatus, err := podClient.Get(context.TODO(), createdPod.Name, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("failed to get pod status: %v", err)
	}

	if podErr != nil {
		// Pod failed. Dump all logs from all containers to see what's wrong.
		_ = visitContainers(&podStatus.Spec, allFeatureEnabledContainers(), func(c *v1.Container, containerType ContainerType) bool {
			logs, err := e2epod.GetPodLogs(f.ClientSet, ns, podStatus.Name, c.Name)
			if err != nil {
				Logf("Failed to get logs from node %q pod %q container %q: %v",
					podStatus.Spec.NodeName, podStatus.Name, c.Name, err)
			} else {
				Logf("Output of node %q pod %q container %q: %s", podStatus.Spec.NodeName, podStatus.Name, c.Name, logs)
			}
			return true
		})
		return fmt.Errorf("expected pod %q success: %v", createdPod.Name, podErr)
	}

	Logf("Trying to get logs from node %s pod %s container %s: %v",
		podStatus.Spec.NodeName, podStatus.Name, containerName, err)

	// Sometimes the actual containers take a second to get started, try to get logs for 60s
	logs, err := e2epod.GetPodLogs(f.ClientSet, ns, podStatus.Name, containerName)
	if err != nil {
		Logf("Failed to get logs from node %q pod %q container %q. %v",
			podStatus.Spec.NodeName, podStatus.Name, containerName, err)
		return fmt.Errorf("failed to get logs from %s for %s: %v", podStatus.Name, containerName, err)
	}

	for _, expected := range expectedOutput {
		m := matcher(expected)
		matches, err := m.Match(logs)
		if err != nil {
			return fmt.Errorf("expected %q in container output: %v", expected, err)
		} else if !matches {
			return fmt.Errorf("expected %q in container output: %s", expected, m.FailureMessage(logs))
		}
	}

	return nil
}

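// Illustrative usage sketch (not part of the original file): callers usually pass a
// gomega matcher constructor such as gomega.ContainSubstring, which is how
// testContainerOutputMatcher above wires it up. The pod spec and expected strings
// below are hypothetical.
//
//	err := f.MatchContainerOutput(pod, pod.Spec.Containers[0].Name,
//		[]string{"hello", "world"}, gomega.ContainSubstring)
//	ExpectNoError(err, "container log did not contain the expected strings")
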

// EventsLister is a func that lists events.
type EventsLister func(opts metav1.ListOptions, ns string) (*v1.EventList, error)

// dumpEventsInNamespace dumps events in the given namespace.
func dumpEventsInNamespace(eventsLister EventsLister, namespace string) {
	ginkgo.By(fmt.Sprintf("Collecting events from namespace %q.", namespace))
	events, err := eventsLister(metav1.ListOptions{}, namespace)
	ExpectNoError(err, "failed to list events in namespace %q", namespace)

	ginkgo.By(fmt.Sprintf("Found %d events.", len(events.Items)))
	// Sort events by their first timestamp
	sortedEvents := events.Items
	if len(sortedEvents) > 1 {
		sort.Sort(byFirstTimestamp(sortedEvents))
	}
	for _, e := range sortedEvents {
		Logf("At %v - event for %v: %v %v: %v", e.FirstTimestamp, e.InvolvedObject.Name, e.Source, e.Reason, e.Message)
	}
	// Note that we don't wait for any Cleanup to propagate, which means
	// that if you delete a bunch of pods right before ending your test,
	// you may or may not see the killing/deletion/Cleanup events.
}

// DumpAllNamespaceInfo dumps events, pods and nodes information in the given namespace.
func DumpAllNamespaceInfo(c clientset.Interface, namespace string) {
	dumpEventsInNamespace(func(opts metav1.ListOptions, ns string) (*v1.EventList, error) {
		return c.CoreV1().Events(ns).List(context.TODO(), opts)
	}, namespace)

	e2epod.DumpAllPodInfoForNamespace(c, namespace)

	// If the cluster is large, then the following logs are basically useless, because:
	// 1. it takes tens of minutes or hours to grab all of them
	// 2. there are so many of them that working with them is mostly impossible
	// So we dump them only if the cluster is relatively small.
	maxNodesForDump := TestContext.MaxNodesToGather
	nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		Logf("unable to fetch node list: %v", err)
		return
	}
	if len(nodes.Items) <= maxNodesForDump {
		dumpAllNodeInfo(c, nodes)
	} else {
		Logf("skipping dumping cluster info - cluster too large")
	}
}

// byFirstTimestamp sorts a slice of events by first timestamp, using their involvedObject's name as a tie breaker.
type byFirstTimestamp []v1.Event

func (o byFirstTimestamp) Len() int      { return len(o) }
func (o byFirstTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }

func (o byFirstTimestamp) Less(i, j int) bool {
	if o[i].FirstTimestamp.Equal(&o[j].FirstTimestamp) {
		return o[i].InvolvedObject.Name < o[j].InvolvedObject.Name
	}
	return o[i].FirstTimestamp.Before(&o[j].FirstTimestamp)
}

func dumpAllNodeInfo(c clientset.Interface, nodes *v1.NodeList) {
	names := make([]string, len(nodes.Items))
	for ix := range nodes.Items {
		names[ix] = nodes.Items[ix].Name
	}
	DumpNodeDebugInfo(c, names, Logf)
}

// DumpNodeDebugInfo dumps debug information of the given nodes.
func DumpNodeDebugInfo(c clientset.Interface, nodeNames []string, logFunc func(fmt string, args ...interface{})) {
	for _, n := range nodeNames {
		logFunc("\nLogging node info for node %v", n)
		node, err := c.CoreV1().Nodes().Get(context.TODO(), n, metav1.GetOptions{})
		if err != nil {
			logFunc("Error getting node info %v", err)
		}
		logFunc("Node Info: %v", node)

		logFunc("\nLogging kubelet events for node %v", n)
		for _, e := range getNodeEvents(c, n) {
			logFunc("source %v type %v message %v reason %v first ts %v last ts %v, involved obj %+v",
				e.Source, e.Type, e.Message, e.Reason, e.FirstTimestamp, e.LastTimestamp, e.InvolvedObject)
		}
		logFunc("\nLogging pods the kubelet thinks is on node %v", n)
		podList, err := getKubeletPods(c, n)
		if err != nil {
			logFunc("Unable to retrieve kubelet pods for node %v: %v", n, err)
			continue
		}
		for _, p := range podList.Items {
			logFunc("%v started at %v (%d+%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.InitContainerStatuses), len(p.Status.ContainerStatuses))
			for _, c := range p.Status.InitContainerStatuses {
				logFunc("\tInit container %v ready: %v, restart count %v",
					c.Name, c.Ready, c.RestartCount)
			}
			for _, c := range p.Status.ContainerStatuses {
				logFunc("\tContainer %v ready: %v, restart count %v",
					c.Name, c.Ready, c.RestartCount)
			}
		}
		e2emetrics.HighLatencyKubeletOperations(c, 10*time.Second, n, logFunc)
		// TODO: Log node resource info
	}
}

// getKubeletPods retrieves the list of pods on the kubelet.
func getKubeletPods(c clientset.Interface, node string) (*v1.PodList, error) {
	var client restclient.Result
	finished := make(chan struct{}, 1)
	go func() {
		// call chain tends to hang in some cases when Node is not ready. Add an artificial timeout for this call. #22165
		client = c.CoreV1().RESTClient().Get().
			Resource("nodes").
			SubResource("proxy").
			Name(fmt.Sprintf("%v:%v", node, KubeletPort)).
			Suffix("pods").
			Do(context.TODO())

		finished <- struct{}{}
	}()
	select {
	case <-finished:
		result := &v1.PodList{}
		if err := client.Into(result); err != nil {
			return &v1.PodList{}, err
		}
		return result, nil
	case <-time.After(PodGetTimeout):
		return &v1.PodList{}, fmt.Errorf("Waiting up to %v for getting the list of pods", PodGetTimeout)
	}
}

// getNodeEvents returns kubelet events from the given node. This includes kubelet
// restart and node unhealthy events. Note that listing events like this will mess
// with latency metrics, beware of calling it during a test.
func getNodeEvents(c clientset.Interface, nodeName string) []v1.Event {
	selector := fields.Set{
		"involvedObject.kind":      "Node",
		"involvedObject.name":      nodeName,
		"involvedObject.namespace": metav1.NamespaceAll,
		"source":                   "kubelet",
	}.AsSelector().String()
	options := metav1.ListOptions{FieldSelector: selector}
	events, err := c.CoreV1().Events(metav1.NamespaceSystem).List(context.TODO(), options)
	if err != nil {
		Logf("Unexpected error retrieving node events %v", err)
		return []v1.Event{}
	}
	return events.Items
}

// WaitForAllNodesSchedulable waits up to timeout for all nodes
// (but TestContext.AllowedNotReadyNodes) to become schedulable.
func WaitForAllNodesSchedulable(c clientset.Interface, timeout time.Duration) error {
	Logf("Waiting up to %v for all (but %d) nodes to be schedulable", timeout, TestContext.AllowedNotReadyNodes)

	return wait.PollImmediate(
		30*time.Second,
		timeout,
		e2enode.CheckReadyForTests(c, TestContext.NonblockingTaints, TestContext.AllowedNotReadyNodes, largeClusterThreshold),
	)
}

// AddOrUpdateLabelOnNode adds the given label key and value to the given node, or updates the value.
func AddOrUpdateLabelOnNode(c clientset.Interface, nodeName string, labelKey, labelValue string) {
	ExpectNoError(testutils.AddLabelsToNode(c, nodeName, map[string]string{labelKey: labelValue}))
}

// ExpectNodeHasLabel expects that the given node has the given label pair.
func ExpectNodeHasLabel(c clientset.Interface, nodeName string, labelKey string, labelValue string) {
	ginkgo.By("verifying the node has the label " + labelKey + " " + labelValue)
	node, err := c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
	ExpectNoError(err)
	ExpectEqual(node.Labels[labelKey], labelValue)
}

// RemoveLabelOffNode is for cleaning up labels temporarily added to a node;
// it won't fail if the target label doesn't exist or has been removed.
func RemoveLabelOffNode(c clientset.Interface, nodeName string, labelKey string) {
	ginkgo.By("removing the label " + labelKey + " off the node " + nodeName)
	ExpectNoError(testutils.RemoveLabelOffNode(c, nodeName, []string{labelKey}))

	ginkgo.By("verifying the node doesn't have the label " + labelKey)
	ExpectNoError(testutils.VerifyLabelsRemoved(c, nodeName, []string{labelKey}))
}

// ExpectNodeHasTaint expects that the node has the given taint.
func ExpectNodeHasTaint(c clientset.Interface, nodeName string, taint *v1.Taint) {
	ginkgo.By("verifying the node has the taint " + taint.ToString())
	if has, err := NodeHasTaint(c, nodeName, taint); !has {
		ExpectNoError(err)
		Failf("Failed to find taint %s on node %s", taint.ToString(), nodeName)
	}
}

// NodeHasTaint returns true if the node has the given taint, else returns false.
func NodeHasTaint(c clientset.Interface, nodeName string, taint *v1.Taint) (bool, error) {
	node, err := c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
	if err != nil {
		return false, err
	}

	nodeTaints := node.Spec.Taints

	if len(nodeTaints) == 0 || !taintExists(nodeTaints, taint) {
		return false, nil
	}
	return true, nil
}

// RunHostCmd runs the given cmd in the context of the given pod using `kubectl exec`
// inside of a shell.
func RunHostCmd(ns, name, cmd string) (string, error) {
	return RunKubectl(ns, "exec", name, "--", "/bin/sh", "-x", "-c", cmd)
}

// RunHostCmdWithFullOutput runs the given cmd in the context of the given pod using `kubectl exec`
// inside of a shell. It will also return the command's stderr.
func RunHostCmdWithFullOutput(ns, name, cmd string) (string, string, error) {
	return RunKubectlWithFullOutput(ns, "exec", name, "--", "/bin/sh", "-x", "-c", cmd)
}

// RunHostCmdOrDie calls RunHostCmd and dies on error.
func RunHostCmdOrDie(ns, name, cmd string) string {
	stdout, err := RunHostCmd(ns, name, cmd)
	Logf("stdout: %v", stdout)
	ExpectNoError(err)
	return stdout
}

// RunHostCmdWithRetries calls RunHostCmd and retries all errors
// until it succeeds or the specified timeout expires.
// This can be used with idempotent commands to deflake transient Node issues.
func RunHostCmdWithRetries(ns, name, cmd string, interval, timeout time.Duration) (string, error) {
	start := time.Now()
	for {
		out, err := RunHostCmd(ns, name, cmd)
		if err == nil {
			return out, nil
		}
		if elapsed := time.Since(start); elapsed > timeout {
			return out, fmt.Errorf("RunHostCmd still failed after %v: %v", elapsed, err)
		}
		Logf("Waiting %v to retry failed RunHostCmd: %v", interval, err)
		time.Sleep(interval)
	}
}

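// Illustrative usage sketch (not part of the original file): because the command is
// retried on any error, it should be idempotent. Reading a file from a running pod,
// for instance (the pod and namespace names are hypothetical):
//
//	out, err := RunHostCmdWithRetries("my-ns", "my-pod", "cat /etc/hostname", Poll, PodStartShortTimeout)
//	ExpectNoError(err)
//	Logf("hostname inside pod: %s", out)
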

// AllNodesReady checks whether all registered nodes are ready.
// TODO: we should change the AllNodesReady call in AfterEach to WaitForAllNodesHealthy,
// and figure out how to do it in a configurable way, as we can't expect all setups to run
// default test add-ons.
func AllNodesReady(c clientset.Interface, timeout time.Duration) error {
	Logf("Waiting up to %v for all (but %d) nodes to be ready", timeout, TestContext.AllowedNotReadyNodes)

	var notReady []*v1.Node
	err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
		notReady = nil
		// It should be OK to list unschedulable Nodes here.
		nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
		if err != nil {
			return false, err
		}
		for i := range nodes.Items {
			node := &nodes.Items[i]
			if !e2enode.IsConditionSetAsExpected(node, v1.NodeReady, true) {
				notReady = append(notReady, node)
			}
		}
		// Framework allows for <TestContext.AllowedNotReadyNodes> nodes to be non-ready,
		// to make it possible e.g. for incorrect deployment of some small percentage
		// of nodes (which we allow in cluster validation). Some nodes that are not
		// provisioned correctly at startup will never become ready (e.g. when something
		// won't install correctly), so we can't expect them to be ready at any point.
		return len(notReady) <= TestContext.AllowedNotReadyNodes, nil
	})

	if err != nil && err != wait.ErrWaitTimeout {
		return err
	}

	if len(notReady) > TestContext.AllowedNotReadyNodes {
		msg := ""
		for _, node := range notReady {
			msg = fmt.Sprintf("%s, %s", msg, node.Name)
		}
		return fmt.Errorf("Not ready nodes: %#v", msg)
	}
	return nil
}

// LookForStringInLog looks for the given string in the log of a specific pod container.
func LookForStringInLog(ns, podName, container, expectedString string, timeout time.Duration) (result string, err error) {
	return lookForString(expectedString, timeout, func() string {
		return RunKubectlOrDie(ns, "logs", podName, container)
	})
}

// EnsureLoadBalancerResourcesDeleted ensures that cloud load balancer resources that were created
// are actually cleaned up. Currently only implemented for GCE/GKE.
func EnsureLoadBalancerResourcesDeleted(ip, portRange string) error {
	return TestContext.CloudConfig.Provider.EnsureLoadBalancerResourcesDeleted(ip, portRange)
}

// CoreDump SSHs to the master and all nodes and dumps their logs into dir.
// It shells out to cluster/log-dump/log-dump.sh to accomplish this.
func CoreDump(dir string) {
	if TestContext.DisableLogDump {
		Logf("Skipping dumping logs from cluster")
		return
	}
	var cmd *exec.Cmd
	if TestContext.LogexporterGCSPath != "" {
		Logf("Dumping logs from nodes to GCS directly at path: %s", TestContext.LogexporterGCSPath)
		cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir, TestContext.LogexporterGCSPath)
	} else {
		Logf("Dumping logs locally to: %s", dir)
		cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir)
	}
	cmd.Env = append(os.Environ(), fmt.Sprintf("LOG_DUMP_SYSTEMD_SERVICES=%s", parseSystemdServices(TestContext.SystemdServices)))
	cmd.Env = append(os.Environ(), fmt.Sprintf("LOG_DUMP_SYSTEMD_JOURNAL=%v", TestContext.DumpSystemdJournal))

	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		Logf("Error running cluster/log-dump/log-dump.sh: %v", err)
	}
}

// parseSystemdServices converts the services separator from comma to space.
func parseSystemdServices(services string) string {
	return strings.TrimSpace(strings.Replace(services, ",", " ", -1))
}

// RunCmd runs cmd using args and returns its stdout and stderr. It also outputs
// cmd's stdout and stderr to their respective OS streams.
func RunCmd(command string, args ...string) (string, string, error) {
	return RunCmdEnv(nil, command, args...)
}

// RunCmdEnv runs cmd with the provided environment and args and
// returns its stdout and stderr. It also outputs cmd's stdout and
// stderr to their respective OS streams.
func RunCmdEnv(env []string, command string, args ...string) (string, string, error) {
	Logf("Running %s %v", command, args)
	var bout, berr bytes.Buffer
	cmd := exec.Command(command, args...)
	// We also output to the OS stdout/stderr to aid in debugging in case cmd
	// hangs and never returns before the test gets killed.
	//
	// This creates some ugly output because gcloud doesn't always provide
	// newlines.
	cmd.Stdout = io.MultiWriter(os.Stdout, &bout)
	cmd.Stderr = io.MultiWriter(os.Stderr, &berr)
	cmd.Env = env
	err := cmd.Run()
	stdout, stderr := bout.String(), berr.String()
	if err != nil {
		return "", "", fmt.Errorf("error running %s %v; got error %v, stdout %q, stderr %q",
			command, args, err, stdout, stderr)
	}
	return stdout, stderr, nil
}

// getControlPlaneAddresses returns the externalIP, internalIP and hostname fields of control plane nodes.
// If any of these is unavailable, empty slices are returned.
func getControlPlaneAddresses(c clientset.Interface) ([]string, []string, []string) {
	var externalIPs, internalIPs, hostnames []string

	// Populate the internal IPs.
	eps, err := c.CoreV1().Endpoints(metav1.NamespaceDefault).Get(context.TODO(), "kubernetes", metav1.GetOptions{})
	if err != nil {
		Failf("Failed to get kubernetes endpoints: %v", err)
	}
	for _, subset := range eps.Subsets {
		for _, address := range subset.Addresses {
			if address.IP != "" {
				internalIPs = append(internalIPs, address.IP)
			}
		}
	}

	// Populate the external IP/hostname.
	hostURL, err := url.Parse(TestContext.Host)
	if err != nil {
		Failf("Failed to parse hostname: %v", err)
	}
	if net.ParseIP(hostURL.Host) != nil {
		externalIPs = append(externalIPs, hostURL.Host)
	} else {
		hostnames = append(hostnames, hostURL.Host)
	}

	return externalIPs, internalIPs, hostnames
}

// GetControlPlaneAddresses returns all IP addresses on which the kubelet can reach the control plane.
// It may return internal and external IPs, even if we expect for
// e.g. internal IPs to be used (issue #56787), so that we can be
// sure to block the control plane fully during tests.
func GetControlPlaneAddresses(c clientset.Interface) []string {
	externalIPs, internalIPs, _ := getControlPlaneAddresses(c)

	ips := sets.NewString()
	switch TestContext.Provider {
	case "gce", "gke":
		for _, ip := range externalIPs {
			ips.Insert(ip)
		}
		for _, ip := range internalIPs {
			ips.Insert(ip)
		}
	case "aws":
		ips.Insert(awsMasterIP)
	default:
		Failf("This test is not supported for provider %s and should be disabled", TestContext.Provider)
	}
	return ips.List()
}

// CreateEmptyFileOnPod creates an empty file at the given path on the pod.
// TODO(alejandrox1): move to subpkg pod once kubectl methods have been refactored.
func CreateEmptyFileOnPod(namespace string, podName string, filePath string) error {
	_, err := RunKubectl(namespace, "exec", podName, "--", "/bin/sh", "-c", fmt.Sprintf("touch %s", filePath))
	return err
}

// DumpDebugInfo dumps debug info of tests.
func DumpDebugInfo(c clientset.Interface, ns string) {
	sl, _ := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{LabelSelector: labels.Everything().String()})
	for _, s := range sl.Items {
		desc, _ := RunKubectl(ns, "describe", "po", s.Name)
		Logf("\nOutput of kubectl describe %v:\n%v", s.Name, desc)

		l, _ := RunKubectl(ns, "logs", s.Name, "--tail=100")
		Logf("\nLast 100 log lines of %v:\n%v", s.Name, l)
	}
}

// PrettyPrintJSON converts metrics to JSON format.
func PrettyPrintJSON(metrics interface{}) string {
	output := &bytes.Buffer{}
	if err := json.NewEncoder(output).Encode(metrics); err != nil {
		Logf("Error building encoder: %v", err)
		return ""
	}
	formatted := &bytes.Buffer{}
	if err := json.Indent(formatted, output.Bytes(), "", "  "); err != nil {
		Logf("Error indenting: %v", err)
		return ""
	}
	return string(formatted.Bytes())
}

// taintExists checks if the given taint exists in the list of taints. Returns true if it exists, false otherwise.
func taintExists(taints []v1.Taint, taintToFind *v1.Taint) bool {
	for _, taint := range taints {
		if taint.MatchTaint(taintToFind) {
			return true
		}
	}
	return false
}

// WatchEventSequenceVerifier ...
// manages a watch for a given resource, ensures that events take place in a given order, retries the test on failure
//   testContext         cancellation signal across API boundaries, e.g. context.TODO()
//   dc                  sets up a client to the API
//   resourceType        specify the type of resource
//   namespace           select a namespace
//   resourceName        the name of the given resource
//   listOptions         options used to find the resource, recommended to use listOptions.labelSelector
//   expectedWatchEvents array of events which are expected to occur
//   scenario            the test itself
//   retryCleanup        a function to run which ensures that there are no dangling resources upon test failure
// This tooling relies on the test to return the events as they occur.
// The entire scenario must be run to ensure that the desired watch events arrive in order (allowing for interweaving of watch events).
// If an expected watch event is missing, we elect to clean up and run the entire scenario again.
// We try the scenario three times to allow the sequencing to fail a couple of times.
func WatchEventSequenceVerifier(ctx context.Context, dc dynamic.Interface, resourceType schema.GroupVersionResource, namespace string, resourceName string, listOptions metav1.ListOptions, expectedWatchEvents []watch.Event, scenario func(*watchtools.RetryWatcher) []watch.Event, retryCleanup func() error) {
	listWatcher := &cache.ListWatch{
		WatchFunc: func(listOptions metav1.ListOptions) (watch.Interface, error) {
			return dc.Resource(resourceType).Namespace(namespace).Watch(ctx, listOptions)
		},
	}

	retries := 3
retriesLoop:
	for try := 1; try <= retries; try++ {
		initResource, err := dc.Resource(resourceType).Namespace(namespace).List(ctx, listOptions)
		ExpectNoError(err, "Failed to fetch initial resource")

		resourceWatch, err := watchtools.NewRetryWatcher(initResource.GetResourceVersion(), listWatcher)
		ExpectNoError(err, "Failed to create a resource watch of %v in namespace %v", resourceType.Resource, namespace)

		// NOTE the test may need access to the events to see what's going on, such as a change in status
		actualWatchEvents := scenario(resourceWatch)
		errs := sets.NewString()
		ExpectEqual(len(expectedWatchEvents) <= len(actualWatchEvents), true, "Error: actual watch events amount (%d) must be greater than or equal to expected watch events amount (%d)", len(actualWatchEvents), len(expectedWatchEvents))

		totalValidWatchEvents := 0
		foundEventIndexes := map[int]*int{}

		for watchEventIndex, expectedWatchEvent := range expectedWatchEvents {
			foundExpectedWatchEvent := false
		actualWatchEventsLoop:
			for actualWatchEventIndex, actualWatchEvent := range actualWatchEvents {
				if foundEventIndexes[actualWatchEventIndex] != nil {
					continue actualWatchEventsLoop
				}
				if actualWatchEvent.Type == expectedWatchEvent.Type {
					foundExpectedWatchEvent = true
					foundEventIndexes[actualWatchEventIndex] = &watchEventIndex
					break actualWatchEventsLoop
				}
			}
			if foundExpectedWatchEvent == false {
				errs.Insert(fmt.Sprintf("Watch event %v not found", expectedWatchEvent.Type))
			}
			totalValidWatchEvents++
		}
		err = retryCleanup()
		ExpectNoError(err, "Error occurred when cleaning up resources")
		if errs.Len() > 0 && try < retries {
			fmt.Println("invariants violated:\n", strings.Join(errs.List(), "\n - "))
			continue retriesLoop
		}
		ExpectEqual(errs.Len() > 0, false, strings.Join(errs.List(), "\n - "))
		ExpectEqual(totalValidWatchEvents, len(expectedWatchEvents), "Error: there must be an equal amount of total valid watch events (%d) and expected watch events (%d)", totalValidWatchEvents, len(expectedWatchEvents))
		break retriesLoop
	}
}