From 36233b985ba413da498e010a569e241bab4a817a Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 5 Nov 2021 11:58:12 -0400 Subject: [PATCH 1/5] run_remote.go: factor out registerGceHostIP() Prep for future patch. --- test/e2e_node/runner/remote/run_remote.go | 28 ++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index 5f3285120ed..87575b82c49 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -436,28 +436,30 @@ func getImageMetadata(input string) *compute.Metadata { return &ret } -// Run tests in archive against host -func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult { +func registerGceHostIP(host string) error { instance, err := computeService.Instances.Get(*project, *zone, host).Do() if err != nil { - return &TestResult{ - err: err, - host: host, - exitOk: false, - } + return err } if strings.ToUpper(instance.Status) != "RUNNING" { - err = fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status) - return &TestResult{ - err: err, - host: host, - exitOk: false, - } + return fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status) } externalIP := getExternalIP(instance) if len(externalIP) > 0 { remote.AddHostnameIP(host, externalIP) } + return nil +} + +// Run tests in archive against host +func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult { + if err := registerGceHostIP(host); err != nil { + return &TestResult{ + err: err, + host: host, + exitOk: false, + } + } path, err := arc.getArchive() if err != nil { From 032dbd206384b5a092a21934e97ce6545c26d408 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 5 Nov 2021 11:59:14 -0400 Subject: [PATCH 2/5] run_remote.go: move registerGceHostIP() call to testImage() I.e. 
don't assume that `testHost` is called on a GCE host. Prep for future patch. --- test/e2e_node/runner/remote/run_remote.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index 87575b82c49..0c60628d6de 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -453,14 +453,6 @@ func registerGceHostIP(host string) error { // Run tests in archive against host func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult { - if err := registerGceHostIP(host); err != nil { - return &TestResult{ - err: err, - host: host, - exitOk: false, - } - } - path, err := arc.getArchive() if err != nil { // Don't log fatal because we need to do any needed cleanup contained in "defer" statements @@ -555,6 +547,14 @@ func testImage(imageConfig *internalGCEImage, junitFileName string) *TestResult // If we are going to delete the instance, don't bother with cleaning up the files deleteFiles := !*deleteInstances && *cleanup + if err = registerGceHostIP(host); err != nil { + return &TestResult{ + err: err, + host: host, + exitOk: false, + } + } + result := testHost(host, deleteFiles, imageConfig.imageDesc, junitFileName, ginkgoFlagsStr) // This is a temporary solution to collect serial node serial log. Only port 1 contains useful information. // TODO(random-liu): Extract out and unify log collection logic with cluste e2e. From 591f4cdb7742ea1b5990b9d0273a9f210bccb077 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 5 Nov 2021 12:07:42 -0400 Subject: [PATCH 3/5] run_remote.go: factor out prepareGceImages() Mostly a pure code move. Only changed the `klog.Fatalf` to `fmt.Errorf`. Prep for future patch. 
--- test/e2e_node/runner/remote/run_remote.go | 155 ++++++++++++---------- 1 file changed, 84 insertions(+), 71 deletions(-) diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index 0c60628d6de..b94d1576a5a 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -232,9 +232,89 @@ func main() { klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err) } + var gceImages *internalImageConfig + if gceImages, err = prepareGceImages(); err != nil { + klog.Fatalf("While preparing GCE images: %v", err) + } + + if *instanceNamePrefix == "" { + *instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8] + } + + // Setup coloring + stat, _ := os.Stdout.Stat() + useColor := (stat.Mode() & os.ModeCharDevice) != 0 + blue := "" + noColour := "" + if useColor { + blue = "\033[0;34m" + noColour = "\033[0m" + } + + go arc.getArchive() + defer arc.deleteArchive() + + results := make(chan *TestResult) + running := 0 + if gceImages != nil { + for shortName := range gceImages.images { + imageConfig := gceImages.images[shortName] + fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image) + running++ + go func(image *internalGCEImage, junitFileName string) { + results <- testImage(image, junitFileName) + }(&imageConfig, shortName) + } + } + if *hosts != "" { + for _, host := range strings.Split(*hosts, ",") { + fmt.Printf("Initializing e2e tests using host %s.\n", host) + running++ + go func(host string, junitFileName string) { + results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags) + }(host, host) + } + } + + // Wait for all tests to complete and emit the results + errCount := 0 + exitOk := true + for i := 0; i < running; i++ { + tr := <-results + host := tr.host + fmt.Println() // Print an empty line + 
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour) + fmt.Printf("%s> START TEST >%s\n", blue, noColour) + fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour) + fmt.Printf("Start Test Suite on Host %s\n", host) + fmt.Printf("%s\n", tr.output) + if tr.err != nil { + errCount++ + fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err) + } else { + fmt.Printf("Success Finished Test Suite on Host %s\n", host) + } + exitOk = exitOk && tr.exitOk + fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour) + fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour) + fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour) + fmt.Println() // Print an empty line + } + // Set the exit code if there were failures + if !exitOk { + fmt.Printf("Failure: %d errors encountered.\n", errCount) + callGubernator(*gubernator) + arc.deleteArchive() + os.Exit(1) + } + callGubernator(*gubernator) +} + +func prepareGceImages() (*internalImageConfig, error) { gceImages := &internalImageConfig{ images: make(map[string]internalGCEImage), } + // Parse images from given config file and convert them to internalGCEImage. if *imageConfigFile != "" { configPath := *imageConfigFile @@ -244,14 +324,14 @@ func main() { imageConfigData, err := ioutil.ReadFile(configPath) if err != nil { - klog.Fatalf("Could not read image config file provided: %v", err) + return nil, fmt.Errorf("Could not read image config file provided: %v", err) } // Unmarshal the given image config file. All images for this test run will be organized into a map. // shortName->GCEImage, e.g cos-stable->cos-stable-81-12871-103-0. 
externalImageConfig := ImageConfig{Images: make(map[string]GCEImage)} err = yaml.Unmarshal(imageConfigData, &externalImageConfig) if err != nil { - klog.Fatalf("Could not parse image config file: %v", err) + return nil, fmt.Errorf("Could not parse image config file: %v", err) } for shortName, imageConfig := range externalImageConfig.Images { @@ -259,7 +339,7 @@ func main() { if (imageConfig.ImageRegex != "" || imageConfig.ImageFamily != "") && imageConfig.Image == "" { image, err = getGCEImage(imageConfig.ImageRegex, imageConfig.ImageFamily, imageConfig.Project) if err != nil { - klog.Fatalf("Could not retrieve a image based on image regex %q and family %q: %v", + return nil, fmt.Errorf("Could not retrieve a image based on image regex %q and family %q: %v", imageConfig.ImageRegex, imageConfig.ImageFamily, err) } } else { @@ -318,75 +398,8 @@ func main() { klog.Fatal("Must specify --project flag to launch images into") } } - if *instanceNamePrefix == "" { - *instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8] - } - // Setup coloring - stat, _ := os.Stdout.Stat() - useColor := (stat.Mode() & os.ModeCharDevice) != 0 - blue := "" - noColour := "" - if useColor { - blue = "\033[0;34m" - noColour = "\033[0m" - } - - go arc.getArchive() - defer arc.deleteArchive() - - results := make(chan *TestResult) - running := 0 - for shortName := range gceImages.images { - imageConfig := gceImages.images[shortName] - fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image) - running++ - go func(image *internalGCEImage, junitFileName string) { - results <- testImage(image, junitFileName) - }(&imageConfig, shortName) - } - if *hosts != "" { - for _, host := range strings.Split(*hosts, ",") { - fmt.Printf("Initializing e2e tests using host %s.\n", host) - running++ - go func(host string, junitFileName string) { - results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags) - }(host, host) - } - } - - // Wait for 
all tests to complete and emit the results - errCount := 0 - exitOk := true - for i := 0; i < running; i++ { - tr := <-results - host := tr.host - fmt.Println() // Print an empty line - fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour) - fmt.Printf("%s> START TEST >%s\n", blue, noColour) - fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour) - fmt.Printf("Start Test Suite on Host %s\n", host) - fmt.Printf("%s\n", tr.output) - if tr.err != nil { - errCount++ - fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err) - } else { - fmt.Printf("Success Finished Test Suite on Host %s\n", host) - } - exitOk = exitOk && tr.exitOk - fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour) - fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour) - fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour) - fmt.Println() // Print an empty line - } - // Set the exit code if there were failures - if !exitOk { - fmt.Printf("Failure: %d errors encountered.\n", errCount) - callGubernator(*gubernator) - arc.deleteArchive() - os.Exit(1) - } - callGubernator(*gubernator) + return gceImages, nil } func callGubernator(gubernator bool) { From e0723c1e64f314b0ff5178415c3d9be672dd713d Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Mon, 18 Oct 2021 22:30:11 -0400 Subject: [PATCH 4/5] test-e2e-node: add SSH_OPTIONS This allows overriding the default options. --- build/root/Makefile | 1 + hack/make-rules/test-e2e-node.sh | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/build/root/Makefile b/build/root/Makefile index 0a9c2e00fdc..f15eccd7c27 100644 --- a/build/root/Makefile +++ b/build/root/Makefile @@ -256,6 +256,7 @@ define TEST_E2E_NODE_HELP_INFO # Defaults to false. # TEST_SUITE: For REMOTE=true only. Test suite to use. 
Defaults to "default". # SSH_KEY: For REMOTE=true only. Path to SSH key to use. +# SSH_OPTIONS: For REMOTE=true only. SSH options to use. # RUNTIME_CONFIG: The runtime configuration for the API server on the node e2e tests. # # Example: diff --git a/hack/make-rules/test-e2e-node.sh b/hack/make-rules/test-e2e-node.sh index c951afbadf9..8017006d3a5 100755 --- a/hack/make-rules/test-e2e-node.sh +++ b/hack/make-rules/test-e2e-node.sh @@ -49,6 +49,7 @@ extra_envs=${EXTRA_ENVS:-} runtime_config=${RUNTIME_CONFIG:-} ssh_user=${SSH_USER:-"${USER}"} ssh_key=${SSH_KEY:-} +ssh_options=${SSH_OPTIONS:-} kubelet_config_file=${KUBELET_CONFIG_FILE:-"test/e2e_node/jenkins/default-kubelet-config.yaml"} # Parse the flags to pass to ginkgo @@ -177,7 +178,8 @@ if [ "${remote}" = true ] ; then --delete-instances="${delete_instances}" --test_args="${test_args}" --instance-metadata="${metadata}" \ --image-config-file="${image_config_file}" --system-spec-name="${system_spec_name}" \ --runtime-config="${runtime_config}" --preemptible-instances="${preemptible_instances}" \ - --ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --image-config-dir="${image_config_dir}" \ + --ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \ + --image-config-dir="${image_config_dir}" \ --extra-envs="${extra_envs}" --kubelet-config-file="${kubelet_config_file}" --test-suite="${test_suite}" \ "${timeout_arg}" \ 2>&1 | tee -i "${artifacts}/build-log.txt" From 3ebd93cd02ca9e2ed7f1f6cff4d49b878f93a626 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 5 Nov 2021 12:08:39 -0400 Subject: [PATCH 5/5] test-e2e-node: support pure SSH mode Right now, `run_remote.go` only supports GCE instances. But actually running the tests is completely independent of GCE and could work just as well on any SSH-accessible machine. This patch adds a new `--mode` switch, which defaults to `gce` for backwards compatibility, but can be set to `ssh`. 
In that mode, the GCE API is not used at all, and we simply connect to the hosts given via `--hosts`. This is still better than `run_local.go` because the latter mixes build environment with test environment, which doesn't fit well with container-optimized operating systems. This is part of an effort to set up the e2e node tests on Fedora CoreOS (see https://github.com/coreos/fedora-coreos-tracker/issues/990). Patch best viewed with whitespace ignored. --- build/root/Makefile | 6 +++-- hack/make-rules/test-e2e-node.sh | 27 ++++++++++++++++++-- test/e2e_node/runner/remote/run_remote.go | 31 ++++++++++++----------- 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/build/root/Makefile b/build/root/Makefile index f15eccd7c27..3d742b4703d 100644 --- a/build/root/Makefile +++ b/build/root/Makefile @@ -214,8 +214,10 @@ define TEST_E2E_NODE_HELP_INFO # Defaults to "". # RUN_UNTIL_FAILURE: If true, pass --untilItFails to ginkgo so tests are run # repeatedly until they fail. Defaults to false. -# REMOTE: If true, run the tests on a remote host instance on GCE. Defaults -# to false. +# REMOTE: If true, run the tests on a remote host. Defaults to false. +# REMOTE_MODE: For REMOTE=true only. Mode for remote execution (e.g. gce, ssh). +# If set to "gce", an instance can be provisioned or reused from GCE. If set +# to "ssh", the provided `HOSTS` must be IP addresses or resolvable hostnames. Defaults to "gce". # ARTIFACTS: Local directory to scp test artifacts into from the remote hosts # for REMOTE=true. Local directory to write juntil xml results into for REMOTE=false. # Defaults to "/tmp/_artifacts/$$(date +%y%m%dT%H%M%S)". 
diff --git a/hack/make-rules/test-e2e-node.sh b/hack/make-rules/test-e2e-node.sh index 8017006d3a5..fb0720a77a5 100755 --- a/hack/make-rules/test-e2e-node.sh +++ b/hack/make-rules/test-e2e-node.sh @@ -38,6 +38,7 @@ skip=${SKIP-"\[Flaky\]|\[Slow\]|\[Serial\]"} parallelism=${PARALLELISM:-8} artifacts="${ARTIFACTS:-"/tmp/_artifacts/$(date +%y%m%dT%H%M%S)"}" remote=${REMOTE:-"false"} +remote_mode=${REMOTE_MODE:-"gce"} runtime=${RUNTIME:-"docker"} container_runtime_endpoint=${CONTAINER_RUNTIME_ENDPOINT:-""} image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""} @@ -87,8 +88,8 @@ if [[ ${runtime} == "remote" ]] ; then fi -if [ "${remote}" = true ] ; then - # The following options are only valid in remote run. +if [ "${remote}" = true ] && [ "${remote_mode}" = gce ] ; then + # The following options are only valid in remote GCE run. images=${IMAGES:-""} hosts=${HOSTS:-""} image_project=${IMAGE_PROJECT:-"kubernetes-node-e2e-images"} @@ -185,6 +186,28 @@ if [ "${remote}" = true ] ; then 2>&1 | tee -i "${artifacts}/build-log.txt" exit $? 
+elif [ "${remote}" = true ] && [ "${remote_mode}" = ssh ] ; then + hosts=${HOSTS:-""} + test_suite=${TEST_SUITE:-"default"} + if [[ -n "${TIMEOUT:-}" ]] ; then + timeout_arg="--test-timeout=${TIMEOUT}" + fi + + # Use cluster.local as default dns-domain + test_args='--dns-domain="'${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args} + test_args='--kubelet-flags="--cluster-domain='${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args} + + # Invoke the runner + go run test/e2e_node/runner/remote/run_remote.go --mode="ssh" --logtostderr --vmodule=*=4 \ + --hosts="${hosts}" --results-dir="${artifacts}" --ginkgo-flags="${ginkgoflags}" \ + --test_args="${test_args}" --system-spec-name="${system_spec_name}" \ + --runtime-config="${runtime_config}" \ + --ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \ + --extra-envs="${extra_envs}" --test-suite="${test_suite}" \ + "${timeout_arg}" \ + 2>&1 | tee -i "${artifacts}/build-log.txt" + exit $? + else # Refresh sudo credentials if needed if ping -c 1 -q metadata.google.internal &> /dev/null; then diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index b94d1576a5a..a46b8db73af 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -49,6 +49,7 @@ import ( "sigs.k8s.io/yaml" ) +var mode = flag.String("mode", "gce", "Mode to operate in. One of gce|ssh. Defaults to gce") var testArgs = flag.String("test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.") var testSuite = flag.String("test-suite", "default", "Test suite the runner initializes with. 
Currently support default|cadvisor|conformance") var instanceNamePrefix = flag.String("instance-name-prefix", "", "prefix for instance names") @@ -223,22 +224,22 @@ func main() { return } - if *hosts == "" && *imageConfigFile == "" && *images == "" { - klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.") - } - var err error - computeService, err = getComputeClient() - if err != nil { - klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err) - } - var gceImages *internalImageConfig - if gceImages, err = prepareGceImages(); err != nil { - klog.Fatalf("While preparing GCE images: %v", err) - } - - if *instanceNamePrefix == "" { - *instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8] + if *mode == "gce" { + if *hosts == "" && *imageConfigFile == "" && *images == "" { + klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.") + } + var err error + computeService, err = getComputeClient() + if err != nil { + klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err) + } + if gceImages, err = prepareGceImages(); err != nil { + klog.Fatalf("While preparing GCE images: %v", err) + } + if *instanceNamePrefix == "" { + *instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8] + } } // Setup coloring