diff --git a/build/root/Makefile b/build/root/Makefile
index 0a9c2e00fdc..3d742b4703d 100644
--- a/build/root/Makefile
+++ b/build/root/Makefile
@@ -214,8 +214,10 @@ define TEST_E2E_NODE_HELP_INFO
 # Defaults to "".
 # RUN_UNTIL_FAILURE: If true, pass --untilItFails to ginkgo so tests are run
 # repeatedly until they fail. Defaults to false.
-# REMOTE: If true, run the tests on a remote host instance on GCE. Defaults
-# to false.
+# REMOTE: If true, run the tests on a remote host. Defaults to false.
+# REMOTE_MODE: For REMOTE=true only. Mode for remote execution (e.g. gce, ssh).
+# If set to "gce", an instance can be provisioned or reused from GCE. If set
+# to "ssh", provided `HOSTS` must be IPs or resolvable. Defaults to "gce".
 # ARTIFACTS: Local directory to scp test artifacts into from the remote hosts
 # for REMOTE=true. Local directory to write juntil xml results into for REMOTE=false.
 # Defaults to "/tmp/_artifacts/$$(date +%y%m%dT%H%M%S)".
@@ -256,6 +258,7 @@ define TEST_E2E_NODE_HELP_INFO
 # Defaults to false.
 # TEST_SUITE: For REMOTE=true only. Test suite to use. Defaults to "default".
 # SSH_KEY: For REMOTE=true only. Path to SSH key to use.
+# SSH_OPTIONS: For REMOTE=true only. SSH options to use.
 # RUNTIME_CONFIG: The runtime configuration for the API server on the node e2e tests.
 #
 # Example:
diff --git a/hack/make-rules/test-e2e-node.sh b/hack/make-rules/test-e2e-node.sh
index c951afbadf9..fb0720a77a5 100755
--- a/hack/make-rules/test-e2e-node.sh
+++ b/hack/make-rules/test-e2e-node.sh
@@ -38,6 +38,7 @@ skip=${SKIP-"\[Flaky\]|\[Slow\]|\[Serial\]"}
 parallelism=${PARALLELISM:-8}
 artifacts="${ARTIFACTS:-"/tmp/_artifacts/$(date +%y%m%dT%H%M%S)"}"
 remote=${REMOTE:-"false"}
+remote_mode=${REMOTE_MODE:-"gce"}
 runtime=${RUNTIME:-"docker"}
 container_runtime_endpoint=${CONTAINER_RUNTIME_ENDPOINT:-""}
 image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""}
@@ -49,6 +50,7 @@ extra_envs=${EXTRA_ENVS:-}
 runtime_config=${RUNTIME_CONFIG:-}
 ssh_user=${SSH_USER:-"${USER}"}
 ssh_key=${SSH_KEY:-}
+ssh_options=${SSH_OPTIONS:-}
 kubelet_config_file=${KUBELET_CONFIG_FILE:-"test/e2e_node/jenkins/default-kubelet-config.yaml"}
 
 # Parse the flags to pass to ginkgo
@@ -86,8 +88,8 @@ if [[ ${runtime} == "remote" ]] ; then
 fi
 
 
-if [ "${remote}" = true ] ; then
-  # The following options are only valid in remote run.
+if [ "${remote}" = true ] && [ "${remote_mode}" = gce ] ; then
+  # The following options are only valid in remote GCE run.
   images=${IMAGES:-""}
   hosts=${HOSTS:-""}
   image_project=${IMAGE_PROJECT:-"kubernetes-node-e2e-images"}
@@ -177,12 +179,35 @@ if [ "${remote}" = true ] ; then
     --delete-instances="${delete_instances}" --test_args="${test_args}" --instance-metadata="${metadata}" \
     --image-config-file="${image_config_file}" --system-spec-name="${system_spec_name}" \
     --runtime-config="${runtime_config}" --preemptible-instances="${preemptible_instances}" \
-    --ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --image-config-dir="${image_config_dir}" \
+    --ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \
+    --image-config-dir="${image_config_dir}" \
     --extra-envs="${extra_envs}" --kubelet-config-file="${kubelet_config_file}" --test-suite="${test_suite}" \
     "${timeout_arg}" \
     2>&1 | tee -i "${artifacts}/build-log.txt"
   exit $?
 
+elif [ "${remote}" = true ] && [ "${remote_mode}" = ssh ] ; then
+  hosts=${HOSTS:-""}
+  test_suite=${TEST_SUITE:-"default"}
+  if [[ -n "${TIMEOUT:-}" ]] ; then
+    timeout_arg="--test-timeout=${TIMEOUT}"
+  fi
+
+  # Use cluster.local as default dns-domain
+  test_args='--dns-domain="'${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args}
+  test_args='--kubelet-flags="--cluster-domain='${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args}
+
+  # Invoke the runner
+  go run test/e2e_node/runner/remote/run_remote.go --mode="ssh" --logtostderr --vmodule=*=4 \
+    --hosts="${hosts}" --results-dir="${artifacts}" --ginkgo-flags="${ginkgoflags}" \
+    --test_args="${test_args}" --system-spec-name="${system_spec_name}" \
+    --runtime-config="${runtime_config}" \
+    --ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \
+    --extra-envs="${extra_envs}" --test-suite="${test_suite}" \
+    "${timeout_arg}" \
+    2>&1 | tee -i "${artifacts}/build-log.txt"
+  exit $?
+
 else
   # Refresh sudo credentials if needed
   if ping -c 1 -q metadata.google.internal &> /dev/null; then
diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go
index 5f3285120ed..a46b8db73af 100644
--- a/test/e2e_node/runner/remote/run_remote.go
+++ b/test/e2e_node/runner/remote/run_remote.go
@@ -49,6 +49,7 @@ import (
 	"sigs.k8s.io/yaml"
 )
 
+var mode = flag.String("mode", "gce", "Mode to operate in. One of gce|ssh. Defaults to gce")
 var testArgs = flag.String("test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
 var testSuite = flag.String("test-suite", "default", "Test suite the runner initializes with. Currently support default|cadvisor|conformance")
 var instanceNamePrefix = flag.String("instance-name-prefix", "", "prefix for instance names")
@@ -223,18 +224,119 @@ func main() {
 		return
 	}
 
-	if *hosts == "" && *imageConfigFile == "" && *images == "" {
-		klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.")
-	}
-	var err error
-	computeService, err = getComputeClient()
-	if err != nil {
-		klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
+	if *mode != "gce" && *mode != "ssh" {
+		klog.Fatalf("Unknown --mode %q, must be one of gce|ssh.", *mode)
+	}
+	// Without hosts there is nothing to run in ssh mode; fail loudly instead of
+	// exiting 0 after running no tests.
+	if *mode == "ssh" && *hosts == "" {
+		klog.Fatalf("Must specify --hosts when --mode=ssh.")
+	}
+
+	var gceImages *internalImageConfig
+	if *mode == "gce" {
+		if *hosts == "" && *imageConfigFile == "" && *images == "" {
+			klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.")
+		}
+		var err error
+		computeService, err = getComputeClient()
+		if err != nil {
+			klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
+		}
+		if gceImages, err = prepareGceImages(); err != nil {
+			klog.Fatalf("While preparing GCE images: %v", err)
+		}
+		if *instanceNamePrefix == "" {
+			*instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8]
+		}
 	}
 
+	// Setup coloring; stay uncolored when stdout cannot be stat'ed.
+	useColor := false
+	if stat, err := os.Stdout.Stat(); err == nil {
+		useColor = (stat.Mode() & os.ModeCharDevice) != 0
+	}
+	blue := ""
+	noColour := ""
+	if useColor {
+		blue = "\033[0;34m"
+		noColour = "\033[0m"
+	}
+
+	go arc.getArchive()
+	defer arc.deleteArchive()
+
+	results := make(chan *TestResult)
+	running := 0
+	if gceImages != nil {
+		for shortName := range gceImages.images {
+			imageConfig := gceImages.images[shortName]
+			fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image)
+			running++
+			go func(image *internalGCEImage, junitFileName string) {
+				results <- testImage(image, junitFileName)
+			}(&imageConfig, shortName)
+		}
+	}
+	if *hosts != "" {
+		for _, host := range strings.Split(*hosts, ",") {
+			fmt.Printf("Initializing e2e tests using host %s.\n", host)
+			running++
+			go func(host string, junitFileName string) {
+				// testHost no longer looks up the GCE instance itself, so hosts given
+				// by instance name must be checked and registered here in gce mode.
+				if *mode == "gce" {
+					if err := registerGceHostIP(host); err != nil {
+						results <- &TestResult{err: err, host: host, exitOk: false}
+						return
+					}
+				}
+				results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags)
+			}(host, host)
+		}
+	}
+
+	// Wait for all tests to complete and emit the results
+	errCount := 0
+	exitOk := true
+	for i := 0; i < running; i++ {
+		tr := <-results
+		host := tr.host
+		fmt.Println() // Print an empty line
+		fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
+		fmt.Printf("%s> START TEST >%s\n", blue, noColour)
+		fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
+		fmt.Printf("Start Test Suite on Host %s\n", host)
+		fmt.Printf("%s\n", tr.output)
+		if tr.err != nil {
+			errCount++
+			fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err)
+		} else {
+			fmt.Printf("Success Finished Test Suite on Host %s\n", host)
+		}
+		exitOk = exitOk && tr.exitOk
+		fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
+		fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour)
+		fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
+		fmt.Println() // Print an empty line
+	}
+	// Set the exit code if there were failures
+	if !exitOk {
+		fmt.Printf("Failure: %d errors encountered.\n", errCount)
+		callGubernator(*gubernator)
+		arc.deleteArchive()
+		os.Exit(1)
+	}
+	callGubernator(*gubernator)
+}
+
+// prepareGceImages assembles the GCE images to run the tests against, e.g. the
+// ones listed in --image-config-file.
+func prepareGceImages() (*internalImageConfig, error) {
 	gceImages := &internalImageConfig{
 		images: make(map[string]internalGCEImage),
 	}
+
 	// Parse images from given config file and convert them to internalGCEImage.
 	if *imageConfigFile != "" {
 		configPath := *imageConfigFile
@@ -244,14 +346,14 @@ func main() {
 
 		imageConfigData, err := ioutil.ReadFile(configPath)
 		if err != nil {
-			klog.Fatalf("Could not read image config file provided: %v", err)
+			return nil, fmt.Errorf("Could not read image config file provided: %v", err)
 		}
 		// Unmarshal the given image config file. All images for this test run will be organized into a map.
 		// shortName->GCEImage, e.g cos-stable->cos-stable-81-12871-103-0.
 		externalImageConfig := ImageConfig{Images: make(map[string]GCEImage)}
 		err = yaml.Unmarshal(imageConfigData, &externalImageConfig)
 		if err != nil {
-			klog.Fatalf("Could not parse image config file: %v", err)
+			return nil, fmt.Errorf("Could not parse image config file: %v", err)
 		}
 
 		for shortName, imageConfig := range externalImageConfig.Images {
@@ -259,7 +361,7 @@ func main() {
 			if (imageConfig.ImageRegex != "" || imageConfig.ImageFamily != "") && imageConfig.Image == "" {
 				image, err = getGCEImage(imageConfig.ImageRegex, imageConfig.ImageFamily, imageConfig.Project)
 				if err != nil {
-					klog.Fatalf("Could not retrieve a image based on image regex %q and family %q: %v",
+					return nil, fmt.Errorf("Could not retrieve a image based on image regex %q and family %q: %v",
 						imageConfig.ImageRegex, imageConfig.ImageFamily, err)
 				}
 			} else {
@@ -318,75 +420,8 @@ func main() {
 			klog.Fatal("Must specify --project flag to launch images into")
 		}
 	}
-	if *instanceNamePrefix == "" {
-		*instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8]
-	}
 
-	// Setup coloring
-	stat, _ := os.Stdout.Stat()
-	useColor := (stat.Mode() & os.ModeCharDevice) != 0
-	blue := ""
-	noColour := ""
-	if useColor {
-		blue = "\033[0;34m"
-		noColour = "\033[0m"
-	}
-
-	go arc.getArchive()
-	defer arc.deleteArchive()
-
-	results := make(chan *TestResult)
-	running := 0
-	for shortName := range gceImages.images {
-		imageConfig := gceImages.images[shortName]
-		fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image)
-		running++
-		go func(image *internalGCEImage, junitFileName string) {
-			results <- testImage(image, junitFileName)
-		}(&imageConfig, shortName)
-	}
-	if *hosts != "" {
-		for _, host := range strings.Split(*hosts, ",") {
-			fmt.Printf("Initializing e2e tests using host %s.\n", host)
-			running++
-			go func(host string, junitFileName string) {
-				results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags)
-			}(host, host)
-		}
-	}
-
-	// Wait for all tests to complete and emit the results
-	errCount := 0
-	exitOk := true
-	for i := 0; i < running; i++ {
-		tr := <-results
-		host := tr.host
-		fmt.Println() // Print an empty line
-		fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
-		fmt.Printf("%s> START TEST >%s\n", blue, noColour)
-		fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
-		fmt.Printf("Start Test Suite on Host %s\n", host)
-		fmt.Printf("%s\n", tr.output)
-		if tr.err != nil {
-			errCount++
-			fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err)
-		} else {
-			fmt.Printf("Success Finished Test Suite on Host %s\n", host)
-		}
-		exitOk = exitOk && tr.exitOk
-		fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
-		fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour)
-		fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
-		fmt.Println() // Print an empty line
-	}
-	// Set the exit code if there were failures
-	if !exitOk {
-		fmt.Printf("Failure: %d errors encountered.\n", errCount)
-		callGubernator(*gubernator)
-		arc.deleteArchive()
-		os.Exit(1)
-	}
-	callGubernator(*gubernator)
+	return gceImages, nil
 }
 
 func callGubernator(gubernator bool) {
@@ -436,29 +471,25 @@ func getImageMetadata(input string) *compute.Metadata {
 	return &ret
 }
 
-// Run tests in archive against host
-func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult {
+// registerGceHostIP checks that the GCE instance named host is RUNNING and, if it
+// has an external IP, registers that IP for the hostname via remote.AddHostnameIP.
+func registerGceHostIP(host string) error {
 	instance, err := computeService.Instances.Get(*project, *zone, host).Do()
 	if err != nil {
-		return &TestResult{
-			err:    err,
-			host:   host,
-			exitOk: false,
-		}
+		return err
 	}
 	if strings.ToUpper(instance.Status) != "RUNNING" {
-		err = fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status)
-		return &TestResult{
-			err:    err,
-			host:   host,
-			exitOk: false,
-		}
+		return fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status)
 	}
 	externalIP := getExternalIP(instance)
 	if len(externalIP) > 0 {
 		remote.AddHostnameIP(host, externalIP)
 	}
+	return nil
+}
 
+// Run tests in archive against host
+func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult {
 	path, err := arc.getArchive()
 	if err != nil {
 		// Don't log fatal because we need to do any needed cleanup contained in "defer" statements
@@ -553,6 +584,14 @@ func testImage(imageConfig *internalGCEImage, junitFileName string) *TestResult
 	// If we are going to delete the instance, don't bother with cleaning up the files
 	deleteFiles := !*deleteInstances && *cleanup
 
+	if err = registerGceHostIP(host); err != nil {
+		return &TestResult{
+			err:    err,
+			host:   host,
+			exitOk: false,
+		}
+	}
+
 	result := testHost(host, deleteFiles, imageConfig.imageDesc, junitFileName, ginkgoFlagsStr)
 	// This is a temporary solution to collect serial node serial log. Only port 1 contains useful information.
 	// TODO(random-liu): Extract out and unify log collection logic with cluste e2e.