Merge pull request #105764 from jlebon/pr/add-ssh-mode

test/e2e_node/remote: support pure SSH mode
This commit is contained in:
Kubernetes Prow Robot 2021-11-22 10:53:33 -08:00 committed by GitHub
commit a142f86351
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 140 additions and 96 deletions

View File

@ -214,8 +214,10 @@ define TEST_E2E_NODE_HELP_INFO
# Defaults to "".
# RUN_UNTIL_FAILURE: If true, pass --untilItFails to ginkgo so tests are run
# repeatedly until they fail. Defaults to false.
# REMOTE: If true, run the tests on a remote host instance on GCE. Defaults
# to false.
# REMOTE: If true, run the tests on a remote host. Defaults to false.
# REMOTE_MODE: For REMOTE=true only. Mode for remote execution (eg. gce, ssh).
# If set to "gce", an instance can be provisioned or reused from GCE. If set
# to "ssh", provided `HOSTS` must be IPs or resolvable. Defaults to "gce".
# ARTIFACTS: Local directory to scp test artifacts into from the remote hosts
# for REMOTE=true. Local directory to write junit xml results into for REMOTE=false.
# Defaults to "/tmp/_artifacts/$$(date +%y%m%dT%H%M%S)".
@ -256,6 +258,7 @@ define TEST_E2E_NODE_HELP_INFO
# Defaults to false.
# TEST_SUITE: For REMOTE=true only. Test suite to use. Defaults to "default".
# SSH_KEY: For REMOTE=true only. Path to SSH key to use.
# SSH_OPTIONS: For REMOTE=true only. SSH options to use.
# RUNTIME_CONFIG: The runtime configuration for the API server on the node e2e tests.
#
# Example:

View File

@ -38,6 +38,7 @@ skip=${SKIP-"\[Flaky\]|\[Slow\]|\[Serial\]"}
parallelism=${PARALLELISM:-8}
artifacts="${ARTIFACTS:-"/tmp/_artifacts/$(date +%y%m%dT%H%M%S)"}"
remote=${REMOTE:-"false"}
remote_mode=${REMOTE_MODE:-"gce"}
runtime=${RUNTIME:-"docker"}
container_runtime_endpoint=${CONTAINER_RUNTIME_ENDPOINT:-""}
image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""}
@ -49,6 +50,7 @@ extra_envs=${EXTRA_ENVS:-}
runtime_config=${RUNTIME_CONFIG:-}
ssh_user=${SSH_USER:-"${USER}"}
ssh_key=${SSH_KEY:-}
ssh_options=${SSH_OPTIONS:-}
kubelet_config_file=${KUBELET_CONFIG_FILE:-"test/e2e_node/jenkins/default-kubelet-config.yaml"}
# Parse the flags to pass to ginkgo
@ -86,8 +88,8 @@ if [[ ${runtime} == "remote" ]] ; then
fi
if [ "${remote}" = true ] ; then
# The following options are only valid in remote run.
if [ "${remote}" = true ] && [ "${remote_mode}" = gce ] ; then
# The following options are only valid in remote GCE run.
images=${IMAGES:-""}
hosts=${HOSTS:-""}
image_project=${IMAGE_PROJECT:-"kubernetes-node-e2e-images"}
@ -177,12 +179,35 @@ if [ "${remote}" = true ] ; then
--delete-instances="${delete_instances}" --test_args="${test_args}" --instance-metadata="${metadata}" \
--image-config-file="${image_config_file}" --system-spec-name="${system_spec_name}" \
--runtime-config="${runtime_config}" --preemptible-instances="${preemptible_instances}" \
--ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --image-config-dir="${image_config_dir}" \
--ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \
--image-config-dir="${image_config_dir}" \
--extra-envs="${extra_envs}" --kubelet-config-file="${kubelet_config_file}" --test-suite="${test_suite}" \
"${timeout_arg}" \
2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?
elif [ "${remote}" = true ] && [ "${remote_mode}" = ssh ] ; then
hosts=${HOSTS:-""}
test_suite=${TEST_SUITE:-"default"}
if [[ -n "${TIMEOUT:-}" ]] ; then
timeout_arg="--test-timeout=${TIMEOUT}"
fi
# Use cluster.local as default dns-domain
test_args='--dns-domain="'${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args}
test_args='--kubelet-flags="--cluster-domain='${KUBE_DNS_DOMAIN:-cluster.local}'" '${test_args}
# Invoke the runner
go run test/e2e_node/runner/remote/run_remote.go --mode="ssh" --logtostderr --vmodule=*=4 \
--hosts="${hosts}" --results-dir="${artifacts}" --ginkgo-flags="${ginkgoflags}" \
--test_args="${test_args}" --system-spec-name="${system_spec_name}" \
--runtime-config="${runtime_config}" \
--ssh-user="${ssh_user}" --ssh-key="${ssh_key}" --ssh-options="${ssh_options}" \
--extra-envs="${extra_envs}" --test-suite="${test_suite}" \
"${timeout_arg}" \
2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?
else
# Refresh sudo credentials if needed
if ping -c 1 -q metadata.google.internal &> /dev/null; then

View File

@ -49,6 +49,7 @@ import (
"sigs.k8s.io/yaml"
)
var mode = flag.String("mode", "gce", "Mode to operate in. One of gce|ssh. Defaults to gce")
var testArgs = flag.String("test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
var testSuite = flag.String("test-suite", "default", "Test suite the runner initializes with. Currently support default|cadvisor|conformance")
var instanceNamePrefix = flag.String("instance-name-prefix", "", "prefix for instance names")
@ -223,18 +224,98 @@ func main() {
return
}
if *hosts == "" && *imageConfigFile == "" && *images == "" {
klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.")
}
var err error
computeService, err = getComputeClient()
if err != nil {
klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
var gceImages *internalImageConfig
if *mode == "gce" {
if *hosts == "" && *imageConfigFile == "" && *images == "" {
klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.")
}
var err error
computeService, err = getComputeClient()
if err != nil {
klog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
}
if gceImages, err = prepareGceImages(); err != nil {
klog.Fatalf("While preparing GCE images: %v", err)
}
if *instanceNamePrefix == "" {
*instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8]
}
}
// Setup coloring
stat, _ := os.Stdout.Stat()
useColor := (stat.Mode() & os.ModeCharDevice) != 0
blue := ""
noColour := ""
if useColor {
blue = "\033[0;34m"
noColour = "\033[0m"
}
go arc.getArchive()
defer arc.deleteArchive()
results := make(chan *TestResult)
running := 0
if gceImages != nil {
for shortName := range gceImages.images {
imageConfig := gceImages.images[shortName]
fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image)
running++
go func(image *internalGCEImage, junitFileName string) {
results <- testImage(image, junitFileName)
}(&imageConfig, shortName)
}
}
if *hosts != "" {
for _, host := range strings.Split(*hosts, ",") {
fmt.Printf("Initializing e2e tests using host %s.\n", host)
running++
go func(host string, junitFileName string) {
results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags)
}(host, host)
}
}
// Wait for all tests to complete and emit the results
errCount := 0
exitOk := true
for i := 0; i < running; i++ {
tr := <-results
host := tr.host
fmt.Println() // Print an empty line
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("%s> START TEST >%s\n", blue, noColour)
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("Start Test Suite on Host %s\n", host)
fmt.Printf("%s\n", tr.output)
if tr.err != nil {
errCount++
fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err)
} else {
fmt.Printf("Success Finished Test Suite on Host %s\n", host)
}
exitOk = exitOk && tr.exitOk
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour)
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Println() // Print an empty line
}
// Set the exit code if there were failures
if !exitOk {
fmt.Printf("Failure: %d errors encountered.\n", errCount)
callGubernator(*gubernator)
arc.deleteArchive()
os.Exit(1)
}
callGubernator(*gubernator)
}
func prepareGceImages() (*internalImageConfig, error) {
gceImages := &internalImageConfig{
images: make(map[string]internalGCEImage),
}
// Parse images from given config file and convert them to internalGCEImage.
if *imageConfigFile != "" {
configPath := *imageConfigFile
@ -244,14 +325,14 @@ func main() {
imageConfigData, err := ioutil.ReadFile(configPath)
if err != nil {
klog.Fatalf("Could not read image config file provided: %v", err)
return nil, fmt.Errorf("Could not read image config file provided: %v", err)
}
// Unmarshal the given image config file. All images for this test run will be organized into a map.
// shortName->GCEImage, e.g. cos-stable->cos-stable-81-12871-103-0.
externalImageConfig := ImageConfig{Images: make(map[string]GCEImage)}
err = yaml.Unmarshal(imageConfigData, &externalImageConfig)
if err != nil {
klog.Fatalf("Could not parse image config file: %v", err)
return nil, fmt.Errorf("Could not parse image config file: %v", err)
}
for shortName, imageConfig := range externalImageConfig.Images {
@ -259,7 +340,7 @@ func main() {
if (imageConfig.ImageRegex != "" || imageConfig.ImageFamily != "") && imageConfig.Image == "" {
image, err = getGCEImage(imageConfig.ImageRegex, imageConfig.ImageFamily, imageConfig.Project)
if err != nil {
klog.Fatalf("Could not retrieve a image based on image regex %q and family %q: %v",
return nil, fmt.Errorf("Could not retrieve a image based on image regex %q and family %q: %v",
imageConfig.ImageRegex, imageConfig.ImageFamily, err)
}
} else {
@ -318,75 +399,8 @@ func main() {
klog.Fatal("Must specify --project flag to launch images into")
}
}
if *instanceNamePrefix == "" {
*instanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8]
}
// Setup coloring
stat, _ := os.Stdout.Stat()
useColor := (stat.Mode() & os.ModeCharDevice) != 0
blue := ""
noColour := ""
if useColor {
blue = "\033[0;34m"
noColour = "\033[0m"
}
go arc.getArchive()
defer arc.deleteArchive()
results := make(chan *TestResult)
running := 0
for shortName := range gceImages.images {
imageConfig := gceImages.images[shortName]
fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image)
running++
go func(image *internalGCEImage, junitFileName string) {
results <- testImage(image, junitFileName)
}(&imageConfig, shortName)
}
if *hosts != "" {
for _, host := range strings.Split(*hosts, ",") {
fmt.Printf("Initializing e2e tests using host %s.\n", host)
running++
go func(host string, junitFileName string) {
results <- testHost(host, *cleanup, "", junitFileName, *ginkgoFlags)
}(host, host)
}
}
// Wait for all tests to complete and emit the results
errCount := 0
exitOk := true
for i := 0; i < running; i++ {
tr := <-results
host := tr.host
fmt.Println() // Print an empty line
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("%s> START TEST >%s\n", blue, noColour)
fmt.Printf("%s>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>%s\n", blue, noColour)
fmt.Printf("Start Test Suite on Host %s\n", host)
fmt.Printf("%s\n", tr.output)
if tr.err != nil {
errCount++
fmt.Printf("Failure Finished Test Suite on Host %s\n%v\n", host, tr.err)
} else {
fmt.Printf("Success Finished Test Suite on Host %s\n", host)
}
exitOk = exitOk && tr.exitOk
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Printf("%s< FINISH TEST <%s\n", blue, noColour)
fmt.Printf("%s<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<%s\n", blue, noColour)
fmt.Println() // Print an empty line
}
// Set the exit code if there were failures
if !exitOk {
fmt.Printf("Failure: %d errors encountered.\n", errCount)
callGubernator(*gubernator)
arc.deleteArchive()
os.Exit(1)
}
callGubernator(*gubernator)
return gceImages, nil
}
func callGubernator(gubernator bool) {
@ -436,29 +450,23 @@ func getImageMetadata(input string) *compute.Metadata {
return &ret
}
// Run tests in archive against host
func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult {
func registerGceHostIP(host string) error {
instance, err := computeService.Instances.Get(*project, *zone, host).Do()
if err != nil {
return &TestResult{
err: err,
host: host,
exitOk: false,
}
return err
}
if strings.ToUpper(instance.Status) != "RUNNING" {
err = fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status)
return &TestResult{
err: err,
host: host,
exitOk: false,
}
return fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status)
}
externalIP := getExternalIP(instance)
if len(externalIP) > 0 {
remote.AddHostnameIP(host, externalIP)
}
return nil
}
// Run tests in archive against host
func testHost(host string, deleteFiles bool, imageDesc, junitFileName, ginkgoFlagsStr string) *TestResult {
path, err := arc.getArchive()
if err != nil {
// Don't log fatal because we need to do any needed cleanup contained in "defer" statements
@ -553,6 +561,14 @@ func testImage(imageConfig *internalGCEImage, junitFileName string) *TestResult
// If we are going to delete the instance, don't bother with cleaning up the files
deleteFiles := !*deleteInstances && *cleanup
if err = registerGceHostIP(host); err != nil {
return &TestResult{
err: err,
host: host,
exitOk: false,
}
}
result := testHost(host, deleteFiles, imageConfig.imageDesc, junitFileName, ginkgoFlagsStr)
// This is a temporary solution to collect the node's serial log. Only port 1 contains useful information.
// TODO(random-liu): Extract out and unify log collection logic with cluster e2e.