diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 7e71c18298c..9e0955a0797 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -102,6 +102,8 @@ type TestContextType struct { type NodeTestContextType struct { // Name of the node to run tests on (node e2e suite only). NodeName string + // NodeConformance indicates whether the test is running in node conformance mode. + NodeConformance bool // DisableKubenet disables kubenet when starting kubelet. DisableKubenet bool // Whether to enable the QoS Cgroup Hierarchy or not @@ -209,6 +211,13 @@ func RegisterClusterFlags() { // Register flags specific to the node e2e test suite. func RegisterNodeFlags() { flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).") + // TODO(random-liu): Move kubelet start logic out of the test. + // TODO(random-liu): Move log fetch logic out of the test. + // There are different ways to start kubelet (systemd, initd, docker, rkt, manually started etc.) + // and manage logs (journald, upstart etc.). + // For different situation we need to mount different things into the container, run different commands. + // It is hard and unnecessary to deal with the complexity inside the test suite. + flag.BoolVar(&TestContext.NodeConformance, "conformance", false, "If true, the test suite will not start kubelet, and fetch system log (kernel, docker, kubelet log etc.) to the report directory.") // TODO(random-liu): Remove kubelet related flags when we move the kubelet start logic out of the test. // TODO(random-liu): Find someway to get kubelet configuration, and automatic config and filter test based on the configuration. flag.BoolVar(&TestContext.DisableKubenet, "disable-kubenet", false, "If true, start kubelet without kubenet. 
(default false)") diff --git a/test/e2e_node/apparmor_test.go b/test/e2e_node/apparmor_test.go index 440ec9068fe..7e33a2d97a5 100644 --- a/test/e2e_node/apparmor_test.go +++ b/test/e2e_node/apparmor_test.go @@ -119,6 +119,7 @@ func loadTestProfiles() error { return fmt.Errorf("failed to write profiles to file: %v", err) } + // TODO(random-liu): The test is run as root now, no need to use sudo here. cmd := exec.Command("sudo", "apparmor_parser", "-r", "-W", f.Name()) stderr := &bytes.Buffer{} cmd.Stderr = stderr diff --git a/test/e2e_node/conformance/build/Dockerfile b/test/e2e_node/conformance/build/Dockerfile new file mode 100644 index 00000000000..8e490331f54 --- /dev/null +++ b/test/e2e_node/conformance/build/Dockerfile @@ -0,0 +1,44 @@ +# Copyright 2016 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM BASEIMAGE + +COPY ginkgo /usr/local/bin/ +COPY e2e_node.test /usr/local/bin + +# The following environment variables can be overridden when starting the container. +# FOCUS is a regex matching the tests to run. By default run all conformance tests. +# SKIP is a regex matching the tests to skip. By default skip flaky and serial tests. +# PARALLELISM is the number of processes the test will run in parallel. +# REPORT_PATH is the path in the container to save test results and logs. +# MANIFEST_PATH is the kubelet manifest path in the container. +# FLAKE_ATTEMPTS is the number of times to retry when there is a test failure. By default 2. 
+# TEST_ARGS is the extra test arguments passed into the test; it is expanded unquoted so that it can carry several flags. +ENV FOCUS="\[Conformance\]" \ + SKIP="\[Flaky\]|\[Serial\]" \ + PARALLELISM=8 \ + REPORT_PATH="/var/result" \ + MANIFEST_PATH="/etc/manifest" \ + FLAKE_ATTEMPTS=2 \ + TEST_ARGS="" + +ENTRYPOINT ginkgo --focus="$FOCUS" \ + --skip="$SKIP" \ + --nodes=$PARALLELISM \ + --flakeAttempts=$FLAKE_ATTEMPTS \ + /usr/local/bin/e2e_node.test \ + -- --conformance=true \ + --prepull-images=false \ + --manifest-path="$MANIFEST_PATH" \ + --report-dir="$REPORT_PATH" $TEST_ARGS diff --git a/test/e2e_node/conformance/build/Makefile b/test/e2e_node/conformance/build/Makefile new file mode 100644 index 00000000000..2ac003f747b --- /dev/null +++ b/test/e2e_node/conformance/build/Makefile @@ -0,0 +1,60 @@ +# Copyright 2016 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Build the node-test image. +# +# Usage: +# [ARCH=amd64] [REGISTRY="gcr.io/google_containers"] [BIN_DIR="../../../../_output/bin"] make (build|push) VERSION={some_version_number e.g. 0.1} + +# TODO(random-liu): Add this into release progress. +REGISTRY?=gcr.io/google_containers +ARCH?=amd64 +# BIN_DIR is the directory to find binaries, overwrite with ../../../../_output/bin +# for local development. 
+BIN_DIR?=../../../../_output/dockerized/bin/linux/${ARCH} +TEMP_DIR:=$(shell mktemp -d) + +BASEIMAGE_amd64=debian:jessie +BASEIMAGE_arm=armel/debian:jessie +BASEIMAGE_arm64=aarch64/debian:jessie +BASEIMAGE_ppc64le=ppc64le/debian:jessie + +BASEIMAGE?=${BASEIMAGE_${ARCH}} + +all: build + +build: + +ifndef VERSION + $(error VERSION is undefined) +endif + cp -r ./* ${TEMP_DIR} + + cp ${BIN_DIR}/ginkgo ${TEMP_DIR} + cp ${BIN_DIR}/e2e_node.test ${TEMP_DIR} + + cd ${TEMP_DIR} && sed -i.back "s|BASEIMAGE|${BASEIMAGE}|g" Dockerfile + + # Make scripts executable before they are copied into the Docker image. If we make them executable later, in another layer + # they'll take up twice the space because the new executable binary differs from the old one, but everything is cached in layers. + cd ${TEMP_DIR} && chmod a+rx \ + e2e_node.test \ + ginkgo + + docker build -t ${REGISTRY}/node-test-${ARCH}:${VERSION} ${TEMP_DIR} + +push: build + gcloud docker push ${REGISTRY}/node-test-${ARCH}:${VERSION} + +.PHONY: all build push diff --git a/test/e2e_node/conformance/run_test.sh b/test/e2e_node/conformance/run_test.sh new file mode 100755 index 00000000000..a1ad7af0f5c --- /dev/null +++ b/test/e2e_node/conformance/run_test.sh @@ -0,0 +1,174 @@ +#!/bin/bash + +# Copyright 2016 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script is only for demonstrating how to use the node test container. 
In +# production environment, kubelet bootstrap will be more complicated, user +# should configure the node test container accordingly. +# In addition, this script will also be used in the node e2e test to let it use +# the containerized test suite. + +# TODO(random-liu): Use standard installer to install kubelet. +# TODO(random-liu): Use standard tool to start kubelet in production way (such +# as systemd, supervisord etc.) +# TODO(random-liu): Initialize kubelet with standard configmap after dynamic +# configuration landing, so that all test could get the current kubelet +# configuration and react accordingly. + +# Refresh sudo credentials if not running on GCE. +if ! ping -c 1 -q metadata.google.internal &> /dev/null; then + sudo -v || exit 1 +fi + +# FOCUS is ginkgo focus to select which tests to run. By default, FOCUS is +# initialized as "\[Conformance\]" in the test container to run all conformance +# test. +FOCUS=${FOCUS:-""} + +# SKIP is ginkgo skip to select which tests to skip. By default, SKIP is +# initialized as "\[Flaky\]|\[Serial\]" in the test container skipping all +# flaky and serial test. +SKIP=${SKIP:-""} + +# REGISTRY is the image registry for node test image. +REGISTRY=${REGISTRY:-"gcr.io/google_containers"} + +# ARCH is the architecture of current machine, the script will use this to +# select corresponding test container image. +ARCH=${ARCH:-"amd64"} + +# VERSION is the version of the test container image. +VERSION=${VERSION:-"0.1"} + +# KUBELET_BIN is the kubelet binary name. If it is not specified, use the +# default binary name "kubelet". +KUBELET_BIN=${KUBELET_BIN:-"kubelet"} + +# KUBELET is the kubelet binary path. If it is not specified, assume kubelet is +# in PATH. +KUBELET=${KUBELET:-"`which $KUBELET_BIN`"} + +# LOG_DIR is the absolute path of the directory where the test will collect all +# logs to. By default, use the current directory. 
+LOG_DIR=${LOG_DIR:-`pwd`} +mkdir -p $LOG_DIR + +# NETWORK_PLUGIN is the network plugin used by kubelet. Do not use network +# plugin by default. +NETWORK_PLUGIN=${NETWORK_PLUGIN:-""} + +# NETWORK_PLUGIN_PATH is the path to network plugin binary. +NETWORK_PLUGIN_PATH=${NETWORK_PLUGIN_PATH:-""} + +# start_kubelet starts kubelet and redirect kubelet log to $LOG_DIR/kubelet.log. +kubelet_log=kubelet.log +start_kubelet() { + echo "Starting kubelet..." + sudo -b $KUBELET $@ &>$LOG_DIR/$kubelet_log + if [ $? -ne 0 ]; then + echo "Failed to start kubelet" + exit 1 + fi +} + +# wait_kubelet retries up to 10 times for kubelet to be ready by checking http://127.0.0.1:10255/healthz, and exits if it never is. +wait_kubelet() { + echo "Health checking kubelet..." + healthCheckURL=http://127.0.0.1:10255/healthz + local maxRetry=10 + local cur=1 + while [ $cur -le $maxRetry ]; do + curl -s $healthCheckURL > /dev/null + if [ $? -eq 0 ]; then + echo "Kubelet is ready" + break + fi + if [ $cur -eq $maxRetry ]; then + echo "Health check exceeds max retry" + exit 1 + fi + echo "Kubelet is not ready" + sleep 1 + ((cur++)) + done +} + +# kill_kubelet kills kubelet. +kill_kubelet() { + echo "Stopping kubelet..." + sudo pkill $KUBELET_BIN + if [ $? -ne 0 ]; then + echo "Failed to stop kubelet." + exit 1 + fi +} + +# run_test runs the node test container. +run_test() { + env="" + if [ ! -z "$FOCUS" ]; then + env="$env -e FOCUS=$FOCUS" + fi + if [ ! -z "$SKIP" ]; then + env="$env -e SKIP=$SKIP" + fi + # The test assumes that inside the container: + # * kubelet manifest path is mounted to /etc/manifest; + # * log collect directory is mounted to /var/result; + # * root file system is mounted to /rootfs. + sudo docker run -it --rm --privileged=true --net=host -v /:/rootfs \ + -v $config_dir:/etc/manifest -v $LOG_DIR:/var/result $env $REGISTRY/node-test-$ARCH:$VERSION +} + +# Check whether kubelet is running. If kubelet is running, tell the user to stop +# it before running the test. +pid=`pidof $KUBELET_BIN` +if [ ! 
-z $pid ]; then + echo "Kubelet is running (pid=$pid), please stop it before running the test." + exit 1 +fi + +apiserver=http://localhost:8080 +volume_stats_agg_period=10s +allow_privileged=true +serialize_image_pulls=false +config_dir=`mktemp -d` +file_check_frequency=10s +pod_cidr=10.180.0.0/24 +log_level=4 +start_kubelet --api-servers $apiserver \ + --volume-stats-agg-period $volume_stats_agg_period \ + --allow-privileged=$allow_privileged \ + --serialize-image-pulls=$serialize_image_pulls \ + --config $config_dir \ + --file-check-frequency $file_check_frequency \ + --pod-cidr=$pod_cidr \ + --runtime-cgroups=/docker-daemon \ + --kubelet-cgroups=/kubelet \ + --system-cgroups=/system \ + --cgroup-root=/ \ + --network-plugin=$NETWORK_PLUGIN \ + --network-plugin-dir=$NETWORK_PLUGIN_PATH \ + --v=$log_level \ + --logtostderr + +wait_kubelet + +run_test + +kill_kubelet + +# Clean up the kubelet config directory +sudo rm -rf $config_dir diff --git a/test/e2e_node/e2e_node_suite_test.go b/test/e2e_node/e2e_node_suite_test.go index 62b68ec156e..1a407ed3b76 100644 --- a/test/e2e_node/e2e_node_suite_test.go +++ b/test/e2e_node/e2e_node_suite_test.go @@ -28,6 +28,7 @@ import ( "os" "os/exec" "path" + "syscall" "testing" "time" @@ -71,6 +72,10 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +// When running the containerized conformance test, we'll mount the +// host root filesystem as readonly to /rootfs. +const rootfs = "/rootfs" + func TestE2eNode(t *testing.T) { if *runServicesMode { // If run-services-mode is specified, only run services in current process. @@ -79,6 +84,15 @@ func TestE2eNode(t *testing.T) { } if *systemValidateMode { // If system-validate-mode is specified, only run system validation in current process. + if framework.TestContext.NodeConformance { + // Chroot to /rootfs to make system validation can check system + // as in the root filesystem. + // TODO(random-liu): Consider to chroot the whole test process to make writing + // test easier. 
+ if err := syscall.Chroot(rootfs); err != nil { + glog.Exitf("chroot %q failed: %v", rootfs, err) + } + } if err := system.Validate(); err != nil { glog.Exitf("system validation failed: %v", err) } @@ -172,12 +186,12 @@ func validateSystem() error { if err != nil { return fmt.Errorf("can't get current binary: %v", err) } - // TODO(random-liu): Remove sudo in containerize PR. - output, err := exec.Command("sudo", testBin, "--system-validate-mode").CombinedOutput() + // Pass all flags into the child process, so that it will see the same flag set. + output, err := exec.Command(testBin, append([]string{"--system-validate-mode"}, os.Args[1:]...)...).CombinedOutput() // The output of system validation should have been formatted, directly print here. fmt.Print(string(output)) if err != nil { - return fmt.Errorf("system validation failed") + return fmt.Errorf("system validation failed: %v", err) } return nil } @@ -190,7 +204,7 @@ func maskLocksmithdOnCoreos() { return } if bytes.Contains(data, []byte("ID=coreos")) { - output, err := exec.Command("sudo", "systemctl", "mask", "--now", "locksmithd").CombinedOutput() + output, err := exec.Command("systemctl", "mask", "--now", "locksmithd").CombinedOutput() Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output))) glog.Infof("Locksmithd is masked successfully") } diff --git a/test/e2e_node/remote/remote.go b/test/e2e_node/remote/remote.go index 53fd68d0299..bcc2cf353f8 100644 --- a/test/e2e_node/remote/remote.go +++ b/test/e2e_node/remote/remote.go @@ -161,7 +161,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string if err != nil { return "", false, fmt.Errorf("could not find username: %v", err) } - output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username) + output, err := SSH(host, "usermod", "-a", "-G", "docker", uname.Username) if err != nil { return "", false, 
fmt.Errorf("instance %s not running docker daemon - Command failed: %s", host, output) } @@ -172,14 +172,15 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string dirName := fmt.Sprintf("gcloud-e2e-%d", rand.Int31()) tmp := fmt.Sprintf("/tmp/%s", dirName) - _, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "mkdir", tmp) + // Do not sudo here, so that we can use scp to copy test archive to the directdory. + _, err := SSHNoSudo(host, "mkdir", tmp) if err != nil { // Exit failure with the error return "", false, err } if cleanup { defer func() { - output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "rm", "-rf", tmp) + output, err := SSH(host, "rm", "-rf", tmp) if err != nil { glog.Errorf("failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output) } @@ -188,57 +189,62 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string // Install the cni plugin. cniPath := filepath.Join(tmp, CNIDirectory) - if _, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", - getSshCommand(" ; ", fmt.Sprintf("sudo mkdir -p %s", cniPath), - fmt.Sprintf("sudo wget -O - %s | sudo tar -xz -C %s", CNIURL, cniPath))); err != nil { + cmd := getSSHCommand(" ; ", + fmt.Sprintf("mkdir -p %s", cniPath), + fmt.Sprintf("wget -O - %s | tar -xz -C %s", CNIURL, cniPath), + ) + if _, err := SSH(host, "sh", "-c", cmd); err != nil { // Exit failure with the error return "", false, err } // Configure iptables firewall rules // TODO: consider calling bootstrap script to configure host based on OS - cmd := getSshCommand("&&", + cmd = getSSHCommand("&&", `iptables -L INPUT | grep "Chain INPUT (policy DROP)"`, "(iptables -C INPUT -w -p TCP -j ACCEPT || iptables -A INPUT -w -p TCP -j ACCEPT)", "(iptables -C INPUT -w -p UDP -j ACCEPT || iptables -A INPUT -w -p UDP -j ACCEPT)", "(iptables -C INPUT -w -p ICMP -j ACCEPT || iptables -A INPUT -w -p ICMP -j ACCEPT)") - output, err := RunSshCommand("ssh", 
GetHostnameOrIp(host), "--", "sudo", "sh", "-c", cmd) + output, err := SSH(host, "sh", "-c", cmd) if err != nil { glog.Errorf("Failed to configured firewall: %v output: %v", err, output) } - cmd = getSshCommand("&&", + cmd = getSSHCommand("&&", `iptables -L FORWARD | grep "Chain FORWARD (policy DROP)" > /dev/null`, "(iptables -C FORWARD -w -p TCP -j ACCEPT || iptables -A FORWARD -w -p TCP -j ACCEPT)", "(iptables -C FORWARD -w -p UDP -j ACCEPT || iptables -A FORWARD -w -p UDP -j ACCEPT)", "(iptables -C FORWARD -w -p ICMP -j ACCEPT || iptables -A FORWARD -w -p ICMP -j ACCEPT)") - output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "sh", "-c", cmd) + output, err = SSH(host, "sh", "-c", cmd) if err != nil { glog.Errorf("Failed to configured firewall: %v output: %v", err, output) } // Copy the archive to the staging directory - _, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), tmp)) + _, err = runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), tmp)) if err != nil { // Exit failure with the error return "", false, err } // Kill any running node processes - cmd = getSshCommand(" ; ", - "sudo pkill kubelet", - "sudo pkill kube-apiserver", - "sudo pkill etcd", + cmd = getSSHCommand(" ; ", + "pkill kubelet", + "pkill kube-apiserver", + "pkill etcd", ) // No need to log an error if pkill fails since pkill will fail if the commands are not running. // If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd // logs about failing to bind the required ports. 
glog.Infof("Killing any existing node processes on %s", host) - RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd) + SSH(host, "sh", "-c", cmd) // Extract the archive - cmd = getSshCommand(" && ", fmt.Sprintf("cd %s", tmp), fmt.Sprintf("tar -xzvf ./%s", archiveName)) + cmd = getSSHCommand(" && ", + fmt.Sprintf("cd %s", tmp), + fmt.Sprintf("tar -xzvf ./%s", archiveName), + ) glog.Infof("Extracting tar on %s", host) - output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd) + output, err = SSH(host, "sh", "-c", cmd) if err != nil { // Exit failure with the error return "", false, err @@ -261,7 +267,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string } // Determine if tests will run on a GCI node. - output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", "'cat /etc/os-release'") + output, err = SSH(host, "sh", "-c", "'cat /etc/os-release'") if err != nil { glog.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output) return "", false, fmt.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output) @@ -270,7 +276,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string // Note this implicitly requires the script to be where we expect in the tarball, so if that location changes the error // here will tell us to update the remote test runner. mounterPath := filepath.Join(tmp, "cluster/gce/gci/mounter/mounter") - output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath)) + output, err = SSH(host, "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath)) if err != nil { glog.Errorf("Unable to chmod 544 GCI mounter script. 
Err: %v, Output:\n%s", err, output) return "", false, err @@ -284,7 +290,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string } // Run the tests - cmd = getSshCommand(" && ", + cmd = getSSHCommand(" && ", fmt.Sprintf("cd %s", tmp), fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --logtostderr --v 4 --node-name=%s --report-dir=%s/results --report-prefix=%s %s", testTimeoutSeconds.Seconds(), ginkgoFlags, host, tmp, junitFilePrefix, testArgs), @@ -292,7 +298,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string aggErrs := []error{} glog.Infof("Starting tests on %s", host) - output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd) + output, err = SSH(host, "sh", "-c", cmd) if err != nil { aggErrs = append(aggErrs, err) @@ -313,10 +319,10 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string // Try getting the system logs from journald and store it to a file. // Don't reuse the original test directory on the remote host because // it could've be been removed if the node was rebooted. 
- _, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", fmt.Sprintf("'sudo journalctl --system --all > %s'", logPath)) + _, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)) if err == nil { glog.Infof("Got the system logs from journald; copying it back...") - if _, err := RunSshCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil { + if _, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil { glog.Infof("Failed to copy the log: err: %v", err) } } else { @@ -334,26 +340,38 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string } func getTestArtifacts(host, testDir string) error { - _, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/%s", *resultsDir, host)) + _, err := runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/%s", *resultsDir, host)) if err != nil { return err } // Copy junit to the top of artifacts - _, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/", *resultsDir)) + _, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/", *resultsDir)) if err != nil { return err } return nil } -// getSshCommand handles proper quoting so that multiple commands are executed in the same shell over ssh -func getSshCommand(sep string, args ...string) string { +// getSSHCommand handles proper quoting so that multiple commands are executed in the same shell over ssh +func getSSHCommand(sep string, args ...string) string { return fmt.Sprintf("'%s'", strings.Join(args, sep)) } -// runSshCommand executes the ssh or scp command, adding the flag provided --ssh-options -func RunSshCommand(cmd string, args ...string) (string, error) { +// SSH executes ssh command with 
runSSHCommand as root. The `sudo` makes sure that all commands +// are executed by root, so that there won't be permission mismatch between different commands. +func SSH(host string, cmd ...string) (string, error) { + return runSSHCommand("ssh", append([]string{GetHostnameOrIp(host), "--", "sudo"}, cmd...)...) +} + +// SSHNoSudo executes ssh command with runSSHCommand as normal user. Sometimes we need this, +// for example creating a directory that we'll copy files there with scp. +func SSHNoSudo(host string, cmd ...string) (string, error) { + return runSSHCommand("ssh", append([]string{GetHostnameOrIp(host), "--"}, cmd...)...) +} + +// runSSHCommand executes the ssh or scp command, adding the flag provided --ssh-options +func runSSHCommand(cmd string, args ...string) (string, error) { if env, found := sshOptionsMap[*sshEnv]; found { args = append(strings.Split(env, " "), args...) } diff --git a/test/e2e_node/runner/local/run_local.go b/test/e2e_node/runner/local/run_local.go index 241c060eb7f..3371f533515 100644 --- a/test/e2e_node/runner/local/run_local.go +++ b/test/e2e_node/runner/local/run_local.go @@ -56,7 +56,7 @@ func main() { func runCommand(name string, args ...string) error { glog.Infof("Running command: %v %v", name, strings.Join(args, " ")) - cmd := exec.Command("sh", "-c", strings.Join(append([]string{name}, args...), " ")) + cmd := exec.Command("sudo", "sh", "-c", strings.Join(append([]string{name}, args...), " ")) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr return cmd.Run() diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index 193e697479f..045a4278ee6 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -507,7 +507,7 @@ func createInstance(imageConfig *internalGCEImage) (string, error) { remote.AddHostnameIp(name, externalIp) } var output string - output, err = remote.RunSshCommand("ssh", remote.GetHostnameOrIp(name), "--", "sudo", "docker", 
"version") + output, err = remote.SSH(name, "docker", "version") if err != nil { err = fmt.Errorf("instance %s not running docker daemon - Command failed: %s", name, output) continue diff --git a/test/e2e_node/services/server.go b/test/e2e_node/services/server.go index 18a9d0a31eb..2911a162104 100644 --- a/test/e2e_node/services/server.go +++ b/test/e2e_node/services/server.go @@ -44,7 +44,7 @@ type server struct { // startCommand is the command used to start the server startCommand *exec.Cmd // killCommand is the command used to stop the server. It is not required. If it - // is not specified, `sudo kill` will be used to stop the server. + // is not specified, `kill` will be used to stop the server. killCommand *exec.Cmd // restartCommand is the command used to restart the server. If provided, it will be used // instead of startCommand when restarting the server. @@ -338,19 +338,7 @@ func (s *server) kill() error { const timeout = 10 * time.Second for _, signal := range []string{"-TERM", "-KILL"} { glog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal) - cmd := exec.Command("sudo", "kill", signal, strconv.Itoa(pid)) - - // Run the 'kill' command in a separate process group so sudo doesn't ignore it - attrs := &syscall.SysProcAttr{} - // Hack to set unix-only field without build tags. 
- setpgidField := reflect.ValueOf(attrs).Elem().FieldByName("Setpgid") - if setpgidField.IsValid() { - setpgidField.Set(reflect.ValueOf(true)) - } else { - return fmt.Errorf("Failed to set Setpgid field (non-unix build)") - } - cmd.SysProcAttr = attrs - + cmd := exec.Command("kill", signal, strconv.Itoa(pid)) _, err := cmd.Output() if err != nil { glog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err) diff --git a/test/e2e_node/services/services.go b/test/e2e_node/services/services.go index 56250df3d6a..8b998735e93 100644 --- a/test/e2e_node/services/services.go +++ b/test/e2e_node/services/services.go @@ -78,16 +78,18 @@ func NewE2EServices(monitorParent bool) *E2EServices { // standard kubelet launcher) func (e *E2EServices) Start() error { var err error - // Start kubelet - // Create the manifest path for kubelet. - // TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test. - framework.TestContext.ManifestPath, err = ioutil.TempDir("", "node-e2e-pod") - if err != nil { - return fmt.Errorf("failed to create static pod manifest directory: %v", err) - } - e.kubelet, err = e.startKubelet() - if err != nil { - return fmt.Errorf("failed to start kubelet: %v", err) + if !framework.TestContext.NodeConformance { + // Start kubelet + // Create the manifest path for kubelet. + // TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test. + framework.TestContext.ManifestPath, err = ioutil.TempDir("", "node-e2e-pod") + if err != nil { + return fmt.Errorf("failed to create static pod manifest directory: %v", err) + } + e.kubelet, err = e.startKubelet() + if err != nil { + return fmt.Errorf("failed to start kubelet: %v", err) + } } e.services, err = e.startInternalServices() return err @@ -96,14 +98,16 @@ func (e *E2EServices) Start() error { // Stop stops the e2e services. func (e *E2EServices) Stop() { defer func() { - // Collect log files. 
- e.getLogFiles() - // Cleanup the manifest path for kubelet. - manifestPath := framework.TestContext.ManifestPath - if manifestPath != "" { - err := os.RemoveAll(manifestPath) - if err != nil { - glog.Errorf("Failed to delete static pod manifest directory %s: %v", manifestPath, err) + if !framework.TestContext.NodeConformance { + // Collect log files. + e.getLogFiles() + // Cleanup the manifest path for kubelet. + manifestPath := framework.TestContext.ManifestPath + if manifestPath != "" { + err := os.RemoveAll(manifestPath) + if err != nil { + glog.Errorf("Failed to delete static pod manifest directory %s: %v", manifestPath, err) + } } } }() @@ -144,17 +148,8 @@ func (e *E2EServices) startInternalServices() (*server, error) { if err != nil { return nil, fmt.Errorf("can't get current binary: %v", err) } - startCmd := exec.Command("sudo", testBin, - // TODO(mtaufen): Flags e.g. that target the TestContext need to be manually forwarded to the - // test binary when we start it up in run-services mode. This is not ideal. - // Very unintuitive because it prevents any flags NOT manually forwarded here - // from being set via TEST_ARGS when running tests from the command line. - "--run-services-mode", - "--server-start-timeout", serverStartTimeout.String(), - "--feature-gates", framework.TestContext.FeatureGates, - "--logtostderr", - "--vmodule=*="+LOG_VERBOSITY_LEVEL, - ) + // Pass all flags into the child process, so that it will see the same flag set. + startCmd := exec.Command(testBin, append([]string{"--run-services-mode"}, os.Args[1:]...)...) 
server := newServer("services", startCmd, nil, nil, getServicesHealthCheckURLs(), servicesLogFile, e.monitorParent, false) return server, server.start() } @@ -180,8 +175,8 @@ func (e *E2EServices) startKubelet() (*server, error) { // sense to test it that way unitName := fmt.Sprintf("kubelet-%d.service", rand.Int31()) cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, "--remain-after-exit", builder.GetKubeletServerBin()) - killCommand = exec.Command("sudo", "systemctl", "kill", unitName) - restartCommand = exec.Command("sudo", "systemctl", "restart", unitName) + killCommand = exec.Command("systemctl", "kill", unitName) + restartCommand = exec.Command("systemctl", "restart", unitName) e.logFiles["kubelet.log"] = logFileData{ journalctlCommand: []string{"-u", unitName}, } @@ -246,7 +241,7 @@ func (e *E2EServices) startKubelet() (*server, error) { "--network-plugin-dir", filepath.Join(cwd, "cni", "bin")) // Enable kubenet } - cmd := exec.Command("sudo", cmdArgs...) + cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...) server := newServer( "kubelet", cmd, @@ -281,7 +276,7 @@ func (e *E2EServices) getLogFiles() { continue } glog.Infof("Get log file %q with journalctl command %v.", targetFileName, logFileData.journalctlCommand) - out, err := exec.Command("sudo", append([]string{"journalctl"}, logFileData.journalctlCommand...)...).CombinedOutput() + out, err := exec.Command("journalctl", logFileData.journalctlCommand...).CombinedOutput() if err != nil { glog.Errorf("failed to get %q from journald: %v, %v", targetFileName, string(out), err) } else { @@ -314,10 +309,10 @@ func isJournaldAvailable() bool { func copyLogFile(src, target string) error { // If not a journald based distro, then just symlink files. 
- if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil { + if out, err := exec.Command("cp", src, target).CombinedOutput(); err != nil { return fmt.Errorf("failed to copy %q to %q: %v, %v", src, target, out, err) } - if out, err := exec.Command("sudo", "chmod", "a+r", target).CombinedOutput(); err != nil { + if out, err := exec.Command("chmod", "a+r", target).CombinedOutput(); err != nil { return fmt.Errorf("failed to make log file %q world readable: %v, %v", target, out, err) } return nil diff --git a/test/e2e_node/system/kernel_validator.go b/test/e2e_node/system/kernel_validator.go index f71d7072e32..e98cd46510f 100644 --- a/test/e2e_node/system/kernel_validator.go +++ b/test/e2e_node/system/kernel_validator.go @@ -208,14 +208,13 @@ func (k *KernelValidator) getKernelConfigReader() (io.Reader, error) { } // If the kernel config file is not found, try to load the kernel // config module and check again. - // TODO(random-liu): Remove "sudo" in containerize test PR #31093 - output, err := exec.Command("sudo", modprobeCmd, configsModule).CombinedOutput() + output, err := exec.Command(modprobeCmd, configsModule).CombinedOutput() if err != nil { return nil, fmt.Errorf("unable to load kernel module %q: output - %q, err - %v", configsModule, output, err) } // Unload the kernel config module to make sure the validation have no side effect. - defer exec.Command("sudo", modprobeCmd, "-r", configsModule).Run() + defer exec.Command(modprobeCmd, "-r", configsModule).Run() loadModule = true } return nil, fmt.Errorf("no config path in %v is available", possibePaths)