Merge pull request #31093 from Random-Liu/containerize-node-e2e-test

Automatic merge from submit-queue

Node Conformance Test: Containerize the node e2e test

For #30122, #30174.
Based on #32427, #32454.

**Please only review the last 3 commits.**

This PR packages the node e2e test into a docker image:
- 1st commit: Add a `NodeConformance` flag to the node e2e framework so that it skips starting kubelet and collecting system logs. We do this because:
  - There are many ways to manage kubelet and system logs; for each setup we would need to mount different things into the container and run different commands. It is hard and unnecessary to handle that complexity inside the test suite.
- 2nd commit: Remove all `sudo` usage in the test container. We do this because:
  - Most containers have no `sudo` command, and there is no need to use `sudo` inside a container.
  - Using `sudo` inside the test introduces extra complexity (https://github.com/kubernetes/kubernetes/issues/29211, https://github.com/kubernetes/kubernetes/issues/26748). In fact, we just need to run the test suite itself with `sudo`.
- 3rd commit: Package the test into a docker image, with a corresponding `Makefile` and `Dockerfile`. We also add a `run_test.sh` script that starts kubelet and runs the test container, as sketched below. The script is only for demonstration purposes, and it will also be used by our node e2e framework. In the future, we should update it to start kubelet the way it is started in production (for example with `systemd` or `supervisord`).
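For orientation, here is a minimal sketch of what `run_test.sh` automates: start kubelet against a static pod manifest directory, then run the published test image against the host. The manifest directory, log path, and image tag below are illustrative assumptions; the real script passes many more kubelet flags and health-checks kubelet before starting the container.

```sh
# 1. Start kubelet with a static pod manifest directory (simplified; run_test.sh
#    passes many more flags and waits for the kubelet healthz endpoint).
sudo kubelet --api-servers=http://localhost:8080 --config=/etc/manifest &> /tmp/kubelet.log &

# 2. Run the node test container against the host.
sudo docker run -it --rm --privileged=true --net=host \
  -v /:/rootfs -v /etc/manifest:/etc/manifest -v /tmp/results:/var/result \
  gcr.io/google_containers/node-test-amd64:0.1
```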

@dchen1107 @vishh 
/cc @kubernetes/sig-node @kubernetes/sig-testing



**Release note**:


``` release-note
Release alpha version node test container gcr.io/google_containers/node-test-ARCH:0.1 for users to verify their node setup.
```
Kubernetes Submit Queue 2016-11-07 23:41:25 -08:00 committed by GitHub
commit 0df6384770
12 changed files with 390 additions and 88 deletions


@@ -102,6 +102,8 @@ type TestContextType struct {
 type NodeTestContextType struct {
 	// Name of the node to run tests on (node e2e suite only).
 	NodeName string
+	// NodeConformance indicates whether the test is running in node conformance mode.
+	NodeConformance bool
 	// DisableKubenet disables kubenet when starting kubelet.
 	DisableKubenet bool
 	// Whether to enable the QoS Cgroup Hierarchy or not
@@ -209,6 +211,13 @@ func RegisterClusterFlags() {
 // Register flags specific to the node e2e test suite.
 func RegisterNodeFlags() {
 	flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).")
+	// TODO(random-liu): Move kubelet start logic out of the test.
+	// TODO(random-liu): Move log fetch logic out of the test.
+	// There are different ways to start kubelet (systemd, initd, docker, rkt, manually started etc.)
+	// and manage logs (journald, upstart etc.).
+	// For different situation we need to mount different things into the container, run different commands.
+	// It is hard and unnecessary to deal with the complexity inside the test suite.
+	flag.BoolVar(&TestContext.NodeConformance, "conformance", false, "If true, the test suite will not start kubelet, and fetch system log (kernel, docker, kubelet log etc.) to the report directory.")
 	// TODO(random-liu): Remove kubelet related flags when we move the kubelet start logic out of the test.
 	// TODO(random-liu): Find someway to get kubelet configuration, and automatic config and filter test based on the configuration.
 	flag.BoolVar(&TestContext.DisableKubenet, "disable-kubenet", false, "If true, start kubelet without kubenet. (default false)")
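For illustration, when kubelet is managed outside the suite (for example by systemd), a caller could invoke the test binary with the new flag roughly as below; the focus regex and paths are assumptions for this example rather than defaults enforced by the flag, and the invocation mirrors the container entrypoint added later in this PR.

```sh
# Hypothetical direct invocation: kubelet is already running, so the suite
# skips kubelet startup and system-log collection and only runs the tests.
sudo ./ginkgo --focus="\[Conformance\]" ./e2e_node.test -- \
  --conformance=true \
  --manifest-path=/etc/manifest \
  --report-dir=/tmp/results
```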


@@ -119,6 +119,7 @@ func loadTestProfiles() error {
 		return fmt.Errorf("failed to write profiles to file: %v", err)
 	}
+	// TODO(random-liu): The test is run as root now, no need to use sudo here.
 	cmd := exec.Command("sudo", "apparmor_parser", "-r", "-W", f.Name())
 	stderr := &bytes.Buffer{}
 	cmd.Stderr = stderr


@@ -0,0 +1,44 @@
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM BASEIMAGE
COPY ginkgo /usr/local/bin/
COPY e2e_node.test /usr/local/bin
# The following environment variables can be override when starting the container.
# FOCUS is regex matching test to run. By default run all conformance test.
# SKIP is regex matching test to skip. By default empty.
# PARALLELISM is the number of processes the test will run in parallel.
# REPORT_PATH is the path in the container to save test result and logs.
# MANIFEST_PATH is the kubelet manifest path in the container.
# FLAKE_ATTEMPTS is the time to retry when there is a test failure. By default 2.
# TEST_ARGS is the test arguments passed into the test.
ENV FOCUS="\[Conformance\]" \
    SKIP="\[Flaky\]|\[Serial\]" \
    PARALLELISM=8 \
    REPORT_PATH="/var/result" \
    MANIFEST_PATH="/etc/manifest" \
    FLAKE_ATTEMPTS=2 \
    TEST_ARGS=""

ENTRYPOINT ginkgo --focus="$FOCUS" \
    --skip="$SKIP" \
    --nodes=$PARALLELISM \
    --flakeAttempts=$FLAKE_ATTEMPTS \
    /usr/local/bin/e2e_node.test \
    -- --conformance=true \
    --prepull-images=false \
    --manifest-path="$MANIFEST_PATH" \
    --report-dir="$REPORT_PATH" \
    $TEST_ARGS
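The `ENV` defaults above can be overridden when starting the container with `docker run -e`. A hedged example follows; the focus regex and the host result directory are illustrative, and the mounts mirror the ones assumed by `run_test.sh`.

```sh
# Run a narrower focus with fewer retries, keeping the default skip list,
# and collect results under /tmp/results on the host.
sudo docker run -it --rm --privileged=true --net=host \
  -e FOCUS="MirrorPod" -e FLAKE_ATTEMPTS=1 \
  -v /:/rootfs -v /etc/manifest:/etc/manifest -v /tmp/results:/var/result \
  gcr.io/google_containers/node-test-amd64:0.1
```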


@@ -0,0 +1,60 @@
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build the node-test image.
#
# Usage:
# [ARCH=amd64] [REGISTRY="gcr.io/google_containers"] [BIN_DIR="../../../../_output/bin"] make (build|push) VERSION={some_version_number e.g. 0.1}
# TODO(random-liu): Add this into release progress.
REGISTRY?=gcr.io/google_containers
ARCH?=amd64
# BIN_DIR is the directory to find binaries, overwrite with ../../../../_output/bin
# for local development.
BIN_DIR?=../../../../_output/dockerized/bin/linux/${ARCH}
TEMP_DIR:=$(shell mktemp -d)
BASEIMAGE_amd64=debian:jessie
BASEIMAGE_arm=armel/debian:jessie
BASEIMAGE_arm64=aarch64/debian:jessie
BASEIMAGE_ppc64le=ppc64le/debian:jessie
BASEIMAGE?=${BASEIMAGE_${ARCH}}
all: build

build:
ifndef VERSION
	$(error VERSION is undefined)
endif
	cp -r ./* ${TEMP_DIR}
	cp ${BIN_DIR}/ginkgo ${TEMP_DIR}
	cp ${BIN_DIR}/e2e_node.test ${TEMP_DIR}
	cd ${TEMP_DIR} && sed -i.back "s|BASEIMAGE|${BASEIMAGE}|g" Dockerfile
	# Make scripts executable before they are copied into the Docker image. If we make them executable later, in another layer
	# they'll take up twice the space because the new executable binary differs from the old one, but everything is cached in layers.
	cd ${TEMP_DIR} && chmod a+rx \
		e2e_node.test \
		ginkgo
	docker build -t ${REGISTRY}/node-test-${ARCH}:${VERSION} ${TEMP_DIR}

push: build
	gcloud docker push ${REGISTRY}/node-test-${ARCH}:${VERSION}

.PHONY: all
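Assuming the test binaries have already been built under `_output`, building and pushing the image might look like this when run from the directory containing this `Makefile`; the version number is just an example.

```sh
# Build the amd64 image from locally built binaries, then push it.
make build VERSION=0.1 ARCH=amd64 BIN_DIR=../../../../_output/bin
make push VERSION=0.1 ARCH=amd64
```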


@@ -0,0 +1,174 @@
#!/bin/bash
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is only for demonstrating how to use the node test container. In
# production environment, kubelet bootstrap will be more complicated, user
# should configure the node test container accordingly.
# In addition, this script will also be used in the node e2e test to let it use
# the containerized test suite.
# TODO(random-liu): Use standard installer to install kubelet.
# TODO(random-liu): Use standard tool to start kubelet in production way (such
# as systemd, supervisord etc.)
# TODO(random-liu): Initialize kubelet with standard configmap after dynamic
# configuration landing, so that all test could get the current kubelet
# configuration and react accordingly.
# Refresh sudo credentials if not running on GCE.
if ! ping -c 1 -q metadata.google.internal &> /dev/null; then
  sudo -v || exit 1
fi
# FOCUS is ginkgo focus to select which tests to run. By default, FOCUS is
# initialized as "\[Conformance\]" in the test container to run all conformance
# test.
FOCUS=${FOCUS:-""}
# SKIP is ginkgo skip to select which tests to skip. By default, SKIP is
# initialized as "\[Flaky\]|\[Serial\]" in the test container skipping all
# flaky and serial test.
SKIP=${SKIP:-""}
# REGISTRY is the image registry for node test image.
REGISTRY=${REGISTRY:-"gcr.io/google_containers"}
# ARCH is the architecture of current machine, the script will use this to
# select corresponding test container image.
ARCH=${ARCH:-"amd64"}
# VERSION is the version of the test container image.
VERSION=${VERSION:-"0.1"}
# KUBELET_BIN is the kubelet binary name. If it is not specified, use the
# default binary name "kubelet".
KUBELET_BIN=${KUBELET_BIN:-"kubelet"}
# KUBELET is the kubelet binary path. If it is not specified, assume kubelet is
# in PATH.
KUBELET=${KUBELET:-"`which $KUBELET_BIN`"}
# LOG_DIR is the absolute path of the directory where the test will collect all
# logs to. By default, use the current directory.
LOG_DIR=${LOG_DIR:-`pwd`}
mkdir -p $LOG_DIR
# NETWORK_PLUGIN is the network plugin used by kubelet. Do not use network
# plugin by default.
NETWORK_PLUGIN=${NETWORK_PLUGIN:-""}
# NETWORK_PLUGIN_PATH is the path to network plugin binary.
NETWORK_PLUGIN_PATH=${NETWORK_PLUGIN_PATH:-""}
# start_kubelet starts kubelet and redirects the kubelet log to $LOG_DIR/kubelet.log.
kubelet_log=kubelet.log
start_kubelet() {
  echo "Starting kubelet..."
  sudo -b $KUBELET $@ &>$LOG_DIR/$kubelet_log
  if [ $? -ne 0 ]; then
    echo "Failed to start kubelet"
    exit 1
  fi
}

# wait_kubelet retries 10 times for kubelet to be ready by checking http://127.0.0.1:10255/healthz.
wait_kubelet() {
  echo "Health checking kubelet..."
  healthCheckURL=http://127.0.0.1:10255/healthz
  local maxRetry=10
  local cur=1
  while [ $cur -le $maxRetry ]; do
    curl -s $healthCheckURL > /dev/null
    if [ $? -eq 0 ]; then
      echo "Kubelet is ready"
      break
    fi
    if [ $cur -eq $maxRetry ]; then
      echo "Health check exceeds max retry"
      exit 1
    fi
    echo "Kubelet is not ready"
    sleep 1
    ((cur++))
  done
}

# kill_kubelet kills kubelet.
kill_kubelet() {
  echo "Stopping kubelet..."
  sudo pkill $KUBELET_BIN
  if [ $? -ne 0 ]; then
    echo "Failed to stop kubelet."
    exit 1
  fi
}

# run_test runs the node test container.
run_test() {
  env=""
  if [ ! -z "$FOCUS" ]; then
    env="$env -e FOCUS=$FOCUS"
  fi
  if [ ! -z "$SKIP" ]; then
    env="$env -e SKIP=$SKIP"
  fi
  # The test assumes that inside the container:
  # * kubelet manifest path is mounted to /etc/manifest;
  # * log collect directory is mounted to /var/result;
  # * root file system is mounted to /rootfs.
  sudo docker run -it --rm --privileged=true --net=host -v /:/rootfs \
    -v $config_dir:/etc/manifest -v $LOG_DIR:/var/result $env $REGISTRY/node-test-$ARCH:$VERSION
}
# Check whether kubelet is running. If kubelet is running, tell the user to stop
# it before running the test.
pid=`pidof $KUBELET_BIN`
if [ ! -z $pid ]; then
  echo "Kubelet is running (pid=$pid), please stop it before running the test."
  exit 1
fi
apiserver=http://localhost:8080
volume_stats_agg_period=10s
allow_privileged=true
serialize_image_pulls=false
config_dir=`mktemp -d`
file_check_frequency=10s
pod_cidr=10.180.0.0/24
log_level=4
start_kubelet --api-servers $apiserver \
  --volume-stats-agg-period $volume_stats_agg_period \
  --allow-privileged=$allow_privileged \
  --serialize-image-pulls=$serialize_image_pulls \
  --config $config_dir \
  --file-check-frequency $file_check_frequency \
  --pod-cidr=$pod_cidr \
  --runtime-cgroups=/docker-daemon \
  --kubelet-cgroups=/kubelet \
  --system-cgroups=/system \
  --cgroup-root=/ \
  --network-plugin=$NETWORK_PLUGIN \
  --network-plugin-dir=$NETWORK_PLUGIN_PATH \
  --v=$log_level \
  --logtostderr
wait_kubelet
run_test
kill_kubelet
# Clean up the kubelet config directory
sudo rm -rf $config_dir
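A typical invocation of the script only overrides the environment variables it exposes; for example (the skip regex and log directory below are illustrative):

```sh
# Run the conformance tests, additionally skip serial tests, and collect
# kubelet and test logs under /tmp/node-conformance.
FOCUS="\[Conformance\]" SKIP="\[Serial\]" LOG_DIR=/tmp/node-conformance ./run_test.sh
```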


@@ -28,6 +28,7 @@ import (
 	"os"
 	"os/exec"
 	"path"
+	"syscall"
 	"testing"
 	"time"
@@ -71,6 +72,10 @@ func TestMain(m *testing.M) {
 	os.Exit(m.Run())
 }
+// When running the containerized conformance test, we'll mount the
+// host root filesystem as readonly to /rootfs.
+const rootfs = "/rootfs"
 func TestE2eNode(t *testing.T) {
 	if *runServicesMode {
 		// If run-services-mode is specified, only run services in current process.
@@ -79,6 +84,15 @@ func TestE2eNode(t *testing.T) {
 	}
 	if *systemValidateMode {
 		// If system-validate-mode is specified, only run system validation in current process.
+		if framework.TestContext.NodeConformance {
+			// Chroot to /rootfs to make system validation can check system
+			// as in the root filesystem.
+			// TODO(random-liu): Consider to chroot the whole test process to make writing
+			// test easier.
+			if err := syscall.Chroot(rootfs); err != nil {
+				glog.Exitf("chroot %q failed: %v", rootfs, err)
+			}
+		}
 		if err := system.Validate(); err != nil {
 			glog.Exitf("system validation failed: %v", err)
 		}
@@ -172,12 +186,12 @@ func validateSystem() error {
 	if err != nil {
 		return fmt.Errorf("can't get current binary: %v", err)
 	}
-	// TODO(random-liu): Remove sudo in containerize PR.
-	output, err := exec.Command("sudo", testBin, "--system-validate-mode").CombinedOutput()
+	// Pass all flags into the child process, so that it will see the same flag set.
+	output, err := exec.Command(testBin, append([]string{"--system-validate-mode"}, os.Args[1:]...)...).CombinedOutput()
 	// The output of system validation should have been formatted, directly print here.
 	fmt.Print(string(output))
 	if err != nil {
-		return fmt.Errorf("system validation failed")
+		return fmt.Errorf("system validation failed: %v", err)
 	}
 	return nil
 }
@@ -190,7 +204,7 @@ func maskLocksmithdOnCoreos() {
 		return
 	}
 	if bytes.Contains(data, []byte("ID=coreos")) {
-		output, err := exec.Command("sudo", "systemctl", "mask", "--now", "locksmithd").CombinedOutput()
+		output, err := exec.Command("systemctl", "mask", "--now", "locksmithd").CombinedOutput()
 		Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output)))
 		glog.Infof("Locksmithd is masked successfully")
 	}

@@ -161,7 +161,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	if err != nil {
 		return "", false, fmt.Errorf("could not find username: %v", err)
 	}
-	output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username)
+	output, err := SSH(host, "usermod", "-a", "-G", "docker", uname.Username)
 	if err != nil {
 		return "", false, fmt.Errorf("instance %s not running docker daemon - Command failed: %s", host, output)
 	}
@@ -172,14 +172,15 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	dirName := fmt.Sprintf("gcloud-e2e-%d", rand.Int31())
 	tmp := fmt.Sprintf("/tmp/%s", dirName)
-	_, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "mkdir", tmp)
+	// Do not sudo here, so that we can use scp to copy test archive to the directdory.
+	_, err := SSHNoSudo(host, "mkdir", tmp)
 	if err != nil {
 		// Exit failure with the error
 		return "", false, err
 	}
 	if cleanup {
 		defer func() {
-			output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "rm", "-rf", tmp)
+			output, err := SSH(host, "rm", "-rf", tmp)
 			if err != nil {
 				glog.Errorf("failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output)
 			}
@@ -188,57 +189,62 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	// Install the cni plugin.
 	cniPath := filepath.Join(tmp, CNIDirectory)
-	if _, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c",
-		getSshCommand(" ; ", fmt.Sprintf("sudo mkdir -p %s", cniPath),
-			fmt.Sprintf("sudo wget -O - %s | sudo tar -xz -C %s", CNIURL, cniPath))); err != nil {
+	cmd := getSSHCommand(" ; ",
+		fmt.Sprintf("mkdir -p %s", cniPath),
+		fmt.Sprintf("wget -O - %s | tar -xz -C %s", CNIURL, cniPath),
+	)
+	if _, err := SSH(host, "sh", "-c", cmd); err != nil {
 		// Exit failure with the error
 		return "", false, err
 	}
 	// Configure iptables firewall rules
 	// TODO: consider calling bootstrap script to configure host based on OS
-	cmd := getSshCommand("&&",
+	cmd = getSSHCommand("&&",
 		`iptables -L INPUT | grep "Chain INPUT (policy DROP)"`,
 		"(iptables -C INPUT -w -p TCP -j ACCEPT || iptables -A INPUT -w -p TCP -j ACCEPT)",
 		"(iptables -C INPUT -w -p UDP -j ACCEPT || iptables -A INPUT -w -p UDP -j ACCEPT)",
 		"(iptables -C INPUT -w -p ICMP -j ACCEPT || iptables -A INPUT -w -p ICMP -j ACCEPT)")
-	output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "sh", "-c", cmd)
+	output, err := SSH(host, "sh", "-c", cmd)
 	if err != nil {
 		glog.Errorf("Failed to configured firewall: %v output: %v", err, output)
 	}
-	cmd = getSshCommand("&&",
+	cmd = getSSHCommand("&&",
 		`iptables -L FORWARD | grep "Chain FORWARD (policy DROP)" > /dev/null`,
 		"(iptables -C FORWARD -w -p TCP -j ACCEPT || iptables -A FORWARD -w -p TCP -j ACCEPT)",
 		"(iptables -C FORWARD -w -p UDP -j ACCEPT || iptables -A FORWARD -w -p UDP -j ACCEPT)",
 		"(iptables -C FORWARD -w -p ICMP -j ACCEPT || iptables -A FORWARD -w -p ICMP -j ACCEPT)")
-	output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "sh", "-c", cmd)
+	output, err = SSH(host, "sh", "-c", cmd)
 	if err != nil {
 		glog.Errorf("Failed to configured firewall: %v output: %v", err, output)
 	}
 	// Copy the archive to the staging directory
-	_, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), tmp))
+	_, err = runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), tmp))
 	if err != nil {
 		// Exit failure with the error
 		return "", false, err
 	}
 	// Kill any running node processes
-	cmd = getSshCommand(" ; ",
-		"sudo pkill kubelet",
-		"sudo pkill kube-apiserver",
-		"sudo pkill etcd",
+	cmd = getSSHCommand(" ; ",
+		"pkill kubelet",
+		"pkill kube-apiserver",
+		"pkill etcd",
 	)
 	// No need to log an error if pkill fails since pkill will fail if the commands are not running.
 	// If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd
 	// logs about failing to bind the required ports.
 	glog.Infof("Killing any existing node processes on %s", host)
-	RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
+	SSH(host, "sh", "-c", cmd)
 	// Extract the archive
-	cmd = getSshCommand(" && ", fmt.Sprintf("cd %s", tmp), fmt.Sprintf("tar -xzvf ./%s", archiveName))
+	cmd = getSSHCommand(" && ",
+		fmt.Sprintf("cd %s", tmp),
+		fmt.Sprintf("tar -xzvf ./%s", archiveName),
+	)
 	glog.Infof("Extracting tar on %s", host)
-	output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
+	output, err = SSH(host, "sh", "-c", cmd)
 	if err != nil {
 		// Exit failure with the error
 		return "", false, err
@@ -261,7 +267,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	}
 	// Determine if tests will run on a GCI node.
-	output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", "'cat /etc/os-release'")
+	output, err = SSH(host, "sh", "-c", "'cat /etc/os-release'")
 	if err != nil {
 		glog.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output)
 		return "", false, fmt.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output)
@@ -270,7 +276,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	// Note this implicitly requires the script to be where we expect in the tarball, so if that location changes the error
 	// here will tell us to update the remote test runner.
 	mounterPath := filepath.Join(tmp, "cluster/gce/gci/mounter/mounter")
-	output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath))
+	output, err = SSH(host, "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath))
 	if err != nil {
 		glog.Errorf("Unable to chmod 544 GCI mounter script. Err: %v, Output:\n%s", err, output)
 		return "", false, err
@@ -284,7 +290,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	}
 	// Run the tests
-	cmd = getSshCommand(" && ",
+	cmd = getSSHCommand(" && ",
 		fmt.Sprintf("cd %s", tmp),
 		fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --logtostderr --v 4 --node-name=%s --report-dir=%s/results --report-prefix=%s %s",
 			testTimeoutSeconds.Seconds(), ginkgoFlags, host, tmp, junitFilePrefix, testArgs),
@@ -292,7 +298,7 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	aggErrs := []error{}
 	glog.Infof("Starting tests on %s", host)
-	output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
+	output, err = SSH(host, "sh", "-c", cmd)
 	if err != nil {
 		aggErrs = append(aggErrs, err)
@@ -313,10 +319,10 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	// Try getting the system logs from journald and store it to a file.
 	// Don't reuse the original test directory on the remote host because
 	// it could've be been removed if the node was rebooted.
-	_, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", fmt.Sprintf("'sudo journalctl --system --all > %s'", logPath))
+	_, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath))
 	if err == nil {
 		glog.Infof("Got the system logs from journald; copying it back...")
-		if _, err := RunSshCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil {
+		if _, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil {
 			glog.Infof("Failed to copy the log: err: %v", err)
 		}
 	} else {
@@ -334,26 +340,38 @@ func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string
 	}
 func getTestArtifacts(host, testDir string) error {
-	_, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/%s", *resultsDir, host))
+	_, err := runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/%s", *resultsDir, host))
 	if err != nil {
 		return err
 	}
 	// Copy junit to the top of artifacts
-	_, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/", *resultsDir))
+	_, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/", *resultsDir))
 	if err != nil {
 		return err
 	}
 	return nil
 }
-// getSshCommand handles proper quoting so that multiple commands are executed in the same shell over ssh
-func getSshCommand(sep string, args ...string) string {
+// getSSHCommand handles proper quoting so that multiple commands are executed in the same shell over ssh
+func getSSHCommand(sep string, args ...string) string {
 	return fmt.Sprintf("'%s'", strings.Join(args, sep))
 }
-// runSshCommand executes the ssh or scp command, adding the flag provided --ssh-options
-func RunSshCommand(cmd string, args ...string) (string, error) {
+// SSH executes ssh command with runSSHCommand as root. The `sudo` makes sure that all commands
+// are executed by root, so that there won't be permission mismatch between different commands.
+func SSH(host string, cmd ...string) (string, error) {
+	return runSSHCommand("ssh", append([]string{GetHostnameOrIp(host), "--", "sudo"}, cmd...)...)
+}
+// SSHNoSudo executes ssh command with runSSHCommand as normal user. Sometimes we need this,
+// for example creating a directory that we'll copy files there with scp.
+func SSHNoSudo(host string, cmd ...string) (string, error) {
+	return runSSHCommand("ssh", append([]string{GetHostnameOrIp(host), "--"}, cmd...)...)
+}
+// runSSHCommand executes the ssh or scp command, adding the flag provided --ssh-options
+func runSSHCommand(cmd string, args ...string) (string, error) {
 	if env, found := sshOptionsMap[*sshEnv]; found {
 		args = append(strings.Split(env, " "), args...)
 	}


@@ -56,7 +56,7 @@ func main() {
 func runCommand(name string, args ...string) error {
 	glog.Infof("Running command: %v %v", name, strings.Join(args, " "))
-	cmd := exec.Command("sh", "-c", strings.Join(append([]string{name}, args...), " "))
+	cmd := exec.Command("sudo", "sh", "-c", strings.Join(append([]string{name}, args...), " "))
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	return cmd.Run()


@@ -507,7 +507,7 @@ func createInstance(imageConfig *internalGCEImage) (string, error) {
 		remote.AddHostnameIp(name, externalIp)
 	}
 	var output string
-	output, err = remote.RunSshCommand("ssh", remote.GetHostnameOrIp(name), "--", "sudo", "docker", "version")
+	output, err = remote.SSH(name, "docker", "version")
 	if err != nil {
 		err = fmt.Errorf("instance %s not running docker daemon - Command failed: %s", name, output)
 		continue


@@ -44,7 +44,7 @@ type server struct {
 	// startCommand is the command used to start the server
 	startCommand *exec.Cmd
 	// killCommand is the command used to stop the server. It is not required. If it
-	// is not specified, `sudo kill` will be used to stop the server.
+	// is not specified, `kill` will be used to stop the server.
 	killCommand *exec.Cmd
 	// restartCommand is the command used to restart the server. If provided, it will be used
 	// instead of startCommand when restarting the server.
@@ -338,19 +338,7 @@ func (s *server) kill() error {
 	const timeout = 10 * time.Second
 	for _, signal := range []string{"-TERM", "-KILL"} {
 		glog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal)
-		cmd := exec.Command("sudo", "kill", signal, strconv.Itoa(pid))
-		// Run the 'kill' command in a separate process group so sudo doesn't ignore it
-		attrs := &syscall.SysProcAttr{}
-		// Hack to set unix-only field without build tags.
-		setpgidField := reflect.ValueOf(attrs).Elem().FieldByName("Setpgid")
-		if setpgidField.IsValid() {
-			setpgidField.Set(reflect.ValueOf(true))
-		} else {
-			return fmt.Errorf("Failed to set Setpgid field (non-unix build)")
-		}
-		cmd.SysProcAttr = attrs
+		cmd := exec.Command("kill", signal, strconv.Itoa(pid))
 		_, err := cmd.Output()
 		if err != nil {
 			glog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err)


@@ -78,6 +78,7 @@ func NewE2EServices(monitorParent bool) *E2EServices {
 // standard kubelet launcher)
 func (e *E2EServices) Start() error {
 	var err error
+	if !framework.TestContext.NodeConformance {
 	// Start kubelet
 	// Create the manifest path for kubelet.
 	// TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test.
@@ -89,6 +90,7 @@ func (e *E2EServices) Start() error {
 	if err != nil {
 		return fmt.Errorf("failed to start kubelet: %v", err)
 	}
+	}
 	e.services, err = e.startInternalServices()
 	return err
 }
@@ -96,6 +98,7 @@ func (e *E2EServices) Start() error {
 // Stop stops the e2e services.
 func (e *E2EServices) Stop() {
 	defer func() {
+		if !framework.TestContext.NodeConformance {
 		// Collect log files.
 		e.getLogFiles()
 		// Cleanup the manifest path for kubelet.
@@ -106,6 +109,7 @@ func (e *E2EServices) Stop() {
 			glog.Errorf("Failed to delete static pod manifest directory %s: %v", manifestPath, err)
 		}
 	}
+		}
 	}()
 	if e.services != nil {
 		if err := e.services.kill(); err != nil {
@@ -144,17 +148,8 @@ func (e *E2EServices) startInternalServices() (*server, error) {
 	if err != nil {
 		return nil, fmt.Errorf("can't get current binary: %v", err)
 	}
-	startCmd := exec.Command("sudo", testBin,
-		// TODO(mtaufen): Flags e.g. that target the TestContext need to be manually forwarded to the
-		// test binary when we start it up in run-services mode. This is not ideal.
-		// Very unintuitive because it prevents any flags NOT manually forwarded here
-		// from being set via TEST_ARGS when running tests from the command line.
-		"--run-services-mode",
-		"--server-start-timeout", serverStartTimeout.String(),
-		"--feature-gates", framework.TestContext.FeatureGates,
-		"--logtostderr",
-		"--vmodule=*="+LOG_VERBOSITY_LEVEL,
-	)
+	// Pass all flags into the child process, so that it will see the same flag set.
+	startCmd := exec.Command(testBin, append([]string{"--run-services-mode"}, os.Args[1:]...)...)
 	server := newServer("services", startCmd, nil, nil, getServicesHealthCheckURLs(), servicesLogFile, e.monitorParent, false)
 	return server, server.start()
 }
@@ -180,8 +175,8 @@ func (e *E2EServices) startKubelet() (*server, error) {
 	// sense to test it that way
 	unitName := fmt.Sprintf("kubelet-%d.service", rand.Int31())
 	cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, "--remain-after-exit", builder.GetKubeletServerBin())
-	killCommand = exec.Command("sudo", "systemctl", "kill", unitName)
-	restartCommand = exec.Command("sudo", "systemctl", "restart", unitName)
+	killCommand = exec.Command("systemctl", "kill", unitName)
+	restartCommand = exec.Command("systemctl", "restart", unitName)
 	e.logFiles["kubelet.log"] = logFileData{
 		journalctlCommand: []string{"-u", unitName},
 	}
@@ -246,7 +241,7 @@ func (e *E2EServices) startKubelet() (*server, error) {
 		"--network-plugin-dir", filepath.Join(cwd, "cni", "bin")) // Enable kubenet
 	}
-	cmd := exec.Command("sudo", cmdArgs...)
+	cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...)
 	server := newServer(
 		"kubelet",
 		cmd,
@@ -281,7 +276,7 @@ func (e *E2EServices) getLogFiles() {
 			continue
 		}
 		glog.Infof("Get log file %q with journalctl command %v.", targetFileName, logFileData.journalctlCommand)
-		out, err := exec.Command("sudo", append([]string{"journalctl"}, logFileData.journalctlCommand...)...).CombinedOutput()
+		out, err := exec.Command("journalctl", logFileData.journalctlCommand...).CombinedOutput()
 		if err != nil {
 			glog.Errorf("failed to get %q from journald: %v, %v", targetFileName, string(out), err)
 		} else {
@@ -314,10 +309,10 @@ func isJournaldAvailable() bool {
 func copyLogFile(src, target string) error {
 	// If not a journald based distro, then just symlink files.
-	if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil {
+	if out, err := exec.Command("cp", src, target).CombinedOutput(); err != nil {
 		return fmt.Errorf("failed to copy %q to %q: %v, %v", src, target, out, err)
 	}
-	if out, err := exec.Command("sudo", "chmod", "a+r", target).CombinedOutput(); err != nil {
+	if out, err := exec.Command("chmod", "a+r", target).CombinedOutput(); err != nil {
 		return fmt.Errorf("failed to make log file %q world readable: %v, %v", target, out, err)
 	}
 	return nil


@@ -208,14 +208,13 @@ func (k *KernelValidator) getKernelConfigReader() (io.Reader, error) {
 		}
 		// If the kernel config file is not found, try to load the kernel
 		// config module and check again.
-		// TODO(random-liu): Remove "sudo" in containerize test PR #31093
-		output, err := exec.Command("sudo", modprobeCmd, configsModule).CombinedOutput()
+		output, err := exec.Command(modprobeCmd, configsModule).CombinedOutput()
 		if err != nil {
 			return nil, fmt.Errorf("unable to load kernel module %q: output - %q, err - %v",
 				configsModule, output, err)
 		}
 		// Unload the kernel config module to make sure the validation have no side effect.
-		defer exec.Command("sudo", modprobeCmd, "-r", configsModule).Run()
+		defer exec.Command(modprobeCmd, "-r", configsModule).Run()
 		loadModule = true
 	}
 	return nil, fmt.Errorf("no config path in %v is available", possibePaths)