From bca5aea5ba021f2b968bd90dc688e634b1866a1f Mon Sep 17 00:00:00 2001 From: Random-Liu Date: Tue, 29 Nov 2016 17:55:18 -0800 Subject: [PATCH] Refactor RunRemote to support TestSuite interface. --- test/e2e_node/remote/node_e2e.go | 86 +++++++++-- test/e2e_node/remote/remote.go | 178 +++++----------------- test/e2e_node/remote/utils.go | 97 ++++++++++++ test/e2e_node/runner/remote/run_remote.go | 2 +- 4 files changed, 213 insertions(+), 150 deletions(-) create mode 100644 test/e2e_node/remote/utils.go diff --git a/test/e2e_node/remote/node_e2e.go b/test/e2e_node/remote/node_e2e.go index ad778aec959..032f4a4ddf1 100644 --- a/test/e2e_node/remote/node_e2e.go +++ b/test/e2e_node/remote/node_e2e.go @@ -21,11 +21,15 @@ import ( "os" "os/exec" "path/filepath" + "strings" "time" + "github.com/golang/glog" + "k8s.io/kubernetes/test/e2e_node/builder" ) +// NodeE2ERemote contains the specific functions in the node e2e test suite. type NodeE2ERemote struct{} func InitNodeE2ERemote() TestSuite { @@ -33,12 +37,7 @@ func InitNodeE2ERemote() TestSuite { return &NodeE2ERemote{} } -const ( - localGCIMounterPath = "cluster/gce/gci/mounter/mounter" - CNIRelease = "07a8a28637e97b22eb8dfe710eeae1344f69d16e" - CNIDirectory = "cni" - CNIURL = "https://storage.googleapis.com/kubernetes-release/network-plugins/cni-" + CNIRelease + ".tar.gz" -) +const localGCIMounterPath = "cluster/gce/gci/mounter/mounter" // SetupTestPackage sets up the test package with binaries k8s required for node e2e tests func (n *NodeE2ERemote) SetupTestPackage(tardir string) error { @@ -71,8 +70,7 @@ func (n *NodeE2ERemote) SetupTestPackage(tardir string) error { if err != nil { return fmt.Errorf("Could not find K8s root dir! Err: %v", err) } - localSource := "cluster/gce/gci/mounter/mounter" - source := filepath.Join(k8sDir, localSource) + source := filepath.Join(k8sDir, localGCIMounterPath) // Require the GCI mounter script, we want to make sure the remote test runner stays up to date if the mounter file moves if _, err := os.Stat(source); err != nil { @@ -94,7 +92,73 @@ func (n *NodeE2ERemote) SetupTestPackage(tardir string) error { return nil } -// RunTest runs test on the node. -func (n *NodeE2ERemote) RunTest(host, workspace, results, junitFilePrefix, testArgs, ginkgoFlags string, timeout time.Duration) (string, error) { - return "", fmt.Errorf("not implemented") +// updateGCIMounterPath updates kubelet flags to set gci mounter path. This will only take effect for +// GCI image. +func updateGCIMounterPath(args, host, workspace string) (string, error) { + // Determine if tests will run on a GCI node. + output, err := SSH(host, "cat", "/etc/os-release") + if err != nil { + return args, fmt.Errorf("issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output) + } + if !strings.Contains(output, "ID=gci") { + // This is not a GCI image + return args, nil + } + + // If we are testing on a GCI node, we chmod 544 the mounter and specify a different mounter path in the test args. + // We do this here because the local var `workspace` tells us which /tmp/node-e2e-%d is relevant to the current test run. + + // Determine if the GCI mounter script exists locally. + k8sDir, err := builder.GetK8sRootDir() + if err != nil { + return args, fmt.Errorf("could not find K8s root dir! Err: %v", err) + } + source := filepath.Join(k8sDir, localGCIMounterPath) + + // Require the GCI mounter script, we want to make sure the remote test runner stays up to date if the mounter file moves + if _, err = os.Stat(source); err != nil { + return args, fmt.Errorf("could not find GCI mounter script at %q! If this script has been (re)moved, please update the e2e node remote test runner accordingly! Err: %v", source, err) + } + + glog.Infof("GCI node and GCI mounter both detected, modifying --experimental-mounter-path accordingly") + // Note this implicitly requires the script to be where we expect in the tarball, so if that location changes the error + // here will tell us to update the remote test runner. + mounterPath := filepath.Join(workspace, localGCIMounterPath) + output, err = SSH(host, "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath)) + if err != nil { + return args, fmt.Errorf("unabled to chmod 544 GCI mounter script. Err: %v, Output:\n%s", err, output) + } + // Insert args at beginning of test args, so any values from command line take precedence + args = fmt.Sprintf("--kubelet-flags=--experimental-mounter-path=%s ", mounterPath) + args + return args, nil +} + +// RunTest runs test on the node. +func (n *NodeE2ERemote) RunTest(host, workspace, results, junitFilePrefix, testArgs, ginkgoArgs string, timeout time.Duration) (string, error) { + // Install the cni plugin. + if err := installCNI(host, workspace); err != nil { + return "", err + } + + // Configure iptables firewall rules + if err := configureFirewall(host); err != nil { + return "", err + } + + // Kill any running node processes + cleanupNodeProcesses(host) + + testArgs, err := updateGCIMounterPath(testArgs, host, workspace) + if err != nil { + return "", err + } + + // Run the tests + glog.Infof("Starting tests on %q", host) + cmd := getSSHCommand(" && ", + fmt.Sprintf("cd %s", workspace), + fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s %s", + timeout.Seconds(), ginkgoArgs, host, results, junitFilePrefix, testArgs), + ) + return SSH(host, "sh", "-c", cmd) } diff --git a/test/e2e_node/remote/remote.go b/test/e2e_node/remote/remote.go index e9cfd62db34..a7d3fd84a85 100644 --- a/test/e2e_node/remote/remote.go +++ b/test/e2e_node/remote/remote.go @@ -23,12 +23,10 @@ import ( "os" "os/exec" "path/filepath" - "strings" "time" "github.com/golang/glog" utilerrors "k8s.io/kubernetes/pkg/util/errors" - "k8s.io/kubernetes/test/e2e_node/builder" ) var testTimeoutSeconds = flag.Duration("test-timeout", 45*time.Minute, "How long (in golang duration format) to wait for ginkgo tests to complete.") @@ -63,177 +61,54 @@ func CreateTestArchive(suite TestSuite) (string, error) { return filepath.Join(dir, archiveName), nil } -const ( - CNIRelease = "07a8a28637e97b22eb8dfe710eeae1344f69d16e" - CNIDirectory = "cni" - CNIURL = "https://storage.googleapis.com/kubernetes-release/network-plugins/cni-" + CNIRelease + ".tar.gz" -) - // Returns the command output, whether the exit was ok, and any errors -func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string, testArgs string, ginkgoFlags string) (string, bool, error) { +// TODO(random-liu): junitFilePrefix is not prefix actually, the file name is junit-junitFilePrefix.xml. Change the variable name. +func RunRemote(suite TestSuite, archive string, host string, cleanup bool, junitFilePrefix string, testArgs string, ginkgoArgs string) (string, bool, error) { // Create the temp staging directory - glog.Infof("Staging test binaries on %s", host) + glog.Infof("Staging test binaries on %q", host) workspace := fmt.Sprintf("/tmp/node-e2e-%s", getTimestamp()) // Do not sudo here, so that we can use scp to copy test archive to the directdory. if output, err := SSHNoSudo(host, "mkdir", workspace); err != nil { // Exit failure with the error - return "", false, fmt.Errorf("failed to create workspace directory: %v output: %q", err, output) + return "", false, fmt.Errorf("failed to create workspace directory %q on host %q: %v output: %q", workspace, host, err, output) } if cleanup { defer func() { output, err := SSH(host, "rm", "-rf", workspace) if err != nil { - glog.Errorf("failed to cleanup workspace %s on host %v. Output:\n%s", workspace, err, output) + glog.Errorf("failed to cleanup workspace %q on host %q: %v. Output:\n%s", workspace, host, err, output) } }() } - // Install the cni plugin. - cniPath := filepath.Join(workspace, CNIDirectory) - cmd := getSSHCommand(" ; ", - fmt.Sprintf("mkdir -p %s", cniPath), - fmt.Sprintf("wget -O - %s | tar -xz -C %s", CNIURL, cniPath), - ) - if output, err := SSH(host, "sh", "-c", cmd); err != nil { - // Exit failure with the error - return "", false, fmt.Errorf("failed to install cni plugin: %v output: %q", err, output) - } - - // Configure iptables firewall rules - // TODO: consider calling bootstrap script to configure host based on OS - output, err := SSH(host, "iptables", "-L", "INPUT") - if err != nil { - return "", false, fmt.Errorf("failed to get iptables INPUT: %v output: %q", err, output) - } - if strings.Contains(output, "Chain INPUT (policy DROP)") { - cmd = getSSHCommand("&&", - "(iptables -C INPUT -w -p TCP -j ACCEPT || iptables -A INPUT -w -p TCP -j ACCEPT)", - "(iptables -C INPUT -w -p UDP -j ACCEPT || iptables -A INPUT -w -p UDP -j ACCEPT)", - "(iptables -C INPUT -w -p ICMP -j ACCEPT || iptables -A INPUT -w -p ICMP -j ACCEPT)") - output, err := SSH(host, "sh", "-c", cmd) - if err != nil { - return "", false, fmt.Errorf("failed to configured firewall: %v output: %v", err, output) - } - } - output, err = SSH(host, "iptables", "-L", "FORWARD") - if err != nil { - return "", false, fmt.Errorf("failed to get iptables FORWARD: %v output: %q", err, output) - } - if strings.Contains(output, "Chain FORWARD (policy DROP)") { - cmd = getSSHCommand("&&", - "(iptables -C FORWARD -w -p TCP -j ACCEPT || iptables -A FORWARD -w -p TCP -j ACCEPT)", - "(iptables -C FORWARD -w -p UDP -j ACCEPT || iptables -A FORWARD -w -p UDP -j ACCEPT)", - "(iptables -C FORWARD -w -p ICMP -j ACCEPT || iptables -A FORWARD -w -p ICMP -j ACCEPT)") - output, err = SSH(host, "sh", "-c", cmd) - if err != nil { - return "", false, fmt.Errorf("failed to configured firewall: %v output: %v", err, output) - } - } - // Copy the archive to the staging directory - if output, err = runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), workspace)); err != nil { + if output, err := runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), workspace)); err != nil { // Exit failure with the error return "", false, fmt.Errorf("failed to copy test archive: %v, output: %q", err, output) } - // Kill any running node processes - cmd = getSSHCommand(" ; ", - "pkill kubelet", - "pkill kube-apiserver", - "pkill etcd", - ) - // No need to log an error if pkill fails since pkill will fail if the commands are not running. - // If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd - // logs about failing to bind the required ports. - glog.Infof("Killing any existing node processes on %s", host) - SSH(host, "sh", "-c", cmd) - // Extract the archive - cmd = getSSHCommand(" && ", + cmd := getSSHCommand(" && ", fmt.Sprintf("cd %s", workspace), fmt.Sprintf("tar -xzvf ./%s", archiveName), ) - glog.Infof("Extracting tar on %s", host) - if output, err = SSH(host, "sh", "-c", cmd); err != nil { + glog.Infof("Extracting tar on %q", host) + if output, err := SSH(host, "sh", "-c", cmd); err != nil { // Exit failure with the error return "", false, fmt.Errorf("failed to extract test archive: %v, output: %q", err, output) } - // If we are testing on a GCI node, we chmod 544 the mounter and specify a different mounter path in the test args. - // We do this here because the local var `workspace` tells us which /tmp/node-e2e-%d is relevant to the current test run. + glog.Infof("Running test on %q", host) + output, err := suite.RunTest(host, workspace, filepath.Join(workspace, "results"), junitFilePrefix, testArgs, ginkgoArgs, *testTimeoutSeconds) - // Determine if the GCI mounter script exists locally. - k8sDir, err := builder.GetK8sRootDir() - if err != nil { - return "", false, fmt.Errorf("Could not find K8s root dir! Err: %v", err) - } - localSource := "cluster/gce/gci/mounter/mounter" - source := filepath.Join(k8sDir, localSource) - - // Require the GCI mounter script, we want to make sure the remote test runner stays up to date if the mounter file moves - if _, err = os.Stat(source); err != nil { - return "", false, fmt.Errorf("Could not find GCI mounter script at %q! If this script has been (re)moved, please update the e2e node remote test runner accordingly! Err: %v", source, err) - } - - // Determine if tests will run on a GCI node. - output, err = SSH(host, "sh", "-c", "'cat /etc/os-release'") - if err != nil { - glog.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output) - return "", false, fmt.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output) - } - if strings.Contains(output, "ID=gci") { - glog.Infof("GCI node and GCI mounter both detected, modifying --experimental-mounter-path accordingly") - // Note this implicitly requires the script to be where we expect in the tarball, so if that location changes the error - // here will tell us to update the remote test runner. - mounterPath := filepath.Join(workspace, "cluster/gce/gci/mounter/mounter") - output, err = SSH(host, "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath)) - if err != nil { - glog.Errorf("Unable to chmod 544 GCI mounter script. Err: %v, Output:\n%s", err, output) - return "", false, err - } - // Insert args at beginning of testArgs, so any values from command line take precedence - testArgs = fmt.Sprintf("--kubelet-flags=--experimental-mounter-path=%s ", mounterPath) + testArgs - } - - // Run the tests - cmd = getSSHCommand(" && ", - fmt.Sprintf("cd %s", workspace), - fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --logtostderr --v 4 --node-name=%s --report-dir=%s/results --report-prefix=%s %s", - testTimeoutSeconds.Seconds(), ginkgoFlags, host, workspace, junitFilePrefix, testArgs), - ) aggErrs := []error{} - - glog.Infof("Starting tests on %s", host) - output, err = SSH(host, "sh", "-c", cmd) // Do not log the output here, let the caller deal with the test output. if err != nil { aggErrs = append(aggErrs, err) - - // Encountered an unexpected error. The remote test harness may not - // have finished retrieved and stored all the logs in this case. Try - // to get some logs for debugging purposes. - // TODO: This is a best-effort, temporary hack that only works for - // journald nodes. We should have a more robust way to collect logs. - var ( - logName = "system.log" - logPath = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName) - destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName) - ) - glog.Infof("Test failed unexpectedly. Attempting to retreiving system logs (only works for nodes with journald)") - // Try getting the system logs from journald and store it to a file. - // Don't reuse the original test directory on the remote host because - // it could've be been removed if the node was rebooted. - if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil { - glog.Infof("Got the system logs from journald; copying it back...") - if output, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil { - glog.Infof("Failed to copy the log: err: %v, output: %q", err, output) - } - } else { - glog.Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output) - } + collectSystemLog(host, workspace) } - glog.Infof("Copying test artifacts from %s", host) + glog.Infof("Copying test artifacts from %q", host) scpErr := getTestArtifacts(host, workspace) if scpErr != nil { aggErrs = append(aggErrs, scpErr) @@ -267,6 +142,33 @@ func getTestArtifacts(host, testDir string) error { return nil } +// collectSystemLog is a temporary hack to collect system log when encountered on +// unexpected error. +func collectSystemLog(host, workspace string) { + // Encountered an unexpected error. The remote test harness may not + // have finished retrieved and stored all the logs in this case. Try + // to get some logs for debugging purposes. + // TODO: This is a best-effort, temporary hack that only works for + // journald nodes. We should have a more robust way to collect logs. + var ( + logName = "system.log" + logPath = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName) + destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName) + ) + glog.Infof("Test failed unexpectedly. Attempting to retreiving system logs (only works for nodes with journald)") + // Try getting the system logs from journald and store it to a file. + // Don't reuse the original test directory on the remote host because + // it could've be been removed if the node was rebooted. + if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil { + glog.Infof("Got the system logs from journald; copying it back...") + if output, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil { + glog.Infof("Failed to copy the log: err: %v, output: %q", err, output) + } + } else { + glog.Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output) + } +} + // WriteLog is a temporary function to make it possible to write log // in the runner. This is used to collect serial console log. // TODO(random-liu): Use the log-dump script in cluster e2e. diff --git a/test/e2e_node/remote/utils.go b/test/e2e_node/remote/utils.go new file mode 100644 index 00000000000..ef7efef4ae8 --- /dev/null +++ b/test/e2e_node/remote/utils.go @@ -0,0 +1,97 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package remote + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/golang/glog" +) + +// utils.go contains functions used accross test suites. + +const ( + cniRelease = "07a8a28637e97b22eb8dfe710eeae1344f69d16e" + cniDirectory = "cni" + cniURL = "https://storage.googleapis.com/kubernetes-release/network-plugins/cni-" + cniRelease + ".tar.gz" +) + +// Install the cni plugin. +func installCNI(host, workspace string) error { + glog.Infof("Install CNI on %q", host) + cniPath := filepath.Join(workspace, cniDirectory) + cmd := getSSHCommand(" ; ", + fmt.Sprintf("mkdir -p %s", cniPath), + fmt.Sprintf("wget -O - %s | tar -xz -C %s", cniURL, cniPath), + ) + if output, err := SSH(host, "sh", "-c", cmd); err != nil { + return fmt.Errorf("failed to install cni plugin on %q: %v output: %q", host, err, output) + } + return nil +} + +// configureFirewall configures iptable firewall rules. +func configureFirewall(host string) error { + glog.Infof("Configure iptables firewall rules on %q", host) + // TODO: consider calling bootstrap script to configure host based on OS + output, err := SSH(host, "iptables", "-L", "INPUT") + if err != nil { + return fmt.Errorf("failed to get iptables INPUT on %q: %v output: %q", host, err, output) + } + if strings.Contains(output, "Chain INPUT (policy DROP)") { + cmd := getSSHCommand("&&", + "(iptables -C INPUT -w -p TCP -j ACCEPT || iptables -A INPUT -w -p TCP -j ACCEPT)", + "(iptables -C INPUT -w -p UDP -j ACCEPT || iptables -A INPUT -w -p UDP -j ACCEPT)", + "(iptables -C INPUT -w -p ICMP -j ACCEPT || iptables -A INPUT -w -p ICMP -j ACCEPT)") + output, err := SSH(host, "sh", "-c", cmd) + if err != nil { + return fmt.Errorf("failed to configured firewall on %q: %v output: %v", host, err, output) + } + } + output, err = SSH(host, "iptables", "-L", "FORWARD") + if err != nil { + return fmt.Errorf("failed to get iptables FORWARD on %q: %v output: %q", host, err, output) + } + if strings.Contains(output, "Chain FORWARD (policy DROP)") { + cmd := getSSHCommand("&&", + "(iptables -C FORWARD -w -p TCP -j ACCEPT || iptables -A FORWARD -w -p TCP -j ACCEPT)", + "(iptables -C FORWARD -w -p UDP -j ACCEPT || iptables -A FORWARD -w -p UDP -j ACCEPT)", + "(iptables -C FORWARD -w -p ICMP -j ACCEPT || iptables -A FORWARD -w -p ICMP -j ACCEPT)") + output, err = SSH(host, "sh", "-c", cmd) + if err != nil { + return fmt.Errorf("failed to configured firewall on %q: %v output: %v", host, err, output) + } + } + return nil +} + +// cleanupNodeProcesses kills all running node processes may conflict with the test. +func cleanupNodeProcesses(host string) { + glog.Infof("Killing any existing node processes on %q", host) + cmd := getSSHCommand(" ; ", + "pkill kubelet", + "pkill kube-apiserver", + "pkill etcd", + "pkill e2e_node.test", + ) + // No need to log an error if pkill fails since pkill will fail if the commands are not running. + // If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd + // logs about failing to bind the required ports. + SSH(host, "sh", "-c", cmd) +} diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index c3fa9dcfc77..d5d3c523fa0 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -382,7 +382,7 @@ func testHost(host string, deleteFiles bool, junitFilePrefix string, ginkgoFlags } } - output, exitOk, err := remote.RunRemote(path, host, deleteFiles, junitFilePrefix, *testArgs, ginkgoFlagsStr) + output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, junitFilePrefix, *testArgs, ginkgoFlagsStr) return &TestResult{ output: output, err: err,