From 6303edb790519340b42aa29ae4e6c262484460c3 Mon Sep 17 00:00:00 2001 From: Euan Kemp Date: Thu, 9 Jun 2016 15:18:55 -0700 Subject: [PATCH 1/4] e2e_node: launch kubelet via systemd-run This more closely mimics what the kubelet will usually have for cgroups, and also fixes an issue with cgroup detection of it. Fixes #26431 --- test/e2e_node/e2e_service.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/e2e_node/e2e_service.go b/test/e2e_node/e2e_service.go index 6d4ffc98520..6dc99f1a635 100644 --- a/test/e2e_node/e2e_service.go +++ b/test/e2e_node/e2e_service.go @@ -206,7 +206,17 @@ func (es *e2eService) startKubeletServer() (*exec.Cmd, error) { return nil, err } es.kubeletStaticPodDir = dataDir - cmd := exec.Command("sudo", getKubeletServerBin(), + cmdArgs := []string{} + if systemdRun, err := exec.LookPath("systemd-run"); err == nil { + // On systemd services, detection of a service / unit works reliably while + // detection of a process started from an ssh session does not work. + // Since kubelet will typically be run as a service it also makes more + // sense to test it that way + cmdArgs = append(cmdArgs, systemdRun, getKubeletServerBin()) + } else { + cmdArgs = append(cmdArgs, getKubeletServerBin()) + } + cmdArgs = append(cmdArgs, "--api-servers", "http://127.0.0.1:8080", "--address", "0.0.0.0", "--port", "10250", @@ -218,6 +228,7 @@ func (es *e2eService) startKubeletServer() (*exec.Cmd, error) { "--file-check-frequency", "10s", // Check file frequently so tests won't wait too long "--v", "8", "--logtostderr", ) + cmd := exec.Command("sudo", cmdArgs...) hcc := newHealthCheckCommand( "http://127.0.0.1:10255/healthz", cmd, From b004122dfc6c94daee6268e4f7353f3e9a674195 Mon Sep 17 00:00:00 2001 From: Euan Kemp Date: Thu, 9 Jun 2016 15:50:23 -0700 Subject: [PATCH 2/4] Revert "Disable CoreOS image from node e2e testing." This reverts commit 2494c779727abcdde93b5e05a457c46ce2c044f8. 
The previous commit, which launches the kubelet under `systemd-run`, fixes an error in detecting the kubelet's cgroup stats. Fixes #26979 --- test/e2e_node/jenkins/jenkins-ci.properties | 5 +---- test/e2e_node/jenkins/jenkins-pull.properties | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/test/e2e_node/jenkins/jenkins-ci.properties b/test/e2e_node/jenkins/jenkins-ci.properties index 990b7f80e1a..64f8261abf9 100644 --- a/test/e2e_node/jenkins/jenkins-ci.properties +++ b/test/e2e_node/jenkins/jenkins-ci.properties @@ -3,10 +3,7 @@ GCE_HOSTS= # To copy an image between projects: # `gcloud compute --project disks create --image=https://www.googleapis.com/compute/v1/projects//global/images/` # `gcloud compute --project images create --source-disk=` -# -# Testing disabled on the following images: -# e2e-node-coreos-stable20160531-image - Github Issue #26903 -GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-containervm-v20160321-image +GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-coreos-stable20160531-image,e2e-node-containervm-v20160321-image GCE_ZONE=us-central1-f GCE_PROJECT=kubernetes-jenkins GCE_IMAGE_PROJECT=kubernetes-jenkins diff --git a/test/e2e_node/jenkins/jenkins-pull.properties b/test/e2e_node/jenkins/jenkins-pull.properties index db2e1331c17..0fff91d98e6 100644 --- a/test/e2e_node/jenkins/jenkins-pull.properties +++ b/test/e2e_node/jenkins/jenkins-pull.properties @@ -3,10 +3,7 @@ GCE_HOSTS= # To copy an image between projects: # `gcloud compute --project disks create --image=https://www.googleapis.com/compute/v1/projects//global/images/` # `gcloud compute --project images create --source-disk=` -# -# Testing disabled on the following images: -# e2e-node-coreos-stable20160531-image - Github Issue #26903 
-GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-containervm-v20160321-image +GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-coreos-stable20160531-image,e2e-node-containervm-v20160321-image GCE_ZONE=us-central1-f GCE_PROJECT=kubernetes-jenkins-pull GCE_IMAGE_PROJECT=kubernetes-jenkins-pull From 34996f1b84104a83033fb62cec72ec86f9eef8c1 Mon Sep 17 00:00:00 2001 From: Euan Kemp Date: Thu, 9 Jun 2016 19:25:36 -0700 Subject: [PATCH 3/4] e2e_node: Correctly kill and log systemd kubelet --- test/e2e_node/e2e_service.go | 81 ++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/test/e2e_node/e2e_service.go b/test/e2e_node/e2e_service.go index 6dc99f1a635..630cbef6439 100644 --- a/test/e2e_node/e2e_service.go +++ b/test/e2e_node/e2e_service.go @@ -20,6 +20,7 @@ import ( "flag" "fmt" "io/ioutil" + "math/rand" "net/http" "os" "os/exec" @@ -37,16 +38,28 @@ var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, var reportDir = flag.String("report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.") type e2eService struct { - etcdCmd *exec.Cmd + etcdCmd *killCmd etcdDataDir string - apiServerCmd *exec.Cmd - kubeletCmd *exec.Cmd + apiServerCmd *killCmd + kubeletCmd *killCmd kubeletStaticPodDir string nodeName string + logFiles map[string]logFileData +} + +type logFileData struct { + files []string + journalctlCommand []string } func newE2eService(nodeName string) *e2eService { - return &e2eService{nodeName: nodeName} + // Special log files that need to be collected for additional debugging. 
+ var logFiles = map[string]logFileData{ + "kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}}, + "docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}}, + } + + return &e2eService{nodeName: nodeName, logFiles: logFiles} } func (es *e2eService) start() error { @@ -81,22 +94,12 @@ func (es *e2eService) start() error { // Get logs of interest either via journalctl or by creating sym links. // Since we scp files from the remote directory, symlinks will be treated as normal files and file contents will be copied over. func (es *e2eService) getLogFiles() { - // Special log files that need to be collected for additional debugging. - type logFileData struct { - files []string - journalctlCommand []string - } - var logFiles = map[string]logFileData{ - "kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}}, - "docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}}, - } - // Nothing to do if report dir is not specified. if *reportDir == "" { return } journaldFound := isJournaldAvailable() - for targetFileName, logFileData := range logFiles { + for targetFileName, logFileData := range es.logFiles { targetLink := path.Join(*reportDir, targetFileName) if journaldFound { // Skip log files that do not have an equivalent in journald based machines. 
@@ -144,7 +147,7 @@ func isJournaldAvailable() bool { } func (es *e2eService) stop() { - if err := es.stopService("kubelet", es.kubeletCmd); err != nil { + if err := es.stopService(es.kubeletCmd); err != nil { glog.Errorf("Failed to stop kubelet: %v", err) } if es.kubeletStaticPodDir != "" { @@ -153,10 +156,10 @@ func (es *e2eService) stop() { glog.Errorf("Failed to delete kubelet static pod directory %s.\n%v", es.kubeletStaticPodDir, err) } } - if err := es.stopService("kube-apiserver", es.apiServerCmd); err != nil { + if err := es.stopService(es.apiServerCmd); err != nil { glog.Errorf("Failed to stop kube-apiserver: %v", err) } - if err := es.stopService("etcd", es.etcdCmd); err != nil { + if err := es.stopService(es.etcdCmd); err != nil { glog.Errorf("Failed to stop etcd: %v", err) } if es.etcdDataDir != "" { @@ -167,7 +170,7 @@ func (es *e2eService) stop() { } } -func (es *e2eService) startEtcd() (*exec.Cmd, error) { +func (es *e2eService) startEtcd() (*killCmd, error) { dataDir, err := ioutil.TempDir("", "node-e2e") if err != nil { return nil, err @@ -181,10 +184,10 @@ func (es *e2eService) startEtcd() (*exec.Cmd, error) { "http://127.0.0.1:4001/v2/keys/", // Trailing slash is required, cmd, "etcd.log") - return cmd, es.startServer(hcc) + return &killCmd{name: "etcd", cmd: cmd}, es.startServer(hcc) } -func (es *e2eService) startApiServer() (*exec.Cmd, error) { +func (es *e2eService) startApiServer() (*killCmd, error) { cmd := exec.Command("sudo", getApiServerBin(), "--etcd-servers", "http://127.0.0.1:4001", "--insecure-bind-address", "0.0.0.0", @@ -197,22 +200,28 @@ func (es *e2eService) startApiServer() (*exec.Cmd, error) { "http://127.0.0.1:8080/healthz", cmd, "kube-apiserver.log") - return cmd, es.startServer(hcc) + return &killCmd{name: "kube-apiserver", cmd: cmd}, es.startServer(hcc) } -func (es *e2eService) startKubeletServer() (*exec.Cmd, error) { +func (es *e2eService) startKubeletServer() (*killCmd, error) { dataDir, err := ioutil.TempDir("", 
"node-e2e-pod") if err != nil { return nil, err } es.kubeletStaticPodDir = dataDir + var killOverride *exec.Cmd cmdArgs := []string{} if systemdRun, err := exec.LookPath("systemd-run"); err == nil { // On systemd services, detection of a service / unit works reliably while // detection of a process started from an ssh session does not work. // Since kubelet will typically be run as a service it also makes more // sense to test it that way - cmdArgs = append(cmdArgs, systemdRun, getKubeletServerBin()) + unitName := fmt.Sprintf("kubelet-%d.service", rand.Int31()) + cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, getKubeletServerBin()) + killOverride = exec.Command("sudo", "systemctl", "kill", unitName) + es.logFiles["kubelet.log"] = logFileData{ + journalctlCommand: []string{"-u", unitName}, + } } else { cmdArgs = append(cmdArgs, getKubeletServerBin()) } @@ -233,7 +242,7 @@ func (es *e2eService) startKubeletServer() (*exec.Cmd, error) { "http://127.0.0.1:10255/healthz", cmd, "kubelet.log") - return cmd, es.startServer(hcc) + return &killCmd{name: "kubelet", cmd: cmd, override: killOverride}, es.startServer(hcc) } func (es *e2eService) startServer(cmd *healthCheckCommand) error { @@ -290,7 +299,27 @@ func (es *e2eService) startServer(cmd *healthCheckCommand) error { return fmt.Errorf("Timeout waiting for service %s", cmd) } -func (es *e2eService) stopService(name string, cmd *exec.Cmd) error { +func (es *e2eService) stopService(cmd *killCmd) error { + return cmd.Kill() +} + +// killCmd is a struct to kill a given cmd. The cmd member specifies a command +// to find the pid of and attempt to kill. +// If the override field is set, that will be used instead to kill the command. 
+// name is only used for logging +type killCmd struct { + name string + cmd *exec.Cmd + override *exec.Cmd +} + +func (k *killCmd) Kill() error { + if k.override != nil { + return k.override.Run() + } + name := k.name + cmd := k.cmd + if cmd == nil || cmd.Process == nil { glog.V(2).Infof("%s not running", name) return nil From 3d478a3facdfc175a5cc794d14dd8a2250eda236 Mon Sep 17 00:00:00 2001 From: Euan Kemp Date: Thu, 9 Jun 2016 19:31:24 -0700 Subject: [PATCH 4/4] e2e_node: Improve coreos update disable mechanism The previous method would still allow updates to download, which slowed down tests due to the network / disk I/O. --- test/e2e_node/environment/setup_host.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/e2e_node/environment/setup_host.sh b/test/e2e_node/environment/setup_host.sh index 87e332253c1..48393469cc2 100755 --- a/test/e2e_node/environment/setup_host.sh +++ b/test/e2e_node/environment/setup_host.sh @@ -41,6 +41,11 @@ EOF sudo systemctl daemon-reload fi +# For coreos, disable updates +if $(sudo systemctl status update-engine &>/dev/null); then + sudo systemctl mask update-engine locksmithd +fi + # Fixup sudoers require tty sudo grep -q "# Defaults requiretty" /etc/sudoers if [ $? -ne 0 ] ; then