diff --git a/test/e2e_node/e2e_service.go b/test/e2e_node/e2e_service.go index 6d4ffc98520..630cbef6439 100644 --- a/test/e2e_node/e2e_service.go +++ b/test/e2e_node/e2e_service.go @@ -20,6 +20,7 @@ import ( "flag" "fmt" "io/ioutil" + "math/rand" "net/http" "os" "os/exec" @@ -37,16 +38,28 @@ var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, var reportDir = flag.String("report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.") type e2eService struct { - etcdCmd *exec.Cmd + etcdCmd *killCmd etcdDataDir string - apiServerCmd *exec.Cmd - kubeletCmd *exec.Cmd + apiServerCmd *killCmd + kubeletCmd *killCmd kubeletStaticPodDir string nodeName string + logFiles map[string]logFileData +} + +type logFileData struct { + files []string + journalctlCommand []string } func newE2eService(nodeName string) *e2eService { - return &e2eService{nodeName: nodeName} + // Special log files that need to be collected for additional debugging. + var logFiles = map[string]logFileData{ + "kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}}, + "docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}}, + } + + return &e2eService{nodeName: nodeName, logFiles: logFiles} } func (es *e2eService) start() error { @@ -81,22 +94,12 @@ func (es *e2eService) start() error { // Get logs of interest either via journalctl or by creating sym links. // Since we scp files from the remote directory, symlinks will be treated as normal files and file contents will be copied over. func (es *e2eService) getLogFiles() { - // Special log files that need to be collected for additional debugging. - type logFileData struct { - files []string - journalctlCommand []string - } - var logFiles = map[string]logFileData{ - "kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}}, - "docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}}, - } - // Nothing to do if report dir is not specified. if *reportDir == "" { return } journaldFound := isJournaldAvailable() - for targetFileName, logFileData := range logFiles { + for targetFileName, logFileData := range es.logFiles { targetLink := path.Join(*reportDir, targetFileName) if journaldFound { // Skip log files that do not have an equivalent in journald based machines. @@ -144,7 +147,7 @@ func isJournaldAvailable() bool { } func (es *e2eService) stop() { - if err := es.stopService("kubelet", es.kubeletCmd); err != nil { + if err := es.stopService(es.kubeletCmd); err != nil { glog.Errorf("Failed to stop kubelet: %v", err) } if es.kubeletStaticPodDir != "" { @@ -153,10 +156,10 @@ func (es *e2eService) stop() { glog.Errorf("Failed to delete kubelet static pod directory %s.\n%v", es.kubeletStaticPodDir, err) } } - if err := es.stopService("kube-apiserver", es.apiServerCmd); err != nil { + if err := es.stopService(es.apiServerCmd); err != nil { glog.Errorf("Failed to stop kube-apiserver: %v", err) } - if err := es.stopService("etcd", es.etcdCmd); err != nil { + if err := es.stopService(es.etcdCmd); err != nil { glog.Errorf("Failed to stop etcd: %v", err) } if es.etcdDataDir != "" { @@ -167,7 +170,7 @@ func (es *e2eService) stop() { } } -func (es *e2eService) startEtcd() (*exec.Cmd, error) { +func (es *e2eService) startEtcd() (*killCmd, error) { dataDir, err := ioutil.TempDir("", "node-e2e") if err != nil { return nil, err @@ -181,10 +184,10 @@ func (es *e2eService) startEtcd() (*exec.Cmd, error) { "http://127.0.0.1:4001/v2/keys/", // Trailing slash is required, cmd, "etcd.log") - return cmd, es.startServer(hcc) + return &killCmd{name: "etcd", cmd: cmd}, es.startServer(hcc) } -func (es *e2eService) startApiServer() (*exec.Cmd, error) { +func (es *e2eService) startApiServer() (*killCmd, error) { cmd := exec.Command("sudo", getApiServerBin(), "--etcd-servers", "http://127.0.0.1:4001", "--insecure-bind-address", "0.0.0.0", @@ -197,16 +200,32 @@ func (es *e2eService) startApiServer() (*exec.Cmd, error) { "http://127.0.0.1:8080/healthz", cmd, "kube-apiserver.log") - return cmd, es.startServer(hcc) + return &killCmd{name: "kube-apiserver", cmd: cmd}, es.startServer(hcc) } -func (es *e2eService) startKubeletServer() (*exec.Cmd, error) { +func (es *e2eService) startKubeletServer() (*killCmd, error) { dataDir, err := ioutil.TempDir("", "node-e2e-pod") if err != nil { return nil, err } es.kubeletStaticPodDir = dataDir - cmd := exec.Command("sudo", getKubeletServerBin(), + var killOverride *exec.Cmd + cmdArgs := []string{} + if systemdRun, err := exec.LookPath("systemd-run"); err == nil { + // On systemd services, detection of a service / unit works reliably while + // detection of a process started from an ssh session does not work. + // Since kubelet will typically be run as a service it also makes more + // sense to test it that way + unitName := fmt.Sprintf("kubelet-%d.service", rand.Int31()) + cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, getKubeletServerBin()) + killOverride = exec.Command("sudo", "systemctl", "kill", unitName) + es.logFiles["kubelet.log"] = logFileData{ + journalctlCommand: []string{"-u", unitName}, + } + } else { + cmdArgs = append(cmdArgs, getKubeletServerBin()) + } + cmdArgs = append(cmdArgs, "--api-servers", "http://127.0.0.1:8080", "--address", "0.0.0.0", "--port", "10250", @@ -218,11 +237,12 @@ func (es *e2eService) startKubeletServer() (*exec.Cmd, error) { "--file-check-frequency", "10s", // Check file frequently so tests won't wait too long "--v", "8", "--logtostderr", ) + cmd := exec.Command("sudo", cmdArgs...) hcc := newHealthCheckCommand( "http://127.0.0.1:10255/healthz", cmd, "kubelet.log") - return cmd, es.startServer(hcc) + return &killCmd{name: "kubelet", cmd: cmd, override: killOverride}, es.startServer(hcc) } func (es *e2eService) startServer(cmd *healthCheckCommand) error { @@ -279,7 +299,27 @@ func (es *e2eService) startServer(cmd *healthCheckCommand) error { return fmt.Errorf("Timeout waiting for service %s", cmd) } -func (es *e2eService) stopService(name string, cmd *exec.Cmd) error { +func (es *e2eService) stopService(cmd *killCmd) error { + return cmd.Kill() +} + +// killCmd is a struct to kill a given cmd. The cmd member specifies a command +// to find the pid of and attempt to kill. +// If the override field is set, that will be used instead to kill the command. +// name is only used for logging +type killCmd struct { + name string + cmd *exec.Cmd + override *exec.Cmd +} + +func (k *killCmd) Kill() error { + if k.override != nil { + return k.override.Run() + } + name := k.name + cmd := k.cmd + if cmd == nil || cmd.Process == nil { glog.V(2).Infof("%s not running", name) return nil diff --git a/test/e2e_node/environment/setup_host.sh b/test/e2e_node/environment/setup_host.sh index 87e332253c1..48393469cc2 100755 --- a/test/e2e_node/environment/setup_host.sh +++ b/test/e2e_node/environment/setup_host.sh @@ -41,6 +41,11 @@ EOF sudo systemctl daemon-reload fi +# For coreos, disable updates +if $(sudo systemctl status update-engine &>/dev/null); then + sudo systemctl mask update-engine locksmithd +fi + # Fixup sudoers require tty sudo grep -q "# Defaults requiretty" /etc/sudoers if [ $? -ne 0 ] ; then diff --git a/test/e2e_node/jenkins/jenkins-ci.properties b/test/e2e_node/jenkins/jenkins-ci.properties index 990b7f80e1a..64f8261abf9 100644 --- a/test/e2e_node/jenkins/jenkins-ci.properties +++ b/test/e2e_node/jenkins/jenkins-ci.properties @@ -3,10 +3,7 @@ GCE_HOSTS= # To copy an image between projects: # `gcloud compute --project disks create --image=https://www.googleapis.com/compute/v1/projects//global/images/` # `gcloud compute --project images create --source-disk=` -# -# Testing disabled on the following images: -# e2e-node-coreos-stable20160531-image - Github Issue #26903 -GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-containervm-v20160321-image +GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-coreos-stable20160531-image,e2e-node-containervm-v20160321-image GCE_ZONE=us-central1-f GCE_PROJECT=kubernetes-jenkins GCE_IMAGE_PROJECT=kubernetes-jenkins diff --git a/test/e2e_node/jenkins/jenkins-pull.properties b/test/e2e_node/jenkins/jenkins-pull.properties index db2e1331c17..0fff91d98e6 100644 --- a/test/e2e_node/jenkins/jenkins-pull.properties +++ b/test/e2e_node/jenkins/jenkins-pull.properties @@ -3,10 +3,7 @@ GCE_HOSTS= # To copy an image between projects: # `gcloud compute --project disks create --image=https://www.googleapis.com/compute/v1/projects//global/images/` # `gcloud compute --project images create --source-disk=` -# -# Testing disabled on the following images: -# e2e-node-coreos-stable20160531-image - Github Issue #26903 -GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-containervm-v20160321-image +GCE_IMAGES=e2e-node-ubuntu-trusty-docker10-image,e2e-node-ubuntu-trusty-docker9-image,e2e-node-ubuntu-trusty-docker8-image,e2e-node-coreos-stable20160531-image,e2e-node-containervm-v20160321-image GCE_ZONE=us-central1-f GCE_PROJECT=kubernetes-jenkins-pull GCE_IMAGE_PROJECT=kubernetes-jenkins-pull