e2e_node: Correctly kill and log systemd kubelet

This commit is contained in:
Euan Kemp 2016-06-09 19:25:36 -07:00
parent b004122dfc
commit 34996f1b84

View File

@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"io/ioutil"
"math/rand"
"net/http"
"os"
"os/exec"
@ -37,16 +38,28 @@ var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120,
// reportDir is the directory that receives the JUnit XML reports. It is empty
// by default, which disables report generation; getLogFiles also returns
// without doing anything in that case and otherwise places its log links here.
var reportDir = flag.String("report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.")
// e2eService tracks the etcd, kube-apiserver and kubelet services started for
// a node e2e run, plus the directories and log sources associated with them.
type e2eService struct {
// Handle used by stop() to stop etcd.
etcdCmd *exec.Cmd
etcdCmd *killCmd
etcdDataDir string
// Handles used by stop() to stop kube-apiserver and the kubelet.
apiServerCmd *exec.Cmd
kubeletCmd *exec.Cmd
apiServerCmd *killCmd
kubeletCmd *killCmd
// kubeletStaticPodDir is the temporary directory created in
// startKubeletServer; stop() deletes it when it is non-empty.
kubeletStaticPodDir string
nodeName string
// logFiles maps a target log file name (e.g. "kern.log") to the places the
// log can be read from. It is seeded by newE2eService, and
// startKubeletServer adds "kubelet.log" when the kubelet runs under
// systemd-run.
logFiles map[string]logFileData
}
// logFileData describes where a single log of interest can be obtained from:
// plain files on disk or, on journald based machines, journalctl.
type logFileData struct {
// files lists paths on disk where the log may live
// (e.g. /var/log/docker.log or /var/log/upstart/docker.log).
files []string
// journalctlCommand holds the journalctl arguments that select the same
// log (e.g. "-k" for the kernel log, "-u docker" for a unit).
// NOTE(review): the code consuming this field is not fully visible here.
journalctlCommand []string
}
// newE2eService returns an e2eService for nodeName whose logFiles map is
// pre-populated with the kernel and docker log sources.
func newE2eService(nodeName string) *e2eService {
return &e2eService{nodeName: nodeName}
// Special log files that need to be collected for additional debugging.
// Each entry is keyed by the target file name and lists the on-disk paths
// followed by the equivalent journalctl arguments.
var logFiles = map[string]logFileData{
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
"docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}},
}
return &e2eService{nodeName: nodeName, logFiles: logFiles}
}
func (es *e2eService) start() error {
@ -81,22 +94,12 @@ func (es *e2eService) start() error {
// getLogFiles collects logs of interest, either via journalctl or by creating symlinks.
// Since we scp files from the remote directory, symlinks are treated as normal files and their contents are copied over.
func (es *e2eService) getLogFiles() {
// Special log files that need to be collected for additional debugging.
type logFileData struct {
files []string
journalctlCommand []string
}
var logFiles = map[string]logFileData{
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
"docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}},
}
// Nothing to do if report dir is not specified.
if *reportDir == "" {
return
}
journaldFound := isJournaldAvailable()
for targetFileName, logFileData := range logFiles {
for targetFileName, logFileData := range es.logFiles {
targetLink := path.Join(*reportDir, targetFileName)
if journaldFound {
// Skip log files that do not have an equivalent in journald based machines.
@ -144,7 +147,7 @@ func isJournaldAvailable() bool {
}
func (es *e2eService) stop() {
if err := es.stopService("kubelet", es.kubeletCmd); err != nil {
if err := es.stopService(es.kubeletCmd); err != nil {
glog.Errorf("Failed to stop kubelet: %v", err)
}
if es.kubeletStaticPodDir != "" {
@ -153,10 +156,10 @@ func (es *e2eService) stop() {
glog.Errorf("Failed to delete kubelet static pod directory %s.\n%v", es.kubeletStaticPodDir, err)
}
}
if err := es.stopService("kube-apiserver", es.apiServerCmd); err != nil {
if err := es.stopService(es.apiServerCmd); err != nil {
glog.Errorf("Failed to stop kube-apiserver: %v", err)
}
if err := es.stopService("etcd", es.etcdCmd); err != nil {
if err := es.stopService(es.etcdCmd); err != nil {
glog.Errorf("Failed to stop etcd: %v", err)
}
if es.etcdDataDir != "" {
@ -167,7 +170,7 @@ func (es *e2eService) stop() {
}
}
func (es *e2eService) startEtcd() (*exec.Cmd, error) {
func (es *e2eService) startEtcd() (*killCmd, error) {
dataDir, err := ioutil.TempDir("", "node-e2e")
if err != nil {
return nil, err
@ -181,10 +184,10 @@ func (es *e2eService) startEtcd() (*exec.Cmd, error) {
"http://127.0.0.1:4001/v2/keys/", // Trailing slash is required,
cmd,
"etcd.log")
return cmd, es.startServer(hcc)
return &killCmd{name: "etcd", cmd: cmd}, es.startServer(hcc)
}
func (es *e2eService) startApiServer() (*exec.Cmd, error) {
func (es *e2eService) startApiServer() (*killCmd, error) {
cmd := exec.Command("sudo", getApiServerBin(),
"--etcd-servers", "http://127.0.0.1:4001",
"--insecure-bind-address", "0.0.0.0",
@ -197,22 +200,28 @@ func (es *e2eService) startApiServer() (*exec.Cmd, error) {
"http://127.0.0.1:8080/healthz",
cmd,
"kube-apiserver.log")
return cmd, es.startServer(hcc)
return &killCmd{name: "kube-apiserver", cmd: cmd}, es.startServer(hcc)
}
func (es *e2eService) startKubeletServer() (*exec.Cmd, error) {
func (es *e2eService) startKubeletServer() (*killCmd, error) {
dataDir, err := ioutil.TempDir("", "node-e2e-pod")
if err != nil {
return nil, err
}
es.kubeletStaticPodDir = dataDir
var killOverride *exec.Cmd
cmdArgs := []string{}
if systemdRun, err := exec.LookPath("systemd-run"); err == nil {
// On systemd services, detection of a service / unit works reliably while
// detection of a process started from an ssh session does not work.
// Since kubelet will typically be run as a service it also makes more
// sense to test it that way
cmdArgs = append(cmdArgs, systemdRun, getKubeletServerBin())
unitName := fmt.Sprintf("kubelet-%d.service", rand.Int31())
cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, getKubeletServerBin())
killOverride = exec.Command("sudo", "systemctl", "kill", unitName)
es.logFiles["kubelet.log"] = logFileData{
journalctlCommand: []string{"-u", unitName},
}
} else {
cmdArgs = append(cmdArgs, getKubeletServerBin())
}
@ -233,7 +242,7 @@ func (es *e2eService) startKubeletServer() (*exec.Cmd, error) {
"http://127.0.0.1:10255/healthz",
cmd,
"kubelet.log")
return cmd, es.startServer(hcc)
return &killCmd{name: "kubelet", cmd: cmd, override: killOverride}, es.startServer(hcc)
}
func (es *e2eService) startServer(cmd *healthCheckCommand) error {
@ -290,7 +299,27 @@ func (es *e2eService) startServer(cmd *healthCheckCommand) error {
return fmt.Errorf("Timeout waiting for service %s", cmd)
}
func (es *e2eService) stopService(name string, cmd *exec.Cmd) error {
// stopService stops the service described by cmd by delegating to cmd.Kill
// and returns whatever error Kill reports.
//
// A nil cmd means the service was never started (the start* helpers return a
// nil *killCmd when they fail early), so there is nothing to stop and nil is
// returned. Without this guard Kill would dereference its nil receiver and
// panic while stop() tears down a partially started environment.
func (es *e2eService) stopService(cmd *killCmd) error {
	if cmd == nil {
		return nil
	}
	return cmd.Kill()
}
// killCmd describes how to stop one started service.
//
// The cmd member specifies a command to find the pid of and attempt to kill.
// If the override field is set, Kill runs that command instead and returns its
// result without touching cmd. name is only used for logging.
type killCmd struct {
// name identifies the service in log messages.
name string
// cmd is the command that was started for the service.
cmd *exec.Cmd
// override, when non-nil, is run in place of killing cmd's process; the
// kubelet sets it to "sudo systemctl kill <unit>" when started via
// systemd-run.
override *exec.Cmd
}
func (k *killCmd) Kill() error {
if k.override != nil {
return k.override.Run()
}
name := k.name
cmd := k.cmd
if cmd == nil || cmd.Process == nil {
glog.V(2).Infof("%s not running", name)
return nil