mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Merge pull request #33063 from pmorie/node-e2e
Automatic merge from submit-queue Make node E2E tests more transparent Add some logging and minor code reorg to make the node E2E tests a little more transparent and understandable.
This commit is contained in:
commit
db07433782
1
Makefile
1
Makefile
@ -168,6 +168,7 @@ test-e2e: ginkgo generated_files
|
|||||||
# INSTANCE_METADATA: For REMOTE=true and running on GCE only.
|
# INSTANCE_METADATA: For REMOTE=true and running on GCE only.
|
||||||
# GUBERNATOR: For REMOTE=true only. Produce link to Gubernator to view logs.
|
# GUBERNATOR: For REMOTE=true only. Produce link to Gubernator to view logs.
|
||||||
# Defaults to false.
|
# Defaults to false.
|
||||||
|
# PARALLELISM: The number of gingko nodes to run. Defaults to 8.
|
||||||
#
|
#
|
||||||
# Example:
|
# Example:
|
||||||
# make test-e2e-node FOCUS=Kubelet SKIP=container
|
# make test-e2e-node FOCUS=Kubelet SKIP=container
|
||||||
|
@ -141,8 +141,9 @@ if [ $remote = true ] ; then
|
|||||||
exit $?
|
exit $?
|
||||||
|
|
||||||
else
|
else
|
||||||
# Refresh sudo credentials if not running on GCE.
|
# Refresh sudo credentials for local run
|
||||||
if ! ping -c 1 -q metadata.google.internal &> /dev/null; then
|
if ! ping -c 1 -q metadata.google.internal &> /dev/null; then
|
||||||
|
echo "Updating sudo credentials"
|
||||||
sudo -v || exit 1
|
sudo -v || exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -47,6 +47,7 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Fatalf("Failed to get build output directory: %v", err)
|
glog.Fatalf("Failed to get build output directory: %v", err)
|
||||||
}
|
}
|
||||||
|
glog.Infof("Got build output dir: %v", outputDir)
|
||||||
ginkgo := filepath.Join(outputDir, "ginkgo")
|
ginkgo := filepath.Join(outputDir, "ginkgo")
|
||||||
test := filepath.Join(outputDir, "e2e_node.test")
|
test := filepath.Join(outputDir, "e2e_node.test")
|
||||||
runCommand(ginkgo, *ginkgoFlags, test, "--", *testFlags)
|
runCommand(ginkgo, *ginkgoFlags, test, "--", *testFlags)
|
||||||
@ -54,6 +55,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func runCommand(name string, args ...string) error {
|
func runCommand(name string, args ...string) error {
|
||||||
|
glog.Infof("Running command: %v %v", name, strings.Join(args, " "))
|
||||||
cmd := exec.Command("sh", "-c", strings.Join(append([]string{name}, args...), " "))
|
cmd := exec.Command("sh", "-c", strings.Join(append([]string{name}, args...), " "))
|
||||||
cmd.Stdout = os.Stdout
|
cmd.Stdout = os.Stdout
|
||||||
cmd.Stderr = os.Stderr
|
cmd.Stderr = os.Stderr
|
||||||
|
@ -57,7 +57,7 @@ func (a *APIServer) Start() error {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
err = readinessCheck([]string{apiserverHealthCheckURL}, errCh)
|
err = readinessCheck("apiserver", []string{apiserverHealthCheckURL}, errCh)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -118,7 +118,7 @@ func (e *EtcdServer) Start() error {
|
|||||||
errCh <- srv.Serve(l)
|
errCh <- srv.Serve(l)
|
||||||
}(e.clientListen)
|
}(e.clientListen)
|
||||||
|
|
||||||
err = readinessCheck([]string{etcdHealthCheckURL}, errCh)
|
err = readinessCheck("etcd", []string{etcdHealthCheckURL}, errCh)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -44,12 +44,13 @@ import (
|
|||||||
// TODO(random-liu): Move this file to a separate package.
|
// TODO(random-liu): Move this file to a separate package.
|
||||||
var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.")
|
var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.")
|
||||||
|
|
||||||
// E2EServices starts and stops e2e services in a separate process. The test uses it to start and
|
// E2EServices starts and stops e2e services in a separate process. The test
|
||||||
// stop all e2e services.
|
// uses it to start and stop all e2e services.
|
||||||
type E2EServices struct {
|
type E2EServices struct {
|
||||||
services *server
|
services *server
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewE2EServices returns a new E2EServices instance.
|
||||||
func NewE2EServices() *E2EServices {
|
func NewE2EServices() *E2EServices {
|
||||||
return &E2EServices{}
|
return &E2EServices{}
|
||||||
}
|
}
|
||||||
@ -57,11 +58,12 @@ func NewE2EServices() *E2EServices {
|
|||||||
// services.log is the combined log of all services
|
// services.log is the combined log of all services
|
||||||
const servicesLogFile = "services.log"
|
const servicesLogFile = "services.log"
|
||||||
|
|
||||||
// Start starts the e2e services in another process, it returns when all e2e
|
// Start starts the e2e services in another process by calling back into the
|
||||||
// services are ready.
|
// test binary. Returns when all e2e services are ready or an error.
|
||||||
|
//
|
||||||
// We want to statically link e2e services into the test binary, but we don't
|
// We want to statically link e2e services into the test binary, but we don't
|
||||||
// want their glog to pollute the test result. So we run the binary in run-
|
// want their glog output to pollute the test result. So we run the binary in
|
||||||
// services-mode to start e2e services in another process.
|
// run- services-mode to start e2e services in another process.
|
||||||
func (e *E2EServices) Start() error {
|
func (e *E2EServices) Start() error {
|
||||||
var err error
|
var err error
|
||||||
// Create the manifest path for kubelet.
|
// Create the manifest path for kubelet.
|
||||||
@ -155,18 +157,20 @@ func getHealthCheckURLs() []string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// e2eService is used internally in this file to start e2e services in current process.
|
// e2eService manages e2e services in current process.
|
||||||
type e2eService struct {
|
type e2eService struct {
|
||||||
services []*server
|
services []*server
|
||||||
rmDirs []string
|
rmDirs []string
|
||||||
logFiles map[string]logFileData
|
logFiles map[string]logFileData
|
||||||
|
|
||||||
// All statically linked e2e services
|
// statically linked e2e services
|
||||||
etcdServer *EtcdServer
|
etcdServer *EtcdServer
|
||||||
apiServer *APIServer
|
apiServer *APIServer
|
||||||
nsController *NamespaceController
|
nsController *NamespaceController
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// logFileData holds data about logfiles to fetch with a journalctl command or
|
||||||
|
// symlink from a node's file system.
|
||||||
type logFileData struct {
|
type logFileData struct {
|
||||||
files []string
|
files []string
|
||||||
journalctlCommand []string
|
journalctlCommand []string
|
||||||
@ -206,6 +210,7 @@ func (es *e2eService) run() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// start starts the tests embedded services or returns an error.
|
||||||
func (es *e2eService) start() error {
|
func (es *e2eService) start() error {
|
||||||
glog.Info("Starting e2e services...")
|
glog.Info("Starting e2e services...")
|
||||||
err := es.startEtcd()
|
err := es.startEtcd()
|
||||||
@ -218,7 +223,7 @@ func (es *e2eService) start() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
s, err := es.startKubeletServer()
|
s, err := es.startKubelet()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -232,8 +237,9 @@ func (es *e2eService) start() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get logs of interest either via journalctl or by creating sym links.
|
// getLogFiles gets logs of interest either via journalctl or by creating sym
|
||||||
// Since we scp files from the remote directory, symlinks will be treated as normal files and file contents will be copied over.
|
// links. Since we scp files from the remote directory, symlinks will be
|
||||||
|
// treated as normal files and file contents will be copied over.
|
||||||
func (es *e2eService) getLogFiles() {
|
func (es *e2eService) getLogFiles() {
|
||||||
// Nothing to do if report dir is not specified.
|
// Nothing to do if report dir is not specified.
|
||||||
if framework.TestContext.ReportDir == "" {
|
if framework.TestContext.ReportDir == "" {
|
||||||
@ -244,7 +250,7 @@ func (es *e2eService) getLogFiles() {
|
|||||||
for targetFileName, logFileData := range es.logFiles {
|
for targetFileName, logFileData := range es.logFiles {
|
||||||
targetLink := path.Join(framework.TestContext.ReportDir, targetFileName)
|
targetLink := path.Join(framework.TestContext.ReportDir, targetFileName)
|
||||||
if journaldFound {
|
if journaldFound {
|
||||||
// Skip log files that do not have an equivalent in journald based machines.
|
// Skip log files that do not have an equivalent in journald-based machines.
|
||||||
if len(logFileData.journalctlCommand) == 0 {
|
if len(logFileData.journalctlCommand) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -273,59 +279,55 @@ func (es *e2eService) getLogFiles() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func copyLogFile(src, target string) error {
|
// stop stops the embedded e2e services.
|
||||||
// If not a journald based distro, then just symlink files.
|
|
||||||
if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil {
|
|
||||||
return fmt.Errorf("failed to copy %q to %q: %v, %v", src, target, out, err)
|
|
||||||
}
|
|
||||||
if out, err := exec.Command("sudo", "chmod", "a+r", target).CombinedOutput(); err != nil {
|
|
||||||
return fmt.Errorf("failed to make log file %q world readable: %v, %v", target, out, err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func isJournaldAvailable() bool {
|
|
||||||
_, err := exec.LookPath("journalctl")
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (es *e2eService) stop() {
|
func (es *e2eService) stop() {
|
||||||
glog.Info("Stopping e2e services...")
|
glog.Info("Stopping e2e services...")
|
||||||
es.getLogFiles()
|
es.getLogFiles()
|
||||||
// TODO(random-liu): Use a loop to stop all services after introducing service interface.
|
|
||||||
// Stop namespace controller
|
// TODO(random-liu): Use a loop to stop all services after introducing
|
||||||
|
// service interface.
|
||||||
|
glog.Info("Stopping namespace controller")
|
||||||
if es.nsController != nil {
|
if es.nsController != nil {
|
||||||
if err := es.nsController.Stop(); err != nil {
|
if err := es.nsController.Stop(); err != nil {
|
||||||
glog.Errorf("Failed to stop %q: %v", es.nsController.Name(), err)
|
glog.Errorf("Failed to stop %q: %v", es.nsController.Name(), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Stop apiserver
|
|
||||||
|
glog.Info("Stopping API server")
|
||||||
if es.apiServer != nil {
|
if es.apiServer != nil {
|
||||||
if err := es.apiServer.Stop(); err != nil {
|
if err := es.apiServer.Stop(); err != nil {
|
||||||
glog.Errorf("Failed to stop %q: %v", es.apiServer.Name(), err)
|
glog.Errorf("Failed to stop %q: %v", es.apiServer.Name(), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, s := range es.services {
|
for _, s := range es.services {
|
||||||
|
glog.Info("Stopping service %q", s.name)
|
||||||
if err := s.kill(); err != nil {
|
if err := s.kill(); err != nil {
|
||||||
glog.Errorf("Failed to stop %v: %v", s.name, err)
|
glog.Errorf("Failed to stop %v: %v", s.name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Stop etcd
|
|
||||||
|
glog.Info("Stopping etcd")
|
||||||
if es.etcdServer != nil {
|
if es.etcdServer != nil {
|
||||||
if err := es.etcdServer.Stop(); err != nil {
|
if err := es.etcdServer.Stop(); err != nil {
|
||||||
glog.Errorf("Failed to stop %q: %v", es.etcdServer.Name(), err)
|
glog.Errorf("Failed to stop %q: %v", es.etcdServer.Name(), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, d := range es.rmDirs {
|
for _, d := range es.rmDirs {
|
||||||
|
glog.Info("Deleting directory %v", d)
|
||||||
err := os.RemoveAll(d)
|
err := os.RemoveAll(d)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Failed to delete directory %s.\n%v", d, err)
|
glog.Errorf("Failed to delete directory %s.\n%v", d, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
glog.Info("E2E services stopped.")
|
glog.Info("E2E services stopped.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startEtcd starts the embedded etcd instance or returns an error.
|
||||||
func (es *e2eService) startEtcd() error {
|
func (es *e2eService) startEtcd() error {
|
||||||
|
glog.Info("Starting etcd")
|
||||||
dataDir, err := ioutil.TempDir("", "node-e2e")
|
dataDir, err := ioutil.TempDir("", "node-e2e")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -336,17 +338,24 @@ func (es *e2eService) startEtcd() error {
|
|||||||
return es.etcdServer.Start()
|
return es.etcdServer.Start()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startApiServer starts the embedded API server or returns an error.
|
||||||
func (es *e2eService) startApiServer() error {
|
func (es *e2eService) startApiServer() error {
|
||||||
|
glog.Info("Starting API server")
|
||||||
es.apiServer = NewAPIServer()
|
es.apiServer = NewAPIServer()
|
||||||
return es.apiServer.Start()
|
return es.apiServer.Start()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startNamespaceController starts the embedded namespace controller or returns an error.
|
||||||
func (es *e2eService) startNamespaceController() error {
|
func (es *e2eService) startNamespaceController() error {
|
||||||
|
glog.Info("Starting namespace controller")
|
||||||
es.nsController = NewNamespaceController()
|
es.nsController = NewNamespaceController()
|
||||||
return es.nsController.Start()
|
return es.nsController.Start()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (es *e2eService) startKubeletServer() (*server, error) {
|
// startKubelet starts the Kubelet in a separate process or returns an error
|
||||||
|
// if the Kubelet fails to start.
|
||||||
|
func (es *e2eService) startKubelet() (*server, error) {
|
||||||
|
glog.Info("Starting kubelet")
|
||||||
var killCommand, restartCommand *exec.Cmd
|
var killCommand, restartCommand *exec.Cmd
|
||||||
cmdArgs := []string{}
|
cmdArgs := []string{}
|
||||||
if systemdRun, err := exec.LookPath("systemd-run"); err == nil {
|
if systemdRun, err := exec.LookPath("systemd-run"); err == nil {
|
||||||
@ -417,11 +426,12 @@ func (es *e2eService) startKubeletServer() (*server, error) {
|
|||||||
restartCommand,
|
restartCommand,
|
||||||
[]string{kubeletHealthCheckURL},
|
[]string{kubeletHealthCheckURL},
|
||||||
"kubelet.log",
|
"kubelet.log",
|
||||||
true)
|
true /* restartOnExit */)
|
||||||
return server, server.start()
|
return server, server.start()
|
||||||
}
|
}
|
||||||
|
|
||||||
// server manages a server started and killed with commands.
|
// A server manages a separate server process started and killed with
|
||||||
|
// commands.
|
||||||
type server struct {
|
type server struct {
|
||||||
// name is the name of the server, it is only used for logging.
|
// name is the name of the server, it is only used for logging.
|
||||||
name string
|
name string
|
||||||
@ -447,14 +457,16 @@ type server struct {
|
|||||||
ackStopRestartingCh <-chan bool
|
ackStopRestartingCh <-chan bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func newServer(name string, start, kill, restart *exec.Cmd, urls []string, filename string, restartOnExit bool) *server {
|
// newServer returns a new server with the given name, commands, health check
|
||||||
|
// URLs, etc.
|
||||||
|
func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, restartOnExit bool) *server {
|
||||||
return &server{
|
return &server{
|
||||||
name: name,
|
name: name,
|
||||||
startCommand: start,
|
startCommand: start,
|
||||||
killCommand: kill,
|
killCommand: kill,
|
||||||
restartCommand: restart,
|
restartCommand: restart,
|
||||||
healthCheckUrls: urls,
|
healthCheckUrls: urls,
|
||||||
outFilename: filename,
|
outFilename: outputFileName,
|
||||||
restartOnExit: restartOnExit,
|
restartOnExit: restartOnExit,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -472,54 +484,15 @@ func (s *server) String() string {
|
|||||||
commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
|
commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
|
||||||
}
|
}
|
||||||
|
|
||||||
// readinessCheck checks whether services are ready via the health check urls. Once there is
|
// start starts the server by running its commands, monitors it with a health
|
||||||
// an error in errCh, the function will stop waiting and return the error.
|
// check, and ensures that it is restarted if applicable.
|
||||||
// TODO(random-liu): Move this to util
|
//
|
||||||
func readinessCheck(urls []string, errCh <-chan error) error {
|
|
||||||
endTime := time.Now().Add(*serverStartTimeout)
|
|
||||||
blockCh := make(chan error)
|
|
||||||
defer close(blockCh)
|
|
||||||
for endTime.After(time.Now()) {
|
|
||||||
select {
|
|
||||||
// We *always* want to run the health check if there is no error on the channel.
|
|
||||||
// With systemd, reads from errCh report nil because cmd.Run() waits
|
|
||||||
// on systemd-run, rather than the service process. systemd-run quickly
|
|
||||||
// exits with status 0, causing the channel to be closed with no error. In
|
|
||||||
// this case, you want to wait for the health check to complete, rather
|
|
||||||
// than returning from readinessCheck as soon as the channel is closed.
|
|
||||||
case err, ok := <-errCh:
|
|
||||||
if ok { // The channel is not closed, this is a real error
|
|
||||||
if err != nil { // If there is an error, return it
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// If not, keep checking readiness.
|
|
||||||
} else { // The channel is closed, this is only a zero value.
|
|
||||||
// Replace the errCh with blockCh to avoid busy loop,
|
|
||||||
// and keep checking readiness.
|
|
||||||
errCh = blockCh
|
|
||||||
}
|
|
||||||
case <-time.After(time.Second):
|
|
||||||
ready := true
|
|
||||||
for _, url := range urls {
|
|
||||||
resp, err := http.Head(url)
|
|
||||||
if err != nil || resp.StatusCode != http.StatusOK {
|
|
||||||
ready = false
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ready {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmt.Errorf("e2e service readiness check timeout %v", *serverStartTimeout)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly.
|
// Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly.
|
||||||
func (s *server) start() error {
|
func (s *server) start() error {
|
||||||
glog.Infof("Start server %q with command %q", s.name, commandToString(s.startCommand))
|
glog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand))
|
||||||
errCh := make(chan error)
|
errCh := make(chan error)
|
||||||
|
|
||||||
|
// Set up restart channels if the server is configured for restart on exit.
|
||||||
var stopRestartingCh, ackStopRestartingCh chan bool
|
var stopRestartingCh, ackStopRestartingCh chan bool
|
||||||
if s.restartOnExit {
|
if s.restartOnExit {
|
||||||
if len(s.healthCheckUrls) == 0 {
|
if len(s.healthCheckUrls) == 0 {
|
||||||
@ -533,6 +506,7 @@ func (s *server) start() error {
|
|||||||
s.ackStopRestartingCh = ackStopRestartingCh
|
s.ackStopRestartingCh = ackStopRestartingCh
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This goroutine actually runs the start command for the server.
|
||||||
go func() {
|
go func() {
|
||||||
defer close(errCh)
|
defer close(errCh)
|
||||||
|
|
||||||
@ -542,6 +516,8 @@ func (s *server) start() error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
errCh <- fmt.Errorf("failed to create file %q for `%s` %v.", outPath, s, err)
|
errCh <- fmt.Errorf("failed to create file %q for `%s` %v.", outPath, s, err)
|
||||||
return
|
return
|
||||||
|
} else {
|
||||||
|
glog.Infof("Output file for server %q: %v", s.name, outfile.Name())
|
||||||
}
|
}
|
||||||
defer outfile.Close()
|
defer outfile.Close()
|
||||||
defer outfile.Sync()
|
defer outfile.Sync()
|
||||||
@ -569,25 +545,29 @@ func (s *server) start() error {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
if !s.restartOnExit {
|
if !s.restartOnExit {
|
||||||
|
glog.Infof("Waiting for server %q start command to complete", s.name)
|
||||||
// If we aren't planning on restarting, ok to Wait() here to release resources.
|
// If we aren't planning on restarting, ok to Wait() here to release resources.
|
||||||
// Otherwise, we Wait() in the restart loop.
|
// Otherwise, we Wait() in the restart loop.
|
||||||
err = s.startCommand.Wait()
|
err = s.startCommand.Wait()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errCh <- fmt.Errorf("failed to run %s: %v", s, err)
|
errCh <- fmt.Errorf("failed to run start command for server %q: %v", s.name, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// New stuff
|
|
||||||
usedStartCmd := true
|
usedStartCmd := true
|
||||||
for {
|
for {
|
||||||
|
glog.Infof("Running health check for service %q", s.name)
|
||||||
// Wait for an initial health check to pass, so that we are sure the server started.
|
// Wait for an initial health check to pass, so that we are sure the server started.
|
||||||
err := readinessCheck(s.healthCheckUrls, nil)
|
err := readinessCheck(s.name, s.healthCheckUrls, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if usedStartCmd {
|
if usedStartCmd {
|
||||||
|
glog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name)
|
||||||
s.startCommand.Wait() // Release resources if necessary.
|
s.startCommand.Wait() // Release resources if necessary.
|
||||||
}
|
}
|
||||||
// This should not happen, immediately stop the e2eService process.
|
// This should not happen, immediately stop the e2eService process.
|
||||||
glog.Fatalf("restart loop readinessCheck failed for %s", s)
|
glog.Fatalf("restart loop readinessCheck failed for %s", s)
|
||||||
|
} else {
|
||||||
|
glog.Infof("Initial health check passed for service %q", s.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initial health check passed, wait until a health check fails again.
|
// Initial health check passed, wait until a health check fails again.
|
||||||
@ -612,7 +592,8 @@ func (s *server) start() error {
|
|||||||
usedStartCmd = false
|
usedStartCmd = false
|
||||||
}
|
}
|
||||||
if s.restartCommand != nil {
|
if s.restartCommand != nil {
|
||||||
// Always make a fresh copy of restartCommand before running, we may have to restart multiple times
|
// Always make a fresh copy of restartCommand before
|
||||||
|
// running, we may have to restart multiple times
|
||||||
s.restartCommand = &exec.Cmd{
|
s.restartCommand = &exec.Cmd{
|
||||||
Path: s.restartCommand.Path,
|
Path: s.restartCommand.Path,
|
||||||
Args: s.restartCommand.Args,
|
Args: s.restartCommand.Args,
|
||||||
@ -624,10 +605,13 @@ func (s *server) start() error {
|
|||||||
ExtraFiles: s.restartCommand.ExtraFiles,
|
ExtraFiles: s.restartCommand.ExtraFiles,
|
||||||
SysProcAttr: s.restartCommand.SysProcAttr,
|
SysProcAttr: s.restartCommand.SysProcAttr,
|
||||||
}
|
}
|
||||||
err = s.restartCommand.Run() // Run and wait for exit. This command is assumed to have short duration, e.g. systemctl restart
|
// Run and wait for exit. This command is assumed to have
|
||||||
|
// short duration, e.g. systemctl restart
|
||||||
|
glog.Infof("Restarting server %q with restart command", s.name)
|
||||||
|
err = s.restartCommand.Run()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// This should not happen, immediately stop the e2eService process.
|
// This should not happen, immediately stop the e2eService process.
|
||||||
glog.Fatalf("restarting %s with restartCommand failed. Error: %v.", s, err)
|
glog.Fatalf("restarting server %s with restartCommand failed. Error: %v.", s, err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s.startCommand = &exec.Cmd{
|
s.startCommand = &exec.Cmd{
|
||||||
@ -641,20 +625,22 @@ func (s *server) start() error {
|
|||||||
ExtraFiles: s.startCommand.ExtraFiles,
|
ExtraFiles: s.startCommand.ExtraFiles,
|
||||||
SysProcAttr: s.startCommand.SysProcAttr,
|
SysProcAttr: s.startCommand.SysProcAttr,
|
||||||
}
|
}
|
||||||
|
glog.Infof("Restarting server %q with start command", s.name)
|
||||||
err = s.startCommand.Start()
|
err = s.startCommand.Start()
|
||||||
usedStartCmd = true
|
usedStartCmd = true
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// This should not happen, immediately stop the e2eService process.
|
// This should not happen, immediately stop the e2eService process.
|
||||||
glog.Fatalf("restarting %s with startCommand failed. Error: %v.", s, err)
|
glog.Fatalf("Restarting %s with startCommand failed. Error: %v.", s, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
return readinessCheck(s.healthCheckUrls, errCh)
|
return readinessCheck(s.name, s.healthCheckUrls, errCh)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// kill runs the server's kill command.
|
||||||
func (s *server) kill() error {
|
func (s *server) kill() error {
|
||||||
glog.Infof("Kill server %q", s.name)
|
glog.Infof("Kill server %q", s.name)
|
||||||
name := s.name
|
name := s.name
|
||||||
@ -731,3 +717,66 @@ func (s *server) kill() error {
|
|||||||
func adjustConfigForSystemd(config string) string {
|
func adjustConfigForSystemd(config string) string {
|
||||||
return strings.Replace(config, "%", "%%", -1)
|
return strings.Replace(config, "%", "%%", -1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func copyLogFile(src, target string) error {
|
||||||
|
// If not a journald based distro, then just symlink files.
|
||||||
|
if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil {
|
||||||
|
return fmt.Errorf("failed to copy %q to %q: %v, %v", src, target, out, err)
|
||||||
|
}
|
||||||
|
if out, err := exec.Command("sudo", "chmod", "a+r", target).CombinedOutput(); err != nil {
|
||||||
|
return fmt.Errorf("failed to make log file %q world readable: %v, %v", target, out, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isJournaldAvailable returns whether the system executing the tests uses
|
||||||
|
// journald.
|
||||||
|
func isJournaldAvailable() bool {
|
||||||
|
_, err := exec.LookPath("journalctl")
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readinessCheck checks whether services are ready via the supplied health
|
||||||
|
// check URLs. Once there is an error in errCh, the function will stop waiting
|
||||||
|
// and return the error.
|
||||||
|
// TODO(random-liu): Move this to util
|
||||||
|
func readinessCheck(name string, urls []string, errCh <-chan error) error {
|
||||||
|
glog.Infof("Running readiness check for service %q", name)
|
||||||
|
endTime := time.Now().Add(*serverStartTimeout)
|
||||||
|
blockCh := make(chan error)
|
||||||
|
defer close(blockCh)
|
||||||
|
for endTime.After(time.Now()) {
|
||||||
|
select {
|
||||||
|
// We *always* want to run the health check if there is no error on the channel.
|
||||||
|
// With systemd, reads from errCh report nil because cmd.Run() waits
|
||||||
|
// on systemd-run, rather than the service process. systemd-run quickly
|
||||||
|
// exits with status 0, causing the channel to be closed with no error. In
|
||||||
|
// this case, you want to wait for the health check to complete, rather
|
||||||
|
// than returning from readinessCheck as soon as the channel is closed.
|
||||||
|
case err, ok := <-errCh:
|
||||||
|
if ok { // The channel is not closed, this is a real error
|
||||||
|
if err != nil { // If there is an error, return it
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If not, keep checking readiness.
|
||||||
|
} else { // The channel is closed, this is only a zero value.
|
||||||
|
// Replace the errCh with blockCh to avoid busy loop,
|
||||||
|
// and keep checking readiness.
|
||||||
|
errCh = blockCh
|
||||||
|
}
|
||||||
|
case <-time.After(time.Second):
|
||||||
|
ready := true
|
||||||
|
for _, url := range urls {
|
||||||
|
resp, err := http.Head(url)
|
||||||
|
if err != nil || resp.StatusCode != http.StatusOK {
|
||||||
|
ready = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ready {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("e2e service %q readiness check timeout %v", name, *serverStartTimeout)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user