mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Split services.go into services.go, internal_services.go and server.go.
This commit is contained in:
parent
5b3860ce0b
commit
0d3befd7ea
148
test/e2e_node/services/internal_services.go
Normal file
148
test/e2e_node/services/internal_services.go
Normal file
@ -0,0 +1,148 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package services
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// e2eService manages e2e services in current process.
|
||||
type e2eServices struct {
|
||||
rmDirs []string
|
||||
// statically linked e2e services
|
||||
etcdServer *EtcdServer
|
||||
apiServer *APIServer
|
||||
nsController *NamespaceController
|
||||
}
|
||||
|
||||
func newE2EServices() *e2eServices {
|
||||
return &e2eServices{}
|
||||
}
|
||||
|
||||
// terminationSignals are signals that cause the program to exit in the
|
||||
// supported platforms (linux, darwin, windows).
|
||||
var terminationSignals = []os.Signal{syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT}
|
||||
|
||||
// run starts all e2e services and wait for the termination signal. Once receives the
|
||||
// termination signal, it will stop the e2e services gracefully.
|
||||
func (es *e2eServices) run() error {
|
||||
defer es.stop()
|
||||
if err := es.start(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Wait until receiving a termination signal.
|
||||
sig := make(chan os.Signal, 1)
|
||||
signal.Notify(sig, terminationSignals...)
|
||||
<-sig
|
||||
return nil
|
||||
}
|
||||
|
||||
// start starts the tests embedded services or returns an error.
|
||||
func (es *e2eServices) start() error {
|
||||
glog.Info("Starting e2e services...")
|
||||
err := es.startEtcd()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = es.startApiServer()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = es.startNamespaceController()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
glog.Info("E2E services started.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// stop stops the embedded e2e services.
|
||||
func (es *e2eServices) stop() {
|
||||
glog.Info("Stopping e2e services...")
|
||||
// TODO(random-liu): Use a loop to stop all services after introducing
|
||||
// service interface.
|
||||
glog.Info("Stopping namespace controller")
|
||||
if es.nsController != nil {
|
||||
if err := es.nsController.Stop(); err != nil {
|
||||
glog.Errorf("Failed to stop %q: %v", es.nsController.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
glog.Info("Stopping API server")
|
||||
if es.apiServer != nil {
|
||||
if err := es.apiServer.Stop(); err != nil {
|
||||
glog.Errorf("Failed to stop %q: %v", es.apiServer.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
glog.Info("Stopping etcd")
|
||||
if es.etcdServer != nil {
|
||||
if err := es.etcdServer.Stop(); err != nil {
|
||||
glog.Errorf("Failed to stop %q: %v", es.etcdServer.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, d := range es.rmDirs {
|
||||
glog.Info("Deleting directory %v", d)
|
||||
err := os.RemoveAll(d)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to delete directory %s.\n%v", d, err)
|
||||
}
|
||||
}
|
||||
|
||||
glog.Info("E2E services stopped.")
|
||||
}
|
||||
|
||||
// startEtcd starts the embedded etcd instance or returns an error.
|
||||
func (es *e2eServices) startEtcd() error {
|
||||
glog.Info("Starting etcd")
|
||||
dataDir, err := ioutil.TempDir("", "node-e2e")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Mark the dataDir as directories to remove.
|
||||
es.rmDirs = append(es.rmDirs, dataDir)
|
||||
es.etcdServer = NewEtcd(dataDir)
|
||||
return es.etcdServer.Start()
|
||||
}
|
||||
|
||||
// startApiServer starts the embedded API server or returns an error.
|
||||
func (es *e2eServices) startApiServer() error {
|
||||
glog.Info("Starting API server")
|
||||
es.apiServer = NewAPIServer()
|
||||
return es.apiServer.Start()
|
||||
}
|
||||
|
||||
// startNamespaceController starts the embedded namespace controller or returns an error.
|
||||
func (es *e2eServices) startNamespaceController() error {
|
||||
glog.Info("Starting namespace controller")
|
||||
es.nsController = NewNamespaceController()
|
||||
return es.nsController.Start()
|
||||
}
|
||||
|
||||
// getServicesHealthCheckURLs returns the health check urls for the internal services.
|
||||
func getServicesHealthCheckURLs() []string {
|
||||
return []string{
|
||||
getEtcdHealthCheckURL(),
|
||||
getAPIServerHealthCheckURL(),
|
||||
}
|
||||
}
|
366
test/e2e_node/services/server.go
Normal file
366
test/e2e_node/services/server.go
Normal file
@ -0,0 +1,366 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package services
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
)
|
||||
|
||||
var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.")
|
||||
|
||||
// A server manages a separate server process started and killed with
|
||||
// commands.
|
||||
type server struct {
|
||||
// name is the name of the server, it is only used for logging.
|
||||
name string
|
||||
// startCommand is the command used to start the server
|
||||
startCommand *exec.Cmd
|
||||
// killCommand is the command used to stop the server. It is not required. If it
|
||||
// is not specified, `sudo kill` will be used to stop the server.
|
||||
killCommand *exec.Cmd
|
||||
// restartCommand is the command used to restart the server. If provided, it will be used
|
||||
// instead of startCommand when restarting the server.
|
||||
restartCommand *exec.Cmd
|
||||
// healthCheckUrls is the urls used to check whether the server is ready.
|
||||
healthCheckUrls []string
|
||||
// outFilename is the name of the log file. The stdout and stderr of the server
|
||||
// will be redirected to this file.
|
||||
outFilename string
|
||||
// restartOnExit determines whether a restart loop is launched with the server
|
||||
restartOnExit bool
|
||||
// Writing to this channel, if it is not nil, stops the restart loop.
|
||||
// When tearing down a server, you should check for this channel and write to it if it exists.
|
||||
stopRestartingCh chan<- bool
|
||||
// Read from this to confirm that the restart loop has stopped.
|
||||
ackStopRestartingCh <-chan bool
|
||||
}
|
||||
|
||||
// newServer returns a new server with the given name, commands, health check
|
||||
// URLs, etc.
|
||||
func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, restartOnExit bool) *server {
|
||||
return &server{
|
||||
name: name,
|
||||
startCommand: start,
|
||||
killCommand: kill,
|
||||
restartCommand: restart,
|
||||
healthCheckUrls: urls,
|
||||
outFilename: outputFileName,
|
||||
restartOnExit: restartOnExit,
|
||||
}
|
||||
}
|
||||
|
||||
// commandToString format command to string.
|
||||
func commandToString(c *exec.Cmd) string {
|
||||
if c == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(append([]string{c.Path}, c.Args[1:]...), " ")
|
||||
}
|
||||
|
||||
func (s *server) String() string {
|
||||
return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, restart-command: `%s`, health-check: %v, output-file: %q", s.name,
|
||||
commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
|
||||
}
|
||||
|
||||
// readinessCheck checks whether services are ready via the supplied health
|
||||
// check URLs. Once there is an error in errCh, the function will stop waiting
|
||||
// and return the error.
|
||||
// TODO(random-liu): Move this to util
|
||||
func readinessCheck(name string, urls []string, errCh <-chan error) error {
|
||||
glog.Infof("Running readiness check for service %q", name)
|
||||
endTime := time.Now().Add(*serverStartTimeout)
|
||||
blockCh := make(chan error)
|
||||
defer close(blockCh)
|
||||
for endTime.After(time.Now()) {
|
||||
select {
|
||||
// We *always* want to run the health check if there is no error on the channel.
|
||||
// With systemd, reads from errCh report nil because cmd.Run() waits
|
||||
// on systemd-run, rather than the service process. systemd-run quickly
|
||||
// exits with status 0, causing the channel to be closed with no error. In
|
||||
// this case, you want to wait for the health check to complete, rather
|
||||
// than returning from readinessCheck as soon as the channel is closed.
|
||||
case err, ok := <-errCh:
|
||||
if ok { // The channel is not closed, this is a real error
|
||||
if err != nil { // If there is an error, return it
|
||||
return err
|
||||
}
|
||||
// If not, keep checking readiness.
|
||||
} else { // The channel is closed, this is only a zero value.
|
||||
// Replace the errCh with blockCh to avoid busy loop,
|
||||
// and keep checking readiness.
|
||||
errCh = blockCh
|
||||
}
|
||||
case <-time.After(time.Second):
|
||||
ready := true
|
||||
for _, url := range urls {
|
||||
resp, err := http.Head(url)
|
||||
if err != nil || resp.StatusCode != http.StatusOK {
|
||||
ready = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if ready {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("e2e service %q readiness check timeout %v", name, *serverStartTimeout)
|
||||
}
|
||||
|
||||
// start starts the server by running its commands, monitors it with a health
|
||||
// check, and ensures that it is restarted if applicable.
|
||||
//
|
||||
// Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly.
|
||||
func (s *server) start() error {
|
||||
glog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand))
|
||||
errCh := make(chan error)
|
||||
|
||||
// Set up restart channels if the server is configured for restart on exit.
|
||||
var stopRestartingCh, ackStopRestartingCh chan bool
|
||||
if s.restartOnExit {
|
||||
if len(s.healthCheckUrls) == 0 {
|
||||
return fmt.Errorf("Tried to start %s which has s.restartOnExit == true, but no health check urls provided.", s)
|
||||
}
|
||||
|
||||
stopRestartingCh = make(chan bool)
|
||||
ackStopRestartingCh = make(chan bool)
|
||||
|
||||
s.stopRestartingCh = stopRestartingCh
|
||||
s.ackStopRestartingCh = ackStopRestartingCh
|
||||
}
|
||||
|
||||
// This goroutine actually runs the start command for the server.
|
||||
go func() {
|
||||
defer close(errCh)
|
||||
|
||||
// Create the output filename
|
||||
outPath := path.Join(framework.TestContext.ReportDir, s.outFilename)
|
||||
outfile, err := os.Create(outPath)
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("failed to create file %q for `%s` %v.", outPath, s, err)
|
||||
return
|
||||
} else {
|
||||
glog.Infof("Output file for server %q: %v", s.name, outfile.Name())
|
||||
}
|
||||
defer outfile.Close()
|
||||
defer outfile.Sync()
|
||||
|
||||
// Set the command to write the output file
|
||||
s.startCommand.Stdout = outfile
|
||||
s.startCommand.Stderr = outfile
|
||||
|
||||
// Death of this test process should kill the server as well.
|
||||
attrs := &syscall.SysProcAttr{}
|
||||
// Hack to set linux-only field without build tags.
|
||||
deathSigField := reflect.ValueOf(attrs).Elem().FieldByName("Pdeathsig")
|
||||
if deathSigField.IsValid() {
|
||||
deathSigField.Set(reflect.ValueOf(syscall.SIGTERM))
|
||||
} else {
|
||||
errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)")
|
||||
return
|
||||
}
|
||||
s.startCommand.SysProcAttr = attrs
|
||||
|
||||
// Start the command
|
||||
err = s.startCommand.Start()
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("failed to run %s: %v", s, err)
|
||||
return
|
||||
}
|
||||
if !s.restartOnExit {
|
||||
glog.Infof("Waiting for server %q start command to complete", s.name)
|
||||
// If we aren't planning on restarting, ok to Wait() here to release resources.
|
||||
// Otherwise, we Wait() in the restart loop.
|
||||
err = s.startCommand.Wait()
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("failed to run start command for server %q: %v", s.name, err)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
usedStartCmd := true
|
||||
for {
|
||||
glog.Infof("Running health check for service %q", s.name)
|
||||
// Wait for an initial health check to pass, so that we are sure the server started.
|
||||
err := readinessCheck(s.name, s.healthCheckUrls, nil)
|
||||
if err != nil {
|
||||
if usedStartCmd {
|
||||
glog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name)
|
||||
s.startCommand.Wait() // Release resources if necessary.
|
||||
}
|
||||
// This should not happen, immediately stop the e2eService process.
|
||||
glog.Fatalf("Restart loop readinessCheck failed for %s", s)
|
||||
} else {
|
||||
glog.Infof("Initial health check passed for service %q", s.name)
|
||||
}
|
||||
|
||||
// Initial health check passed, wait until a health check fails again.
|
||||
stillAlive:
|
||||
for {
|
||||
select {
|
||||
case <-stopRestartingCh:
|
||||
ackStopRestartingCh <- true
|
||||
return
|
||||
case <-time.After(time.Second):
|
||||
for _, url := range s.healthCheckUrls {
|
||||
resp, err := http.Head(url)
|
||||
if err != nil || resp.StatusCode != http.StatusOK {
|
||||
break stillAlive
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if usedStartCmd {
|
||||
s.startCommand.Wait() // Release resources from last cmd
|
||||
usedStartCmd = false
|
||||
}
|
||||
if s.restartCommand != nil {
|
||||
// Always make a fresh copy of restartCommand before
|
||||
// running, we may have to restart multiple times
|
||||
s.restartCommand = &exec.Cmd{
|
||||
Path: s.restartCommand.Path,
|
||||
Args: s.restartCommand.Args,
|
||||
Env: s.restartCommand.Env,
|
||||
Dir: s.restartCommand.Dir,
|
||||
Stdin: s.restartCommand.Stdin,
|
||||
Stdout: s.restartCommand.Stdout,
|
||||
Stderr: s.restartCommand.Stderr,
|
||||
ExtraFiles: s.restartCommand.ExtraFiles,
|
||||
SysProcAttr: s.restartCommand.SysProcAttr,
|
||||
}
|
||||
// Run and wait for exit. This command is assumed to have
|
||||
// short duration, e.g. systemctl restart
|
||||
glog.Infof("Restarting server %q with restart command", s.name)
|
||||
err = s.restartCommand.Run()
|
||||
if err != nil {
|
||||
// This should not happen, immediately stop the e2eService process.
|
||||
glog.Fatalf("Restarting server %s with restartCommand failed. Error: %v.", s, err)
|
||||
}
|
||||
} else {
|
||||
s.startCommand = &exec.Cmd{
|
||||
Path: s.startCommand.Path,
|
||||
Args: s.startCommand.Args,
|
||||
Env: s.startCommand.Env,
|
||||
Dir: s.startCommand.Dir,
|
||||
Stdin: s.startCommand.Stdin,
|
||||
Stdout: s.startCommand.Stdout,
|
||||
Stderr: s.startCommand.Stderr,
|
||||
ExtraFiles: s.startCommand.ExtraFiles,
|
||||
SysProcAttr: s.startCommand.SysProcAttr,
|
||||
}
|
||||
glog.Infof("Restarting server %q with start command", s.name)
|
||||
err = s.startCommand.Start()
|
||||
usedStartCmd = true
|
||||
if err != nil {
|
||||
// This should not happen, immediately stop the e2eService process.
|
||||
glog.Fatalf("Restarting server %s with startCommand failed. Error: %v.", s, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return readinessCheck(s.name, s.healthCheckUrls, errCh)
|
||||
}
|
||||
|
||||
// kill runs the server's kill command.
|
||||
func (s *server) kill() error {
|
||||
glog.Infof("Kill server %q", s.name)
|
||||
name := s.name
|
||||
cmd := s.startCommand
|
||||
|
||||
// If s has a restart loop, turn it off.
|
||||
if s.restartOnExit {
|
||||
s.stopRestartingCh <- true
|
||||
<-s.ackStopRestartingCh
|
||||
}
|
||||
|
||||
if s.killCommand != nil {
|
||||
return s.killCommand.Run()
|
||||
}
|
||||
|
||||
if cmd == nil {
|
||||
return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name)
|
||||
}
|
||||
|
||||
if cmd.Process == nil {
|
||||
glog.V(2).Infof("%q not running", name)
|
||||
return nil
|
||||
}
|
||||
pid := cmd.Process.Pid
|
||||
if pid <= 1 {
|
||||
return fmt.Errorf("invalid PID %d for %q", pid, name)
|
||||
}
|
||||
|
||||
// Attempt to shut down the process in a friendly manner before forcing it.
|
||||
waitChan := make(chan error)
|
||||
go func() {
|
||||
_, err := cmd.Process.Wait()
|
||||
waitChan <- err
|
||||
close(waitChan)
|
||||
}()
|
||||
|
||||
const timeout = 10 * time.Second
|
||||
for _, signal := range []string{"-TERM", "-KILL"} {
|
||||
glog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal)
|
||||
cmd := exec.Command("sudo", "kill", signal, strconv.Itoa(pid))
|
||||
|
||||
// Run the 'kill' command in a separate process group so sudo doesn't ignore it
|
||||
attrs := &syscall.SysProcAttr{}
|
||||
// Hack to set unix-only field without build tags.
|
||||
setpgidField := reflect.ValueOf(attrs).Elem().FieldByName("Setpgid")
|
||||
if setpgidField.IsValid() {
|
||||
setpgidField.Set(reflect.ValueOf(true))
|
||||
} else {
|
||||
return fmt.Errorf("Failed to set Setpgid field (non-unix build)")
|
||||
}
|
||||
cmd.SysProcAttr = attrs
|
||||
|
||||
_, err := cmd.Output()
|
||||
if err != nil {
|
||||
glog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err)
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case err := <-waitChan:
|
||||
if err != nil {
|
||||
return fmt.Errorf("error stopping %q: %v", name, err)
|
||||
}
|
||||
// Success!
|
||||
return nil
|
||||
case <-time.After(timeout):
|
||||
// Continue.
|
||||
}
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to stop %q", name)
|
||||
}
|
@ -17,21 +17,14 @@ limitations under the License.
|
||||
package services
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/kardianos/osext"
|
||||
@ -41,132 +34,12 @@ import (
|
||||
"k8s.io/kubernetes/test/e2e_node/build"
|
||||
)
|
||||
|
||||
// TODO(random-liu): Move this file to a separate package.
|
||||
var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.")
|
||||
|
||||
// E2EServices starts and stops e2e services in a separate process. The test
|
||||
// uses it to start and stop all e2e services.
|
||||
type E2EServices struct {
|
||||
services *server
|
||||
}
|
||||
|
||||
// NewE2EServices returns a new E2EServices instance.
|
||||
func NewE2EServices() *E2EServices {
|
||||
return &E2EServices{}
|
||||
}
|
||||
|
||||
// services.log is the combined log of all services
|
||||
const servicesLogFile = "services.log"
|
||||
|
||||
// Start starts the e2e services in another process by calling back into the
|
||||
// test binary. Returns when all e2e services are ready or an error.
|
||||
//
|
||||
// We want to statically link e2e services into the test binary, but we don't
|
||||
// want their glog output to pollute the test result. So we run the binary in
|
||||
// run- services-mode to start e2e services in another process.
|
||||
func (e *E2EServices) Start() error {
|
||||
var err error
|
||||
// Create the manifest path for kubelet.
|
||||
// TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test.
|
||||
framework.TestContext.ManifestPath, err = ioutil.TempDir("", "node-e2e-pod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create static pod manifest directory: %v", err)
|
||||
}
|
||||
testBin, err := osext.Executable()
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't get current binary: %v", err)
|
||||
}
|
||||
// TODO(random-liu): Add sudo after we statically link apiserver and etcd, because apiserver needs
|
||||
// sudo. We can't add sudo now, because etcd may not be in PATH of root.
|
||||
startCmd := exec.Command(testBin,
|
||||
// TODO(mtaufen): Flags e.g. that target the TestContext need to be manually forwarded to the
|
||||
// test binary when we start it up in run-services mode. This is not ideal.
|
||||
// Very unintuitive because it prevents any falgs NOT manually forwarded here
|
||||
// from being set via TEST_ARGS when running tests from the command line.
|
||||
"--run-services-mode",
|
||||
"--server-start-timeout", serverStartTimeout.String(),
|
||||
"--report-dir", framework.TestContext.ReportDir,
|
||||
// TODO(random-liu): Remove the following flags after we move kubelet starting logic
|
||||
// out of the test.
|
||||
"--node-name", framework.TestContext.NodeName,
|
||||
"--disable-kubenet="+strconv.FormatBool(framework.TestContext.DisableKubenet),
|
||||
// TODO: enable when flag is introduced in 1.5
|
||||
// "--cgroups-per-qos="+strconv.FormatBool(framework.TestContext.CgroupsPerQOS),
|
||||
"--manifest-path", framework.TestContext.ManifestPath,
|
||||
"--eviction-hard", framework.TestContext.EvictionHard,
|
||||
"--feature-gates", framework.TestContext.FeatureGates,
|
||||
"--runtime-integration-type", framework.TestContext.RuntimeIntegrationType,
|
||||
"--logtostderr",
|
||||
"--vmodule=*=4",
|
||||
)
|
||||
e.services = newServer("services", startCmd, nil, nil, getHealthCheckURLs(), servicesLogFile, false)
|
||||
return e.services.start()
|
||||
}
|
||||
|
||||
// Stop stops the e2e services.
|
||||
func (e *E2EServices) Stop() error {
|
||||
defer func() {
|
||||
// Cleanup the manifest path for kubelet.
|
||||
manifestPath := framework.TestContext.ManifestPath
|
||||
if manifestPath != "" {
|
||||
err := os.RemoveAll(manifestPath)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to delete static pod manifest directory %s: %v", manifestPath, err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
if e.services == nil {
|
||||
glog.Errorf("can't stop e2e services, because `services` is nil")
|
||||
}
|
||||
return e.services.kill()
|
||||
}
|
||||
|
||||
// RunE2EServices actually start the e2e services. This function is used to
|
||||
// start e2e services in current process. This is only used in run-services-mode.
|
||||
func RunE2EServices() {
|
||||
// Populate global DefaultFeatureGate with value from TestContext.FeatureGates.
|
||||
// This way, statically-linked components see the same feature gate config as the test context.
|
||||
utilconfig.DefaultFeatureGate.Set(framework.TestContext.FeatureGates)
|
||||
e := newE2EService()
|
||||
if err := e.run(); err != nil {
|
||||
glog.Fatalf("Failed to run e2e services: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Ports of different e2e services.
|
||||
const (
|
||||
kubeletPort = "10250"
|
||||
kubeletReadOnlyPort = "10255"
|
||||
)
|
||||
|
||||
// Health check urls of different e2e services.
|
||||
var (
|
||||
kubeletHealthCheckURL = getEndpoint(kubeletReadOnlyPort) + "/healthz"
|
||||
)
|
||||
|
||||
// getEndpoint generates endpoint url from service port.
|
||||
func getEndpoint(port string) string {
|
||||
return "http://127.0.0.1:" + port
|
||||
}
|
||||
|
||||
func getHealthCheckURLs() []string {
|
||||
return []string{
|
||||
getEtcdHealthCheckURL(),
|
||||
getAPIServerHealthCheckURL(),
|
||||
kubeletHealthCheckURL,
|
||||
}
|
||||
}
|
||||
|
||||
// e2eService manages e2e services in current process.
|
||||
type e2eService struct {
|
||||
services []*server
|
||||
rmDirs []string
|
||||
kubelet *server
|
||||
logFiles map[string]logFileData
|
||||
|
||||
// statically linked e2e services
|
||||
etcdServer *EtcdServer
|
||||
apiServer *APIServer
|
||||
nsController *NamespaceController
|
||||
}
|
||||
|
||||
// logFileData holds data about logfiles to fetch with a journalctl command or
|
||||
@ -176,185 +49,123 @@ type logFileData struct {
|
||||
journalctlCommand []string
|
||||
}
|
||||
|
||||
// NewE2EServices returns a new E2EServices instance.
|
||||
func NewE2EServices() *E2EServices {
|
||||
return &E2EServices{
|
||||
// Special log files that need to be collected for additional debugging.
|
||||
logFiles: map[string]logFileData{
|
||||
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
|
||||
"docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}},
|
||||
"cloud-init.log": {[]string{"/var/log/cloud-init.log"}, []string{"-u", "cloud*"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Start starts the e2e services in another process by calling back into the
|
||||
// test binary. Returns when all e2e services are ready or an error.
|
||||
//
|
||||
// We want to statically link e2e services into the test binary, but we don't
|
||||
// want their glog output to pollute the test result. So we run the binary in
|
||||
// run-services-mode to start e2e services in another process.
|
||||
// The function starts 2 processes:
|
||||
// * internal e2e services: services which statically linked in the test binary - apiserver, etcd and
|
||||
// namespace controller.
|
||||
// * kubelet: kubelet binary is outside. (We plan to move main kubelet start logic out when we have
|
||||
// standard kubelet launcher)
|
||||
func (e *E2EServices) Start() error {
|
||||
var err error
|
||||
// Start kubelet
|
||||
// Create the manifest path for kubelet.
|
||||
// TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test.
|
||||
framework.TestContext.ManifestPath, err = ioutil.TempDir("", "node-e2e-pod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create static pod manifest directory: %v", err)
|
||||
}
|
||||
e.kubelet, err = e.startKubelet()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start kubelet: %v", err)
|
||||
}
|
||||
e.services, err = e.startInternalServices()
|
||||
return err
|
||||
}
|
||||
|
||||
// Stop stops the e2e services.
|
||||
func (e *E2EServices) Stop() {
|
||||
defer func() {
|
||||
// Collect log files.
|
||||
e.getLogFiles()
|
||||
// Cleanup the manifest path for kubelet.
|
||||
manifestPath := framework.TestContext.ManifestPath
|
||||
if manifestPath != "" {
|
||||
err := os.RemoveAll(manifestPath)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to delete static pod manifest directory %s: %v", manifestPath, err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
if e.services != nil {
|
||||
if err := e.services.kill(); err != nil {
|
||||
glog.Errorf("Failed to stop services: %v", err)
|
||||
}
|
||||
}
|
||||
if e.kubelet != nil {
|
||||
if err := e.kubelet.kill(); err != nil {
|
||||
glog.Errorf("Failed to stop kubelet: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// RunE2EServices actually start the e2e services. This function is used to
|
||||
// start e2e services in current process. This is only used in run-services-mode.
|
||||
func RunE2EServices() {
|
||||
// Populate global DefaultFeatureGate with value from TestContext.FeatureGates.
|
||||
// This way, statically-linked components see the same feature gate config as the test context.
|
||||
utilconfig.DefaultFeatureGate.Set(framework.TestContext.FeatureGates)
|
||||
e := newE2EServices()
|
||||
if err := e.run(); err != nil {
|
||||
glog.Fatalf("Failed to run e2e services: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
// This is consistent with the level used in a cluster e2e test.
|
||||
// services.log is the combined log of all services
|
||||
servicesLogFile = "services.log"
|
||||
// LOG_VERBOSITY_LEVEL is consistent with the level used in a cluster e2e test.
|
||||
LOG_VERBOSITY_LEVEL = "4"
|
||||
)
|
||||
|
||||
func newE2EService() *e2eService {
|
||||
// Special log files that need to be collected for additional debugging.
|
||||
var logFiles = map[string]logFileData{
|
||||
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
|
||||
"docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}},
|
||||
"cloud-init.log": {[]string{"/var/log/cloud-init.log"}, []string{"-u", "cloud*"}},
|
||||
}
|
||||
|
||||
return &e2eService{logFiles: logFiles}
|
||||
}
|
||||
|
||||
// terminationSignals are signals that cause the program to exit in the
|
||||
// supported platforms (linux, darwin, windows).
|
||||
var terminationSignals = []os.Signal{syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT}
|
||||
|
||||
// run starts all e2e services and wait for the termination signal. Once receives the
|
||||
// termination signal, it will stop the e2e services gracefully.
|
||||
func (es *e2eService) run() error {
|
||||
defer es.stop()
|
||||
if err := es.start(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Wait until receiving a termination signal.
|
||||
sig := make(chan os.Signal, 1)
|
||||
signal.Notify(sig, terminationSignals...)
|
||||
<-sig
|
||||
return nil
|
||||
}
|
||||
|
||||
// start starts the tests embedded services or returns an error.
|
||||
func (es *e2eService) start() error {
|
||||
glog.Info("Starting e2e services...")
|
||||
err := es.startEtcd()
|
||||
// startInternalServices starts the internal services in a separate process.
|
||||
func (e *E2EServices) startInternalServices() (*server, error) {
|
||||
testBin, err := osext.Executable()
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, fmt.Errorf("can't get current binary: %v", err)
|
||||
}
|
||||
|
||||
err = es.startApiServer()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s, err := es.startKubelet()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
es.services = append(es.services, s)
|
||||
|
||||
err = es.startNamespaceController()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
glog.Info("E2E services started.")
|
||||
return nil
|
||||
startCmd := exec.Command("sudo", testBin,
|
||||
// TODO(mtaufen): Flags e.g. that target the TestContext need to be manually forwarded to the
|
||||
// test binary when we start it up in run-services mode. This is not ideal.
|
||||
// Very unintuitive because it prevents any flags NOT manually forwarded here
|
||||
// from being set via TEST_ARGS when running tests from the command line.
|
||||
"--run-services-mode",
|
||||
"--server-start-timeout", serverStartTimeout.String(),
|
||||
"--feature-gates", framework.TestContext.FeatureGates,
|
||||
"--logtostderr",
|
||||
"--vmodule=*="+LOG_VERBOSITY_LEVEL,
|
||||
)
|
||||
server := newServer("services", startCmd, nil, nil, getServicesHealthCheckURLs(), servicesLogFile, false)
|
||||
return server, server.start()
|
||||
}
|
||||
|
||||
// getLogFiles gets logs of interest either via journalctl or by creating sym
|
||||
// links. Since we scp files from the remote directory, symlinks will be
|
||||
// treated as normal files and file contents will be copied over.
|
||||
func (es *e2eService) getLogFiles() {
|
||||
// Nothing to do if report dir is not specified.
|
||||
if framework.TestContext.ReportDir == "" {
|
||||
return
|
||||
}
|
||||
glog.Info("Fetching log files...")
|
||||
journaldFound := isJournaldAvailable()
|
||||
for targetFileName, logFileData := range es.logFiles {
|
||||
targetLink := path.Join(framework.TestContext.ReportDir, targetFileName)
|
||||
if journaldFound {
|
||||
// Skip log files that do not have an equivalent in journald-based machines.
|
||||
if len(logFileData.journalctlCommand) == 0 {
|
||||
continue
|
||||
}
|
||||
glog.Infof("Get log file %q with journalctl command %v.", targetFileName, logFileData.journalctlCommand)
|
||||
out, err := exec.Command("sudo", append([]string{"journalctl"}, logFileData.journalctlCommand...)...).CombinedOutput()
|
||||
if err != nil {
|
||||
glog.Errorf("failed to get %q from journald: %v, %v", targetFileName, string(out), err)
|
||||
} else {
|
||||
if err = ioutil.WriteFile(targetLink, out, 0644); err != nil {
|
||||
glog.Errorf("failed to write logs to %q: %v", targetLink, err)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
for _, file := range logFileData.files {
|
||||
if _, err := os.Stat(file); err != nil {
|
||||
// Expected file not found on this distro.
|
||||
continue
|
||||
}
|
||||
if err := copyLogFile(file, targetLink); err != nil {
|
||||
glog.Error(err)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// stop stops the embedded e2e services.
|
||||
func (es *e2eService) stop() {
|
||||
glog.Info("Stopping e2e services...")
|
||||
es.getLogFiles()
|
||||
|
||||
// TODO(random-liu): Use a loop to stop all services after introducing
|
||||
// service interface.
|
||||
glog.Info("Stopping namespace controller")
|
||||
if es.nsController != nil {
|
||||
if err := es.nsController.Stop(); err != nil {
|
||||
glog.Errorf("Failed to stop %q: %v", es.nsController.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
glog.Info("Stopping API server")
|
||||
if es.apiServer != nil {
|
||||
if err := es.apiServer.Stop(); err != nil {
|
||||
glog.Errorf("Failed to stop %q: %v", es.apiServer.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, s := range es.services {
|
||||
glog.Info("Stopping service %q", s.name)
|
||||
if err := s.kill(); err != nil {
|
||||
glog.Errorf("Failed to stop %v: %v", s.name, err)
|
||||
}
|
||||
}
|
||||
|
||||
glog.Info("Stopping etcd")
|
||||
if es.etcdServer != nil {
|
||||
if err := es.etcdServer.Stop(); err != nil {
|
||||
glog.Errorf("Failed to stop %q: %v", es.etcdServer.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, d := range es.rmDirs {
|
||||
glog.Info("Deleting directory %v", d)
|
||||
err := os.RemoveAll(d)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to delete directory %s.\n%v", d, err)
|
||||
}
|
||||
}
|
||||
|
||||
glog.Info("E2E services stopped.")
|
||||
}
|
||||
|
||||
// startEtcd starts the embedded etcd instance or returns an error.
|
||||
func (es *e2eService) startEtcd() error {
|
||||
glog.Info("Starting etcd")
|
||||
dataDir, err := ioutil.TempDir("", "node-e2e")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Mark the dataDir as directories to remove.
|
||||
es.rmDirs = append(es.rmDirs, dataDir)
|
||||
es.etcdServer = NewEtcd(dataDir)
|
||||
return es.etcdServer.Start()
|
||||
}
|
||||
|
||||
// startApiServer starts the embedded API server or returns an error.
|
||||
func (es *e2eService) startApiServer() error {
|
||||
glog.Info("Starting API server")
|
||||
es.apiServer = NewAPIServer()
|
||||
return es.apiServer.Start()
|
||||
}
|
||||
|
||||
// startNamespaceController starts the embedded namespace controller or returns an error.
|
||||
func (es *e2eService) startNamespaceController() error {
|
||||
glog.Info("Starting namespace controller")
|
||||
es.nsController = NewNamespaceController()
|
||||
return es.nsController.Start()
|
||||
}
|
||||
const (
|
||||
// Ports of different e2e services.
|
||||
kubeletPort = "10250"
|
||||
kubeletReadOnlyPort = "10255"
|
||||
// Health check url of kubelet
|
||||
kubeletHealthCheckURL = "http://127.0.0.1:" + kubeletReadOnlyPort + "/healthz"
|
||||
)
|
||||
|
||||
// startKubelet starts the Kubelet in a separate process or returns an error
|
||||
// if the Kubelet fails to start.
|
||||
func (es *e2eService) startKubelet() (*server, error) {
|
||||
func (e *E2EServices) startKubelet() (*server, error) {
|
||||
glog.Info("Starting kubelet")
|
||||
var killCommand, restartCommand *exec.Cmd
|
||||
cmdArgs := []string{}
|
||||
@ -367,7 +178,7 @@ func (es *e2eService) startKubelet() (*server, error) {
|
||||
cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, "--remain-after-exit", build.GetKubeletServerBin())
|
||||
killCommand = exec.Command("sudo", "systemctl", "kill", unitName)
|
||||
restartCommand = exec.Command("sudo", "systemctl", "restart", unitName)
|
||||
es.logFiles["kubelet.log"] = logFileData{
|
||||
e.logFiles["kubelet.log"] = logFileData{
|
||||
journalctlCommand: []string{"-u", unitName},
|
||||
}
|
||||
framework.TestContext.EvictionHard = adjustConfigForSystemd(framework.TestContext.EvictionHard)
|
||||
@ -395,6 +206,7 @@ func (es *e2eService) startKubelet() (*server, error) {
|
||||
"--eviction-hard", framework.TestContext.EvictionHard,
|
||||
"--eviction-pressure-transition-period", "30s",
|
||||
"--feature-gates", framework.TestContext.FeatureGates,
|
||||
"--runtime-integration-type", framework.TestContext.RuntimeIntegrationType,
|
||||
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
|
||||
)
|
||||
if framework.TestContext.RuntimeIntegrationType != "" {
|
||||
@ -430,294 +242,59 @@ func (es *e2eService) startKubelet() (*server, error) {
|
||||
return server, server.start()
|
||||
}
|
||||
|
||||
// A server manages a separate server process started and killed with
|
||||
// commands.
|
||||
type server struct {
|
||||
// name is the name of the server, it is only used for logging.
|
||||
name string
|
||||
// startCommand is the command used to start the server
|
||||
startCommand *exec.Cmd
|
||||
// killCommand is the command used to stop the server. It is not required. If it
|
||||
// is not specified, `sudo kill` will be used to stop the server.
|
||||
killCommand *exec.Cmd
|
||||
// restartCommand is the command used to restart the server. If provided, it will be used
|
||||
// instead of startCommand when restarting the server.
|
||||
restartCommand *exec.Cmd
|
||||
// healthCheckUrls is the urls used to check whether the server is ready.
|
||||
healthCheckUrls []string
|
||||
// outFilename is the name of the log file. The stdout and stderr of the server
|
||||
// will be redirected to this file.
|
||||
outFilename string
|
||||
// restartOnExit determines whether a restart loop is launched with the server
|
||||
restartOnExit bool
|
||||
// Writing to this channel, if it is not nil, stops the restart loop.
|
||||
// When tearing down a server, you should check for this channel and write to it if it exists.
|
||||
stopRestartingCh chan<- bool
|
||||
// Read from this to confirm that the restart loop has stopped.
|
||||
ackStopRestartingCh <-chan bool
|
||||
}
|
||||
|
||||
// newServer returns a new server with the given name, commands, health check
|
||||
// URLs, etc.
|
||||
func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, restartOnExit bool) *server {
|
||||
return &server{
|
||||
name: name,
|
||||
startCommand: start,
|
||||
killCommand: kill,
|
||||
restartCommand: restart,
|
||||
healthCheckUrls: urls,
|
||||
outFilename: outputFileName,
|
||||
restartOnExit: restartOnExit,
|
||||
}
|
||||
}
|
||||
|
||||
// commandToString format command to string.
|
||||
func commandToString(c *exec.Cmd) string {
|
||||
if c == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(append([]string{c.Path}, c.Args[1:]...), " ")
|
||||
}
|
||||
|
||||
func (s *server) String() string {
|
||||
return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, restart-command: `%s`, health-check: %v, output-file: %q", s.name,
|
||||
commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
|
||||
}
|
||||
|
||||
// start starts the server by running its commands, monitors it with a health
|
||||
// check, and ensures that it is restarted if applicable.
|
||||
//
|
||||
// Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly.
|
||||
func (s *server) start() error {
|
||||
glog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand))
|
||||
errCh := make(chan error)
|
||||
|
||||
// Set up restart channels if the server is configured for restart on exit.
|
||||
var stopRestartingCh, ackStopRestartingCh chan bool
|
||||
if s.restartOnExit {
|
||||
if len(s.healthCheckUrls) == 0 {
|
||||
return fmt.Errorf("Tried to start %s which has s.restartOnExit == true, but no health check urls provided.", s)
|
||||
}
|
||||
|
||||
stopRestartingCh = make(chan bool)
|
||||
ackStopRestartingCh = make(chan bool)
|
||||
|
||||
s.stopRestartingCh = stopRestartingCh
|
||||
s.ackStopRestartingCh = ackStopRestartingCh
|
||||
}
|
||||
|
||||
// This goroutine actually runs the start command for the server.
|
||||
go func() {
|
||||
defer close(errCh)
|
||||
|
||||
// Create the output filename
|
||||
outPath := path.Join(framework.TestContext.ReportDir, s.outFilename)
|
||||
outfile, err := os.Create(outPath)
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("failed to create file %q for `%s` %v.", outPath, s, err)
|
||||
return
|
||||
} else {
|
||||
glog.Infof("Output file for server %q: %v", s.name, outfile.Name())
|
||||
}
|
||||
defer outfile.Close()
|
||||
defer outfile.Sync()
|
||||
|
||||
// Set the command to write the output file
|
||||
s.startCommand.Stdout = outfile
|
||||
s.startCommand.Stderr = outfile
|
||||
|
||||
// Death of this test process should kill the server as well.
|
||||
attrs := &syscall.SysProcAttr{}
|
||||
// Hack to set linux-only field without build tags.
|
||||
deathSigField := reflect.ValueOf(attrs).Elem().FieldByName("Pdeathsig")
|
||||
if deathSigField.IsValid() {
|
||||
deathSigField.Set(reflect.ValueOf(syscall.SIGTERM))
|
||||
} else {
|
||||
errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)")
|
||||
return
|
||||
}
|
||||
s.startCommand.SysProcAttr = attrs
|
||||
|
||||
// Start the command
|
||||
err = s.startCommand.Start()
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("failed to run %s: %v", s, err)
|
||||
return
|
||||
}
|
||||
if !s.restartOnExit {
|
||||
glog.Infof("Waiting for server %q start command to complete", s.name)
|
||||
// If we aren't planning on restarting, ok to Wait() here to release resources.
|
||||
// Otherwise, we Wait() in the restart loop.
|
||||
err = s.startCommand.Wait()
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("failed to run start command for server %q: %v", s.name, err)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
usedStartCmd := true
|
||||
for {
|
||||
glog.Infof("Running health check for service %q", s.name)
|
||||
// Wait for an initial health check to pass, so that we are sure the server started.
|
||||
err := readinessCheck(s.name, s.healthCheckUrls, nil)
|
||||
if err != nil {
|
||||
if usedStartCmd {
|
||||
glog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name)
|
||||
s.startCommand.Wait() // Release resources if necessary.
|
||||
}
|
||||
// This should not happen, immediately stop the e2eService process.
|
||||
glog.Fatalf("restart loop readinessCheck failed for %s", s)
|
||||
} else {
|
||||
glog.Infof("Initial health check passed for service %q", s.name)
|
||||
}
|
||||
|
||||
// Initial health check passed, wait until a health check fails again.
|
||||
stillAlive:
|
||||
for {
|
||||
select {
|
||||
case <-stopRestartingCh:
|
||||
ackStopRestartingCh <- true
|
||||
return
|
||||
case <-time.After(time.Second):
|
||||
for _, url := range s.healthCheckUrls {
|
||||
resp, err := http.Head(url)
|
||||
if err != nil || resp.StatusCode != http.StatusOK {
|
||||
break stillAlive
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if usedStartCmd {
|
||||
s.startCommand.Wait() // Release resources from last cmd
|
||||
usedStartCmd = false
|
||||
}
|
||||
if s.restartCommand != nil {
|
||||
// Always make a fresh copy of restartCommand before
|
||||
// running, we may have to restart multiple times
|
||||
s.restartCommand = &exec.Cmd{
|
||||
Path: s.restartCommand.Path,
|
||||
Args: s.restartCommand.Args,
|
||||
Env: s.restartCommand.Env,
|
||||
Dir: s.restartCommand.Dir,
|
||||
Stdin: s.restartCommand.Stdin,
|
||||
Stdout: s.restartCommand.Stdout,
|
||||
Stderr: s.restartCommand.Stderr,
|
||||
ExtraFiles: s.restartCommand.ExtraFiles,
|
||||
SysProcAttr: s.restartCommand.SysProcAttr,
|
||||
}
|
||||
// Run and wait for exit. This command is assumed to have
|
||||
// short duration, e.g. systemctl restart
|
||||
glog.Infof("Restarting server %q with restart command", s.name)
|
||||
err = s.restartCommand.Run()
|
||||
if err != nil {
|
||||
// This should not happen, immediately stop the e2eService process.
|
||||
glog.Fatalf("restarting server %s with restartCommand failed. Error: %v.", s, err)
|
||||
}
|
||||
} else {
|
||||
s.startCommand = &exec.Cmd{
|
||||
Path: s.startCommand.Path,
|
||||
Args: s.startCommand.Args,
|
||||
Env: s.startCommand.Env,
|
||||
Dir: s.startCommand.Dir,
|
||||
Stdin: s.startCommand.Stdin,
|
||||
Stdout: s.startCommand.Stdout,
|
||||
Stderr: s.startCommand.Stderr,
|
||||
ExtraFiles: s.startCommand.ExtraFiles,
|
||||
SysProcAttr: s.startCommand.SysProcAttr,
|
||||
}
|
||||
glog.Infof("Restarting server %q with start command", s.name)
|
||||
err = s.startCommand.Start()
|
||||
usedStartCmd = true
|
||||
if err != nil {
|
||||
// This should not happen, immediately stop the e2eService process.
|
||||
glog.Fatalf("Restarting %s with startCommand failed. Error: %v.", s, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return readinessCheck(s.name, s.healthCheckUrls, errCh)
|
||||
}
|
||||
|
||||
// kill runs the server's kill command.
|
||||
func (s *server) kill() error {
|
||||
glog.Infof("Kill server %q", s.name)
|
||||
name := s.name
|
||||
cmd := s.startCommand
|
||||
|
||||
// If s has a restart loop, turn it off.
|
||||
if s.restartOnExit {
|
||||
s.stopRestartingCh <- true
|
||||
<-s.ackStopRestartingCh
|
||||
}
|
||||
|
||||
if s.killCommand != nil {
|
||||
return s.killCommand.Run()
|
||||
}
|
||||
|
||||
if cmd == nil {
|
||||
return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name)
|
||||
}
|
||||
|
||||
if cmd.Process == nil {
|
||||
glog.V(2).Infof("%q not running", name)
|
||||
return nil
|
||||
}
|
||||
pid := cmd.Process.Pid
|
||||
if pid <= 1 {
|
||||
return fmt.Errorf("invalid PID %d for %q", pid, name)
|
||||
}
|
||||
|
||||
// Attempt to shut down the process in a friendly manner before forcing it.
|
||||
waitChan := make(chan error)
|
||||
go func() {
|
||||
_, err := cmd.Process.Wait()
|
||||
waitChan <- err
|
||||
close(waitChan)
|
||||
}()
|
||||
|
||||
const timeout = 10 * time.Second
|
||||
for _, signal := range []string{"-TERM", "-KILL"} {
|
||||
glog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal)
|
||||
cmd := exec.Command("sudo", "kill", signal, strconv.Itoa(pid))
|
||||
|
||||
// Run the 'kill' command in a separate process group so sudo doesn't ignore it
|
||||
attrs := &syscall.SysProcAttr{}
|
||||
// Hack to set unix-only field without build tags.
|
||||
setpgidField := reflect.ValueOf(attrs).Elem().FieldByName("Setpgid")
|
||||
if setpgidField.IsValid() {
|
||||
setpgidField.Set(reflect.ValueOf(true))
|
||||
} else {
|
||||
return fmt.Errorf("Failed to set Setpgid field (non-unix build)")
|
||||
}
|
||||
cmd.SysProcAttr = attrs
|
||||
|
||||
_, err := cmd.Output()
|
||||
if err != nil {
|
||||
glog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err)
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case err := <-waitChan:
|
||||
if err != nil {
|
||||
return fmt.Errorf("error stopping %q: %v", name, err)
|
||||
}
|
||||
// Success!
|
||||
return nil
|
||||
case <-time.After(timeout):
|
||||
// Continue.
|
||||
}
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to stop %q", name)
|
||||
}
|
||||
|
||||
func adjustConfigForSystemd(config string) string {
|
||||
return strings.Replace(config, "%", "%%", -1)
|
||||
}
|
||||
|
||||
// getLogFiles gets logs of interest either via journalctl or by creating sym
|
||||
// links. Since we scp files from the remote directory, symlinks will be
|
||||
// treated as normal files and file contents will be copied over.
|
||||
func (e *E2EServices) getLogFiles() {
|
||||
// Nothing to do if report dir is not specified.
|
||||
if framework.TestContext.ReportDir == "" {
|
||||
return
|
||||
}
|
||||
glog.Info("Fetching log files...")
|
||||
journaldFound := isJournaldAvailable()
|
||||
for targetFileName, logFileData := range e.logFiles {
|
||||
targetLink := path.Join(framework.TestContext.ReportDir, targetFileName)
|
||||
if journaldFound {
|
||||
// Skip log files that do not have an equivalent in journald-based machines.
|
||||
if len(logFileData.journalctlCommand) == 0 {
|
||||
continue
|
||||
}
|
||||
glog.Infof("Get log file %q with journalctl command %v.", targetFileName, logFileData.journalctlCommand)
|
||||
out, err := exec.Command("sudo", append([]string{"journalctl"}, logFileData.journalctlCommand...)...).CombinedOutput()
|
||||
if err != nil {
|
||||
glog.Errorf("failed to get %q from journald: %v, %v", targetFileName, string(out), err)
|
||||
} else {
|
||||
if err = ioutil.WriteFile(targetLink, out, 0644); err != nil {
|
||||
glog.Errorf("failed to write logs to %q: %v", targetLink, err)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
for _, file := range logFileData.files {
|
||||
if _, err := os.Stat(file); err != nil {
|
||||
// Expected file not found on this distro.
|
||||
continue
|
||||
}
|
||||
if err := copyLogFile(file, targetLink); err != nil {
|
||||
glog.Error(err)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// isJournaldAvailable returns whether the system executing the tests uses
|
||||
// journald.
|
||||
func isJournaldAvailable() bool {
|
||||
_, err := exec.LookPath("journalctl")
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func copyLogFile(src, target string) error {
|
||||
// If not a journald based distro, then just symlink files.
|
||||
if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil {
|
||||
@ -728,55 +305,3 @@ func copyLogFile(src, target string) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isJournaldAvailable returns whether the system executing the tests uses
|
||||
// journald.
|
||||
func isJournaldAvailable() bool {
|
||||
_, err := exec.LookPath("journalctl")
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// readinessCheck checks whether services are ready via the supplied health
|
||||
// check URLs. Once there is an error in errCh, the function will stop waiting
|
||||
// and return the error.
|
||||
// TODO(random-liu): Move this to util
|
||||
func readinessCheck(name string, urls []string, errCh <-chan error) error {
|
||||
glog.Infof("Running readiness check for service %q", name)
|
||||
endTime := time.Now().Add(*serverStartTimeout)
|
||||
blockCh := make(chan error)
|
||||
defer close(blockCh)
|
||||
for endTime.After(time.Now()) {
|
||||
select {
|
||||
// We *always* want to run the health check if there is no error on the channel.
|
||||
// With systemd, reads from errCh report nil because cmd.Run() waits
|
||||
// on systemd-run, rather than the service process. systemd-run quickly
|
||||
// exits with status 0, causing the channel to be closed with no error. In
|
||||
// this case, you want to wait for the health check to complete, rather
|
||||
// than returning from readinessCheck as soon as the channel is closed.
|
||||
case err, ok := <-errCh:
|
||||
if ok { // The channel is not closed, this is a real error
|
||||
if err != nil { // If there is an error, return it
|
||||
return err
|
||||
}
|
||||
// If not, keep checking readiness.
|
||||
} else { // The channel is closed, this is only a zero value.
|
||||
// Replace the errCh with blockCh to avoid busy loop,
|
||||
// and keep checking readiness.
|
||||
errCh = blockCh
|
||||
}
|
||||
case <-time.After(time.Second):
|
||||
ready := true
|
||||
for _, url := range urls {
|
||||
resp, err := http.Head(url)
|
||||
if err != nil || resp.StatusCode != http.StatusOK {
|
||||
ready = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if ready {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("e2e service %q readiness check timeout %v", name, *serverStartTimeout)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user