mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-05 10:19:50 +00:00
Split services.go into services.go, internal_services.go and server.go.
This commit is contained in:
parent
5b3860ce0b
commit
0d3befd7ea
148
test/e2e_node/services/internal_services.go
Normal file
148
test/e2e_node/services/internal_services.go
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2016 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package services
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// e2eService manages e2e services in current process.
|
||||||
|
type e2eServices struct {
|
||||||
|
rmDirs []string
|
||||||
|
// statically linked e2e services
|
||||||
|
etcdServer *EtcdServer
|
||||||
|
apiServer *APIServer
|
||||||
|
nsController *NamespaceController
|
||||||
|
}
|
||||||
|
|
||||||
|
func newE2EServices() *e2eServices {
|
||||||
|
return &e2eServices{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// terminationSignals are signals that cause the program to exit in the
|
||||||
|
// supported platforms (linux, darwin, windows).
|
||||||
|
var terminationSignals = []os.Signal{syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT}
|
||||||
|
|
||||||
|
// run starts all e2e services and wait for the termination signal. Once receives the
|
||||||
|
// termination signal, it will stop the e2e services gracefully.
|
||||||
|
func (es *e2eServices) run() error {
|
||||||
|
defer es.stop()
|
||||||
|
if err := es.start(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Wait until receiving a termination signal.
|
||||||
|
sig := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(sig, terminationSignals...)
|
||||||
|
<-sig
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// start starts the tests embedded services or returns an error.
|
||||||
|
func (es *e2eServices) start() error {
|
||||||
|
glog.Info("Starting e2e services...")
|
||||||
|
err := es.startEtcd()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = es.startApiServer()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = es.startNamespaceController()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
glog.Info("E2E services started.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// stop stops the embedded e2e services.
|
||||||
|
func (es *e2eServices) stop() {
|
||||||
|
glog.Info("Stopping e2e services...")
|
||||||
|
// TODO(random-liu): Use a loop to stop all services after introducing
|
||||||
|
// service interface.
|
||||||
|
glog.Info("Stopping namespace controller")
|
||||||
|
if es.nsController != nil {
|
||||||
|
if err := es.nsController.Stop(); err != nil {
|
||||||
|
glog.Errorf("Failed to stop %q: %v", es.nsController.Name(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
glog.Info("Stopping API server")
|
||||||
|
if es.apiServer != nil {
|
||||||
|
if err := es.apiServer.Stop(); err != nil {
|
||||||
|
glog.Errorf("Failed to stop %q: %v", es.apiServer.Name(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
glog.Info("Stopping etcd")
|
||||||
|
if es.etcdServer != nil {
|
||||||
|
if err := es.etcdServer.Stop(); err != nil {
|
||||||
|
glog.Errorf("Failed to stop %q: %v", es.etcdServer.Name(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, d := range es.rmDirs {
|
||||||
|
glog.Info("Deleting directory %v", d)
|
||||||
|
err := os.RemoveAll(d)
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("Failed to delete directory %s.\n%v", d, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
glog.Info("E2E services stopped.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// startEtcd starts the embedded etcd instance or returns an error.
|
||||||
|
func (es *e2eServices) startEtcd() error {
|
||||||
|
glog.Info("Starting etcd")
|
||||||
|
dataDir, err := ioutil.TempDir("", "node-e2e")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Mark the dataDir as directories to remove.
|
||||||
|
es.rmDirs = append(es.rmDirs, dataDir)
|
||||||
|
es.etcdServer = NewEtcd(dataDir)
|
||||||
|
return es.etcdServer.Start()
|
||||||
|
}
|
||||||
|
|
||||||
|
// startApiServer starts the embedded API server or returns an error.
|
||||||
|
func (es *e2eServices) startApiServer() error {
|
||||||
|
glog.Info("Starting API server")
|
||||||
|
es.apiServer = NewAPIServer()
|
||||||
|
return es.apiServer.Start()
|
||||||
|
}
|
||||||
|
|
||||||
|
// startNamespaceController starts the embedded namespace controller or returns an error.
|
||||||
|
func (es *e2eServices) startNamespaceController() error {
|
||||||
|
glog.Info("Starting namespace controller")
|
||||||
|
es.nsController = NewNamespaceController()
|
||||||
|
return es.nsController.Start()
|
||||||
|
}
|
||||||
|
|
||||||
|
// getServicesHealthCheckURLs returns the health check urls for the internal services.
|
||||||
|
func getServicesHealthCheckURLs() []string {
|
||||||
|
return []string{
|
||||||
|
getEtcdHealthCheckURL(),
|
||||||
|
getAPIServerHealthCheckURL(),
|
||||||
|
}
|
||||||
|
}
|
366
test/e2e_node/services/server.go
Normal file
366
test/e2e_node/services/server.go
Normal file
@ -0,0 +1,366 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2016 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package services
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path"
|
||||||
|
"reflect"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
|
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
)
|
||||||
|
|
||||||
|
var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.")
|
||||||
|
|
||||||
|
// A server manages a separate server process started and killed with
|
||||||
|
// commands.
|
||||||
|
type server struct {
|
||||||
|
// name is the name of the server, it is only used for logging.
|
||||||
|
name string
|
||||||
|
// startCommand is the command used to start the server
|
||||||
|
startCommand *exec.Cmd
|
||||||
|
// killCommand is the command used to stop the server. It is not required. If it
|
||||||
|
// is not specified, `sudo kill` will be used to stop the server.
|
||||||
|
killCommand *exec.Cmd
|
||||||
|
// restartCommand is the command used to restart the server. If provided, it will be used
|
||||||
|
// instead of startCommand when restarting the server.
|
||||||
|
restartCommand *exec.Cmd
|
||||||
|
// healthCheckUrls is the urls used to check whether the server is ready.
|
||||||
|
healthCheckUrls []string
|
||||||
|
// outFilename is the name of the log file. The stdout and stderr of the server
|
||||||
|
// will be redirected to this file.
|
||||||
|
outFilename string
|
||||||
|
// restartOnExit determines whether a restart loop is launched with the server
|
||||||
|
restartOnExit bool
|
||||||
|
// Writing to this channel, if it is not nil, stops the restart loop.
|
||||||
|
// When tearing down a server, you should check for this channel and write to it if it exists.
|
||||||
|
stopRestartingCh chan<- bool
|
||||||
|
// Read from this to confirm that the restart loop has stopped.
|
||||||
|
ackStopRestartingCh <-chan bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// newServer returns a new server with the given name, commands, health check
|
||||||
|
// URLs, etc.
|
||||||
|
func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, restartOnExit bool) *server {
|
||||||
|
return &server{
|
||||||
|
name: name,
|
||||||
|
startCommand: start,
|
||||||
|
killCommand: kill,
|
||||||
|
restartCommand: restart,
|
||||||
|
healthCheckUrls: urls,
|
||||||
|
outFilename: outputFileName,
|
||||||
|
restartOnExit: restartOnExit,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// commandToString format command to string.
|
||||||
|
func commandToString(c *exec.Cmd) string {
|
||||||
|
if c == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return strings.Join(append([]string{c.Path}, c.Args[1:]...), " ")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *server) String() string {
|
||||||
|
return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, restart-command: `%s`, health-check: %v, output-file: %q", s.name,
|
||||||
|
commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
|
||||||
|
}
|
||||||
|
|
||||||
|
// readinessCheck checks whether services are ready via the supplied health
|
||||||
|
// check URLs. Once there is an error in errCh, the function will stop waiting
|
||||||
|
// and return the error.
|
||||||
|
// TODO(random-liu): Move this to util
|
||||||
|
func readinessCheck(name string, urls []string, errCh <-chan error) error {
|
||||||
|
glog.Infof("Running readiness check for service %q", name)
|
||||||
|
endTime := time.Now().Add(*serverStartTimeout)
|
||||||
|
blockCh := make(chan error)
|
||||||
|
defer close(blockCh)
|
||||||
|
for endTime.After(time.Now()) {
|
||||||
|
select {
|
||||||
|
// We *always* want to run the health check if there is no error on the channel.
|
||||||
|
// With systemd, reads from errCh report nil because cmd.Run() waits
|
||||||
|
// on systemd-run, rather than the service process. systemd-run quickly
|
||||||
|
// exits with status 0, causing the channel to be closed with no error. In
|
||||||
|
// this case, you want to wait for the health check to complete, rather
|
||||||
|
// than returning from readinessCheck as soon as the channel is closed.
|
||||||
|
case err, ok := <-errCh:
|
||||||
|
if ok { // The channel is not closed, this is a real error
|
||||||
|
if err != nil { // If there is an error, return it
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If not, keep checking readiness.
|
||||||
|
} else { // The channel is closed, this is only a zero value.
|
||||||
|
// Replace the errCh with blockCh to avoid busy loop,
|
||||||
|
// and keep checking readiness.
|
||||||
|
errCh = blockCh
|
||||||
|
}
|
||||||
|
case <-time.After(time.Second):
|
||||||
|
ready := true
|
||||||
|
for _, url := range urls {
|
||||||
|
resp, err := http.Head(url)
|
||||||
|
if err != nil || resp.StatusCode != http.StatusOK {
|
||||||
|
ready = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ready {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("e2e service %q readiness check timeout %v", name, *serverStartTimeout)
|
||||||
|
}
|
||||||
|
|
||||||
|
// start starts the server by running its commands, monitors it with a health
|
||||||
|
// check, and ensures that it is restarted if applicable.
|
||||||
|
//
|
||||||
|
// Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly.
|
||||||
|
func (s *server) start() error {
|
||||||
|
glog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand))
|
||||||
|
errCh := make(chan error)
|
||||||
|
|
||||||
|
// Set up restart channels if the server is configured for restart on exit.
|
||||||
|
var stopRestartingCh, ackStopRestartingCh chan bool
|
||||||
|
if s.restartOnExit {
|
||||||
|
if len(s.healthCheckUrls) == 0 {
|
||||||
|
return fmt.Errorf("Tried to start %s which has s.restartOnExit == true, but no health check urls provided.", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
stopRestartingCh = make(chan bool)
|
||||||
|
ackStopRestartingCh = make(chan bool)
|
||||||
|
|
||||||
|
s.stopRestartingCh = stopRestartingCh
|
||||||
|
s.ackStopRestartingCh = ackStopRestartingCh
|
||||||
|
}
|
||||||
|
|
||||||
|
// This goroutine actually runs the start command for the server.
|
||||||
|
go func() {
|
||||||
|
defer close(errCh)
|
||||||
|
|
||||||
|
// Create the output filename
|
||||||
|
outPath := path.Join(framework.TestContext.ReportDir, s.outFilename)
|
||||||
|
outfile, err := os.Create(outPath)
|
||||||
|
if err != nil {
|
||||||
|
errCh <- fmt.Errorf("failed to create file %q for `%s` %v.", outPath, s, err)
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
glog.Infof("Output file for server %q: %v", s.name, outfile.Name())
|
||||||
|
}
|
||||||
|
defer outfile.Close()
|
||||||
|
defer outfile.Sync()
|
||||||
|
|
||||||
|
// Set the command to write the output file
|
||||||
|
s.startCommand.Stdout = outfile
|
||||||
|
s.startCommand.Stderr = outfile
|
||||||
|
|
||||||
|
// Death of this test process should kill the server as well.
|
||||||
|
attrs := &syscall.SysProcAttr{}
|
||||||
|
// Hack to set linux-only field without build tags.
|
||||||
|
deathSigField := reflect.ValueOf(attrs).Elem().FieldByName("Pdeathsig")
|
||||||
|
if deathSigField.IsValid() {
|
||||||
|
deathSigField.Set(reflect.ValueOf(syscall.SIGTERM))
|
||||||
|
} else {
|
||||||
|
errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.startCommand.SysProcAttr = attrs
|
||||||
|
|
||||||
|
// Start the command
|
||||||
|
err = s.startCommand.Start()
|
||||||
|
if err != nil {
|
||||||
|
errCh <- fmt.Errorf("failed to run %s: %v", s, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !s.restartOnExit {
|
||||||
|
glog.Infof("Waiting for server %q start command to complete", s.name)
|
||||||
|
// If we aren't planning on restarting, ok to Wait() here to release resources.
|
||||||
|
// Otherwise, we Wait() in the restart loop.
|
||||||
|
err = s.startCommand.Wait()
|
||||||
|
if err != nil {
|
||||||
|
errCh <- fmt.Errorf("failed to run start command for server %q: %v", s.name, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
usedStartCmd := true
|
||||||
|
for {
|
||||||
|
glog.Infof("Running health check for service %q", s.name)
|
||||||
|
// Wait for an initial health check to pass, so that we are sure the server started.
|
||||||
|
err := readinessCheck(s.name, s.healthCheckUrls, nil)
|
||||||
|
if err != nil {
|
||||||
|
if usedStartCmd {
|
||||||
|
glog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name)
|
||||||
|
s.startCommand.Wait() // Release resources if necessary.
|
||||||
|
}
|
||||||
|
// This should not happen, immediately stop the e2eService process.
|
||||||
|
glog.Fatalf("Restart loop readinessCheck failed for %s", s)
|
||||||
|
} else {
|
||||||
|
glog.Infof("Initial health check passed for service %q", s.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initial health check passed, wait until a health check fails again.
|
||||||
|
stillAlive:
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-stopRestartingCh:
|
||||||
|
ackStopRestartingCh <- true
|
||||||
|
return
|
||||||
|
case <-time.After(time.Second):
|
||||||
|
for _, url := range s.healthCheckUrls {
|
||||||
|
resp, err := http.Head(url)
|
||||||
|
if err != nil || resp.StatusCode != http.StatusOK {
|
||||||
|
break stillAlive
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if usedStartCmd {
|
||||||
|
s.startCommand.Wait() // Release resources from last cmd
|
||||||
|
usedStartCmd = false
|
||||||
|
}
|
||||||
|
if s.restartCommand != nil {
|
||||||
|
// Always make a fresh copy of restartCommand before
|
||||||
|
// running, we may have to restart multiple times
|
||||||
|
s.restartCommand = &exec.Cmd{
|
||||||
|
Path: s.restartCommand.Path,
|
||||||
|
Args: s.restartCommand.Args,
|
||||||
|
Env: s.restartCommand.Env,
|
||||||
|
Dir: s.restartCommand.Dir,
|
||||||
|
Stdin: s.restartCommand.Stdin,
|
||||||
|
Stdout: s.restartCommand.Stdout,
|
||||||
|
Stderr: s.restartCommand.Stderr,
|
||||||
|
ExtraFiles: s.restartCommand.ExtraFiles,
|
||||||
|
SysProcAttr: s.restartCommand.SysProcAttr,
|
||||||
|
}
|
||||||
|
// Run and wait for exit. This command is assumed to have
|
||||||
|
// short duration, e.g. systemctl restart
|
||||||
|
glog.Infof("Restarting server %q with restart command", s.name)
|
||||||
|
err = s.restartCommand.Run()
|
||||||
|
if err != nil {
|
||||||
|
// This should not happen, immediately stop the e2eService process.
|
||||||
|
glog.Fatalf("Restarting server %s with restartCommand failed. Error: %v.", s, err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
s.startCommand = &exec.Cmd{
|
||||||
|
Path: s.startCommand.Path,
|
||||||
|
Args: s.startCommand.Args,
|
||||||
|
Env: s.startCommand.Env,
|
||||||
|
Dir: s.startCommand.Dir,
|
||||||
|
Stdin: s.startCommand.Stdin,
|
||||||
|
Stdout: s.startCommand.Stdout,
|
||||||
|
Stderr: s.startCommand.Stderr,
|
||||||
|
ExtraFiles: s.startCommand.ExtraFiles,
|
||||||
|
SysProcAttr: s.startCommand.SysProcAttr,
|
||||||
|
}
|
||||||
|
glog.Infof("Restarting server %q with start command", s.name)
|
||||||
|
err = s.startCommand.Start()
|
||||||
|
usedStartCmd = true
|
||||||
|
if err != nil {
|
||||||
|
// This should not happen, immediately stop the e2eService process.
|
||||||
|
glog.Fatalf("Restarting server %s with startCommand failed. Error: %v.", s, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return readinessCheck(s.name, s.healthCheckUrls, errCh)
|
||||||
|
}
|
||||||
|
|
||||||
|
// kill runs the server's kill command.
|
||||||
|
func (s *server) kill() error {
|
||||||
|
glog.Infof("Kill server %q", s.name)
|
||||||
|
name := s.name
|
||||||
|
cmd := s.startCommand
|
||||||
|
|
||||||
|
// If s has a restart loop, turn it off.
|
||||||
|
if s.restartOnExit {
|
||||||
|
s.stopRestartingCh <- true
|
||||||
|
<-s.ackStopRestartingCh
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.killCommand != nil {
|
||||||
|
return s.killCommand.Run()
|
||||||
|
}
|
||||||
|
|
||||||
|
if cmd == nil {
|
||||||
|
return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cmd.Process == nil {
|
||||||
|
glog.V(2).Infof("%q not running", name)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
pid := cmd.Process.Pid
|
||||||
|
if pid <= 1 {
|
||||||
|
return fmt.Errorf("invalid PID %d for %q", pid, name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt to shut down the process in a friendly manner before forcing it.
|
||||||
|
waitChan := make(chan error)
|
||||||
|
go func() {
|
||||||
|
_, err := cmd.Process.Wait()
|
||||||
|
waitChan <- err
|
||||||
|
close(waitChan)
|
||||||
|
}()
|
||||||
|
|
||||||
|
const timeout = 10 * time.Second
|
||||||
|
for _, signal := range []string{"-TERM", "-KILL"} {
|
||||||
|
glog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal)
|
||||||
|
cmd := exec.Command("sudo", "kill", signal, strconv.Itoa(pid))
|
||||||
|
|
||||||
|
// Run the 'kill' command in a separate process group so sudo doesn't ignore it
|
||||||
|
attrs := &syscall.SysProcAttr{}
|
||||||
|
// Hack to set unix-only field without build tags.
|
||||||
|
setpgidField := reflect.ValueOf(attrs).Elem().FieldByName("Setpgid")
|
||||||
|
if setpgidField.IsValid() {
|
||||||
|
setpgidField.Set(reflect.ValueOf(true))
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("Failed to set Setpgid field (non-unix build)")
|
||||||
|
}
|
||||||
|
cmd.SysProcAttr = attrs
|
||||||
|
|
||||||
|
_, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case err := <-waitChan:
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error stopping %q: %v", name, err)
|
||||||
|
}
|
||||||
|
// Success!
|
||||||
|
return nil
|
||||||
|
case <-time.After(timeout):
|
||||||
|
// Continue.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("unable to stop %q", name)
|
||||||
|
}
|
@ -17,21 +17,14 @@ limitations under the License.
|
|||||||
package services
|
package services
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net/http"
|
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/signal"
|
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
"github.com/kardianos/osext"
|
"github.com/kardianos/osext"
|
||||||
@ -41,132 +34,12 @@ import (
|
|||||||
"k8s.io/kubernetes/test/e2e_node/build"
|
"k8s.io/kubernetes/test/e2e_node/build"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO(random-liu): Move this file to a separate package.
|
|
||||||
var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.")
|
|
||||||
|
|
||||||
// E2EServices starts and stops e2e services in a separate process. The test
|
// E2EServices starts and stops e2e services in a separate process. The test
|
||||||
// uses it to start and stop all e2e services.
|
// uses it to start and stop all e2e services.
|
||||||
type E2EServices struct {
|
type E2EServices struct {
|
||||||
services *server
|
services *server
|
||||||
}
|
kubelet *server
|
||||||
|
|
||||||
// NewE2EServices returns a new E2EServices instance.
|
|
||||||
func NewE2EServices() *E2EServices {
|
|
||||||
return &E2EServices{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// services.log is the combined log of all services
|
|
||||||
const servicesLogFile = "services.log"
|
|
||||||
|
|
||||||
// Start starts the e2e services in another process by calling back into the
|
|
||||||
// test binary. Returns when all e2e services are ready or an error.
|
|
||||||
//
|
|
||||||
// We want to statically link e2e services into the test binary, but we don't
|
|
||||||
// want their glog output to pollute the test result. So we run the binary in
|
|
||||||
// run- services-mode to start e2e services in another process.
|
|
||||||
func (e *E2EServices) Start() error {
|
|
||||||
var err error
|
|
||||||
// Create the manifest path for kubelet.
|
|
||||||
// TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test.
|
|
||||||
framework.TestContext.ManifestPath, err = ioutil.TempDir("", "node-e2e-pod")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create static pod manifest directory: %v", err)
|
|
||||||
}
|
|
||||||
testBin, err := osext.Executable()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("can't get current binary: %v", err)
|
|
||||||
}
|
|
||||||
// TODO(random-liu): Add sudo after we statically link apiserver and etcd, because apiserver needs
|
|
||||||
// sudo. We can't add sudo now, because etcd may not be in PATH of root.
|
|
||||||
startCmd := exec.Command(testBin,
|
|
||||||
// TODO(mtaufen): Flags e.g. that target the TestContext need to be manually forwarded to the
|
|
||||||
// test binary when we start it up in run-services mode. This is not ideal.
|
|
||||||
// Very unintuitive because it prevents any falgs NOT manually forwarded here
|
|
||||||
// from being set via TEST_ARGS when running tests from the command line.
|
|
||||||
"--run-services-mode",
|
|
||||||
"--server-start-timeout", serverStartTimeout.String(),
|
|
||||||
"--report-dir", framework.TestContext.ReportDir,
|
|
||||||
// TODO(random-liu): Remove the following flags after we move kubelet starting logic
|
|
||||||
// out of the test.
|
|
||||||
"--node-name", framework.TestContext.NodeName,
|
|
||||||
"--disable-kubenet="+strconv.FormatBool(framework.TestContext.DisableKubenet),
|
|
||||||
// TODO: enable when flag is introduced in 1.5
|
|
||||||
// "--cgroups-per-qos="+strconv.FormatBool(framework.TestContext.CgroupsPerQOS),
|
|
||||||
"--manifest-path", framework.TestContext.ManifestPath,
|
|
||||||
"--eviction-hard", framework.TestContext.EvictionHard,
|
|
||||||
"--feature-gates", framework.TestContext.FeatureGates,
|
|
||||||
"--runtime-integration-type", framework.TestContext.RuntimeIntegrationType,
|
|
||||||
"--logtostderr",
|
|
||||||
"--vmodule=*=4",
|
|
||||||
)
|
|
||||||
e.services = newServer("services", startCmd, nil, nil, getHealthCheckURLs(), servicesLogFile, false)
|
|
||||||
return e.services.start()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stop stops the e2e services.
|
|
||||||
func (e *E2EServices) Stop() error {
|
|
||||||
defer func() {
|
|
||||||
// Cleanup the manifest path for kubelet.
|
|
||||||
manifestPath := framework.TestContext.ManifestPath
|
|
||||||
if manifestPath != "" {
|
|
||||||
err := os.RemoveAll(manifestPath)
|
|
||||||
if err != nil {
|
|
||||||
glog.Errorf("Failed to delete static pod manifest directory %s: %v", manifestPath, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if e.services == nil {
|
|
||||||
glog.Errorf("can't stop e2e services, because `services` is nil")
|
|
||||||
}
|
|
||||||
return e.services.kill()
|
|
||||||
}
|
|
||||||
|
|
||||||
// RunE2EServices actually start the e2e services. This function is used to
|
|
||||||
// start e2e services in current process. This is only used in run-services-mode.
|
|
||||||
func RunE2EServices() {
|
|
||||||
// Populate global DefaultFeatureGate with value from TestContext.FeatureGates.
|
|
||||||
// This way, statically-linked components see the same feature gate config as the test context.
|
|
||||||
utilconfig.DefaultFeatureGate.Set(framework.TestContext.FeatureGates)
|
|
||||||
e := newE2EService()
|
|
||||||
if err := e.run(); err != nil {
|
|
||||||
glog.Fatalf("Failed to run e2e services: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ports of different e2e services.
|
|
||||||
const (
|
|
||||||
kubeletPort = "10250"
|
|
||||||
kubeletReadOnlyPort = "10255"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Health check urls of different e2e services.
|
|
||||||
var (
|
|
||||||
kubeletHealthCheckURL = getEndpoint(kubeletReadOnlyPort) + "/healthz"
|
|
||||||
)
|
|
||||||
|
|
||||||
// getEndpoint generates endpoint url from service port.
|
|
||||||
func getEndpoint(port string) string {
|
|
||||||
return "http://127.0.0.1:" + port
|
|
||||||
}
|
|
||||||
|
|
||||||
func getHealthCheckURLs() []string {
|
|
||||||
return []string{
|
|
||||||
getEtcdHealthCheckURL(),
|
|
||||||
getAPIServerHealthCheckURL(),
|
|
||||||
kubeletHealthCheckURL,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// e2eService manages e2e services in current process.
|
|
||||||
type e2eService struct {
|
|
||||||
services []*server
|
|
||||||
rmDirs []string
|
|
||||||
logFiles map[string]logFileData
|
logFiles map[string]logFileData
|
||||||
|
|
||||||
// statically linked e2e services
|
|
||||||
etcdServer *EtcdServer
|
|
||||||
apiServer *APIServer
|
|
||||||
nsController *NamespaceController
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// logFileData holds data about logfiles to fetch with a journalctl command or
|
// logFileData holds data about logfiles to fetch with a journalctl command or
|
||||||
@ -176,185 +49,123 @@ type logFileData struct {
|
|||||||
journalctlCommand []string
|
journalctlCommand []string
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
// NewE2EServices returns a new E2EServices instance.
|
||||||
// This is consistent with the level used in a cluster e2e test.
|
func NewE2EServices() *E2EServices {
|
||||||
LOG_VERBOSITY_LEVEL = "4"
|
return &E2EServices{
|
||||||
)
|
|
||||||
|
|
||||||
func newE2EService() *e2eService {
|
|
||||||
// Special log files that need to be collected for additional debugging.
|
// Special log files that need to be collected for additional debugging.
|
||||||
var logFiles = map[string]logFileData{
|
logFiles: map[string]logFileData{
|
||||||
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
|
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
|
||||||
"docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}},
|
"docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}},
|
||||||
"cloud-init.log": {[]string{"/var/log/cloud-init.log"}, []string{"-u", "cloud*"}},
|
"cloud-init.log": {[]string{"/var/log/cloud-init.log"}, []string{"-u", "cloud*"}},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return &e2eService{logFiles: logFiles}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// terminationSignals are signals that cause the program to exit in the
|
// Start starts the e2e services in another process by calling back into the
|
||||||
// supported platforms (linux, darwin, windows).
|
// test binary. Returns when all e2e services are ready or an error.
|
||||||
var terminationSignals = []os.Signal{syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT}
|
//
|
||||||
|
// We want to statically link e2e services into the test binary, but we don't
|
||||||
// run starts all e2e services and wait for the termination signal. Once receives the
|
// want their glog output to pollute the test result. So we run the binary in
|
||||||
// termination signal, it will stop the e2e services gracefully.
|
// run-services-mode to start e2e services in another process.
|
||||||
func (es *e2eService) run() error {
|
// The function starts 2 processes:
|
||||||
defer es.stop()
|
// * internal e2e services: services which statically linked in the test binary - apiserver, etcd and
|
||||||
if err := es.start(); err != nil {
|
// namespace controller.
|
||||||
|
// * kubelet: kubelet binary is outside. (We plan to move main kubelet start logic out when we have
|
||||||
|
// standard kubelet launcher)
|
||||||
|
func (e *E2EServices) Start() error {
|
||||||
|
var err error
|
||||||
|
// Start kubelet
|
||||||
|
// Create the manifest path for kubelet.
|
||||||
|
// TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test.
|
||||||
|
framework.TestContext.ManifestPath, err = ioutil.TempDir("", "node-e2e-pod")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create static pod manifest directory: %v", err)
|
||||||
|
}
|
||||||
|
e.kubelet, err = e.startKubelet()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to start kubelet: %v", err)
|
||||||
|
}
|
||||||
|
e.services, err = e.startInternalServices()
|
||||||
return err
|
return err
|
||||||
}
|
|
||||||
// Wait until receiving a termination signal.
|
|
||||||
sig := make(chan os.Signal, 1)
|
|
||||||
signal.Notify(sig, terminationSignals...)
|
|
||||||
<-sig
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// start starts the tests embedded services or returns an error.
|
// Stop stops the e2e services.
|
||||||
func (es *e2eService) start() error {
|
func (e *E2EServices) Stop() {
|
||||||
glog.Info("Starting e2e services...")
|
defer func() {
|
||||||
err := es.startEtcd()
|
// Collect log files.
|
||||||
|
e.getLogFiles()
|
||||||
|
// Cleanup the manifest path for kubelet.
|
||||||
|
manifestPath := framework.TestContext.ManifestPath
|
||||||
|
if manifestPath != "" {
|
||||||
|
err := os.RemoveAll(manifestPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
glog.Errorf("Failed to delete static pod manifest directory %s: %v", manifestPath, err)
|
||||||
}
|
|
||||||
|
|
||||||
err = es.startApiServer()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
s, err := es.startKubelet()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
es.services = append(es.services, s)
|
|
||||||
|
|
||||||
err = es.startNamespaceController()
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
glog.Info("E2E services started.")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// getLogFiles gets logs of interest either via journalctl or by creating sym
|
|
||||||
// links. Since we scp files from the remote directory, symlinks will be
|
|
||||||
// treated as normal files and file contents will be copied over.
|
|
||||||
func (es *e2eService) getLogFiles() {
|
|
||||||
// Nothing to do if report dir is not specified.
|
|
||||||
if framework.TestContext.ReportDir == "" {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
glog.Info("Fetching log files...")
|
|
||||||
journaldFound := isJournaldAvailable()
|
|
||||||
for targetFileName, logFileData := range es.logFiles {
|
|
||||||
targetLink := path.Join(framework.TestContext.ReportDir, targetFileName)
|
|
||||||
if journaldFound {
|
|
||||||
// Skip log files that do not have an equivalent in journald-based machines.
|
|
||||||
if len(logFileData.journalctlCommand) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
glog.Infof("Get log file %q with journalctl command %v.", targetFileName, logFileData.journalctlCommand)
|
|
||||||
out, err := exec.Command("sudo", append([]string{"journalctl"}, logFileData.journalctlCommand...)...).CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
glog.Errorf("failed to get %q from journald: %v, %v", targetFileName, string(out), err)
|
|
||||||
} else {
|
|
||||||
if err = ioutil.WriteFile(targetLink, out, 0644); err != nil {
|
|
||||||
glog.Errorf("failed to write logs to %q: %v", targetLink, err)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
continue
|
}()
|
||||||
|
if e.services != nil {
|
||||||
|
if err := e.services.kill(); err != nil {
|
||||||
|
glog.Errorf("Failed to stop services: %v", err)
|
||||||
}
|
}
|
||||||
for _, file := range logFileData.files {
|
|
||||||
if _, err := os.Stat(file); err != nil {
|
|
||||||
// Expected file not found on this distro.
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := copyLogFile(file, targetLink); err != nil {
|
|
||||||
glog.Error(err)
|
|
||||||
} else {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
if e.kubelet != nil {
|
||||||
|
if err := e.kubelet.kill(); err != nil {
|
||||||
|
glog.Errorf("Failed to stop kubelet: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// stop stops the embedded e2e services.
|
// RunE2EServices actually start the e2e services. This function is used to
|
||||||
func (es *e2eService) stop() {
|
// start e2e services in current process. This is only used in run-services-mode.
|
||||||
glog.Info("Stopping e2e services...")
|
func RunE2EServices() {
|
||||||
es.getLogFiles()
|
// Populate global DefaultFeatureGate with value from TestContext.FeatureGates.
|
||||||
|
// This way, statically-linked components see the same feature gate config as the test context.
|
||||||
|
utilconfig.DefaultFeatureGate.Set(framework.TestContext.FeatureGates)
|
||||||
|
e := newE2EServices()
|
||||||
|
if err := e.run(); err != nil {
|
||||||
|
glog.Fatalf("Failed to run e2e services: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(random-liu): Use a loop to stop all services after introducing
|
const (
|
||||||
// service interface.
|
// services.log is the combined log of all services
|
||||||
glog.Info("Stopping namespace controller")
|
servicesLogFile = "services.log"
|
||||||
if es.nsController != nil {
|
// LOG_VERBOSITY_LEVEL is consistent with the level used in a cluster e2e test.
|
||||||
if err := es.nsController.Stop(); err != nil {
|
LOG_VERBOSITY_LEVEL = "4"
|
||||||
glog.Errorf("Failed to stop %q: %v", es.nsController.Name(), err)
|
)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
glog.Info("Stopping API server")
|
// startInternalServices starts the internal services in a separate process.
|
||||||
if es.apiServer != nil {
|
func (e *E2EServices) startInternalServices() (*server, error) {
|
||||||
if err := es.apiServer.Stop(); err != nil {
|
testBin, err := osext.Executable()
|
||||||
glog.Errorf("Failed to stop %q: %v", es.apiServer.Name(), err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, s := range es.services {
|
|
||||||
glog.Info("Stopping service %q", s.name)
|
|
||||||
if err := s.kill(); err != nil {
|
|
||||||
glog.Errorf("Failed to stop %v: %v", s.name, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
glog.Info("Stopping etcd")
|
|
||||||
if es.etcdServer != nil {
|
|
||||||
if err := es.etcdServer.Stop(); err != nil {
|
|
||||||
glog.Errorf("Failed to stop %q: %v", es.etcdServer.Name(), err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, d := range es.rmDirs {
|
|
||||||
glog.Info("Deleting directory %v", d)
|
|
||||||
err := os.RemoveAll(d)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Failed to delete directory %s.\n%v", d, err)
|
return nil, fmt.Errorf("can't get current binary: %v", err)
|
||||||
}
|
}
|
||||||
}
|
startCmd := exec.Command("sudo", testBin,
|
||||||
|
// TODO(mtaufen): Flags e.g. that target the TestContext need to be manually forwarded to the
|
||||||
glog.Info("E2E services stopped.")
|
// test binary when we start it up in run-services mode. This is not ideal.
|
||||||
|
// Very unintuitive because it prevents any flags NOT manually forwarded here
|
||||||
|
// from being set via TEST_ARGS when running tests from the command line.
|
||||||
|
"--run-services-mode",
|
||||||
|
"--server-start-timeout", serverStartTimeout.String(),
|
||||||
|
"--feature-gates", framework.TestContext.FeatureGates,
|
||||||
|
"--logtostderr",
|
||||||
|
"--vmodule=*="+LOG_VERBOSITY_LEVEL,
|
||||||
|
)
|
||||||
|
server := newServer("services", startCmd, nil, nil, getServicesHealthCheckURLs(), servicesLogFile, false)
|
||||||
|
return server, server.start()
|
||||||
}
|
}
|
||||||
|
|
||||||
// startEtcd starts the embedded etcd instance or returns an error.
|
const (
|
||||||
func (es *e2eService) startEtcd() error {
|
// Ports of different e2e services.
|
||||||
glog.Info("Starting etcd")
|
kubeletPort = "10250"
|
||||||
dataDir, err := ioutil.TempDir("", "node-e2e")
|
kubeletReadOnlyPort = "10255"
|
||||||
if err != nil {
|
// Health check url of kubelet
|
||||||
return err
|
kubeletHealthCheckURL = "http://127.0.0.1:" + kubeletReadOnlyPort + "/healthz"
|
||||||
}
|
)
|
||||||
// Mark the dataDir as directories to remove.
|
|
||||||
es.rmDirs = append(es.rmDirs, dataDir)
|
|
||||||
es.etcdServer = NewEtcd(dataDir)
|
|
||||||
return es.etcdServer.Start()
|
|
||||||
}
|
|
||||||
|
|
||||||
// startApiServer starts the embedded API server or returns an error.
|
|
||||||
func (es *e2eService) startApiServer() error {
|
|
||||||
glog.Info("Starting API server")
|
|
||||||
es.apiServer = NewAPIServer()
|
|
||||||
return es.apiServer.Start()
|
|
||||||
}
|
|
||||||
|
|
||||||
// startNamespaceController starts the embedded namespace controller or returns an error.
|
|
||||||
func (es *e2eService) startNamespaceController() error {
|
|
||||||
glog.Info("Starting namespace controller")
|
|
||||||
es.nsController = NewNamespaceController()
|
|
||||||
return es.nsController.Start()
|
|
||||||
}
|
|
||||||
|
|
||||||
// startKubelet starts the Kubelet in a separate process or returns an error
|
// startKubelet starts the Kubelet in a separate process or returns an error
|
||||||
// if the Kubelet fails to start.
|
// if the Kubelet fails to start.
|
||||||
func (es *e2eService) startKubelet() (*server, error) {
|
func (e *E2EServices) startKubelet() (*server, error) {
|
||||||
glog.Info("Starting kubelet")
|
glog.Info("Starting kubelet")
|
||||||
var killCommand, restartCommand *exec.Cmd
|
var killCommand, restartCommand *exec.Cmd
|
||||||
cmdArgs := []string{}
|
cmdArgs := []string{}
|
||||||
@ -367,7 +178,7 @@ func (es *e2eService) startKubelet() (*server, error) {
|
|||||||
cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, "--remain-after-exit", build.GetKubeletServerBin())
|
cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, "--remain-after-exit", build.GetKubeletServerBin())
|
||||||
killCommand = exec.Command("sudo", "systemctl", "kill", unitName)
|
killCommand = exec.Command("sudo", "systemctl", "kill", unitName)
|
||||||
restartCommand = exec.Command("sudo", "systemctl", "restart", unitName)
|
restartCommand = exec.Command("sudo", "systemctl", "restart", unitName)
|
||||||
es.logFiles["kubelet.log"] = logFileData{
|
e.logFiles["kubelet.log"] = logFileData{
|
||||||
journalctlCommand: []string{"-u", unitName},
|
journalctlCommand: []string{"-u", unitName},
|
||||||
}
|
}
|
||||||
framework.TestContext.EvictionHard = adjustConfigForSystemd(framework.TestContext.EvictionHard)
|
framework.TestContext.EvictionHard = adjustConfigForSystemd(framework.TestContext.EvictionHard)
|
||||||
@ -395,6 +206,7 @@ func (es *e2eService) startKubelet() (*server, error) {
|
|||||||
"--eviction-hard", framework.TestContext.EvictionHard,
|
"--eviction-hard", framework.TestContext.EvictionHard,
|
||||||
"--eviction-pressure-transition-period", "30s",
|
"--eviction-pressure-transition-period", "30s",
|
||||||
"--feature-gates", framework.TestContext.FeatureGates,
|
"--feature-gates", framework.TestContext.FeatureGates,
|
||||||
|
"--runtime-integration-type", framework.TestContext.RuntimeIntegrationType,
|
||||||
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
|
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
|
||||||
)
|
)
|
||||||
if framework.TestContext.RuntimeIntegrationType != "" {
|
if framework.TestContext.RuntimeIntegrationType != "" {
|
||||||
@ -430,294 +242,59 @@ func (es *e2eService) startKubelet() (*server, error) {
|
|||||||
return server, server.start()
|
return server, server.start()
|
||||||
}
|
}
|
||||||
|
|
||||||
// A server manages a separate server process started and killed with
|
|
||||||
// commands.
|
|
||||||
type server struct {
|
|
||||||
// name is the name of the server, it is only used for logging.
|
|
||||||
name string
|
|
||||||
// startCommand is the command used to start the server
|
|
||||||
startCommand *exec.Cmd
|
|
||||||
// killCommand is the command used to stop the server. It is not required. If it
|
|
||||||
// is not specified, `sudo kill` will be used to stop the server.
|
|
||||||
killCommand *exec.Cmd
|
|
||||||
// restartCommand is the command used to restart the server. If provided, it will be used
|
|
||||||
// instead of startCommand when restarting the server.
|
|
||||||
restartCommand *exec.Cmd
|
|
||||||
// healthCheckUrls is the urls used to check whether the server is ready.
|
|
||||||
healthCheckUrls []string
|
|
||||||
// outFilename is the name of the log file. The stdout and stderr of the server
|
|
||||||
// will be redirected to this file.
|
|
||||||
outFilename string
|
|
||||||
// restartOnExit determines whether a restart loop is launched with the server
|
|
||||||
restartOnExit bool
|
|
||||||
// Writing to this channel, if it is not nil, stops the restart loop.
|
|
||||||
// When tearing down a server, you should check for this channel and write to it if it exists.
|
|
||||||
stopRestartingCh chan<- bool
|
|
||||||
// Read from this to confirm that the restart loop has stopped.
|
|
||||||
ackStopRestartingCh <-chan bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// newServer returns a new server with the given name, commands, health check
|
|
||||||
// URLs, etc.
|
|
||||||
func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, restartOnExit bool) *server {
|
|
||||||
return &server{
|
|
||||||
name: name,
|
|
||||||
startCommand: start,
|
|
||||||
killCommand: kill,
|
|
||||||
restartCommand: restart,
|
|
||||||
healthCheckUrls: urls,
|
|
||||||
outFilename: outputFileName,
|
|
||||||
restartOnExit: restartOnExit,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// commandToString format command to string.
|
|
||||||
func commandToString(c *exec.Cmd) string {
|
|
||||||
if c == nil {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return strings.Join(append([]string{c.Path}, c.Args[1:]...), " ")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *server) String() string {
|
|
||||||
return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, restart-command: `%s`, health-check: %v, output-file: %q", s.name,
|
|
||||||
commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
|
|
||||||
}
|
|
||||||
|
|
||||||
// start starts the server by running its commands, monitors it with a health
|
|
||||||
// check, and ensures that it is restarted if applicable.
|
|
||||||
//
|
|
||||||
// Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly.
|
|
||||||
func (s *server) start() error {
|
|
||||||
glog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand))
|
|
||||||
errCh := make(chan error)
|
|
||||||
|
|
||||||
// Set up restart channels if the server is configured for restart on exit.
|
|
||||||
var stopRestartingCh, ackStopRestartingCh chan bool
|
|
||||||
if s.restartOnExit {
|
|
||||||
if len(s.healthCheckUrls) == 0 {
|
|
||||||
return fmt.Errorf("Tried to start %s which has s.restartOnExit == true, but no health check urls provided.", s)
|
|
||||||
}
|
|
||||||
|
|
||||||
stopRestartingCh = make(chan bool)
|
|
||||||
ackStopRestartingCh = make(chan bool)
|
|
||||||
|
|
||||||
s.stopRestartingCh = stopRestartingCh
|
|
||||||
s.ackStopRestartingCh = ackStopRestartingCh
|
|
||||||
}
|
|
||||||
|
|
||||||
// This goroutine actually runs the start command for the server.
|
|
||||||
go func() {
|
|
||||||
defer close(errCh)
|
|
||||||
|
|
||||||
// Create the output filename
|
|
||||||
outPath := path.Join(framework.TestContext.ReportDir, s.outFilename)
|
|
||||||
outfile, err := os.Create(outPath)
|
|
||||||
if err != nil {
|
|
||||||
errCh <- fmt.Errorf("failed to create file %q for `%s` %v.", outPath, s, err)
|
|
||||||
return
|
|
||||||
} else {
|
|
||||||
glog.Infof("Output file for server %q: %v", s.name, outfile.Name())
|
|
||||||
}
|
|
||||||
defer outfile.Close()
|
|
||||||
defer outfile.Sync()
|
|
||||||
|
|
||||||
// Set the command to write the output file
|
|
||||||
s.startCommand.Stdout = outfile
|
|
||||||
s.startCommand.Stderr = outfile
|
|
||||||
|
|
||||||
// Death of this test process should kill the server as well.
|
|
||||||
attrs := &syscall.SysProcAttr{}
|
|
||||||
// Hack to set linux-only field without build tags.
|
|
||||||
deathSigField := reflect.ValueOf(attrs).Elem().FieldByName("Pdeathsig")
|
|
||||||
if deathSigField.IsValid() {
|
|
||||||
deathSigField.Set(reflect.ValueOf(syscall.SIGTERM))
|
|
||||||
} else {
|
|
||||||
errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
s.startCommand.SysProcAttr = attrs
|
|
||||||
|
|
||||||
// Start the command
|
|
||||||
err = s.startCommand.Start()
|
|
||||||
if err != nil {
|
|
||||||
errCh <- fmt.Errorf("failed to run %s: %v", s, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if !s.restartOnExit {
|
|
||||||
glog.Infof("Waiting for server %q start command to complete", s.name)
|
|
||||||
// If we aren't planning on restarting, ok to Wait() here to release resources.
|
|
||||||
// Otherwise, we Wait() in the restart loop.
|
|
||||||
err = s.startCommand.Wait()
|
|
||||||
if err != nil {
|
|
||||||
errCh <- fmt.Errorf("failed to run start command for server %q: %v", s.name, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
usedStartCmd := true
|
|
||||||
for {
|
|
||||||
glog.Infof("Running health check for service %q", s.name)
|
|
||||||
// Wait for an initial health check to pass, so that we are sure the server started.
|
|
||||||
err := readinessCheck(s.name, s.healthCheckUrls, nil)
|
|
||||||
if err != nil {
|
|
||||||
if usedStartCmd {
|
|
||||||
glog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name)
|
|
||||||
s.startCommand.Wait() // Release resources if necessary.
|
|
||||||
}
|
|
||||||
// This should not happen, immediately stop the e2eService process.
|
|
||||||
glog.Fatalf("restart loop readinessCheck failed for %s", s)
|
|
||||||
} else {
|
|
||||||
glog.Infof("Initial health check passed for service %q", s.name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initial health check passed, wait until a health check fails again.
|
|
||||||
stillAlive:
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-stopRestartingCh:
|
|
||||||
ackStopRestartingCh <- true
|
|
||||||
return
|
|
||||||
case <-time.After(time.Second):
|
|
||||||
for _, url := range s.healthCheckUrls {
|
|
||||||
resp, err := http.Head(url)
|
|
||||||
if err != nil || resp.StatusCode != http.StatusOK {
|
|
||||||
break stillAlive
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if usedStartCmd {
|
|
||||||
s.startCommand.Wait() // Release resources from last cmd
|
|
||||||
usedStartCmd = false
|
|
||||||
}
|
|
||||||
if s.restartCommand != nil {
|
|
||||||
// Always make a fresh copy of restartCommand before
|
|
||||||
// running, we may have to restart multiple times
|
|
||||||
s.restartCommand = &exec.Cmd{
|
|
||||||
Path: s.restartCommand.Path,
|
|
||||||
Args: s.restartCommand.Args,
|
|
||||||
Env: s.restartCommand.Env,
|
|
||||||
Dir: s.restartCommand.Dir,
|
|
||||||
Stdin: s.restartCommand.Stdin,
|
|
||||||
Stdout: s.restartCommand.Stdout,
|
|
||||||
Stderr: s.restartCommand.Stderr,
|
|
||||||
ExtraFiles: s.restartCommand.ExtraFiles,
|
|
||||||
SysProcAttr: s.restartCommand.SysProcAttr,
|
|
||||||
}
|
|
||||||
// Run and wait for exit. This command is assumed to have
|
|
||||||
// short duration, e.g. systemctl restart
|
|
||||||
glog.Infof("Restarting server %q with restart command", s.name)
|
|
||||||
err = s.restartCommand.Run()
|
|
||||||
if err != nil {
|
|
||||||
// This should not happen, immediately stop the e2eService process.
|
|
||||||
glog.Fatalf("restarting server %s with restartCommand failed. Error: %v.", s, err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
s.startCommand = &exec.Cmd{
|
|
||||||
Path: s.startCommand.Path,
|
|
||||||
Args: s.startCommand.Args,
|
|
||||||
Env: s.startCommand.Env,
|
|
||||||
Dir: s.startCommand.Dir,
|
|
||||||
Stdin: s.startCommand.Stdin,
|
|
||||||
Stdout: s.startCommand.Stdout,
|
|
||||||
Stderr: s.startCommand.Stderr,
|
|
||||||
ExtraFiles: s.startCommand.ExtraFiles,
|
|
||||||
SysProcAttr: s.startCommand.SysProcAttr,
|
|
||||||
}
|
|
||||||
glog.Infof("Restarting server %q with start command", s.name)
|
|
||||||
err = s.startCommand.Start()
|
|
||||||
usedStartCmd = true
|
|
||||||
if err != nil {
|
|
||||||
// This should not happen, immediately stop the e2eService process.
|
|
||||||
glog.Fatalf("Restarting %s with startCommand failed. Error: %v.", s, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
return readinessCheck(s.name, s.healthCheckUrls, errCh)
|
|
||||||
}
|
|
||||||
|
|
||||||
// kill runs the server's kill command.
|
|
||||||
func (s *server) kill() error {
|
|
||||||
glog.Infof("Kill server %q", s.name)
|
|
||||||
name := s.name
|
|
||||||
cmd := s.startCommand
|
|
||||||
|
|
||||||
// If s has a restart loop, turn it off.
|
|
||||||
if s.restartOnExit {
|
|
||||||
s.stopRestartingCh <- true
|
|
||||||
<-s.ackStopRestartingCh
|
|
||||||
}
|
|
||||||
|
|
||||||
if s.killCommand != nil {
|
|
||||||
return s.killCommand.Run()
|
|
||||||
}
|
|
||||||
|
|
||||||
if cmd == nil {
|
|
||||||
return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name)
|
|
||||||
}
|
|
||||||
|
|
||||||
if cmd.Process == nil {
|
|
||||||
glog.V(2).Infof("%q not running", name)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
pid := cmd.Process.Pid
|
|
||||||
if pid <= 1 {
|
|
||||||
return fmt.Errorf("invalid PID %d for %q", pid, name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attempt to shut down the process in a friendly manner before forcing it.
|
|
||||||
waitChan := make(chan error)
|
|
||||||
go func() {
|
|
||||||
_, err := cmd.Process.Wait()
|
|
||||||
waitChan <- err
|
|
||||||
close(waitChan)
|
|
||||||
}()
|
|
||||||
|
|
||||||
const timeout = 10 * time.Second
|
|
||||||
for _, signal := range []string{"-TERM", "-KILL"} {
|
|
||||||
glog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal)
|
|
||||||
cmd := exec.Command("sudo", "kill", signal, strconv.Itoa(pid))
|
|
||||||
|
|
||||||
// Run the 'kill' command in a separate process group so sudo doesn't ignore it
|
|
||||||
attrs := &syscall.SysProcAttr{}
|
|
||||||
// Hack to set unix-only field without build tags.
|
|
||||||
setpgidField := reflect.ValueOf(attrs).Elem().FieldByName("Setpgid")
|
|
||||||
if setpgidField.IsValid() {
|
|
||||||
setpgidField.Set(reflect.ValueOf(true))
|
|
||||||
} else {
|
|
||||||
return fmt.Errorf("Failed to set Setpgid field (non-unix build)")
|
|
||||||
}
|
|
||||||
cmd.SysProcAttr = attrs
|
|
||||||
|
|
||||||
_, err := cmd.Output()
|
|
||||||
if err != nil {
|
|
||||||
glog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
select {
|
|
||||||
case err := <-waitChan:
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("error stopping %q: %v", name, err)
|
|
||||||
}
|
|
||||||
// Success!
|
|
||||||
return nil
|
|
||||||
case <-time.After(timeout):
|
|
||||||
// Continue.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Errorf("unable to stop %q", name)
|
|
||||||
}
|
|
||||||
|
|
||||||
func adjustConfigForSystemd(config string) string {
|
func adjustConfigForSystemd(config string) string {
|
||||||
return strings.Replace(config, "%", "%%", -1)
|
return strings.Replace(config, "%", "%%", -1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getLogFiles gets logs of interest either via journalctl or by creating sym
|
||||||
|
// links. Since we scp files from the remote directory, symlinks will be
|
||||||
|
// treated as normal files and file contents will be copied over.
|
||||||
|
func (e *E2EServices) getLogFiles() {
|
||||||
|
// Nothing to do if report dir is not specified.
|
||||||
|
if framework.TestContext.ReportDir == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
glog.Info("Fetching log files...")
|
||||||
|
journaldFound := isJournaldAvailable()
|
||||||
|
for targetFileName, logFileData := range e.logFiles {
|
||||||
|
targetLink := path.Join(framework.TestContext.ReportDir, targetFileName)
|
||||||
|
if journaldFound {
|
||||||
|
// Skip log files that do not have an equivalent in journald-based machines.
|
||||||
|
if len(logFileData.journalctlCommand) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
glog.Infof("Get log file %q with journalctl command %v.", targetFileName, logFileData.journalctlCommand)
|
||||||
|
out, err := exec.Command("sudo", append([]string{"journalctl"}, logFileData.journalctlCommand...)...).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("failed to get %q from journald: %v, %v", targetFileName, string(out), err)
|
||||||
|
} else {
|
||||||
|
if err = ioutil.WriteFile(targetLink, out, 0644); err != nil {
|
||||||
|
glog.Errorf("failed to write logs to %q: %v", targetLink, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, file := range logFileData.files {
|
||||||
|
if _, err := os.Stat(file); err != nil {
|
||||||
|
// Expected file not found on this distro.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := copyLogFile(file, targetLink); err != nil {
|
||||||
|
glog.Error(err)
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// isJournaldAvailable returns whether the system executing the tests uses
|
||||||
|
// journald.
|
||||||
|
func isJournaldAvailable() bool {
|
||||||
|
_, err := exec.LookPath("journalctl")
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
||||||
func copyLogFile(src, target string) error {
|
func copyLogFile(src, target string) error {
|
||||||
// If not a journald based distro, then just symlink files.
|
// If not a journald based distro, then just symlink files.
|
||||||
if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil {
|
if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil {
|
||||||
@ -728,55 +305,3 @@ func copyLogFile(src, target string) error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// isJournaldAvailable returns whether the system executing the tests uses
|
|
||||||
// journald.
|
|
||||||
func isJournaldAvailable() bool {
|
|
||||||
_, err := exec.LookPath("journalctl")
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// readinessCheck checks whether services are ready via the supplied health
|
|
||||||
// check URLs. Once there is an error in errCh, the function will stop waiting
|
|
||||||
// and return the error.
|
|
||||||
// TODO(random-liu): Move this to util
|
|
||||||
func readinessCheck(name string, urls []string, errCh <-chan error) error {
|
|
||||||
glog.Infof("Running readiness check for service %q", name)
|
|
||||||
endTime := time.Now().Add(*serverStartTimeout)
|
|
||||||
blockCh := make(chan error)
|
|
||||||
defer close(blockCh)
|
|
||||||
for endTime.After(time.Now()) {
|
|
||||||
select {
|
|
||||||
// We *always* want to run the health check if there is no error on the channel.
|
|
||||||
// With systemd, reads from errCh report nil because cmd.Run() waits
|
|
||||||
// on systemd-run, rather than the service process. systemd-run quickly
|
|
||||||
// exits with status 0, causing the channel to be closed with no error. In
|
|
||||||
// this case, you want to wait for the health check to complete, rather
|
|
||||||
// than returning from readinessCheck as soon as the channel is closed.
|
|
||||||
case err, ok := <-errCh:
|
|
||||||
if ok { // The channel is not closed, this is a real error
|
|
||||||
if err != nil { // If there is an error, return it
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// If not, keep checking readiness.
|
|
||||||
} else { // The channel is closed, this is only a zero value.
|
|
||||||
// Replace the errCh with blockCh to avoid busy loop,
|
|
||||||
// and keep checking readiness.
|
|
||||||
errCh = blockCh
|
|
||||||
}
|
|
||||||
case <-time.After(time.Second):
|
|
||||||
ready := true
|
|
||||||
for _, url := range urls {
|
|
||||||
resp, err := http.Head(url)
|
|
||||||
if err != nil || resp.StatusCode != http.StatusOK {
|
|
||||||
ready = false
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ready {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmt.Errorf("e2e service %q readiness check timeout %v", name, *serverStartTimeout)
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user