From c51921e54a857d82d66980b5e45c227c86a9cd7a Mon Sep 17 00:00:00 2001 From: Sven Dowideit Date: Fri, 28 Jul 2017 13:27:40 +1000 Subject: [PATCH] add a timeout in case a system-container refuses to quit Signed-off-by: Sven Dowideit --- cmd/power/power.go | 74 +++++++++++++++++++---- cmd/power/shutdown.go | 3 - config/schema.go | 3 +- config/types.go | 1 + os-config.tpl.yml | 1 + scripts/installer/kexec/Dockerfile.dapper | 7 ++- scripts/schema.json | 3 +- tests/cmdline_test.go | 2 +- 8 files changed, 73 insertions(+), 21 deletions(-) diff --git a/cmd/power/power.go b/cmd/power/power.go index de382cea..f6439ca0 100644 --- a/cmd/power/power.go +++ b/cmd/power/power.go @@ -2,17 +2,20 @@ package power import ( "errors" + "fmt" "os" "path/filepath" "strconv" "strings" "syscall" + "time" "golang.org/x/net/context" "github.com/docker/engine-api/types" "github.com/docker/engine-api/types/container" "github.com/docker/engine-api/types/filters" + "github.com/rancher/os/config" "github.com/rancher/os/log" "github.com/rancher/os/docker" @@ -76,21 +79,35 @@ func runDocker(name string) error { return err } - go func() { - client.ContainerAttach(context.Background(), types.ContainerAttachOptions{ - ContainerID: powerContainer.ID, - Stream: true, - Stderr: true, - Stdout: true, - }) - }() - err = client.ContainerStart(context.Background(), powerContainer.ID) if err != nil { return err } - _, err = client.ContainerWait(context.Background(), powerContainer.ID) + reader, err := client.ContainerLogs(context.Background(), types.ContainerLogsOptions{ + ContainerID: powerContainer.ID, + ShowStderr: true, + ShowStdout: true, + Follow: true, + }) + if err != nil { + log.Fatal(err) + } + + for { + p := make([]byte, 4096) + n, err := reader.Read(p) + if err != nil { + log.Error(err) + if n == 0 { + reader.Close() + break + } + } + if n > 0 { + fmt.Print(string(p)) + } + } if err != nil { log.Fatal(err) @@ -126,13 +143,39 @@ func Halt() { } func reboot(code uint) { + cfg := config.LoadConfig() + timeoutValue := cfg.Rancher.ShutdownTimeout + if timeoutValue == 0 { + timeoutValue = 60 + } + if timeoutValue < 5 { + timeoutValue = 5 + } + log.Infof("Setting %s timeout to %d (rancher.shutdown_timeout set to %d)", os.Args[0], timeoutValue, cfg.Rancher.ShutdownTimeout) + + go func() { + timeout := time.After(time.Duration(timeoutValue) * time.Second) + tick := time.Tick(100 * time.Millisecond) + // Keep trying until we're timed out or got a result or got an error + for { + select { + // Got a timeout! fail with a timeout error + case <-timeout: + log.Errorf("Container shutdown taking too long, forcing %s.", os.Args[0]) + syscall.Sync() + syscall.Reboot(int(code)) + case <-tick: + fmt.Printf(".") + } + } + }() + err := shutDownContainers() if err != nil { log.Error(err) } syscall.Sync() - err = syscall.Reboot(int(code)) if err != nil { log.Fatal(err) @@ -206,13 +249,20 @@ func shutDownContainers() error { } } + // lets see what containers are still running and only wait on those + containers, err = client.ContainerList(context.Background(), opts) + if err != nil { + return err + } + var waitErrorStrings []string - for _, container := range containers { + for idx, container := range containers { if container.ID == currentContainerID { continue } if container.Names[0] == "/console" { + consoleContainerIdx = idx continue } log.Infof("Waiting %s : %s", container.Names[0], container.ID[:12]) diff --git a/cmd/power/shutdown.go b/cmd/power/shutdown.go index a684ef2e..c4164c87 100644 --- a/cmd/power/shutdown.go +++ b/cmd/power/shutdown.go @@ -31,9 +31,6 @@ func Main() { } app.HideHelp = true - log.Infof("%s, %s", app.Usage, app.Version) - fmt.Printf("%s, %s", app.Usage, app.Version) - app.Run(os.Args) } diff --git a/config/schema.go b/config/schema.go index 0876f1e7..4da64bce 100644 --- a/config/schema.go +++ b/config/schema.go @@ -52,7 +52,8 @@ var schema = `{ "defaults": {"$ref": "#/definitions/defaults_config"}, "resize_device": {"type": "string"}, "sysctl": {"type": "object"}, - "restart_services": {"type": "array"} + "restart_services": {"type": "array"}, + "shutdown_timeout": {"type": "integer"} } }, diff --git a/config/types.go b/config/types.go index 4a4e998b..90e0b8d4 100755 --- a/config/types.go +++ b/config/types.go @@ -130,6 +130,7 @@ type RancherConfig struct { ResizeDevice string `yaml:"resize_device,omitempty"` Sysctl map[string]string `yaml:"sysctl,omitempty"` RestartServices []string `yaml:"restart_services,omitempty"` + ShutdownTimeout int `yaml:"shutdown_timeout,omitempty"` } type UpgradeConfig struct { diff --git a/os-config.tpl.yml b/os-config.tpl.yml index bb9b2eff..f255e91b 100644 --- a/os-config.tpl.yml +++ b/os-config.tpl.yml @@ -1,4 +1,5 @@ rancher: + shutdown_timeout: 60 environment: VERSION: {{.VERSION}} SUFFIX: {{.SUFFIX}} diff --git a/scripts/installer/kexec/Dockerfile.dapper b/scripts/installer/kexec/Dockerfile.dapper index 49ad5889..8b0e9833 100644 --- a/scripts/installer/kexec/Dockerfile.dapper +++ b/scripts/installer/kexec/Dockerfile.dapper @@ -10,7 +10,7 @@ RUN echo "Acquire::http { Proxy \"$APTPROXY\"; };" >> /etc/apt/apt.conf.d/01prox && apt-get install -yq build-essential autoconf libtool gawk alien fakeroot \ zlib1g-dev uuid-dev libattr1-dev libblkid-dev libselinux-dev libudev-dev libdevmapper-dev \ module-init-tools \ - parted lsscsi ksh curl git + parted lsscsi ksh curl git wget WORKDIR /source @@ -20,8 +20,9 @@ WORKDIR /source # && tar zxvf /source/build-linux-4.9.15-rancher-x86.tar.gz # https://www.kernel.org/pub/linux/utils/kernel/kexec/ -ENV VERSION 2.0.14 -ADD https://www.kernel.org/pub/linux/utils/kernel/kexec/kexec-tools-$VERSION.tar.gz . +ENV VERSION 2.0.15 +RUN wget https://www.kernel.org/pub/linux/utils/kernel/kexec/kexec-tools-$VERSION.tar.gz \ + && tar zxvf kexec-tools-$VERSION.tar.gz RUN zcat kexec-tools-$VERSION.tar.gz | tar xvf - \ && cd kexec-tools-$VERSION \ diff --git a/scripts/schema.json b/scripts/schema.json index f16d88bb..c7d04762 100644 --- a/scripts/schema.json +++ b/scripts/schema.json @@ -50,7 +50,8 @@ "defaults": {"$ref": "#/definitions/defaults_config"}, "resize_device": {"type": "string"}, "sysctl": {"type": "object"}, - "restart_services": {"type": "array"} + "restart_services": {"type": "array"}, + "shutdown_timeout": {"type": "integer"} } }, diff --git a/tests/cmdline_test.go b/tests/cmdline_test.go index d15afc23..04c9c1ff 100755 --- a/tests/cmdline_test.go +++ b/tests/cmdline_test.go @@ -19,7 +19,7 @@ func (s *QemuSuite) TestElideCmdLine(c *C) { s.RunQemuWith(c, runArgs...) s.CheckOutput(c, "nope\n", Equals, "hostname") - cmdline := s.CheckOutput(c, "", Not(Equals), "cat /proc/cmdline",) + cmdline := s.CheckOutput(c, "", Not(Equals), "cat /proc/cmdline") if strings.Contains(cmdline, extra) { c.Errorf("/proc/cmdline (%s) contains info that should be elided (%s)", cmdline, extra) }