From c8915d646d761ffb9f77769ae3dcd54fde248b41 Mon Sep 17 00:00:00 2001 From: Sven Dowideit Date: Fri, 28 Jul 2017 13:27:40 +1000 Subject: [PATCH] add a timeout in case a system-container refuses to quit Signed-off-by: Sven Dowideit --- cmd/power/power.go | 75 ++++++++++++++++++++++++++++++++++++------- cmd/power/shutdown.go | 3 -- config/schema.go | 3 +- config/types.go | 1 + os-config.tpl.yml | 1 + scripts/schema.json | 3 +- 6 files changed, 69 insertions(+), 17 deletions(-) diff --git a/cmd/power/power.go b/cmd/power/power.go index 9882bbb7..9442e786 100644 --- a/cmd/power/power.go +++ b/cmd/power/power.go @@ -2,11 +2,13 @@ package power import ( "errors" + "fmt" "os" "path/filepath" "strconv" "strings" "syscall" + "time" "golang.org/x/net/context" @@ -14,6 +16,7 @@ import ( "github.com/docker/engine-api/types/container" "github.com/docker/engine-api/types/filters" "github.com/rancher/os/cmd/control/install" + "github.com/rancher/os/config" "github.com/rancher/os/log" "github.com/rancher/os/docker" @@ -80,21 +83,35 @@ func runDocker(name string) error { return err } - go func() { - client.ContainerAttach(context.Background(), types.ContainerAttachOptions{ - ContainerID: powerContainer.ID, - Stream: true, - Stderr: true, - Stdout: true, - }) - }() - err = client.ContainerStart(context.Background(), powerContainer.ID) if err != nil { return err } - _, err = client.ContainerWait(context.Background(), powerContainer.ID) + reader, err := client.ContainerLogs(context.Background(), types.ContainerLogsOptions{ + ContainerID: powerContainer.ID, + ShowStderr: true, + ShowStdout: true, + Follow: true, + }) + if err != nil { + log.Fatal(err) + } + + for { + p := make([]byte, 4096) + n, err := reader.Read(p) + if err != nil { + log.Error(err) + if n == 0 { + reader.Close() + break + } + } + if n > 0 { + fmt.Print(string(p)) + } + } if err != nil { log.Fatal(err) @@ -109,6 +126,34 @@ func reboot(name string, force bool, code uint) { log.Fatalf("%s: Need to be root", 
os.Args[0]) } + // Add shutdown timeout + cfg := config.LoadConfig() + timeoutValue := cfg.Rancher.ShutdownTimeout + if timeoutValue == 0 { + timeoutValue = 60 + } + if timeoutValue < 5 { + timeoutValue = 5 + } + log.Infof("Setting %s timeout to %d (rancher.shutdown_timeout set to %d)", os.Args[0], timeoutValue, cfg.Rancher.ShutdownTimeout) + + go func() { + timeout := time.After(time.Duration(timeoutValue) * time.Second) + tick := time.Tick(100 * time.Millisecond) + // Print a progress dot on every tick until the timeout fires + for { + select { + // Timed out: log the error, sync, and force the power action + case <-timeout: + log.Errorf("Container shutdown taking too long, forcing %s.", os.Args[0]) + syscall.Sync() + syscall.Reboot(int(code)) + case <-tick: + fmt.Printf(".") + } + } + }() + // reboot -f should work even when system-docker is having problems if !force { if kexecFlag || previouskexecFlag || kexecAppendFlag != "" { @@ -141,7 +186,6 @@ func reboot(name string, force bool, code uint) { } syscall.Sync() - err := syscall.Reboot(int(code)) if err != nil { log.Fatal(err) @@ -215,13 +259,20 @@ func shutDownContainers() error { } } + // let's see which containers are still running and only wait on those + containers, err = client.ContainerList(context.Background(), opts) + if err != nil { + return err + } + var waitErrorStrings []string - for _, container := range containers { + for idx, container := range containers { if container.ID == currentContainerID { continue } if container.Names[0] == "/console" { + consoleContainerIdx = idx continue } log.Infof("Waiting %s : %s", container.Names[0], container.ID[:12]) diff --git a/cmd/power/shutdown.go b/cmd/power/shutdown.go index 3c72406d..7307a5c5 100644 --- a/cmd/power/shutdown.go +++ b/cmd/power/shutdown.go @@ -138,9 +138,6 @@ func Shutdown() { //TODO: add the time and msg flags... 
app.HideHelp = true - log.Infof("%s, %s", app.Usage, app.Version) - fmt.Printf("%s, %s", app.Usage, app.Version) - app.Run(os.Args) } diff --git a/config/schema.go b/config/schema.go index df2ec0d5..f451a630 100644 --- a/config/schema.go +++ b/config/schema.go @@ -54,7 +54,8 @@ var schema = `{ "resize_device": {"type": "string"}, "sysctl": {"type": "object"}, "restart_services": {"type": "array"}, - "hypervisor_service": {"type": "boolean"} + "hypervisor_service": {"type": "boolean"}, + "shutdown_timeout": {"type": "integer"} } }, diff --git a/config/types.go b/config/types.go index 6ff50a19..4f0bb98d 100755 --- a/config/types.go +++ b/config/types.go @@ -132,6 +132,7 @@ type RancherConfig struct { Sysctl map[string]string `yaml:"sysctl,omitempty"` RestartServices []string `yaml:"restart_services,omitempty"` HypervisorService bool `yaml:"hypervisor_service,omitempty"` + ShutdownTimeout int `yaml:"shutdown_timeout,omitempty"` } type UpgradeConfig struct { diff --git a/os-config.tpl.yml b/os-config.tpl.yml index ed5ad641..616ae31d 100644 --- a/os-config.tpl.yml +++ b/os-config.tpl.yml @@ -1,4 +1,5 @@ rancher: + shutdown_timeout: 60 environment: VERSION: {{.VERSION}} SUFFIX: {{.SUFFIX}} diff --git a/scripts/schema.json b/scripts/schema.json index 0bb57607..9ca50a2f 100644 --- a/scripts/schema.json +++ b/scripts/schema.json @@ -52,7 +52,8 @@ "resize_device": {"type": "string"}, "sysctl": {"type": "object"}, "restart_services": {"type": "array"}, - "hypervisor_service": {"type": "boolean"} + "hypervisor_service": {"type": "boolean"}, + "shutdown_timeout": {"type": "integer"} } },