From 1e0dfd47981caba2c136269d0cb2f070c6a272ba Mon Sep 17 00:00:00 2001 From: Nathan LeClaire Date: Tue, 12 Jul 2016 15:06:28 -0700 Subject: [PATCH] Re-write diagnostic server to support cloud/HTTP Signed-off-by: Nathan LeClaire --- alpine/packages/diagnostics/capture.go | 170 +++++++++++++++ .../diagnostics/etc/init.d/diagnostics | 21 +- alpine/packages/diagnostics/http.go | 67 ++++++ alpine/packages/diagnostics/hvsock.go | 24 +++ alpine/packages/diagnostics/main.go | 197 ++++-------------- alpine/packages/diagnostics/rawtcp.go | 19 ++ alpine/packages/diagnostics/run.go | 1 + alpine/packages/diagnostics/tar_respond.go | 40 ++++ alpine/packages/diagnostics/vsock.go | 23 ++ 9 files changed, 401 insertions(+), 161 deletions(-) create mode 100644 alpine/packages/diagnostics/capture.go create mode 100644 alpine/packages/diagnostics/http.go create mode 100644 alpine/packages/diagnostics/hvsock.go create mode 100644 alpine/packages/diagnostics/rawtcp.go create mode 100644 alpine/packages/diagnostics/run.go create mode 100644 alpine/packages/diagnostics/tar_respond.go create mode 100644 alpine/packages/diagnostics/vsock.go diff --git a/alpine/packages/diagnostics/capture.go b/alpine/packages/diagnostics/capture.go new file mode 100644 index 000000000..b1dbb4fb5 --- /dev/null +++ b/alpine/packages/diagnostics/capture.go @@ -0,0 +1,170 @@ +package main + +import ( + "archive/tar" + "bytes" + "io" + "log" + "os" + "os/exec" + "path" + "path/filepath" + "strings" + "time" +) + +var ( + commonCmdCaptures = []CommandCapturer{ + {"/bin/date", nil}, + {"/bin/uname", []string{"-a"}}, + {"/bin/ps", []string{"uax"}}, + {"/bin/netstat", []string{"-tulpn"}}, + {"/sbin/iptables-save", nil}, + {"/sbin/ifconfig", nil}, + {"/sbin/route", nil}, + {"/usr/sbin/brctl", nil}, + {"/bin/dmesg", nil}, + {"/usr/bin/docker", []string{"ps"}}, + {"/usr/bin/tail", []string{"-100", "/var/log/docker.log"}}, + {"/usr/bin/tail", []string{"-100", "/var/log/messages"}}, + {"/bin/mount", nil}, + {"/bin/df", nil}, + {"/bin/ls", []string{"-l", "/var"}}, + {"/bin/ls", []string{"-l", "/var/lib"}}, + {"/bin/ls", []string{"-l", "/var/lib/docker"}}, + {"/usr/bin/diagnostics", nil}, + {"/bin/ping", []string{"-w", "5", "8.8.8.8"}}, + {"/bin/cat", []string{"/etc/docker/daemon.json"}}, + {"/bin/cat", []string{"/etc/network/interfaces"}}, + {"/bin/cat", []string{"/etc/resolv.conf"}}, + {"/bin/cat", []string{"/etc/sysctl.conf"}}, + {"/usr/bin/dig", []string{"docker.com"}}, + {"/usr/bin/dig", []string{"@8.8.8.8", "docker.com"}}, + {"/usr/bin/wget", []string{"-O", "-", "http://www.docker.com/"}}, + {"/usr/bin/wget", []string{"-O", "-", "http://104.239.220.248/"}}, // a www.docker.com address + {"/usr/bin/wget", []string{"-O", "-", "http://216.58.213.68/"}}, // a www.google.com address + {"/usr/bin/wget", []string{"-O", "-", "http://91.198.174.192/"}}, // a www.wikipedia.com address + } + localCmdCaptures = []CommandCapturer{ + {"/usr/bin/tail", []string{"-100", "/var/log/proxy-vsockd.log"}}, + {"/usr/bin/tail", []string{"-100", "/var/log/service-port-opener.log"}}, + {"/usr/bin/tail", []string{"-100", "/var/log/vsudd.log"}}, + } + localCaptures = []Capturer{NewDatabaseCapturer()} +) + +func init() { + for _, c := range localCmdCaptures { + localCmdCaptures = append(localCmdCaptures, c) + } +} + +// Capturer defines behavior for structs which will capture arbitrary +// diagnostic information and write it to a tar archive with a timeout. +type Capturer interface { + Capture(time.Duration, *tar.Writer) +} + +type CommandCapturer struct { + command string + args []string +} + +func (cc CommandCapturer) Capture(timeout time.Duration, w *tar.Writer) { + log.Printf("Running %s", cc.command) + c := exec.Command(cc.command, cc.args...) + stdoutPipe, err := c.StdoutPipe() + if err != nil { + log.Fatalf("Failed to create stdout pipe: %s", err) + } + stderrPipe, err := c.StderrPipe() + if err != nil { + log.Fatalf("Failed to create stderr pipe: %s", err) + } + var stdoutBuffer bytes.Buffer + var stderrBuffer bytes.Buffer + done := make(chan int) + go func() { + io.Copy(&stdoutBuffer, stdoutPipe) + done <- 0 + }() + go func() { + io.Copy(&stderrBuffer, stderrPipe) + done <- 0 + }() + var timer *time.Timer + timer = time.AfterFunc(timeout, func() { + timer.Stop() + if c.Process != nil { + c.Process.Kill() + } + }) + _ = c.Run() + <-done + <-done + timer.Stop() + + name := strings.Join(append([]string{path.Base(cc.command)}, cc.args...), " ") + + hdr := &tar.Header{ + Name: name + ".stdout", + Mode: 0644, + Size: int64(stdoutBuffer.Len()), + } + if err = w.WriteHeader(hdr); err != nil { + log.Fatalln(err) + } + if _, err = w.Write(stdoutBuffer.Bytes()); err != nil { + log.Fatalln(err) + } + hdr = &tar.Header{ + Name: name + ".stderr", + Mode: 0644, + Size: int64(stderrBuffer.Len()), + } + if err = w.WriteHeader(hdr); err != nil { + log.Fatalln(err) + } + if _, err = w.Write(stderrBuffer.Bytes()); err != nil { + log.Fatalln(err) + } +} + +type DatabaseCapturer struct { + *CommandCapturer +} + +func NewDatabaseCapturer() DatabaseCapturer { + return DatabaseCapturer{ + &CommandCapturer{ + command: "/bin/cat", + }, + } +} + +func (dc DatabaseCapturer) Capture(timeout time.Duration, w *tar.Writer) { + // Dump the database + dbBase := "/Database/branch/master/ro" + filepath.Walk(dbBase, func(path string, f os.FileInfo, err error) error { + if f.Mode().IsRegular() { + dc.CommandCapturer.args = []string{path} + dc.CommandCapturer.Capture(timeout, w) + } + return nil + }) +} + +// Capture is the outer level wrapper function to trigger the capturing of +// information. Clients are expected to call it with a slice of Capturers +// which define the information to be captured. By using an interface we can +// flexibly define various capture actions for the various listeners. +// +// It is a passed a tar.Writer which the results of the capture will be written +// to. +func Capture(w *tar.Writer, captures []Capturer) { + t := 2 * time.Second + + for _, c := range captures { + c.Capture(t, w) + } +} diff --git a/alpine/packages/diagnostics/etc/init.d/diagnostics b/alpine/packages/diagnostics/etc/init.d/diagnostics index ca8a2e931..4e0ceb45b 100755 --- a/alpine/packages/diagnostics/etc/init.d/diagnostics +++ b/alpine/packages/diagnostics/etc/init.d/diagnostics @@ -9,6 +9,25 @@ start() { ebegin "Checking system state" - /usr/bin/diagnostics-server & + DIAGNOSTICS_SERVER_FLAGS="" + + case "$(mobyplatform)" in + "aws") + DIAGNOSTICS_SERVER_FLAGS="-http" + ;; + "azure") + DIAGNOSTICS_SERVER_FLAGS="-http" + ;; + "windows") + # TODO(Rolf/David): Are these correct? + DIAGNOSTICS_SERVER_FLAGS="-hvsock -rawtcp" + ;; + "mac") + # TODO(Justin/Anil): Are these correct? + DIAGNOSTICS_SERVER_FLAGS="-vsock -rawtcp" + ;; + esac + + /usr/bin/diagnostics-server ${DIAGNOSTICS_SERVER_FLAGS} & /usr/bin/diagnostics } diff --git a/alpine/packages/diagnostics/http.go b/alpine/packages/diagnostics/http.go new file mode 100644 index 000000000..13840aec3 --- /dev/null +++ b/alpine/packages/diagnostics/http.go @@ -0,0 +1,67 @@ +package main + +import ( + "archive/tar" + "io/ioutil" + "log" + "net/http" + "os" +) + +const ( + dockerSock = "/var/run/docker.sock" + lgtm = "LGTM" + httpMagicPort = ":44554" // chosen arbitrarily due to IANA availability -- might change +) + +var ( + cloudCaptures = []Capturer{} +) + +func init() { + for _, c := range commonCmdCaptures { + cloudCaptures = append(cloudCaptures, c) + } +} + +// HTTPDiagnosticListener sets a health check and optional diagnostic endpoint +// for cloud editions. +type HTTPDiagnosticListener struct{} + +func (h HTTPDiagnosticListener) Listen() { + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if _, err := os.Stat(dockerSock); os.IsNotExist(err) { + http.Error(w, "Docker socket not found -- daemon is down", http.StatusServiceUnavailable) + return + } + if _, err := w.Write([]byte(lgtm)); err != nil { + log.Println("Error writing HTTP success response:", err) + return + } + }) + + http.HandleFunc("/diagnose", func(w http.ResponseWriter, r *http.Request) { + dir, err := ioutil.TempDir("", "diagnostics") + if err != nil { + log.Println("Error creating temp dir on diagnostic request:", err) + return + } + + file, err := ioutil.TempFile(dir, "diagnostics") + if err != nil { + log.Println("Error creating temp file on diagnostic request:", err) + return + } + + tarWriter := tar.NewWriter(file) + + Capture(tarWriter, cloudCaptures) + + // TODO: upload written (and gzipped?) tar file to our S3 + // bucket with specific path convention (per-user? by date?) + }) + + // Start HTTP server to indicate general Docker health. + // TODO: no magic port? + http.ListenAndServe(httpMagicPort, nil) +} diff --git a/alpine/packages/diagnostics/hvsock.go b/alpine/packages/diagnostics/hvsock.go new file mode 100644 index 000000000..e9fb9bbc5 --- /dev/null +++ b/alpine/packages/diagnostics/hvsock.go @@ -0,0 +1,24 @@ +package main + +import ( + "log" + "syscall" + + "github.com/rneugeba/virtsock/go/hvsock" +) + +type HVSockDiagnosticListener struct{} + +func (l HVSockDiagnosticListener) Listen() { + svcid, _ := hvsock.GuidFromString("445BA2CB-E69B-4912-8B42-D7F494D007EA") + hvsock, err := hvsock.Listen(hvsock.HypervAddr{VmId: hvsock.GUID_WILDCARD, ServiceId: svcid}) + if err != nil { + if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT { + log.Printf("Failed to bind to hvsock port: %s", err) + } + } + + for { + TarRespond(hvsock) + } +} diff --git a/alpine/packages/diagnostics/main.go b/alpine/packages/diagnostics/main.go index 1e7f42747..df7e365d7 100644 --- a/alpine/packages/diagnostics/main.go +++ b/alpine/packages/diagnostics/main.go @@ -1,132 +1,12 @@ package main import ( - "archive/tar" - "bytes" - "io" + "flag" "log" "log/syslog" - "net" - "os" - "os/exec" - "path" - "path/filepath" - "strings" - "syscall" - "time" - - "github.com/rneugeba/virtsock/go/hvsock" - "github.com/rneugeba/virtsock/go/vsock" ) -func run(timeout time.Duration, w *tar.Writer, command string, args ...string) { - log.Printf("Running %s", command) - c := exec.Command(command, args...) - stdoutPipe, err := c.StdoutPipe() - if err != nil { - log.Fatalf("Failed to create stdout pipe: %s", err) - } - stderrPipe, err := c.StderrPipe() - if err != nil { - log.Fatalf("Failed to create stderr pipe: %s", err) - } - var stdoutBuffer bytes.Buffer - var stderrBuffer bytes.Buffer - done := make(chan int) - go func() { - io.Copy(&stdoutBuffer, stdoutPipe) - done <- 0 - }() - go func() { - io.Copy(&stderrBuffer, stderrPipe) - done <- 0 - }() - var timer *time.Timer - timer = time.AfterFunc(timeout, func() { - timer.Stop() - if c.Process != nil { - c.Process.Kill() - } - }) - _ = c.Run() - <-done - <-done - timer.Stop() - - name := strings.Join(append([]string{path.Base(command)}, args...), " ") - - hdr := &tar.Header{ - Name: name + ".stdout", - Mode: 0644, - Size: int64(stdoutBuffer.Len()), - } - if err = w.WriteHeader(hdr); err != nil { - log.Fatalln(err) - } - if _, err = w.Write(stdoutBuffer.Bytes()); err != nil { - log.Fatalln(err) - } - hdr = &tar.Header{ - Name: name + ".stderr", - Mode: 0644, - Size: int64(stderrBuffer.Len()), - } - if err = w.WriteHeader(hdr); err != nil { - log.Fatalln(err) - } - if _, err = w.Write(stderrBuffer.Bytes()); err != nil { - log.Fatalln(err) - } - -} - -func capture(w *tar.Writer) { - t := 2 * time.Second - - run(t, w, "/bin/date") - run(t, w, "/bin/uname", "-a") - run(t, w, "/bin/ps", "uax") - run(t, w, "/bin/netstat", "-tulpn") - run(t, w, "/sbin/iptables-save") - run(t, w, "/sbin/ifconfig", "-a") - run(t, w, "/sbin/route", "-n") - run(t, w, "/usr/sbin/brctl", "show") - run(t, w, "/bin/dmesg") - run(t, w, "/usr/bin/docker", "ps") - run(t, w, "/usr/bin/tail", "-100", "/var/log/docker.log") - run(t, w, "/usr/bin/tail", "-100", "/var/log/messages") - run(t, w, "/usr/bin/tail", "-100", "/var/log/proxy-vsockd.log") - run(t, w, "/usr/bin/tail", "-100", "/var/log/service-port-opener.log") - run(t, w, "/usr/bin/tail", "-100", "/var/log/vsudd.log") - run(t, w, "/bin/mount") - run(t, w, "/bin/df") - run(t, w, "/bin/ls", "-l", "/var") - run(t, w, "/bin/ls", "-l", "/var/lib") - run(t, w, "/bin/ls", "-l", "/var/lib/docker") - run(t, w, "/usr/bin/diagnostics") - run(t, w, "/bin/ping", "-w", "5", "8.8.8.8") - run(t, w, "/bin/cat", "/etc/docker/daemon.json") - run(t, w, "/bin/cat", "/etc/network/interfaces") - run(t, w, "/bin/cat", "/etc/resolv.conf") - run(t, w, "/bin/cat", "/etc/sysctl.conf") - run(t, w, "/usr/bin/dig", "docker.com") - run(t, w, "/usr/bin/dig", "@8.8.8.8", "docker.com") - run(t, w, "/usr/bin/wget", "-O", "-", "http://www.docker.com/") - run(t, w, "/usr/bin/wget", "-O", "-", "http://104.239.220.248/") // a www.docker.com address - run(t, w, "/usr/bin/wget", "-O", "-", "http://216.58.213.68/") // a www.google.com address - run(t, w, "/usr/bin/wget", "-O", "-", "http://91.198.174.192/") // a www.wikipedia.com address - - // Dump the database - dbBase := "/Database/branch/master/ro" - filepath.Walk(dbBase, func(path string, f os.FileInfo, err error) error { - if f.Mode().IsRegular() { - run(t, w, "/bin/cat", path) - } - return nil - }) -} - -func main() { +func init() { syslog, err := syslog.New(syslog.LOG_INFO|syslog.LOG_DAEMON, "diagnostics") if err != nil { log.Fatalln("Failed to open syslog", err) @@ -134,51 +14,48 @@ func main() { log.SetOutput(syslog) log.SetFlags(0) +} - listeners := make([]net.Listener, 0) +type DiagnosticListener interface { + // Listen(), a blocking method intended to be invoked in its own + // goroutine, will listen for a diagnostic information request and + // capture the desired information if one is received. + Listen() +} - ip, err := net.Listen("tcp", ":62374") - if err != nil { - log.Printf("Failed to bind to TCP port 62374: %s", err) - } else { - listeners = append(listeners, ip) +// DiagnosticUploader uploads the collected information to the mothership. +type DiagnosticUploader interface { + Upload() error +} + +func main() { + flHTTP := flag.Bool("http", false, "Enable diagnostic HTTP listener") + flVSock := flag.Bool("vsock", false, "Enable vsock listener") + flHVSock := flag.Bool("hvsock", false, "Enable hvsock listener") + flRawTCP := flag.Bool("rawtcp", false, "Enable raw TCP listener") + + flag.Parse() + + listeners := make([]DiagnosticListener, 0) + + if *flHTTP { + listeners = append(listeners, HTTPDiagnosticListener{}) } - vsock, err := vsock.Listen(uint(62374)) - if err != nil { - if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT { - log.Printf("Failed to bind to vsock port 62374: %s", err) - } - } else { - listeners = append(listeners, vsock) + + if *flVSock { + listeners = append(listeners, VSockDiagnosticListener{}) } - svcid, _ := hvsock.GuidFromString("445BA2CB-E69B-4912-8B42-D7F494D007EA") - hvsock, err := hvsock.Listen(hvsock.HypervAddr{VmId: hvsock.GUID_WILDCARD, ServiceId: svcid}) - if err != nil { - if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT { - log.Printf("Failed to bind to hvsock port: %s", err) - } - } else { - listeners = append(listeners, hvsock) + + if *flHVSock { + listeners = append(listeners, HVSockDiagnosticListener{}) + } + + if *flRawTCP { + listeners = append(listeners, RawTCPDiagnosticListener{}) } for _, l := range listeners { - go func(l net.Listener) { - for { - conn, err := l.Accept() - if err != nil { - log.Printf("Error accepting connection: %s", err) - return // no more listening - } - go func(conn net.Conn) { - w := tar.NewWriter(conn) - capture(w) - if err := w.Close(); err != nil { - log.Println(err) - } - conn.Close() - }(conn) - } - }(l) + go l.Listen() } forever := make(chan int) <-forever diff --git a/alpine/packages/diagnostics/rawtcp.go b/alpine/packages/diagnostics/rawtcp.go new file mode 100644 index 000000000..bda11200b --- /dev/null +++ b/alpine/packages/diagnostics/rawtcp.go @@ -0,0 +1,19 @@ +package main + +import ( + "log" + "net" +) + +type RawTCPDiagnosticListener struct{} + +func (l RawTCPDiagnosticListener) Listen() { + ip, err := net.Listen("tcp", ":62374") + if err != nil { + log.Printf("Failed to bind to TCP port 62374: %s", err) + } + + for { + TarRespond(ip) + } +} diff --git a/alpine/packages/diagnostics/run.go b/alpine/packages/diagnostics/run.go new file mode 100644 index 000000000..06ab7d0f9 --- /dev/null +++ b/alpine/packages/diagnostics/run.go @@ -0,0 +1 @@ +package main diff --git a/alpine/packages/diagnostics/tar_respond.go b/alpine/packages/diagnostics/tar_respond.go new file mode 100644 index 000000000..2992cd5e4 --- /dev/null +++ b/alpine/packages/diagnostics/tar_respond.go @@ -0,0 +1,40 @@ +package main + +import ( + "archive/tar" + "log" + "net" +) + +// TarRespond is used to write back a tar archive over a connection for the +// lower-level listener types. +// +// In local virtualization (which this is for) we write back a tar file +// directly and the client takes care of shipping the result to the mothership. +// +// By contrast, in cloud editions we expect each node to ship the captured +// information on its own, so this function is not used. +// +// This is a deliberate design to choice to ensure that it is possible in the +// future for diagnostic information to be reported from nodes which have have +// been separated via network partition from the node which initiates +// diagnostic collection, and/or if we decide to automatically collect +// diagnostic information from nodes which deem *themselves* unhealthy at a +// future time. +func TarRespond(l net.Listener) { + conn, err := l.Accept() + if err != nil { + log.Printf("Error accepting connection: %s", err) + return + } + + w := tar.NewWriter(conn) + + Capture(w, localCaptures) + + if err := w.Close(); err != nil { + log.Println(err) + } + + conn.Close() +} diff --git a/alpine/packages/diagnostics/vsock.go b/alpine/packages/diagnostics/vsock.go new file mode 100644 index 000000000..a80308c42 --- /dev/null +++ b/alpine/packages/diagnostics/vsock.go @@ -0,0 +1,23 @@ +package main + +import ( + "log" + "syscall" + + "github.com/rneugeba/virtsock/go/vsock" +) + +type VSockDiagnosticListener struct{} + +func (l VSockDiagnosticListener) Listen() { + vsock, err := vsock.Listen(uint(62374)) + if err != nil { + if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT { + log.Printf("Failed to bind to vsock port 62374: %s", err) + } + } + + for { + TarRespond(vsock) + } +}