Re-write diagnostic server to support cloud/HTTP

Signed-off-by: Nathan LeClaire <nathan.leclaire@gmail.com>
This commit is contained in:
Nathan LeClaire 2016-07-12 15:06:28 -07:00
parent 8a177d59f8
commit 1e0dfd4798
9 changed files with 401 additions and 161 deletions

View File

@ -0,0 +1,170 @@
package main
import (
"archive/tar"
"bytes"
"io"
"log"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"time"
)
var (
commonCmdCaptures = []CommandCapturer{
{"/bin/date", nil},
{"/bin/uname", []string{"-a"}},
{"/bin/ps", []string{"uax"}},
{"/bin/netstat", []string{"-tulpn"}},
{"/sbin/iptables-save", nil},
{"/sbin/ifconfig", nil},
{"/sbin/route", nil},
{"/usr/sbin/brctl", nil},
{"/bin/dmesg", nil},
{"/usr/bin/docker", []string{"ps"}},
{"/usr/bin/tail", []string{"-100", "/var/log/docker.log"}},
{"/usr/bin/tail", []string{"-100", "/var/log/messages"}},
{"/bin/mount", nil},
{"/bin/df", nil},
{"/bin/ls", []string{"-l", "/var"}},
{"/bin/ls", []string{"-l", "/var/lib"}},
{"/bin/ls", []string{"-l", "/var/lib/docker"}},
{"/usr/bin/diagnostics", nil},
{"/bin/ping", []string{"-w", "5", "8.8.8.8"}},
{"/bin/cat", []string{"/etc/docker/daemon.json"}},
{"/bin/cat", []string{"/etc/network/interfaces"}},
{"/bin/cat", []string{"/etc/resolv.conf"}},
{"/bin/cat", []string{"/etc/sysctl.conf"}},
{"/usr/bin/dig", []string{"docker.com"}},
{"/usr/bin/dig", []string{"@8.8.8.8", "docker.com"}},
{"/usr/bin/wget", []string{"-O", "-", "http://www.docker.com/"}},
{"/usr/bin/wget", []string{"-O", "-", "http://104.239.220.248/"}}, // a www.docker.com address
{"/usr/bin/wget", []string{"-O", "-", "http://216.58.213.68/"}}, // a www.google.com address
{"/usr/bin/wget", []string{"-O", "-", "http://91.198.174.192/"}}, // a www.wikipedia.com address
}
localCmdCaptures = []CommandCapturer{
{"/usr/bin/tail", []string{"-100", "/var/log/proxy-vsockd.log"}},
{"/usr/bin/tail", []string{"-100", "/var/log/service-port-opener.log"}},
{"/usr/bin/tail", []string{"-100", "/var/log/vsudd.log"}},
}
localCaptures = []Capturer{NewDatabaseCapturer()}
)
func init() {
for _, c := range localCmdCaptures {
localCmdCaptures = append(localCmdCaptures, c)
}
}
// Capturer defines behavior for structs which will capture arbitrary
// diagnostic information and write it to a tar archive with a timeout.
type Capturer interface {
Capture(time.Duration, *tar.Writer)
}
type CommandCapturer struct {
command string
args []string
}
func (cc CommandCapturer) Capture(timeout time.Duration, w *tar.Writer) {
log.Printf("Running %s", cc.command)
c := exec.Command(cc.command, cc.args...)
stdoutPipe, err := c.StdoutPipe()
if err != nil {
log.Fatalf("Failed to create stdout pipe: %s", err)
}
stderrPipe, err := c.StderrPipe()
if err != nil {
log.Fatalf("Failed to create stderr pipe: %s", err)
}
var stdoutBuffer bytes.Buffer
var stderrBuffer bytes.Buffer
done := make(chan int)
go func() {
io.Copy(&stdoutBuffer, stdoutPipe)
done <- 0
}()
go func() {
io.Copy(&stderrBuffer, stderrPipe)
done <- 0
}()
var timer *time.Timer
timer = time.AfterFunc(timeout, func() {
timer.Stop()
if c.Process != nil {
c.Process.Kill()
}
})
_ = c.Run()
<-done
<-done
timer.Stop()
name := strings.Join(append([]string{path.Base(cc.command)}, cc.args...), " ")
hdr := &tar.Header{
Name: name + ".stdout",
Mode: 0644,
Size: int64(stdoutBuffer.Len()),
}
if err = w.WriteHeader(hdr); err != nil {
log.Fatalln(err)
}
if _, err = w.Write(stdoutBuffer.Bytes()); err != nil {
log.Fatalln(err)
}
hdr = &tar.Header{
Name: name + ".stderr",
Mode: 0644,
Size: int64(stderrBuffer.Len()),
}
if err = w.WriteHeader(hdr); err != nil {
log.Fatalln(err)
}
if _, err = w.Write(stderrBuffer.Bytes()); err != nil {
log.Fatalln(err)
}
}
type DatabaseCapturer struct {
*CommandCapturer
}
func NewDatabaseCapturer() DatabaseCapturer {
return DatabaseCapturer{
&CommandCapturer{
command: "/bin/cat",
},
}
}
func (dc DatabaseCapturer) Capture(timeout time.Duration, w *tar.Writer) {
// Dump the database
dbBase := "/Database/branch/master/ro"
filepath.Walk(dbBase, func(path string, f os.FileInfo, err error) error {
if f.Mode().IsRegular() {
dc.CommandCapturer.args = []string{path}
dc.CommandCapturer.Capture(timeout, w)
}
return nil
})
}
// Capture is the outer level wrapper function to trigger the capturing of
// information. Clients are expected to call it with a slice of Capturers
// which define the information to be captured. By using an interface we can
// flexibly define various capture actions for the various listeners.
//
// It is a passed a tar.Writer which the results of the capture will be written
// to.
func Capture(w *tar.Writer, captures []Capturer) {
t := 2 * time.Second
for _, c := range captures {
c.Capture(t, w)
}
}

View File

@ -9,6 +9,25 @@ start()
{
ebegin "Checking system state"
/usr/bin/diagnostics-server &
DIAGNOSTICS_SERVER_FLAGS=""
case "$(mobyplatform)" in
"aws")
DIAGNOSTICS_SERVER_FLAGS="-http"
;;
"azure")
DIAGNOSTICS_SERVER_FLAGS="-http"
;;
"windows")
# TODO(Rolf/David): Are these correct?
DIAGNOSTICS_SERVER_FLAGS="-hvsock -rawtcp"
;;
"mac")
# TODO(Justin/Anil): Are these correct?
DIAGNOSTICS_SERVER_FLAGS="-vsock -rawtcp"
;;
esac
/usr/bin/diagnostics-server ${DIAGNOSTICS_SERVER_FLAGS} &
/usr/bin/diagnostics
}

View File

@ -0,0 +1,67 @@
package main
import (
"archive/tar"
"io/ioutil"
"log"
"net/http"
"os"
)
const (
dockerSock = "/var/run/docker.sock"
lgtm = "LGTM"
httpMagicPort = ":44554" // chosen arbitrarily due to IANA availability -- might change
)
var (
cloudCaptures = []Capturer{}
)
func init() {
for _, c := range commonCmdCaptures {
cloudCaptures = append(cloudCaptures, c)
}
}
// HTTPDiagnosticListener sets a health check and optional diagnostic endpoint
// for cloud editions.
type HTTPDiagnosticListener struct{}
func (h HTTPDiagnosticListener) Listen() {
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
if _, err := os.Stat(dockerSock); os.IsNotExist(err) {
http.Error(w, "Docker socket not found -- daemon is down", http.StatusServiceUnavailable)
return
}
if _, err := w.Write([]byte(lgtm)); err != nil {
log.Println("Error writing HTTP success response:", err)
return
}
})
http.HandleFunc("/diagnose", func(w http.ResponseWriter, r *http.Request) {
dir, err := ioutil.TempDir("", "diagnostics")
if err != nil {
log.Println("Error creating temp dir on diagnostic request:", err)
return
}
file, err := ioutil.TempFile(dir, "diagnostics")
if err != nil {
log.Println("Error creating temp file on diagnostic request:", err)
return
}
tarWriter := tar.NewWriter(file)
Capture(tarWriter, cloudCaptures)
// TODO: upload written (and gzipped?) tar file to our S3
// bucket with specific path convention (per-user? by date?)
})
// Start HTTP server to indicate general Docker health.
// TODO: no magic port?
http.ListenAndServe(httpMagicPort, nil)
}

View File

@ -0,0 +1,24 @@
package main
import (
"log"
"syscall"
"github.com/rneugeba/virtsock/go/hvsock"
)
type HVSockDiagnosticListener struct{}
func (l HVSockDiagnosticListener) Listen() {
svcid, _ := hvsock.GuidFromString("445BA2CB-E69B-4912-8B42-D7F494D007EA")
hvsock, err := hvsock.Listen(hvsock.HypervAddr{VmId: hvsock.GUID_WILDCARD, ServiceId: svcid})
if err != nil {
if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT {
log.Printf("Failed to bind to hvsock port: %s", err)
}
}
for {
TarRespond(hvsock)
}
}

View File

@ -1,132 +1,12 @@
package main
import (
"archive/tar"
"bytes"
"io"
"flag"
"log"
"log/syslog"
"net"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/rneugeba/virtsock/go/hvsock"
"github.com/rneugeba/virtsock/go/vsock"
)
func run(timeout time.Duration, w *tar.Writer, command string, args ...string) {
log.Printf("Running %s", command)
c := exec.Command(command, args...)
stdoutPipe, err := c.StdoutPipe()
if err != nil {
log.Fatalf("Failed to create stdout pipe: %s", err)
}
stderrPipe, err := c.StderrPipe()
if err != nil {
log.Fatalf("Failed to create stderr pipe: %s", err)
}
var stdoutBuffer bytes.Buffer
var stderrBuffer bytes.Buffer
done := make(chan int)
go func() {
io.Copy(&stdoutBuffer, stdoutPipe)
done <- 0
}()
go func() {
io.Copy(&stderrBuffer, stderrPipe)
done <- 0
}()
var timer *time.Timer
timer = time.AfterFunc(timeout, func() {
timer.Stop()
if c.Process != nil {
c.Process.Kill()
}
})
_ = c.Run()
<-done
<-done
timer.Stop()
name := strings.Join(append([]string{path.Base(command)}, args...), " ")
hdr := &tar.Header{
Name: name + ".stdout",
Mode: 0644,
Size: int64(stdoutBuffer.Len()),
}
if err = w.WriteHeader(hdr); err != nil {
log.Fatalln(err)
}
if _, err = w.Write(stdoutBuffer.Bytes()); err != nil {
log.Fatalln(err)
}
hdr = &tar.Header{
Name: name + ".stderr",
Mode: 0644,
Size: int64(stderrBuffer.Len()),
}
if err = w.WriteHeader(hdr); err != nil {
log.Fatalln(err)
}
if _, err = w.Write(stderrBuffer.Bytes()); err != nil {
log.Fatalln(err)
}
}
func capture(w *tar.Writer) {
t := 2 * time.Second
run(t, w, "/bin/date")
run(t, w, "/bin/uname", "-a")
run(t, w, "/bin/ps", "uax")
run(t, w, "/bin/netstat", "-tulpn")
run(t, w, "/sbin/iptables-save")
run(t, w, "/sbin/ifconfig", "-a")
run(t, w, "/sbin/route", "-n")
run(t, w, "/usr/sbin/brctl", "show")
run(t, w, "/bin/dmesg")
run(t, w, "/usr/bin/docker", "ps")
run(t, w, "/usr/bin/tail", "-100", "/var/log/docker.log")
run(t, w, "/usr/bin/tail", "-100", "/var/log/messages")
run(t, w, "/usr/bin/tail", "-100", "/var/log/proxy-vsockd.log")
run(t, w, "/usr/bin/tail", "-100", "/var/log/service-port-opener.log")
run(t, w, "/usr/bin/tail", "-100", "/var/log/vsudd.log")
run(t, w, "/bin/mount")
run(t, w, "/bin/df")
run(t, w, "/bin/ls", "-l", "/var")
run(t, w, "/bin/ls", "-l", "/var/lib")
run(t, w, "/bin/ls", "-l", "/var/lib/docker")
run(t, w, "/usr/bin/diagnostics")
run(t, w, "/bin/ping", "-w", "5", "8.8.8.8")
run(t, w, "/bin/cat", "/etc/docker/daemon.json")
run(t, w, "/bin/cat", "/etc/network/interfaces")
run(t, w, "/bin/cat", "/etc/resolv.conf")
run(t, w, "/bin/cat", "/etc/sysctl.conf")
run(t, w, "/usr/bin/dig", "docker.com")
run(t, w, "/usr/bin/dig", "@8.8.8.8", "docker.com")
run(t, w, "/usr/bin/wget", "-O", "-", "http://www.docker.com/")
run(t, w, "/usr/bin/wget", "-O", "-", "http://104.239.220.248/") // a www.docker.com address
run(t, w, "/usr/bin/wget", "-O", "-", "http://216.58.213.68/") // a www.google.com address
run(t, w, "/usr/bin/wget", "-O", "-", "http://91.198.174.192/") // a www.wikipedia.com address
// Dump the database
dbBase := "/Database/branch/master/ro"
filepath.Walk(dbBase, func(path string, f os.FileInfo, err error) error {
if f.Mode().IsRegular() {
run(t, w, "/bin/cat", path)
}
return nil
})
}
func main() {
func init() {
syslog, err := syslog.New(syslog.LOG_INFO|syslog.LOG_DAEMON, "diagnostics")
if err != nil {
log.Fatalln("Failed to open syslog", err)
@ -134,51 +14,48 @@ func main() {
log.SetOutput(syslog)
log.SetFlags(0)
}
listeners := make([]net.Listener, 0)
type DiagnosticListener interface {
// Listen(), a blocking method intended to be invoked in its own
// goroutine, will listen for a diagnostic information request and
// capture the desired information if one is received.
Listen()
}
ip, err := net.Listen("tcp", ":62374")
if err != nil {
log.Printf("Failed to bind to TCP port 62374: %s", err)
} else {
listeners = append(listeners, ip)
// DiagnosticUploader uploads the collected information to the mothership.
type DiagnosticUploader interface {
Upload() error
}
vsock, err := vsock.Listen(uint(62374))
if err != nil {
if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT {
log.Printf("Failed to bind to vsock port 62374: %s", err)
func main() {
flHTTP := flag.Bool("http", false, "Enable diagnostic HTTP listener")
flVSock := flag.Bool("vsock", false, "Enable vsock listener")
flHVSock := flag.Bool("hvsock", false, "Enable hvsock listener")
flRawTCP := flag.Bool("rawtcp", false, "Enable raw TCP listener")
flag.Parse()
listeners := make([]DiagnosticListener, 0)
if *flHTTP {
listeners = append(listeners, HTTPDiagnosticListener{})
}
} else {
listeners = append(listeners, vsock)
if *flVSock {
listeners = append(listeners, VSockDiagnosticListener{})
}
svcid, _ := hvsock.GuidFromString("445BA2CB-E69B-4912-8B42-D7F494D007EA")
hvsock, err := hvsock.Listen(hvsock.HypervAddr{VmId: hvsock.GUID_WILDCARD, ServiceId: svcid})
if err != nil {
if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT {
log.Printf("Failed to bind to hvsock port: %s", err)
if *flHVSock {
listeners = append(listeners, HVSockDiagnosticListener{})
}
} else {
listeners = append(listeners, hvsock)
if *flRawTCP {
listeners = append(listeners, RawTCPDiagnosticListener{})
}
for _, l := range listeners {
go func(l net.Listener) {
for {
conn, err := l.Accept()
if err != nil {
log.Printf("Error accepting connection: %s", err)
return // no more listening
}
go func(conn net.Conn) {
w := tar.NewWriter(conn)
capture(w)
if err := w.Close(); err != nil {
log.Println(err)
}
conn.Close()
}(conn)
}
}(l)
go l.Listen()
}
forever := make(chan int)
<-forever

View File

@ -0,0 +1,19 @@
package main
import (
"log"
"net"
)
type RawTCPDiagnosticListener struct{}
func (l RawTCPDiagnosticListener) Listen() {
ip, err := net.Listen("tcp", ":62374")
if err != nil {
log.Printf("Failed to bind to TCP port 62374: %s", err)
}
for {
TarRespond(ip)
}
}

View File

@ -0,0 +1 @@
package main

View File

@ -0,0 +1,40 @@
package main
import (
"archive/tar"
"log"
"net"
)
// TarRespond is used to write back a tar archive over a connection for the
// lower-level listener types.
//
// In local virtualization (which this is for) we write back a tar file
// directly and the client takes care of shipping the result to the mothership.
//
// By contrast, in cloud editions we expect each node to ship the captured
// information on its own, so this function is not used.
//
// This is a deliberate design to choice to ensure that it is possible in the
// future for diagnostic information to be reported from nodes which have have
// been separated via network partition from the node which initiates
// diagnostic collection, and/or if we decide to automatically collect
// diagnostic information from nodes which deem *themselves* unhealthy at a
// future time.
func TarRespond(l net.Listener) {
conn, err := l.Accept()
if err != nil {
log.Printf("Error accepting connection: %s", err)
return
}
w := tar.NewWriter(conn)
Capture(w, localCaptures)
if err := w.Close(); err != nil {
log.Println(err)
}
conn.Close()
}

View File

@ -0,0 +1,23 @@
package main
import (
"log"
"syscall"
"github.com/rneugeba/virtsock/go/vsock"
)
type VSockDiagnosticListener struct{}
func (l VSockDiagnosticListener) Listen() {
vsock, err := vsock.Listen(uint(62374))
if err != nil {
if errno, ok := err.(syscall.Errno); !ok || errno != syscall.EAFNOSUPPORT {
log.Printf("Failed to bind to vsock port 62374: %s", err)
}
}
for {
TarRespond(vsock)
}
}