mirror of
https://github.com/linuxkit/linuxkit.git
synced 2025-07-30 22:21:34 +00:00
Merge pull request #438 from nathanleclaire/diagnosticserverrace
Fix race condition(s) in command collection code
This commit is contained in:
commit
655c3dae30
@ -3,7 +3,8 @@ package main
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"io"
|
||||
"context"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
@ -13,41 +14,49 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultCommandTimeout = 5 * time.Second
|
||||
|
||||
// Might eventually have some pretty long (~30s) traces in here, so 35
|
||||
// seconds seems reasonable.
|
||||
allCaptureTimeout = 35 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
commonCmdCaptures = []CommandCapturer{
|
||||
{"/bin/date", nil},
|
||||
{"/bin/uname", []string{"-a"}},
|
||||
{"/bin/ps", []string{"uax"}},
|
||||
{"/bin/netstat", []string{"-tulpn"}},
|
||||
{"/sbin/iptables-save", nil},
|
||||
{"/sbin/ifconfig", nil},
|
||||
{"/sbin/route", nil},
|
||||
{"/usr/sbin/brctl", []string{"show"}},
|
||||
{"/bin/dmesg", nil},
|
||||
{"/usr/bin/docker", []string{"ps"}},
|
||||
{"/usr/bin/tail", []string{"-20000", "/var/log/docker.log"}},
|
||||
{"/usr/bin/tail", []string{"-20000", "/var/log/messages"}},
|
||||
{"/bin/mount", nil},
|
||||
{"/bin/df", []string{"-h"}},
|
||||
{"/bin/ls", []string{"-l", "/var"}},
|
||||
{"/bin/ls", []string{"-l", "/var/lib"}},
|
||||
{"/bin/ls", []string{"-l", "/var/lib/docker"}},
|
||||
{"/usr/bin/diagnostics", nil},
|
||||
{"/bin/ping", []string{"-w", "5", "8.8.8.8"}},
|
||||
{"/bin/cat", []string{"/etc/docker/daemon.json"}},
|
||||
{"/bin/cat", []string{"/etc/network/interfaces"}},
|
||||
{"/bin/cat", []string{"/etc/resolv.conf"}},
|
||||
{"/bin/cat", []string{"/etc/sysctl.conf"}},
|
||||
{"/usr/bin/dig", []string{"docker.com"}},
|
||||
{"/usr/bin/dig", []string{"@8.8.8.8", "docker.com"}},
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://www.docker.com/"}},
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://104.239.220.248/"}}, // a www.docker.com address
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://216.58.213.68/"}}, // a www.google.com address
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://91.198.174.192/"}}, // a www.wikipedia.com address
|
||||
{"/bin/date", nil, defaultCommandTimeout},
|
||||
{"/bin/uname", []string{"-a"}, defaultCommandTimeout},
|
||||
{"/bin/ps", []string{"uax"}, defaultCommandTimeout},
|
||||
{"/bin/netstat", []string{"-tulpn"}, defaultCommandTimeout},
|
||||
{"/sbin/iptables-save", nil, defaultCommandTimeout},
|
||||
{"/sbin/ifconfig", nil, defaultCommandTimeout},
|
||||
{"/sbin/route", nil, defaultCommandTimeout},
|
||||
{"/usr/sbin/brctl", []string{"show"}, defaultCommandTimeout},
|
||||
{"/bin/dmesg", nil, defaultCommandTimeout},
|
||||
{"/usr/bin/docker", []string{"ps"}, defaultCommandTimeout},
|
||||
{"/usr/bin/tail", []string{"-20000", "/var/log/docker.log"}, defaultCommandTimeout},
|
||||
{"/usr/bin/tail", []string{"-20000", "/var/log/messages"}, defaultCommandTimeout},
|
||||
{"/bin/mount", nil, defaultCommandTimeout},
|
||||
{"/bin/df", []string{"-h"}, defaultCommandTimeout},
|
||||
{"/bin/ls", []string{"-l", "/var"}, defaultCommandTimeout},
|
||||
{"/bin/ls", []string{"-l", "/var/lib"}, defaultCommandTimeout},
|
||||
{"/bin/ls", []string{"-l", "/var/lib/docker"}, defaultCommandTimeout},
|
||||
{"/usr/bin/diagnostics", nil, defaultCommandTimeout},
|
||||
{"/bin/ping", []string{"-w", "5", "8.8.8.8"}, 6 * time.Second},
|
||||
{"/bin/cat", []string{"/etc/docker/daemon.json"}, defaultCommandTimeout},
|
||||
{"/bin/cat", []string{"/etc/network/interfaces"}, defaultCommandTimeout},
|
||||
{"/bin/cat", []string{"/etc/resolv.conf"}, defaultCommandTimeout},
|
||||
{"/bin/cat", []string{"/etc/sysctl.conf"}, defaultCommandTimeout},
|
||||
{"/usr/bin/dig", []string{"docker.com"}, defaultCommandTimeout},
|
||||
{"/usr/bin/dig", []string{"@8.8.8.8", "docker.com"}, defaultCommandTimeout},
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://www.docker.com/"}, defaultCommandTimeout},
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://104.239.220.248/"}, defaultCommandTimeout}, // a www.docker.com address
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://216.58.213.68/"}, defaultCommandTimeout}, // a www.google.com address
|
||||
{"/usr/bin/wget", []string{"-O", "-", "http://91.198.174.192/"}, defaultCommandTimeout}, // a www.wikipedia.com address
|
||||
}
|
||||
localCmdCaptures = []CommandCapturer{
|
||||
{"/usr/bin/tail", []string{"-100", "/var/log/proxy-vsockd.log"}},
|
||||
{"/usr/bin/tail", []string{"-100", "/var/log/vsudd.log"}},
|
||||
{"/usr/bin/tail", []string{"-100", "/var/log/proxy-vsockd.log"}, defaultCommandTimeout},
|
||||
{"/usr/bin/tail", []string{"-100", "/var/log/vsudd.log"}, defaultCommandTimeout},
|
||||
}
|
||||
localCaptures = []Capturer{NewDatabaseCapturer()}
|
||||
)
|
||||
@ -64,72 +73,73 @@ func init() {
|
||||
// Capturer defines behavior for structs which will capture arbitrary
|
||||
// diagnostic information and write it to a tar archive with a timeout.
|
||||
type Capturer interface {
|
||||
Capture(time.Duration, *tar.Writer)
|
||||
Capture(context.Context, *tar.Writer)
|
||||
}
|
||||
|
||||
type CommandCapturer struct {
|
||||
command string
|
||||
args []string
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
func (cc CommandCapturer) Capture(timeout time.Duration, w *tar.Writer) {
|
||||
log.Printf("Running %s", cc.command)
|
||||
c := exec.Command(cc.command, cc.args...)
|
||||
stdoutPipe, err := c.StdoutPipe()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to create stdout pipe: %s", err)
|
||||
}
|
||||
stderrPipe, err := c.StderrPipe()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to create stderr pipe: %s", err)
|
||||
}
|
||||
var stdoutBuffer bytes.Buffer
|
||||
var stderrBuffer bytes.Buffer
|
||||
done := make(chan int)
|
||||
go func() {
|
||||
io.Copy(&stdoutBuffer, stdoutPipe)
|
||||
done <- 0
|
||||
}()
|
||||
go func() {
|
||||
io.Copy(&stderrBuffer, stderrPipe)
|
||||
done <- 0
|
||||
}()
|
||||
var timer *time.Timer
|
||||
timer = time.AfterFunc(timeout, func() {
|
||||
timer.Stop()
|
||||
if c.Process != nil {
|
||||
c.Process.Kill()
|
||||
}
|
||||
})
|
||||
_ = c.Run()
|
||||
<-done
|
||||
<-done
|
||||
timer.Stop()
|
||||
func (cc CommandCapturer) Capture(parentCtx context.Context, w *tar.Writer) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
done := make(chan struct{})
|
||||
|
||||
name := strings.Join(append([]string{path.Base(cc.command)}, cc.args...), " ")
|
||||
ctx, cancel := context.WithTimeout(parentCtx, cc.timeout)
|
||||
cmd := exec.CommandContext(ctx, cc.command, cc.args...)
|
||||
cmd.Stdout = stdout
|
||||
cmd.Stderr = stderr
|
||||
|
||||
go runCmd(cmd, done)
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Println("ERROR:", ctx.Err())
|
||||
case <-done:
|
||||
tarWrite(w, stdout, name+".stdout")
|
||||
tarWrite(w, stderr, name+".stderr")
|
||||
}
|
||||
|
||||
cancel()
|
||||
}
|
||||
|
||||
// TODO(nathanleclaire): Is the user of log.Fatalln in this function really the
|
||||
// right choice? i.e., should the program really exit on failure here?
|
||||
func tarWrite(w *tar.Writer, buf *bytes.Buffer, headerName string) {
|
||||
contents, err := ioutil.ReadAll(buf)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
log.Println("HEADER:", headerName)
|
||||
log.Println("{")
|
||||
contentLines := strings.Split(string(contents), "\n")
|
||||
for _, line := range contentLines {
|
||||
log.Println(line)
|
||||
}
|
||||
log.Println("}")
|
||||
|
||||
hdr := &tar.Header{
|
||||
Name: name + ".stdout",
|
||||
Name: headerName,
|
||||
Mode: 0644,
|
||||
Size: int64(stdoutBuffer.Len()),
|
||||
Size: int64(len(contents)),
|
||||
}
|
||||
if err = w.WriteHeader(hdr); err != nil {
|
||||
if err := w.WriteHeader(hdr); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
if _, err = w.Write(stdoutBuffer.Bytes()); err != nil {
|
||||
if _, err := w.Write(contents); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
hdr = &tar.Header{
|
||||
Name: name + ".stderr",
|
||||
Mode: 0644,
|
||||
Size: int64(stderrBuffer.Len()),
|
||||
}
|
||||
if err = w.WriteHeader(hdr); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
if _, err = w.Write(stderrBuffer.Bytes()); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
func runCmd(cmd *exec.Cmd, done chan<- struct{}) {
|
||||
if err := cmd.Run(); err != nil {
|
||||
log.Println("ERROR:", err)
|
||||
}
|
||||
done <- struct{}{}
|
||||
}
|
||||
|
||||
type DatabaseCapturer struct {
|
||||
@ -144,13 +154,13 @@ func NewDatabaseCapturer() DatabaseCapturer {
|
||||
}
|
||||
}
|
||||
|
||||
func (dc DatabaseCapturer) Capture(timeout time.Duration, w *tar.Writer) {
|
||||
func (dc DatabaseCapturer) Capture(parentCtx context.Context, w *tar.Writer) {
|
||||
// Dump the database
|
||||
dbBase := "/Database/branch/master/ro"
|
||||
filepath.Walk(dbBase, func(path string, f os.FileInfo, err error) error {
|
||||
if f.Mode().IsRegular() {
|
||||
dc.CommandCapturer.args = []string{path}
|
||||
dc.CommandCapturer.Capture(timeout, w)
|
||||
dc.CommandCapturer.Capture(parentCtx, w)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
@ -164,9 +174,22 @@ func (dc DatabaseCapturer) Capture(timeout time.Duration, w *tar.Writer) {
|
||||
// It is a passed a tar.Writer which the results of the capture will be written
|
||||
// to.
|
||||
func Capture(w *tar.Writer, captures []Capturer) {
|
||||
t := 2 * time.Second
|
||||
allCaptureCtx, cancel := context.WithTimeout(context.Background(), allCaptureTimeout)
|
||||
done := make(chan struct{})
|
||||
|
||||
for _, c := range captures {
|
||||
c.Capture(t, w)
|
||||
go func(captures []Capturer, ctx context.Context, done chan<- struct{}) {
|
||||
for _, c := range captures {
|
||||
c.Capture(ctx, w)
|
||||
}
|
||||
done <- struct{}{}
|
||||
}(captures, allCaptureCtx, done)
|
||||
|
||||
select {
|
||||
case <-allCaptureCtx.Done():
|
||||
log.Println("Global command context", allCaptureCtx.Err())
|
||||
case <-done:
|
||||
log.Println("Captures all finished")
|
||||
}
|
||||
|
||||
cancel()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user