From 6c77d76f24ba0b4a654b3fa1823f4e60029005ee Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 13 Aug 2019 15:56:23 +0800 Subject: [PATCH] qemu: check guest status with qmp query-status When the guest panics or stops with an unexpected internal error, the qemu process might still be running, but we can detect such a situation with qmp. The monitor can then still report such failures to watchers. Fixes: #1963 Signed-off-by: Peng Tao --- virtcontainers/acrn.go | 15 ++++++++++++--- virtcontainers/fc.go | 8 ++++++++ virtcontainers/hypervisor.go | 1 + virtcontainers/mock_hypervisor.go | 4 ++++ virtcontainers/mock_hypervisor_test.go | 6 ++++++ virtcontainers/monitor.go | 3 +-- virtcontainers/qemu.go | 25 +++++++++++++++++++++---- 7 files changed, 53 insertions(+), 9 deletions(-) diff --git a/virtcontainers/acrn.go b/virtcontainers/acrn.go index 7fdb831bb1..877ff166aa 100644 --- a/virtcontainers/acrn.go +++ b/virtcontainers/acrn.go @@ -7,7 +7,6 @@ package virtcontainers import ( "context" - "errors" "fmt" "os" "os/exec" @@ -16,13 +15,15 @@ import ( "syscall" "time" + opentracing "github.com/opentracing/opentracing-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/kata-containers/runtime/virtcontainers/device/config" persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/store" "github.com/kata-containers/runtime/virtcontainers/types" "github.com/kata-containers/runtime/virtcontainers/utils" - opentracing "github.com/opentracing/opentracing-go" - "github.com/sirupsen/logrus" ) // AcrnState keeps Acrn's state @@ -648,3 +649,11 @@ func (a *acrn) load(s persistapi.HypervisorState) { a.info.PID = s.Pid a.state.UUID = s.UUID } + +func (a *acrn) check() error { + if err := syscall.Kill(a.pid(), syscall.Signal(0)); err != nil { + return errors.Wrapf(err, "failed to ping acrn process") + } + + return nil +} diff --git a/virtcontainers/fc.go b/virtcontainers/fc.go index 2ef5ffc594..f8bc56285d 100644 
--- a/virtcontainers/fc.go +++ b/virtcontainers/fc.go @@ -1006,3 +1006,11 @@ func (fc *firecracker) save() (s persistapi.HypervisorState) { func (fc *firecracker) load(s persistapi.HypervisorState) { fc.info.PID = s.Pid } + +func (fc *firecracker) check() error { + if err := syscall.Kill(fc.pid(), syscall.Signal(0)); err != nil { + return errors.Wrapf(err, "failed to ping fc process") + } + + return nil +} diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index f12595328b..4a795f7bac 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -671,6 +671,7 @@ type hypervisor interface { pid() int fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, store *store.VCStore, j []byte) error toGrpc() ([]byte, error) + check() error save() persistapi.HypervisorState load(persistapi.HypervisorState) diff --git a/virtcontainers/mock_hypervisor.go b/virtcontainers/mock_hypervisor.go index 1c198f47bd..90611aeb75 100644 --- a/virtcontainers/mock_hypervisor.go +++ b/virtcontainers/mock_hypervisor.go @@ -121,3 +121,7 @@ func (m *mockHypervisor) save() (s persistapi.HypervisorState) { } func (m *mockHypervisor) load(s persistapi.HypervisorState) {} + +func (m *mockHypervisor) check() error { + return nil +} diff --git a/virtcontainers/mock_hypervisor_test.go b/virtcontainers/mock_hypervisor_test.go index 77279ef22e..efcee1c808 100644 --- a/virtcontainers/mock_hypervisor_test.go +++ b/virtcontainers/mock_hypervisor_test.go @@ -82,3 +82,9 @@ func TestMockHypervisorDisconnect(t *testing.T) { m.disconnect() } + +func TestMockHypervisorCheck(t *testing.T) { + var m *mockHypervisor + + assert.NoError(t, m.check()) +} diff --git a/virtcontainers/monitor.go b/virtcontainers/monitor.go index 25c55f4250..ff5b4d0348 100644 --- a/virtcontainers/monitor.go +++ b/virtcontainers/monitor.go @@ -7,7 +7,6 @@ package virtcontainers import ( "sync" - "syscall" "time" "github.com/pkg/errors" @@ -127,7 +126,7 @@ func (m *monitor) watchAgent() { } 
func (m *monitor) watchHypervisor() error { - if err := syscall.Kill(m.sandbox.hypervisor.pid(), syscall.Signal(0)); err != nil { + if err := m.sandbox.hypervisor.check(); err != nil { m.notify(errors.Wrapf(err, "failed to ping hypervisor process")) return err } diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 1366e39bd3..c36bd5bbb9 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -10,7 +10,6 @@ import ( "context" "encoding/hex" "encoding/json" - "errors" "fmt" "io/ioutil" "math" @@ -24,17 +23,17 @@ import ( "unsafe" govmmQemu "github.com/intel/govmm/qemu" - "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" "github.com/opentracing/opentracing-go" + "github.com/pkg/errors" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" "github.com/kata-containers/runtime/virtcontainers/device/config" persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" + "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" "github.com/kata-containers/runtime/virtcontainers/store" "github.com/kata-containers/runtime/virtcontainers/types" "github.com/kata-containers/runtime/virtcontainers/utils" - - "golang.org/x/sys/unix" ) // romFile is the file name of the ROM that can be used for virtio-pci devices. @@ -2015,3 +2014,21 @@ func (q *qemu) load(s persistapi.HypervisorState) { }) } } + +func (q *qemu) check() error { + err := q.qmpSetup() + if err != nil { + return err + } + + status, err := q.qmpMonitorCh.qmp.ExecuteQueryStatus(q.qmpMonitorCh.ctx) + if err != nil { + return err + } + + if status.Status == "internal-error" || status.Status == "guest-panicked" { + return errors.Errorf("guest failure: %s", status.Status) + } + + return nil +}