diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 3f9935043..93359e16a 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -479,7 +479,7 @@ func (a *Acrn) waitSandbox(ctx context.Context, timeoutSecs int) error { } // stopSandbox will stop the Sandbox's VM. -func (a *Acrn) stopSandbox(ctx context.Context) (err error) { +func (a *Acrn) stopSandbox(ctx context.Context, waitOnly bool) (err error) { span, _ := a.trace(ctx, "stopSandbox") defer span.End() @@ -513,6 +513,11 @@ func (a *Acrn) stopSandbox(ctx context.Context) (err error) { shutdownSignal := syscall.SIGINT + if waitOnly { + // NOP + shutdownSignal = syscall.Signal(0) + } + return utils.WaitLocalProcess(pid, acrnStopSandboxTimeoutSecs, shutdownSignal, a.Logger()) } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 2fc74c076..3f9a364d6 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -652,11 +652,11 @@ func (clh *cloudHypervisor) resumeSandbox(ctx context.Context) error { } // stopSandbox will stop the Sandbox's VM. -func (clh *cloudHypervisor) stopSandbox(ctx context.Context) (err error) { +func (clh *cloudHypervisor) stopSandbox(ctx context.Context, waitOnly bool) (err error) { span, _ := clh.trace(ctx, "stopSandbox") defer span.End() clh.Logger().WithField("function", "stopSandbox").Info("Stop Sandbox") - return clh.terminate(ctx) + return clh.terminate(ctx, waitOnly) } func (clh *cloudHypervisor) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error { @@ -756,7 +756,7 @@ func (clh *cloudHypervisor) trace(parent context.Context, name string) (otelTrac return span, ctx } -func (clh *cloudHypervisor) terminate(ctx context.Context) (err error) { +func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err error) { span, _ := clh.trace(ctx, "terminate") defer span.End() @@ -775,7 +775,7 @@ func (clh *cloudHypervisor) terminate(ctx context.Context) (err error) { clh.Logger().Debug("Stopping Cloud Hypervisor") - if pidRunning { + if pidRunning && !waitOnly { clhRunning, _ := clh.isClhRunning(clhStopSandboxTimeout) if clhRunning { ctx, cancel := context.WithTimeout(context.Background(), clhStopSandboxTimeout*time.Second) diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index c0644ed71..7dac0e78a 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -409,7 +409,7 @@ func (fc *firecracker) fcInit(ctx context.Context, timeout int) error { return nil } -func (fc *firecracker) fcEnd(ctx context.Context) (err error) { +func (fc *firecracker) fcEnd(ctx context.Context, waitOnly bool) (err error) { span, _ := fc.trace(ctx, "fcEnd") defer span.End() @@ -427,6 +427,11 @@ func (fc *firecracker) fcEnd(ctx context.Context) (err error) { shutdownSignal := syscall.SIGTERM + if waitOnly { + // NOP + shutdownSignal = syscall.Signal(0) + } + // Wait for the VM process to terminate return utils.WaitLocalProcess(pid, fcStopSandboxTimeout, shutdownSignal, fc.Logger()) } @@ -760,7 +765,7 @@ func (fc *firecracker) startSandbox(ctx context.Context, timeout int) error { var err error defer func() { if err != nil { - fc.fcEnd(ctx) + fc.fcEnd(ctx, false) } }() @@ -853,11 +858,11 @@ func (fc *firecracker) cleanupJail(ctx context.Context) { } // stopSandbox will stop the Sandbox's VM. -func (fc *firecracker) stopSandbox(ctx context.Context) (err error) { +func (fc *firecracker) stopSandbox(ctx context.Context, waitOnly bool) (err error) { span, _ := fc.trace(ctx, "stopSandbox") defer span.End() - return fc.fcEnd(ctx) + return fc.fcEnd(ctx, waitOnly) } func (fc *firecracker) pauseSandbox(ctx context.Context) error { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index a7fb093cd..dc6fedf06 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -787,7 +787,9 @@ func generateVMSocket(id string, vmStogarePath string) (interface{}, error) { type hypervisor interface { createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error startSandbox(ctx context.Context, timeout int) error - stopSandbox(ctx context.Context) error + // If wait is set, don't actively stop the sandbox: + // just perform cleanup. + stopSandbox(ctx context.Context, waitOnly bool) error pauseSandbox(ctx context.Context) error saveSandbox() error resumeSandbox(ctx context.Context) error diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index fd2a6161b..af19130f4 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ b/src/runtime/virtcontainers/mock_hypervisor.go @@ -41,7 +41,7 @@ func (m *mockHypervisor) startSandbox(ctx context.Context, timeout int) error { return nil } -func (m *mockHypervisor) stopSandbox(ctx context.Context) error { +func (m *mockHypervisor) stopSandbox(ctx context.Context, waitOnly bool) error { return nil } diff --git a/src/runtime/virtcontainers/mock_hypervisor_test.go b/src/runtime/virtcontainers/mock_hypervisor_test.go index dece25165..c557f8658 100644 --- a/src/runtime/virtcontainers/mock_hypervisor_test.go +++ b/src/runtime/virtcontainers/mock_hypervisor_test.go @@ -53,7 +53,7 @@ func TestMockHypervisorStartSandbox(t *testing.T) { func TestMockHypervisorStopSandbox(t *testing.T) { var m *mockHypervisor - assert.NoError(t, m.stopSandbox(context.Background())) + assert.NoError(t, m.stopSandbox(context.Background(), false)) } func TestMockHypervisorAddDevice(t *testing.T) { diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 98f9a844e..e44d16faa 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -122,6 +122,8 @@ const ( scsiControllerID = "scsi0" rngID = "rng0" fallbackFileBackedMemDir = "/dev/shm" + + qemuStopSandboxTimeoutSecs = 15 ) // agnostic list of kernel parameters @@ -703,7 +705,7 @@ func (q *qemu) setupVirtiofsd(ctx context.Context) (err error) { q.Logger().Info("virtiofsd quits") // Wait to release resources of virtiofsd process cmd.Process.Wait() - q.stopSandbox(ctx) + q.stopSandbox(ctx, false) }() return err } @@ -933,7 +935,7 @@ func (q *qemu) waitSandbox(ctx context.Context, timeout int) error { } // stopSandbox will stop the Sandbox's VM. -func (q *qemu) stopSandbox(ctx context.Context) error { +func (q *qemu) stopSandbox(ctx context.Context, waitOnly bool) error { span, _ := q.trace(ctx, "stopSandbox") defer span.End() @@ -965,10 +967,24 @@ func (q *qemu) stopSandbox(ctx context.Context) error { return err } - err := q.qmpMonitorCh.qmp.ExecuteQuit(q.qmpMonitorCh.ctx) - if err != nil { - q.Logger().WithError(err).Error("Fail to execute qmp QUIT") - return err + if waitOnly { + pids := q.getPids() + if len(pids) == 0 { + return errors.New("cannot determine QEMU PID") + } + + pid := pids[0] + + err := utils.WaitLocalProcess(pid, qemuStopSandboxTimeoutSecs, syscall.Signal(0), q.Logger()) + if err != nil { + return err + } + } else { + err := q.qmpMonitorCh.qmp.ExecuteQuit(q.qmpMonitorCh.ctx) + if err != nil { + q.Logger().WithError(err).Error("Fail to execute qmp QUIT") + return err + } } return nil diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index e758071fd..a9370e0d5 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -1027,7 +1027,7 @@ func (s *Sandbox) startVM(ctx context.Context) (err error) { defer func() { if err != nil { - s.hypervisor.stopSandbox(ctx) + s.hypervisor.stopSandbox(ctx, false) } }() @@ -1081,14 +1081,9 @@ func (s *Sandbox) stopVM(ctx context.Context) error { s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("Agent did not stop sandbox") } - if s.disableVMShutdown { - // Do not kill the VM - allow the agent to shut it down - // (only used to support static agent tracing). - return nil - } - s.Logger().Info("Stopping VM") - return s.hypervisor.stopSandbox(ctx) + + return s.hypervisor.stopSandbox(ctx, s.disableVMShutdown) } func (s *Sandbox) addContainer(c *Container) error { diff --git a/src/runtime/virtcontainers/vm.go b/src/runtime/virtcontainers/vm.go index c94a26fc5..e6f02b6e0 100644 --- a/src/runtime/virtcontainers/vm.go +++ b/src/runtime/virtcontainers/vm.go @@ -137,7 +137,7 @@ func NewVM(ctx context.Context, config VMConfig) (*VM, error) { defer func() { if err != nil { virtLog.WithField("vm", id).WithError(err).Info("clean up vm") - hypervisor.stopSandbox(ctx) + hypervisor.stopSandbox(ctx, false) } }() @@ -251,7 +251,7 @@ func (v *VM) Disconnect(ctx context.Context) error { func (v *VM) Stop(ctx context.Context) error { v.logger().Info("stop vm") - if err := v.hypervisor.stopSandbox(ctx); err != nil { + if err := v.hypervisor.stopSandbox(ctx, false); err != nil { return err }