diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go
index a0f08e8cb7..090e074755 100644
--- a/src/runtime/virtcontainers/clh.go
+++ b/src/runtime/virtcontainers/clh.go
@@ -24,6 +24,8 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync"
+	"sync/atomic"
 	"syscall"
 	"time"
 
@@ -256,6 +258,8 @@ type cloudHypervisor struct {
 	vmconfig chclient.VmConfig
 	state    CloudHypervisorState
 	config   HypervisorConfig
+	stopped  int32
+	mu       sync.Mutex
 }
 
 var clhKernelParams = []Param{
@@ -1081,9 +1085,21 @@ func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error {
 
 // StopVM will stop the Sandbox's VM.
 func (clh *cloudHypervisor) StopVM(ctx context.Context, waitOnly bool) (err error) {
+	clh.mu.Lock()
+	defer func() {
+		if err == nil {
+			atomic.StoreInt32(&clh.stopped, 1)
+		}
+		clh.mu.Unlock()
+	}()
 	span, _ := katatrace.Trace(ctx, clh.Logger(), "StopVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
 	defer span.End()
 	clh.Logger().WithField("function", "StopVM").Info("Stop Sandbox")
+	if atomic.LoadInt32(&clh.stopped) != 0 {
+		clh.Logger().Info("Already stopped")
+		return nil
+	}
+
 	return clh.terminate(ctx, waitOnly)
 }
 
@@ -1385,16 +1401,20 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {
 
 	pid := clh.state.PID
 
-	if err := syscall.Kill(pid, syscall.Signal(0)); err != nil {
+	if atomic.LoadInt32(&clh.stopped) != 0 {
 		return false, nil
 	}
 
 	timeStart := time.Now()
 	cl := clh.client()
 	for {
+		err := syscall.Kill(pid, syscall.Signal(0))
+		if err != nil {
+			return false, nil
+		}
 		ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
-		defer cancel()
-		_, _, err := cl.VmmPingGet(ctx)
+		_, _, err = cl.VmmPingGet(ctx)
+		cancel()
 		if err == nil {
 			return true, nil
 		} else {
diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go
index b7f9601c13..f15f62d13c 100644
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -21,6 +21,7 @@ import (
 	"strconv"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"syscall"
 	"time"
 	"unsafe"
@@ -109,7 +110,7 @@ type qemu struct {
 
 	nvdimmCount int
 
-	stopped bool
+	stopped int32
 
 	mu sync.Mutex
 }
@@ -969,21 +970,23 @@ func (q *qemu) waitVM(ctx context.Context, timeout int) error {
 }
 
 // StopVM will stop the Sandbox's VM.
-func (q *qemu) StopVM(ctx context.Context, waitOnly bool) error {
+func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
 	span, _ := katatrace.Trace(ctx, q.Logger(), "StopVM", qemuTracingTags, map[string]string{"sandbox_id": q.id})
 	defer span.End()
 
 	q.Logger().Info("Stopping Sandbox")
-	if q.stopped {
+	if atomic.LoadInt32(&q.stopped) != 0 {
 		q.Logger().Info("Already stopped")
 		return nil
 	}
 
 	defer func() {
 		q.cleanupVM()
-		q.stopped = true
+		if err == nil {
+			atomic.StoreInt32(&q.stopped, 1)
+		}
 	}()
 
 	if q.config.Debug && q.qemuConfig.LogFile != "" {
@@ -2568,7 +2571,7 @@ func (q *qemu) toGrpc(ctx context.Context) ([]byte, error) {
 
 func (q *qemu) Save() (s hv.HypervisorState) {
 	// If QEMU isn't even running, there isn't any state to Save
-	if q.stopped {
+	if atomic.LoadInt32(&q.stopped) != 0 {
 		return
 	}
 
@@ -2619,6 +2622,10 @@ func (q *qemu) Load(s hv.HypervisorState) {
 }
 
 func (q *qemu) Check() error {
+	if atomic.LoadInt32(&q.stopped) != 0 {
+		return fmt.Errorf("qemu is not running")
+	}
+
 	q.memoryDumpFlag.Lock()
 	defer q.memoryDumpFlag.Unlock()
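
Both drivers now follow the same stop-once pattern: StopVM serializes callers behind a mutex and latches an atomic stopped flag only when teardown succeeds, so lock-free readers (Save, Check, isClhRunning) can fail fast without contending for the stop lock, and a failed stop stays retryable. Below is a minimal standalone sketch of that pattern, not part of the patch; the hypervisor type, terminate, and Check here are hypothetical stand-ins for the real cloudHypervisor/qemu code.

package main

import (
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
)

// hypervisor is a hypothetical stand-in for cloudHypervisor/qemu.
type hypervisor struct {
	stopped int32 // read atomically on paths that do not hold mu
	mu      sync.Mutex
}

// StopVM is idempotent and safe for concurrent callers: the flag is
// latched in the deferred func only when err == nil, so a failed stop
// can be retried rather than silently reported as success.
func (h *hypervisor) StopVM() (err error) {
	h.mu.Lock()
	defer func() {
		if err == nil {
			atomic.StoreInt32(&h.stopped, 1)
		}
		h.mu.Unlock()
	}()

	if atomic.LoadInt32(&h.stopped) != 0 {
		return nil // already stopped, nothing to do
	}
	return h.terminate() // stand-in for the real teardown
}

// Check mirrors qemu.Check above: fail fast once stopped, without
// taking the stop mutex.
func (h *hypervisor) Check() error {
	if atomic.LoadInt32(&h.stopped) != 0 {
		return errors.New("hypervisor is not running")
	}
	return nil
}

func (h *hypervisor) terminate() error { return nil }

func main() {
	h := &hypervisor{}
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ { // concurrent stops: exactly one does the work
		wg.Add(1)
		go func() {
			defer wg.Done()
			_ = h.StopVM()
		}()
	}
	wg.Wait()
	fmt.Println(h.Check()) // prints: hypervisor is not running
}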
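
The isClhRunning hunk also fixes a defer-in-loop leak: defer cancel() only runs when the function returns, so every poll iteration kept its context and timeout timer alive for the whole wait; the patch calls cancel() as soon as the API call finishes. A standalone sketch of the corrected shape, assuming a hypothetical pollOnce in place of cl.VmmPingGet:

package main

import (
	"context"
	"errors"
	"time"
)

// pollOnce is a hypothetical stand-in for cl.VmmPingGet.
func pollOnce(ctx context.Context) error {
	select {
	case <-time.After(10 * time.Millisecond):
		return errors.New("not up yet")
	case <-ctx.Done():
		return ctx.Err()
	}
}

// poll retries until timeout, creating a fresh context per attempt.
func poll(timeout time.Duration) bool {
	start := time.Now()
	for {
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		err := pollOnce(ctx)
		cancel() // release this attempt's timer now; a defer would pile up until poll returns
		if err == nil {
			return true
		}
		if time.Since(start) > timeout {
			return false
		}
	}
}

func main() {
	_ = poll(100 * time.Millisecond)
}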