mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-28 16:27:50 +00:00
Merge pull request #5597 from UiPath/fix-clh-wait
clh: avoid race condition when stopping clh
This commit is contained in:
commit
1bbcb413c9
@ -24,6 +24,8 @@ import (
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
@ -256,6 +258,8 @@ type cloudHypervisor struct {
|
||||
vmconfig chclient.VmConfig
|
||||
state CloudHypervisorState
|
||||
config HypervisorConfig
|
||||
stopped int32
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
var clhKernelParams = []Param{
|
||||
@ -1081,9 +1085,21 @@ func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error {
|
||||
|
||||
// StopVM will stop the Sandbox's VM.
|
||||
func (clh *cloudHypervisor) StopVM(ctx context.Context, waitOnly bool) (err error) {
|
||||
clh.mu.Lock()
|
||||
defer func() {
|
||||
if err == nil {
|
||||
atomic.StoreInt32(&clh.stopped, 1)
|
||||
}
|
||||
clh.mu.Unlock()
|
||||
}()
|
||||
span, _ := katatrace.Trace(ctx, clh.Logger(), "StopVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
|
||||
defer span.End()
|
||||
clh.Logger().WithField("function", "StopVM").Info("Stop Sandbox")
|
||||
if atomic.LoadInt32(&clh.stopped) != 0 {
|
||||
clh.Logger().Info("Already stopped")
|
||||
return nil
|
||||
}
|
||||
|
||||
return clh.terminate(ctx, waitOnly)
|
||||
}
|
||||
|
||||
@ -1385,16 +1401,20 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {
|
||||
|
||||
pid := clh.state.PID
|
||||
|
||||
if err := syscall.Kill(pid, syscall.Signal(0)); err != nil {
|
||||
if atomic.LoadInt32(&clh.stopped) != 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
timeStart := time.Now()
|
||||
cl := clh.client()
|
||||
for {
|
||||
err := syscall.Kill(pid, syscall.Signal(0))
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
|
||||
defer cancel()
|
||||
_, _, err := cl.VmmPingGet(ctx)
|
||||
_, _, err = cl.VmmPingGet(ctx)
|
||||
cancel()
|
||||
if err == nil {
|
||||
return true, nil
|
||||
} else {
|
||||
|
@ -21,6 +21,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
"unsafe"
|
||||
@ -109,7 +110,7 @@ type qemu struct {
|
||||
|
||||
nvdimmCount int
|
||||
|
||||
stopped bool
|
||||
stopped int32
|
||||
|
||||
mu sync.Mutex
|
||||
}
|
||||
@ -969,21 +970,23 @@ func (q *qemu) waitVM(ctx context.Context, timeout int) error {
|
||||
}
|
||||
|
||||
// StopVM will stop the Sandbox's VM.
|
||||
func (q *qemu) StopVM(ctx context.Context, waitOnly bool) error {
|
||||
func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
span, _ := katatrace.Trace(ctx, q.Logger(), "StopVM", qemuTracingTags, map[string]string{"sandbox_id": q.id})
|
||||
defer span.End()
|
||||
|
||||
q.Logger().Info("Stopping Sandbox")
|
||||
if q.stopped {
|
||||
if atomic.LoadInt32(&q.stopped) != 0 {
|
||||
q.Logger().Info("Already stopped")
|
||||
return nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
q.cleanupVM()
|
||||
q.stopped = true
|
||||
if err == nil {
|
||||
atomic.StoreInt32(&q.stopped, 1)
|
||||
}
|
||||
}()
|
||||
|
||||
if q.config.Debug && q.qemuConfig.LogFile != "" {
|
||||
@ -2568,7 +2571,7 @@ func (q *qemu) toGrpc(ctx context.Context) ([]byte, error) {
|
||||
func (q *qemu) Save() (s hv.HypervisorState) {
|
||||
|
||||
// If QEMU isn't even running, there isn't any state to Save
|
||||
if q.stopped {
|
||||
if atomic.LoadInt32(&q.stopped) != 0 {
|
||||
return
|
||||
}
|
||||
|
||||
@ -2619,6 +2622,10 @@ func (q *qemu) Load(s hv.HypervisorState) {
|
||||
}
|
||||
|
||||
func (q *qemu) Check() error {
|
||||
if atomic.LoadInt32(&q.stopped) != 0 {
|
||||
return fmt.Errorf("qemu is not running")
|
||||
}
|
||||
|
||||
q.memoryDumpFlag.Lock()
|
||||
defer q.memoryDumpFlag.Unlock()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user