mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-29 00:37:24 +00:00
Merge pull request #5597 from UiPath/fix-clh-wait
clh: avoid race condition when stopping clh
This commit is contained in:
commit
1bbcb413c9
@ -24,6 +24,8 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -256,6 +258,8 @@ type cloudHypervisor struct {
|
|||||||
vmconfig chclient.VmConfig
|
vmconfig chclient.VmConfig
|
||||||
state CloudHypervisorState
|
state CloudHypervisorState
|
||||||
config HypervisorConfig
|
config HypervisorConfig
|
||||||
|
stopped int32
|
||||||
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
var clhKernelParams = []Param{
|
var clhKernelParams = []Param{
|
||||||
@ -1081,9 +1085,21 @@ func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error {
|
|||||||
|
|
||||||
// StopVM will stop the Sandbox's VM.
|
// StopVM will stop the Sandbox's VM.
|
||||||
func (clh *cloudHypervisor) StopVM(ctx context.Context, waitOnly bool) (err error) {
|
func (clh *cloudHypervisor) StopVM(ctx context.Context, waitOnly bool) (err error) {
|
||||||
|
clh.mu.Lock()
|
||||||
|
defer func() {
|
||||||
|
if err == nil {
|
||||||
|
atomic.StoreInt32(&clh.stopped, 1)
|
||||||
|
}
|
||||||
|
clh.mu.Unlock()
|
||||||
|
}()
|
||||||
span, _ := katatrace.Trace(ctx, clh.Logger(), "StopVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
|
span, _ := katatrace.Trace(ctx, clh.Logger(), "StopVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
|
||||||
defer span.End()
|
defer span.End()
|
||||||
clh.Logger().WithField("function", "StopVM").Info("Stop Sandbox")
|
clh.Logger().WithField("function", "StopVM").Info("Stop Sandbox")
|
||||||
|
if atomic.LoadInt32(&clh.stopped) != 0 {
|
||||||
|
clh.Logger().Info("Already stopped")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
return clh.terminate(ctx, waitOnly)
|
return clh.terminate(ctx, waitOnly)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1385,16 +1401,20 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {
|
|||||||
|
|
||||||
pid := clh.state.PID
|
pid := clh.state.PID
|
||||||
|
|
||||||
if err := syscall.Kill(pid, syscall.Signal(0)); err != nil {
|
if atomic.LoadInt32(&clh.stopped) != 0 {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
timeStart := time.Now()
|
timeStart := time.Now()
|
||||||
cl := clh.client()
|
cl := clh.client()
|
||||||
for {
|
for {
|
||||||
|
err := syscall.Kill(pid, syscall.Signal(0))
|
||||||
|
if err != nil {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
|
||||||
defer cancel()
|
_, _, err = cl.VmmPingGet(ctx)
|
||||||
_, _, err := cl.VmmPingGet(ctx)
|
cancel()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return true, nil
|
return true, nil
|
||||||
} else {
|
} else {
|
||||||
|
@ -21,6 +21,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
@ -109,7 +110,7 @@ type qemu struct {
|
|||||||
|
|
||||||
nvdimmCount int
|
nvdimmCount int
|
||||||
|
|
||||||
stopped bool
|
stopped int32
|
||||||
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
@ -969,21 +970,23 @@ func (q *qemu) waitVM(ctx context.Context, timeout int) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// StopVM will stop the Sandbox's VM.
|
// StopVM will stop the Sandbox's VM.
|
||||||
func (q *qemu) StopVM(ctx context.Context, waitOnly bool) error {
|
func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) {
|
||||||
q.mu.Lock()
|
q.mu.Lock()
|
||||||
defer q.mu.Unlock()
|
defer q.mu.Unlock()
|
||||||
span, _ := katatrace.Trace(ctx, q.Logger(), "StopVM", qemuTracingTags, map[string]string{"sandbox_id": q.id})
|
span, _ := katatrace.Trace(ctx, q.Logger(), "StopVM", qemuTracingTags, map[string]string{"sandbox_id": q.id})
|
||||||
defer span.End()
|
defer span.End()
|
||||||
|
|
||||||
q.Logger().Info("Stopping Sandbox")
|
q.Logger().Info("Stopping Sandbox")
|
||||||
if q.stopped {
|
if atomic.LoadInt32(&q.stopped) != 0 {
|
||||||
q.Logger().Info("Already stopped")
|
q.Logger().Info("Already stopped")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
q.cleanupVM()
|
q.cleanupVM()
|
||||||
q.stopped = true
|
if err == nil {
|
||||||
|
atomic.StoreInt32(&q.stopped, 1)
|
||||||
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if q.config.Debug && q.qemuConfig.LogFile != "" {
|
if q.config.Debug && q.qemuConfig.LogFile != "" {
|
||||||
@ -2568,7 +2571,7 @@ func (q *qemu) toGrpc(ctx context.Context) ([]byte, error) {
|
|||||||
func (q *qemu) Save() (s hv.HypervisorState) {
|
func (q *qemu) Save() (s hv.HypervisorState) {
|
||||||
|
|
||||||
// If QEMU isn't even running, there isn't any state to Save
|
// If QEMU isn't even running, there isn't any state to Save
|
||||||
if q.stopped {
|
if atomic.LoadInt32(&q.stopped) != 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2619,6 +2622,10 @@ func (q *qemu) Load(s hv.HypervisorState) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemu) Check() error {
|
func (q *qemu) Check() error {
|
||||||
|
if atomic.LoadInt32(&q.stopped) != 0 {
|
||||||
|
return fmt.Errorf("qemu is not running")
|
||||||
|
}
|
||||||
|
|
||||||
q.memoryDumpFlag.Lock()
|
q.memoryDumpFlag.Lock()
|
||||||
defer q.memoryDumpFlag.Unlock()
|
defer q.memoryDumpFlag.Unlock()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user