mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-26 15:32:30 +00:00
Merge pull request #2778 from jcvenegas/clh-race-condition-check
clh: Fix race condition that prevents pods from starting
This commit is contained in:
commit
15b5d22e81
@ -761,12 +761,18 @@ func (clh *cloudHypervisor) Load(s persistapi.HypervisorState) {
|
||||
clh.state.apiSocket = s.APISocket
|
||||
}
|
||||
|
||||
func (clh *cloudHypervisor) Check() error {
|
||||
cl := clh.client()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second)
|
||||
defer cancel()
|
||||
// Check is the implementation of Check from the Hypervisor interface.
|
||||
// Check if the VMM API is working.
|
||||
|
||||
_, _, err := cl.VmmPingGet(ctx)
|
||||
func (clh *cloudHypervisor) Check() error {
|
||||
// Use a long timeout to check if the VMM is running:
|
||||
// Check is used by the monitor thread (a background thread). If the
|
||||
// monitor thread calls Check() during the Container boot, it will take
|
||||
// longer than usual, especially if there is a hot-plug request in progress.
|
||||
running, err := clh.isClhRunning(10)
|
||||
if !running {
|
||||
return fmt.Errorf("clh is not running: %s", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
@ -1034,8 +1040,6 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {
|
||||
|
||||
pid := clh.state.PID
|
||||
|
||||
// Check if the clh process is running; in case it is not, let's
|
||||
// return from here.
|
||||
if err := syscall.Kill(pid, syscall.Signal(0)); err != nil {
|
||||
return false, nil
|
||||
}
|
||||
@ -1048,6 +1052,8 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {
|
||||
_, _, err := cl.VmmPingGet(ctx)
|
||||
if err == nil {
|
||||
return true, nil
|
||||
} else {
|
||||
clh.Logger().WithError(err).Warning("clh.VmmPingGet API call failed")
|
||||
}
|
||||
|
||||
if time.Since(timeStart).Seconds() > float64(timeout) {
|
||||
|
@ -14,7 +14,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
defaultCheckInterval = 1 * time.Second
|
||||
defaultCheckInterval = 5 * time.Second
|
||||
watcherChannelSize = 128
|
||||
)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user