mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-05-01 21:24:36 +00:00
runtime: sleep 1 second after GetOOMEvent failed
In some cases, for example when the agent has crashed but has not been marked dead yet, GetOOMEvent will return errors like `connection reset by peer` or `ttrpc: closed`. Sleep for 1 second (the agent check interval) and let the agent health check do the check. Fixes: #991 Signed-off-by: bin liu <bin@hyper.sh>
This commit is contained in:
parent
d22c7cf00b
commit
cb0e6094ff
@ -20,6 +20,8 @@ import (
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
|
||||
)
|
||||
|
||||
// defaultCheckInterval is how long watchOOMEvents sleeps before retrying
// after an error that does not cause it to return (i.e. not a gRPC NotFound
// and not "Dead agent").
// NOTE(review): the commit description says this matches the agent health
// check interval — confirm the two stay in sync if either changes.
const defaultCheckInterval = 1 * time.Second
|
||||
|
||||
func wait(s *service, c *container, execID string) (int32, error) {
|
||||
var execs *exec
|
||||
var err error
|
||||
@ -152,6 +154,7 @@ func watchOOMEvents(ctx context.Context, s *service) {
|
||||
if isGRPCErrorCode(codes.NotFound, err) || err.Error() == "Dead agent" {
|
||||
return
|
||||
}
|
||||
time.Sleep(defaultCheckInterval)
|
||||
continue
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user