mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-05-02 05:34:46 +00:00
runtime: sleep 1 second after GetOOMEvent fails
In some cases, for example when the agent has crashed but has not yet been marked dead, GetOOMEvent will return errors like `connection reset by peer` or `ttrpc: closed`. Sleep for 1 second (the agent check interval) before retrying, and let the agent health check detect the dead agent. Fixes: #991 Signed-off-by: bin liu <bin@hyper.sh>
This commit is contained in:
parent
d22c7cf00b
commit
cb0e6094ff
@ -20,6 +20,8 @@ import (
|
|||||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const defaultCheckInterval = 1 * time.Second
|
||||||
|
|
||||||
func wait(s *service, c *container, execID string) (int32, error) {
|
func wait(s *service, c *container, execID string) (int32, error) {
|
||||||
var execs *exec
|
var execs *exec
|
||||||
var err error
|
var err error
|
||||||
@ -152,6 +154,7 @@ func watchOOMEvents(ctx context.Context, s *service) {
|
|||||||
if isGRPCErrorCode(codes.NotFound, err) || err.Error() == "Dead agent" {
|
if isGRPCErrorCode(codes.NotFound, err) || err.Error() == "Dead agent" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
time.Sleep(defaultCheckInterval)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user