From fb8be96194c62840e173c26c3fce3e5db9539a8d Mon Sep 17 00:00:00 2001 From: bin Date: Tue, 29 Mar 2022 16:39:01 +0800 Subject: [PATCH] runtime: stop getting OOM events when ttrpc: closed error getOOMEvents is a long-waiting call that is retried when it fails. When the agent has shut down, the retry should stop. When the agent's death has not yet been detected, we can also check whether the error is "ttrpc: closed". Fixes: #3815 Signed-off-by: bin --- src/runtime/pkg/containerd-shim-v2/wait.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/runtime/pkg/containerd-shim-v2/wait.go b/src/runtime/pkg/containerd-shim-v2/wait.go index 0eeac976e0..8b2ac70fbf 100644 --- a/src/runtime/pkg/containerd-shim-v2/wait.go +++ b/src/runtime/pkg/containerd-shim-v2/wait.go @@ -15,7 +15,6 @@ import ( "github.com/containerd/containerd/api/types/task" "github.com/containerd/containerd/mount" "github.com/sirupsen/logrus" - "google.golang.org/grpc/codes" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" ) @@ -156,13 +155,11 @@ func watchOOMEvents(ctx context.Context, s *service) { default: containerID, err := s.sandbox.GetOOMEvent(ctx) if err != nil { - shimLog.WithError(err).Warn("failed to get OOM event from sandbox") - // If the GetOOMEvent call is not implemented, then the agent is most likely an older version, - // stop attempting to get OOM events. - // for rust agent, the response code is not found - if isGRPCErrorCode(codes.NotFound, err) || err.Error() == "Dead agent" { + if err.Error() == "ttrpc: closed" || err.Error() == "Dead agent" { + shimLog.WithError(err).Warn("agent has shutdown, return from watching of OOM events") return } + shimLog.WithError(err).Warn("failed to get OOM event from sandbox") time.Sleep(defaultCheckInterval) continue }