From 92f3b11c944f2ec0d9589b5d7cae0842a9fd1240 Mon Sep 17 00:00:00 2001 From: ls <335814617@qq.com> Date: Mon, 16 Jan 2023 14:45:37 +0800 Subject: [PATCH] runtime: all APIs hang on service.mu When the vmm process exits abnormally, a goroutine sets s.monitor to nil in the 'watchSandbox' function without holding service.mu. This can cause another goroutine, which holds service.mu, to block when sending a message to s.monitor, which leads to a deadlock. For example, the wait function in the file .../pkg/containerd-shim-v2/wait.go sends a message to s.monitor after obtaining service.mu, but s.monitor may be nil at this time. Fixes: #6059 Signed-off-by: ls <335814617@qq.com> (cherry picked from commit 69fc8de7123156477717803bbb30df19264975d9) --- src/runtime/pkg/containerd-shim-v2/wait.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/runtime/pkg/containerd-shim-v2/wait.go b/src/runtime/pkg/containerd-shim-v2/wait.go index ebb742790d..ecf75b8c34 100644 --- a/src/runtime/pkg/containerd-shim-v2/wait.go +++ b/src/runtime/pkg/containerd-shim-v2/wait.go @@ -120,10 +120,12 @@ func watchSandbox(ctx context.Context, s *service) { if err == nil { return } - s.monitor = nil s.mu.Lock() defer s.mu.Unlock() + + s.monitor = nil + // sandbox malfunctioning, cleanup as much as we can shimLog.WithError(err).Warn("sandbox stopped unexpectedly") err = s.sandbox.Stop(ctx, true)