diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index 2bf4edff4e3..d452a553b01 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -84,6 +84,7 @@ go_library( "//pkg/kubelet/apis/kubeletconfig:go_default_library", "//pkg/kubelet/checkpointmanager:go_default_library", "//pkg/kubelet/checkpointmanager/checksum:go_default_library", + "//pkg/kubelet/checkpointmanager/errors:go_default_library", "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/container:go_default_library", "//pkg/kubelet/dockershim/cm:go_default_library", diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index 487193e69b7..13e9c42366e 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -31,6 +31,7 @@ import ( utilerrors "k8s.io/apimachinery/pkg/util/errors" runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker" "k8s.io/kubernetes/pkg/kubelet/qos" @@ -209,6 +210,12 @@ func (ds *dockerService) StopPodSandbox(ctx context.Context, r *runtimeapi.StopP // actions will only have sandbox ID and not have pod namespace and name information. // Return error if encounter any unexpected error. if checkpointErr != nil { + if checkpointErr != errors.ErrCheckpointNotFound { + err := ds.checkpointManager.RemoveCheckpoint(podSandboxID) + if err != nil { + glog.Errorf("Failed to delete corrupt checkpoint for sandbox %q: %v", podSandboxID, err) + } + } if libdocker.IsContainerNotFoundError(statusErr) { glog.Warningf("Both sandbox container and checkpoint for id %q could not be found. "+ "Proceed without further sandbox information.", podSandboxID) @@ -517,6 +524,12 @@ func (ds *dockerService) ListPodSandbox(_ context.Context, r *runtimeapi.ListPod err := ds.checkpointManager.GetCheckpoint(id, checkpoint) if err != nil { glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err) + if err == errors.ErrCorruptCheckpoint { + err = ds.checkpointManager.RemoveCheckpoint(id) + if err != nil { + glog.Errorf("Failed to delete corrupt checkpoint for sandbox %q: %v", id, err) + } + } continue } result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))