mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 06:27:05 +00:00
dockershim: remove corrupt checkpoints immediately upon detection
This commit is contained in:
parent
81fa823a6c
commit
a4bc7707d4
@ -174,7 +174,6 @@ pkg/kubelet/container/testing
|
|||||||
pkg/kubelet/custommetrics
|
pkg/kubelet/custommetrics
|
||||||
pkg/kubelet/dockershim
|
pkg/kubelet/dockershim
|
||||||
pkg/kubelet/dockershim/cm
|
pkg/kubelet/dockershim/cm
|
||||||
pkg/kubelet/dockershim/errors
|
|
||||||
pkg/kubelet/dockershim/libdocker
|
pkg/kubelet/dockershim/libdocker
|
||||||
pkg/kubelet/dockershim/remote
|
pkg/kubelet/dockershim/remote
|
||||||
pkg/kubelet/dockershim/testing
|
pkg/kubelet/dockershim/testing
|
||||||
|
@ -47,7 +47,6 @@ go_library(
|
|||||||
"//pkg/kubelet/cm:go_default_library",
|
"//pkg/kubelet/cm:go_default_library",
|
||||||
"//pkg/kubelet/container:go_default_library",
|
"//pkg/kubelet/container:go_default_library",
|
||||||
"//pkg/kubelet/dockershim/cm:go_default_library",
|
"//pkg/kubelet/dockershim/cm:go_default_library",
|
||||||
"//pkg/kubelet/dockershim/errors:go_default_library",
|
|
||||||
"//pkg/kubelet/dockershim/libdocker:go_default_library",
|
"//pkg/kubelet/dockershim/libdocker:go_default_library",
|
||||||
"//pkg/kubelet/dockershim/metrics:go_default_library",
|
"//pkg/kubelet/dockershim/metrics:go_default_library",
|
||||||
"//pkg/kubelet/leaky:go_default_library",
|
"//pkg/kubelet/leaky:go_default_library",
|
||||||
@ -142,7 +141,6 @@ filegroup(
|
|||||||
srcs = [
|
srcs = [
|
||||||
":package-srcs",
|
":package-srcs",
|
||||||
"//pkg/kubelet/dockershim/cm:all-srcs",
|
"//pkg/kubelet/dockershim/cm:all-srcs",
|
||||||
"//pkg/kubelet/dockershim/errors:all-srcs",
|
|
||||||
"//pkg/kubelet/dockershim/libdocker:all-srcs",
|
"//pkg/kubelet/dockershim/libdocker:all-srcs",
|
||||||
"//pkg/kubelet/dockershim/metrics:all-srcs",
|
"//pkg/kubelet/dockershim/metrics:all-srcs",
|
||||||
"//pkg/kubelet/dockershim/remote:all-srcs",
|
"//pkg/kubelet/dockershim/remote:all-srcs",
|
||||||
|
@ -23,7 +23,6 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
|
||||||
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
||||||
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
||||||
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
||||||
@ -113,12 +112,14 @@ func (handler *PersistentCheckpointHandler) GetCheckpoint(podSandboxID string) (
|
|||||||
//TODO: unmarhsal into a struct with just Version, check version, unmarshal into versioned type.
|
//TODO: unmarhsal into a struct with just Version, check version, unmarshal into versioned type.
|
||||||
err = json.Unmarshal(blob, &checkpoint)
|
err = json.Unmarshal(blob, &checkpoint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Failed to unmarshal checkpoint %q. Checkpoint content: %q. ErrMsg: %v", podSandboxID, string(blob), err)
|
glog.Errorf("Failed to unmarshal checkpoint %q, removing checkpoint. Checkpoint content: %q. ErrMsg: %v", podSandboxID, string(blob), err)
|
||||||
return &checkpoint, errors.CorruptCheckpointError
|
handler.RemoveCheckpoint(podSandboxID)
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal checkpoint")
|
||||||
}
|
}
|
||||||
if checkpoint.CheckSum != calculateChecksum(checkpoint) {
|
if checkpoint.CheckSum != calculateChecksum(checkpoint) {
|
||||||
glog.Errorf("Checksum of checkpoint %q is not valid", podSandboxID)
|
glog.Errorf("Checksum of checkpoint %q is not valid, removing checkpoint", podSandboxID)
|
||||||
return &checkpoint, errors.CorruptCheckpointError
|
handler.RemoveCheckpoint(podSandboxID)
|
||||||
|
return nil, fmt.Errorf("checkpoint is corrupted")
|
||||||
}
|
}
|
||||||
return &checkpoint, nil
|
return &checkpoint, nil
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,6 @@ import (
|
|||||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
|
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
|
||||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
|
||||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
|
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/types"
|
"k8s.io/kubernetes/pkg/kubelet/types"
|
||||||
@ -193,20 +192,10 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
|
|||||||
// actions will only have sandbox ID and not have pod namespace and name information.
|
// actions will only have sandbox ID and not have pod namespace and name information.
|
||||||
// Return error if encounter any unexpected error.
|
// Return error if encounter any unexpected error.
|
||||||
if checkpointErr != nil {
|
if checkpointErr != nil {
|
||||||
if libdocker.IsContainerNotFoundError(statusErr) && checkpointErr == errors.CheckpointNotFoundError {
|
if libdocker.IsContainerNotFoundError(statusErr) {
|
||||||
glog.Warningf("Both sandbox container and checkpoint for id %q could not be found. "+
|
glog.Warningf("Both sandbox container and checkpoint for id %q could not be found. "+
|
||||||
"Proceed without further sandbox information.", podSandboxID)
|
"Proceed without further sandbox information.", podSandboxID)
|
||||||
} else {
|
} else {
|
||||||
if checkpointErr == errors.CorruptCheckpointError {
|
|
||||||
// Remove the corrupted checkpoint so that the next
|
|
||||||
// StopPodSandbox call can proceed. This may indicate that
|
|
||||||
// some resources won't be reclaimed.
|
|
||||||
// TODO (#43021): Fix this properly.
|
|
||||||
glog.Warningf("Removing corrupted checkpoint %q: %+v", podSandboxID, *checkpoint)
|
|
||||||
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
|
|
||||||
glog.Warningf("Unable to remove corrupted checkpoint %q: %v", podSandboxID, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return utilerrors.NewAggregate([]error{
|
return utilerrors.NewAggregate([]error{
|
||||||
fmt.Errorf("failed to get checkpoint for sandbox %q: %v", podSandboxID, checkpointErr),
|
fmt.Errorf("failed to get checkpoint for sandbox %q: %v", podSandboxID, checkpointErr),
|
||||||
fmt.Errorf("failed to get sandbox status: %v", statusErr)})
|
fmt.Errorf("failed to get sandbox status: %v", statusErr)})
|
||||||
@ -488,13 +477,6 @@ func (ds *dockerService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]
|
|||||||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(id)
|
checkpoint, err := ds.checkpointHandler.GetCheckpoint(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)
|
glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)
|
||||||
|
|
||||||
if err == errors.CorruptCheckpointError {
|
|
||||||
glog.Warningf("Removing corrupted checkpoint %q: %+v", id, *checkpoint)
|
|
||||||
if err := ds.checkpointHandler.RemoveCheckpoint(id); err != nil {
|
|
||||||
glog.Warningf("Unable to remove corrupted checkpoint %q: %v", id, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
|
result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
|
||||||
|
@ -38,7 +38,6 @@ import (
|
|||||||
kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
|
kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
|
||||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
|
"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
|
||||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/network/cni"
|
"k8s.io/kubernetes/pkg/kubelet/network/cni"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/network/hostport"
|
"k8s.io/kubernetes/pkg/kubelet/network/hostport"
|
||||||
@ -366,10 +365,6 @@ func (ds *dockerService) GetPodPortMappings(podSandboxID string) ([]*hostport.Po
|
|||||||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(podSandboxID)
|
checkpoint, err := ds.checkpointHandler.GetCheckpoint(podSandboxID)
|
||||||
// Return empty portMappings if checkpoint is not found
|
// Return empty portMappings if checkpoint is not found
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == errors.CheckpointNotFoundError {
|
|
||||||
glog.Warningf("Failed to retrieve checkpoint for sandbox %q: %v", podSandboxID, err)
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,25 +0,0 @@
|
|||||||
package(default_visibility = ["//visibility:public"])
|
|
||||||
|
|
||||||
load(
|
|
||||||
"@io_bazel_rules_go//go:def.bzl",
|
|
||||||
"go_library",
|
|
||||||
)
|
|
||||||
|
|
||||||
go_library(
|
|
||||||
name = "go_default_library",
|
|
||||||
srcs = ["errors.go"],
|
|
||||||
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/errors",
|
|
||||||
)
|
|
||||||
|
|
||||||
filegroup(
|
|
||||||
name = "package-srcs",
|
|
||||||
srcs = glob(["**"]),
|
|
||||||
tags = ["automanaged"],
|
|
||||||
visibility = ["//visibility:private"],
|
|
||||||
)
|
|
||||||
|
|
||||||
filegroup(
|
|
||||||
name = "all-srcs",
|
|
||||||
srcs = [":package-srcs"],
|
|
||||||
tags = ["automanaged"],
|
|
||||||
)
|
|
@ -1,22 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright 2017 The Kubernetes Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package errors
|
|
||||||
|
|
||||||
import "fmt"
|
|
||||||
|
|
||||||
var CorruptCheckpointError = fmt.Errorf("checkpoint is corrupted.")
|
|
||||||
var CheckpointNotFoundError = fmt.Errorf("checkpoint is not found.")
|
|
@ -9,7 +9,6 @@ go_library(
|
|||||||
name = "go_default_library",
|
name = "go_default_library",
|
||||||
srcs = ["util.go"],
|
srcs = ["util.go"],
|
||||||
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/testing",
|
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/testing",
|
||||||
deps = ["//pkg/kubelet/dockershim/errors:go_default_library"],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
filegroup(
|
filegroup(
|
||||||
|
@ -17,9 +17,8 @@ limitations under the License.
|
|||||||
package testing
|
package testing
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// MemStore is an implementation of CheckpointStore interface which stores checkpoint in memory.
|
// MemStore is an implementation of CheckpointStore interface which stores checkpoint in memory.
|
||||||
@ -44,7 +43,7 @@ func (mstore *MemStore) Read(key string) ([]byte, error) {
|
|||||||
defer mstore.Unlock()
|
defer mstore.Unlock()
|
||||||
data, ok := mstore.mem[key]
|
data, ok := mstore.mem[key]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, errors.CheckpointNotFoundError
|
return nil, fmt.Errorf("checkpoint is not found")
|
||||||
}
|
}
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user