kubeadm: support retry mechanism for removing container in reset phase

This commit is contained in:
SataQiu 2022-06-29 17:54:56 +08:00
parent 6269784cd0
commit 3889a6cac8
3 changed files with 23 additions and 13 deletions

View File

@ -218,6 +218,8 @@ const (
APICallWithReadTimeout = 15 * time.Second
// PullImageRetry specifies how many times ContainerRuntime retries when pulling an image fails
PullImageRetry = 5
// RemoveContainerRetry specifies how many times ContainerRuntime attempts to remove a container before giving up
RemoveContainerRetry = 5
// DefaultControlPlaneTimeout specifies the default control plane (actually API Server) timeout for use by kubeadm
DefaultControlPlaneTimeout = 4 * time.Minute

View File

@ -23,6 +23,7 @@ import (
"github.com/pkg/errors"
errorsutil "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/klog/v2"
utilsexec "k8s.io/utils/exec"
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
@ -97,15 +98,25 @@ func (runtime *CRIRuntime) ListKubeContainers() ([]string, error) {
func (runtime *CRIRuntime) RemoveContainers(containers []string) error {
errs := []error{}
for _, container := range containers {
out, err := runtime.crictl("stopp", container).CombinedOutput()
if err != nil {
// don't stop on errors, try to remove as many containers as possible
errs = append(errs, errors.Wrapf(err, "failed to stop running pod %s: output: %s, error", container, string(out)))
} else {
var lastErr error
for i := 0; i < constants.RemoveContainerRetry; i++ {
klog.V(5).Infof("Attempting to remove container %v", container)
out, err := runtime.crictl("stopp", container).CombinedOutput()
if err != nil {
lastErr = errors.Wrapf(err, "failed to stop running pod %s: output: %s", container, string(out))
continue
}
out, err = runtime.crictl("rmp", container).CombinedOutput()
if err != nil {
errs = append(errs, errors.Wrapf(err, "failed to remove running container %s: output: %s, error", container, string(out)))
lastErr = errors.Wrapf(err, "failed to remove running container %s: output: %s", container, string(out))
continue
}
lastErr = nil
break
}
if lastErr != nil {
errs = append(errs, lastErr)
}
}
return errorsutil.NewAggregate(errs)

View File

@ -167,11 +167,8 @@ func TestRemoveContainers(t *testing.T) {
fcmd := fakeexec.FakeCmd{
CombinedOutputScript: []fakeexec.FakeAction{
fakeOK, fakeOK, fakeOK, fakeOK, fakeOK, fakeOK, // Test case 1
fakeOK, fakeOK, fakeOK, fakeErr, fakeOK, fakeOK,
fakeErr, fakeOK, fakeOK, fakeErr, fakeOK,
fakeOK, fakeOK, fakeOK, fakeOK, fakeOK, fakeOK,
fakeOK, fakeOK, fakeOK, fakeErr, fakeOK, fakeOK,
fakeErr, fakeOK, fakeOK, fakeErr, fakeOK,
fakeOK, fakeOK, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeOK, // Test case 2
fakeErr, fakeErr, fakeErr, fakeErr, fakeErr, fakeOK, fakeOK, fakeOK, fakeOK, // Test case 3
},
}
execer := fakeexec.FakeExec{
@ -186,8 +183,8 @@ func TestRemoveContainers(t *testing.T) {
isError bool
}{
{"valid: remove containers using CRI", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, false}, // Test case 1
{"invalid: CRI rmp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true},
{"invalid: CRI stopp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true},
{"invalid: CRI rmp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true}, // Test case 2
{"invalid: CRI stopp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true}, // Test case 3
}
for _, tc := range cases {