Merge pull request #98933 from rphillips/fixes/create_sandbox_delete_pod_race

kubelet: fix create sandbox delete pod race
This commit is contained in:
Kubernetes Prow Robot 2021-02-18 21:40:24 -08:00 committed by GitHub
commit 931648726f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 1 deletions

View File

@ -101,7 +101,9 @@ func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS
return nil, err
}
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, newFakePodStateProvider(), kubeRuntimeManager)
podStateProvider := newFakePodStateProvider()
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.podStateProvider = podStateProvider
kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
kubeRuntimeManager.imagePuller = images.NewImageManager(
kubecontainer.FilterEventRecorder(recorder),

View File

@ -137,6 +137,9 @@ type kubeGenericRuntimeManager struct {
// Cache last per-container error message to reduce log spam
logReduction *logreduction.LogReduction
// PodState provider instance
podStateProvider podStateProvider
}
// KubeGenericRuntime is a interface contains interfaces for container runtime and command.
@ -248,6 +251,7 @@ func NewKubeGenericRuntimeManager(
imagePullBurst)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.podStateProvider = podStateProvider
kubeRuntimeManager.versionCache = cache.NewObjectCache(
func() (interface{}, error) {
@ -751,6 +755,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
result.AddSyncResult(createSandboxResult)
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
if err != nil {
// createPodSandbox can return an error from CNI, CSI,
// or CRI if the Pod has been deleted while the POD is
// being created. If the pod has been deleted then it's
// not a real error.
if m.podStateProvider.IsPodDeleted(pod.UID) {
klog.V(4).Infof("Pod %q was deleted and sandbox failed to be created: %v", format.Pod(pod), pod.UID)
return
}
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
klog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
ref, referr := ref.GetReference(legacyscheme.Scheme, pod)

View File

@ -1372,6 +1372,41 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) {
}
}
func TestSyncPodWithSandboxAndDeletedPod(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
fakeRuntime.ErrorOnSandboxCreate = true
containers := []v1.Container{
{
Name: "foo1",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: containers,
},
}
backOff := flowcontrol.NewBackOff(time.Second, time.Minute)
// GetPodStatus and the following SyncPod will not return errors in the
// case where the pod has been deleted. We are not adding any pods into
// the fakePodProvider so they are 'deleted'.
podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
assert.NoError(t, err)
result := m.SyncPod(pod, podStatus, []v1.Secret{}, backOff)
// This will return an error if the pod has _not_ been deleted.
assert.NoError(t, result.Error())
}
func makeBasePodAndStatusWithInitAndEphemeralContainers() (*v1.Pod, *kubecontainer.PodStatus) {
pod, status := makeBasePodAndStatus()
pod.Spec.InitContainers = []v1.Container{

View File

@ -67,6 +67,8 @@ type FakeRuntimeService struct {
Containers map[string]*FakeContainer
Sandboxes map[string]*FakePodSandbox
FakeContainerStats map[string]*runtimeapi.ContainerStats
ErrorOnSandboxCreate bool
}
// GetContainerID returns the unique container ID from the FakeRuntimeService.
@ -198,6 +200,10 @@ func (r *FakeRuntimeService) RunPodSandbox(config *runtimeapi.PodSandboxConfig,
return "", err
}
if r.ErrorOnSandboxCreate {
return "", fmt.Errorf("error on sandbox create")
}
// PodSandboxID should be randomized for real container runtime, but here just use
// fixed name from BuildSandboxName() for easily making fake sandboxes.
podSandboxID := BuildSandboxName(config.Metadata)