Merge pull request #107670 from 249043822/br-notfound

Suppress container not found errors in container runtime getPodStatuses
This commit is contained in:
Kubernetes Prow Robot 2022-02-16 10:00:37 -08:00 committed by GitHub
commit 2d2a7272fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 108 additions and 12 deletions

View File

@ -34,6 +34,8 @@ import (
"sync" "sync"
"time" "time"
crierror "k8s.io/cri-api/pkg/errors"
"github.com/opencontainers/selinux/go-selinux" "github.com/opencontainers/selinux/go-selinux"
grpcstatus "google.golang.org/grpc/status" grpcstatus "google.golang.org/grpc/status"
@ -501,10 +503,17 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, n
return nil, err return nil, err
} }
statuses := make([]*kubecontainer.Status, len(containers)) statuses := []*kubecontainer.Status{}
// TODO: optimization: set maximum number of containers per container name to examine. // TODO: optimization: set maximum number of containers per container name to examine.
for i, c := range containers { for _, c := range containers {
resp, err := m.runtimeService.ContainerStatus(c.Id, false) resp, err := m.runtimeService.ContainerStatus(c.Id, false)
// Between List (ListContainers) and check (ContainerStatus) another thread might remove a container, and that is normal.
// The previous call (ListContainers) never fails due to a pod container not existing.
// Therefore, this method should not either, but instead act as if the previous call failed,
// which means the error should be ignored.
if crierror.IsNotFound(err) {
continue
}
if err != nil { if err != nil {
// Merely log this here; GetPodStatus will actually report the error out. // Merely log this here; GetPodStatus will actually report the error out.
klog.V(4).InfoS("ContainerStatus return error", "containerID", c.Id, "err", err) klog.V(4).InfoS("ContainerStatus return error", "containerID", c.Id, "err", err)
@ -533,7 +542,7 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, n
cStatus.Message += tMessage cStatus.Message += tMessage
} }
} }
statuses[i] = cStatus statuses = append(statuses, cStatus)
} }
sort.Sort(containerStatusByCreated(statuses)) sort.Sort(containerStatusByCreated(statuses))
@ -714,15 +723,15 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
"containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod) "containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod)
err := m.runtimeService.StopContainer(containerID.ID, gracePeriod) err := m.runtimeService.StopContainer(containerID.ID, gracePeriod)
if err != nil { if err != nil && !crierror.IsNotFound(err) {
klog.ErrorS(err, "Container termination failed with gracePeriod", "pod", klog.KObj(pod), "podUID", pod.UID, klog.ErrorS(err, "Container termination failed with gracePeriod", "pod", klog.KObj(pod), "podUID", pod.UID,
"containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod) "containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod)
} else { return err
}
klog.V(3).InfoS("Container exited normally", "pod", klog.KObj(pod), "podUID", pod.UID, klog.V(3).InfoS("Container exited normally", "pod", klog.KObj(pod), "podUID", pod.UID,
"containerName", containerName, "containerID", containerID.String()) "containerName", containerName, "containerID", containerID.String())
}
return err return nil
} }
// killContainersWithSyncResult kills all pod's containers with sync results. // killContainersWithSyncResult kills all pod's containers with sync results.

View File

@ -24,6 +24,7 @@ import (
"time" "time"
cadvisorapi "github.com/google/cadvisor/info/v1" cadvisorapi "github.com/google/cadvisor/info/v1"
crierror "k8s.io/cri-api/pkg/errors"
"k8s.io/klog/v2" "k8s.io/klog/v2"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
@ -991,7 +992,7 @@ func (m *kubeGenericRuntimeManager) killPodWithSyncResult(pod *v1.Pod, runningPo
result.AddSyncResult(killSandboxResult) result.AddSyncResult(killSandboxResult)
// Stop all sandboxes belongs to same pod // Stop all sandboxes belongs to same pod
for _, podSandbox := range runningPod.Sandboxes { for _, podSandbox := range runningPod.Sandboxes {
if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil { if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil && !crierror.IsNotFound(err) {
killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error()) killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error())
klog.ErrorS(nil, "Failed to stop sandbox", "podSandboxID", podSandbox.ID) klog.ErrorS(nil, "Failed to stop sandbox", "podSandboxID", podSandbox.ID)
} }
@ -1033,10 +1034,17 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp
klog.V(4).InfoS("getSandboxIDByPodUID got sandbox IDs for pod", "podSandboxID", podSandboxIDs, "pod", klog.KObj(pod)) klog.V(4).InfoS("getSandboxIDByPodUID got sandbox IDs for pod", "podSandboxID", podSandboxIDs, "pod", klog.KObj(pod))
sandboxStatuses := make([]*runtimeapi.PodSandboxStatus, len(podSandboxIDs)) sandboxStatuses := []*runtimeapi.PodSandboxStatus{}
podIPs := []string{} podIPs := []string{}
for idx, podSandboxID := range podSandboxIDs { for idx, podSandboxID := range podSandboxIDs {
resp, err := m.runtimeService.PodSandboxStatus(podSandboxID, false) resp, err := m.runtimeService.PodSandboxStatus(podSandboxID, false)
// Between List (getSandboxIDByPodUID) and check (PodSandboxStatus) another thread might remove a container, and that is normal.
// The previous call (getSandboxIDByPodUID) never fails due to a pod sandbox not existing.
// Therefore, this method should not either, but instead act as if the previous call failed,
// which means the error should be ignored.
if crierror.IsNotFound(err) {
continue
}
if err != nil { if err != nil {
klog.ErrorS(err, "PodSandboxStatus of sandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod)) klog.ErrorS(err, "PodSandboxStatus of sandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))
return nil, err return nil, err
@ -1045,8 +1053,7 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp
return nil, errors.New("pod sandbox status is nil") return nil, errors.New("pod sandbox status is nil")
} }
sandboxStatuses[idx] = resp.Status sandboxStatuses = append(sandboxStatuses, resp.Status)
// Only get pod IP from latest sandbox // Only get pod IP from latest sandbox
if idx == 0 && resp.Status.State == runtimeapi.PodSandboxState_SANDBOX_READY { if idx == 0 && resp.Status.State == runtimeapi.PodSandboxState_SANDBOX_READY {
podIPs = m.determinePodSandboxIPs(namespace, name, resp.Status) podIPs = m.determinePodSandboxIPs(namespace, name, resp.Status)

View File

@ -23,6 +23,9 @@ import (
"testing" "testing"
"time" "time"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
cadvisorapi "github.com/google/cadvisor/info/v1" cadvisorapi "github.com/google/cadvisor/info/v1"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -334,6 +337,82 @@ func TestGetPodStatus(t *testing.T) {
assert.Equal(t, apitest.FakePodSandboxIPs, podStatus.IPs) assert.Equal(t, apitest.FakePodSandboxIPs, podStatus.IPs)
} }
func TestStopContainerWithNotFoundError(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
containers := []v1.Container{
{
Name: "foo1",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
{
Name: "foo2",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: containers,
},
}
// Set fake sandbox and faked containers to fakeRuntime.
makeAndSetFakePod(t, m, fakeRuntime, pod)
fakeRuntime.InjectError("StopContainer", status.Error(codes.NotFound, "No such container"))
podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
require.NoError(t, err)
p := kubecontainer.ConvertPodStatusToRunningPod("", podStatus)
gracePeriod := int64(1)
err = m.KillPod(pod, p, &gracePeriod)
require.NoError(t, err)
}
func TestGetPodStatusWithNotFoundError(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
containers := []v1.Container{
{
Name: "foo1",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
{
Name: "foo2",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: containers,
},
}
// Set fake sandbox and faked containers to fakeRuntime.
makeAndSetFakePod(t, m, fakeRuntime, pod)
fakeRuntime.InjectError("ContainerStatus", status.Error(codes.NotFound, "No such container"))
podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
require.NoError(t, err)
require.Equal(t, pod.UID, podStatus.ID)
require.Equal(t, pod.Name, podStatus.Name)
require.Equal(t, pod.Namespace, podStatus.Namespace)
require.Equal(t, apitest.FakePodSandboxIPs, podStatus.IPs)
}
func TestGetPods(t *testing.T) { func TestGetPods(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager() fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err) assert.NoError(t, err)

1
vendor/modules.txt vendored
View File

@ -1998,6 +1998,7 @@ k8s.io/cri-api/pkg/apis
k8s.io/cri-api/pkg/apis/runtime/v1 k8s.io/cri-api/pkg/apis/runtime/v1
k8s.io/cri-api/pkg/apis/runtime/v1alpha2 k8s.io/cri-api/pkg/apis/runtime/v1alpha2
k8s.io/cri-api/pkg/apis/testing k8s.io/cri-api/pkg/apis/testing
k8s.io/cri-api/pkg/errors
# k8s.io/csi-translation-lib v0.0.0 => ./staging/src/k8s.io/csi-translation-lib # k8s.io/csi-translation-lib v0.0.0 => ./staging/src/k8s.io/csi-translation-lib
## explicit ## explicit
k8s.io/csi-translation-lib k8s.io/csi-translation-lib