Merge pull request #107670 from 249043822/br-notfound

Suppress container not found errors in container runtime getPodStatuses
This commit is contained in:
Kubernetes Prow Robot 2022-02-16 10:00:37 -08:00 committed by GitHub
commit 2d2a7272fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 108 additions and 12 deletions

View File

@ -34,6 +34,8 @@ import (
"sync"
"time"
crierror "k8s.io/cri-api/pkg/errors"
"github.com/opencontainers/selinux/go-selinux"
grpcstatus "google.golang.org/grpc/status"
@ -501,10 +503,17 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, n
return nil, err
}
statuses := make([]*kubecontainer.Status, len(containers))
statuses := []*kubecontainer.Status{}
// TODO: optimization: set maximum number of containers per container name to examine.
for i, c := range containers {
for _, c := range containers {
resp, err := m.runtimeService.ContainerStatus(c.Id, false)
// Between List (ListContainers) and check (ContainerStatus) another thread might remove a container, and that is normal.
// The previous call (ListContainers) never fails due to a pod container not existing.
// Therefore, this method should not either, but instead act as if the previous call failed,
// which means the error should be ignored.
if crierror.IsNotFound(err) {
continue
}
if err != nil {
// Merely log this here; GetPodStatus will actually report the error out.
klog.V(4).InfoS("ContainerStatus return error", "containerID", c.Id, "err", err)
@ -533,7 +542,7 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, n
cStatus.Message += tMessage
}
}
statuses[i] = cStatus
statuses = append(statuses, cStatus)
}
sort.Sort(containerStatusByCreated(statuses))
@ -714,15 +723,15 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
"containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod)
err := m.runtimeService.StopContainer(containerID.ID, gracePeriod)
if err != nil {
if err != nil && !crierror.IsNotFound(err) {
klog.ErrorS(err, "Container termination failed with gracePeriod", "pod", klog.KObj(pod), "podUID", pod.UID,
"containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod)
} else {
klog.V(3).InfoS("Container exited normally", "pod", klog.KObj(pod), "podUID", pod.UID,
"containerName", containerName, "containerID", containerID.String())
return err
}
klog.V(3).InfoS("Container exited normally", "pod", klog.KObj(pod), "podUID", pod.UID,
"containerName", containerName, "containerID", containerID.String())
return err
return nil
}
// killContainersWithSyncResult kills all pod's containers with sync results.

View File

@ -24,6 +24,7 @@ import (
"time"
cadvisorapi "github.com/google/cadvisor/info/v1"
crierror "k8s.io/cri-api/pkg/errors"
"k8s.io/klog/v2"
v1 "k8s.io/api/core/v1"
@ -991,7 +992,7 @@ func (m *kubeGenericRuntimeManager) killPodWithSyncResult(pod *v1.Pod, runningPo
result.AddSyncResult(killSandboxResult)
// Stop all sandboxes belongs to same pod
for _, podSandbox := range runningPod.Sandboxes {
if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil {
if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil && !crierror.IsNotFound(err) {
killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error())
klog.ErrorS(nil, "Failed to stop sandbox", "podSandboxID", podSandbox.ID)
}
@ -1033,10 +1034,17 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp
klog.V(4).InfoS("getSandboxIDByPodUID got sandbox IDs for pod", "podSandboxID", podSandboxIDs, "pod", klog.KObj(pod))
sandboxStatuses := make([]*runtimeapi.PodSandboxStatus, len(podSandboxIDs))
sandboxStatuses := []*runtimeapi.PodSandboxStatus{}
podIPs := []string{}
for idx, podSandboxID := range podSandboxIDs {
resp, err := m.runtimeService.PodSandboxStatus(podSandboxID, false)
// Between List (getSandboxIDByPodUID) and check (PodSandboxStatus) another thread might remove a sandbox, and that is normal.
// The previous call (getSandboxIDByPodUID) never fails due to a pod sandbox not existing.
// Therefore, this method should not either, but instead act as if the previous call failed,
// which means the error should be ignored.
if crierror.IsNotFound(err) {
continue
}
if err != nil {
klog.ErrorS(err, "PodSandboxStatus of sandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))
return nil, err
@ -1045,8 +1053,7 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp
return nil, errors.New("pod sandbox status is nil")
}
sandboxStatuses[idx] = resp.Status
sandboxStatuses = append(sandboxStatuses, resp.Status)
// Only get pod IP from latest sandbox
if idx == 0 && resp.Status.State == runtimeapi.PodSandboxState_SANDBOX_READY {
podIPs = m.determinePodSandboxIPs(namespace, name, resp.Status)

View File

@ -23,6 +23,9 @@ import (
"testing"
"time"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
cadvisorapi "github.com/google/cadvisor/info/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -334,6 +337,82 @@ func TestGetPodStatus(t *testing.T) {
assert.Equal(t, apitest.FakePodSandboxIPs, podStatus.IPs)
}
// TestStopContainerWithNotFoundError verifies that killing a pod whose
// containers were already removed from the runtime (StopContainer returns a
// gRPC NotFound error) is treated as success rather than reported as an error.
func TestStopContainerWithNotFoundError(t *testing.T) {
	fakeRuntime, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)

	testPod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "foo",
			Namespace: "new",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:            "foo1",
					Image:           "busybox",
					ImagePullPolicy: v1.PullIfNotPresent,
				},
				{
					Name:            "foo2",
					Image:           "busybox",
					ImagePullPolicy: v1.PullIfNotPresent,
				},
			},
		},
	}

	// Register the fake sandbox and containers with the fake runtime, then
	// make every subsequent StopContainer call report NotFound.
	makeAndSetFakePod(t, m, fakeRuntime, testPod)
	fakeRuntime.InjectError("StopContainer", status.Error(codes.NotFound, "No such container"))

	podStatus, err := m.GetPodStatus(testPod.UID, testPod.Name, testPod.Namespace)
	require.NoError(t, err)

	runningPod := kubecontainer.ConvertPodStatusToRunningPod("", podStatus)
	gracePeriod := int64(1)
	// KillPod must succeed even though StopContainer returns NotFound.
	require.NoError(t, m.KillPod(testPod, runningPod, &gracePeriod))
}
// TestGetPodStatusWithNotFoundError verifies that GetPodStatus tolerates
// containers disappearing between listing and status retrieval: when
// ContainerStatus reports a gRPC NotFound error, the affected containers are
// skipped and the pod status is still returned without error.
func TestGetPodStatusWithNotFoundError(t *testing.T) {
	fakeRuntime, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)

	specContainers := []v1.Container{
		{
			Name:            "foo1",
			Image:           "busybox",
			ImagePullPolicy: v1.PullIfNotPresent,
		},
		{
			Name:            "foo2",
			Image:           "busybox",
			ImagePullPolicy: v1.PullIfNotPresent,
		},
	}
	testPod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:       "12345678",
			Name:      "foo",
			Namespace: "new",
		},
		Spec: v1.PodSpec{Containers: specContainers},
	}

	// Register the fake sandbox and containers, then make every subsequent
	// ContainerStatus call report NotFound.
	makeAndSetFakePod(t, m, fakeRuntime, testPod)
	fakeRuntime.InjectError("ContainerStatus", status.Error(codes.NotFound, "No such container"))

	podStatus, err := m.GetPodStatus(testPod.UID, testPod.Name, testPod.Namespace)
	require.NoError(t, err)
	// The pod-level fields must still be populated from the sandbox status.
	require.Equal(t, testPod.UID, podStatus.ID)
	require.Equal(t, testPod.Name, podStatus.Name)
	require.Equal(t, testPod.Namespace, podStatus.Namespace)
	require.Equal(t, apitest.FakePodSandboxIPs, podStatus.IPs)
}
func TestGetPods(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)

1
vendor/modules.txt vendored
View File

@ -1998,6 +1998,7 @@ k8s.io/cri-api/pkg/apis
k8s.io/cri-api/pkg/apis/runtime/v1
k8s.io/cri-api/pkg/apis/runtime/v1alpha2
k8s.io/cri-api/pkg/apis/testing
k8s.io/cri-api/pkg/errors
# k8s.io/csi-translation-lib v0.0.0 => ./staging/src/k8s.io/csi-translation-lib
## explicit
k8s.io/csi-translation-lib