handle pod not found in CNI ADD gracefully

Sometimes pods are deleted very quickly (e.g. short-lived Jobs or CI workloads), so by the time CNI ADD looks the pod up it is reported as not found.

Instead of returning an error in that case, return an empty CNI result so the ADD call does not fail.

Adds a sentinel error, errPodNotFound, which GetPod returns when the pod is not found; CmdAdd skips the rest of its work when it receives it.

Shout-out to race conditions.
This commit is contained in:
dougbtv 2025-03-24 09:54:22 -04:00
parent e156e815ad
commit 641f6a3b63

View File

@ -52,11 +52,12 @@ const (
) )
var ( var (
version = "master@git" version = "master@git"
commit = "unknown commit" commit = "unknown commit"
date = "unknown date" date = "unknown date"
gitTreeState = "" gitTreeState = ""
releaseStatus = "" releaseStatus = ""
errPodNotFound = fmt.Errorf("pod not found during Multus GetPod")
) )
// PrintVersionString ... // PrintVersionString ...
@ -133,9 +134,9 @@ func saveDelegates(containerID, dataDir string, delegates []*types.DelegateNetCo
func getValidAttachmentFromCache(b []byte) (string, string, error) { func getValidAttachmentFromCache(b []byte) (string, string, error) {
type simpleCacheV1 struct { type simpleCacheV1 struct {
Kind string `json:"kind"` Kind string `json:"kind"`
ContainerID string `json:"containerId"` ContainerID string `json:"containerId"`
IfName string `json:"ifName"` IfName string `json:"ifName"`
} }
cache := &simpleCacheV1{} cache := &simpleCacheV1{}
@ -585,9 +586,12 @@ func GetPod(kubeClient *k8s.ClientInfo, k8sArgs *types.K8sArgs, isDel bool) (*v1
} }
return pod != nil, getErr return pod != nil, getErr
}); err != nil { }); err != nil {
if isDel && errors.IsNotFound(err) { if errors.IsNotFound(err) {
// On DEL pod may already be gone from apiserver/informer // When pods are not found, this is "OK", it's a known condition for rapidly deleted pods, we'll just warn on it.
return nil, nil if !isDel {
logging.Verbosef("Warning: GetPod for [%s/%s] resulted in pod not found during CNI ADD (pod may have already been deleted): %v", podNamespace, podName, err)
}
return nil, errPodNotFound
} }
// Try one more time to get the pod directly from the apiserver; // Try one more time to get the pod directly from the apiserver;
// TODO: figure out why static pods don't show up via the informer // TODO: figure out why static pods don't show up via the informer
@ -596,6 +600,10 @@ func GetPod(kubeClient *k8s.ClientInfo, k8sArgs *types.K8sArgs, isDel bool) (*v1
defer cancel() defer cancel()
pod, err = kubeClient.GetPodAPILiveQuery(ctx, podNamespace, podName) pod, err = kubeClient.GetPodAPILiveQuery(ctx, podNamespace, podName)
if err != nil { if err != nil {
if errors.IsNotFound(err) {
logging.Verbosef("Warning: On live query retry, [%s/%s] pod not found during CNI ADD (pod may have already been deleted): %v", podNamespace, podName, err)
return nil, errPodNotFound
}
return nil, cmdErr(k8sArgs, "error waiting for pod: %v", err) return nil, cmdErr(k8sArgs, "error waiting for pod: %v", err)
} }
} }
@ -641,6 +649,12 @@ func CmdAdd(args *skel.CmdArgs, exec invoke.Exec, kubeClient *k8s.ClientInfo) (c
pod, err := GetPod(kubeClient, k8sArgs, false) pod, err := GetPod(kubeClient, k8sArgs, false)
if err != nil { if err != nil {
if err == errPodNotFound {
logging.Verbosef("CmdAdd: Warning: pod [%s/%s] not found, exiting with empty CNI result", k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_NAME)
return &cni100.Result{
CNIVersion: n.CNIVersion,
}, nil
}
return nil, err return nil, err
} }