Merge pull request #102344 from smarterclayton/keep_pod_worker

Prevent Kubelet from incorrectly interpreting "not yet started" pods as "ready to terminate pods" by unifying responsibility for pod lifecycle into pod worker
Kubernetes Prow Robot 2021-07-08 16:48:53 -07:00 committed by GitHub
commit dab6f6a43d
33 changed files with 2065 additions and 1104 deletions
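The change moves pod setup and teardown under a single pod worker. As orientation before the per-file diff, here is a minimal Go sketch of the three lifecycle callbacks and the reworked eviction kill hook, assembled from the signatures that appear in the diff below; the package name, type names, and grouping are illustrative assumptions, not code from this PR.

// A minimal sketch (not the PR's actual pod_workers.go) of the lifecycle split
// this merge introduces. Signatures are copied from the diff below; the
// package and type names here are assumptions for readability.
package sketch

import (
	"context"

	v1 "k8s.io/api/core/v1"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// syncPod sets a pod up; syncTerminatingPod stops its containers;
// syncTerminatedPod releases remaining resources once nothing can be running.
type syncPodFn func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error
type syncTerminatingPodFn func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error
type syncTerminatedPodFn func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error

// Callers such as the eviction manager no longer hand in a pre-built
// v1.PodStatus; they pass a mutation callback applied to the status the
// kubelet computes.
type KillPodFunc func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error

In the diff, NewMainKubelet now constructs the pod worker with syncPod, syncTerminatingPod, and syncTerminatedPod, and the eviction manager's killPodFunc is invoked with a status-mutation callback instead of a pre-built status.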

View File

@ -92,7 +92,8 @@ type Runtime interface {
// file). In this case, garbage collector should refrain itself from aggressive
// behavior such as removing all containers of unrecognized pods (yet).
// If evictNonDeletedPods is set to true, containers and sandboxes belonging to pods
// that are terminated, but not deleted will be evicted. Otherwise, only deleted pods will be GC'd.
// that are terminated, but not deleted will be evicted. Otherwise, only deleted pods
// will be GC'd.
// TODO: Revisit this method and make it cleaner.
GarbageCollect(gcPolicy GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
// SyncPod syncs the running pod into the desired pod.

View File

@ -561,15 +561,15 @@ func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg
klog.ErrorS(nil, "Eviction manager: cannot evict a critical pod", "pod", klog.KObj(pod))
return false
}
status := v1.PodStatus{
Phase: v1.PodFailed,
Message: evictMsg,
Reason: Reason,
}
// record that we are evicting the pod
m.recorder.AnnotatedEventf(pod, annotations, v1.EventTypeWarning, Reason, evictMsg)
// this is a blocking call and should only return when the pod and its containers are killed.
err := m.killPodFunc(pod, status, &gracePeriodOverride)
klog.V(3).InfoS("Evicting pod", "pod", klog.KObj(pod), "podUID", pod.UID, "message", evictMsg)
err := m.killPodFunc(pod, true, &gracePeriodOverride, func(status *v1.PodStatus) {
status.Phase = v1.PodFailed
status.Reason = Reason
status.Message = evictMsg
})
if err != nil {
klog.ErrorS(err, "Eviction manager: pod failed to evict", "pod", klog.KObj(pod))
} else {

View File

@ -21,7 +21,7 @@ import (
"testing" "testing"
"time" "time"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/clock" "k8s.io/apimachinery/pkg/util/clock"
@ -46,14 +46,16 @@ const (
// mockPodKiller is used to testing which pod is killed
type mockPodKiller struct {
pod *v1.Pod
status v1.PodStatus
evict bool
statusFn func(*v1.PodStatus)
gracePeriodOverride *int64
}
// killPodNow records the pod that was killed
func (m *mockPodKiller) killPodNow(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, statusFn func(*v1.PodStatus)) error {
m.pod = pod
m.status = status
m.statusFn = statusFn
m.evict = evict
m.gracePeriodOverride = gracePeriodOverride
return nil
}

View File

@ -19,7 +19,7 @@ package eviction
import (
"time"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
@ -92,7 +92,7 @@ type ContainerGC interface {
// pod - the pod to kill
// status - the desired status to associate with the pod (i.e. why its killed)
// gracePeriodOverride - the grace period override to use instead of what is on the pod spec
type KillPodFunc func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error
type KillPodFunc func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error
// MirrorPodFunc returns the mirror pod for the given static pod and
// whether it was known to the pod manager.

View File

@ -17,6 +17,7 @@ limitations under the License.
package kubelet
import (
"context"
"crypto/tls" "crypto/tls"
"fmt" "fmt"
"math" "math"
@ -137,6 +138,11 @@ const (
// Period for performing global cleanup tasks.
housekeepingPeriod = time.Second * 2
// Duration at which housekeeping failed to satisfy the invariant that
// housekeeping should be fast to avoid blocking pod config (while
// housekeeping is running no new pods are started or deleted).
housekeepingWarningDuration = time.Second * 15
// Period for performing eviction monitoring.
// ensure this is kept in sync with internal cadvisor housekeeping.
evictionMonitoringPeriod = time.Second * 10
@ -639,6 +645,20 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.containerLogManager = logs.NewStubContainerLogManager()
}
klet.reasonCache = NewReasonCache()
klet.workQueue = queue.NewBasicWorkQueue(klet.clock)
klet.podWorkers = newPodWorkers(
klet.syncPod,
klet.syncTerminatingPod,
klet.syncTerminatedPod,
kubeDeps.Recorder,
klet.workQueue,
klet.resyncInterval,
backOffPeriod,
klet.podCache,
)
runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
klet.livenessManager,
@ -646,7 +666,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.startupManager,
seccompProfileRoot,
machineInfo,
klet,
klet.podWorkers,
kubeDeps.OSInterface,
klet,
httpClient,
@ -780,7 +800,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
kubeCfg.EnableControllerAttachDetach,
nodeName,
klet.podManager,
klet.statusManager,
klet.podWorkers,
klet.kubeClient,
klet.volumePluginMgr,
klet.containerRuntime,
@ -792,12 +812,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
keepTerminatedPodVolumes,
volumepathhandler.NewBlockVolumePathHandler())
klet.reasonCache = NewReasonCache()
klet.workQueue = queue.NewBasicWorkQueue(klet.clock)
klet.podWorkers = newPodWorkers(klet.syncPod, kubeDeps.Recorder, klet.workQueue, klet.resyncInterval, backOffPeriod, klet.podCache)
klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
klet.podKiller = NewPodKiller(klet)
// setup eviction manager
evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)
@ -1103,9 +1118,6 @@ type Kubelet struct {
// Container restart Backoff
backOff *flowcontrol.Backoff
// Pod killer handles pods to be killed
podKiller PodKiller
// Information about the ports which are opened by daemons on Node running this Kubelet server.
daemonEndpoints *v1.NodeDaemonEndpoints
@ -1470,10 +1482,6 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
kl.initNetworkUtil()
}
// Start a goroutine responsible for killing pods (that are not properly
// handled by pod workers).
go wait.Until(kl.podKiller.PerformPodKillingWork, 1*time.Second, wait.NeverStop)
// Start component sync loops.
kl.statusManager.Start()
@ -1487,7 +1495,12 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
kl.syncLoop(updates, kl)
}
// syncPod is the transaction script for the sync of a single pod.
// syncPod is the transaction script for the sync of a single pod (setting up)
// a pod. The reverse (teardown) is handled in syncTerminatingPod and
// syncTerminatedPod. If syncPod exits without error, then the pod runtime
// state is in sync with the desired configuration state (pod is running).
// If syncPod exits with a transient error, the next invocation of syncPod
// is expected to make progress towards reaching the runtime state.
//
// Arguments:
//
@ -1499,7 +1512,7 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
// * If the pod is being seen as running for the first time, record pod
// start latency
// * Update the status of the pod in the status manager
// * Kill the pod if it should not be running
// * Kill the pod if it should not be running due to soft admission
// * Create a mirror pod if the pod is a static pod, and does not
// already have a mirror pod
// * Create the data directories for the pod if they do not exist
@ -1514,39 +1527,9 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
// This operation writes all events that are dispatched in order to provide
// the most accurate information possible about an error situation to aid debugging.
// Callers should not throw an event if this operation returns an error.
func (kl *Kubelet) syncPod(o syncPodOptions) error {
// pull out the required options
pod := o.pod
func (kl *Kubelet) syncPod(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
klog.V(4).InfoS("syncPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("syncPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)
mirrorPod := o.mirrorPod
podStatus := o.podStatus
updateType := o.updateType
// if we want to kill a pod, do it now!
if updateType == kubetypes.SyncPodKill {
killPodOptions := o.killPodOptions
if killPodOptions == nil || killPodOptions.PodStatusFunc == nil {
return fmt.Errorf("kill pod options are required if update type is kill")
}
apiPodStatus := killPodOptions.PodStatusFunc(pod, podStatus)
kl.statusManager.SetPodStatus(pod, apiPodStatus)
// we kill the pod with the specified grace period since this is a termination
if err := kl.killPod(pod, nil, podStatus, killPodOptions.PodTerminationGracePeriodSecondsOverride); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
// there was an error killing the pod, so we return that error directly
utilruntime.HandleError(err)
return err
}
return nil
}
// If a pod is still gracefully terminating, then we do not want to
// take further action. This mitigates static pods and deleted pods
// from getting rerun prematurely or their cgroups being deleted before
// the runtime cleans up.
podFullName := kubecontainer.GetPodFullName(pod)
if kl.podKiller.IsPodPendingTerminationByPodName(podFullName) {
return fmt.Errorf("pod %q is pending termination", podFullName)
}
// Latency measurements for the main workflow are relative to the
// first time the pod was seen by the API server.
@ -1583,16 +1566,15 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
podStatus.IPs = []string{apiPodStatus.PodIP}
}
// Record the time it takes for the pod to become running.
existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
if !ok || existingStatus.Phase == v1.PodPending && apiPodStatus.Phase == v1.PodRunning &&
!firstSeenTime.IsZero() {
metrics.PodStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
}
// If the pod should not be running, we request the pod's containers be stopped. This is not the same
// as termination (we want to stop the pod, but potentially restart it later if soft admission allows
// it later). Set the status and phase appropriately
runnable := kl.canRunPod(pod)
if !runnable.Admit {
// Pod is not runnable; update the Pod and Container statuses to why.
// Pod is not runnable; and update the Pod and Container statuses to why.
if apiPodStatus.Phase != v1.PodFailed && apiPodStatus.Phase != v1.PodSucceeded {
apiPodStatus.Phase = v1.PodPending
}
apiPodStatus.Reason = runnable.Reason
apiPodStatus.Message = runnable.Message
// Waiting containers are not creating.
@ -1609,22 +1591,28 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
}
}
// Update status in the status manager
// Record the time it takes for the pod to become running.
existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
if !ok || existingStatus.Phase == v1.PodPending && apiPodStatus.Phase == v1.PodRunning &&
!firstSeenTime.IsZero() {
metrics.PodStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
}
kl.statusManager.SetPodStatus(pod, apiPodStatus)
// Kill pod if it should not be running
if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == v1.PodFailed {
// Pods that are not runnable must be stopped - return a typed error to the pod worker
if !runnable.Admit {
klog.V(2).InfoS("Pod is not runnable and must have running containers stopped", "pod", klog.KObj(pod), "podUID", pod.UID, "message", runnable.Message)
var syncErr error
if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err := kl.killPod(pod, p, nil); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
syncErr = fmt.Errorf("error killing pod: %v", err)
utilruntime.HandleError(syncErr)
} else {
if !runnable.Admit {
// There was no error killing the pod, but the pod cannot be run.
// Return an error to signal that the sync loop should back off.
syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
}
// There was no error killing the pod, but the pod cannot be run.
// Return an error to signal that the sync loop should back off.
syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
}
return syncErr
}
@ -1640,7 +1628,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
pcm := kl.containerManager.NewPodContainerManager()
// If pod has already been terminated then we need not create
// or update the pod's cgroup
if !kl.podIsTerminated(pod) {
// TODO: once context cancellation is added this check can be removed
if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
// When the kubelet is restarted with the cgroups-per-qos
// flag enabled, all the pod's running containers
// should be killed intermittently and brought back up
@ -1657,7 +1646,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
// exists or the pod is running for the first time
podKilled := false
if !pcm.Exists(pod) && !firstSync {
if err := kl.killPod(pod, nil, podStatus, nil); err == nil {
p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err := kl.killPod(pod, p, nil); err == nil {
podKilled = true
} else {
klog.ErrorS(err, "KillPod failed", "pod", klog.KObj(pod), "podStatus", podStatus)
@ -1691,6 +1681,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
// The mirror pod is semantically different from the static pod. Remove
// it. The mirror pod will get recreated later.
klog.InfoS("Trying to delete pod", "pod", klog.KObj(pod), "podUID", mirrorPod.ObjectMeta.UID)
podFullName := kubecontainer.GetPodFullName(pod)
var err error
deleted, err = kl.podManager.DeleteMirrorPod(podFullName, &mirrorPod.ObjectMeta.UID)
if deleted {
@ -1720,8 +1711,9 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
return err
}
// Volume manager will not mount volumes for terminated pods
if !kl.podIsTerminated(pod) {
// Volume manager will not mount volumes for terminating pods
// TODO: once context cancellation is added this check can be removed
if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
// Wait for volumes to attach/mount
if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedMountVolume, "Unable to attach or mount volumes: %v", err)
@ -1752,6 +1744,125 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
return nil
}
// syncTerminatingPod is expected to terminate all running containers in a pod. Once this method
// returns without error, the pod's local state can be safely cleaned up. If runningPod is passed,
// we perform no status updates.
func (kl *Kubelet) syncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
klog.V(4).InfoS("syncTerminatingPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("syncTerminatingPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)
// when we receive a runtime only pod (runningPod != nil) we don't need to update the status
// manager or refresh the status of the cache, because a successful killPod will ensure we do
// not get invoked again
if runningPod != nil {
// we kill the pod with the specified grace period since this is a termination
if gracePeriod != nil {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", *gracePeriod)
} else {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", nil)
}
if err := kl.killPod(pod, *runningPod, gracePeriod); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
// there was an error killing the pod, so we return that error directly
utilruntime.HandleError(err)
return err
}
klog.V(4).InfoS("Pod termination stopped all running orphan containers", "pod", klog.KObj(pod), "podUID", pod.UID)
return nil
}
apiPodStatus := kl.generateAPIPodStatus(pod, podStatus)
if podStatusFn != nil {
podStatusFn(&apiPodStatus)
}
kl.statusManager.SetPodStatus(pod, apiPodStatus)
if gracePeriod != nil {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", *gracePeriod)
} else {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", nil)
}
p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err := kl.killPod(pod, p, gracePeriod); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
// there was an error killing the pod, so we return that error directly
utilruntime.HandleError(err)
return err
}
// Guard against consistency issues in KillPod implementations by checking that there are no
// running containers. This method is invoked infrequently so this is effectively free and can
// catch race conditions introduced by callers updating pod status out of order.
// TODO: have KillPod return the terminal status of stopped containers and write that into the
// cache immediately
podStatus, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
if err != nil {
klog.ErrorS(err, "Unable to read pod status prior to final pod termination", "pod", klog.KObj(pod), "podUID", pod.UID)
return err
}
var runningContainers []string
var containers []string
for _, s := range podStatus.ContainerStatuses {
if s.State == kubecontainer.ContainerStateRunning {
runningContainers = append(runningContainers, s.ID.String())
}
containers = append(containers, fmt.Sprintf("(%s state=%s exitCode=%d finishedAt=%s)", s.Name, s.State, s.ExitCode, s.FinishedAt.UTC().Format(time.RFC3339Nano)))
}
if klog.V(4).Enabled() {
sort.Strings(containers)
klog.InfoS("Post-termination container state", "pod", klog.KObj(pod), "podUID", pod.UID, "containers", strings.Join(containers, " "))
}
if len(runningContainers) > 0 {
return fmt.Errorf("detected running containers after a successful KillPod, CRI violation: %v", runningContainers)
}
// we have successfully stopped all containers, the pod is terminating, our status is "done"
klog.V(4).InfoS("Pod termination stopped all running containers", "pod", klog.KObj(pod), "podUID", pod.UID)
return nil
}
// syncTerminatedPod cleans up a pod that has terminated (has no running containers).
// The invocations in this call are expected to tear down what PodResourcesAreReclaimed checks (which
// gates pod deletion). When this method exits the pod is expected to be ready for cleanup.
// TODO: make this method take a context and exit early
func (kl *Kubelet) syncTerminatedPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
klog.V(4).InfoS("syncTerminatedPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("syncTerminatedPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)
// generate the final status of the pod
// TODO: should we simply fold this into TerminatePod? that would give a single pod update
apiPodStatus := kl.generateAPIPodStatus(pod, podStatus)
kl.statusManager.SetPodStatus(pod, apiPodStatus)
// volumes are unmounted after the pod worker reports ShouldPodRuntimeBeRemoved (which is satisfied
// before syncTerminatedPod is invoked)
if err := kl.volumeManager.WaitForUnmount(pod); err != nil {
return err
}
klog.V(4).InfoS("Pod termination unmounted volumes", "pod", klog.KObj(pod), "podUID", pod.UID)
// Note: we leave pod containers to be reclaimed in the background since dockershim requires the
// container for retrieving logs and we want to make sure logs are available until the pod is
// physically deleted.
// remove any cgroups in the hierarchy for pods that are no longer running.
if kl.cgroupsPerQOS {
pcm := kl.containerManager.NewPodContainerManager()
name, _ := pcm.GetPodContainerName(pod)
if err := pcm.Destroy(name); err != nil {
return err
}
klog.V(4).InfoS("Pod termination removed cgroups", "pod", klog.KObj(pod), "podUID", pod.UID)
}
// mark the final pod status
kl.statusManager.TerminatePod(pod)
klog.V(4).InfoS("Pod is terminated and will need no more status updates", "pod", klog.KObj(pod), "podUID", pod.UID)
return nil
}
// Get pods which should be resynchronized. Currently, the following pod should be resynchronized:
// * pod whose work is ready.
// * internal modules that request sync of a pod.
@ -1794,28 +1905,11 @@ func (kl *Kubelet) deletePod(pod *v1.Pod) error {
// for sources that haven't reported yet.
return fmt.Errorf("skipping delete because sources aren't ready yet")
}
kl.podWorkers.ForgetWorker(pod.UID)
// make sure our runtimeCache is at least as fresh as the last container started event we observed.
// this ensures we correctly send graceful deletion signals to all containers we've reported started.
if lastContainerStarted, ok := kl.lastContainerStartedTime.Get(pod.UID); ok {
klog.V(3).InfoS("Pod has been deleted and must be killed", "pod", klog.KObj(pod), "podUID", pod.UID)
kl.podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodKill,
})
if err := kl.runtimeCache.ForceUpdateIfOlder(lastContainerStarted); err != nil {
return fmt.Errorf("error updating containers: %v", err)
}
}
// Runtime cache may not have been updated to with the pod, but it's okay
// because the periodic cleanup routine will attempt to delete again later.
runningPods, err := kl.runtimeCache.GetPods()
if err != nil {
return fmt.Errorf("error listing containers: %v", err)
}
runningPod := kubecontainer.Pods(runningPods).FindPod("", pod.UID)
if runningPod.IsEmpty() {
return fmt.Errorf("pod not found")
}
podPair := kubecontainer.PodPair{APIPod: pod, RunningPod: &runningPod}
kl.podKiller.KillPod(&podPair)
// We leave the volume/directory cleanup to the periodic cleanup routine.
return nil
}
@ -1853,7 +1947,7 @@ func (kl *Kubelet) canAdmitPod(pods []*v1.Pod, pod *v1.Pod) (bool, string, strin
func (kl *Kubelet) canRunPod(pod *v1.Pod) lifecycle.PodAdmitResult {
attrs := &lifecycle.PodAdmitAttributes{Pod: pod}
// Get "OtherPods". Rejected pods are failed, so only include admitted pods that are alive.
attrs.OtherPods = kl.filterOutTerminatedPods(kl.podManager.GetPods())
attrs.OtherPods = kl.GetActivePods()
for _, handler := range kl.softAdmitHandlers {
if result := handler.Admit(attrs); !result.Admit {
@ -2043,10 +2137,16 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
// skip housekeeping, as we may accidentally delete pods from unready sources.
klog.V(4).InfoS("SyncLoop (housekeeping, skipped): sources aren't ready yet")
} else {
start := time.Now()
klog.V(4).InfoS("SyncLoop (housekeeping)") klog.V(4).InfoS("SyncLoop (housekeeping)")
if err := handler.HandlePodCleanups(); err != nil { if err := handler.HandlePodCleanups(); err != nil {
klog.ErrorS(err, "Failed cleaning pods") klog.ErrorS(err, "Failed cleaning pods")
} }
duration := time.Since(start)
if duration > housekeepingWarningDuration {
klog.ErrorS(fmt.Errorf("housekeeping took too long"), "Housekeeping took longer than 15s", "seconds", duration.Seconds())
}
klog.V(4).InfoS("SyncLoop (housekeeping) end")
}
}
return true
@ -2067,31 +2167,12 @@ func handleProbeSync(kl *Kubelet, update proberesults.Update, handler SyncHandle
// dispatchWork starts the asynchronous sync of the pod in a pod worker.
// If the pod has completed termination, dispatchWork will perform no action.
func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
// check whether we are ready to delete the pod from the API server (all status up to date)
containersTerminal, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
if pod.DeletionTimestamp != nil && containersTerminal {
klog.V(4).InfoS("Pod has completed execution and should be deleted from the API server", "pod", klog.KObj(pod), "syncType", syncType)
kl.statusManager.TerminatePod(pod)
return
}
// optimization: avoid invoking the pod worker if no further changes are possible to the pod definition
// (i.e. the pod has completed and its containers have been terminated)
if podWorkerTerminal && containersTerminal {
klog.V(4).InfoS("Pod has completed and its containers have been terminated, ignoring remaining sync work", "pod", klog.KObj(pod), "syncType", syncType)
return
}
// Run the sync in an async worker.
kl.podWorkers.UpdatePod(&UpdatePodOptions{
kl.podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
MirrorPod: mirrorPod,
UpdateType: syncType,
OnCompleteFunc: func(err error) {
StartTime: start,
if err != nil {
metrics.PodWorkerDuration.WithLabelValues(syncType.String()).Observe(metrics.SinceInSeconds(start))
}
},
})
// Note the number of containers for new pods.
if syncType == kubetypes.SyncPodCreate {
@ -2127,10 +2208,13 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
continue
}
if !kl.podIsTerminated(pod) {
// Only go through the admission process if the pod is not
// terminated.
// Only go through the admission process if the pod is not requested
// for termination by another part of the kubelet. If the pod is already
// using resources (previously admitted), the pod worker is going to be
// shutting it down. If the pod hasn't started yet, we know that when
// the pod worker is invoked it will also avoid setting up the pod, so
// we simply avoid doing any work.
if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
// We failed pods that we rejected, so activePods include all admitted
// pods that are alive.
activePods := kl.filterOutTerminatedPods(existingPods)
@ -2304,13 +2388,8 @@ func (kl *Kubelet) ListenAndServePodResources() {
// Delete the eligible dead container instances in a pod. Depending on the configuration, the latest dead containers may be kept around.
func (kl *Kubelet) cleanUpContainersInPod(podID types.UID, exitedContainerID string) {
if podStatus, err := kl.podCache.Get(podID); err == nil {
removeAll := false
if syncedPod, ok := kl.podManager.GetPodByUID(podID); ok {
// When an evicted or deleted pod has already synced, all containers can be removed.
removeAll := kl.podWorkers.ShouldPodContentBeRemoved(podID)
// generate the api status using the cached runtime status to get up-to-date ContainerStatuses
apiPodStatus := kl.generateAPIPodStatus(syncedPod, podStatus)
// When an evicted or deleted pod has already synced, all containers can be removed.
removeAll = eviction.PodIsEvicted(syncedPod.Status) || (syncedPod.DeletionTimestamp != nil && notRunning(apiPodStatus.ContainerStatuses))
}
kl.containerDeletor.deleteContainersInPod(exitedContainerID, podStatus, removeAll)
}
}

View File

@ -30,7 +30,6 @@ import (
"runtime" "runtime"
"sort" "sort"
"strings" "strings"
"sync"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/errors"
@ -54,7 +53,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward" "k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward"
remotecommandserver "k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand" remotecommandserver "k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand"
"k8s.io/kubernetes/pkg/kubelet/envvars" "k8s.io/kubernetes/pkg/kubelet/envvars"
"k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/images" "k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/status" "k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -71,11 +69,6 @@ import (
const (
managedHostsHeader = "# Kubernetes-managed hosts file.\n"
managedHostsHeaderWithHostNetwork = "# Kubernetes-managed hosts file (host network).\n"
// Capacity of the channel for storing pods to kill. A small number should
// suffice because a goroutine is dedicated to check the channel and does
// not block on anything else.
podKillingChannelCapacity = 50
)
// Container state reason list
@ -99,7 +92,9 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
return pods, nil
}
// GetActivePods returns non-terminal pods
// GetActivePods returns pods that may have a running container (a
// terminated pod is one that is known to have no running containers and
// will not get any more).
func (kl *Kubelet) GetActivePods() []*v1.Pod {
allPods := kl.podManager.GetPods()
activePods := kl.filterOutTerminatedPods(allPods)
@ -853,18 +848,11 @@ func containerResourceRuntimeValue(fs *v1.ResourceFieldSelector, pod *v1.Pod, co
return resource.ExtractResourceValueByContainerName(fs, pod, containerName)
}
// One of the following arguments must be non-nil: runningPod, status.
func (kl *Kubelet) killPod(pod *v1.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus, gracePeriodOverride *int64) error {
var p kubecontainer.Pod
if runningPod != nil {
p = *runningPod
// killPod instructs the container runtime to kill the pod. This method requires that
// the pod status contains the result of the last syncPod, otherwise it may fail to
// terminate newly created containers and sandboxes.
func (kl *Kubelet) killPod(pod *v1.Pod, p kubecontainer.Pod, gracePeriodOverride *int64) error {
// Call the container runtime KillPod method which stops all known running containers of the pod
} else if status != nil {
p = kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), status)
} else {
return fmt.Errorf("one of the two arguments must be non-nil: runningPod, status")
}
// Call the container runtime KillPod method which stops all running containers of the pod
if err := kl.containerRuntime.KillPod(pod, p, gracePeriodOverride); err != nil {
return err
}
@ -912,83 +900,38 @@ func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
return pullSecrets
}
// podStatusIsTerminal reports when the specified pod has no running containers or is no longer accepting
// spec changes.
func (kl *Kubelet) podAndContainersAreTerminal(pod *v1.Pod) (containersTerminal, podWorkerTerminal bool) {
// Check the cached pod status which was set after the last sync.
status, ok := kl.statusManager.GetPodStatus(pod.UID)
if !ok {
func countRunningContainerStatus(status v1.PodStatus) int {
var runningContainers int
for _, c := range status.InitContainerStatuses {
if c.State.Running != nil {
runningContainers++
}
// If there is no cached status, use the status from the
// apiserver. This is useful if kubelet has recently been
// restarted.
status = pod.Status
}
// A pod transitions into failed or succeeded from either container lifecycle (RestartNever container
// fails) or due to external events like deletion or eviction. A terminal pod *should* have no running
// containers, but to know that the pod has completed its lifecycle you must wait for containers to also
// be terminal.
for _, c := range status.ContainerStatuses {
if c.State.Running != nil {
runningContainers++
}
containersTerminal = notRunning(status.ContainerStatuses)
// The kubelet must accept config changes from the pod spec until it has reached a point where changes would
// have no effect on any running container.
podWorkerTerminal = status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && containersTerminal)
return
}
// podIsTerminated returns true if the provided pod is in a terminal phase ("Failed", "Succeeded") or
// has been deleted and has no running containers. This corresponds to when a pod must accept changes to
// its pod spec (e.g. terminating containers allow grace period to be shortened).
func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
_, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
return podWorkerTerminal
}
// IsPodTerminated returns true if the pod with the provided UID is in a terminal phase ("Failed",
// "Succeeded") or has been deleted and has no running containers. This corresponds to when a pod must
// accept changes to its pod spec (e.g. terminating containers allow grace period to be shortened)
func (kl *Kubelet) IsPodTerminated(uid types.UID) bool {
pod, podFound := kl.podManager.GetPodByUID(uid)
if !podFound {
return true
} }
return kl.podIsTerminated(pod)
}
for _, c := range status.EphemeralContainerStatuses {
if c.State.Running != nil {
runningContainers++
// IsPodDeleted returns true if the pod is deleted. For the pod to be deleted, either: }
// 1. The pod object is deleted
// 2. The pod's status is evicted
// 3. The pod's deletion timestamp is set, and containers are not running
func (kl *Kubelet) IsPodDeleted(uid types.UID) bool {
pod, podFound := kl.podManager.GetPodByUID(uid)
if !podFound {
return true
} }
status, statusFound := kl.statusManager.GetPodStatus(pod.UID)
return runningContainers
if !statusFound {
status = pod.Status
}
return eviction.PodIsEvicted(status) || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
} }
// PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have // PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have
// been reclaimed by the kubelet. Reclaiming resources is a prerequisite to deleting a pod from the API server. // been reclaimed by the kubelet. Reclaiming resources is a prerequisite to deleting a pod from the API server.
func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool { func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool {
if !notRunning(status.ContainerStatuses) {
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) {
// We shouldn't delete pods that still have running containers
klog.V(3).InfoS("Pod is terminated, but some containers are still running", "pod", klog.KObj(pod))
return false
}
// pod's containers should be deleted
runtimeStatus, err := kl.podCache.Get(pod.UID)
if err != nil {
klog.V(3).InfoS("Pod is terminated, Error getting runtimeStatus from the podCache", "pod", klog.KObj(pod), "err", err)
if count := countRunningContainerStatus(status); count > 0 {
// We shouldn't delete pods until the reported pod status contains no more running containers (the previous
// check ensures no more status can be generated, this check verifies we have seen enough of the status)
klog.V(3).InfoS("Pod is terminated, but some container status has not yet been reported", "pod", klog.KObj(pod), "running", count)
return false
}
if len(runtimeStatus.ContainerStatuses) > 0 {
var statusStr string
for _, status := range runtimeStatus.ContainerStatuses {
statusStr += fmt.Sprintf("%+v ", *status)
}
klog.V(3).InfoS("Pod is terminated, but some containers have not been cleaned up", "pod", klog.KObj(pod), "status", statusStr)
return false return false
} }
if kl.podVolumesExist(pod.UID) && !kl.keepTerminatedPodVolumes {
@ -1003,6 +946,12 @@ func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bo
return false
}
}
// Note: we leave pod containers to be reclaimed in the background since dockershim requires the
// container for retrieving logs and we want to make sure logs are available until the pod is
// physically deleted.
klog.V(3).InfoS("Pod is terminated and all resources are reclaimed", "pod", klog.KObj(pod))
return true
}
@ -1015,23 +964,12 @@ func (kl *Kubelet) podResourcesAreReclaimed(pod *v1.Pod) bool {
return kl.PodResourcesAreReclaimed(pod, status)
}
// notRunning returns true if every status is terminated or waiting, or the status list
// is empty.
// filterOutTerminatedPods returns the pods that could still have running
// containers
func notRunning(statuses []v1.ContainerStatus) bool {
for _, status := range statuses {
if status.State.Terminated == nil && status.State.Waiting == nil {
return false
}
}
return true
}
// filterOutTerminatedPods returns the given pods which the status manager
// does not consider failed or succeeded.
func (kl *Kubelet) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod {
var filteredPods []*v1.Pod
for _, p := range pods {
if kl.podIsTerminated(p) {
if !kl.podWorkers.CouldHaveRunningContainers(p.UID) {
continue
}
filteredPods = append(filteredPods, p)
@ -1056,9 +994,9 @@ func (kl *Kubelet) removeOrphanedPodStatuses(pods []*v1.Pod, mirrorPods []*v1.Po
// If pod killing is done, podManager.DeleteMirrorPod() is called to delete mirror pod
// from the API server
func (kl *Kubelet) deleteOrphanedMirrorPods() {
podFullNames := kl.podManager.GetOrphanedMirrorPodNames()
for _, podFullname := range podFullNames {
if !kl.podKiller.IsPodPendingTerminationByPodName(podFullname) {
mirrorPods := kl.podManager.GetOrphanedMirrorPodNames()
for _, podFullname := range mirrorPods {
if !kl.podWorkers.IsPodForMirrorPodTerminatingByFullName(podFullname) {
_, err := kl.podManager.DeleteMirrorPod(podFullname, nil)
if err != nil {
klog.ErrorS(err, "Encountered error when deleting mirror pod", "podName", podFullname)
@ -1071,7 +1009,8 @@ func (kl *Kubelet) deleteOrphanedMirrorPods() {
// HandlePodCleanups performs a series of cleanup work, including terminating
// pod workers, killing unwanted pods, and removing orphaned volumes/pod
// directories.
// directories. No config changes are sent to pod workers while this method
// is executing which means no new pods can appear.
// NOTE: This function is executed by the main sync loop, so it
// should not contain any blocking calls.
func (kl *Kubelet) HandlePodCleanups() error {
@ -1102,43 +1041,92 @@ func (kl *Kubelet) HandlePodCleanups() error {
// to the apiserver, it could still restart the terminated pod (even
// though the pod was not considered terminated by the apiserver).
// These two conditions could be alleviated by checkpointing kubelet.
activePods := kl.filterOutTerminatedPods(allPods)
desiredPods := make(map[types.UID]sets.Empty)
for _, pod := range activePods {
desiredPods[pod.UID] = sets.Empty{}
// Stop the workers for terminated pods not in the config source
klog.V(3).InfoS("Clean up pod workers for terminated pods")
workingPods := kl.podWorkers.SyncKnownPods(allPods)
allPodsByUID := make(map[types.UID]*v1.Pod)
for _, pod := range allPods {
allPodsByUID[pod.UID] = pod
} }
// Stop the workers for no-longer existing pods.
kl.podWorkers.ForgetNonExistingPodWorkers(desiredPods)
kl.probeManager.CleanupPods(desiredPods)
runningPods, err := kl.runtimeCache.GetPods()
// Identify the set of pods that have workers, which should be all pods
// from config that are not terminated, as well as any terminating pods
// that have already been removed from config. Pods that are terminating
// will be added to possiblyRunningPods, to prevent overly aggressive
// cleanup of pod cgroups.
runningPods := make(map[types.UID]sets.Empty)
possiblyRunningPods := make(map[types.UID]sets.Empty)
for uid, sync := range workingPods {
switch sync {
case SyncPodWork:
runningPods[uid] = struct{}{}
possiblyRunningPods[uid] = struct{}{}
case TerminatingPodWork:
possiblyRunningPods[uid] = struct{}{}
}
}
// Stop probing pods that are not running
klog.V(3).InfoS("Clean up probes for terminating and terminated pods")
kl.probeManager.CleanupPods(runningPods)
// Terminate any pods that are observed in the runtime but not
// present in the list of known running pods from config.
runningRuntimePods, err := kl.runtimeCache.GetPods()
if err != nil {
klog.ErrorS(err, "Error listing containers")
return err
}
for _, pod := range runningPods {
if _, found := desiredPods[pod.ID]; !found {
kl.podKiller.KillPod(&kubecontainer.PodPair{APIPod: nil, RunningPod: pod})
for _, runningPod := range runningRuntimePods {
switch workType, ok := workingPods[runningPod.ID]; {
case ok && workType == SyncPodWork, ok && workType == TerminatingPodWork:
// if the pod worker is already in charge of this pod, we don't need to do anything
continue
default:
// If the pod isn't in the set that should be running and isn't already terminating, terminate
// now. This termination is aggressive because all known pods should already be in a known state
// (i.e. a removed static pod should already be terminating), so these are pods that were
// orphaned due to kubelet restart or bugs. Since housekeeping blocks other config changes, we
// know that another pod wasn't started in the background so we are safe to terminate the
// unknown pods.
if _, ok := allPodsByUID[runningPod.ID]; !ok {
klog.V(3).InfoS("Clean up orphaned pod containers", "podUID", runningPod.ID)
one := int64(1)
kl.podWorkers.UpdatePod(UpdatePodOptions{
UpdateType: kubetypes.SyncPodKill,
RunningPod: runningPod,
KillPodOptions: &KillPodOptions{
PodTerminationGracePeriodSecondsOverride: &one,
},
})
}
} }
} }
// Remove orphaned pod statuses not in the total list of known config pods
klog.V(3).InfoS("Clean up orphaned pod statuses")
kl.removeOrphanedPodStatuses(allPods, mirrorPods)
// Note that we just killed the unwanted pods. This may not have reflected
// in the cache. We need to bypass the cache to get the latest set of
// running pods to clean up the volumes.
// TODO: Evaluate the performance impact of bypassing the runtime cache.
runningPods, err = kl.containerRuntime.GetPods(false)
runningRuntimePods, err = kl.containerRuntime.GetPods(false)
if err != nil {
klog.ErrorS(err, "Error listing containers")
return err
}
// Remove any orphaned volumes.
// Note that we pass all pods (including terminated pods) to the function,
// so that we don't remove volumes associated with terminated but not yet
// deleted pods.
err = kl.cleanupOrphanedPodDirs(allPods, runningPods)
// Remove orphaned volumes from pods that are known not to have any
// containers. Note that we pass all pods (including terminated pods) to
// the function, so that we don't remove volumes associated with terminated
// but not yet deleted pods.
// TODO: this method could more aggressively cleanup terminated pods
// in the future (volumes, mount dirs, logs, and containers could all be
// better separated)
klog.V(3).InfoS("Clean up orphaned pod directories")
err = kl.cleanupOrphanedPodDirs(allPods, runningRuntimePods)
if err != nil {
// We want all cleanup tasks to be run even if one of them failed. So
// we just log an error here and continue other cleanup tasks.
@ -1146,162 +1134,23 @@ func (kl *Kubelet) HandlePodCleanups() error {
klog.ErrorS(err, "Failed cleaning up orphaned pod directories") klog.ErrorS(err, "Failed cleaning up orphaned pod directories")
} }
// Remove any orphaned mirror pods.
// Remove any orphaned mirror pods (mirror pods are tracked by name via the
// pod worker)
klog.V(3).InfoS("Clean up orphaned mirror pods")
kl.deleteOrphanedMirrorPods()
// Remove any cgroups in the hierarchy for pods that are no longer running.
// Remove any cgroups in the hierarchy for pods that are definitely no longer
// running (not in the container runtime).
if kl.cgroupsPerQOS {
pcm := kl.containerManager.NewPodContainerManager()
kl.cleanupOrphanedPodCgroups(pcm, cgroupPods, activePods)
klog.V(3).InfoS("Clean up orphaned pod cgroups")
kl.cleanupOrphanedPodCgroups(pcm, cgroupPods, possiblyRunningPods)
}
kl.backOff.GC()
return nil
}
// PodKiller handles requests for killing pods
type PodKiller interface {
// KillPod receives pod speficier representing the pod to kill
KillPod(podPair *kubecontainer.PodPair)
// PerformPodKillingWork performs the actual pod killing work via calling CRI
// It returns after its Close() func is called and all outstanding pod killing requests are served
PerformPodKillingWork()
// Close ensures that after it's called, then this pod killer wouldn't accept any more pod killing requests
Close()
// IsPodPendingTerminationByPodName checks whether any pod for the given full pod name is pending termination (thread safe)
IsPodPendingTerminationByPodName(podFullname string) bool
// IsPodPendingTerminationByUID checks whether the mirror pod for the given uid is pending termination (thread safe)
IsPodPendingTerminationByUID(uid types.UID) bool
}
// podKillerWithChannel is an implementation of PodKiller which receives pod killing requests via channel
type podKillerWithChannel struct {
// Channel for getting pods to kill.
podKillingCh chan *kubecontainer.PodPair
// lock for synchronization between HandlePodCleanups and pod killer
podKillingLock *sync.RWMutex
// mirrorPodTerminationMap keeps track of the progress of mirror pod termination
// The key is the UID of the pod and the value is the full name of the pod
mirrorPodTerminationMap map[string]string
// podTerminationMap keeps track of the progress of pod termination.
// The key is the UID of the pod and the value is the full name of the pod
podTerminationMap map[string]string
// killPod is the func which invokes runtime to kill the pod
killPod func(pod *v1.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus, gracePeriodOverride *int64) error
}
// NewPodKiller returns a functional PodKiller
func NewPodKiller(kl *Kubelet) PodKiller {
podKiller := &podKillerWithChannel{
podKillingCh: make(chan *kubecontainer.PodPair, podKillingChannelCapacity),
podKillingLock: &sync.RWMutex{},
mirrorPodTerminationMap: make(map[string]string),
podTerminationMap: make(map[string]string),
killPod: kl.killPod,
}
return podKiller
}
// IsPodPendingTerminationByUID checks whether the pod for the given uid is pending termination
func (pk *podKillerWithChannel) IsPodPendingTerminationByUID(uid types.UID) bool {
pk.podKillingLock.RLock()
defer pk.podKillingLock.RUnlock()
if _, ok := pk.podTerminationMap[string(uid)]; ok {
return ok
}
if _, ok := pk.mirrorPodTerminationMap[string(uid)]; ok {
return ok
}
return false
}
// IsPodPendingTerminationByPodName checks whether the given pod is in grace period of termination
func (pk *podKillerWithChannel) IsPodPendingTerminationByPodName(podFullname string) bool {
pk.podKillingLock.RLock()
defer pk.podKillingLock.RUnlock()
for _, name := range pk.mirrorPodTerminationMap {
if name == podFullname {
return true
}
}
for _, name := range pk.podTerminationMap {
if name == podFullname {
return true
}
}
return false
}
func (pk *podKillerWithChannel) markPodTerminated(uid string) {
klog.V(4).InfoS("Marking pod termination", "podUID", uid)
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
delete(pk.mirrorPodTerminationMap, uid)
delete(pk.podTerminationMap, uid)
}
// KillPod sends pod killing request to the killer after marks the pod
// unless the given pod has been marked to be killed
func (pk *podKillerWithChannel) KillPod(podPair *kubecontainer.PodPair) {
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
var apiPodExists bool
var runningPodExists bool
if podPair.APIPod != nil {
uid := string(podPair.APIPod.UID)
_, apiPodExists = pk.mirrorPodTerminationMap[uid]
if !apiPodExists {
fullname := kubecontainer.GetPodFullName(podPair.APIPod)
klog.V(4).InfoS("Marking api pod pending termination", "podName", fullname, "podUID", uid)
pk.mirrorPodTerminationMap[uid] = fullname
}
}
if podPair.RunningPod != nil {
uid := string(podPair.RunningPod.ID)
_, runningPodExists = pk.podTerminationMap[uid]
if !runningPodExists {
fullname := podPair.RunningPod.Name
klog.V(4).InfoS("Marking running pod pending termination", "podName", fullname, "podUID", uid)
pk.podTerminationMap[uid] = fullname
}
}
if apiPodExists || runningPodExists {
if apiPodExists && runningPodExists {
klog.V(4).InfoS("Api pod and running pod are pending termination", "apiPodUID", podPair.APIPod.UID, "runningPodUID", podPair.RunningPod.ID)
} else if apiPodExists {
klog.V(4).InfoS("Api pod is pending termination", "podUID", podPair.APIPod.UID)
} else {
klog.V(4).InfoS("Running pod is pending termination", "podUID", podPair.RunningPod.ID)
}
return
}
// Limit to one request per pod
pk.podKillingCh <- podPair
}
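A sketch of the dedup behavior enforced above (hypothetical snippet; only KillPod and PodPair are from the surrounding code):

// Illustration: a repeated request for a pod that is already pending
// termination returns without enqueueing a second time.
func killOnceExample(pk PodKiller, runningPod *kubecontainer.Pod) {
	pair := &kubecontainer.PodPair{RunningPod: runningPod}
	pk.KillPod(pair) // marks the pod and sends it to podKillingCh
	pk.KillPod(pair) // UID already marked: no second send
}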
// Close closes the channel through which requests are delivered
func (pk *podKillerWithChannel) Close() {
close(pk.podKillingCh)
}
// PerformPodKillingWork launches a goroutine to kill each pod received from the channel;
// duplicate requests for a pod already pending termination are filtered out upstream by KillPod.
func (pk *podKillerWithChannel) PerformPodKillingWork() {
for podPair := range pk.podKillingCh {
runningPod := podPair.RunningPod
apiPod := podPair.APIPod
go func(apiPod *v1.Pod, runningPod *kubecontainer.Pod) {
klog.V(2).InfoS("Killing unwanted pod", "podName", runningPod.Name)
err := pk.killPod(apiPod, runningPod, nil, nil)
if err != nil {
klog.ErrorS(err, "Failed killing the pod", "podName", runningPod.Name)
}
pk.markPodTerminated(string(runningPod.ID))
}(apiPod, runningPod)
}
}
// validateContainerLogStatus returns the container ID for the desired container to retrieve logs for, based on the state // validateContainerLogStatus returns the container ID for the desired container to retrieve logs for, based on the state
// of the container. The previous flag will only return the logs for the last terminated container, otherwise, the current // of the container. The previous flag will only return the logs for the last terminated container, otherwise, the current
// running container is preferred over a previous termination. If info about the container is not available then a specific // running container is preferred over a previous termination. If info about the container is not available then a specific
@ -2014,25 +1863,14 @@ func (kl *Kubelet) GetPortForward(podName, podNamespace string, podUID types.UID
// cleanupOrphanedPodCgroups removes cgroups that should no longer exist. // cleanupOrphanedPodCgroups removes cgroups that should no longer exist.
// it reconciles the cached state of cgroupPods with the specified list of runningPods // it reconciles the cached state of cgroupPods with the specified list of runningPods
func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupPods map[types.UID]cm.CgroupName, activePods []*v1.Pod) { func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupPods map[types.UID]cm.CgroupName, possiblyRunningPods map[types.UID]sets.Empty) {
// Add all running pods to the set that we want to preserve
podSet := sets.NewString()
for _, pod := range activePods {
podSet.Insert(string(pod.UID))
}
// Iterate over all the found pods to verify if they should be running // Iterate over all the found pods to verify if they should be running
for uid, val := range cgroupPods { for uid, val := range cgroupPods {
// if the pod is in the running set, its not a candidate for cleanup // if the pod is in the running set, its not a candidate for cleanup
if podSet.Has(string(uid)) { if _, ok := possiblyRunningPods[uid]; ok {
continue continue
} }
// if the pod is within termination grace period, we shouldn't cleanup the underlying cgroup
if kl.podKiller.IsPodPendingTerminationByUID(uid) {
klog.V(3).InfoS("Pod is pending termination", "podUID", uid)
continue
}
// If volumes have not been unmounted/detached, do not delete the cgroup // If volumes have not been unmounted/detached, do not delete the cgroup
// so any memory backed volumes don't have their charges propagated to the // so any memory backed volumes don't have their charges propagated to the
// parent cgroup. If the volumes still exist, reduce the cpu shares for any // parent cgroup. If the volumes still exist, reduce the cpu shares for any
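The new signature expects a pre-built UID set rather than a pod slice; a hedged sketch of constructing that argument (the helper name and its input are assumptions):

// Illustration: build the possiblyRunningPods argument for the new
// cleanupOrphanedPodCgroups signature from a list of pods.
func toPossiblyRunningExample(pods []*v1.Pod) map[types.UID]sets.Empty {
	out := make(map[types.UID]sets.Empty, len(pods))
	for _, pod := range pods {
		out[pod.UID] = sets.Empty{}
	}
	return out
}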

View File

@ -17,9 +17,11 @@ limitations under the License.
package kubelet package kubelet
import ( import (
"context"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"reflect"
"sort" "sort"
"strconv" "strconv"
"testing" "testing"
@ -119,7 +121,6 @@ type TestKubelet struct {
func (tk *TestKubelet) Cleanup() { func (tk *TestKubelet) Cleanup() {
if tk.kubelet != nil { if tk.kubelet != nil {
tk.kubelet.podKiller.Close()
os.RemoveAll(tk.kubelet.rootDirectory) os.RemoveAll(tk.kubelet.rootDirectory)
tk.kubelet = nil tk.kubelet = nil
} }
@ -293,12 +294,10 @@ func newTestKubeletWithImageList(
fakeClock := clock.NewFakeClock(time.Now()) fakeClock := clock.NewFakeClock(time.Now())
kubelet.backOff = flowcontrol.NewBackOff(time.Second, time.Minute) kubelet.backOff = flowcontrol.NewBackOff(time.Second, time.Minute)
kubelet.backOff.Clock = fakeClock kubelet.backOff.Clock = fakeClock
kubelet.podKiller = NewPodKiller(kubelet)
go kubelet.podKiller.PerformPodKillingWork()
kubelet.resyncInterval = 10 * time.Second kubelet.resyncInterval = 10 * time.Second
kubelet.workQueue = queue.NewBasicWorkQueue(fakeClock) kubelet.workQueue = queue.NewBasicWorkQueue(fakeClock)
// Relist period does not affect the tests. // Relist period does not affect the tests.
kubelet.pleg = pleg.NewGenericPLEG(fakeRuntime, 100, time.Hour, nil, clock.RealClock{}) kubelet.pleg = pleg.NewGenericPLEG(fakeRuntime, 100, time.Hour, kubelet.podCache, clock.RealClock{})
kubelet.clock = fakeClock kubelet.clock = fakeClock
nodeRef := &v1.ObjectReference{ nodeRef := &v1.ObjectReference{
@ -340,7 +339,7 @@ func newTestKubeletWithImageList(
controllerAttachDetachEnabled, controllerAttachDetachEnabled,
kubelet.nodeName, kubelet.nodeName,
kubelet.podManager, kubelet.podManager,
kubelet.statusManager, kubelet.podWorkers,
fakeKubeClient, fakeKubeClient,
kubelet.volumePluginMgr, kubelet.volumePluginMgr,
fakeRuntime, fakeRuntime,
@ -450,10 +449,12 @@ func TestHandlePodCleanupsPerQOS(t *testing.T) {
// mark the pod as killed (within this test case). // mark the pod as killed (within this test case).
kubelet.HandlePodCleanups() kubelet.HandlePodCleanups()
time.Sleep(2 * time.Second)
// assert that unwanted pods were killed // assert that unwanted pods were killed
fakeRuntime.AssertKilledPods([]string{"12345678"}) if actual, expected := kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion, []types.UID{"12345678"}; !reflect.DeepEqual(actual, expected) {
t.Fatalf("expected %v to be deleted, got %v", expected, actual)
}
fakeRuntime.AssertKilledPods([]string(nil))
// simulate Runtime.KillPod // simulate Runtime.KillPod
fakeRuntime.PodList = nil fakeRuntime.PodList = nil
@ -461,7 +462,6 @@ func TestHandlePodCleanupsPerQOS(t *testing.T) {
kubelet.HandlePodCleanups() kubelet.HandlePodCleanups()
kubelet.HandlePodCleanups() kubelet.HandlePodCleanups()
kubelet.HandlePodCleanups() kubelet.HandlePodCleanups()
time.Sleep(2 * time.Second)
destroyCount := 0 destroyCount := 0
err := wait.Poll(100*time.Millisecond, 10*time.Second, func() (bool, error) { err := wait.Poll(100*time.Millisecond, 10*time.Second, func() (bool, error) {
@ -488,9 +488,11 @@ func TestDispatchWorkOfCompletedPod(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup() defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet kubelet := testKubelet.kubelet
var got bool
kubelet.podWorkers = &fakePodWorkers{ kubelet.podWorkers = &fakePodWorkers{
syncPodFn: func(options syncPodOptions) error { syncPodFn: func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
return fmt.Errorf("should ignore completed pod %q", options.pod.Name) got = true
return nil
}, },
cache: kubelet.podCache, cache: kubelet.podCache,
t: t, t: t,
@ -554,6 +556,10 @@ func TestDispatchWorkOfCompletedPod(t *testing.T) {
} }
for _, pod := range pods { for _, pod := range pods {
kubelet.dispatchWork(pod, kubetypes.SyncPodSync, nil, time.Now()) kubelet.dispatchWork(pod, kubetypes.SyncPodSync, nil, time.Now())
if !got {
t.Errorf("Should not skip completed pod %q", pod.Name)
}
got = false
} }
} }
@ -563,7 +569,7 @@ func TestDispatchWorkOfActivePod(t *testing.T) {
kubelet := testKubelet.kubelet kubelet := testKubelet.kubelet
var got bool var got bool
kubelet.podWorkers = &fakePodWorkers{ kubelet.podWorkers = &fakePodWorkers{
syncPodFn: func(options syncPodOptions) error { syncPodFn: func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
got = true got = true
return nil return nil
}, },
@ -631,10 +637,12 @@ func TestHandlePodCleanups(t *testing.T) {
kubelet := testKubelet.kubelet kubelet := testKubelet.kubelet
kubelet.HandlePodCleanups() kubelet.HandlePodCleanups()
time.Sleep(2 * time.Second)
// assert that unwanted pods were killed // assert that unwanted pods were queued to kill
fakeRuntime.AssertKilledPods([]string{"12345678"}) if actual, expected := kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion, []types.UID{"12345678"}; !reflect.DeepEqual(actual, expected) {
t.Fatalf("expected %v to be deleted, got %v", expected, actual)
}
fakeRuntime.AssertKilledPods([]string(nil))
} }
func TestHandlePodRemovesWhenSourcesAreReady(t *testing.T) { func TestHandlePodRemovesWhenSourcesAreReady(t *testing.T) {
@ -667,44 +675,17 @@ func TestHandlePodRemovesWhenSourcesAreReady(t *testing.T) {
time.Sleep(2 * time.Second) time.Sleep(2 * time.Second)
// Sources are not ready yet. Don't remove any pods. // Sources are not ready yet. Don't remove any pods.
fakeRuntime.AssertKilledPods(nil) if expect, actual := []types.UID(nil), kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion; !reflect.DeepEqual(expect, actual) {
t.Fatalf("expected %v kills, got %v", expect, actual)
}
ready = true ready = true
kubelet.HandlePodRemoves(pods) kubelet.HandlePodRemoves(pods)
time.Sleep(2 * time.Second) time.Sleep(2 * time.Second)
// Sources are ready. Remove unwanted pods. // Sources are ready. Remove unwanted pods.
fakeRuntime.AssertKilledPods([]string{"1"}) if expect, actual := []types.UID{"1"}, kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion; !reflect.DeepEqual(expect, actual) {
} t.Fatalf("expected %v kills, got %v", expect, actual)
func TestKillPodFollwedByIsPodPendingTermination(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
pod := &kubecontainer.Pod{
ID: "12345678",
Name: "foo",
Namespace: "new",
Containers: []*kubecontainer.Container{
{Name: "bar"},
},
}
fakeRuntime := testKubelet.fakeRuntime
fakeContainerManager := testKubelet.fakeContainerManager
fakeContainerManager.PodContainerManager.AddPodFromCgroups(pod) // add pod to mock cgroup
fakeRuntime.PodList = []*containertest.FakePod{
{Pod: pod},
}
kl := testKubelet.kubelet
kl.podKiller.KillPod(&kubecontainer.PodPair{
APIPod: nil,
RunningPod: pod,
})
if !(kl.podKiller.IsPodPendingTerminationByUID(pod.ID) || fakeRuntime.AssertKilledPods([]string{"12345678"})) {
t.Fatal("Race condition: When KillPod is complete, the pod should be pending termination or be killed")
} }
} }
@ -726,6 +707,7 @@ func (nl testNodeLister) List(_ labels.Selector) (ret []*v1.Node, err error) {
} }
func checkPodStatus(t *testing.T, kl *Kubelet, pod *v1.Pod, phase v1.PodPhase) { func checkPodStatus(t *testing.T, kl *Kubelet, pod *v1.Pod, phase v1.PodPhase) {
t.Helper()
status, found := kl.statusManager.GetPodStatus(pod.UID) status, found := kl.statusManager.GetPodStatus(pod.UID)
require.True(t, found, "Status of pod %q is not found in the status map", pod.UID) require.True(t, found, "Status of pod %q is not found in the status map", pod.UID)
require.Equal(t, phase, status.Phase) require.Equal(t, phase, status.Phase)
@ -769,6 +751,10 @@ func TestHandlePortConflicts(t *testing.T) {
// The newer pod should be rejected. // The newer pod should be rejected.
notfittingPod := pods[0] notfittingPod := pods[0]
fittingPod := pods[1] fittingPod := pods[1]
kl.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
pods[0].UID: true,
pods[1].UID: true,
}
kl.HandlePodAdditions(pods) kl.HandlePodAdditions(pods)
@ -904,6 +890,10 @@ func TestHandleMemExceeded(t *testing.T) {
// The newer pod should be rejected. // The newer pod should be rejected.
notfittingPod := pods[0] notfittingPod := pods[0]
fittingPod := pods[1] fittingPod := pods[1]
kl.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
pods[0].UID: true,
pods[1].UID: true,
}
kl.HandlePodAdditions(pods) kl.HandlePodAdditions(pods)
@ -1230,11 +1220,7 @@ func TestCreateMirrorPod(t *testing.T) {
pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = "file" pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = "file"
pods := []*v1.Pod{pod} pods := []*v1.Pod{pod}
kl.podManager.SetPods(pods) kl.podManager.SetPods(pods)
err := kl.syncPod(syncPodOptions{ err := kl.syncPod(context.Background(), updateType, pod, nil, &kubecontainer.PodStatus{})
pod: pod,
podStatus: &kubecontainer.PodStatus{},
updateType: updateType,
})
assert.NoError(t, err) assert.NoError(t, err)
podFullName := kubecontainer.GetPodFullName(pod) podFullName := kubecontainer.GetPodFullName(pod)
assert.True(t, manager.HasPod(podFullName), "Expected mirror pod %q to be created", podFullName) assert.True(t, manager.HasPod(podFullName), "Expected mirror pod %q to be created", podFullName)
@ -1266,12 +1252,7 @@ func TestDeleteOutdatedMirrorPod(t *testing.T) {
pods := []*v1.Pod{pod, mirrorPod} pods := []*v1.Pod{pod, mirrorPod}
kl.podManager.SetPods(pods) kl.podManager.SetPods(pods)
err := kl.syncPod(syncPodOptions{ err := kl.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, mirrorPod, &kubecontainer.PodStatus{})
pod: pod,
mirrorPod: mirrorPod,
podStatus: &kubecontainer.PodStatus{},
updateType: kubetypes.SyncPodUpdate,
})
assert.NoError(t, err) assert.NoError(t, err)
name := kubecontainer.GetPodFullName(pod) name := kubecontainer.GetPodFullName(pod)
creates, deletes := manager.GetCounts(name) creates, deletes := manager.GetCounts(name)
@ -1309,17 +1290,41 @@ func TestDeleteOrphanedMirrorPods(t *testing.T) {
}, },
}, },
}, },
{
ObjectMeta: metav1.ObjectMeta{
UID: "12345670",
Name: "pod3",
Namespace: "ns",
Annotations: map[string]string{
kubetypes.ConfigSourceAnnotationKey: "api",
kubetypes.ConfigMirrorAnnotationKey: "mirror",
},
},
},
} }
kl.podManager.SetPods(orphanPods) kl.podManager.SetPods(orphanPods)
// a static pod that is terminating will not be deleted
kl.podWorkers.(*fakePodWorkers).terminatingStaticPods = map[string]bool{
kubecontainer.GetPodFullName(orphanPods[2]): true,
}
// Sync with an empty pod list to delete all mirror pods. // Sync with an empty pod list to delete all mirror pods.
kl.HandlePodCleanups() kl.HandlePodCleanups()
assert.Len(t, manager.GetPods(), 0, "Expected 0 mirror pods") assert.Len(t, manager.GetPods(), 0, "Expected 0 mirror pods")
for _, pod := range orphanPods { for i, pod := range orphanPods {
name := kubecontainer.GetPodFullName(pod) name := kubecontainer.GetPodFullName(pod)
creates, deletes := manager.GetCounts(name) creates, deletes := manager.GetCounts(name)
if creates != 0 || deletes != 1 { switch i {
t.Errorf("expected 0 creation and one deletion of %q, got %d, %d", name, creates, deletes) case 2:
if creates != 0 || deletes != 0 {
t.Errorf("expected 0 creation and 0 deletion of %q, got %d, %d", name, creates, deletes)
}
default:
if creates != 0 || deletes != 1 {
t.Errorf("expected 0 creation and one deletion of %q, got %d, %d", name, creates, deletes)
}
} }
} }
} }
@ -1404,20 +1409,12 @@ func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
}) })
kubelet.podManager.SetPods([]*v1.Pod{pod}) kubelet.podManager.SetPods([]*v1.Pod{pod})
err := kubelet.syncPod(syncPodOptions{ err := kubelet.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, nil, &kubecontainer.PodStatus{})
pod: pod,
podStatus: &kubecontainer.PodStatus{},
updateType: kubetypes.SyncPodUpdate,
})
assert.Error(t, err, "expected pod with hostNetwork=false to fail when network in error") assert.Error(t, err, "expected pod with hostNetwork=false to fail when network in error")
pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource
pod.Spec.HostNetwork = true pod.Spec.HostNetwork = true
err = kubelet.syncPod(syncPodOptions{ err = kubelet.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, nil, &kubecontainer.PodStatus{})
pod: pod,
podStatus: &kubecontainer.PodStatus{},
updateType: kubetypes.SyncPodUpdate,
})
assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error") assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
} }
@ -1442,6 +1439,12 @@ func TestFilterOutTerminatedPods(t *testing.T) {
pods[3].Status.Phase = v1.PodPending pods[3].Status.Phase = v1.PodPending
pods[4].Status.Phase = v1.PodRunning pods[4].Status.Phase = v1.PodRunning
kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
pods[2].UID: true,
pods[3].UID: true,
pods[4].UID: true,
}
expected := []*v1.Pod{pods[2], pods[3], pods[4]} expected := []*v1.Pod{pods[2], pods[3], pods[4]}
kubelet.podManager.SetPods(pods) kubelet.podManager.SetPods(pods)
actual := kubelet.filterOutTerminatedPods(pods) actual := kubelet.filterOutTerminatedPods(pods)
@ -1586,17 +1589,12 @@ func TestDeletePodDirsForDeletedPods(t *testing.T) {
} }
func syncAndVerifyPodDir(t *testing.T, testKubelet *TestKubelet, pods []*v1.Pod, podsToCheck []*v1.Pod, shouldExist bool) { func syncAndVerifyPodDir(t *testing.T, testKubelet *TestKubelet, pods []*v1.Pod, podsToCheck []*v1.Pod, shouldExist bool) {
t.Helper()
kl := testKubelet.kubelet kl := testKubelet.kubelet
kl.podManager.SetPods(pods) kl.podManager.SetPods(pods)
kl.HandlePodSyncs(pods) kl.HandlePodSyncs(pods)
kl.HandlePodCleanups() kl.HandlePodCleanups()
// The first time HandlePodCleanups() is run the pod is placed into the
// podKiller, and bypasses the pod directory cleanup. The pod is
// already killed in the second run to HandlePodCleanups() and will
// cleanup the directories.
time.Sleep(2 * time.Second)
kl.HandlePodCleanups()
for i, pod := range podsToCheck { for i, pod := range podsToCheck {
exist := dirExists(kl.getPodDir(pod.UID)) exist := dirExists(kl.getPodDir(pod.UID))
assert.Equal(t, shouldExist, exist, "directory of pod %d", i) assert.Equal(t, shouldExist, exist, "directory of pod %d", i)
@ -1612,7 +1610,6 @@ func TestDoesNotDeletePodDirsForTerminatedPods(t *testing.T) {
podWithUIDNameNs("12345679", "pod2", "ns"), podWithUIDNameNs("12345679", "pod2", "ns"),
podWithUIDNameNs("12345680", "pod3", "ns"), podWithUIDNameNs("12345680", "pod3", "ns"),
} }
syncAndVerifyPodDir(t, testKubelet, pods, pods, true) syncAndVerifyPodDir(t, testKubelet, pods, pods, true)
// Pod 1 failed, and pod 2 succeeded. None of the pod directories should be // Pod 1 failed, and pod 2 succeeded. None of the pod directories should be
// deleted. // deleted.
@ -1634,6 +1631,7 @@ func TestDoesNotDeletePodDirsIfContainerIsRunning(t *testing.T) {
// Sync once to create pod directory; confirm that the pod directory has // Sync once to create pod directory; confirm that the pod directory has
// already been created. // already been created.
pods := []*v1.Pod{apiPod} pods := []*v1.Pod{apiPod}
testKubelet.kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{apiPod.UID: true}
syncAndVerifyPodDir(t, testKubelet, pods, []*v1.Pod{apiPod}, true) syncAndVerifyPodDir(t, testKubelet, pods, []*v1.Pod{apiPod}, true)
// Pretend the pod is deleted from apiserver, but is still active on the node. // Pretend the pod is deleted from apiserver, but is still active on the node.
@ -1646,6 +1644,7 @@ func TestDoesNotDeletePodDirsIfContainerIsRunning(t *testing.T) {
// should be removed. // should be removed.
pods = []*v1.Pod{} pods = []*v1.Pod{}
testKubelet.fakeRuntime.PodList = []*containertest.FakePod{} testKubelet.fakeRuntime.PodList = []*containertest.FakePod{}
testKubelet.kubelet.podWorkers.(*fakePodWorkers).running = nil
syncAndVerifyPodDir(t, testKubelet, pods, []*v1.Pod{apiPod}, false) syncAndVerifyPodDir(t, testKubelet, pods, []*v1.Pod{apiPod}, false)
} }
@ -2233,7 +2232,7 @@ func TestGenerateAPIPodStatusInvokesPodSyncHandlers(t *testing.T) {
require.Equal(t, "because", apiStatus.Message) require.Equal(t, "because", apiStatus.Message)
} }
func TestSyncPodKillPod(t *testing.T) { func TestSyncTerminatingPodKillPod(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup() defer testKubelet.Cleanup()
kl := testKubelet.kubelet kl := testKubelet.kubelet
@ -2246,21 +2245,12 @@ func TestSyncPodKillPod(t *testing.T) {
} }
pods := []*v1.Pod{pod} pods := []*v1.Pod{pod}
kl.podManager.SetPods(pods) kl.podManager.SetPods(pods)
podStatus := &kubecontainer.PodStatus{ID: pod.UID}
gracePeriodOverride := int64(0) gracePeriodOverride := int64(0)
err := kl.syncPod(syncPodOptions{ err := kl.syncTerminatingPod(context.Background(), pod, podStatus, nil, &gracePeriodOverride, func(podStatus *v1.PodStatus) {
pod: pod, podStatus.Phase = v1.PodFailed
podStatus: &kubecontainer.PodStatus{}, podStatus.Reason = "reason"
updateType: kubetypes.SyncPodKill, podStatus.Message = "message"
killPodOptions: &KillPodOptions{
PodStatusFunc: func(p *v1.Pod, podStatus *kubecontainer.PodStatus) v1.PodStatus {
return v1.PodStatus{
Phase: v1.PodFailed,
Reason: "reason",
Message: "message",
}
},
PodTerminationGracePeriodSecondsOverride: &gracePeriodOverride,
},
}) })
require.NoError(t, err) require.NoError(t, err)

View File

@ -186,11 +186,6 @@ func (kl *Kubelet) cleanupOrphanedPodDirs(pods []*v1.Pod, runningPods []*kubecon
if allPods.Has(string(uid)) { if allPods.Has(string(uid)) {
continue continue
} }
// if the pod is within termination grace period, we shouldn't cleanup the underlying volumes
if kl.podKiller.IsPodPendingTerminationByUID(uid) {
klog.V(3).InfoS("Pod is pending termination", "podUID", uid)
continue
}
// If volumes have not been unmounted/detached, do not delete directory. // If volumes have not been unmounted/detached, do not delete directory.
// Doing so may result in corruption of data. // Doing so may result in corruption of data.
// TODO: getMountedVolumePathListFromDisk() call may be redundant with // TODO: getMountedVolumePathListFromDisk() call may be redundant with

View File

@ -315,9 +315,14 @@ func TestVolumeUnmountAndDetachControllerDisabled(t *testing.T) {
1 /* expectedSetUpCallCount */, testKubelet.volumePlugin)) 1 /* expectedSetUpCallCount */, testKubelet.volumePlugin))
// Remove pod // Remove pod
// TODO: this may not be threadsafe (technically waitForVolumeUnmount)
kubelet.podWorkers.(*fakePodWorkers).removeRuntime = map[types.UID]bool{pod.UID: true}
kubelet.podManager.SetPods([]*v1.Pod{}) kubelet.podManager.SetPods([]*v1.Pod{})
assert.NoError(t, waitForVolumeUnmount(kubelet.volumeManager, pod)) assert.NoError(t, kubelet.volumeManager.WaitForUnmount(pod))
if actual := kubelet.volumeManager.GetMountedVolumesForPod(util.GetUniquePodName(pod)); len(actual) > 0 {
t.Fatalf("expected volume unmount to wait for no volumes: %v", actual)
}
// Verify volumes unmounted // Verify volumes unmounted
podVolumes = kubelet.volumeManager.GetMountedVolumesForPod( podVolumes = kubelet.volumeManager.GetMountedVolumesForPod(
@ -499,6 +504,7 @@ func TestVolumeUnmountAndDetachControllerEnabled(t *testing.T) {
1 /* expectedSetUpCallCount */, testKubelet.volumePlugin)) 1 /* expectedSetUpCallCount */, testKubelet.volumePlugin))
// Remove pod // Remove pod
kubelet.podWorkers.(*fakePodWorkers).removeRuntime = map[types.UID]bool{pod.UID: true}
kubelet.podManager.SetPods([]*v1.Pod{}) kubelet.podManager.SetPods([]*v1.Pod{})
assert.NoError(t, waitForVolumeUnmount(kubelet.volumeManager, pod)) assert.NoError(t, waitForVolumeUnmount(kubelet.volumeManager, pod))

View File

@ -56,25 +56,25 @@ func (f *fakeHTTP) Get(url string) (*http.Response, error) {
} }
type fakePodStateProvider struct { type fakePodStateProvider struct {
existingPods map[types.UID]struct{} terminated map[types.UID]struct{}
runningPods map[types.UID]struct{} removed map[types.UID]struct{}
} }
func newFakePodStateProvider() *fakePodStateProvider { func newFakePodStateProvider() *fakePodStateProvider {
return &fakePodStateProvider{ return &fakePodStateProvider{
existingPods: make(map[types.UID]struct{}), terminated: make(map[types.UID]struct{}),
runningPods: make(map[types.UID]struct{}), removed: make(map[types.UID]struct{}),
} }
} }
func (f *fakePodStateProvider) IsPodDeleted(uid types.UID) bool { func (f *fakePodStateProvider) ShouldPodRuntimeBeRemoved(uid types.UID) bool {
_, found := f.existingPods[uid] _, found := f.terminated[uid]
return !found return found
} }
func (f *fakePodStateProvider) IsPodTerminated(uid types.UID) bool { func (f *fakePodStateProvider) ShouldPodContentBeRemoved(uid types.UID) bool {
_, found := f.runningPods[uid] _, found := f.removed[uid]
return !found return found
} }
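The fake above suggests the shape of the pod state provider contract the runtime manager now consumes; a sketch of that interface as inferred from this diff (the interface name and doc comments are assumptions; only the two method signatures appear above):

// Inferred sketch, not copied from this change.
type podStateProviderSketch interface {
	// ShouldPodRuntimeBeRemoved reports whether the pod's containers and
	// sandboxes may be removed (the pod has terminated).
	ShouldPodRuntimeBeRemoved(uid types.UID) bool
	// ShouldPodContentBeRemoved reports whether all remaining content for the
	// pod, including logs, may be removed.
	ShouldPodContentBeRemoved(uid types.UID) bool
}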
func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) { func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) {

View File

@ -220,7 +220,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
} }
// evict all containers that are evictable // evict all containers that are evictable
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictTerminatedPods bool) error { func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
// Separate containers by evict units. // Separate containers by evict units.
evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge) evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
if err != nil { if err != nil {
@ -230,7 +230,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSour
// Remove deleted pod containers if all sources are ready. // Remove deleted pod containers if all sources are ready.
if allSourcesReady { if allSourcesReady {
for key, unit := range evictUnits { for key, unit := range evictUnits {
if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) { if cgc.podStateProvider.ShouldPodContentBeRemoved(key.uid) || (evictNonDeletedPods && cgc.podStateProvider.ShouldPodRuntimeBeRemoved(key.uid)) {
cgc.removeOldestN(unit, len(unit)) // Remove all. cgc.removeOldestN(unit, len(unit)) // Remove all.
delete(evictUnits, key) delete(evictUnits, key)
} }
@ -272,7 +272,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSour
// 2. contains no containers. // 2. contains no containers.
// 3. belong to a non-existent (i.e., already removed) pod, or is not the // 3. belong to a non-existent (i.e., already removed) pod, or is not the
// most recently created sandbox for the pod. // most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error { func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
containers, err := cgc.manager.getKubeletContainers(true) containers, err := cgc.manager.getKubeletContainers(true)
if err != nil { if err != nil {
return err return err
@ -311,7 +311,7 @@ func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
} }
for podUID, sandboxes := range sandboxesByPod { for podUID, sandboxes := range sandboxesByPod {
if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) { if cgc.podStateProvider.ShouldPodContentBeRemoved(podUID) || (evictNonDeletedPods && cgc.podStateProvider.ShouldPodRuntimeBeRemoved(podUID)) {
// Remove all evictable sandboxes if the pod has been removed. // Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is // Note that the latest dead sandbox is also removed if there is
// already an active one. // already an active one.
@ -337,9 +337,10 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
for _, dir := range dirs { for _, dir := range dirs {
name := dir.Name() name := dir.Name()
podUID := parsePodUIDFromLogsDirectory(name) podUID := parsePodUIDFromLogsDirectory(name)
if !cgc.podStateProvider.IsPodDeleted(podUID) { if !cgc.podStateProvider.ShouldPodContentBeRemoved(podUID) {
continue continue
} }
klog.V(4).InfoS("Removing pod logs", "podUID", podUID)
err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name)) err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
if err != nil { if err != nil {
klog.ErrorS(err, "Failed to remove pod logs directory", "path", name) klog.ErrorS(err, "Failed to remove pod logs directory", "path", name)
@ -397,15 +398,15 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
// * removes oldest dead containers by enforcing gcPolicy.MaxContainers. // * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
// * gets evictable sandboxes which are not ready and contains no containers. // * gets evictable sandboxes which are not ready and contains no containers.
// * removes evictable sandboxes. // * removes evictable sandboxes.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictTerminatedPods bool) error { func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
errors := []error{} errors := []error{}
// Remove evictable containers // Remove evictable containers
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil { if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
errors = append(errors, err) errors = append(errors, err)
} }
// Remove sandboxes with zero containers // Remove sandboxes with zero containers
if err := cgc.evictSandboxes(evictTerminatedPods); err != nil { if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil {
errors = append(errors, err) errors = append(errors, err)
} }

View File

@ -24,7 +24,8 @@ import (
"github.com/golang/mock/gomock" "github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
@ -35,16 +36,14 @@ func TestSandboxGC(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodStateProvider bool, createdAt int64) sandboxTemplate { makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, hasRunningContainers, isTerminating bool, createdAt int64) sandboxTemplate {
if withPodStateProvider {
// initialize the pod getter
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return sandboxTemplate{ return sandboxTemplate{
pod: pod, pod: pod,
state: state, state: state,
attempt: attempt, attempt: attempt,
createdAt: createdAt, createdAt: createdAt,
running: hasRunningContainers,
terminating: isTerminating,
} }
} }
@ -61,133 +60,138 @@ func TestSandboxGC(t *testing.T) {
}), }),
} }
for c, test := range []struct { for _, test := range []struct {
description string // description of the test case description string // description of the test case
sandboxes []sandboxTemplate // templates of sandboxes sandboxes []sandboxTemplate // templates of sandboxes
containers []containerTemplate // templates of containers containers []containerTemplate // templates of containers
remain []int // template indexes of remaining sandboxes remain []int // template indexes of remaining sandboxes
evictTerminatedPods bool evictTerminatingPods bool
}{ }{
{ {
description: "notready sandboxes without containers for deleted pods should be garbage collected.", description: "notready sandboxes without containers for deleted pods should be garbage collected.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0), makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, false, 0),
}, },
containers: []containerTemplate{}, containers: []containerTemplate{},
remain: []int{}, remain: []int{},
evictTerminatedPods: false, evictTerminatingPods: false,
}, },
{ {
description: "ready sandboxes without containers for deleted pods should not be garbage collected.", description: "ready sandboxes without containers for deleted pods should not be garbage collected.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false, 0), makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false, false, 0),
}, },
containers: []containerTemplate{}, containers: []containerTemplate{},
remain: []int{0}, remain: []int{0},
evictTerminatedPods: false, evictTerminatingPods: false,
}, },
{ {
description: "sandboxes for existing pods should not be garbage collected.", description: "sandboxes for existing pods should not be garbage collected.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0), makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, false, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
}, },
containers: []containerTemplate{}, containers: []containerTemplate{},
remain: []int{0, 1}, remain: []int{0, 1},
evictTerminatedPods: false, evictTerminatingPods: false,
}, },
{ {
description: "older exited sandboxes without containers for existing pods should be garbage collected if there are more than one exited sandboxes.", description: "older exited sandboxes without containers for existing pods should be garbage collected if there are more than one exited sandboxes.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 1), makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 1),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
}, },
containers: []containerTemplate{}, containers: []containerTemplate{},
remain: []int{0}, remain: []int{0},
evictTerminatedPods: false, evictTerminatingPods: false,
}, },
{ {
description: "older exited sandboxes with containers for existing pods should not be garbage collected even if there are more than one exited sandboxes.", description: "older exited sandboxes with containers for existing pods should not be garbage collected even if there are more than one exited sandboxes.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 1), makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 1),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
}, },
containers: []containerTemplate{ containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], sandboxAttempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pods[0], container: &pods[0].Spec.Containers[0], sandboxAttempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
}, },
remain: []int{0, 1}, remain: []int{0, 1},
evictTerminatedPods: false, evictTerminatingPods: false,
}, },
{ {
description: "non-running sandboxes for existing pods should be garbage collected if evictTerminatedPods is set.", description: "non-running sandboxes for existing pods should be garbage collected if evictTerminatingPods is set.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0), makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, true, 0),
}, },
containers: []containerTemplate{}, containers: []containerTemplate{},
remain: []int{0}, remain: []int{0},
evictTerminatedPods: true, evictTerminatingPods: true,
}, },
{ {
description: "sandbox with containers should not be garbage collected.", description: "sandbox with containers should not be garbage collected.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0), makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, false, 0),
}, },
containers: []containerTemplate{ containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED},
}, },
remain: []int{0}, remain: []int{0},
evictTerminatedPods: false, evictTerminatingPods: false,
}, },
{ {
description: "multiple sandboxes should be handled properly.", description: "multiple sandboxes should be handled properly.",
sandboxes: []sandboxTemplate{ sandboxes: []sandboxTemplate{
// running sandbox. // running sandbox.
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true, 1), makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true, false, 1),
// exited sandbox without containers. // exited sandbox without containers.
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
// exited sandbox with containers. // exited sandbox with containers.
makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 1), makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 1),
// exited sandbox without containers. // exited sandbox without containers.
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
// exited sandbox without containers for deleted pods. // exited sandbox without containers for deleted pods.
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0), makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, true, 0),
}, },
containers: []containerTemplate{ containers: []containerTemplate{
{pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
}, },
remain: []int{0, 2}, remain: []int{0, 2},
evictTerminatedPods: false, evictTerminatingPods: false,
}, },
} { } {
t.Logf("TestCase #%d: %+v", c, test) t.Run(test.description, func(t *testing.T) {
fakeSandboxes := makeFakePodSandboxes(t, m, test.sandboxes) podStateProvider.removed = make(map[types.UID]struct{})
fakeContainers := makeFakeContainers(t, m, test.containers) podStateProvider.terminated = make(map[types.UID]struct{})
fakeRuntime.SetFakeSandboxes(fakeSandboxes) fakeSandboxes := makeFakePodSandboxes(t, m, test.sandboxes)
fakeRuntime.SetFakeContainers(fakeContainers) fakeContainers := makeFakeContainers(t, m, test.containers)
for _, s := range test.sandboxes {
if !s.running && s.pod.Name == "deleted" {
podStateProvider.removed[s.pod.UID] = struct{}{}
}
if s.terminating {
podStateProvider.terminated[s.pod.UID] = struct{}{}
}
}
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)
err := m.containerGC.evictSandboxes(test.evictTerminatedPods) err := m.containerGC.evictSandboxes(test.evictTerminatingPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.PodSandboxStatus(fakeSandboxes[remain].Id)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, &fakeSandboxes[remain].PodSandboxStatus, status) realRemain, err := fakeRuntime.ListPodSandbox(nil)
} assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.PodSandboxStatus(fakeSandboxes[remain].Id)
assert.NoError(t, err)
assert.Equal(t, &fakeSandboxes[remain].PodSandboxStatus, status)
}
})
} }
} }
func makeGCContainer(podStateProvider *fakePodStateProvider, podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate { func makeGCContainer(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image") container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container}) pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{ return containerTemplate{
pod: pod, pod: pod,
container: &container, container: &container,
@ -204,201 +208,212 @@ func TestContainerGC(t *testing.T) {
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
defaultGCPolicy := kubecontainer.GCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6} defaultGCPolicy := kubecontainer.GCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}
for c, test := range []struct { for _, test := range []struct {
description string // description of the test case description string // description of the test case
containers []containerTemplate // templates of containers containers []containerTemplate // templates of containers
policy *kubecontainer.GCPolicy // container gc policy policy *kubecontainer.GCPolicy // container gc policy
remain []int // template indexes of remaining containers remain []int // template indexes of remaining containers
evictTerminatedPods bool evictTerminatingPods bool
allSourcesReady bool allSourcesReady bool
}{ }{
{ {
description: "all containers should be removed when max container limit is 0", description: "all containers should be removed when max container limit is 0",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0}, policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{}, remain: []int{},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "max containers should be complied when no max per pod container limit is set", description: "max containers should be complied when no max per pod container limit is set",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4}, policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3}, remain: []int{0, 1, 2, 3},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "no containers should be removed if both max container and per pod container limits are not set", description: "no containers should be removed if both max container and per pod container limits are not set",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1}, policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "recently started containers should not be removed", description: "recently started containers should not be removed",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "oldest containers should be removed when per pod container limit exceeded", description: "oldest containers should be removed when per pod container limit exceeded",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1}, remain: []int{0, 1},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "running containers should not be removed", description: "running containers should not be removed",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
}, },
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "no containers should be removed when limits are not exceeded", description: "no containers should be removed when limits are not exceeded",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1}, remain: []int{0, 1},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "max container count should apply per (UID, container) pair", description: "max container count should apply per (UID, container) pair",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1, 3, 4, 6, 7}, remain: []int{0, 1, 3, 4, 6, 7},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "max limit should apply and try to keep from every pod", description: "max limit should apply and try to keep from every pod",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 2, 4, 6, 8}, remain: []int{0, 2, 4, 6, 8},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "oldest pods should be removed if limit exceeded", description: "oldest pods should be removed if limit exceeded",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 2, 4, 6, 8, 9}, remain: []int{0, 2, 4, 6, 8, 9},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "all non-running containers should be removed when evictTerminatedPods is set", description: "all non-running containers should be removed when evictTerminatingPods is set",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
}, },
remain: []int{4, 5}, remain: []int{4, 5},
evictTerminatedPods: true, evictTerminatingPods: true,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "containers for deleted pods should be removed", description: "containers for deleted pods should be removed",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
// deleted pods still respect MinAge. // deleted pods still respect MinAge.
makeGCContainer(podStateProvider, "deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
evictTerminatedPods: false, evictTerminatingPods: false,
allSourcesReady: true, allSourcesReady: true,
}, },
{ {
description: "containers for deleted pods may not be removed if allSourcesReady is set false ", description: "containers for deleted pods may not be removed if allSourcesReady is set false ",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0}, remain: []int{0},
evictTerminatedPods: true, evictTerminatingPods: true,
allSourcesReady: false, allSourcesReady: false,
}, },
} { } {
t.Logf("TestCase #%d: %+v", c, test) t.Run(test.description, func(t *testing.T) {
fakeContainers := makeFakeContainers(t, m, test.containers) podStateProvider.removed = make(map[types.UID]struct{})
fakeRuntime.SetFakeContainers(fakeContainers) podStateProvider.terminated = make(map[types.UID]struct{})
fakeContainers := makeFakeContainers(t, m, test.containers)
for _, s := range test.containers {
if s.pod.Name == "deleted" {
podStateProvider.removed[s.pod.UID] = struct{}{}
}
if s.pod.Name != "running" {
podStateProvider.terminated[s.pod.UID] = struct{}{}
}
}
fakeRuntime.SetFakeContainers(fakeContainers)
if test.policy == nil { if test.policy == nil {
test.policy = &defaultGCPolicy test.policy = &defaultGCPolicy
} }
err := m.containerGC.evictContainers(*test.policy, test.allSourcesReady, test.evictTerminatedPods) err := m.containerGC.evictContainers(*test.policy, test.allSourcesReady, test.evictTerminatingPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.ContainerStatus(fakeContainers[remain].Id)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, &fakeContainers[remain].ContainerStatus, status) realRemain, err := fakeRuntime.ListContainers(nil)
} assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.ContainerStatus(fakeContainers[remain].Id)
assert.NoError(t, err)
assert.Equal(t, &fakeContainers[remain].ContainerStatus, status)
}
})
} }
} }
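The cases above no longer thread a pod state provider into makeGCContainer; instead each t.Run rebuilds the fake provider's removed and terminated sets from the pod names ("deleted", "running", and the rest). A minimal, self-contained sketch of a map-backed provider in that spirit follows; the type and method names are illustrative assumptions, not the repository's fake.

package main

import "fmt"

// UID stands in for k8s.io/apimachinery/pkg/types.UID in this sketch.
type UID string

// fakeStateProvider answers the two questions container GC now asks the pod worker:
// may the pod's runtime objects be removed, and may all of its content be removed.
type fakeStateProvider struct {
	terminated map[UID]struct{} // all containers have stopped
	removed    map[UID]struct{} // deleted from the API and fully terminated
}

func (f *fakeStateProvider) ShouldPodRuntimeBeRemoved(uid UID) bool {
	_, ok := f.terminated[uid]
	return ok
}

func (f *fakeStateProvider) ShouldPodContentBeRemoved(uid UID) bool {
	_, ok := f.removed[uid]
	return ok
}

func main() {
	p := &fakeStateProvider{
		terminated: map[UID]struct{}{"exited": {}, "deleted": {}},
		removed:    map[UID]struct{}{"deleted": {}},
	}
	fmt.Println(p.ShouldPodRuntimeBeRemoved("exited"))  // true: containers may be GC'd
	fmt.Println(p.ShouldPodContentBeRemoved("exited"))  // false: the pod object still exists
	fmt.Println(p.ShouldPodContentBeRemoved("deleted")) // true: logs and sandboxes may go too
}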
@ -410,18 +425,15 @@ func TestPodLogDirectoryGC(t *testing.T) {
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
// pod log directories without corresponding pods should be removed. // pod log directories without corresponding pods should be removed.
podStateProvider.existingPods["123"] = struct{}{}
podStateProvider.existingPods["456"] = struct{}{}
podStateProvider.existingPods["321"] = struct{}{}
podStateProvider.runningPods["123"] = struct{}{}
podStateProvider.runningPods["456"] = struct{}{}
podStateProvider.existingPods["321"] = struct{}{}
files := []string{"123", "456", "789", "012", "name_namespace_321", "name_namespace_654"} files := []string{"123", "456", "789", "012", "name_namespace_321", "name_namespace_654"}
removed := []string{ removed := []string{
filepath.Join(podLogsRootDirectory, "789"), filepath.Join(podLogsRootDirectory, "789"),
filepath.Join(podLogsRootDirectory, "012"), filepath.Join(podLogsRootDirectory, "012"),
filepath.Join(podLogsRootDirectory, "name_namespace_654"), filepath.Join(podLogsRootDirectory, "name_namespace_654"),
} }
podStateProvider.removed["012"] = struct{}{}
podStateProvider.removed["789"] = struct{}{}
podStateProvider.removed["654"] = struct{}{}
ctrl := gomock.NewController(t) ctrl := gomock.NewController(t)
defer ctrl.Finish() defer ctrl.Finish()
@ -452,11 +464,11 @@ func TestUnknownStateContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager() fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err) assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) // podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
defaultGCPolicy := kubecontainer.GCPolicy{MinAge: time.Hour, MaxPerPodContainer: 0, MaxContainers: 0} defaultGCPolicy := kubecontainer.GCPolicy{MinAge: time.Hour, MaxPerPodContainer: 0, MaxContainers: 0}
fakeContainers := makeFakeContainers(t, m, []containerTemplate{ fakeContainers := makeFakeContainers(t, m, []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_UNKNOWN), makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_UNKNOWN),
}) })
fakeRuntime.SetFakeContainers(fakeContainers) fakeRuntime.SetFakeContainers(fakeContainers)


@ -75,10 +75,11 @@ var (
ErrVersionNotSupported = errors.New("runtime api version is not supported") ErrVersionNotSupported = errors.New("runtime api version is not supported")
) )
// podStateProvider can determine if a pod is deleted or terminated // podStateProvider can determine if none of the elements are necessary to retain (pod content)
// or if none of the runtime elements are necessary to retain (containers)
type podStateProvider interface { type podStateProvider interface {
IsPodDeleted(kubetypes.UID) bool ShouldPodContentBeRemoved(kubetypes.UID) bool
IsPodTerminated(kubetypes.UID) bool ShouldPodRuntimeBeRemoved(kubetypes.UID) bool
} }
type kubeGenericRuntimeManager struct { type kubeGenericRuntimeManager struct {
@ -804,7 +805,8 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
// or CRI if the Pod has been deleted while the POD is // or CRI if the Pod has been deleted while the POD is
// being created. If the pod has been deleted then it's // being created. If the pod has been deleted then it's
// not a real error. // not a real error.
if m.podStateProvider.IsPodDeleted(pod.UID) { // TODO: this is probably not needed now that termination is part of the sync loop
if m.podStateProvider.ShouldPodContentBeRemoved(pod.UID) {
klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID) klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
return return
} }


@ -70,10 +70,12 @@ func customTestRuntimeManager(keyring *credentialprovider.BasicDockerKeyring) (*
// sandboxTemplate is a sandbox template to create fake sandbox. // sandboxTemplate is a sandbox template to create fake sandbox.
type sandboxTemplate struct { type sandboxTemplate struct {
pod *v1.Pod pod *v1.Pod
attempt uint32 attempt uint32
createdAt int64 createdAt int64
state runtimeapi.PodSandboxState state runtimeapi.PodSandboxState
running bool
terminating bool
} }
// containerTemplate is a container template to create fake container. // containerTemplate is a container template to create fake container.
@ -1401,6 +1403,7 @@ func TestSyncPodWithSandboxAndDeletedPod(t *testing.T) {
} }
backOff := flowcontrol.NewBackOff(time.Second, time.Minute) backOff := flowcontrol.NewBackOff(time.Second, time.Minute)
m.podStateProvider.(*fakePodStateProvider).removed = map[types.UID]struct{}{pod.UID: {}}
// GetPodStatus and the following SyncPod will not return errors in the // GetPodStatus and the following SyncPod will not return errors in the
// case where the pod has been deleted. We are not adding any pods into // case where the pod has been deleted. We are not adding any pods into


@ -62,7 +62,7 @@ type Manager struct {
shutdownGracePeriodCriticalPods time.Duration shutdownGracePeriodCriticalPods time.Duration
getPods eviction.ActivePodsFunc getPods eviction.ActivePodsFunc
killPod eviction.KillPodFunc killPodFunc eviction.KillPodFunc
syncNodeStatus func() syncNodeStatus func()
dbusCon dbusInhibiter dbusCon dbusInhibiter
@ -78,7 +78,7 @@ type Manager struct {
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, syncNodeStatus func(), shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) (*Manager, lifecycle.PodAdmitHandler) { func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, syncNodeStatus func(), shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) (*Manager, lifecycle.PodAdmitHandler) {
manager := &Manager{ manager := &Manager{
getPods: getPodsFunc, getPods: getPodsFunc,
killPod: killPodFunc, killPodFunc: killPodFunc,
syncNodeStatus: syncNodeStatus, syncNodeStatus: syncNodeStatus,
shutdownGracePeriodRequested: shutdownGracePeriodRequested, shutdownGracePeriodRequested: shutdownGracePeriodRequested,
shutdownGracePeriodCriticalPods: shutdownGracePeriodCriticalPods, shutdownGracePeriodCriticalPods: shutdownGracePeriodCriticalPods,
@ -268,15 +268,10 @@ func (m *Manager) processShutdownEvent() error {
} }
klog.V(1).InfoS("Shutdown manager killing pod with gracePeriod", "pod", klog.KObj(pod), "gracePeriod", gracePeriodOverride) klog.V(1).InfoS("Shutdown manager killing pod with gracePeriod", "pod", klog.KObj(pod), "gracePeriod", gracePeriodOverride)
if err := m.killPodFunc(pod, false, &gracePeriodOverride, func(status *v1.PodStatus) {
status := v1.PodStatus{ status.Message = nodeShutdownMessage
Phase: v1.PodFailed, status.Reason = nodeShutdownReason
Reason: nodeShutdownReason, }); err != nil {
Message: nodeShutdownMessage,
}
err := m.killPod(pod, status, &gracePeriodOverride)
if err != nil {
klog.V(1).InfoS("Shutdown manager failed killing pod", "pod", klog.KObj(pod), "err", err) klog.V(1).InfoS("Shutdown manager failed killing pod", "pod", klog.KObj(pod), "err", err)
} else { } else {
klog.V(1).InfoS("Shutdown manager finished killing pod", "pod", klog.KObj(pod)) klog.V(1).InfoS("Shutdown manager finished killing pod", "pod", klog.KObj(pod))


@ -212,7 +212,7 @@ func TestManager(t *testing.T) {
} }
podKillChan := make(chan PodKillInfo, 1) podKillChan := make(chan PodKillInfo, 1)
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error { killPodsFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(podStatus *v1.PodStatus)) error {
var gracePeriod int64 var gracePeriod int64
if gracePeriodOverride != nil { if gracePeriodOverride != nil {
gracePeriod = *gracePeriodOverride gracePeriod = *gracePeriodOverride
@ -297,7 +297,7 @@ func TestFeatureEnabled(t *testing.T) {
activePodsFunc := func() []*v1.Pod { activePodsFunc := func() []*v1.Pod {
return nil return nil
} }
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error { killPodsFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
return nil return nil
} }
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, tc.featureGateEnabled)() defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, tc.featureGateEnabled)()
@ -323,7 +323,7 @@ func TestRestart(t *testing.T) {
activePodsFunc := func() []*v1.Pod { activePodsFunc := func() []*v1.Pod {
return nil return nil
} }
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error { killPodsFunc := func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
return nil return nil
} }
syncNodeStatus := func() {} syncNodeStatus := func() {}


@ -19,11 +19,13 @@ package testing
import ( import (
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types" kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)
import mock "github.com/stretchr/testify/mock"
import types "k8s.io/apimachinery/pkg/types" mock "github.com/stretchr/testify/mock"
import v1 "k8s.io/api/core/v1"
types "k8s.io/apimachinery/pkg/types"
v1 "k8s.io/api/core/v1"
)
// MockManager is an autogenerated mock type for the Manager type // MockManager is an autogenerated mock type for the Manager type
type MockManager struct { type MockManager struct {
@ -77,6 +79,20 @@ func (_m *MockManager) GetOrphanedMirrorPodNames() []string {
return r0 return r0
} }
func (_m *MockManager) GetOrphanedMirrorPodNamesAndUIDs() map[string]kubelettypes.ResolvedPodUID {
ret := _m.Called()
var r0 map[string]kubelettypes.ResolvedPodUID
if rf, ok := ret.Get(0).(func() map[string]kubelettypes.ResolvedPodUID); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(map[string]kubelettypes.ResolvedPodUID)
}
}
return r0
}
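In a test, the generated mock above would typically be stubbed with testify's On/Return. A plausible usage sketch follows; the map contents and helper name are hypothetical.

package sketch

import (
	podtest "k8s.io/kubernetes/pkg/kubelet/pod/testing"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// stubOrphanedMirrorPods returns a MockManager that reports one orphaned mirror pod.
func stubOrphanedMirrorPods() *podtest.MockManager {
	m := &podtest.MockManager{}
	m.On("GetOrphanedMirrorPodNamesAndUIDs").Return(map[string]kubelettypes.ResolvedPodUID{
		"name_namespace": kubelettypes.ResolvedPodUID("mirror-pod-uid"), // hypothetical values
	})
	return m
}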
// DeletePod provides a mock function with given fields: _a0 // DeletePod provides a mock function with given fields: _a0
func (_m *MockManager) DeletePod(_a0 *v1.Pod) { func (_m *MockManager) DeletePod(_a0 *v1.Pod) {
_m.Called(_a0) _m.Called(_a0)

File diff suppressed because it is too large


@ -17,6 +17,7 @@ limitations under the License.
package kubelet package kubelet
import ( import (
"context"
"reflect" "reflect"
"strconv" "strconv"
"sync" "sync"
@ -38,30 +39,82 @@ import (
// fakePodWorkers runs sync pod function in serial, so we can have // fakePodWorkers runs sync pod function in serial, so we can have
// deterministic behaviour in testing. // deterministic behaviour in testing.
type fakePodWorkers struct { type fakePodWorkers struct {
lock sync.Mutex
syncPodFn syncPodFnType syncPodFn syncPodFnType
cache kubecontainer.Cache cache kubecontainer.Cache
t TestingInterface t TestingInterface
triggeredDeletion []types.UID
statusLock sync.Mutex
running map[types.UID]bool
terminating map[types.UID]bool
terminationRequested map[types.UID]bool
removeRuntime map[types.UID]bool
removeContent map[types.UID]bool
terminatingStaticPods map[string]bool
} }
func (f *fakePodWorkers) UpdatePod(options *UpdatePodOptions) { func (f *fakePodWorkers) UpdatePod(options UpdatePodOptions) {
status, err := f.cache.Get(options.Pod.UID) f.lock.Lock()
defer f.lock.Unlock()
var uid types.UID
switch {
case options.Pod != nil:
uid = options.Pod.UID
case options.RunningPod != nil:
uid = options.RunningPod.ID
default:
return
}
status, err := f.cache.Get(uid)
if err != nil { if err != nil {
f.t.Errorf("Unexpected error: %v", err) f.t.Errorf("Unexpected error: %v", err)
} }
if err := f.syncPodFn(syncPodOptions{ switch options.UpdateType {
mirrorPod: options.MirrorPod, case kubetypes.SyncPodKill:
pod: options.Pod, f.triggeredDeletion = append(f.triggeredDeletion, uid)
podStatus: status, default:
updateType: options.UpdateType, if err := f.syncPodFn(context.Background(), options.UpdateType, options.Pod, options.MirrorPod, status); err != nil {
killPodOptions: options.KillPodOptions, f.t.Errorf("Unexpected error: %v", err)
}); err != nil { }
f.t.Errorf("Unexpected error: %v", err)
} }
} }
func (f *fakePodWorkers) ForgetNonExistingPodWorkers(desiredPods map[types.UID]sets.Empty) {} func (f *fakePodWorkers) SyncKnownPods(desiredPods []*v1.Pod) map[types.UID]PodWorkType {
return nil
}
func (f *fakePodWorkers) ForgetWorker(uid types.UID) {} func (f *fakePodWorkers) CouldHaveRunningContainers(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.running[uid]
}
func (f *fakePodWorkers) IsPodTerminationRequested(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.terminationRequested[uid]
}
func (f *fakePodWorkers) ShouldPodContainersBeTerminating(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.terminating[uid]
}
func (f *fakePodWorkers) ShouldPodRuntimeBeRemoved(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.removeRuntime[uid]
}
func (f *fakePodWorkers) ShouldPodContentBeRemoved(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.removeContent[uid]
}
func (f *fakePodWorkers) IsPodForMirrorPodTerminatingByFullName(podFullname string) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.terminatingStaticPods[podFullname]
}
type TestingInterface interface { type TestingInterface interface {
Errorf(format string, args ...interface{}) Errorf(format string, args ...interface{})
@ -80,6 +133,8 @@ func newPod(uid, name string) *v1.Pod {
type syncPodRecord struct { type syncPodRecord struct {
name string name string
updateType kubetypes.SyncPodType updateType kubetypes.SyncPodType
runningPod *kubecontainer.Pod
terminated bool
} }
func createPodWorkers() (*podWorkers, map[types.UID][]syncPodRecord) { func createPodWorkers() (*podWorkers, map[types.UID][]syncPodRecord) {
@ -88,15 +143,38 @@ func createPodWorkers() (*podWorkers, map[types.UID][]syncPodRecord) {
fakeRecorder := &record.FakeRecorder{} fakeRecorder := &record.FakeRecorder{}
fakeRuntime := &containertest.FakeRuntime{} fakeRuntime := &containertest.FakeRuntime{}
fakeCache := containertest.NewFakeCache(fakeRuntime) fakeCache := containertest.NewFakeCache(fakeRuntime)
podWorkers := newPodWorkers( w := newPodWorkers(
func(options syncPodOptions) error { func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
func() { func() {
lock.Lock() lock.Lock()
defer lock.Unlock() defer lock.Unlock()
pod := options.pod pod := pod
processed[pod.UID] = append(processed[pod.UID], syncPodRecord{ processed[pod.UID] = append(processed[pod.UID], syncPodRecord{
name: pod.Name, name: pod.Name,
updateType: options.updateType, updateType: updateType,
})
}()
return nil
},
func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
func() {
lock.Lock()
defer lock.Unlock()
processed[pod.UID] = append(processed[pod.UID], syncPodRecord{
name: pod.Name,
updateType: kubetypes.SyncPodKill,
runningPod: runningPod,
})
}()
return nil
},
func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
func() {
lock.Lock()
defer lock.Unlock()
processed[pod.UID] = append(processed[pod.UID], syncPodRecord{
name: pod.Name,
terminated: true,
}) })
}() }()
return nil return nil
@ -107,7 +185,7 @@ func createPodWorkers() (*podWorkers, map[types.UID][]syncPodRecord) {
time.Second, time.Second,
fakeCache, fakeCache,
) )
return podWorkers, processed return w.(*podWorkers), processed
} }
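createPodWorkers above now hands newPodWorkers three callbacks, one per stage of the pod lifecycle. Their shapes are copied from the wiring above; the type names below are descriptive labels, not identifiers from the PR.

package sketch

import (
	"context"

	v1 "k8s.io/api/core/v1"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// syncing: the pod should be running and is reconciled toward its spec.
type syncPodFnSketch func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error

// terminating: containers are being stopped; runningPod covers orphaned runtime pods
// with no config source, and podStatusFn lets the caller stamp the final status.
type syncTerminatingPodFnSketch func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error

// terminated: all containers have stopped; only cleanup that cannot restart them remains.
type syncTerminatedPodFnSketch func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error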
func drainWorkers(podWorkers *podWorkers, numPods int) { func drainWorkers(podWorkers *podWorkers, numPods int) {
@ -115,8 +193,27 @@ func drainWorkers(podWorkers *podWorkers, numPods int) {
stillWorking := false stillWorking := false
podWorkers.podLock.Lock() podWorkers.podLock.Lock()
for i := 0; i < numPods; i++ { for i := 0; i < numPods; i++ {
if podWorkers.isWorking[types.UID(strconv.Itoa(i))] { if s, ok := podWorkers.podSyncStatuses[types.UID(strconv.Itoa(i))]; ok && s.working {
stillWorking = true stillWorking = true
break
}
}
podWorkers.podLock.Unlock()
if !stillWorking {
break
}
time.Sleep(50 * time.Millisecond)
}
}
func drainAllWorkers(podWorkers *podWorkers) {
for {
stillWorking := false
podWorkers.podLock.Lock()
for _, worker := range podWorkers.podSyncStatuses {
if worker.working {
stillWorking = true
break
} }
} }
podWorkers.podLock.Unlock() podWorkers.podLock.Unlock()
@ -133,7 +230,7 @@ func TestUpdatePod(t *testing.T) {
numPods := 20 numPods := 20
for i := 0; i < numPods; i++ { for i := 0; i < numPods; i++ {
for j := i; j < numPods; j++ { for j := i; j < numPods; j++ {
podWorkers.UpdatePod(&UpdatePodOptions{ podWorkers.UpdatePod(UpdatePodOptions{
Pod: newPod(strconv.Itoa(j), strconv.Itoa(i)), Pod: newPod(strconv.Itoa(j), strconv.Itoa(i)),
UpdateType: kubetypes.SyncPodCreate, UpdateType: kubetypes.SyncPodCreate,
}) })
@ -165,20 +262,51 @@ func TestUpdatePod(t *testing.T) {
} }
} }
func TestUpdatePodForRuntimePod(t *testing.T) {
podWorkers, processed := createPodWorkers()
// ignores running pod of wrong sync type
podWorkers.UpdatePod(UpdatePodOptions{
UpdateType: kubetypes.SyncPodCreate,
RunningPod: &kubecontainer.Pod{ID: "1", Name: "1", Namespace: "test"},
})
drainAllWorkers(podWorkers)
if len(processed) != 0 {
t.Fatalf("Not all pods processed: %v", len(processed))
}
// creates synthetic pod
podWorkers.UpdatePod(UpdatePodOptions{
UpdateType: kubetypes.SyncPodKill,
RunningPod: &kubecontainer.Pod{ID: "1", Name: "1", Namespace: "test"},
})
drainAllWorkers(podWorkers)
if len(processed) != 1 {
t.Fatalf("Not all pods processed: %v", processed)
}
updates := processed["1"]
if len(updates) != 1 {
t.Fatalf("unexpected updates: %v", updates)
}
if updates[0].runningPod == nil || updates[0].updateType != kubetypes.SyncPodKill || updates[0].name != "1" {
t.Fatalf("unexpected update: %v", updates)
}
}
func TestUpdatePodDoesNotForgetSyncPodKill(t *testing.T) { func TestUpdatePodDoesNotForgetSyncPodKill(t *testing.T) {
podWorkers, processed := createPodWorkers() podWorkers, processed := createPodWorkers()
numPods := 20 numPods := 20
for i := 0; i < numPods; i++ { for i := 0; i < numPods; i++ {
pod := newPod(strconv.Itoa(i), strconv.Itoa(i)) pod := newPod(strconv.Itoa(i), strconv.Itoa(i))
podWorkers.UpdatePod(&UpdatePodOptions{ podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod, Pod: pod,
UpdateType: kubetypes.SyncPodCreate, UpdateType: kubetypes.SyncPodCreate,
}) })
podWorkers.UpdatePod(&UpdatePodOptions{ podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod, Pod: pod,
UpdateType: kubetypes.SyncPodKill, UpdateType: kubetypes.SyncPodKill,
}) })
podWorkers.UpdatePod(&UpdatePodOptions{ podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod, Pod: pod,
UpdateType: kubetypes.SyncPodUpdate, UpdateType: kubetypes.SyncPodUpdate,
}) })
@ -205,12 +333,12 @@ func TestUpdatePodDoesNotForgetSyncPodKill(t *testing.T) {
} }
} }
func TestForgetNonExistingPodWorkers(t *testing.T) { func TestSyncKnownPods(t *testing.T) {
podWorkers, _ := createPodWorkers() podWorkers, _ := createPodWorkers()
numPods := 20 numPods := 20
for i := 0; i < numPods; i++ { for i := 0; i < numPods; i++ {
podWorkers.UpdatePod(&UpdatePodOptions{ podWorkers.UpdatePod(UpdatePodOptions{
Pod: newPod(strconv.Itoa(i), "name"), Pod: newPod(strconv.Itoa(i), "name"),
UpdateType: kubetypes.SyncPodUpdate, UpdateType: kubetypes.SyncPodUpdate,
}) })
@ -224,7 +352,72 @@ func TestForgetNonExistingPodWorkers(t *testing.T) {
desiredPods := map[types.UID]sets.Empty{} desiredPods := map[types.UID]sets.Empty{}
desiredPods[types.UID("2")] = sets.Empty{} desiredPods[types.UID("2")] = sets.Empty{}
desiredPods[types.UID("14")] = sets.Empty{} desiredPods[types.UID("14")] = sets.Empty{}
podWorkers.ForgetNonExistingPodWorkers(desiredPods) desiredPodList := []*v1.Pod{newPod("2", "name"), newPod("14", "name")}
// kill all but the requested pods
for i := 0; i < numPods; i++ {
pod := newPod(strconv.Itoa(i), "name")
if _, ok := desiredPods[pod.UID]; ok {
continue
}
if (i % 2) == 0 {
now := metav1.Now()
pod.DeletionTimestamp = &now
}
podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodKill,
})
}
drainWorkers(podWorkers, numPods)
if !podWorkers.ShouldPodContainersBeTerminating(types.UID("0")) {
t.Errorf("Expected pod to be terminating")
}
if !podWorkers.ShouldPodContainersBeTerminating(types.UID("1")) {
t.Errorf("Expected pod to be terminating")
}
if podWorkers.ShouldPodContainersBeTerminating(types.UID("2")) {
t.Errorf("Expected pod to not be terminating")
}
if !podWorkers.IsPodTerminationRequested(types.UID("0")) {
t.Errorf("Expected pod to be terminating")
}
if podWorkers.IsPodTerminationRequested(types.UID("2")) {
t.Errorf("Expected pod to not be terminating")
}
if podWorkers.CouldHaveRunningContainers(types.UID("0")) {
t.Errorf("Expected pod to be terminated (deleted and terminated)")
}
if podWorkers.CouldHaveRunningContainers(types.UID("1")) {
t.Errorf("Expected pod to be terminated")
}
if !podWorkers.CouldHaveRunningContainers(types.UID("2")) {
t.Errorf("Expected pod to not be terminated")
}
if !podWorkers.ShouldPodContentBeRemoved(types.UID("0")) {
t.Errorf("Expected pod to be suitable for removal (deleted and terminated)")
}
if podWorkers.ShouldPodContentBeRemoved(types.UID("1")) {
t.Errorf("Expected pod to not be suitable for removal (terminated but not deleted)")
}
if podWorkers.ShouldPodContentBeRemoved(types.UID("2")) {
t.Errorf("Expected pod to not be suitable for removal (not terminated)")
}
if podWorkers.ShouldPodContainersBeTerminating(types.UID("abc")) {
t.Errorf("Expected pod to not be known to be terminating (does not exist but not yet synced)")
}
if !podWorkers.CouldHaveRunningContainers(types.UID("abc")) {
t.Errorf("Expected pod to potentially have running containers (does not exist but not yet synced)")
}
if podWorkers.ShouldPodContentBeRemoved(types.UID("abc")) {
t.Errorf("Expected pod to not be suitable for removal (does not exist but not yet synced)")
}
podWorkers.SyncKnownPods(desiredPodList)
if len(podWorkers.podUpdates) != 2 { if len(podWorkers.podUpdates) != 2 {
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates)) t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates))
} }
@ -234,34 +427,52 @@ func TestForgetNonExistingPodWorkers(t *testing.T) {
if _, exists := podWorkers.podUpdates[types.UID("14")]; !exists { if _, exists := podWorkers.podUpdates[types.UID("14")]; !exists {
t.Errorf("No updates channel for pod 14") t.Errorf("No updates channel for pod 14")
} }
if podWorkers.IsPodTerminationRequested(types.UID("2")) {
t.Errorf("Expected pod termination request to be cleared after sync")
}
podWorkers.ForgetNonExistingPodWorkers(map[types.UID]sets.Empty{}) if !podWorkers.ShouldPodContainersBeTerminating(types.UID("abc")) {
if len(podWorkers.podUpdates) != 0 { t.Errorf("Expected pod to be expected to terminate containers (does not exist and synced at least once)")
}
if podWorkers.CouldHaveRunningContainers(types.UID("abc")) {
t.Errorf("Expected pod to be known not to have running containers (does not exist and synced at least once)")
}
if !podWorkers.ShouldPodContentBeRemoved(types.UID("abc")) {
t.Errorf("Expected pod to be suitable for removal (does not exist and synced at least once)")
}
// verify workers that are not terminated stay open even if config no longer
// sees them
podWorkers.SyncKnownPods(nil)
if len(podWorkers.podUpdates) != 2 {
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates)) t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates))
} }
} if len(podWorkers.podSyncStatuses) != 2 {
t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.podSyncStatuses)
}
if len(podWorkers.lastUndeliveredWorkUpdate) != 0 {
t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.lastUndeliveredWorkUpdate)
}
func TestIsWorkingClearedAfterForgetPodWorkers(t *testing.T) { for uid := range desiredPods {
podWorkers, _ := createPodWorkers() pod := newPod(string(uid), "name")
podWorkers.UpdatePod(UpdatePodOptions{
numPods := 20 Pod: pod,
for i := 0; i < numPods; i++ { UpdateType: kubetypes.SyncPodKill,
podWorkers.UpdatePod(&UpdatePodOptions{
Pod: newPod(strconv.Itoa(i), "name"),
UpdateType: kubetypes.SyncPodUpdate,
}) })
} }
drainWorkers(podWorkers, numPods) drainWorkers(podWorkers, numPods)
if len(podWorkers.podUpdates) != numPods { // verify once those pods terminate (via some other flow) the workers are cleared
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates)) podWorkers.SyncKnownPods(nil)
}
podWorkers.ForgetNonExistingPodWorkers(map[types.UID]sets.Empty{})
if len(podWorkers.podUpdates) != 0 { if len(podWorkers.podUpdates) != 0 {
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates)) t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates))
} }
if len(podWorkers.isWorking) != 0 { if len(podWorkers.podSyncStatuses) != 0 {
t.Errorf("Incorrect number of isWorking %v", len(podWorkers.isWorking)) t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.podSyncStatuses)
}
if len(podWorkers.lastUndeliveredWorkUpdate) != 0 {
t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.lastUndeliveredWorkUpdate)
} }
} }
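The assertions above pin down the contract of the new state queries. A compact, self-contained restatement of what they expect at each point in a pod's lifecycle (the phase labels are descriptive only, not kubelet identifiers):

package main

import "fmt"

type expectation struct {
	phase                 string
	containersTerminating bool // ShouldPodContainersBeTerminating
	couldHaveRunning      bool // CouldHaveRunningContainers
	contentRemovable      bool // ShouldPodContentBeRemoved
}

func main() {
	table := []expectation{
		{"still desired, running", false, true, false},
		{"terminated, not deleted", true, false, false},
		{"terminated and deleted", true, false, true},
		{"unknown, before first SyncKnownPods", false, true, false},
		{"unknown, after SyncKnownPods", true, false, true},
	}
	for _, e := range table {
		fmt.Printf("%-38s terminating=%-5v couldRun=%-5v removable=%v\n",
			e.phase, e.containersTerminating, e.couldHaveRunning, e.contentRemovable)
	}
}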
@ -272,17 +483,25 @@ type simpleFakeKubelet struct {
wg sync.WaitGroup wg sync.WaitGroup
} }
func (kl *simpleFakeKubelet) syncPod(options syncPodOptions) error { func (kl *simpleFakeKubelet) syncPod(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
kl.pod, kl.mirrorPod, kl.podStatus = options.pod, options.mirrorPod, options.podStatus kl.pod, kl.mirrorPod, kl.podStatus = pod, mirrorPod, podStatus
return nil return nil
} }
func (kl *simpleFakeKubelet) syncPodWithWaitGroup(options syncPodOptions) error { func (kl *simpleFakeKubelet) syncPodWithWaitGroup(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
kl.pod, kl.mirrorPod, kl.podStatus = options.pod, options.mirrorPod, options.podStatus kl.pod, kl.mirrorPod, kl.podStatus = pod, mirrorPod, podStatus
kl.wg.Done() kl.wg.Done()
return nil return nil
} }
func (kl *simpleFakeKubelet) syncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
return nil
}
func (kl *simpleFakeKubelet) syncTerminatedPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
return nil
}
// TestFakePodWorkers verifies that the fakePodWorkers behaves the same way as the real podWorkers // TestFakePodWorkers verifies that the fakePodWorkers behaves the same way as the real podWorkers
// for their invocation of the syncPodFn. // for their invocation of the syncPodFn.
func TestFakePodWorkers(t *testing.T) { func TestFakePodWorkers(t *testing.T) {
@ -293,8 +512,16 @@ func TestFakePodWorkers(t *testing.T) {
kubeletForRealWorkers := &simpleFakeKubelet{} kubeletForRealWorkers := &simpleFakeKubelet{}
kubeletForFakeWorkers := &simpleFakeKubelet{} kubeletForFakeWorkers := &simpleFakeKubelet{}
realPodWorkers := newPodWorkers(kubeletForRealWorkers.syncPodWithWaitGroup, fakeRecorder, queue.NewBasicWorkQueue(&clock.RealClock{}), time.Second, time.Second, fakeCache) realPodWorkers := newPodWorkers(
fakePodWorkers := &fakePodWorkers{kubeletForFakeWorkers.syncPod, fakeCache, t} kubeletForRealWorkers.syncPodWithWaitGroup,
kubeletForRealWorkers.syncTerminatingPod,
kubeletForRealWorkers.syncTerminatedPod,
fakeRecorder, queue.NewBasicWorkQueue(&clock.RealClock{}), time.Second, time.Second, fakeCache)
fakePodWorkers := &fakePodWorkers{
syncPodFn: kubeletForFakeWorkers.syncPod,
cache: fakeCache,
t: t,
}
tests := []struct { tests := []struct {
pod *v1.Pod pod *v1.Pod
@ -316,12 +543,12 @@ func TestFakePodWorkers(t *testing.T) {
for i, tt := range tests { for i, tt := range tests {
kubeletForRealWorkers.wg.Add(1) kubeletForRealWorkers.wg.Add(1)
realPodWorkers.UpdatePod(&UpdatePodOptions{ realPodWorkers.UpdatePod(UpdatePodOptions{
Pod: tt.pod, Pod: tt.pod,
MirrorPod: tt.mirrorPod, MirrorPod: tt.mirrorPod,
UpdateType: kubetypes.SyncPodUpdate, UpdateType: kubetypes.SyncPodUpdate,
}) })
fakePodWorkers.UpdatePod(&UpdatePodOptions{ fakePodWorkers.UpdatePod(UpdatePodOptions{
Pod: tt.pod, Pod: tt.pod,
MirrorPod: tt.mirrorPod, MirrorPod: tt.mirrorPod,
UpdateType: kubetypes.SyncPodUpdate, UpdateType: kubetypes.SyncPodUpdate,
@ -350,19 +577,26 @@ func TestKillPodNowFunc(t *testing.T) {
killPodFunc := killPodNow(podWorkers, fakeRecorder) killPodFunc := killPodNow(podWorkers, fakeRecorder)
pod := newPod("test", "test") pod := newPod("test", "test")
gracePeriodOverride := int64(0) gracePeriodOverride := int64(0)
err := killPodFunc(pod, v1.PodStatus{Phase: v1.PodFailed, Reason: "reason", Message: "message"}, &gracePeriodOverride) err := killPodFunc(pod, false, &gracePeriodOverride, func(status *v1.PodStatus) {
status.Phase = v1.PodFailed
status.Reason = "reason"
status.Message = "message"
})
if err != nil { if err != nil {
t.Errorf("Unexpected error: %v", err) t.Fatalf("Unexpected error: %v", err)
} }
drainAllWorkers(podWorkers)
if len(processed) != 1 { if len(processed) != 1 {
t.Errorf("len(processed) expected: %v, actual: %v", 1, len(processed)) t.Fatalf("len(processed) expected: %v, actual: %#v", 1, processed)
return
} }
syncPodRecords := processed[pod.UID] syncPodRecords := processed[pod.UID]
if len(syncPodRecords) != 1 { if len(syncPodRecords) != 2 {
t.Errorf("Pod processed %v times, but expected %v", len(syncPodRecords), 1) t.Fatalf("Pod processed expected %v times, got %#v", 1, syncPodRecords)
} }
if syncPodRecords[0].updateType != kubetypes.SyncPodKill { if syncPodRecords[0].updateType != kubetypes.SyncPodKill {
t.Errorf("Pod update type was %v, but expected %v", syncPodRecords[0].updateType, kubetypes.SyncPodKill) t.Errorf("Pod update type was %v, but expected %v", syncPodRecords[0].updateType, kubetypes.SyncPodKill)
} }
if !syncPodRecords[1].terminated {
t.Errorf("Pod terminated %v, but expected %v", syncPodRecords[1].terminated, true)
}
} }


@ -20,7 +20,7 @@ import (
"fmt" "fmt"
"math" "math"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record" "k8s.io/client-go/tools/record"
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/v1/resource" "k8s.io/kubernetes/pkg/api/v1/resource"
@ -94,18 +94,16 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod,
if err != nil { if err != nil {
return fmt.Errorf("preemption: error finding a set of pods to preempt: %v", err) return fmt.Errorf("preemption: error finding a set of pods to preempt: %v", err)
} }
klog.InfoS("Attempting to evict pods in order to free up resources", "pods", podsToPreempt,
"insufficientResources", insufficientResources.toString())
for _, pod := range podsToPreempt { for _, pod := range podsToPreempt {
status := v1.PodStatus{
Phase: v1.PodFailed,
Message: message,
Reason: events.PreemptContainer,
}
// record that we are evicting the pod // record that we are evicting the pod
c.recorder.Eventf(pod, v1.EventTypeWarning, events.PreemptContainer, message) c.recorder.Eventf(pod, v1.EventTypeWarning, events.PreemptContainer, message)
// this is a blocking call and should only return when the pod and its containers are killed. // this is a blocking call and should only return when the pod and its containers are killed.
err := c.killPodFunc(pod, status, nil) klog.V(3).InfoS("Preempting pod to free up resources", "pod", klog.KObj(pod), "podUID", pod.UID, "insufficientResources", insufficientResources.toString())
err := c.killPodFunc(pod, true, nil, func(status *v1.PodStatus) {
status.Phase = v1.PodFailed
status.Reason = events.PreemptContainer
status.Message = message
})
if err != nil { if err != nil {
klog.ErrorS(err, "Failed to evict pod", "pod", klog.KObj(pod)) klog.ErrorS(err, "Failed to evict pod", "pod", klog.KObj(pod))
// In future syncPod loops, the kubelet will retry the pod deletion steps that it was stuck on. // In future syncPod loops, the kubelet will retry the pod deletion steps that it was stuck on.


@ -20,7 +20,7 @@ import (
"fmt" "fmt"
"testing" "testing"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record" "k8s.io/client-go/tools/record"
@ -57,7 +57,7 @@ func (f *fakePodKiller) getKilledPods() []*v1.Pod {
return f.killedPods return f.killedPods
} }
func (f *fakePodKiller) killPodNow(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error { func (f *fakePodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(status *v1.PodStatus)) error {
if f.errDuringPodKilling { if f.errDuringPodKilling {
f.killedPods = []*v1.Pod{} f.killedPods = []*v1.Pod{}
return fmt.Errorf("problem killing pod %v", pod) return fmt.Errorf("problem killing pod %v", pod)


@ -17,11 +17,12 @@ limitations under the License.
package kubelet package kubelet
import ( import (
"context"
"fmt" "fmt"
"os" "os"
"time" "time"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2" "k8s.io/klog/v2"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -130,12 +131,7 @@ func (kl *Kubelet) runPod(pod *v1.Pod, retryDelay time.Duration) error {
klog.ErrorS(err, "Failed creating a mirror pod", "pod", klog.KObj(pod)) klog.ErrorS(err, "Failed creating a mirror pod", "pod", klog.KObj(pod))
} }
mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod) mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
if err = kl.syncPod(syncPodOptions{ if err = kl.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, mirrorPod, status); err != nil {
pod: pod,
mirrorPod: mirrorPod,
podStatus: status,
updateType: kubetypes.SyncPodUpdate,
}); err != nil {
return fmt.Errorf("error syncing pod %q: %v", format.Pod(pod), err) return fmt.Errorf("error syncing pod %q: %v", format.Pod(pod), err)
} }
if retry >= runOnceMaxRetries { if retry >= runOnceMaxRetries {


@ -79,6 +79,7 @@ func TestRunOnce(t *testing.T) {
nodeLister: testNodeLister{}, nodeLister: testNodeLister{},
statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}), statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}),
podManager: podManager, podManager: podManager,
podWorkers: &fakePodWorkers{},
os: &containertest.FakeOS{}, os: &containertest.FakeOS{},
containerRuntime: fakeRuntime, containerRuntime: fakeRuntime,
reasonCache: NewReasonCache(), reasonCache: NewReasonCache(),
@ -101,7 +102,7 @@ func TestRunOnce(t *testing.T) {
true, true,
kb.nodeName, kb.nodeName,
kb.podManager, kb.podManager,
kb.statusManager, kb.podWorkers,
kb.kubeClient, kb.kubeClient,
kb.volumePluginMgr, kb.volumePluginMgr,
fakeRuntime, fakeRuntime,
@ -122,7 +123,7 @@ func TestRunOnce(t *testing.T) {
UID: types.UID(kb.nodeName), UID: types.UID(kb.nodeName),
Namespace: "", Namespace: "",
} }
fakeKillPodFunc := func(pod *v1.Pod, podStatus v1.PodStatus, gracePeriodOverride *int64) error { fakeKillPodFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
return nil return nil
} }
fakeMirrodPodFunc := func(*v1.Pod) (*v1.Pod, bool) { return nil, false } fakeMirrodPodFunc := func(*v1.Pod) (*v1.Pod, bool) { return nil, false }


@ -20,6 +20,7 @@ import (
"context" "context"
"fmt" "fmt"
"sort" "sort"
"strings"
"sync" "sync"
"time" "time"
@ -356,6 +357,7 @@ func (m *manager) TerminatePod(pod *v1.Pod) {
} }
} }
klog.V(5).InfoS("TerminatePod calling updateStatusInternal", "pod", klog.KObj(pod), "podUID", pod.UID)
m.updateStatusInternal(pod, status, true) m.updateStatusInternal(pod, status, true)
} }
@ -431,10 +433,43 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp
} }
normalizeStatus(pod, &status) normalizeStatus(pod, &status)
// Perform some more extensive logging of container termination state to assist in
// debugging production races (generally not needed).
if klog.V(5).Enabled() {
var containers []string
for _, s := range append(append([]v1.ContainerStatus(nil), status.InitContainerStatuses...), status.ContainerStatuses...) {
var current, previous string
switch {
case s.State.Running != nil:
current = "running"
case s.State.Waiting != nil:
current = "waiting"
case s.State.Terminated != nil:
current = fmt.Sprintf("terminated=%d", s.State.Terminated.ExitCode)
default:
current = "unknown"
}
switch {
case s.LastTerminationState.Running != nil:
previous = "running"
case s.LastTerminationState.Waiting != nil:
previous = "waiting"
case s.LastTerminationState.Terminated != nil:
previous = fmt.Sprintf("terminated=%d", s.LastTerminationState.Terminated.ExitCode)
default:
previous = "<none>"
}
containers = append(containers, fmt.Sprintf("(%s state=%s previous=%s)", s.Name, current, previous))
}
sort.Strings(containers)
klog.InfoS("updateStatusInternal", "version", cachedStatus.version+1, "pod", klog.KObj(pod), "podUID", pod.UID, "containers", strings.Join(containers, " "))
}
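For example, an init container that exited 0 alongside an app container currently running whose previous instance exited 1 would be summarized (after sorting) as:

(app state=running previous=terminated=1) (init state=terminated=0 previous=<none>)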
// The intent here is to prevent concurrent updates to a pod's status from // The intent here is to prevent concurrent updates to a pod's status from
// clobbering each other so the phase of a pod progresses monotonically. // clobbering each other so the phase of a pod progresses monotonically.
if isCached && isPodStatusByKubeletEqual(&cachedStatus.status, &status) && !forceUpdate { if isCached && isPodStatusByKubeletEqual(&cachedStatus.status, &status) && !forceUpdate {
klog.V(5).InfoS("Ignoring same status for pod", "pod", klog.KObj(pod), "status", status) klog.V(3).InfoS("Ignoring same status for pod", "pod", klog.KObj(pod), "status", status)
return false // No new status. return false // No new status.
} }
@ -583,7 +618,7 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
oldStatus := pod.Status.DeepCopy() oldStatus := pod.Status.DeepCopy()
newPod, patchBytes, unchanged, err := statusutil.PatchPodStatus(m.kubeClient, pod.Namespace, pod.Name, pod.UID, *oldStatus, mergePodStatus(*oldStatus, status.status)) newPod, patchBytes, unchanged, err := statusutil.PatchPodStatus(m.kubeClient, pod.Namespace, pod.Name, pod.UID, *oldStatus, mergePodStatus(*oldStatus, status.status))
klog.V(3).InfoS("Patch status for pod", "pod", klog.KObj(pod), "patchBytes", patchBytes) klog.V(3).InfoS("Patch status for pod", "pod", klog.KObj(pod), "patch", string(patchBytes))
if err != nil { if err != nil {
klog.InfoS("Failed to update status for pod", "pod", klog.KObj(pod), "err", err) klog.InfoS("Failed to update status for pod", "pod", klog.KObj(pod), "err", err)


@ -120,8 +120,8 @@ const (
SyncPodUpdate SyncPodUpdate
// SyncPodCreate is when the pod is created from source // SyncPodCreate is when the pod is created from source
SyncPodCreate SyncPodCreate
// SyncPodKill is when the pod is killed based on a trigger internal to the kubelet for eviction. // SyncPodKill is when the pod should have no running containers. A pod stopped in this way could be
// If a SyncPodKill request is made to pod workers, the request is never dropped, and will always be processed. // restarted in the future due config changes.
SyncPodKill SyncPodKill
) )


@ -40,7 +40,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config" "k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/pod" "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache" "k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
"k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/csimigration" "k8s.io/kubernetes/pkg/volume/csimigration"
@ -70,6 +69,12 @@ type DesiredStateOfWorldPopulator interface {
HasAddedPods() bool HasAddedPods() bool
} }
// podStateProvider can determine if a pod is going to be terminated.
type podStateProvider interface {
ShouldPodContainersBeTerminating(types.UID) bool
ShouldPodRuntimeBeRemoved(types.UID) bool
}
// NewDesiredStateOfWorldPopulator returns a new instance of // NewDesiredStateOfWorldPopulator returns a new instance of
// DesiredStateOfWorldPopulator. // DesiredStateOfWorldPopulator.
// //
@ -84,7 +89,7 @@ func NewDesiredStateOfWorldPopulator(
loopSleepDuration time.Duration, loopSleepDuration time.Duration,
getPodStatusRetryDuration time.Duration, getPodStatusRetryDuration time.Duration,
podManager pod.Manager, podManager pod.Manager,
podStatusProvider status.PodStatusProvider, podStateProvider podStateProvider,
desiredStateOfWorld cache.DesiredStateOfWorld, desiredStateOfWorld cache.DesiredStateOfWorld,
actualStateOfWorld cache.ActualStateOfWorld, actualStateOfWorld cache.ActualStateOfWorld,
kubeContainerRuntime kubecontainer.Runtime, kubeContainerRuntime kubecontainer.Runtime,
@ -97,7 +102,7 @@ func NewDesiredStateOfWorldPopulator(
loopSleepDuration: loopSleepDuration, loopSleepDuration: loopSleepDuration,
getPodStatusRetryDuration: getPodStatusRetryDuration, getPodStatusRetryDuration: getPodStatusRetryDuration,
podManager: podManager, podManager: podManager,
podStatusProvider: podStatusProvider, podStateProvider: podStateProvider,
desiredStateOfWorld: desiredStateOfWorld, desiredStateOfWorld: desiredStateOfWorld,
actualStateOfWorld: actualStateOfWorld, actualStateOfWorld: actualStateOfWorld,
pods: processedPods{ pods: processedPods{
@ -117,7 +122,7 @@ type desiredStateOfWorldPopulator struct {
loopSleepDuration time.Duration loopSleepDuration time.Duration
getPodStatusRetryDuration time.Duration getPodStatusRetryDuration time.Duration
podManager pod.Manager podManager pod.Manager
podStatusProvider status.PodStatusProvider podStateProvider podStateProvider
desiredStateOfWorld cache.DesiredStateOfWorld desiredStateOfWorld cache.DesiredStateOfWorld
actualStateOfWorld cache.ActualStateOfWorld actualStateOfWorld cache.ActualStateOfWorld
pods processedPods pods processedPods
@ -177,14 +182,6 @@ func (dswp *desiredStateOfWorldPopulator) populatorLoop() {
dswp.findAndRemoveDeletedPods() dswp.findAndRemoveDeletedPods()
} }
func (dswp *desiredStateOfWorldPopulator) isPodTerminated(pod *v1.Pod) bool {
podStatus, found := dswp.podStatusProvider.GetPodStatus(pod.UID)
if !found {
podStatus = pod.Status
}
return util.IsPodTerminated(pod, podStatus)
}
// Iterate through all pods and add to desired state of world if they don't // Iterate through all pods and add to desired state of world if they don't
// exist but should // exist but should
func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() { func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
@ -203,8 +200,8 @@ func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
processedVolumesForFSResize := sets.NewString() processedVolumesForFSResize := sets.NewString()
for _, pod := range dswp.podManager.GetPods() { for _, pod := range dswp.podManager.GetPods() {
if dswp.isPodTerminated(pod) { if dswp.podStateProvider.ShouldPodContainersBeTerminating(pod.UID) {
// Do not (re)add volumes for terminated pods // Do not (re)add volumes for pods that can't also be starting containers
continue continue
} }
dswp.processPodVolumes(pod, mountedVolumesForPod, processedVolumesForFSResize) dswp.processPodVolumes(pod, mountedVolumesForPod, processedVolumesForFSResize)
@ -214,9 +211,6 @@ func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
// Iterate through all pods in desired state of world, and remove if they no // Iterate through all pods in desired state of world, and remove if they no
// longer exist // longer exist
func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() { func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
var runningPods []*kubecontainer.Pod
runningPodsFetched := false
for _, volumeToMount := range dswp.desiredStateOfWorld.GetVolumesToMount() { for _, volumeToMount := range dswp.desiredStateOfWorld.GetVolumesToMount() {
pod, podExists := dswp.podManager.GetPodByUID(volumeToMount.Pod.UID) pod, podExists := dswp.podManager.GetPodByUID(volumeToMount.Pod.UID)
if podExists { if podExists {
@ -234,8 +228,8 @@ func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
} }
} }
// Skip running pods // Exclude known pods that we expect to be running
if !dswp.isPodTerminated(pod) { if !dswp.podStateProvider.ShouldPodRuntimeBeRemoved(pod.UID) {
continue continue
} }
if dswp.keepTerminatedPodVolumes { if dswp.keepTerminatedPodVolumes {
@ -245,36 +239,9 @@ func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
// Once a pod has been deleted from kubelet pod manager, do not delete // Once a pod has been deleted from kubelet pod manager, do not delete
// it immediately from volume manager. Instead, check the kubelet // it immediately from volume manager. Instead, check the kubelet
// containerRuntime to verify that all containers in the pod have been // pod state provider to verify that all containers in the pod have been
// terminated. // terminated.
if !runningPodsFetched { if !dswp.podStateProvider.ShouldPodRuntimeBeRemoved(volumeToMount.Pod.UID) {
var getPodsErr error
runningPods, getPodsErr = dswp.kubeContainerRuntime.GetPods(false)
if getPodsErr != nil {
klog.ErrorS(getPodsErr, "kubeContainerRuntime.findAndRemoveDeletedPods returned error")
continue
}
runningPodsFetched = true
dswp.timeOfLastGetPodStatus = time.Now()
}
runningContainers := false
for _, runningPod := range runningPods {
if runningPod.ID == volumeToMount.Pod.UID {
// runningPod.Containers only include containers in the running state,
// excluding containers in the creating process.
// By adding a non-empty judgment for runningPod.Sandboxes,
// ensure that all containers of the pod have been terminated.
if len(runningPod.Sandboxes) > 0 || len(runningPod.Containers) > 0 {
runningContainers = true
}
break
}
}
if runningContainers {
klog.V(4).InfoS("Pod still has one or more containers in the non-exited state and will not be removed from desired state", "pod", klog.KObj(volumeToMount.Pod)) klog.V(4).InfoS("Pod still has one or more containers in the non-exited state and will not be removed from desired state", "pod", klog.KObj(volumeToMount.Pod))
continue continue
} }
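Taken together, the populator's two checks reduce to a simple rule: only add volumes for pods that may still start containers, and only drop a pod's volumes once its runtime is known to be removable. A toy restatement under those assumptions (ignoring keepTerminatedPodVolumes and the populator's actual fields):

package sketch

// stateQueries is a stand-in for the podStateProvider interface declared in this file.
type stateQueries interface {
	ShouldPodContainersBeTerminating(uid string) bool
	ShouldPodRuntimeBeRemoved(uid string) bool
}

// Only add volumes for pods that may still start containers.
func shouldAddVolumesFor(q stateQueries, uid string) bool { return !q.ShouldPodContainersBeTerminating(uid) }

// Only drop a pod's volumes once its runtime is known to be removable.
func shouldDropVolumesFor(q stateQueries, uid string) bool { return q.ShouldPodRuntimeBeRemoved(uid) }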


@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
kubetypes "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing" core "k8s.io/client-go/testing"
@ -34,13 +35,10 @@ import (
csitrans "k8s.io/csi-translation-lib" csitrans "k8s.io/csi-translation-lib"
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/configmap" "k8s.io/kubernetes/pkg/kubelet/configmap"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod" kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
podtest "k8s.io/kubernetes/pkg/kubelet/pod/testing" podtest "k8s.io/kubernetes/pkg/kubelet/pod/testing"
"k8s.io/kubernetes/pkg/kubelet/secret" "k8s.io/kubernetes/pkg/kubelet/secret"
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache" "k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
"k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/csimigration" "k8s.io/kubernetes/pkg/volume/csimigration"
@ -81,7 +79,7 @@ func prepareDswpWithVolume(t *testing.T) (*desiredStateOfWorldPopulator, kubepod
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
return dswp, fakePodManager return dswp, fakePodManager
} }
@ -156,7 +154,7 @@ func TestFindAndAddNewPods_WithVolumeRetrievalError(t *testing.T) {
} }
func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) { func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) {
dswp, fakeRuntime, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t) dswp, fakePodState, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t)
podName := util.GetUniquePodName(pod) podName := util.GetUniquePodName(pod)
//let the pod be terminated //let the pod be terminated
@ -167,29 +165,15 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) {
podGet.Status.Phase = v1.PodFailed podGet.Status.Phase = v1.PodFailed
dswp.podManager.DeletePod(pod) dswp.podManager.DeletePod(pod)
fakeRuntime.PodList = []*containertest.FakePod{
{
Pod: &kubecontainer.Pod{
Name: pod.Name,
ID: pod.UID,
Sandboxes: []*kubecontainer.Container{
{
Name: "dswp-test-pod-sandbox",
},
},
},
},
}
dswp.findAndRemoveDeletedPods() dswp.findAndRemoveDeletedPods()
if !dswp.pods.processedPods[podName] { if !dswp.pods.processedPods[podName] {
t.Fatalf("Pod should not been removed from desired state of world since sandbox exist") t.Fatalf("Pod should not been removed from desired state of world since pod state still thinks it exists")
} }
fakeRuntime.PodList = nil fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
// fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted // the pod state is marked as removed, so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted
dswp.findAndRemoveDeletedPods() dswp.findAndRemoveDeletedPods()
if dswp.pods.processedPods[podName] { if dswp.pods.processedPods[podName] {
t.Fatalf("Failed to remove pods from desired state of world since they no longer exist") t.Fatalf("Failed to remove pods from desired state of world since they no longer exist")
@ -221,7 +205,8 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) {
} }
func TestFindAndRemoveDeletedPodsWithActualState(t *testing.T) { func TestFindAndRemoveDeletedPodsWithActualState(t *testing.T) {
dswp, _, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t) dswp, fakePodState, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t)
fakeASW := dswp.actualStateOfWorld
podName := util.GetUniquePodName(pod) podName := util.GetUniquePodName(pod)
//let the pod be terminated //let the pod be terminated
@ -251,7 +236,19 @@ func TestFindAndRemoveDeletedPodsWithActualState(t *testing.T) {
// reconcile with actual state so that volume is added into the actual state // reconcile with actual state so that volume is added into the actual state
// desired state populator now can successfully delete the pod and volume // desired state populator now can successfully delete the pod and volume
fakeASW := dswp.actualStateOfWorld reconcileASW(fakeASW, dswp.desiredStateOfWorld, t)
dswp.findAndRemoveDeletedPods()
if !dswp.desiredStateOfWorld.VolumeExists(expectedVolumeName) {
t.Fatalf(
"VolumeExists(%q) failed. Expected: <true> Actual: <false>",
expectedVolumeName)
}
fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
// reconcile with actual state so that volume is added into the actual state
// desired state populator now can successfully delete the pod and volume
reconcileASW(fakeASW, dswp.desiredStateOfWorld, t) reconcileASW(fakeASW, dswp.desiredStateOfWorld, t)
dswp.findAndRemoveDeletedPods() dswp.findAndRemoveDeletedPods()
volumeExists = dswp.desiredStateOfWorld.VolumeExists(expectedVolumeName) volumeExists = dswp.desiredStateOfWorld.VolumeExists(expectedVolumeName)
@ -271,7 +268,7 @@ func TestFindAndRemoveDeletedPodsWithActualState(t *testing.T) {
} }
func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) { func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
dswp, fakeRuntime, pod, expectedVolumeName, pv := prepareDSWPWithPodPV(t) dswp, fakePodState, pod, expectedVolumeName, pv := prepareDSWPWithPodPV(t)
podName := util.GetUniquePodName(pod) podName := util.GetUniquePodName(pod)
//let the pod be terminated //let the pod be terminated
@ -281,7 +278,7 @@ func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
} }
podGet.Status.Phase = v1.PodFailed podGet.Status.Phase = v1.PodFailed
dswp.podManager.DeletePod(pod) dswp.podManager.DeletePod(pod)
fakeRuntime.PodList = nil fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
// Add the volume to ASW by reconciling. // Add the volume to ASW by reconciling.
fakeASW := dswp.actualStateOfWorld fakeASW := dswp.actualStateOfWorld
@ -302,7 +299,7 @@ func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
t.Fatalf("Failed to set the volume as uncertain: %s", err) t.Fatalf("Failed to set the volume as uncertain: %s", err)
} }
// fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted // the pod state now lists the pod as removed, so findAndRemoveDeletedPods() will remove the pod and the volumes it has mounted
dswp.findAndRemoveDeletedPods() dswp.findAndRemoveDeletedPods()
if dswp.pods.processedPods[podName] { if dswp.pods.processedPods[podName] {
t.Fatalf("Failed to remove pods from desired state of world since they no longer exist") t.Fatalf("Failed to remove pods from desired state of world since they no longer exist")
@ -333,7 +330,7 @@ func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
} }
} }
func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *containertest.FakeRuntime, *v1.Pod, v1.UniqueVolumeName, *v1.PersistentVolume) { func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *fakePodStateProvider, *v1.Pod, v1.UniqueVolumeName, *v1.PersistentVolume) {
// create dswp // create dswp
mode := v1.PersistentVolumeFilesystem mode := v1.PersistentVolumeFilesystem
pv := &v1.PersistentVolume{ pv := &v1.PersistentVolume{
@ -353,7 +350,7 @@ func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *contain
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, fakesDSW, fakeRuntime := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, fakesDSW, _, fakePodState := createDswpWithVolume(t, pv, pvc)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -399,7 +396,7 @@ func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *contain
verifyVolumeExistsInVolumesToMount( verifyVolumeExistsInVolumesToMount(
t, v1.UniqueVolumeName(generatedVolumeName), false /* expectReportedInUse */, fakesDSW) t, v1.UniqueVolumeName(generatedVolumeName), false /* expectReportedInUse */, fakesDSW)
return dswp, fakeRuntime, pod, expectedVolumeName, pv return dswp, fakePodState, pod, expectedVolumeName, pv
} }
func TestFindAndRemoveNonattachableVolumes(t *testing.T) { func TestFindAndRemoveNonattachableVolumes(t *testing.T) {
@ -424,7 +421,7 @@ func TestFindAndRemoveNonattachableVolumes(t *testing.T) {
} }
fakeVolumePluginMgr, fakeVolumePlugin := volumetesting.GetTestKubeletVolumePluginMgr(t) fakeVolumePluginMgr, fakeVolumePlugin := volumetesting.GetTestKubeletVolumePluginMgr(t)
dswp, fakePodManager, fakesDSW, _ := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr) dswp, fakePodManager, fakesDSW, _, _ := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -487,7 +484,7 @@ func TestEphemeralVolumeOwnerCheck(t *testing.T) {
// create dswp // create dswp
pod, pv, pvc := createEphemeralVolumeObjects("dswp-test-pod", "dswp-test-volume-name", false /* not owned */) pod, pv, pvc := createEphemeralVolumeObjects("dswp-test-pod", "dswp-test-volume-name", false /* not owned */)
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
fakePodManager.AddPod(pod) fakePodManager.AddPod(pod)
podName := util.GetUniquePodName(pod) podName := util.GetUniquePodName(pod)
@ -505,7 +502,7 @@ func TestEphemeralVolumeOwnerCheck(t *testing.T) {
func TestEphemeralVolumeEnablement(t *testing.T) { func TestEphemeralVolumeEnablement(t *testing.T) {
// create dswp // create dswp
pod, pv, pvc := createEphemeralVolumeObjects("dswp-test-pod", "dswp-test-volume-name", true /* owned */) pod, pv, pvc := createEphemeralVolumeObjects("dswp-test-pod", "dswp-test-volume-name", true /* owned */)
dswp, fakePodManager, fakesDSW, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, fakesDSW, _, fakePodState := createDswpWithVolume(t, pv, pvc)
fakePodManager.AddPod(pod) fakePodManager.AddPod(pod)
podName := util.GetUniquePodName(pod) podName := util.GetUniquePodName(pod)
@ -555,7 +552,7 @@ func TestEphemeralVolumeEnablement(t *testing.T) {
if !exist { if !exist {
t.Fatalf("Failed to get pod by pod name: %s and namespace: %s", pod.Name, pod.Namespace) t.Fatalf("Failed to get pod by pod name: %s and namespace: %s", pod.Name, pod.Namespace)
} }
podGet.Status.Phase = v1.PodFailed fakePodState.removed = map[kubetypes.UID]struct{}{podGet.UID: {}}
// Pretend again that the feature is disabled. // Pretend again that the feature is disabled.
// Removal of the pod and volumes is expected to work. // Removal of the pod and volumes is expected to work.
@ -620,7 +617,7 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods_Valid_Block_VolumeDevices(t
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, fakesDSW, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, fakesDSW, _, fakePodState := createDswpWithVolume(t, pv, pvc)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -674,9 +671,10 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods_Valid_Block_VolumeDevices(t
} }
podGet.Status.Phase = v1.PodFailed podGet.Status.Phase = v1.PodFailed
fakePodManager.DeletePod(pod) fakePodManager.DeletePod(pod)
//pod is added to fakePodManager but fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
dswp.findAndRemoveDeletedPods()
// pod is added to fakePodManager but the pod state reports it as removed, so findAndRemoveDeletedPods() will remove the pod and the volumes it has mounted
dswp.findAndRemoveDeletedPods()
if dswp.pods.processedPods[podName] { if dswp.pods.processedPods[podName] {
t.Fatalf("Failed to remove pods from desired state of world since they no longer exist") t.Fatalf("Failed to remove pods from desired state of world since they no longer exist")
} }
@ -727,7 +725,7 @@ func TestCreateVolumeSpec_Valid_File_VolumeMounts(t *testing.T) {
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -773,7 +771,7 @@ func TestCreateVolumeSpec_Valid_Nil_VolumeMounts(t *testing.T) {
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -819,7 +817,7 @@ func TestCreateVolumeSpec_Valid_Block_VolumeDevices(t *testing.T) {
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -865,7 +863,7 @@ func TestCreateVolumeSpec_Invalid_File_VolumeDevices(t *testing.T) {
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -911,7 +909,7 @@ func TestCreateVolumeSpec_Invalid_Block_VolumeMounts(t *testing.T) {
Phase: v1.ClaimBound, Phase: v1.ClaimBound,
}, },
} }
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod // create pod
containers := []v1.Container{ containers := []v1.Container{
@ -1068,7 +1066,7 @@ func TestCheckVolumeFSResize(t *testing.T) {
}, },
} }
dswp, fakePodManager, fakeDSW, _ := createDswpWithVolume(t, pv, pvc) dswp, fakePodManager, fakeDSW, _, _ := createDswpWithVolume(t, pv, pvc)
fakeASW := dswp.actualStateOfWorld fakeASW := dswp.actualStateOfWorld
containers := []v1.Container{} containers := []v1.Container{}
@ -1290,14 +1288,29 @@ func createEphemeralVolumeObjects(podName, volumeName string, owned bool) (pod *
return return
} }
func createDswpWithVolume(t *testing.T, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime) { func createDswpWithVolume(t *testing.T, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime, *fakePodStateProvider) {
fakeVolumePluginMgr, _ := volumetesting.GetTestKubeletVolumePluginMgr(t) fakeVolumePluginMgr, _ := volumetesting.GetTestKubeletVolumePluginMgr(t)
dswp, fakePodManager, fakesDSW, fakeRuntime := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr) dswp, fakePodManager, fakesDSW, fakeRuntime, fakeStateProvider := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr)
return dswp, fakePodManager, fakesDSW, fakeRuntime return dswp, fakePodManager, fakesDSW, fakeRuntime, fakeStateProvider
}
type fakePodStateProvider struct {
terminating map[kubetypes.UID]struct{}
removed map[kubetypes.UID]struct{}
}
func (p *fakePodStateProvider) ShouldPodContainersBeTerminating(uid kubetypes.UID) bool {
_, ok := p.terminating[uid]
return ok
}
func (p *fakePodStateProvider) ShouldPodRuntimeBeRemoved(uid kubetypes.UID) bool {
_, ok := p.removed[uid]
return ok
} }
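A hedged sketch of the pattern the updated tests rely on; the helper below is illustrative only (not part of the change) and uses the fakePodStateProvider and populator types defined in this file:
func exampleDriveRemoval(dswp *desiredStateOfWorldPopulator, state *fakePodStateProvider, pod *v1.Pod) {
	// Pod deleted from the pod manager, but the pod worker has not released it yet:
	// the populator keeps the pod's volumes in the desired state of world.
	dswp.findAndRemoveDeletedPods()
	// Mark the pod's runtime as removable in the fake provider; the next pass drops
	// the pod and the volumes it had mounted.
	state.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
	dswp.findAndRemoveDeletedPods()
}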
func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim, func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim,
fakeVolumePluginMgr *volume.VolumePluginMgr) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime) { fakeVolumePluginMgr *volume.VolumePluginMgr) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime, *fakePodStateProvider) {
fakeClient := &fake.Clientset{} fakeClient := &fake.Clientset{}
fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) {
return true, pvc, nil return true, pvc, nil
@ -1314,8 +1327,7 @@ func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolu
fakesDSW := cache.NewDesiredStateOfWorld(fakeVolumePluginMgr) fakesDSW := cache.NewDesiredStateOfWorld(fakeVolumePluginMgr)
fakeASW := cache.NewActualStateOfWorld("fake", fakeVolumePluginMgr) fakeASW := cache.NewActualStateOfWorld("fake", fakeVolumePluginMgr)
fakeRuntime := &containertest.FakeRuntime{} fakeRuntime := &containertest.FakeRuntime{}
fakeStateProvider := &fakePodStateProvider{}
fakeStatusManager := status.NewManager(fakeClient, fakePodManager, &statustest.FakePodDeletionSafetyProvider{})
csiTranslator := csitrans.New() csiTranslator := csitrans.New()
dswp := &desiredStateOfWorldPopulator{ dswp := &desiredStateOfWorldPopulator{
@ -1323,7 +1335,7 @@ func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolu
loopSleepDuration: 100 * time.Millisecond, loopSleepDuration: 100 * time.Millisecond,
getPodStatusRetryDuration: 2 * time.Second, getPodStatusRetryDuration: 2 * time.Second,
podManager: fakePodManager, podManager: fakePodManager,
podStatusProvider: fakeStatusManager, podStateProvider: fakeStateProvider,
desiredStateOfWorld: fakesDSW, desiredStateOfWorld: fakesDSW,
actualStateOfWorld: fakeASW, actualStateOfWorld: fakeASW,
pods: processedPods{ pods: processedPods{
@ -1334,5 +1346,5 @@ func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolu
intreeToCSITranslator: csiTranslator, intreeToCSITranslator: csiTranslator,
volumePluginMgr: fakeVolumePluginMgr, volumePluginMgr: fakeVolumePluginMgr,
} }
return dswp, fakePodManager, fakesDSW, fakeRuntime return dswp, fakePodManager, fakesDSW, fakeRuntime, fakeStateProvider
} }

View File

@ -39,7 +39,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config" "k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/pod" "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache" "k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/metrics" "k8s.io/kubernetes/pkg/kubelet/volumemanager/metrics"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/populator" "k8s.io/kubernetes/pkg/kubelet/volumemanager/populator"
@ -106,6 +105,13 @@ type VolumeManager interface {
// the duration defined in podAttachAndMountTimeout. // the duration defined in podAttachAndMountTimeout.
WaitForAttachAndMount(pod *v1.Pod) error WaitForAttachAndMount(pod *v1.Pod) error
// WaitForUnmount processes the volumes referenced in the specified
// pod and blocks until they are all unmounted (reflected in the actual
// state of the world).
// An error is returned if all volumes are not unmounted within
// the duration defined in podAttachAndMountTimeout.
WaitForUnmount(pod *v1.Pod) error
// GetMountedVolumesForPod returns a VolumeMap containing the volumes // GetMountedVolumesForPod returns a VolumeMap containing the volumes
// referenced by the specified pod that are successfully attached and // referenced by the specified pod that are successfully attached and
// mounted. The key in the map is the OuterVolumeSpecName (i.e. // mounted. The key in the map is the OuterVolumeSpecName (i.e.
@ -149,6 +155,12 @@ type VolumeManager interface {
MarkVolumesAsReportedInUse(volumesReportedAsInUse []v1.UniqueVolumeName) MarkVolumesAsReportedInUse(volumesReportedAsInUse []v1.UniqueVolumeName)
} }
// podStateProvider can determine if a pod is going to be terminated
type podStateProvider interface {
ShouldPodContainersBeTerminating(k8stypes.UID) bool
ShouldPodRuntimeBeRemoved(k8stypes.UID) bool
}
// NewVolumeManager returns a new concrete instance implementing the // NewVolumeManager returns a new concrete instance implementing the
// VolumeManager interface. // VolumeManager interface.
// //
@ -160,7 +172,7 @@ func NewVolumeManager(
controllerAttachDetachEnabled bool, controllerAttachDetachEnabled bool,
nodeName k8stypes.NodeName, nodeName k8stypes.NodeName,
podManager pod.Manager, podManager pod.Manager,
podStatusProvider status.PodStatusProvider, podStateProvider podStateProvider,
kubeClient clientset.Interface, kubeClient clientset.Interface,
volumePluginMgr *volume.VolumePluginMgr, volumePluginMgr *volume.VolumePluginMgr,
kubeContainerRuntime container.Runtime, kubeContainerRuntime container.Runtime,
@ -195,7 +207,7 @@ func NewVolumeManager(
desiredStateOfWorldPopulatorLoopSleepPeriod, desiredStateOfWorldPopulatorLoopSleepPeriod,
desiredStateOfWorldPopulatorGetPodStatusRetryDuration, desiredStateOfWorldPopulatorGetPodStatusRetryDuration,
podManager, podManager,
podStatusProvider, podStateProvider,
vm.desiredStateOfWorld, vm.desiredStateOfWorld,
vm.actualStateOfWorld, vm.actualStateOfWorld,
kubeContainerRuntime, kubeContainerRuntime,
@ -426,6 +438,42 @@ func (vm *volumeManager) WaitForAttachAndMount(pod *v1.Pod) error {
return nil return nil
} }
func (vm *volumeManager) WaitForUnmount(pod *v1.Pod) error {
if pod == nil {
return nil
}
klog.V(3).InfoS("Waiting for volumes to unmount for pod", "pod", klog.KObj(pod))
uniquePodName := util.GetUniquePodName(pod)
vm.desiredStateOfWorldPopulator.ReprocessPod(uniquePodName)
err := wait.PollImmediate(
podAttachAndMountRetryInterval,
podAttachAndMountTimeout,
vm.verifyVolumesUnmountedFunc(uniquePodName))
if err != nil {
var mountedVolumes []string
for _, v := range vm.actualStateOfWorld.GetMountedVolumesForPod(uniquePodName) {
mountedVolumes = append(mountedVolumes, v.OuterVolumeSpecName)
}
sort.Strings(mountedVolumes)
if len(mountedVolumes) == 0 {
return nil
}
return fmt.Errorf(
"mounted volumes=%v: %s",
mountedVolumes,
err)
}
klog.V(3).InfoS("All volumes are unmounted for pod", "pod", klog.KObj(pod))
return nil
}
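As a hedged illustration of the intended caller (the wrapper name below is made up; the real kubelet call site is outside this hunk), a termination path can now block on volume teardown before declaring the pod terminated:
func waitForPodVolumeTeardown(vm VolumeManager, pod *v1.Pod) error {
	// Blocks until the actual state of world reports no mounted volumes for the pod,
	// or until podAttachAndMountTimeout expires.
	if err := vm.WaitForUnmount(pod); err != nil {
		return fmt.Errorf("volumes for pod %q were not unmounted: %v", pod.Name, err)
	}
	return nil
}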
// getUnattachedVolumes returns a list of the volumes that are expected to be attached but // getUnattachedVolumes returns a list of the volumes that are expected to be attached but
// are not currently attached to the node // are not currently attached to the node
func (vm *volumeManager) getUnattachedVolumes(expectedVolumes []string) []string { func (vm *volumeManager) getUnattachedVolumes(expectedVolumes []string) []string {
@ -449,6 +497,17 @@ func (vm *volumeManager) verifyVolumesMountedFunc(podName types.UniquePodName, e
} }
} }
// verifyVolumesUnmountedFunc returns a condition func that returns true when there are no mounted volumes for this
// pod.
func (vm *volumeManager) verifyVolumesUnmountedFunc(podName types.UniquePodName) wait.ConditionFunc {
return func() (done bool, err error) {
if errs := vm.desiredStateOfWorld.PopPodErrors(podName); len(errs) > 0 {
return true, errors.New(strings.Join(errs, "; "))
}
return len(vm.actualStateOfWorld.GetMountedVolumesForPod(podName)) == 0, nil
}
}
// getUnmountedVolumes fetches the current list of mounted volumes from // getUnmountedVolumes fetches the current list of mounted volumes from
// the actual state of the world, and uses it to process the list of // the actual state of the world, and uses it to process the list of
// expectedVolumes. It returns a list of unmounted volumes. // expectedVolumes. It returns a list of unmounted volumes.

View File

@ -50,6 +50,11 @@ func (f *FakeVolumeManager) WaitForAttachAndMount(pod *v1.Pod) error {
return nil return nil
} }
// WaitForUnmount is not implemented
func (f *FakeVolumeManager) WaitForUnmount(pod *v1.Pod) error {
return nil
}
// GetMountedVolumesForPod is not implemented // GetMountedVolumesForPod is not implemented
func (f *FakeVolumeManager) GetMountedVolumesForPod(podName types.UniquePodName) container.VolumeMap { func (f *FakeVolumeManager) GetMountedVolumesForPod(podName types.UniquePodName) container.VolumeMap {
return nil return nil

View File

@ -28,6 +28,7 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes"
@ -40,8 +41,6 @@ import (
kubepod "k8s.io/kubernetes/pkg/kubelet/pod" kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
podtest "k8s.io/kubernetes/pkg/kubelet/pod/testing" podtest "k8s.io/kubernetes/pkg/kubelet/pod/testing"
"k8s.io/kubernetes/pkg/kubelet/secret" "k8s.io/kubernetes/pkg/kubelet/secret"
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
"k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume"
volumetest "k8s.io/kubernetes/pkg/volume/testing" volumetest "k8s.io/kubernetes/pkg/volume/testing"
"k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util"
@ -269,19 +268,34 @@ func TestGetExtraSupplementalGroupsForPod(t *testing.T) {
} }
} }
type fakePodStateProvider struct {
shouldRemove map[kubetypes.UID]struct{}
terminating map[kubetypes.UID]struct{}
}
func (p *fakePodStateProvider) ShouldPodRuntimeBeRemoved(uid kubetypes.UID) bool {
_, ok := p.shouldRemove[uid]
return ok
}
func (p *fakePodStateProvider) ShouldPodContainersBeTerminating(uid kubetypes.UID) bool {
_, ok := p.terminating[uid]
return ok
}
func newTestVolumeManager(t *testing.T, tmpDir string, podManager kubepod.Manager, kubeClient clientset.Interface) VolumeManager { func newTestVolumeManager(t *testing.T, tmpDir string, podManager kubepod.Manager, kubeClient clientset.Interface) VolumeManager {
plug := &volumetest.FakeVolumePlugin{PluginName: "fake", Host: nil} plug := &volumetest.FakeVolumePlugin{PluginName: "fake", Host: nil}
fakeRecorder := &record.FakeRecorder{} fakeRecorder := &record.FakeRecorder{}
plugMgr := &volume.VolumePluginMgr{} plugMgr := &volume.VolumePluginMgr{}
// TODO (#51147) inject mock prober // TODO (#51147) inject mock prober
plugMgr.InitPlugins([]volume.VolumePlugin{plug}, nil /* prober */, volumetest.NewFakeKubeletVolumeHost(t, tmpDir, kubeClient, nil)) plugMgr.InitPlugins([]volume.VolumePlugin{plug}, nil /* prober */, volumetest.NewFakeKubeletVolumeHost(t, tmpDir, kubeClient, nil))
statusManager := status.NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}) stateProvider := &fakePodStateProvider{}
fakePathHandler := volumetest.NewBlockVolumePathHandler() fakePathHandler := volumetest.NewBlockVolumePathHandler()
vm := NewVolumeManager( vm := NewVolumeManager(
true, true,
testHostname, testHostname,
podManager, podManager,
statusManager, stateProvider,
kubeClient, kubeClient,
plugMgr, plugMgr,
&containertest.FakeRuntime{}, &containertest.FakeRuntime{},

View File

@ -381,7 +381,10 @@ func GetUniqueVolumeNameFromSpec(
// IsPodTerminated checks if pod is terminated // IsPodTerminated checks if pod is terminated
func IsPodTerminated(pod *v1.Pod, podStatus v1.PodStatus) bool { func IsPodTerminated(pod *v1.Pod, podStatus v1.PodStatus) bool {
return podStatus.Phase == v1.PodFailed || podStatus.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(podStatus.ContainerStatuses)) // TODO: the guarantees provided by kubelet status are not sufficient to make it safe to ignore a deleted pod,
// even if everything is notRunning (kubelet does not guarantee that a container reported as waiting is not about
// to be started).
return podStatus.Phase == v1.PodFailed || podStatus.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(podStatus.InitContainerStatuses) && notRunning(podStatus.ContainerStatuses) && notRunning(podStatus.EphemeralContainerStatuses))
} }
// notRunning returns true if every status is terminated or waiting, or the status list // notRunning returns true if every status is terminated or waiting, or the status list
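The hunk ends before the notRunning body; a hedged sketch of that check (notRunningSketch is an illustrative name, the real helper lives in this file):
func notRunningSketch(statuses []v1.ContainerStatus) bool {
	for _, status := range statuses {
		// Any container that is neither terminated nor waiting may still be running.
		if status.State.Terminated == nil && status.State.Waiting == nil {
			return false
		}
	}
	return true
}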

View File

@ -756,6 +756,10 @@ var _ = SIGDescribe("StatefulSet", func() {
} }
pod, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(), pod, metav1.CreateOptions{}) pod, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(), pod, metav1.CreateOptions{})
framework.ExpectNoError(err) framework.ExpectNoError(err)
ginkgo.By("Waiting until pod " + podName + " will start running in namespace " + f.Namespace.Name)
if err := e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, podName, f.Namespace.Name); err != nil {
framework.Failf("Pod %v did not start running: %v", podName, err)
}
ginkgo.By("Creating statefulset with conflicting port in namespace " + f.Namespace.Name) ginkgo.By("Creating statefulset with conflicting port in namespace " + f.Namespace.Name)
ss := e2estatefulset.NewStatefulSet(ssName, f.Namespace.Name, headlessSvcName, 1, nil, nil, labels) ss := e2estatefulset.NewStatefulSet(ssName, f.Namespace.Name, headlessSvcName, 1, nil, nil, labels)
@ -765,11 +769,6 @@ var _ = SIGDescribe("StatefulSet", func() {
_, err = f.ClientSet.AppsV1().StatefulSets(f.Namespace.Name).Create(context.TODO(), ss, metav1.CreateOptions{}) _, err = f.ClientSet.AppsV1().StatefulSets(f.Namespace.Name).Create(context.TODO(), ss, metav1.CreateOptions{})
framework.ExpectNoError(err) framework.ExpectNoError(err)
ginkgo.By("Waiting until pod " + podName + " will start running in namespace " + f.Namespace.Name)
if err := e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, podName, f.Namespace.Name); err != nil {
framework.Failf("Pod %v did not start running: %v", podName, err)
}
var initialStatefulPodUID types.UID var initialStatefulPodUID types.UID
ginkgo.By("Waiting until stateful pod " + statefulPodName + " will be recreated and deleted at least once in namespace " + f.Namespace.Name) ginkgo.By("Waiting until stateful pod " + statefulPodName + " will be recreated and deleted at least once in namespace " + f.Namespace.Name)

View File

@ -387,6 +387,8 @@ var _ = SIGDescribe("Pods Extended", func() {
// pod volume teardown races with container start in CRI, which reports a failure // pod volume teardown races with container start in CRI, which reports a failure
framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName) framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
default: default:
data, _ := json.MarshalIndent(pod.Status, "", " ")
framework.Logf("pod %s on node %s had incorrect final status:\n%s", string(data))
return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message) return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
} }
switch { switch {