diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index fbcf8b1633c..7b4b070ab6d 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -251,6 +251,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { fs.BoolVar(&s.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", s.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.") fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.") fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.") + fs.BoolVar(&s.KeepTerminatedPodVolumes, "keep-terminated-pod-volumes", s.KeepTerminatedPodVolumes, "Keep terminated pod volumes mounted to the node after the pod terminates. Can be useful for debugging volume related issues.") // CRI flags. fs.BoolVar(&s.EnableCRI, "experimental-cri", s.EnableCRI, "[Experimental] Enable the Container Runtime Interface (CRI) integration. If --container-runtime is set to \"remote\", Kubelet will communicate with the runtime/image CRI server listening on the endpoint specified by --remote-runtime-endpoint/--remote-image-endpoint. If --container-runtime is set to \"docker\", Kubelet will launch a in-process CRI server on behalf of docker, and communicate over a default endpoint.") diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh index b5e94fc128c..f218465a581 100755 --- a/hack/local-up-cluster.sh +++ b/hack/local-up-cluster.sh @@ -536,6 +536,7 @@ function start_kubelet { --experimental-cgroups-per-qos=${EXPERIMENTAL_CGROUPS_PER_QOS} \ --cgroup-driver=${CGROUP_DRIVER} \ --cgroup-root=${CGROUP_ROOT} \ + --keep-terminated-pod-volumes=true \ ${auth_args} \ ${dns_args} \ ${net_plugin_dir_args} \ diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index 124127c4ce2..77540f1f81c 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -308,6 +308,7 @@ junit-file-number k8s-bin-dir k8s-build-output keep-gogoproto +keep-terminated-pod-volumes km-path kops-admin-access kops-cluster diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index bd4d30b0da6..707e66f5e55 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -481,6 +481,9 @@ type KubeletConfiguration struct { // (binaries, etc.) to mount the volume are available on the underlying node. If the check is enabled // and fails the mount operation fails. ExperimentalCheckNodeCapabilitiesBeforeMount bool + // This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. + // This can be useful for debugging volume related issues. + KeepTerminatedPodVolumes bool } type KubeletAuthorizationMode string diff --git a/pkg/apis/componentconfig/v1alpha1/types.go b/pkg/apis/componentconfig/v1alpha1/types.go index f8f5e181a02..a01784ef0ed 100644 --- a/pkg/apis/componentconfig/v1alpha1/types.go +++ b/pkg/apis/componentconfig/v1alpha1/types.go @@ -521,6 +521,9 @@ type KubeletConfiguration struct { // (binaries, etc.) to mount the volume are available on the underlying node. If the check is enabled // and fails the mount operation fails. ExperimentalCheckNodeCapabilitiesBeforeMount bool `json:"experimentalCheckNodeCapabilitiesBeforeMount,omitempty"` + // This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. + // This can be useful for debugging volume related issues. + KeepTerminatedPodVolumes bool `json:"keepTerminatedPodVolumes,omitempty"` } type KubeletAuthorizationMode string diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go index 9dc04b4984b..3ad8a0ae793 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go @@ -481,6 +481,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu out.EnableCRI = in.EnableCRI out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount + out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes return nil } @@ -652,6 +653,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu out.EnableCRI = in.EnableCRI out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount + out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes return nil } diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 721b9ad011a..6c8c972edf1 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -10431,6 +10431,13 @@ var OpenAPIDefinitions *openapi.OpenAPIDefinitions = &openapi.OpenAPIDefinitions Format: "", }, }, + "keepTerminatedPodVolumes": { + SchemaProps: spec.SchemaProps{ + Description: "This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. This can be useful for debugging volume related issues.", + Type: []string{"boolean"}, + Format: "", + }, + }, }, Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"}, }, diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 23bf0b62888..eb45b4b9160 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -747,7 +747,8 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub kubeDeps.Mounter, klet.getPodsDir(), kubeDeps.Recorder, - kubeCfg.ExperimentalCheckNodeCapabilitiesBeforeMount) + kubeCfg.ExperimentalCheckNodeCapabilitiesBeforeMount, + kubeCfg.KeepTerminatedPodVolumes) runtimeCache, err := kubecontainer.NewRuntimeCache(klet.containerRuntime) if err != nil { diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 3c23784bb96..d29f5139f6a 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -257,7 +257,8 @@ func newTestKubeletWithImageList( kubelet.mounter, kubelet.getPodsDir(), kubelet.recorder, - false /* experimentalCheckNodeCapabilitiesBeforeMount*/) + false, /* experimentalCheckNodeCapabilitiesBeforeMount*/ + false /* keepTerminatedPodVolumes */) require.NoError(t, err, "Failed to initialize volume manager") // enable active deadline handler diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go index 831efe1e22c..91bff360bfd 100644 --- a/pkg/kubelet/runonce_test.go +++ b/pkg/kubelet/runonce_test.go @@ -106,7 +106,8 @@ func TestRunOnce(t *testing.T) { kb.mounter, kb.getPodsDir(), kb.recorder, - false /* experimentalCheckNodeCapabilitiesBeforeMount */) + false, /* experimentalCheckNodeCapabilitiesBeforeMount */ + false /* keepTerminatedPodVolumes */) kb.networkPlugin, _ = network.InitNetworkPlugin([]network.NetworkPlugin{}, "", nettest.NewFakeHost(nil), componentconfig.HairpinNone, kb.nonMasqueradeCIDR, network.UseDefaultMTU) // TODO: Factor out "StatsProvider" from Kubelet so we don't have a cyclic dependency diff --git a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go index 1ff35f091df..5e8d8d4137a 100644 --- a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go +++ b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go @@ -71,7 +71,8 @@ func NewDesiredStateOfWorldPopulator( getPodStatusRetryDuration time.Duration, podManager pod.Manager, desiredStateOfWorld cache.DesiredStateOfWorld, - kubeContainerRuntime kubecontainer.Runtime) DesiredStateOfWorldPopulator { + kubeContainerRuntime kubecontainer.Runtime, + keepTerminatedPodVolumes bool) DesiredStateOfWorldPopulator { return &desiredStateOfWorldPopulator{ kubeClient: kubeClient, loopSleepDuration: loopSleepDuration, @@ -80,7 +81,8 @@ func NewDesiredStateOfWorldPopulator( desiredStateOfWorld: desiredStateOfWorld, pods: processedPods{ processedPods: make(map[volumetypes.UniquePodName]bool)}, - kubeContainerRuntime: kubeContainerRuntime, + kubeContainerRuntime: kubeContainerRuntime, + keepTerminatedPodVolumes: keepTerminatedPodVolumes, } } @@ -93,6 +95,7 @@ type desiredStateOfWorldPopulator struct { pods processedPods kubeContainerRuntime kubecontainer.Runtime timeOfLastGetPodStatus time.Time + keepTerminatedPodVolumes bool } type processedPods struct { @@ -160,13 +163,7 @@ func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() { if !isPodTerminated(pod) { continue } - // Skip non-memory backed volumes belonging to terminated pods - volume := volumeToMount.VolumeSpec.Volume - if volume == nil { - continue - } - if (volume.EmptyDir == nil || volume.EmptyDir.Medium != v1.StorageMediumMemory) && - volume.ConfigMap == nil && volume.Secret == nil { + if dswp.keepTerminatedPodVolumes { continue } } diff --git a/pkg/kubelet/volumemanager/volume_manager.go b/pkg/kubelet/volumemanager/volume_manager.go index 8bfc1d3b12b..59888ad037a 100644 --- a/pkg/kubelet/volumemanager/volume_manager.go +++ b/pkg/kubelet/volumemanager/volume_manager.go @@ -157,7 +157,8 @@ func NewVolumeManager( mounter mount.Interface, kubeletPodsDir string, recorder record.EventRecorder, - checkNodeCapabilitiesBeforeMount bool) (VolumeManager, error) { + checkNodeCapabilitiesBeforeMount bool, + keepTerminatedPodVolumes bool) (VolumeManager, error) { vm := &volumeManager{ kubeClient: kubeClient, @@ -191,7 +192,8 @@ func NewVolumeManager( desiredStateOfWorldPopulatorGetPodStatusRetryDuration, podManager, vm.desiredStateOfWorld, - kubeContainerRuntime) + kubeContainerRuntime, + keepTerminatedPodVolumes) return vm, nil } diff --git a/pkg/kubelet/volumemanager/volume_manager_test.go b/pkg/kubelet/volumemanager/volume_manager_test.go index 5fe4b6d0064..9f209ab91f8 100644 --- a/pkg/kubelet/volumemanager/volume_manager_test.go +++ b/pkg/kubelet/volumemanager/volume_manager_test.go @@ -198,7 +198,8 @@ func newTestVolumeManager( &mount.FakeMounter{}, "", fakeRecorder, - false /* experimentalCheckNodeCapabilitiesBeforeMount */) + false, /* experimentalCheckNodeCapabilitiesBeforeMount */ + false /* keepTerminatedPodVolumes */) return vm, err }