diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go
index 85d37b75c60..34b1cf6fe9c 100644
--- a/cmd/kubelet/app/server.go
+++ b/cmd/kubelet/app/server.go
@@ -745,7 +745,7 @@ func parseResourceList(m kubeletconfiginternal.ConfigurationMap) (v1.ResourceLis
 	for k, v := range m {
 		switch v1.ResourceName(k) {
 		// CPU, memory and local storage resources are supported.
-		case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceStorage:
+		case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage:
 			q, err := resource.ParseQuantity(v)
 			if err != nil {
 				return nil, err
@@ -753,12 +753,7 @@ func parseResourceList(m kubeletconfiginternal.ConfigurationMap) (v1.ResourceLis
 			if q.Sign() == -1 {
 				return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
 			}
-			// storage specified in configuration map is mapped to ResourceStorageScratch API
-			if v1.ResourceName(k) == v1.ResourceStorage {
-				rl[v1.ResourceStorageScratch] = q
-			} else {
-				rl[v1.ResourceName(k)] = q
-			}
+			rl[v1.ResourceName(k)] = q
 		default:
 			return nil, fmt.Errorf("cannot reserve %q resource", k)
 		}
diff --git a/pkg/api/helper/helpers.go b/pkg/api/helper/helpers.go
index a77bf04892d..e8b7ac923c4 100644
--- a/pkg/api/helper/helpers.go
+++ b/pkg/api/helper/helpers.go
@@ -107,6 +107,7 @@ func IsResourceQuotaScopeValidForResource(scope api.ResourceQuotaScope, resource
 var standardContainerResources = sets.NewString(
 	string(api.ResourceCPU),
 	string(api.ResourceMemory),
+	string(api.ResourceEphemeralStorage),
 )
 
 // IsStandardContainerResourceName returns true if the container can make a resource request
@@ -194,10 +195,13 @@ func IsStandardQuotaResourceName(str string) bool {
 var standardResources = sets.NewString(
 	string(api.ResourceCPU),
 	string(api.ResourceMemory),
+	string(api.ResourceEphemeralStorage),
 	string(api.ResourceRequestsCPU),
 	string(api.ResourceRequestsMemory),
+	string(api.ResourceRequestsEphemeralStorage),
 	string(api.ResourceLimitsCPU),
 	string(api.ResourceLimitsMemory),
+	string(api.ResourceLimitsEphemeralStorage),
 	string(api.ResourcePods),
 	string(api.ResourceQuotas),
 	string(api.ResourceServices),
diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go
index 23a3458b3a4..48084abd333 100644
--- a/pkg/api/validation/validation.go
+++ b/pkg/api/validation/validation.go
@@ -3817,8 +3817,8 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
 		// Validate resource quantity.
 		allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
 
-		if resourceName == api.ResourceStorageOverlay && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-			allErrs = append(allErrs, field.Forbidden(limPath, "ResourceStorageOverlay field disabled by feature-gate for ResourceRequirements"))
+		if resourceName == api.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+			allErrs = append(allErrs, field.Forbidden(limPath, "ResourceEphemeralStorage field disabled by feature-gate for ResourceRequirements"))
 		}
 	}
 	for resourceName, quantity := range requirements.Requests {
diff --git a/pkg/api/validation/validation_test.go b/pkg/api/validation/validation_test.go
index bdd514ceb07..b6f99b2a94b 100644
--- a/pkg/api/validation/validation_test.go
+++ b/pkg/api/validation/validation_test.go
@@ -2683,7 +2683,7 @@ func TestAlphaLocalStorageCapacityIsolation(t *testing.T) {
 
 	containerLimitCase := api.ResourceRequirements{
 		Limits: api.ResourceList{
-			api.ResourceStorageOverlay: *resource.NewMilliQuantity(
+			api.ResourceEphemeralStorage: *resource.NewMilliQuantity(
 				int64(40000),
 				resource.BinarySI),
 		},
diff --git a/pkg/kubelet/cadvisor/util.go b/pkg/kubelet/cadvisor/util.go
index e1d0c90f27c..a95a8fb19e2 100644
--- a/pkg/kubelet/cadvisor/util.go
+++ b/pkg/kubelet/cadvisor/util.go
@@ -35,18 +35,9 @@ func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
 	return c
 }
 
-func StorageScratchCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
+func EphemeralStorageCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
 	c := v1.ResourceList{
-		v1.ResourceStorageScratch: *resource.NewQuantity(
-			int64(info.Capacity),
-			resource.BinarySI),
-	}
-	return c
-}
-
-func StorageOverlayCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
-	c := v1.ResourceList{
-		v1.ResourceStorageOverlay: *resource.NewQuantity(
+		v1.ResourceEphemeralStorage: *resource.NewQuantity(
 			int64(info.Capacity),
 			resource.BinarySI),
 	}
diff --git a/pkg/kubelet/cm/BUILD b/pkg/kubelet/cm/BUILD
index e352bd354a6..51b892c222c 100644
--- a/pkg/kubelet/cm/BUILD
+++ b/pkg/kubelet/cm/BUILD
@@ -55,7 +55,6 @@ go_library(
         "//pkg/util/procfs:go_default_library",
         "//pkg/util/sysctl:go_default_library",
         "//pkg/util/version:go_default_library",
-        "//vendor/github.com/google/cadvisor/info/v2:go_default_library",
         "//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
         "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
         "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
index 8ba7d919429..66128398e3b 100644
--- a/pkg/kubelet/cm/container_manager_linux.go
+++ b/pkg/kubelet/cm/container_manager_linux.go
@@ -30,7 +30,6 @@ import (
 	"time"
 
 	"github.com/golang/glog"
-	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
 	"github.com/opencontainers/runc/libcontainer/configs"
@@ -552,24 +551,11 @@ func (cm *containerManagerImpl) setFsCapacity() error {
 	if err != nil {
 		return fmt.Errorf("Fail to get rootfs information %v", err)
 	}
-	hasDedicatedImageFs, _ := cm.cadvisorInterface.HasDedicatedImageFs()
-	var imagesfs cadvisorapiv2.FsInfo
-	if hasDedicatedImageFs {
-		imagesfs, err = cm.cadvisorInterface.ImagesFsInfo()
-		if err != nil {
-			return fmt.Errorf("Fail to get imagefs information %v", err)
-		}
-	}
 
 	cm.Lock()
-	for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
+	for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) {
 		cm.capacity[rName] = rCap
 	}
-	if hasDedicatedImageFs {
-		for rName, rCap := range cadvisor.StorageOverlayCapacityFromFsInfo(imagesfs) {
-			cm.capacity[rName] = rCap
-		}
-	}
 	cm.Unlock()
 	return nil
 }
diff --git a/pkg/kubelet/cm/node_container_manager.go b/pkg/kubelet/cm/node_container_manager.go
index 7c817984b07..a96f9c54037 100644
--- a/pkg/kubelet/cm/node_container_manager.go
+++ b/pkg/kubelet/cm/node_container_manager.go
@@ -218,9 +218,9 @@ func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.Res
 			value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
 			ret[v1.ResourceMemory] = *value
 		case evictionapi.SignalNodeFsAvailable:
-			storageCapacity := capacity[v1.ResourceStorageScratch]
+			storageCapacity := capacity[v1.ResourceEphemeralStorage]
 			value := evictionapi.GetThresholdQuantity(threshold.Value, &storageCapacity)
-			ret[v1.ResourceStorageScratch] = *value
+			ret[v1.ResourceEphemeralStorage] = *value
 		}
 	}
 	return ret
diff --git a/pkg/kubelet/cm/node_container_manager_test.go b/pkg/kubelet/cm/node_container_manager_test.go
index 29208186abc..c06b9aa85be 100644
--- a/pkg/kubelet/cm/node_container_manager_test.go
+++ b/pkg/kubelet/cm/node_container_manager_test.go
@@ -316,17 +316,17 @@ func TestNodeAllocatableInputValidation(t *testing.T) {
 		invalidConfiguration bool
 	}{
 		{
-			kubeReserved: getScratchResourceList("100Mi"),
-			systemReserved: getScratchResourceList("50Mi"),
-			capacity: getScratchResourceList("500Mi"),
+			kubeReserved: getEphemeralStorageResourceList("100Mi"),
+			systemReserved: getEphemeralStorageResourceList("50Mi"),
+			capacity: getEphemeralStorageResourceList("500Mi"),
 		},
 		{
-			kubeReserved: getScratchResourceList("10Gi"),
-			systemReserved: getScratchResourceList("10Gi"),
+			kubeReserved: getEphemeralStorageResourceList("10Gi"),
+			systemReserved: getEphemeralStorageResourceList("10Gi"),
 			hardThreshold: evictionapi.ThresholdValue{
 				Quantity: &storageEvictionThreshold,
 			},
-			capacity: getScratchResourceList("20Gi"),
+			capacity: getEphemeralStorageResourceList("20Gi"),
 			invalidConfiguration: true,
 		},
 	}
@@ -359,12 +359,12 @@ func TestNodeAllocatableInputValidation(t *testing.T) {
 	}
 }
 
-// getScratchResourceList returns a ResourceList with the
-// specified scratch storage resource values
-func getScratchResourceList(storage string) v1.ResourceList {
+// getEphemeralStorageResourceList returns a ResourceList with the
+// specified ephemeral storage resource values
+func getEphemeralStorageResourceList(storage string) v1.ResourceList {
 	res := v1.ResourceList{}
 	if storage != "" {
-		res[v1.ResourceStorageScratch] = resource.MustParse(storage)
+		res[v1.ResourceEphemeralStorage] = resource.MustParse(storage)
 	}
 	return res
 }
diff --git a/pkg/kubelet/eviction/BUILD b/pkg/kubelet/eviction/BUILD
index 0d796fdd3a7..69b373533fd 100644
--- a/pkg/kubelet/eviction/BUILD
+++ b/pkg/kubelet/eviction/BUILD
@@ -48,6 +48,7 @@ go_library(
     deps = [
         "//pkg/api:go_default_library",
        "//pkg/api/v1/helper/qos:go_default_library",
+        "//pkg/api/v1/resource:go_default_library",
         "//pkg/features:go_default_library",
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
         "//pkg/kubelet/cm:go_default_library",
diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index 5841afe7cdd..f430b82502b 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -31,6 +31,7 @@ import (
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/client-go/tools/record"
 	v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
+	apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
 	"k8s.io/kubernetes/pkg/features"
 	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 	"k8s.io/kubernetes/pkg/kubelet/cm"
@@ -472,7 +473,12 @@ func (m *managerImpl) localStorageEviction(pods []*v1.Pod) []*v1.Pod {
 			continue
 		}
 
-		if m.containerOverlayLimitEviction(podStats, pod) {
+		if m.podEphemeralStorageLimitEviction(podStats, pod) {
+			evicted = append(evicted, pod)
+			continue
+		}
+
+		if m.containerEphemeralStorageLimitEviction(podStats, pod) {
 			evicted = append(evicted, pod)
 		}
 	}
@@ -496,23 +502,56 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
 			}
 		}
 	}
+
 	return false
 }
 
-func (m *managerImpl) containerOverlayLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
+func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
+	_, podLimits := apiv1resource.PodRequestsAndLimits(pod)
+	_, found := podLimits[v1.ResourceEphemeralStorage]
+	if !found {
+		return false
+	}
+
+	podEphemeralStorageTotalUsage := &resource.Quantity{}
+	fsStatsSet := []fsStatsType{}
+	if *m.dedicatedImageFs {
+		fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
+	} else {
+		fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
+	}
+	podUsage, err := podDiskUsage(podStats, pod, fsStatsSet)
+	if err != nil {
+		glog.Errorf("eviction manager: error getting pod disk usage %v", err)
+		return false
+	}
+
+	podEphemeralStorageTotalUsage.Add(podUsage[resourceDisk])
+	if podEphemeralStorageTotalUsage.Cmp(podLimits[v1.ResourceEphemeralStorage]) > 0 {
+		// the total usage of pod exceeds the total size limit of containers, evict the pod
+		return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("pod ephemeral local storage usage exceeds the total limit of containers %v", podLimits[v1.ResourceEphemeralStorage]))
+	}
+	return false
+}
+
+func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
 	thresholdsMap := make(map[string]*resource.Quantity)
 	for _, container := range pod.Spec.Containers {
-		overlayLimit := container.Resources.Limits.StorageOverlay()
-		if overlayLimit != nil && overlayLimit.Value() != 0 {
-			thresholdsMap[container.Name] = overlayLimit
+		ephemeralLimit := container.Resources.Limits.StorageEphemeral()
+		if ephemeralLimit != nil && ephemeralLimit.Value() != 0 {
+			thresholdsMap[container.Name] = ephemeralLimit
 		}
 	}
 
 	for _, containerStat := range podStats.Containers {
-		rootfs := diskUsage(containerStat.Rootfs)
-		if overlayThreshold, ok := thresholdsMap[containerStat.Name]; ok {
-			if overlayThreshold.Cmp(*rootfs) < 0 {
-				return m.evictPod(pod, v1.ResourceName("containerOverlay"), fmt.Sprintf("container's overlay usage exceeds the limit %q", overlayThreshold.String()))
+		containerUsed := diskUsage(containerStat.Logs)
+		if !*m.dedicatedImageFs {
+			containerUsed.Add(*diskUsage(containerStat.Rootfs))
+		}
+
+		if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
+			if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
+				return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("container's ephemeral local storage usage exceeds the limit %q", ephemeralStorageThreshold.String()))
 			}
 		}
diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go
index 859b8d21a45..6cef960232a 100644
--- a/pkg/kubelet/eviction/helpers.go
+++ b/pkg/kubelet/eviction/helpers.go
@@ -54,8 +54,6 @@ const (
 	resourceNodeFs v1.ResourceName = "nodefs"
 	// nodefs inodes, number. internal to this module, used to account for local node root filesystem inodes.
 	resourceNodeFsInodes v1.ResourceName = "nodefsInodes"
-	// container overlay storage, in bytes. internal to this module, used to account for local disk usage for container overlay.
-	resourceOverlay v1.ResourceName = "overlay"
 )
 
 var (
@@ -400,12 +398,10 @@ func localVolumeNames(pod *v1.Pod) []string {
 func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsStatsType) (v1.ResourceList, error) {
 	disk := resource.Quantity{Format: resource.BinarySI}
 	inodes := resource.Quantity{Format: resource.BinarySI}
-	overlay := resource.Quantity{Format: resource.BinarySI}
 	for _, container := range podStats.Containers {
 		if hasFsStatsType(statsToMeasure, fsStatsRoot) {
 			disk.Add(*diskUsage(container.Rootfs))
 			inodes.Add(*inodeUsage(container.Rootfs))
-			overlay.Add(*diskUsage(container.Rootfs))
 		}
 		if hasFsStatsType(statsToMeasure, fsStatsLogs) {
 			disk.Add(*diskUsage(container.Logs))
@@ -425,9 +421,8 @@ func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsSt
 		}
 	}
 	return v1.ResourceList{
-		resourceDisk: disk,
-		resourceInodes: inodes,
-		resourceOverlay: overlay,
+		resourceDisk: disk,
+		resourceInodes: inodes,
 	}, nil
 }
@@ -727,7 +722,7 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
 		}
 	}
 
-	storageScratchCapacity, storageScratchAllocatable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceStorageScratch)
+	ephemeralStorageCapacity, ephemeralStorageAllocatable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceEphemeralStorage)
 	if exist {
 		for _, pod := range pods {
 			podStat, ok := statsFunc(pod)
@@ -735,25 +730,23 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
 				continue
 			}
 
-			usage, err := podDiskUsage(podStat, pod, []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource, fsStatsRoot})
+			fsStatsSet := []fsStatsType{}
+			if withImageFs {
+				fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
+			} else {
+				fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
+			}
+
+			usage, err := podDiskUsage(podStat, pod, fsStatsSet)
 			if err != nil {
 				glog.Warningf("eviction manager: error getting pod disk usage %v", err)
 				continue
 			}
-			// If there is a seperate imagefs set up for container runtimes, the scratch disk usage from nodefs should exclude the overlay usage
-			if withImageFs {
-				diskUsage := usage[resourceDisk]
-				diskUsageP := &diskUsage
-				diskUsagep := diskUsageP.Copy()
-				diskUsagep.Sub(usage[resourceOverlay])
-				storageScratchAllocatable.Sub(*diskUsagep)
-			} else {
-				storageScratchAllocatable.Sub(usage[resourceDisk])
-			}
+			ephemeralStorageAllocatable.Sub(usage[resourceDisk])
 		}
 		result[evictionapi.SignalAllocatableNodeFsAvailable] = signalObservation{
-			available: storageScratchAllocatable,
-			capacity: storageScratchCapacity,
+			available: ephemeralStorageAllocatable,
+			capacity: ephemeralStorageCapacity,
 		}
 	}
diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go
index 9a20a75600a..b5dd137cfd2 100644
--- a/pkg/kubelet/kubelet_node_status.go
+++ b/pkg/kubelet/kubelet_node_status.go
@@ -564,11 +564,7 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
 			// capacity for every node status request
 			initialCapacity := kl.containerManager.GetCapacity()
 			if initialCapacity != nil {
-				node.Status.Capacity[v1.ResourceStorageScratch] = initialCapacity[v1.ResourceStorageScratch]
-				imageCapacity, ok := initialCapacity[v1.ResourceStorageOverlay]
-				if ok {
-					node.Status.Capacity[v1.ResourceStorageOverlay] = imageCapacity
-				}
+				node.Status.Capacity[v1.ResourceEphemeralStorage] = initialCapacity[v1.ResourceEphemeralStorage]
 			}
 		}
 	}
diff --git a/test/e2e_node/local_storage_isolation_eviction_test.go b/test/e2e_node/local_storage_isolation_eviction_test.go
index f0932383a6c..5d3408c4b2c 100644
--- a/test/e2e_node/local_storage_isolation_eviction_test.go
+++ b/test/e2e_node/local_storage_isolation_eviction_test.go
@@ -36,7 +36,7 @@ type podEvictSpec struct {
 }
 
 const (
-	totalEvict = 3
+	totalEvict = 4
 )
 
 // Eviction Policy is described here:
@@ -48,7 +48,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 
 	emptyDirVolumeName := "volume-emptydir-pod"
 	podTestSpecs := []podEvictSpec{
-		{evicted: true, // This pod should be evicted because emptyDir (defualt storage type) usage violation
+		{evicted: true, // This pod should be evicted because emptyDir (default storage type) usage violation
 			pod: v1.Pod{
 				ObjectMeta: metav1.ObjectMeta{Name: "emptydir-hog-pod"},
 				Spec: v1.PodSpec{
@@ -157,7 +157,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 			},
 		},
 
-		{evicted: true, // This pod should be evicted because container overlay usage violation
+		{evicted: true, // This pod should be evicted because container ephemeral storage usage violation
 			pod: v1.Pod{
 				ObjectMeta: metav1.ObjectMeta{Name: "container-hog-pod"},
 				Spec: v1.PodSpec{
@@ -173,7 +173,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 						},
 						Resources: v1.ResourceRequirements{
 							Limits: v1.ResourceList{
-								v1.ResourceStorageOverlay: *resource.NewMilliQuantity(
+								v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
 									int64(40000),
 									resource.BinarySI),
 							},
@@ -183,10 +183,53 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 				},
 			},
 		},
+
+		{evicted: true, // This pod should be evicted because pod ephemeral storage usage violation
+			pod: v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{Name: "emptydir-container-hog-pod"},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Image: "gcr.io/google_containers/busybox:1.24",
+							Name: "emptydir-container-hog-pod",
+							Command: []string{
+								"sh",
+								"-c",
+								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
+							},
+							Resources: v1.ResourceRequirements{
+								Limits: v1.ResourceList{
+									v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
+										int64(40000),
+										resource.BinarySI),
+								},
+							},
+							VolumeMounts: []v1.VolumeMount{
+								{
+									Name: emptyDirVolumeName,
+									MountPath: "/cache",
+								},
+							},
+						},
+					},
+					Volumes: []v1.Volume{
+						{
+							Name: emptyDirVolumeName,
+							VolumeSource: v1.VolumeSource{
+								EmptyDir: &v1.EmptyDirVolumeSource{
+									SizeLimit: *resource.NewQuantity(int64(100000), resource.BinarySI),
+								},
+							},
+						},
+					},
+				},
+			},
+		},
 	}
 
 	evictionTestTimeout := 10 * time.Minute
-	testCondition := "EmptyDir/ContainerOverlay usage limit violation"
+	testCondition := "EmptyDir/ContainerEphemeralStorage usage limit violation"
 	Context(fmt.Sprintf("EmptyDirEviction when we run containers that should cause %s", testCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
 			initialConfig.FeatureGates += ", LocalStorageCapacityIsolation=true"