Change eviction policy to manage one single local storage resource

commit 27901ad5df
parent acdf625e46
mirror of https://github.com/k3s-io/kubernetes.git
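This commit collapses the two experimental local-storage resources handled by the kubelet (v1.ResourceStorageScratch and v1.ResourceStorageOverlay) into the single v1.ResourceEphemeralStorage ("ephemeral-storage") resource; eviction, node capacity reporting and the node e2e tests are updated accordingly in the hunks below. As a minimal sketch of the user-facing side (not part of this commit; import paths assume the current k8s.io/api and k8s.io/apimachinery layout rather than the in-tree packages of this era), a limit on the unified resource looks like:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// One limit now covers a container's local ephemeral storage
	// (writable layer, logs, emptyDir), replacing the separate
	// scratch/overlay resources.
	limits := v1.ResourceList{
		v1.ResourceMemory:           resource.MustParse("128Mi"),
		v1.ResourceEphemeralStorage: resource.MustParse("1Gi"),
	}
	q := limits[v1.ResourceEphemeralStorage]
	fmt.Println(v1.ResourceEphemeralStorage, "=", q.String()) // ephemeral-storage = 1Gi
}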
@@ -745,7 +745,7 @@ func parseResourceList(m kubeletconfiginternal.ConfigurationMap) (v1.ResourceLis
     for k, v := range m {
         switch v1.ResourceName(k) {
         // CPU, memory and local storage resources are supported.
-        case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceStorage:
+        case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage:
             q, err := resource.ParseQuantity(v)
             if err != nil {
                 return nil, err
@@ -753,12 +753,7 @@ func parseResourceList(m kubeletconfiginternal.ConfigurationMap) (v1.ResourceLis
             if q.Sign() == -1 {
                 return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
             }
-            // storage specified in configuration map is mapped to ResourceStorageScratch API
-            if v1.ResourceName(k) == v1.ResourceStorage {
-                rl[v1.ResourceStorageScratch] = q
-            } else {
-                rl[v1.ResourceName(k)] = q
-            }
+            rl[v1.ResourceName(k)] = q
         default:
             return nil, fmt.Errorf("cannot reserve %q resource", k)
         }
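parseResourceList is the helper behind the kubelet's kube-reserved/system-reserved maps, so after the two hunks above those maps take an ephemeral-storage key directly instead of a storage key that was silently mapped to the scratch resource. A self-contained sketch of the new behaviour (hypothetical parseReserved helper with plain string keys; resource.ParseQuantity and the error messages are the real ones from the hunk):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// parseReserved mirrors the switch in parseResourceList after this change:
// cpu, memory and ephemeral-storage are accepted, anything else is rejected,
// and the quantity is stored under its own name with no scratch mapping.
func parseReserved(m map[string]string) (map[string]resource.Quantity, error) {
	rl := map[string]resource.Quantity{}
	for k, v := range m {
		switch k {
		case "cpu", "memory", "ephemeral-storage":
			q, err := resource.ParseQuantity(v)
			if err != nil {
				return nil, err
			}
			if q.Sign() == -1 {
				return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
			}
			rl[k] = q
		default:
			return nil, fmt.Errorf("cannot reserve %q resource", k)
		}
	}
	return rl, nil
}

func main() {
	rl, err := parseReserved(map[string]string{"memory": "1Gi", "ephemeral-storage": "1Gi"})
	q := rl["ephemeral-storage"]
	fmt.Println(q.String(), err) // 1Gi <nil>
}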
@@ -35,18 +35,9 @@ func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
     return c
 }
 
-func StorageScratchCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
+func EphemeralStorageCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
     c := v1.ResourceList{
-        v1.ResourceStorageScratch: *resource.NewQuantity(
-            int64(info.Capacity),
-            resource.BinarySI),
-    }
-    return c
-}
-
-func StorageOverlayCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
-    c := v1.ResourceList{
-        v1.ResourceStorageOverlay: *resource.NewQuantity(
+        v1.ResourceEphemeralStorage: *resource.NewQuantity(
             int64(info.Capacity),
             resource.BinarySI),
     }
@@ -55,7 +55,6 @@ go_library(
         "//pkg/util/procfs:go_default_library",
         "//pkg/util/sysctl:go_default_library",
         "//pkg/util/version:go_default_library",
-        "//vendor/github.com/google/cadvisor/info/v2:go_default_library",
         "//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
         "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
         "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
@@ -30,7 +30,6 @@ import (
     "time"
 
     "github.com/golang/glog"
-    cadvisorapiv2 "github.com/google/cadvisor/info/v2"
     "github.com/opencontainers/runc/libcontainer/cgroups"
     "github.com/opencontainers/runc/libcontainer/cgroups/fs"
     "github.com/opencontainers/runc/libcontainer/configs"
@@ -552,24 +551,11 @@ func (cm *containerManagerImpl) setFsCapacity() error {
     if err != nil {
         return fmt.Errorf("Fail to get rootfs information %v", err)
     }
-    hasDedicatedImageFs, _ := cm.cadvisorInterface.HasDedicatedImageFs()
-    var imagesfs cadvisorapiv2.FsInfo
-    if hasDedicatedImageFs {
-        imagesfs, err = cm.cadvisorInterface.ImagesFsInfo()
-        if err != nil {
-            return fmt.Errorf("Fail to get imagefs information %v", err)
-        }
-    }
 
     cm.Lock()
-    for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
+    for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) {
         cm.capacity[rName] = rCap
     }
-    if hasDedicatedImageFs {
-        for rName, rCap := range cadvisor.StorageOverlayCapacityFromFsInfo(imagesfs) {
-            cm.capacity[rName] = rCap
-        }
-    }
     cm.Unlock()
     return nil
 }
@@ -218,9 +218,9 @@ func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.Res
             value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
             ret[v1.ResourceMemory] = *value
         case evictionapi.SignalNodeFsAvailable:
-            storageCapacity := capacity[v1.ResourceStorageScratch]
+            storageCapacity := capacity[v1.ResourceEphemeralStorage]
             value := evictionapi.GetThresholdQuantity(threshold.Value, &storageCapacity)
-            ret[v1.ResourceStorageScratch] = *value
+            ret[v1.ResourceEphemeralStorage] = *value
         }
     }
     return ret
@@ -316,17 +316,17 @@ func TestNodeAllocatableInputValidation(t *testing.T) {
         invalidConfiguration bool
     }{
         {
-            kubeReserved:   getScratchResourceList("100Mi"),
-            systemReserved: getScratchResourceList("50Mi"),
-            capacity:       getScratchResourceList("500Mi"),
+            kubeReserved:   getEphemeralStorageResourceList("100Mi"),
+            systemReserved: getEphemeralStorageResourceList("50Mi"),
+            capacity:       getEphemeralStorageResourceList("500Mi"),
         },
         {
-            kubeReserved:   getScratchResourceList("10Gi"),
-            systemReserved: getScratchResourceList("10Gi"),
+            kubeReserved:   getEphemeralStorageResourceList("10Gi"),
+            systemReserved: getEphemeralStorageResourceList("10Gi"),
             hardThreshold: evictionapi.ThresholdValue{
                 Quantity: &storageEvictionThreshold,
             },
-            capacity:             getScratchResourceList("20Gi"),
+            capacity:             getEphemeralStorageResourceList("20Gi"),
             invalidConfiguration: true,
         },
     }
@@ -359,12 +359,12 @@ func TestNodeAllocatableInputValidation(t *testing.T) {
     }
 }
 
-// getScratchResourceList returns a ResourceList with the
-// specified scratch storage resource values
-func getScratchResourceList(storage string) v1.ResourceList {
+// getEphemeralStorageResourceList returns a ResourceList with the
+// specified ephemeral storage resource values
+func getEphemeralStorageResourceList(storage string) v1.ResourceList {
     res := v1.ResourceList{}
     if storage != "" {
-        res[v1.ResourceStorageScratch] = resource.MustParse(storage)
+        res[v1.ResourceEphemeralStorage] = resource.MustParse(storage)
     }
     return res
 }
@@ -48,6 +48,7 @@ go_library(
     deps = [
         "//pkg/api:go_default_library",
         "//pkg/api/v1/helper/qos:go_default_library",
+        "//pkg/api/v1/resource:go_default_library",
         "//pkg/features:go_default_library",
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
         "//pkg/kubelet/cm:go_default_library",
@@ -31,6 +31,7 @@ import (
     utilfeature "k8s.io/apiserver/pkg/util/feature"
     "k8s.io/client-go/tools/record"
     v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
+    apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
     "k8s.io/kubernetes/pkg/features"
     statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
     "k8s.io/kubernetes/pkg/kubelet/cm"
@@ -472,7 +473,12 @@ func (m *managerImpl) localStorageEviction(pods []*v1.Pod) []*v1.Pod {
             continue
         }
 
-        if m.containerOverlayLimitEviction(podStats, pod) {
+        if m.podEphemeralStorageLimitEviction(podStats, pod) {
             evicted = append(evicted, pod)
+            continue
+        }
+
+        if m.containerEphemeralStorageLimitEviction(podStats, pod) {
+            evicted = append(evicted, pod)
         }
     }
@@ -496,23 +502,56 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
             }
         }
     }
 
     return false
 }
 
-func (m *managerImpl) containerOverlayLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
+func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
+    _, podLimits := apiv1resource.PodRequestsAndLimits(pod)
+    _, found := podLimits[v1.ResourceEphemeralStorage]
+    if !found {
+        return false
+    }
+
+    podEphemeralStorageTotalUsage := &resource.Quantity{}
+    fsStatsSet := []fsStatsType{}
+    if *m.dedicatedImageFs {
+        fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
+    } else {
+        fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
+    }
+    podUsage, err := podDiskUsage(podStats, pod, fsStatsSet)
+    if err != nil {
+        glog.Errorf("eviction manager: error getting pod disk usage %v", err)
+        return false
+    }
+
+    podEphemeralStorageTotalUsage.Add(podUsage[resourceDisk])
+    if podEphemeralStorageTotalUsage.Cmp(podLimits[v1.ResourceEphemeralStorage]) > 0 {
+        // the total usage of pod exceeds the total size limit of containers, evict the pod
+        return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("pod ephemeral local storage usage exceeds the total limit of containers %v", podLimits[v1.ResourceEphemeralStorage]))
+    }
+    return false
+}
+
+func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
     thresholdsMap := make(map[string]*resource.Quantity)
     for _, container := range pod.Spec.Containers {
-        overlayLimit := container.Resources.Limits.StorageOverlay()
-        if overlayLimit != nil && overlayLimit.Value() != 0 {
-            thresholdsMap[container.Name] = overlayLimit
+        ephemeralLimit := container.Resources.Limits.StorageEphemeral()
+        if ephemeralLimit != nil && ephemeralLimit.Value() != 0 {
+            thresholdsMap[container.Name] = ephemeralLimit
         }
     }
 
     for _, containerStat := range podStats.Containers {
-        rootfs := diskUsage(containerStat.Rootfs)
-        if overlayThreshold, ok := thresholdsMap[containerStat.Name]; ok {
-            if overlayThreshold.Cmp(*rootfs) < 0 {
-                return m.evictPod(pod, v1.ResourceName("containerOverlay"), fmt.Sprintf("container's overlay usage exceeds the limit %q", overlayThreshold.String()))
+        containerUsed := diskUsage(containerStat.Logs)
+        if !*m.dedicatedImageFs {
+            containerUsed.Add(*diskUsage(containerStat.Rootfs))
+        }
+
+        if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
+            if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
+                return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("container's ephemeral local storage usage exceeds the limit %q", ephemeralStorageThreshold.String()))
+
             }
         }
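Both new checks in the hunk above reduce to the same comparison: accumulate the pod's (or container's) disk usage from the relevant filesystem stats — logs, plus the writable layer when there is no dedicated image filesystem, plus local volumes at the pod level — and evict once the total exceeds the declared ephemeral-storage limit. A stripped-down sketch of that comparison (hypothetical exceedsEphemeralLimit helper; resource.Quantity is the real API used by the eviction manager):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// exceedsEphemeralLimit distills the new pod/container checks: usage from the
// measured filesystem stats is accumulated into one Quantity and compared
// against the declared ephemeral-storage limit.
func exceedsEphemeralLimit(used []resource.Quantity, limit resource.Quantity) bool {
	total := resource.Quantity{Format: resource.BinarySI}
	for _, u := range used {
		total.Add(u) // e.g. logs + rootfs (+ local volumes at pod level)
	}
	return total.Cmp(limit) > 0
}

func main() {
	logs := resource.MustParse("30Mi")
	rootfs := resource.MustParse("25Mi")
	limit := resource.MustParse("50Mi")
	// 55Mi used against a 50Mi ephemeral-storage limit -> evict.
	fmt.Println(exceedsEphemeralLimit([]resource.Quantity{logs, rootfs}, limit))
}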
@@ -54,8 +54,6 @@ const (
     resourceNodeFs v1.ResourceName = "nodefs"
     // nodefs inodes, number. internal to this module, used to account for local node root filesystem inodes.
     resourceNodeFsInodes v1.ResourceName = "nodefsInodes"
-    // container overlay storage, in bytes. internal to this module, used to account for local disk usage for container overlay.
-    resourceOverlay v1.ResourceName = "overlay"
 )
 
 var (
@@ -400,12 +398,10 @@ func localVolumeNames(pod *v1.Pod) []string {
 func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsStatsType) (v1.ResourceList, error) {
     disk := resource.Quantity{Format: resource.BinarySI}
     inodes := resource.Quantity{Format: resource.BinarySI}
-    overlay := resource.Quantity{Format: resource.BinarySI}
     for _, container := range podStats.Containers {
         if hasFsStatsType(statsToMeasure, fsStatsRoot) {
             disk.Add(*diskUsage(container.Rootfs))
             inodes.Add(*inodeUsage(container.Rootfs))
-            overlay.Add(*diskUsage(container.Rootfs))
         }
         if hasFsStatsType(statsToMeasure, fsStatsLogs) {
             disk.Add(*diskUsage(container.Logs))
@@ -425,9 +421,8 @@ func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsSt
         }
     }
     return v1.ResourceList{
-        resourceDisk:    disk,
-        resourceInodes:  inodes,
-        resourceOverlay: overlay,
+        resourceDisk:   disk,
+        resourceInodes: inodes,
     }, nil
 }
 
@@ -727,7 +722,7 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
         }
     }
 
-    storageScratchCapacity, storageScratchAllocatable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceStorageScratch)
+    ephemeralStorageCapacity, ephemeralStorageAllocatable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceEphemeralStorage)
     if exist {
         for _, pod := range pods {
             podStat, ok := statsFunc(pod)
@@ -735,25 +730,23 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
                 continue
             }
 
-            usage, err := podDiskUsage(podStat, pod, []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource, fsStatsRoot})
+            fsStatsSet := []fsStatsType{}
+            if withImageFs {
+                fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
+            } else {
+                fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
+            }
+
+            usage, err := podDiskUsage(podStat, pod, fsStatsSet)
             if err != nil {
                 glog.Warningf("eviction manager: error getting pod disk usage %v", err)
                 continue
             }
-            // If there is a seperate imagefs set up for container runtimes, the scratch disk usage from nodefs should exclude the overlay usage
-            if withImageFs {
-                diskUsage := usage[resourceDisk]
-                diskUsageP := &diskUsage
-                diskUsagep := diskUsageP.Copy()
-                diskUsagep.Sub(usage[resourceOverlay])
-                storageScratchAllocatable.Sub(*diskUsagep)
-            } else {
-                storageScratchAllocatable.Sub(usage[resourceDisk])
-            }
+            ephemeralStorageAllocatable.Sub(usage[resourceDisk])
         }
         result[evictionapi.SignalAllocatableNodeFsAvailable] = signalObservation{
-            available: storageScratchAllocatable,
-            capacity:  storageScratchCapacity,
+            available: ephemeralStorageAllocatable,
+            capacity:  ephemeralStorageCapacity,
         }
     }
 
@@ -564,11 +564,7 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
         // capacity for every node status request
         initialCapacity := kl.containerManager.GetCapacity()
         if initialCapacity != nil {
-            node.Status.Capacity[v1.ResourceStorageScratch] = initialCapacity[v1.ResourceStorageScratch]
-            imageCapacity, ok := initialCapacity[v1.ResourceStorageOverlay]
-            if ok {
-                node.Status.Capacity[v1.ResourceStorageOverlay] = imageCapacity
-            }
+            node.Status.Capacity[v1.ResourceEphemeralStorage] = initialCapacity[v1.ResourceEphemeralStorage]
         }
     }
 }
@@ -36,7 +36,7 @@ type podEvictSpec struct {
 }
 
 const (
-    totalEvict = 3
+    totalEvict = 4
 )
 
 // Eviction Policy is described here:
@@ -48,7 +48,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 
     emptyDirVolumeName := "volume-emptydir-pod"
     podTestSpecs := []podEvictSpec{
-        {evicted: true, // This pod should be evicted because emptyDir (defualt storage type) usage violation
+        {evicted: true, // This pod should be evicted because emptyDir (default storage type) usage violation
            pod: v1.Pod{
                ObjectMeta: metav1.ObjectMeta{Name: "emptydir-hog-pod"},
                Spec: v1.PodSpec{
@@ -157,7 +157,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
            },
        },
 
-        {evicted: true, // This pod should be evicted because container overlay usage violation
+        {evicted: true, // This pod should be evicted because container ephemeral storage usage violation
            pod: v1.Pod{
                ObjectMeta: metav1.ObjectMeta{Name: "container-hog-pod"},
                Spec: v1.PodSpec{
@@ -173,7 +173,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
                        },
                        Resources: v1.ResourceRequirements{
                            Limits: v1.ResourceList{
-                                v1.ResourceStorageOverlay: *resource.NewMilliQuantity(
+                                v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
                                    int64(40000),
                                    resource.BinarySI),
                            },
@@ -183,10 +183,53 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
                },
            },
        },
+
+        {evicted: true, // This pod should be evicted because pod ephemeral storage usage violation
+            pod: v1.Pod{
+                ObjectMeta: metav1.ObjectMeta{Name: "emptydir-container-hog-pod"},
+                Spec: v1.PodSpec{
+                    RestartPolicy: v1.RestartPolicyNever,
+                    Containers: []v1.Container{
+                        {
+                            Image: "gcr.io/google_containers/busybox:1.24",
+                            Name:  "emptydir-container-hog-pod",
+                            Command: []string{
+                                "sh",
+                                "-c",
+                                "sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
+                            },
+                            Resources: v1.ResourceRequirements{
+                                Limits: v1.ResourceList{
+                                    v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
+                                        int64(40000),
+                                        resource.BinarySI),
+                                },
+                            },
+                            VolumeMounts: []v1.VolumeMount{
+                                {
+                                    Name:      emptyDirVolumeName,
+                                    MountPath: "/cache",
+                                },
+                            },
+                        },
+                    },
+                    Volumes: []v1.Volume{
+                        {
+                            Name: emptyDirVolumeName,
+                            VolumeSource: v1.VolumeSource{
+                                EmptyDir: &v1.EmptyDirVolumeSource{
+                                    SizeLimit: *resource.NewQuantity(int64(100000), resource.BinarySI),
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        },
    }
 
    evictionTestTimeout := 10 * time.Minute
-    testCondition := "EmptyDir/ContainerOverlay usage limit violation"
+    testCondition := "EmptyDir/ContainerContainerEphemeralStorage usage limit violation"
    Context(fmt.Sprintf("EmptyDirEviction when we run containers that should cause %s", testCondition), func() {
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            initialConfig.FeatureGates += ", LocalStorageCapacityIsolation=true"