Change eviction policy to manage a single local storage resource

NickrenREN 2017-08-18 12:42:19 +08:00
parent acdf625e46
commit 27901ad5df
11 changed files with 129 additions and 86 deletions

View File

@@ -745,7 +745,7 @@ func parseResourceList(m kubeletconfiginternal.ConfigurationMap) (v1.ResourceLis
for k, v := range m {
switch v1.ResourceName(k) {
// CPU, memory and local storage resources are supported.
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceStorage:
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage:
q, err := resource.ParseQuantity(v)
if err != nil {
return nil, err
@@ -753,12 +753,7 @@ func parseResourceList(m kubeletconfiginternal.ConfigurationMap) (v1.ResourceLis
if q.Sign() == -1 {
return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
}
// storage specified in configuration map is mapped to ResourceStorageScratch API
if v1.ResourceName(k) == v1.ResourceStorage {
rl[v1.ResourceStorageScratch] = q
} else {
rl[v1.ResourceName(k)] = q
}
rl[v1.ResourceName(k)] = q
default:
return nil, fmt.Errorf("cannot reserve %q resource", k)
}
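For orientation, a minimal sketch (not part of the commit) of what the simplified branch does: a reserved "ephemeral-storage" entry is parsed and stored under its own resource name, with no translation to a scratch resource. The standalone parseReserved helper, its name, and the import paths are assumptions for illustration only.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/v1"
)

// parseReserved mirrors the reworked switch above: CPU, memory and
// ephemeral-storage all land in the ResourceList under their own names.
func parseReserved(m map[string]string) (v1.ResourceList, error) {
	rl := make(v1.ResourceList)
	for k, v := range m {
		switch v1.ResourceName(k) {
		case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage:
			q, err := resource.ParseQuantity(v)
			if err != nil {
				return nil, err
			}
			if q.Sign() == -1 {
				return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
			}
			rl[v1.ResourceName(k)] = q // no remapping to ResourceStorageScratch anymore
		default:
			return nil, fmt.Errorf("cannot reserve %q resource", k)
		}
	}
	return rl, nil
}

func main() {
	rl, err := parseReserved(map[string]string{"ephemeral-storage": "1Gi"})
	if err != nil {
		panic(err)
	}
	q := rl[v1.ResourceEphemeralStorage]
	fmt.Println(q.String()) // prints 1Gi
}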

View File

@@ -35,18 +35,9 @@ func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
return c
}
func StorageScratchCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
func EphemeralStorageCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
c := v1.ResourceList{
v1.ResourceStorageScratch: *resource.NewQuantity(
int64(info.Capacity),
resource.BinarySI),
}
return c
}
func StorageOverlayCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceList {
c := v1.ResourceList{
v1.ResourceStorageOverlay: *resource.NewQuantity(
v1.ResourceEphemeralStorage: *resource.NewQuantity(
int64(info.Capacity),
resource.BinarySI),
}

View File

@@ -55,7 +55,6 @@ go_library(
"//pkg/util/procfs:go_default_library",
"//pkg/util/sysctl:go_default_library",
"//pkg/util/version:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",

View File

@@ -30,7 +30,6 @@ import (
"time"
"github.com/golang/glog"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
@@ -552,24 +551,11 @@ func (cm *containerManagerImpl) setFsCapacity() error {
if err != nil {
return fmt.Errorf("Fail to get rootfs information %v", err)
}
hasDedicatedImageFs, _ := cm.cadvisorInterface.HasDedicatedImageFs()
var imagesfs cadvisorapiv2.FsInfo
if hasDedicatedImageFs {
imagesfs, err = cm.cadvisorInterface.ImagesFsInfo()
if err != nil {
return fmt.Errorf("Fail to get imagefs information %v", err)
}
}
cm.Lock()
for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) {
cm.capacity[rName] = rCap
}
if hasDedicatedImageFs {
for rName, rCap := range cadvisor.StorageOverlayCapacityFromFsInfo(imagesfs) {
cm.capacity[rName] = rCap
}
}
cm.Unlock()
return nil
}
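Because the rendering above interleaves removed and added lines, here is a sketch of roughly what setFsCapacity looks like after this change. The RootFsInfo call and the containerManagerImpl fields are assumed from the surrounding code of this era; the point is that only the root filesystem now feeds a single ephemeral-storage capacity, and the dedicated-imagefs branch is gone.

func (cm *containerManagerImpl) setFsCapacity() error {
	// Assumed call preceding the hunk: the kubelet's cadvisor interface
	// exposes the root filesystem info.
	rootfs, err := cm.cadvisorInterface.RootFsInfo()
	if err != nil {
		return fmt.Errorf("Fail to get rootfs information %v", err)
	}

	cm.Lock()
	// Fold the ephemeral-storage capacity derived from the root
	// filesystem into the manager's capacity map.
	for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) {
		cm.capacity[rName] = rCap
	}
	cm.Unlock()
	return nil
}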

View File

@@ -218,9 +218,9 @@ func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.Res
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
ret[v1.ResourceMemory] = *value
case evictionapi.SignalNodeFsAvailable:
storageCapacity := capacity[v1.ResourceStorageScratch]
storageCapacity := capacity[v1.ResourceEphemeralStorage]
value := evictionapi.GetThresholdQuantity(threshold.Value, &storageCapacity)
ret[v1.ResourceStorageScratch] = *value
ret[v1.ResourceEphemeralStorage] = *value
}
}
return ret
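As a rough illustration of what the nodefs branch now computes: the hard-eviction threshold is resolved against the node's ephemeral-storage capacity, either as an absolute quantity or as a percentage of that capacity. The sketch below mirrors that resolution with a small standalone helper (the real code uses evictionapi.GetThresholdQuantity); the helper and its signature are hypothetical.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// resolveThreshold mirrors the idea behind GetThresholdQuantity: an
// absolute quantity wins; otherwise the percentage is applied to the
// supplied capacity. Both the function and its shape are illustrative.
func resolveThreshold(quantity *resource.Quantity, percentage float32, capacity resource.Quantity) resource.Quantity {
	if quantity != nil {
		return *quantity
	}
	return *resource.NewQuantity(int64(float64(percentage)*float64(capacity.Value())), resource.BinarySI)
}

func main() {
	capacity := resource.MustParse("100Gi")
	// A 10% nodefs hard-eviction threshold reserves 10Gi of the
	// node's ephemeral-storage capacity.
	reserved := resolveThreshold(nil, 0.1, capacity)
	fmt.Println(reserved.String()) // prints 10Gi
}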

View File

@@ -316,17 +316,17 @@ func TestNodeAllocatableInputValidation(t *testing.T) {
invalidConfiguration bool
}{
{
kubeReserved: getScratchResourceList("100Mi"),
systemReserved: getScratchResourceList("50Mi"),
capacity: getScratchResourceList("500Mi"),
kubeReserved: getEphemeralStorageResourceList("100Mi"),
systemReserved: getEphemeralStorageResourceList("50Mi"),
capacity: getEphemeralStorageResourceList("500Mi"),
},
{
kubeReserved: getScratchResourceList("10Gi"),
systemReserved: getScratchResourceList("10Gi"),
kubeReserved: getEphemeralStorageResourceList("10Gi"),
systemReserved: getEphemeralStorageResourceList("10Gi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &storageEvictionThreshold,
},
capacity: getScratchResourceList("20Gi"),
capacity: getEphemeralStorageResourceList("20Gi"),
invalidConfiguration: true,
},
}
@@ -359,12 +359,12 @@ func TestNodeAllocatableInputValidation(t *testing.T) {
}
}
// getScratchResourceList returns a ResourceList with the
// specified scratch storage resource values
func getScratchResourceList(storage string) v1.ResourceList {
// getEphemeralStorageResourceList returns a ResourceList with the
// specified ephemeral storage resource values
func getEphemeralStorageResourceList(storage string) v1.ResourceList {
res := v1.ResourceList{}
if storage != "" {
res[v1.ResourceStorageScratch] = resource.MustParse(storage)
res[v1.ResourceEphemeralStorage] = resource.MustParse(storage)
}
return res
}

View File

@@ -48,6 +48,7 @@ go_library(
deps = [
"//pkg/api:go_default_library",
"//pkg/api/v1/helper/qos:go_default_library",
"//pkg/api/v1/resource:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/cm:go_default_library",

View File

@@ -31,6 +31,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos"
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
"k8s.io/kubernetes/pkg/features"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm"
@@ -472,7 +473,12 @@ func (m *managerImpl) localStorageEviction(pods []*v1.Pod) []*v1.Pod {
continue
}
if m.containerOverlayLimitEviction(podStats, pod) {
if m.podEphemeralStorageLimitEviction(podStats, pod) {
evicted = append(evicted, pod)
continue
}
if m.containerEphemeralStorageLimitEviction(podStats, pod) {
evicted = append(evicted, pod)
}
}
@@ -496,23 +502,56 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
}
}
}
return false
}
func (m *managerImpl) containerOverlayLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
_, podLimits := apiv1resource.PodRequestsAndLimits(pod)
_, found := podLimits[v1.ResourceEphemeralStorage]
if !found {
return false
}
podEphemeralStorageTotalUsage := &resource.Quantity{}
fsStatsSet := []fsStatsType{}
if *m.dedicatedImageFs {
fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
} else {
fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
}
podUsage, err := podDiskUsage(podStats, pod, fsStatsSet)
if err != nil {
glog.Errorf("eviction manager: error getting pod disk usage %v", err)
return false
}
podEphemeralStorageTotalUsage.Add(podUsage[resourceDisk])
if podEphemeralStorageTotalUsage.Cmp(podLimits[v1.ResourceEphemeralStorage]) > 0 {
// the pod's total usage exceeds the total ephemeral storage limit of its containers; evict the pod
return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("pod ephemeral local storage usage exceeds the total limit of containers %v", podLimits[v1.ResourceEphemeralStorage]))
}
return false
}
func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
thresholdsMap := make(map[string]*resource.Quantity)
for _, container := range pod.Spec.Containers {
overlayLimit := container.Resources.Limits.StorageOverlay()
if overlayLimit != nil && overlayLimit.Value() != 0 {
thresholdsMap[container.Name] = overlayLimit
ephemeralLimit := container.Resources.Limits.StorageEphemeral()
if ephemeralLimit != nil && ephemeralLimit.Value() != 0 {
thresholdsMap[container.Name] = ephemeralLimit
}
}
for _, containerStat := range podStats.Containers {
rootfs := diskUsage(containerStat.Rootfs)
if overlayThreshold, ok := thresholdsMap[containerStat.Name]; ok {
if overlayThreshold.Cmp(*rootfs) < 0 {
return m.evictPod(pod, v1.ResourceName("containerOverlay"), fmt.Sprintf("container's overlay usage exceeds the limit %q", overlayThreshold.String()))
containerUsed := diskUsage(containerStat.Logs)
if !*m.dedicatedImageFs {
containerUsed.Add(*diskUsage(containerStat.Rootfs))
}
if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("container's ephemeral local storage usage exceeds the limit %q", ephemeralStorageThreshold.String()))
}
}
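To make the two new checks easier to compare, here is a small, self-contained illustration of the decision logic using plain byte counts (no kubelet types): the pod-level check compares the pod's aggregate ephemeral usage against the summed container limits, and the container-level check compares each container's logs-plus-rootfs usage (rootfs counted only when there is no dedicated imagefs) against its own limit. Names and structure are invented for illustration, and volume (emptyDir) usage is omitted for brevity even though the real pod-level check includes it.

package main

import "fmt"

// containerUsage is an illustrative stand-in for the per-container
// stats the eviction manager reads from the summary API.
type containerUsage struct {
	name        string
	rootfsBytes int64
	logsBytes   int64
	limitBytes  int64 // 0 means no ephemeral-storage limit set
}

// shouldEvict applies the same two checks as the diff above, reduced
// to integers: a per-container check against each container's own
// limit, then a pod-level check against the summed container limits.
func shouldEvict(containers []containerUsage, dedicatedImageFs bool) (bool, string) {
	var podUsage, podLimit int64
	for _, c := range containers {
		used := c.logsBytes
		if !dedicatedImageFs {
			// Without a separate imagefs, writable-layer usage also
			// counts against ephemeral storage.
			used += c.rootfsBytes
		}
		podUsage += used
		podLimit += c.limitBytes
		if c.limitBytes > 0 && used > c.limitBytes {
			return true, fmt.Sprintf("container %s exceeds its ephemeral storage limit", c.name)
		}
	}
	if podLimit > 0 && podUsage > podLimit {
		return true, "pod exceeds the total ephemeral storage limit of its containers"
	}
	return false, ""
}

func main() {
	evict, reason := shouldEvict([]containerUsage{
		{name: "app", rootfsBytes: 300 << 20, logsBytes: 50 << 20, limitBytes: 256 << 20},
	}, false)
	fmt.Println(evict, reason) // true container app exceeds its ephemeral storage limit
}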

View File

@@ -54,8 +54,6 @@ const (
resourceNodeFs v1.ResourceName = "nodefs"
// nodefs inodes, number. internal to this module, used to account for local node root filesystem inodes.
resourceNodeFsInodes v1.ResourceName = "nodefsInodes"
// container overlay storage, in bytes. internal to this module, used to account for local disk usage for container overlay.
resourceOverlay v1.ResourceName = "overlay"
)
var (
@@ -400,12 +398,10 @@ func localVolumeNames(pod *v1.Pod) []string {
func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsStatsType) (v1.ResourceList, error) {
disk := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.BinarySI}
overlay := resource.Quantity{Format: resource.BinarySI}
for _, container := range podStats.Containers {
if hasFsStatsType(statsToMeasure, fsStatsRoot) {
disk.Add(*diskUsage(container.Rootfs))
inodes.Add(*inodeUsage(container.Rootfs))
overlay.Add(*diskUsage(container.Rootfs))
}
if hasFsStatsType(statsToMeasure, fsStatsLogs) {
disk.Add(*diskUsage(container.Logs))
@@ -425,9 +421,8 @@ func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsSt
}
}
return v1.ResourceList{
resourceDisk: disk,
resourceInodes: inodes,
resourceOverlay: overlay,
resourceDisk: disk,
resourceInodes: inodes,
}, nil
}
@@ -727,7 +722,7 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
}
}
storageScratchCapacity, storageScratchAllocatable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceStorageScratch)
ephemeralStorageCapacity, ephemeralStorageAllocatable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceEphemeralStorage)
if exist {
for _, pod := range pods {
podStat, ok := statsFunc(pod)
@@ -735,25 +730,23 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
continue
}
usage, err := podDiskUsage(podStat, pod, []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource, fsStatsRoot})
fsStatsSet := []fsStatsType{}
if withImageFs {
fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
} else {
fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
}
usage, err := podDiskUsage(podStat, pod, fsStatsSet)
if err != nil {
glog.Warningf("eviction manager: error getting pod disk usage %v", err)
continue
}
// If there is a seperate imagefs set up for container runtimes, the scratch disk usage from nodefs should exclude the overlay usage
if withImageFs {
diskUsage := usage[resourceDisk]
diskUsageP := &diskUsage
diskUsagep := diskUsageP.Copy()
diskUsagep.Sub(usage[resourceOverlay])
storageScratchAllocatable.Sub(*diskUsagep)
} else {
storageScratchAllocatable.Sub(usage[resourceDisk])
}
ephemeralStorageAllocatable.Sub(usage[resourceDisk])
}
result[evictionapi.SignalAllocatableNodeFsAvailable] = signalObservation{
available: storageScratchAllocatable,
capacity: storageScratchCapacity,
available: ephemeralStorageAllocatable,
capacity: ephemeralStorageCapacity,
}
}
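The key simplification in this hunk is how per-pod disk usage is attributed to the nodefs signal: instead of subtracting an overlay figure after the fact, the set of filesystem stats is chosen up front based on whether the node has a dedicated imagefs. A small standalone sketch of just that selection; the fsStatsType names come from the diff, but the constant values and the nodefsStatsSet helper are assumptions so the snippet compiles on its own.

package main

import "fmt"

// fsStatsType mirrors the internal enum used by the eviction manager;
// redefined here only so the sketch is self-contained.
type fsStatsType string

const (
	fsStatsRoot              fsStatsType = "root"
	fsStatsLogs              fsStatsType = "logs"
	fsStatsLocalVolumeSource fsStatsType = "localVolumeSource"
)

// nodefsStatsSet returns which filesystem stats count against the
// nodefs (ephemeral storage) signal: with a dedicated imagefs the
// container writable layers live elsewhere and are excluded; without
// one they are charged to nodefs as well.
func nodefsStatsSet(withImageFs bool) []fsStatsType {
	if withImageFs {
		return []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
	}
	return []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
}

func main() {
	fmt.Println(nodefsStatsSet(true))  // [logs localVolumeSource]
	fmt.Println(nodefsStatsSet(false)) // [root logs localVolumeSource]
}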

View File

@@ -564,11 +564,7 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
// capacity for every node status request
initialCapacity := kl.containerManager.GetCapacity()
if initialCapacity != nil {
node.Status.Capacity[v1.ResourceStorageScratch] = initialCapacity[v1.ResourceStorageScratch]
imageCapacity, ok := initialCapacity[v1.ResourceStorageOverlay]
if ok {
node.Status.Capacity[v1.ResourceStorageOverlay] = imageCapacity
}
node.Status.Capacity[v1.ResourceEphemeralStorage] = initialCapacity[v1.ResourceEphemeralStorage]
}
}
}

View File

@@ -36,7 +36,7 @@ type podEvictSpec struct {
}
const (
totalEvict = 3
totalEvict = 4
)
// Eviction Policy is described here:
@@ -48,7 +48,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
emptyDirVolumeName := "volume-emptydir-pod"
podTestSpecs := []podEvictSpec{
{evicted: true, // This pod should be evicted because emptyDir (defualt storage type) usage violation
{evicted: true, // This pod should be evicted because emptyDir (default storage type) usage violation
pod: v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "emptydir-hog-pod"},
Spec: v1.PodSpec{
@@ -157,7 +157,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
},
},
{evicted: true, // This pod should be evicted because container overlay usage violation
{evicted: true, // This pod should be evicted because container ephemeral storage usage violation
pod: v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "container-hog-pod"},
Spec: v1.PodSpec{
@@ -173,7 +173,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
},
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceStorageOverlay: *resource.NewMilliQuantity(
v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
int64(40000),
resource.BinarySI),
},
@@ -183,10 +183,53 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
},
},
},
{evicted: true, // This pod should be evicted because pod ephemeral storage usage violation
pod: v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "emptydir-container-hog-pod"},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyNever,
Containers: []v1.Container{
{
Image: "gcr.io/google_containers/busybox:1.24",
Name: "emptydir-container-hog-pod",
Command: []string{
"sh",
"-c",
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
},
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
int64(40000),
resource.BinarySI),
},
},
VolumeMounts: []v1.VolumeMount{
{
Name: emptyDirVolumeName,
MountPath: "/cache",
},
},
},
},
Volumes: []v1.Volume{
{
Name: emptyDirVolumeName,
VolumeSource: v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: *resource.NewQuantity(int64(100000), resource.BinarySI),
},
},
},
},
},
},
},
}
evictionTestTimeout := 10 * time.Minute
testCondition := "EmptyDir/ContainerOverlay usage limit violation"
testCondition := "EmptyDir/ContainerEphemeralStorage usage limit violation"
Context(fmt.Sprintf("EmptyDirEviction when we run containers that should cause %s", testCondition), func() {
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
initialConfig.FeatureGates += ", LocalStorageCapacityIsolation=true"