In-place Pod Vertical Scaling - core implementation

1. Core Kubelet changes to implement In-place Pod Vertical Scaling.
2. E2E tests for In-place Pod Vertical Scaling.
3. Refactor kubelet code and add missing tests (Derek's kubelet review)
4. Add a new hash over container fields without Resources field to allow feature gate toggling without restarting containers not using the feature.
5. Fix corner-case where resize A->B->A gets ignored
6. Add cgroup v2 support to pod resize E2E test.
KEP: /enhancements/keps/sig-node/1287-in-place-update-pod-resources

Co-authored-by: Chen Wang <Chen.Wang1@ibm.com>
Vinay Kulkarni 2022-11-04 13:47:33 -07:00 committed by vinay kulkarni
parent 231849a908
commit f2bd94a0de
48 changed files with 4639 additions and 56 deletions
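For orientation, the sketch below is an editor's illustration (not part of this commit) of the kind of container spec the changes summarized above act on: CPU may be resized in place, while a memory change requires a container restart. The types and constants are the ones exercised by the HashContainerWithoutResources test further down; the image name is hypothetical.

// Editor's sketch: a container that tolerates in-place CPU resize but
// requires a restart when its memory resources change.
package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
)

func main() {
    c := v1.Container{
        Name:  "app",
        Image: "registry.example/app:v1", // hypothetical image
        Resources: v1.ResourceRequirements{
            Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m"), v1.ResourceMemory: resource.MustParse("100Mi")},
            Limits:   v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("200Mi")},
        },
        ResizePolicy: []v1.ContainerResizePolicy{
            {ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired},
            {ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired},
        },
    }
    fmt.Printf("%+v\n", c.ResizePolicy)
}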

View File

@ -22,6 +22,7 @@ import (
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -557,3 +558,86 @@ func (m *cgroupManagerImpl) MemoryUsage(name CgroupName) (int64, error) {
val, err := fscommon.GetCgroupParamUint(path, file)
return int64(val), err
}
// Get the memory limit in bytes applied to the cgroup
func (m *cgroupManagerImpl) GetCgroupMemoryConfig(name CgroupName) (uint64, error) {
cgroupPaths := m.buildCgroupPaths(name)
cgroupMemoryPath, found := cgroupPaths["memory"]
if !found {
return 0, fmt.Errorf("failed to build memory cgroup fs path for cgroup %v", name)
}
memLimit, err := fscommon.GetCgroupParamUint(cgroupMemoryPath, "memory.limit_in_bytes")
if err != nil {
return 0, fmt.Errorf("failed to get memory.limit_in_bytes for cgroup %v: %v", name, err)
}
return memLimit, nil
}
// Get the cpu quota, cpu period, and cpu shares applied to the cgroup
func (m *cgroupManagerImpl) GetCgroupCpuConfig(name CgroupName) (int64, uint64, uint64, error) {
cgroupPaths := m.buildCgroupPaths(name)
cgroupCpuPath, found := cgroupPaths["cpu"]
if !found {
return 0, 0, 0, fmt.Errorf("failed to build CPU cgroup fs path for cgroup %v", name)
}
cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupCpuPath, "cpu.cfs_quota_us")
if errQ != nil {
return 0, 0, 0, fmt.Errorf("failed to read CPU quota for cgroup %v: %v", name, errQ)
}
cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64)
if errInt != nil {
return 0, 0, 0, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %v", name, errInt)
}
cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupCpuPath, "cpu.cfs_period_us")
if errP != nil {
return 0, 0, 0, fmt.Errorf("failed to read CPU period for cgroup %v: %v", name, errP)
}
cpuShares, errS := fscommon.GetCgroupParamUint(cgroupCpuPath, "cpu.shares")
if errS != nil {
return 0, 0, 0, fmt.Errorf("failed to read CPU shares for cgroup %v: %v", name, errS)
}
return cpuQuota, cpuPeriod, cpuShares, nil
}
// Set the memory limit in bytes applied to the cgroup
func (m *cgroupManagerImpl) SetCgroupMemoryConfig(name CgroupName, memoryLimit int64) error {
cgroupPaths := m.buildCgroupPaths(name)
cgroupMemoryPath, found := cgroupPaths["memory"]
if !found {
return fmt.Errorf("failed to build memory cgroup fs path for cgroup %v", name)
}
memLimit := strconv.FormatInt(memoryLimit, 10)
if err := os.WriteFile(filepath.Join(cgroupMemoryPath, "memory.limit_in_bytes"), []byte(memLimit), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", memLimit, cgroupMemoryPath, err)
}
return nil
}
// Set the cpu quota, cpu period, and cpu shares applied to the cgroup
func (m *cgroupManagerImpl) SetCgroupCpuConfig(name CgroupName, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error {
var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string
cgroupPaths := m.buildCgroupPaths(name)
cgroupCpuPath, found := cgroupPaths["cpu"]
if !found {
return fmt.Errorf("failed to build cpu cgroup fs path for cgroup %v", name)
}
if cpuQuota != nil {
cpuQuotaStr = strconv.FormatInt(*cpuQuota, 10)
if err := os.WriteFile(filepath.Join(cgroupCpuPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuQuotaStr, cgroupCpuPath, err)
}
}
if cpuPeriod != nil {
cpuPeriodStr = strconv.FormatUint(*cpuPeriod, 10)
if err := os.WriteFile(filepath.Join(cgroupCpuPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuPeriodStr, cgroupCpuPath, err)
}
}
if cpuShares != nil {
cpuSharesStr = strconv.FormatUint(*cpuShares, 10)
if err := os.WriteFile(filepath.Join(cgroupCpuPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuSharesStr, cgroupCpuPath, err)
}
}
return nil
}
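A minimal usage sketch for the accessors above, assuming a CgroupManager value obtained elsewhere in the kubelet: it reads the current CPU configuration and halves the quota, passing nil for period and shares so they are left unchanged. The helper name is the editor's, not something this commit defines.

// Editor's sketch: lower a cgroup's CPU quota using the new accessors.
// A non-positive quota (e.g. -1) means "unlimited" on cgroup v1, so skip it.
func halveCgroupCPUQuota(mgr CgroupManager, name CgroupName) error {
    cpuQuota, _, _, err := mgr.GetCgroupCpuConfig(name)
    if err != nil {
        return err
    }
    if cpuQuota <= 0 {
        return nil
    }
    newQuota := cpuQuota / 2
    return mgr.SetCgroupCpuConfig(name, &newQuota, nil, nil)
}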

View File

@ -77,6 +77,22 @@ func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error
return nil
}
func (m *unsupportedCgroupManager) GetCgroupMemoryConfig(name CgroupName) (uint64, error) {
return 0, errNotSupported
}
func (m *unsupportedCgroupManager) GetCgroupCpuConfig(name CgroupName) (int64, uint64, uint64, error) {
return 0, 0, 0, errNotSupported
}
func (m *unsupportedCgroupManager) SetCgroupMemoryConfig(name CgroupName, memoryLimit int64) error {
return errNotSupported
}
func (m *unsupportedCgroupManager) SetCgroupCpuConfig(name CgroupName, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error {
return errNotSupported
}
var RootCgroupName = CgroupName([]string{})
func NewCgroupName(base CgroupName, components ...string) CgroupName {

View File

@ -95,6 +95,22 @@ func (cm *containerManagerStub) GetDevicePluginResourceCapacity() (v1.ResourceLi
return cm.extendedPluginResources, cm.extendedPluginResources, []string{}
}
func (m *podContainerManagerStub) GetPodCgroupMemoryConfig(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerStub) GetPodCgroupCpuConfig(_ *v1.Pod) (int64, uint64, uint64, error) {
return 0, 0, 0, nil
}
func (m *podContainerManagerStub) SetPodCgroupMemoryConfig(_ *v1.Pod, _ int64) error {
return nil
}
func (m *podContainerManagerStub) SetPodCgroupCpuConfig(_ *v1.Pod, _ *int64, _, _ *uint64) error {
return nil
}
func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
return &podContainerManagerStub{}
}

View File

@ -20,9 +20,11 @@ import (
"fmt" "fmt"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2" "k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset" "k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
@ -380,6 +382,11 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
return 0
}
cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
cpuQuantity = cs.ResourcesAllocated[v1.ResourceCPU]
}
}
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
return 0
}
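The hunk above is the first of several places in this commit (cpumanager, helpers_linux, memorymanager, eviction) that prefer the allocated values recorded in container status over the spec's requests when the feature gate is on. A hedged helper capturing that pattern might look like the sketch below; the function name is the editor's, not part of the commit.

// Editor's sketch of the recurring "allocated over requested" lookup:
// status.ResourcesAllocated is the checkpointed, admitted value and is
// treated as the source of truth while a resize may be pending.
func allocatedOrRequested(pod *v1.Pod, container *v1.Container) v1.ResourceList {
    if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
        if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok && cs.ResourcesAllocated != nil {
            return cs.ResourcesAllocated
        }
    }
    return container.Resources.Requests
}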

View File

@ -104,3 +104,38 @@ func (m *FakePodContainerManager) IsPodCgroup(cgroupfs string) (bool, types.UID)
m.CalledFunctions = append(m.CalledFunctions, "IsPodCgroup")
return false, types.UID("")
}
func (cm *FakePodContainerManager) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupMemoryUsage")
return 0, nil
}
func (cm *FakePodContainerManager) GetPodCgroupMemoryConfig(_ *v1.Pod) (uint64, error) {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupMemoryConfig")
return 0, nil
}
func (cm *FakePodContainerManager) GetPodCgroupCpuConfig(_ *v1.Pod) (int64, uint64, uint64, error) {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupCpuConfig")
return 0, 0, 0, nil
}
func (cm *FakePodContainerManager) SetPodCgroupMemoryConfig(_ *v1.Pod, _ int64) error {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "SetPodCgroupMemoryConfig")
return nil
}
func (cm *FakePodContainerManager) SetPodCgroupCpuConfig(_ *v1.Pod, _ *int64, _, _ *uint64) error {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "SetPodCgroupCpuConfig")
return nil
}

View File

@ -28,6 +28,7 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/api/v1/resource" "k8s.io/kubernetes/pkg/api/v1/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
@ -151,6 +152,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
memoryLimitsDeclared = false
}
containerHugePageLimits := HugePageLimits(container.Resources.Requests)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
containerHugePageLimits = HugePageLimits(cs.ResourcesAllocated)
}
}
for k, v := range containerHugePageLimits {
if value, exists := hugePageLimits[k]; exists {
hugePageLimits[k] = value + v

View File

@ -25,10 +25,13 @@ import (
)
const (
MinShares = 0
MaxShares = 0
SharesPerCPU = 0
MilliCPUToCPU = 0
QuotaPeriod = 0
MinQuotaPeriod = 0
)

View File

@ -25,9 +25,12 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2" "k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
corehelper "k8s.io/kubernetes/pkg/apis/core/v1/helper" corehelper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
@ -107,7 +110,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
hint := p.affinity.GetAffinity(podUID, container.Name)
klog.InfoS("Got topology affinity", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "hint", hint)
requestedResources, err := getRequestedResources(pod, container)
if err != nil {
return err
}
@ -319,7 +322,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) {
reqRsrcsByAppCtrs := make(map[v1.ResourceName]uint64)
for _, ctr := range pod.Spec.InitContainers {
reqRsrcs, err := getRequestedResources(pod, &ctr)
if err != nil {
return nil, err
@ -336,7 +339,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) {
}
for _, ctr := range pod.Spec.Containers {
reqRsrcs, err := getRequestedResources(pod, &ctr)
if err != nil {
return nil, err
@ -391,7 +394,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
return nil
}
requestedResources, err := getRequestedResources(pod, container)
if err != nil {
klog.ErrorS(err, "Failed to get container requested resources", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
return nil
@ -408,9 +411,15 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
return p.calculateHints(s.GetMachineState(), pod, requestedResources)
}
func getRequestedResources(pod *v1.Pod, container *v1.Container) (map[v1.ResourceName]uint64, error) {
requestedResources := map[v1.ResourceName]uint64{}
resources := container.Resources.Requests
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
resources = cs.ResourcesAllocated
}
}
for resourceName, quantity := range resources {
if resourceName != v1.ResourceMemory && !corehelper.IsHugePageResourceName(resourceName) {
continue
}

View File

@ -120,6 +120,35 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName,
return cgroupName, cgroupfsName
}
func (m *podContainerManagerImpl) GetPodCgroupMemoryUsage(pod *v1.Pod) (uint64, error) {
podCgroupName, _ := m.GetPodContainerName(pod)
memUsage, err := m.cgroupManager.MemoryUsage(podCgroupName)
if err != nil {
return 0, err
}
return uint64(memUsage), nil
}
func (m *podContainerManagerImpl) GetPodCgroupMemoryConfig(pod *v1.Pod) (uint64, error) {
podCgroupName, _ := m.GetPodContainerName(pod)
return m.cgroupManager.GetCgroupMemoryConfig(podCgroupName)
}
func (m *podContainerManagerImpl) GetPodCgroupCpuConfig(pod *v1.Pod) (int64, uint64, uint64, error) {
podCgroupName, _ := m.GetPodContainerName(pod)
return m.cgroupManager.GetCgroupCpuConfig(podCgroupName)
}
func (m *podContainerManagerImpl) SetPodCgroupMemoryConfig(pod *v1.Pod, memoryLimit int64) error {
podCgroupName, _ := m.GetPodContainerName(pod)
return m.cgroupManager.SetCgroupMemoryConfig(podCgroupName, memoryLimit)
}
func (m *podContainerManagerImpl) SetPodCgroupCpuConfig(pod *v1.Pod, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error {
podCgroupName, _ := m.GetPodContainerName(pod)
return m.cgroupManager.SetCgroupCpuConfig(podCgroupName, cpuQuota, cpuPeriod, cpuShares)
}
// Kill one process ID
func (m *podContainerManagerImpl) killOnePid(pid int) error {
// os.FindProcess never returns an error on POSIX
@ -322,3 +351,23 @@ func (m *podContainerManagerNoop) GetAllPodsFromCgroups() (map[types.UID]CgroupN
func (m *podContainerManagerNoop) IsPodCgroup(cgroupfs string) (bool, types.UID) {
return false, types.UID("")
}
func (m *podContainerManagerNoop) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerNoop) GetPodCgroupMemoryConfig(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerNoop) GetPodCgroupCpuConfig(_ *v1.Pod) (int64, uint64, uint64, error) {
return 0, 0, 0, nil
}
func (m *podContainerManagerNoop) SetPodCgroupMemoryConfig(_ *v1.Pod, _ int64) error {
return nil
}
func (m *podContainerManagerNoop) SetPodCgroupCpuConfig(_ *v1.Pod, _ *int64, _, _ *uint64) error {
return nil
}

View File

@ -53,3 +53,23 @@ func (m *podContainerManagerStub) GetAllPodsFromCgroups() (map[types.UID]CgroupN
func (m *podContainerManagerStub) IsPodCgroup(cgroupfs string) (bool, types.UID) {
return false, types.UID("")
}
func (m *podContainerManagerStub) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerStub) GetPodCgroupMemoryLimit(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerStub) GetPodCgroupCpuLimit(_ *v1.Pod) (int64, uint64, uint64, error) {
return 0, 0, 0, nil
}
func (m *podContainerManagerStub) SetPodCgroupMemoryLimit(_ *v1.Pod, _ int64) error {
return nil
}
func (m *podContainerManagerStub) SetPodCgroupCpuLimit(_ *v1.Pod, _ *int64, _, _ *uint64) error {
return nil
}

View File

@ -84,6 +84,14 @@ type CgroupManager interface {
ReduceCPULimits(cgroupName CgroupName) error
// MemoryUsage returns current memory usage of the specified cgroup, as read from the cgroupfs.
MemoryUsage(name CgroupName) (int64, error)
// GetCgroupMemoryConfig returns the memory limit of the specified cgroup as read from cgroup fs.
GetCgroupMemoryConfig(name CgroupName) (uint64, error)
// GetCgroupCpuConfig returns the cpu quota, cpu period, and cpu shares of the specified cgroup as read from cgroup fs.
GetCgroupCpuConfig(name CgroupName) (int64, uint64, uint64, error)
// SetCgroupMemoryConfig sets the memory limit of the specified cgroup.
SetCgroupMemoryConfig(name CgroupName, memoryLimit int64) error
// SetCgroupCpuConfig sets the cpu quota, cpu period, and cpu shares of the specified cgroup.
SetCgroupCpuConfig(name CgroupName, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error
}
// QOSContainersInfo stores the names of containers per qos
@ -119,4 +127,19 @@ type PodContainerManager interface {
// IsPodCgroup returns true if the literal cgroupfs name corresponds to a pod
IsPodCgroup(cgroupfs string) (bool, types.UID)
// Get value of memory.usage_in_bytes for the pod Cgroup
GetPodCgroupMemoryUsage(pod *v1.Pod) (uint64, error)
// Get value of memory.limit_in_bytes for the pod Cgroup
GetPodCgroupMemoryConfig(pod *v1.Pod) (uint64, error)
// Get values of cpu.cfs_quota_us, cpu.cfs_period_us, and cpu.shares for the pod Cgroup
GetPodCgroupCpuConfig(pod *v1.Pod) (int64, uint64, uint64, error)
// Set value of memory.limit_in_bytes for the pod Cgroup
SetPodCgroupMemoryConfig(pod *v1.Pod, memoryLimit int64) error
// Set values of cpu.cfs_quota_us, cpu.cfs_period_us, and cpu.shares for the pod Cgroup
SetPodCgroupCpuConfig(pod *v1.Pod, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error
}
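A hedged sketch of how the pod-level accessors declared above might be driven when actuating a resize; the function is illustrative, and nil arguments leave the corresponding cgroup values untouched.

// Editor's sketch, not part of this commit: raise a pod cgroup's memory
// limit and CPU quota via the PodContainerManager interface above.
func raisePodCgroupLimits(pcm PodContainerManager, pod *v1.Pod, memLimit, cpuQuota int64) error {
    curMem, err := pcm.GetPodCgroupMemoryConfig(pod)
    if err != nil {
        return err
    }
    if int64(curMem) < memLimit {
        if err := pcm.SetPodCgroupMemoryConfig(pod, memLimit); err != nil {
            return err
        }
    }
    // Change only the quota; period and shares stay as-is (nil).
    return pcm.SetPodCgroupCpuConfig(pod, &cpuQuota, nil, nil)
}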

View File

@ -117,6 +117,23 @@ func HashContainer(container *v1.Container) uint64 {
return uint64(hash.Sum32())
}
// HashContainerWithoutResources returns the hash of the container with Resources field zero'd out.
func HashContainerWithoutResources(container *v1.Container) uint64 {
// InPlacePodVerticalScaling enables mutable Resources field.
// Changes to this field may not require container restart depending on policy.
// Compute hash over fields besides the Resources field
// NOTE: This is needed during alpha and beta so that containers using Resources but
// not subject to In-place resize are not unexpectedly restarted when
// InPlacePodVerticalScaling feature-gate is toggled.
//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashContainerWithoutResources the new Hash.
hashWithoutResources := fnv.New32a()
containerCopy := container.DeepCopy()
containerCopy.Resources = v1.ResourceRequirements{}
containerJSON, _ := json.Marshal(containerCopy)
hashutil.DeepHashObject(hashWithoutResources, containerJSON)
return uint64(hashWithoutResources.Sum32())
}
// envVarsToMap constructs a map of environment name to value from a slice
// of env vars.
func envVarsToMap(envs []EnvVar) map[string]string {
@ -252,12 +269,13 @@ func ConvertPodStatusToRunningPod(runtimeName string, podStatus *PodStatus) Pod
continue
}
container := &Container{
ID: containerStatus.ID,
Name: containerStatus.Name,
Image: containerStatus.Image,
ImageID: containerStatus.ImageID,
Hash: containerStatus.Hash,
HashWithoutResources: containerStatus.HashWithoutResources,
State: containerStatus.State,
}
runningPod.Containers = append(runningPod.Containers, container)
}
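To illustrate the intent of the two hashes, a hedged sketch of a restart check follows: if the full hash differs but the resources-free hash matches, only the Resources field changed, so toggling the feature gate (or an in-place resize under a RestartNotRequired policy) does not by itself force a restart. The helper is hypothetical; the real decision sits in the container runtime manager.

// Editor's sketch (hypothetical helper, not part of this commit):
// restart only when something other than Resources changed in the spec.
func specChangeRequiresRestart(running *Container, desired *v1.Container) bool {
    if running.Hash == HashContainer(desired) {
        return false // spec unchanged
    }
    return running.HashWithoutResources != HashContainerWithoutResources(desired)
}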

View File

@ -25,6 +25,7 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
@ -908,3 +909,83 @@ func TestHasWindowsHostProcessContainer(t *testing.T) {
})
}
}
func TestHashContainerWithoutResources(t *testing.T) {
cpu100m := resource.MustParse("100m")
cpu200m := resource.MustParse("200m")
mem100M := resource.MustParse("100Mi")
mem200M := resource.MustParse("200Mi")
cpuPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired}
memPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartNotRequired}
cpuPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartRequired}
memPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired}
type testCase struct {
name string
container *v1.Container
expectedHash uint64
}
tests := []testCase{
{
"Burstable pod with CPU policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired},
},
0x86a4393c,
},
{
"Burstable pod with memory policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired},
},
0x73a18cce,
},
{
"Guaranteed pod with CPU policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired},
},
0x86a4393c,
},
{
"Guaranteed pod with memory policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired},
},
0x73a18cce,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
containerCopy := tc.container.DeepCopy()
hash := HashContainerWithoutResources(tc.container)
assert.Equal(t, tc.expectedHash, hash, "[%s]", tc.name)
assert.Equal(t, containerCopy, tc.container, "[%s]", tc.name)
})
}
}

View File

@ -27,6 +27,7 @@ import (
"time" "time"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/remotecommand" "k8s.io/client-go/tools/remotecommand"
"k8s.io/client-go/util/flowcontrol" "k8s.io/client-go/util/flowcontrol"
@ -295,6 +296,11 @@ type Container struct {
// Hash of the container, used for comparison. Optional for containers
// not managed by kubelet.
Hash uint64
// Hash of the container over fields with Resources field zero'd out.
// NOTE: This is needed during alpha and beta so that containers using Resources are
// not unexpectedly restarted when InPlacePodVerticalScaling feature-gate is toggled.
//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashWithoutResources the new Hash.
HashWithoutResources uint64
// State is the state of the container.
State State
}
@ -319,6 +325,18 @@ type PodStatus struct {
TimeStamp time.Time
}
// ContainerResources represents the Resources allocated to the running container.
type ContainerResources struct {
// CPU capacity reserved for the container (cpu.shares)
CPURequest *resource.Quantity
// CPU limit enforced on the container (cpu.cfs_quota_us)
CPULimit *resource.Quantity
// Memory capacity reserved for the container
MemoryRequest *resource.Quantity
// Memory limit enforced on the container (memory.limit_in_bytes)
MemoryLimit *resource.Quantity
}
// Status represents the status of a container.
type Status struct {
// ID of the container.
@ -342,6 +360,8 @@ type Status struct {
ImageID string
// Hash of the container, used for comparison.
Hash uint64
// Hash of the container over fields with Resources field zero'd out.
HashWithoutResources uint64
// Number of times that the container has been restarted.
RestartCount int
// A string explains why container is in such a status.
@ -349,6 +369,8 @@ type Status struct {
// Message written by the container before exiting (stored in
// TerminationMessagePath).
Message string
// CPU and memory resources for this container
Resources *ContainerResources
}
// FindContainerStatusByName returns container status in the pod status with the given name.
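A small hedged example of filling the new ContainerResources struct; the quantities are illustrative and would normally be derived from the cgroup values the runtime reports (cpu.shares, cpu.cfs_quota_us, memory.limit_in_bytes).

// Editor's sketch: a ContainerResources as a runtime integration might
// populate it after reading the container's cgroup configuration.
func exampleContainerResources() *ContainerResources {
    cpuRequest := resource.MustParse("500m") // derived from cpu.shares
    cpuLimit := resource.MustParse("1")      // derived from cpu.cfs_quota_us / period
    memLimit := resource.MustParse("512Mi")  // memory.limit_in_bytes
    return &ContainerResources{
        CPURequest:  &cpuRequest,
        CPULimit:    &cpuLimit,
        MemoryLimit: &memLimit,
    }
}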

View File

@ -25,10 +25,13 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
corev1helpers "k8s.io/component-helpers/scheduling/corev1" corev1helpers "k8s.io/component-helpers/scheduling/corev1"
"k8s.io/klog/v2" "k8s.io/klog/v2"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1resource "k8s.io/kubernetes/pkg/api/v1/resource" v1resource "k8s.io/kubernetes/pkg/api/v1/resource"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
volumeutils "k8s.io/kubernetes/pkg/volume/util" volumeutils "k8s.io/kubernetes/pkg/volume/util"
@ -1018,6 +1021,12 @@ func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats stats
for _, container := range pod.Spec.Containers {
if container.Name == containerStats.Name {
requests := container.Resources.Requests[resourceToReclaim]
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) &&
(resourceToReclaim == v1.ResourceMemory || resourceToReclaim == v1.ResourceCPU) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
requests = cs.ResourcesAllocated[resourceToReclaim]
}
}
var usage *resource.Quantity
switch resourceToReclaim {
case v1.ResourceEphemeralStorage:

View File

@ -21,6 +21,7 @@ import (
"fmt" "fmt"
"reflect" "reflect"
"sort" "sort"
"strings"
"testing" "testing"
"time" "time"
@ -2121,3 +2122,51 @@ func (s1 thresholdList) Equal(s2 thresholdList) bool {
}
return true
}
func TestEvictonMessageWithResourceResize(t *testing.T) {
testpod := newPod("testpod", 1, []v1.Container{
newContainer("testcontainer", newResourceList("", "200Mi", ""), newResourceList("", "", "")),
}, nil)
testpod.Status = v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{
{
Name: "testcontainer",
ResourcesAllocated: newResourceList("", "100Mi", ""),
},
},
}
testpodMemory := resource.MustParse("150Mi")
testpodStats := newPodMemoryStats(testpod, testpodMemory)
testpodMemoryBytes := uint64(testpodMemory.Value())
testpodStats.Containers = []statsapi.ContainerStats{
{
Name: "testcontainer",
Memory: &statsapi.MemoryStats{
WorkingSetBytes: &testpodMemoryBytes,
},
},
}
stats := map[*v1.Pod]statsapi.PodStats{
testpod: testpodStats,
}
statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) {
result, found := stats[pod]
return result, found
}
for _, enabled := range []bool{true, false} {
t.Run(fmt.Sprintf("InPlacePodVerticalScaling enabled=%v", enabled), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, enabled)()
msg, _ := evictionMessage(v1.ResourceMemory, testpod, statsFn)
if enabled {
if !strings.Contains(msg, "testcontainer was using 150Mi, which exceeds its request of 100Mi") {
t.Errorf("Expected 'exceeds memory' eviction message was not found.")
}
} else {
if strings.Contains(msg, "which exceeds its request") {
t.Errorf("Found 'exceeds memory' eviction message which was not expected.")
}
}
})
}
}

View File

@ -48,6 +48,7 @@ import (
"k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
utilruntime "k8s.io/apimachinery/pkg/util/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
@ -66,6 +67,8 @@ import (
"k8s.io/klog/v2" "k8s.io/klog/v2"
pluginwatcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1" pluginwatcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/api/v1/resource"
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config" kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/apis/podresources" "k8s.io/kubernetes/pkg/kubelet/apis/podresources"
@ -608,7 +611,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
mirrorPodClient := kubepod.NewBasicMirrorClient(klet.kubeClient, string(nodeName), nodeLister)
klet.podManager = kubepod.NewBasicPodManager(mirrorPodClient)
klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet, kubeDeps.PodStartupLatencyTracker, klet.getRootDir())
klet.resourceAnalyzer = serverstats.NewResourceAnalyzer(klet, kubeCfg.VolumeStatsAggPeriod.Duration, kubeDeps.Recorder)
@ -665,7 +668,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
kubeCfg.CPUCFSQuotaPeriod,
kubeDeps.RemoteRuntimeService,
kubeDeps.RemoteImageService,
kubeDeps.ContainerManager,
klet.containerLogManager,
klet.runtimeClassManager,
seccompDefault,
@ -1247,6 +1250,9 @@ type Kubelet struct {
// Manage user namespaces
usernsManager *usernsManager
// Mutex to serialize new pod admission and existing pod resizing
podResizeMutex sync.Mutex
}
// ListPodStats is delegated to StatsProvider, which implements stats.Provider interface
@ -1826,6 +1832,16 @@ func (kl *Kubelet) syncPod(_ context.Context, updateType kubetypes.SyncPodType,
// Ensure the pod is being probed
kl.probeManager.AddPod(pod)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// Handle pod resize here instead of doing it in HandlePodUpdates because
// this conveniently retries any Deferred resize requests
// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan
// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) {
kl.handlePodResourcesResize(pod)
}
}
// Call the container runtime's SyncPod callback
result := kl.containerRuntime.SyncPod(ctx, pod, podStatus, pullSecrets, kl.backOff)
kl.reasonCache.Update(pod.UID, result)
@ -1842,6 +1858,15 @@ func (kl *Kubelet) syncPod(_ context.Context, updateType kubetypes.SyncPodType,
return false, nil
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && isPodResizeInProgress(pod, &apiPodStatus) {
// While resize is in progress, periodically call PLEG to update pod cache
runningPod := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err := kl.pleg.UpdateCache(&runningPod, pod.UID); err != nil {
klog.ErrorS(err, "Failed to update pod cache", "pod", klog.KObj(pod))
return false, err
}
}
return false, nil
}
@ -2078,6 +2103,23 @@ func (kl *Kubelet) canAdmitPod(pods []*v1.Pod, pod *v1.Pod) (bool, string, strin
// TODO: move out of disk check into a pod admitter
// TODO: out of resource eviction should have a pod admitter call-out
attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// Use allocated resources values from checkpoint store (source of truth) to determine fit
otherPods := make([]*v1.Pod, 0, len(pods))
checkpointState := kl.statusManager.State()
for _, p := range pods {
op := p.DeepCopy()
for _, c := range op.Spec.Containers {
resourcesAllocated, found := checkpointState.GetContainerResourceAllocation(string(p.UID), c.Name)
if c.Resources.Requests != nil && found {
c.Resources.Requests[v1.ResourceCPU] = resourcesAllocated[v1.ResourceCPU]
c.Resources.Requests[v1.ResourceMemory] = resourcesAllocated[v1.ResourceMemory]
}
}
otherPods = append(otherPods, op)
}
attrs.OtherPods = otherPods
}
for _, podAdmitHandler := range kl.admitHandlers {
if result := podAdmitHandler.Admit(attrs); !result.Admit {
return false, result.Reason, result.Message
@ -2332,6 +2374,10 @@ func (kl *Kubelet) handleMirrorPod(mirrorPod *v1.Pod, start time.Time) {
func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
start := kl.clock.Now()
sort.Sort(sliceutils.PodsByCreationTime(pods))
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
kl.podResizeMutex.Lock()
defer kl.podResizeMutex.Unlock()
}
for _, pod := range pods {
existingPods := kl.podManager.GetPods()
// Always add the pod to the pod manager. Kubelet relies on the pod
@ -2356,10 +2402,36 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
// pods that are alive.
activePods := kl.filterOutInactivePods(existingPods)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// To handle kubelet restarts, test pod admissibility using ResourcesAllocated values
// (for cpu & memory) from checkpoint store. If found, that is the source of truth.
checkpointState := kl.statusManager.State()
podCopy := pod.DeepCopy()
for _, c := range podCopy.Spec.Containers {
resourcesAllocated, found := checkpointState.GetContainerResourceAllocation(string(pod.UID), c.Name)
if c.Resources.Requests != nil && found {
c.Resources.Requests[v1.ResourceCPU] = resourcesAllocated[v1.ResourceCPU]
c.Resources.Requests[v1.ResourceMemory] = resourcesAllocated[v1.ResourceMemory]
}
}
// Check if we can admit the pod; if not, reject it.
if ok, reason, message := kl.canAdmitPod(activePods, podCopy); !ok {
kl.rejectPod(pod, reason, message)
continue
}
// For new pod, checkpoint the resource values at which the Pod has been admitted
if err := kl.statusManager.SetPodAllocation(podCopy); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
}
} else {
// Check if we can admit the pod; if not, reject it.
if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok {
kl.rejectPod(pod, reason, message)
continue
}
}
}
mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
@ -2434,6 +2506,116 @@ func (kl *Kubelet) HandlePodSyncs(pods []*v1.Pod) {
}
}
func isPodResizeInProgress(pod *v1.Pod, podStatus *v1.PodStatus) bool {
for _, c := range pod.Spec.Containers {
if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok {
if cs.Resources == nil {
continue
}
if diff.ObjectDiff(c.Resources.Limits, cs.Resources.Limits) != "" ||
diff.ObjectDiff(cs.ResourcesAllocated, cs.Resources.Requests) != "" {
return true
}
}
}
return false
}
func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus) {
var otherActivePods []*v1.Pod
node, err := kl.getNodeAnyWay()
if err != nil {
klog.ErrorS(err, "getNodeAnyway function failed")
return false, nil, ""
}
cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
memAvailable := node.Status.Allocatable.Memory().Value()
cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
if cpuRequests > cpuAvailable || memRequests > memAvailable {
klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "Pod", pod.Name)
return false, nil, v1.PodResizeStatusInfeasible
}
// Treat the existing pod needing resize as a new pod with desired resources seeking admit.
// If desired resources don't fit, pod continues to run with currently allocated resources.
activePods := kl.GetActivePods()
for _, p := range activePods {
if p.UID != pod.UID {
otherActivePods = append(otherActivePods, p)
}
}
if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, pod); !ok {
// Log reason and return. Let the next sync iteration retry the resize
klog.V(3).InfoS("Resize cannot be accommodated", "Pod", pod.Name, "Reason", failReason, "Message", failMessage)
return false, nil, v1.PodResizeStatusDeferred
}
podCopy := pod.DeepCopy()
for _, container := range podCopy.Spec.Containers {
idx, found := podutil.GetIndexOfContainerStatus(podCopy.Status.ContainerStatuses, container.Name)
if found {
for rName, rQuantity := range container.Resources.Requests {
podCopy.Status.ContainerStatuses[idx].ResourcesAllocated[rName] = rQuantity
}
}
}
return true, podCopy, v1.PodResizeStatusInProgress
}
func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
if pod.Status.Phase != v1.PodRunning {
return
}
podResized := false
for _, container := range pod.Spec.Containers {
if len(container.Resources.Requests) == 0 {
continue
}
containerStatus, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name)
if !found {
klog.V(5).InfoS("ContainerStatus not found", "pod", pod.Name, "container", container.Name)
break
}
if len(containerStatus.ResourcesAllocated) != len(container.Resources.Requests) {
klog.V(5).InfoS("ContainerStatus.ResourcesAllocated length mismatch", "pod", pod.Name, "container", container.Name)
break
}
if len(diff.ObjectDiff(container.Resources.Requests, containerStatus.ResourcesAllocated)) > 0 {
podResized = true
break
}
}
if !podResized {
return
}
kl.podResizeMutex.Lock()
defer kl.podResizeMutex.Unlock()
fit, updatedPod, resizeStatus := kl.canResizePod(pod)
if fit {
// Update pod resource allocation checkpoint
if err := kl.statusManager.SetPodAllocation(updatedPod); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
}
*pod = *updatedPod
}
if resizeStatus != "" {
// Save resize decision to checkpoint
if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(pod))
}
pod.Status.Resize = resizeStatus
}
kl.podManager.UpdatePod(pod)
kl.statusManager.SetPodStatus(pod, pod.Status)
return
}
// LatestLoopEntryTime returns the last time in the sync loop monitor.
func (kl *Kubelet) LatestLoopEntryTime() time.Time {
val := kl.syncLoopMonitor.Load()
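For completeness, the resize path above is exercised when a pod's container resources are updated through the API server. A hedged client-go sketch follows (not part of this commit; pod, namespace, and container names are hypothetical, and the InPlacePodVerticalScaling feature gate must be enabled).

// Editor's sketch: patch a running pod's CPU request/limit in place.
package main

import (
    "context"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/clientcmd"
)

func main() {
    cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    if err != nil {
        panic(err)
    }
    client, err := kubernetes.NewForConfig(cfg)
    if err != nil {
        panic(err)
    }
    // Ask for more CPU on container "app" of pod "mypod"; the kubelet's
    // handlePodResourcesResize decides whether this fits (InProgress),
    // must wait (Deferred), or can never fit on the node (Infeasible).
    patch := []byte(`{"spec":{"containers":[{"name":"app","resources":{"requests":{"cpu":"300m"},"limits":{"cpu":"600m"}}}]}}`)
    if _, err := client.CoreV1().Pods("default").Patch(context.TODO(), "mypod",
        types.StrategicMergePatchType, patch, metav1.PatchOptions{}); err != nil {
        panic(err)
    }
}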

View File

@ -34,6 +34,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
utilvalidation "k8s.io/apimachinery/pkg/util/validation" utilvalidation "k8s.io/apimachinery/pkg/util/validation"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -1454,6 +1455,31 @@ func getPhase(spec *v1.PodSpec, info []v1.ContainerStatus) v1.PodPhase {
}
}
func (kl *Kubelet) determinePodResizeStatus(pod *v1.Pod, podStatus *v1.PodStatus) v1.PodResizeStatus {
var podResizeStatus v1.PodResizeStatus
specStatusDiffer := false
for _, c := range pod.Spec.Containers {
if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok {
if cs.Resources != nil && diff.ObjectDiff(c.Resources, *cs.Resources) != "" {
specStatusDiffer = true
break
}
}
}
if !specStatusDiffer {
// Clear last resize state from checkpoint
if err := kl.statusManager.SetPodResizeStatus(pod.UID, ""); err != nil {
klog.ErrorS(err, "SetPodResizeStatus failed", "pod", pod.Name)
}
} else {
checkpointState := kl.statusManager.State()
if resizeStatus, found := checkpointState.GetPodResizeStatus(string(pod.UID)); found {
podResizeStatus = resizeStatus
}
}
return podResizeStatus
}
// generateAPIPodStatus creates the final API pod status for a pod, given the
// internal pod status. This method should only be called from within sync*Pod methods.
func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) v1.PodStatus {
@ -1464,6 +1490,9 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
oldPodStatus = pod.Status
}
s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
s.Resize = kl.determinePodResizeStatus(pod, s)
}
// calculate the next phase and preserve reason
allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
s.Phase = getPhase(&pod.Spec, allStatus)
@ -1715,6 +1744,84 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
return status
}
convertContainerStatusResources := func(cName string, status *v1.ContainerStatus, cStatus *kubecontainer.Status, oldStatuses map[string]v1.ContainerStatus) *v1.ResourceRequirements {
var requests, limits v1.ResourceList
// oldStatus should always exist if container is running
oldStatus, oldStatusFound := oldStatuses[cName]
// Initialize limits/requests from container's spec upon transition to Running state
// For cpu & memory, values queried from runtime via CRI always supersede spec values
// For ephemeral-storage, a running container's status.limit/request equals spec.limit/request
determineResource := func(rName v1.ResourceName, v1ContainerResource, oldStatusResource, resource v1.ResourceList) {
if oldStatusFound {
if oldStatus.State.Running == nil || status.ContainerID != oldStatus.ContainerID {
if r, exists := v1ContainerResource[rName]; exists {
resource[rName] = r.DeepCopy()
}
} else {
if oldStatusResource != nil {
if r, exists := oldStatusResource[rName]; exists {
resource[rName] = r.DeepCopy()
}
}
}
}
}
container := kubecontainer.GetContainerSpec(pod, cName)
// ResourcesAllocated values come from checkpoint. It is the source-of-truth.
found := false
checkpointState := kl.statusManager.State()
status.ResourcesAllocated, found = checkpointState.GetContainerResourceAllocation(string(pod.UID), cName)
if !(container.Resources.Requests == nil && container.Resources.Limits == nil) && !found {
// Log error and fallback to ResourcesAllocated in oldStatus if it exists
klog.ErrorS(nil, "resource allocation not found in checkpoint store", "pod", pod.Name, "container", cName)
if oldStatusFound {
status.ResourcesAllocated = oldStatus.ResourcesAllocated
}
}
if oldStatus.Resources == nil {
oldStatus.Resources = &v1.ResourceRequirements{}
}
// Convert Limits
if container.Resources.Limits != nil {
limits = make(v1.ResourceList)
if cStatus.Resources != nil && cStatus.Resources.CPULimit != nil {
limits[v1.ResourceCPU] = cStatus.Resources.CPULimit.DeepCopy()
} else {
determineResource(v1.ResourceCPU, container.Resources.Limits, oldStatus.Resources.Limits, limits)
}
if cStatus.Resources != nil && cStatus.Resources.MemoryLimit != nil {
limits[v1.ResourceMemory] = cStatus.Resources.MemoryLimit.DeepCopy()
} else {
determineResource(v1.ResourceMemory, container.Resources.Limits, oldStatus.Resources.Limits, limits)
}
if ephemeralStorage, found := container.Resources.Limits[v1.ResourceEphemeralStorage]; found {
limits[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
}
}
// Convert Requests
if status.ResourcesAllocated != nil {
requests = make(v1.ResourceList)
if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil {
requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
} else {
determineResource(v1.ResourceCPU, status.ResourcesAllocated, oldStatus.Resources.Requests, requests)
}
if memory, found := status.ResourcesAllocated[v1.ResourceMemory]; found {
requests[v1.ResourceMemory] = memory.DeepCopy()
}
if ephemeralStorage, found := status.ResourcesAllocated[v1.ResourceEphemeralStorage]; found {
requests[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
}
}
//TODO(vinaykul,derekwaynecarr,InPlacePodVerticalScaling): Update this to include extended resources in
// addition to CPU, memory, ephemeral storage. Add test case for extended resources.
resources := &v1.ResourceRequirements{
Limits: limits,
Requests: requests,
}
return resources
}
// Fetch old containers statuses from old pod status.
oldStatuses := make(map[string]v1.ContainerStatus, len(containers))
for _, status := range previousStatus {
@ -1835,6 +1942,11 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
oldStatusPtr = &oldStatus
}
status := convertContainerStatus(cStatus, oldStatusPtr)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if status.State.Running != nil {
status.Resources = convertContainerStatusResources(cName, status, cStatus, oldStatuses)
}
}
if containerSeen[cName] == 0 {
statuses[cName] = status
} else {

View File

@ -33,6 +33,7 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality" apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors" apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
@ -56,6 +57,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward" "k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward"
"k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand" "k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand"
"k8s.io/kubernetes/pkg/kubelet/prober/results" "k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
) )
@ -3861,3 +3863,219 @@ func TestConvertToAPIContainerStatusesDataRace(t *testing.T) {
}()
}
}
func TestConvertToAPIContainerStatusesForResources(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
nowTime := time.Now()
testContainerName := "ctr0"
testContainerID := kubecontainer.ContainerID{Type: "test", ID: testContainerName}
testContainer := v1.Container{
Name: testContainerName,
Image: "img",
}
testContainerStatus := v1.ContainerStatus{
Name: testContainerName,
}
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "123456",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{testContainer},
},
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{testContainerStatus},
},
}
testKubeContainerStatus := kubecontainer.Status{
Name: testContainerName,
ID: testContainerID,
Image: "img",
ImageID: "img1234",
State: kubecontainer.ContainerStateRunning,
StartedAt: nowTime,
}
testPodStatus := &kubecontainer.PodStatus{
ID: testPod.UID,
Name: testPod.Name,
Namespace: testPod.Namespace,
ContainerStatuses: []*kubecontainer.Status{&testKubeContainerStatus},
}
CPU1AndMem1G := v1.ResourceList{v1.ResourceCPU: resource.MustParse("1"), v1.ResourceMemory: resource.MustParse("1Gi")}
CPU2AndMem2G := v1.ResourceList{v1.ResourceCPU: resource.MustParse("2"), v1.ResourceMemory: resource.MustParse("2Gi")}
CPU1AndMem1GAndStorage2G := CPU1AndMem1G.DeepCopy()
CPU1AndMem1GAndStorage2G[v1.ResourceEphemeralStorage] = resource.MustParse("2Gi")
CPU2AndMem2GAndStorage2G := CPU2AndMem2G.DeepCopy()
CPU2AndMem2GAndStorage2G[v1.ResourceEphemeralStorage] = resource.MustParse("2Gi")
testKubelet := newTestKubelet(t, false)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.statusManager = status.NewFakeManager()
idx := 0
for tdesc, tc := range map[string]struct {
Resources []v1.ResourceRequirements
OldStatus []v1.ContainerStatus
Expected []v1.ContainerStatus
}{
"GuaranteedQoSPod with CPU and memory CRI status": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
},
"BurstableQoSPod with CPU and memory CRI status": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU2AndMem2G, Requests: CPU1AndMem1G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
},
"GuaranteedQoSPod with CPU and memory CRI status, with ephemeral storage": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1GAndStorage2G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
},
},
},
"BurstableQoSPod with CPU and memory CRI status, with ephemeral storage": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU2AndMem2GAndStorage2G, Requests: CPU2AndMem2GAndStorage2G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1GAndStorage2G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
},
},
},
"BurstableQoSPod with CPU and memory CRI status, with ephemeral storage, nil resources in OldStatus": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1GAndStorage2G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
},
},
},
"BestEffortQoSPod": {
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
Resources: &v1.ResourceRequirements{},
},
},
},
} {
tPod := testPod.DeepCopy()
tPod.Name = fmt.Sprintf("%s-%d", testPod.Name, idx)
for i := range tPod.Spec.Containers {
if tc.Resources != nil {
tPod.Spec.Containers[i].Resources = tc.Resources[i]
}
kubelet.statusManager.SetPodAllocation(tPod)
if tc.Resources != nil {
tPod.Status.ContainerStatuses[i].ResourcesAllocated = tc.Resources[i].Requests
testPodStatus.ContainerStatuses[i].Resources = &kubecontainer.ContainerResources{
MemoryLimit: tc.Resources[i].Limits.Memory(),
CPULimit: tc.Resources[i].Limits.Cpu(),
CPURequest: tc.Resources[i].Requests.Cpu(),
}
}
}
t.Logf("TestCase: %q", tdesc)
cStatuses := kubelet.convertToAPIContainerStatuses(tPod, testPodStatus, tc.OldStatus, tPod.Spec.Containers, false, false)
assert.Equal(t, tc.Expected, cStatuses)
}
}
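The expected statuses above carry two resource views: ResourcesAllocated is the request set the kubelet has accepted and checkpointed, while Resources is what the runtime last reported via CRI. A minimal, illustrative sketch of that distinction (field names here are hypothetical, not the kubelet's types):
package main

import "fmt"

// containerResourceViews is an illustrative pairing of the two fields the
// expected statuses above populate: ResourcesAllocated is the kubelet-accepted
// allocation, Resources is what the runtime last reported.
type containerResourceViews struct {
	allocatedCPUMilli int64 // -> status.ResourcesAllocated
	actualCPUMilli    int64 // -> status.Resources (from CRI)
}

func main() {
	v := containerResourceViews{allocatedCPUMilli: 1000, actualCPUMilli: 2000}
	if v.allocatedCPUMilli != v.actualCPUMilli {
		fmt.Println("resize accepted by kubelet but not yet actuated by the runtime")
	}
}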

View File

@ -48,12 +48,15 @@ import (
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/record" "k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol" "k8s.io/client-go/util/flowcontrol"
featuregatetesting "k8s.io/component-base/featuregate/testing"
internalapi "k8s.io/cri-api/pkg/apis" internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/klog/v2/ktesting" "k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/features"
kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config" kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config"
cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing" cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
"k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm"
@ -260,7 +263,7 @@ func newTestKubeletWithImageList(
kubelet.configMapManager = configMapManager kubelet.configMapManager = configMapManager
kubelet.podManager = kubepod.NewBasicPodManager(fakeMirrorClient) kubelet.podManager = kubepod.NewBasicPodManager(fakeMirrorClient)
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker() podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker) kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, kubelet.getRootDir())
kubelet.containerRuntime = fakeRuntime kubelet.containerRuntime = fakeRuntime
kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime) kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime)
@ -2436,6 +2439,162 @@ func TestHandlePodAdditionsInvokesPodAdmitHandlers(t *testing.T) {
checkPodStatus(t, kl, podToAdmit, v1.PodPending) checkPodStatus(t, kl, podToAdmit, v1.PodPending)
} }
func TestHandlePodResourcesResize(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
testKubelet := newTestKubelet(t, false)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.statusManager = status.NewFakeManager()
cpu500m := resource.MustParse("500m")
cpu1000m := resource.MustParse("1")
cpu1500m := resource.MustParse("1500m")
cpu2500m := resource.MustParse("2500m")
cpu5000m := resource.MustParse("5000m")
mem500M := resource.MustParse("500Mi")
mem1000M := resource.MustParse("1Gi")
mem1500M := resource.MustParse("1500Mi")
mem2500M := resource.MustParse("2500Mi")
mem4500M := resource.MustParse("4500Mi")
nodes := []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("8"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("4Gi"),
v1.ResourcePods: *resource.NewQuantity(40, resource.DecimalSI),
},
},
},
}
kubelet.nodeLister = testNodeLister{nodes: nodes}
testPod1 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "1111",
Name: "pod1",
Namespace: "ns1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Image: "i1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
},
},
},
},
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
Resources: &v1.ResourceRequirements{},
},
},
},
}
testPod2 := testPod1.DeepCopy()
testPod2.UID = "2222"
testPod2.Name = "pod2"
testPod2.Namespace = "ns2"
testPod3 := testPod1.DeepCopy()
testPod3.UID = "3333"
testPod3.Name = "pod3"
testPod3.Namespace = "ns2"
testKubelet.fakeKubeClient = fake.NewSimpleClientset(testPod1, testPod2, testPod3)
kubelet.kubeClient = testKubelet.fakeKubeClient
defer testKubelet.fakeKubeClient.ClearActions()
kubelet.podManager.AddPod(testPod1)
kubelet.podManager.AddPod(testPod2)
kubelet.podManager.AddPod(testPod3)
kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
testPod1.UID: true,
testPod2.UID: true,
testPod3.UID: true,
}
defer kubelet.podManager.DeletePod(testPod3)
defer kubelet.podManager.DeletePod(testPod2)
defer kubelet.podManager.DeletePod(testPod1)
tests := []struct {
name string
pod *v1.Pod
newRequests v1.ResourceList
expectedAllocations v1.ResourceList
expectedResize v1.PodResizeStatus
}{
{
name: "Request CPU and memory decrease - expect InProgress",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
},
{
name: "Request CPU increase, memory decrease - expect InProgress",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
},
{
name: "Request CPU decrease, memory increase - expect InProgress",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedResize: v1.PodResizeStatusInProgress,
},
{
name: "Request CPU and memory increase beyond current capacity - expect Deferred",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu2500m, v1.ResourceMemory: mem2500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusDeferred,
},
{
name: "Request CPU decrease and memory increase beyond current capacity - expect Deferred",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem2500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusDeferred,
},
{
name: "Request memory increase beyond node capacity - expect Infeasible",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem4500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusInfeasible,
},
{
name: "Request CPU increase beyond node capacity - expect Infeasible",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu5000m, v1.ResourceMemory: mem1000M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusInfeasible,
},
}
for _, tt := range tests {
tt.pod.Spec.Containers[0].Resources.Requests = tt.newRequests
tt.pod.Status.ContainerStatuses[0].ResourcesAllocated = v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}
kubelet.handlePodResourcesResize(tt.pod)
assert.Equal(t, tt.expectedAllocations, tt.pod.Status.ContainerStatuses[0].ResourcesAllocated, tt.name)
assert.Equal(t, tt.expectedResize, tt.pod.Status.Resize, tt.name)
testKubelet.fakeKubeClient.ClearActions()
}
}
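The table above encodes the resize outcomes against a node with 4 CPU / 4Gi allocatable: requests that fit are admitted and marked InProgress, requests that exceed what is currently free are Deferred, and requests beyond node allocatable are Infeasible. A simplified standalone sketch of that decision shape (not the kubelet's actual canAdmitPod path; the free-capacity figures are assumed):
package main

import "fmt"

type resizeStatus string

const (
	inProgress resizeStatus = "InProgress"
	deferred   resizeStatus = "Deferred"
	infeasible resizeStatus = "Infeasible"
)

// decideResize captures the shape of the outcomes asserted above: Infeasible if the
// request can never fit on the node, Deferred if it does not fit right now, otherwise
// InProgress (accepted and actuated).
func decideResize(newCPUMilli, newMemBytes, allocCPUMilli, allocMemBytes, freeCPUMilli, freeMemBytes int64) resizeStatus {
	if newCPUMilli > allocCPUMilli || newMemBytes > allocMemBytes {
		return infeasible
	}
	if newCPUMilli > freeCPUMilli || newMemBytes > freeMemBytes {
		return deferred
	}
	return inProgress
}

func main() {
	// Node allocatable 4 CPU / 4Gi; assume roughly 2 CPU / 2Gi currently free.
	fmt.Println(decideResize(500, 500<<20, 4000, 4<<30, 2000, 2<<30))   // InProgress
	fmt.Println(decideResize(2500, 2500<<20, 4000, 4<<30, 2000, 2<<30)) // Deferred
	fmt.Println(decideResize(5000, 1<<30, 4000, 4<<30, 2000, 2<<30))    // Infeasible
}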
// testPodSyncLoopHandler is a lifecycle.PodSyncLoopHandler that is used for testing. // testPodSyncLoopHandler is a lifecycle.PodSyncLoopHandler that is used for testing.
type testPodSyncLoopHandler struct { type testPodSyncLoopHandler struct {
// list of pods to sync // list of pods to sync

View File

@ -94,12 +94,13 @@ func (m *kubeGenericRuntimeManager) toKubeContainer(c *runtimeapi.Container) (*k
annotatedInfo := getContainerInfoFromAnnotations(c.Annotations) annotatedInfo := getContainerInfoFromAnnotations(c.Annotations)
return &kubecontainer.Container{ return &kubecontainer.Container{
ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id}, ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id},
Name: c.GetMetadata().GetName(), Name: c.GetMetadata().GetName(),
ImageID: c.ImageRef, ImageID: c.ImageRef,
Image: c.Image.Image, Image: c.Image.Image,
Hash: annotatedInfo.Hash, Hash: annotatedInfo.Hash,
State: toKubeContainerState(c.State), HashWithoutResources: annotatedInfo.HashWithoutResources,
State: toKubeContainerState(c.State),
}, nil }, nil
} }

View File

@ -19,6 +19,11 @@ limitations under the License.
package kuberuntime package kuberuntime
import (
"k8s.io/kubernetes/pkg/kubelet/cm"
"math"
)
const ( const (
milliCPUToCPU = 1000 milliCPUToCPU = 1000
@ -53,3 +58,22 @@ func milliCPUToQuota(milliCPU int64, period int64) (quota int64) {
return return
} }
// sharesToMilliCPU converts CpuShares (cpu.shares) to milli-CPU value
// TODO(vinaykul,InPlacePodVerticalScaling): Address issue that sets min req/limit to 2m/10m before beta
// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r662552642
func sharesToMilliCPU(shares int64) int64 {
milliCPU := int64(0)
if shares >= int64(cm.MinShares) {
milliCPU = int64(math.Ceil(float64(shares*milliCPUToCPU) / float64(cm.SharesPerCPU)))
}
return milliCPU
}
// quotaToMilliCPU converts cpu.cfs_quota_us and cpu.cfs_period_us to milli-CPU value
func quotaToMilliCPU(quota int64, period int64) int64 {
if quota == -1 {
return int64(0)
}
return (quota * milliCPUToCPU) / period
}
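As a quick check of the two helpers above, the same arithmetic in a standalone sketch, assuming the kubelet constants milliCPUToCPU=1000, SharesPerCPU=1024 and MinShares=2:
package main

import (
	"fmt"
	"math"
)

// Constants assumed to mirror the kubelet's cm package values.
const (
	milliCPUToCPU = 1000
	sharesPerCPU  = 1024
	minShares     = 2
)

// sharesToMilliCPU mirrors the helper above: shares below minShares map to 0 milli-CPU.
func sharesToMilliCPU(shares int64) int64 {
	if shares < minShares {
		return 0
	}
	return int64(math.Ceil(float64(shares*milliCPUToCPU) / float64(sharesPerCPU)))
}

// quotaToMilliCPU mirrors the helper above: a quota of -1 means "no limit".
func quotaToMilliCPU(quota, period int64) int64 {
	if quota == -1 {
		return 0
	}
	return (quota * milliCPUToCPU) / period
}

func main() {
	fmt.Println(sharesToMilliCPU(1024))         // 1000 (1 CPU)
	fmt.Println(sharesToMilliCPU(256))          // 250
	fmt.Println(quotaToMilliCPU(25000, 100000)) // 250
	fmt.Println(quotaToMilliCPU(-1, 100000))    // 0 (unlimited)
}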

View File

@ -28,6 +28,7 @@ import (
featuregatetesting "k8s.io/component-base/featuregate/testing" featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm"
utilpointer "k8s.io/utils/pointer" utilpointer "k8s.io/utils/pointer"
) )
@ -670,3 +671,81 @@ func TestGetSeccompProfileDefaultSeccomp(t *testing.T) {
func getLocal(v string) *string { func getLocal(v string) *string {
return &v return &v
} }
func TestSharesToMilliCPU(t *testing.T) {
knownMilliCPUToShares := map[int64]int64{
0: 2,
1: 2,
2: 2,
3: 3,
4: 4,
32: 32,
64: 65,
100: 102,
250: 256,
500: 512,
1000: 1024,
1500: 1536,
2000: 2048,
}
t.Run("sharesToMilliCPUTest", func(t *testing.T) {
var testMilliCPU int64
for testMilliCPU = 0; testMilliCPU <= 2000; testMilliCPU++ {
shares := int64(cm.MilliCPUToShares(testMilliCPU))
if expectedShares, found := knownMilliCPUToShares[testMilliCPU]; found {
if shares != expectedShares {
t.Errorf("Test milliCPIToShares: Input milliCPU %v, expected shares %v, but got %v", testMilliCPU, expectedShares, shares)
}
}
expectedMilliCPU := testMilliCPU
if testMilliCPU < 2 {
expectedMilliCPU = 2
}
milliCPU := sharesToMilliCPU(shares)
if milliCPU != expectedMilliCPU {
t.Errorf("Test sharesToMilliCPU: Input shares %v, expected milliCPU %v, but got %v", shares, expectedMilliCPU, milliCPU)
}
}
})
}
func TestQuotaToMilliCPU(t *testing.T) {
for _, tc := range []struct {
name string
quota int64
period int64
expected int64
}{
{
name: "50m",
quota: int64(5000),
period: int64(100000),
expected: int64(50),
},
{
name: "750m",
quota: int64(75000),
period: int64(100000),
expected: int64(750),
},
{
name: "1000m",
quota: int64(100000),
period: int64(100000),
expected: int64(1000),
},
{
name: "1500m",
quota: int64(150000),
period: int64(100000),
expected: int64(1500),
}} {
t.Run(tc.name, func(t *testing.T) {
milliCPU := quotaToMilliCPU(tc.quota, tc.period)
if milliCPU != tc.expected {
t.Errorf("Test %s: Input quota %v and period %v, expected milliCPU %v, but got %v", tc.name, tc.quota, tc.period, tc.expected, milliCPU)
}
})
}
}

View File

@ -46,7 +46,9 @@ import (
kubetypes "k8s.io/apimachinery/pkg/types" kubetypes "k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/cri/remote" "k8s.io/kubernetes/pkg/kubelet/cri/remote"
"k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/pkg/kubelet/events"
@ -359,6 +361,18 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(ctx context.Context,
return config, cleanupAction, nil return config, cleanupAction, nil
} }
func (m *kubeGenericRuntimeManager) updateContainerResources(pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error {
containerResources := m.generateContainerResources(pod, container)
if containerResources == nil {
return fmt.Errorf("container %q updateContainerResources failed: cannot generate resources config", containerID.String())
}
err := m.runtimeService.UpdateContainerResources(containerID.ID, containerResources)
if err != nil {
klog.ErrorS(err, "UpdateContainerResources failed", "container", containerID.String())
}
return err
}
// makeDevices generates container devices for kubelet runtime v1. // makeDevices generates container devices for kubelet runtime v1.
func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device { func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device {
devices := make([]*runtimeapi.Device, len(opts.Devices)) devices := make([]*runtimeapi.Device, len(opts.Devices))
@ -557,18 +571,25 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(ctx context.Context,
func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.Status { func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.Status {
annotatedInfo := getContainerInfoFromAnnotations(status.Annotations) annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
labeledInfo := getContainerInfoFromLabels(status.Labels) labeledInfo := getContainerInfoFromLabels(status.Labels)
var cStatusResources *kubecontainer.ContainerResources
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// If runtime reports cpu & memory resources info, add it to container status
cStatusResources = toKubeContainerResources(status.Resources)
}
cStatus := &kubecontainer.Status{ cStatus := &kubecontainer.Status{
ID: kubecontainer.ContainerID{ ID: kubecontainer.ContainerID{
Type: runtimeName, Type: runtimeName,
ID: status.Id, ID: status.Id,
}, },
Name: labeledInfo.ContainerName, Name: labeledInfo.ContainerName,
Image: status.Image.Image, Image: status.Image.Image,
ImageID: status.ImageRef, ImageID: status.ImageRef,
Hash: annotatedInfo.Hash, Hash: annotatedInfo.Hash,
RestartCount: annotatedInfo.RestartCount, HashWithoutResources: annotatedInfo.HashWithoutResources,
State: toKubeContainerState(status.State), RestartCount: annotatedInfo.RestartCount,
CreatedAt: time.Unix(0, status.CreatedAt), State: toKubeContainerState(status.State),
CreatedAt: time.Unix(0, status.CreatedAt),
Resources: cStatusResources,
} }
if status.State != runtimeapi.ContainerState_CONTAINER_CREATED { if status.State != runtimeapi.ContainerState_CONTAINER_CREATED {

View File

@ -60,7 +60,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
return nil, err return nil, err
} }
lc := &runtimeapi.LinuxContainerConfig{ lc := &runtimeapi.LinuxContainerConfig{
Resources: &runtimeapi.LinuxContainerResources{}, Resources: m.generateLinuxContainerResources(pod, container, enforceMemoryQoS),
SecurityContext: sc, SecurityContext: sc,
} }
@ -69,17 +69,22 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
lc.SecurityContext.NamespaceOptions.TargetId = nsTarget.ID lc.SecurityContext.NamespaceOptions.TargetId = nsTarget.ID
} }
return lc, nil
}
// generateLinuxContainerResources generates linux container resources config for runtime
func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, container *v1.Container, enforceMemoryQoS bool) *runtimeapi.LinuxContainerResources {
// set linux container resources // set linux container resources
var cpuRequest *resource.Quantity var cpuRequest *resource.Quantity
if _, cpuRequestExists := container.Resources.Requests[v1.ResourceCPU]; cpuRequestExists { if _, cpuRequestExists := container.Resources.Requests[v1.ResourceCPU]; cpuRequestExists {
cpuRequest = container.Resources.Requests.Cpu() cpuRequest = container.Resources.Requests.Cpu()
} }
lc.Resources = m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory()) lcr := m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory())
lc.Resources.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container, lcr.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container,
int64(m.machineInfo.MemoryCapacity))) int64(m.machineInfo.MemoryCapacity)))
lc.Resources.HugepageLimits = GetHugepageLimitsFromResources(container.Resources) lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) { if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec: // NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
@ -87,14 +92,14 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
switch m.memorySwapBehavior { switch m.memorySwapBehavior {
case kubelettypes.UnlimitedSwap: case kubelettypes.UnlimitedSwap:
// -1 = unlimited swap // -1 = unlimited swap
lc.Resources.MemorySwapLimitInBytes = -1 lcr.MemorySwapLimitInBytes = -1
case kubelettypes.LimitedSwap: case kubelettypes.LimitedSwap:
fallthrough fallthrough
default: default:
// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit // memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
// Some swapping is still possible. // Some swapping is still possible.
// Note that if memory limit is 0, memory swap limit is ignored. // Note that if memory limit is 0, memory swap limit is ignored.
lc.Resources.MemorySwapLimitInBytes = lc.Resources.MemoryLimitInBytes lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
} }
} }
@ -125,18 +130,31 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10) unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
} }
if len(unified) > 0 { if len(unified) > 0 {
if lc.Resources.Unified == nil { if lcr.Unified == nil {
lc.Resources.Unified = unified lcr.Unified = unified
} else { } else {
for k, v := range unified { for k, v := range unified {
lc.Resources.Unified[k] = v lcr.Unified[k] = v
} }
} }
klog.V(4).InfoS("MemoryQoS config for container", "pod", klog.KObj(pod), "containerName", container.Name, "unified", unified) klog.V(4).InfoS("MemoryQoS config for container", "pod", klog.KObj(pod), "containerName", container.Name, "unified", unified)
} }
} }
return lc, nil return lcr
}
// generateContainerResources generates platform specific (linux) container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
enforceMemoryQoS := false
// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
libcontainercgroups.IsCgroup2UnifiedMode() {
enforceMemoryQoS = true
}
return &runtimeapi.ContainerResources{
Linux: m.generateLinuxContainerResources(pod, container, enforceMemoryQoS),
}
} }
// calculateLinuxResources will create the linuxContainerResources type based on the provided CPU and memory resource requests, limits // calculateLinuxResources will create the linuxContainerResources type based on the provided CPU and memory resource requests, limits
@ -218,3 +236,34 @@ func GetHugepageLimitsFromResources(resources v1.ResourceRequirements) []*runtim
return hugepageLimits return hugepageLimits
} }
func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources {
var cStatusResources *kubecontainer.ContainerResources
runtimeStatusResources := statusResources.GetLinux()
if runtimeStatusResources != nil {
var cpuLimit, memLimit, cpuRequest *resource.Quantity
if runtimeStatusResources.CpuPeriod > 0 {
milliCPU := quotaToMilliCPU(runtimeStatusResources.CpuQuota, runtimeStatusResources.CpuPeriod)
if milliCPU > 0 {
cpuLimit = resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
}
}
if runtimeStatusResources.CpuShares > 0 {
milliCPU := sharesToMilliCPU(runtimeStatusResources.CpuShares)
if milliCPU > 0 {
cpuRequest = resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
}
}
if runtimeStatusResources.MemoryLimitInBytes > 0 {
memLimit = resource.NewQuantity(runtimeStatusResources.MemoryLimitInBytes, resource.BinarySI)
}
if cpuLimit != nil || memLimit != nil || cpuRequest != nil {
cStatusResources = &kubecontainer.ContainerResources{
CPULimit: cpuLimit,
CPURequest: cpuRequest,
MemoryLimit: memLimit,
}
}
}
return cStatusResources
}
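A worked example of the conversion above, using assumed runtime-reported values; the resource.Quantity wrapping is omitted and only the raw arithmetic is shown:
package main

import "fmt"

// Worked example of toKubeContainerResources, with assumed runtime-reported values.
// The arithmetic matches quotaToMilliCPU and sharesToMilliCPU.
func main() {
	cpuQuota, cpuPeriod := int64(25000), int64(100000)
	cpuShares := int64(256)
	memLimitBytes := int64(524288000)

	cpuLimitMilli := (cpuQuota * 1000) / cpuPeriod    // 250 -> CPULimit "250m"
	cpuRequestMilli := (cpuShares*1000 + 1023) / 1024 // 250 -> CPURequest "250m" (ceiling division)
	fmt.Println(cpuLimitMilli, cpuRequestMilli, memLimitBytes) // 250 250 524288000 (500Mi MemoryLimit)
}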

View File

@ -31,6 +31,7 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/diff"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing" featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
@ -712,3 +713,167 @@ func TestGenerateLinuxContainerConfigSwap(t *testing.T) {
}) })
} }
} }
func TestGenerateLinuxContainerResources(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
m.machineInfo.MemoryCapacity = 17179860387 // 16GB
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Image: "busybox",
},
},
},
Status: v1.PodStatus{},
}
for _, tc := range []struct {
name string
scalingFg bool
limits v1.ResourceList
requests v1.ResourceList
cStatus []v1.ContainerStatus
expected *runtimeapi.LinuxContainerResources
}{
{
"requests & limits, cpu & memory, guaranteed qos - no container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - no container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970},
},
{
"best-effort qos - no container status",
true,
nil,
nil,
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000},
},
{
"requests & limits, cpu & memory, guaranteed qos - empty resources container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{{Name: "c1"}},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - empty resources container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{{Name: "c1"}},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 999},
},
{
"best-effort qos - empty resources container status",
true,
nil,
nil,
[]v1.ContainerStatus{{Name: "c1"}},
&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000},
},
{
"requests & limits, cpu & memory, guaranteed qos - container status with resourcesAllocated",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 204, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - container status with resourcesAllocated",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970},
},
{
"requests & limits, cpu & memory, guaranteed qos - no container status",
false,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - container status with resourcesAllocated",
false,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970},
},
{
"requests & limits, cpu & memory, guaranteed qos - container status with resourcesAllocated",
false,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 204, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"best-effort qos - no container status",
false,
nil,
nil,
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000},
},
} {
t.Run(tc.name, func(t *testing.T) {
if tc.scalingFg {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
}
tc.expected.HugepageLimits = []*runtimeapi.HugepageLimit{{PageSize: "2MB", Limit: 0}, {PageSize: "1GB", Limit: 0}}
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}
if len(tc.cStatus) > 0 {
pod.Status.ContainerStatuses = tc.cStatus
}
resources := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
if diff.ObjectDiff(resources, tc.expected) != "" {
t.Errorf("Test %s: expected resources %+v, but got %+v", tc.name, tc.expected, resources)
}
})
}
}
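The expected CpuShares values in the cases above (250m -> 256, 200m -> 204, best-effort -> 2) follow the usual milliCPU-to-shares mapping. A standalone sketch of that mapping, with constants assumed to match the kubelet's cm package:
package main

import "fmt"

// milliCPUToShares sketches the milliCPU -> cpu.shares mapping assumed by the
// expected values above.
func milliCPUToShares(milliCPU int64) int64 {
	const (
		sharesPerCPU = 1024
		minShares    = 2
	)
	if milliCPU == 0 {
		return minShares
	}
	shares := (milliCPU * sharesPerCPU) / 1000
	if shares < minShares {
		return minShares
	}
	return shares
}

func main() {
	fmt.Println(milliCPUToShares(250)) // 256
	fmt.Println(milliCPUToShares(200)) // 204
	fmt.Println(milliCPUToShares(0))   // 2 (best-effort)
}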

View File

@ -28,6 +28,7 @@ import (
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/intstr"
@ -230,6 +231,111 @@ func TestToKubeContainerStatus(t *testing.T) {
} }
} }
// TestToKubeContainerStatusWithResources tests converting the CRI container status to
// the internal type (i.e., toKubeContainerStatus()) for containers that report Resources.
func TestToKubeContainerStatusWithResources(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
cid := &kubecontainer.ContainerID{Type: "testRuntime", ID: "dummyid"}
meta := &runtimeapi.ContainerMetadata{Name: "cname", Attempt: 3}
imageSpec := &runtimeapi.ImageSpec{Image: "fimage"}
var (
createdAt int64 = 327
startedAt int64 = 999
)
for desc, test := range map[string]struct {
input *runtimeapi.ContainerStatus
expected *kubecontainer.Status
}{
"container reporting cpu and memory": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
StartedAt: startedAt,
Resources: &runtimeapi.ContainerResources{
Linux: &runtimeapi.LinuxContainerResources{
CpuQuota: 25000,
CpuPeriod: 100000,
MemoryLimitInBytes: 524288000,
OomScoreAdj: -998,
},
},
},
expected: &kubecontainer.Status{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateRunning,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
Resources: &kubecontainer.ContainerResources{
CPULimit: resource.NewMilliQuantity(250, resource.DecimalSI),
MemoryLimit: resource.NewQuantity(524288000, resource.BinarySI),
},
},
},
"container reporting cpu only": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
StartedAt: startedAt,
Resources: &runtimeapi.ContainerResources{
Linux: &runtimeapi.LinuxContainerResources{
CpuQuota: 50000,
CpuPeriod: 100000,
},
},
},
expected: &kubecontainer.Status{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateRunning,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
Resources: &kubecontainer.ContainerResources{
CPULimit: resource.NewMilliQuantity(500, resource.DecimalSI),
},
},
},
"container reporting memory only": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
StartedAt: startedAt,
Resources: &runtimeapi.ContainerResources{
Linux: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 524288000,
OomScoreAdj: -998,
},
},
},
expected: &kubecontainer.Status{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateRunning,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
Resources: &kubecontainer.ContainerResources{
MemoryLimit: resource.NewQuantity(524288000, resource.BinarySI),
},
},
},
} {
t.Run(desc, func(t *testing.T) {
actual := toKubeContainerStatus(test.input, cid.Type)
assert.Equal(t, test.expected, actual, desc)
})
}
}
func TestLifeCycleHook(t *testing.T) { func TestLifeCycleHook(t *testing.T) {
// Setup // Setup
@ -696,3 +802,39 @@ func TestKillContainerGracePeriod(t *testing.T) {
}) })
} }
} }
// TestUpdateContainerResources tests updating a running container's resources in a Pod.
func TestUpdateContainerResources(t *testing.T) {
fakeRuntime, _, m, errCreate := createTestRuntimeManager()
require.NoError(t, errCreate)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
},
},
}
// Create fake sandbox and container
_, fakeContainers := makeAndSetFakePod(t, m, fakeRuntime, pod)
assert.Equal(t, len(fakeContainers), 1)
cStatus, err := m.getPodContainerStatuses(pod.UID, pod.Name, pod.Namespace)
assert.NoError(t, err)
containerID := cStatus[0].ID
err = m.updateContainerResources(pod, &pod.Spec.Containers[0], containerID)
assert.NoError(t, err)
// Verify container is updated
assert.Contains(t, fakeRuntime.Called, "UpdateContainerResources")
}

View File

@ -29,3 +29,12 @@ import (
func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string, nsTarget *kubecontainer.ContainerID) error { func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string, nsTarget *kubecontainer.ContainerID) error {
return nil return nil
} }
// generateContainerResources generates platform specific container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
return nil
}
func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources {
return nil
}

View File

@ -40,6 +40,12 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config
return nil return nil
} }
// generateContainerResources generates platform specific (windows) container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
//TODO: Add windows support
return nil
}
// generateWindowsContainerConfig generates windows container config for kubelet runtime v1. // generateWindowsContainerConfig generates windows container config for kubelet runtime v1.
// Refer https://git.k8s.io/design-proposals-archive/node/cri-windows.md. // Refer https://git.k8s.io/design-proposals-archive/node/cri-windows.md.
func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) (*runtimeapi.WindowsContainerConfig, error) { func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) (*runtimeapi.WindowsContainerConfig, error) {
@ -126,3 +132,8 @@ func calculateCPUMaximum(cpuLimit *resource.Quantity, cpuCount int64) int64 {
} }
return cpuMaximum return cpuMaximum
} }
func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources {
//TODO: Add windows support
return nil
}

View File

@ -30,8 +30,10 @@ import (
"k8s.io/klog/v2" "k8s.io/klog/v2"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubetypes "k8s.io/apimachinery/pkg/types" kubetypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
utilruntime "k8s.io/apimachinery/pkg/util/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime"
utilversion "k8s.io/apimachinery/pkg/util/version" utilversion "k8s.io/apimachinery/pkg/util/version"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -42,6 +44,7 @@ import (
internalapi "k8s.io/cri-api/pkg/apis" internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/api/legacyscheme" "k8s.io/kubernetes/pkg/api/legacyscheme"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/credentialprovider" "k8s.io/kubernetes/pkg/credentialprovider"
"k8s.io/kubernetes/pkg/credentialprovider/plugin" "k8s.io/kubernetes/pkg/credentialprovider/plugin"
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
@ -133,6 +136,9 @@ type kubeGenericRuntimeManager struct {
// The directory path for seccomp profiles. // The directory path for seccomp profiles.
seccompProfileRoot string seccompProfileRoot string
// Container management interface for pod container.
containerManager cm.ContainerManager
// Internal lifecycle event handlers for container resource management. // Internal lifecycle event handlers for container resource management.
internalLifecycle cm.InternalContainerLifecycle internalLifecycle cm.InternalContainerLifecycle
@ -190,7 +196,7 @@ func NewKubeGenericRuntimeManager(
cpuCFSQuotaPeriod metav1.Duration, cpuCFSQuotaPeriod metav1.Duration,
runtimeService internalapi.RuntimeService, runtimeService internalapi.RuntimeService,
imageService internalapi.ImageManagerService, imageService internalapi.ImageManagerService,
internalLifecycle cm.InternalContainerLifecycle, containerManager cm.ContainerManager,
logManager logs.ContainerLogManager, logManager logs.ContainerLogManager,
runtimeClassManager *runtimeclass.Manager, runtimeClassManager *runtimeclass.Manager,
seccompDefault bool, seccompDefault bool,
@ -215,7 +221,8 @@ func NewKubeGenericRuntimeManager(
runtimeHelper: runtimeHelper, runtimeHelper: runtimeHelper,
runtimeService: runtimeService, runtimeService: runtimeService,
imageService: imageService, imageService: imageService,
internalLifecycle: internalLifecycle, containerManager: containerManager,
internalLifecycle: containerManager.InternalContainerLifecycle(),
logManager: logManager, logManager: logManager,
runtimeClassManager: runtimeClassManager, runtimeClassManager: runtimeClassManager,
logReduction: logreduction.NewLogReduction(identicalErrorDelay), logReduction: logreduction.NewLogReduction(identicalErrorDelay),
@ -446,6 +453,26 @@ type containerToKillInfo struct {
reason containerKillReason reason containerKillReason
} }
// containerResources holds the set of resources applicable to the running container
type containerResources struct {
memoryLimit int64
memoryRequest int64
cpuLimit int64
cpuRequest int64
}
// containerToUpdateInfo contains necessary information to update a container's resources.
type containerToUpdateInfo struct {
// Index of the container in pod.Spec.Containers that needs resource update
apiContainerIdx int
// ID of the runtime container that needs resource update
kubeContainerID kubecontainer.ContainerID
// Desired resources for the running container
desiredContainerResources containerResources
// Most recently configured resources on the running container
currentContainerResources *containerResources
}
// podActions keeps information what to do for a pod. // podActions keeps information what to do for a pod.
type podActions struct { type podActions struct {
// Stop all running (regular, init and ephemeral) containers and the sandbox for the pod. // Stop all running (regular, init and ephemeral) containers and the sandbox for the pod.
@ -471,6 +498,11 @@ type podActions struct {
// EphemeralContainersToStart is a list of indexes for the ephemeral containers to start, // EphemeralContainersToStart is a list of indexes for the ephemeral containers to start,
// where the index is the index of the specific container in pod.Spec.EphemeralContainers. // where the index is the index of the specific container in pod.Spec.EphemeralContainers.
EphemeralContainersToStart []int EphemeralContainersToStart []int
// ContainersToUpdate keeps a list of containers needing resource update.
// Container resource update is applicable only for CPU and memory.
ContainersToUpdate map[v1.ResourceName][]containerToUpdateInfo
// UpdatePodResources is true if container(s) need resource update with restart
UpdatePodResources bool
} }
func containerChanged(container *v1.Container, containerStatus *kubecontainer.Status) (uint64, uint64, bool) { func containerChanged(container *v1.Container, containerStatus *kubecontainer.Status) (uint64, uint64, bool) {
@ -490,6 +522,263 @@ func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) boo
return cStatus.ExitCode == 0 return cStatus.ExitCode == 0
} }
func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containerIdx int, kubeContainerStatus *kubecontainer.Status, changes *podActions) bool {
container := pod.Spec.Containers[containerIdx]
if container.Resources.Limits == nil || len(pod.Status.ContainerStatuses) == 0 {
return true
}
// Determine if the *running* container needs resource update by comparing v1.Spec.Resources (desired)
// with v1.Status.Resources / runtime.Status.Resources (last known actual).
// Proceed only when kubelet has accepted the resize a.k.a v1.Spec.Resources.Requests == v1.Status.ResourcesAllocated.
// Skip if runtime containerID doesn't match pod.Status containerID (container is restarting)
apiContainerStatus, exists := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name)
if !exists || apiContainerStatus.State.Running == nil || apiContainerStatus.Resources == nil ||
kubeContainerStatus.State != kubecontainer.ContainerStateRunning ||
kubeContainerStatus.ID.String() != apiContainerStatus.ContainerID ||
len(diff.ObjectDiff(container.Resources.Requests, apiContainerStatus.ResourcesAllocated)) != 0 {
return true
}
desiredMemoryLimit := container.Resources.Limits.Memory().Value()
desiredCPULimit := container.Resources.Limits.Cpu().MilliValue()
desiredCPURequest := container.Resources.Requests.Cpu().MilliValue()
currentMemoryLimit := apiContainerStatus.Resources.Limits.Memory().Value()
currentCPULimit := apiContainerStatus.Resources.Limits.Cpu().MilliValue()
currentCPURequest := apiContainerStatus.Resources.Requests.Cpu().MilliValue()
// Runtime container status resources (from CRI), if set, supersede the v1 (API) container status resources.
if kubeContainerStatus.Resources != nil {
if kubeContainerStatus.Resources.MemoryLimit != nil {
currentMemoryLimit = kubeContainerStatus.Resources.MemoryLimit.Value()
}
if kubeContainerStatus.Resources.CPULimit != nil {
currentCPULimit = kubeContainerStatus.Resources.CPULimit.MilliValue()
}
if kubeContainerStatus.Resources.CPURequest != nil {
currentCPURequest = kubeContainerStatus.Resources.CPURequest.MilliValue()
}
}
// Note: cgroup doesn't support memory request today, so we don't compare that. If canAdmitPod called during
// handlePodResourcesResize finds 'fit', then desiredMemoryRequest == currentMemoryRequest.
if desiredMemoryLimit == currentMemoryLimit && desiredCPULimit == currentCPULimit && desiredCPURequest == currentCPURequest {
return true
}
desiredResources := containerResources{
memoryLimit: desiredMemoryLimit,
memoryRequest: apiContainerStatus.ResourcesAllocated.Memory().Value(),
cpuLimit: desiredCPULimit,
cpuRequest: desiredCPURequest,
}
currentResources := containerResources{
memoryLimit: currentMemoryLimit,
memoryRequest: apiContainerStatus.Resources.Requests.Memory().Value(),
cpuLimit: currentCPULimit,
cpuRequest: currentCPURequest,
}
resizePolicy := make(map[v1.ResourceName]v1.ResourceResizePolicy)
for _, pol := range container.ResizePolicy {
resizePolicy[pol.ResourceName] = pol.Policy
}
determineContainerResize := func(rName v1.ResourceName, specValue, statusValue int64) (resize, restart bool) {
if specValue == statusValue {
return false, false
}
if resizePolicy[rName] == v1.RestartRequired {
return true, true
}
return true, false
}
markContainerForUpdate := func(rName v1.ResourceName, specValue, statusValue int64) {
cUpdateInfo := containerToUpdateInfo{
apiContainerIdx: containerIdx,
kubeContainerID: kubeContainerStatus.ID,
desiredContainerResources: desiredResources,
currentContainerResources: &currentResources,
}
// Order the container updates such that resource decreases are applied before increases
switch {
case specValue > statusValue: // append
changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], cUpdateInfo)
case specValue < statusValue: // prepend
changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], containerToUpdateInfo{})
copy(changes.ContainersToUpdate[rName][1:], changes.ContainersToUpdate[rName])
changes.ContainersToUpdate[rName][0] = cUpdateInfo
}
}
resizeMemLim, restartMemLim := determineContainerResize(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
resizeCPULim, restartCPULim := determineContainerResize(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
resizeCPUReq, restartCPUReq := determineContainerResize(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
if restartCPULim || restartCPUReq || restartMemLim {
// resize policy requires this container to restart
changes.ContainersToKill[kubeContainerStatus.ID] = containerToKillInfo{
name: kubeContainerStatus.Name,
container: &pod.Spec.Containers[containerIdx],
message: fmt.Sprintf("Container %s resize requires restart", container.Name),
}
changes.ContainersToStart = append(changes.ContainersToStart, containerIdx)
changes.UpdatePodResources = true
return false
} else {
if resizeMemLim {
markContainerForUpdate(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
}
if resizeCPULim {
markContainerForUpdate(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
} else if resizeCPUReq {
markContainerForUpdate(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
}
}
return true
}
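markContainerForUpdate above orders ContainersToUpdate so that decreases run before increases, letting shrinking containers free capacity before growing ones consume it. A simplified sketch of that prepend/append rule in isolation (types here are illustrative, not the kubelet's):
package main

import "fmt"

// update stands in for containerToUpdateInfo; only the values needed to decide
// ordering are kept.
type update struct {
	name    string
	desired int64
	current int64
}

// orderUpdates applies the same rule as markContainerForUpdate: increases are
// appended, decreases are prepended, so shrinking containers release resources
// before growing containers claim them.
func orderUpdates(updates []update) []update {
	var ordered []update
	for _, u := range updates {
		switch {
		case u.desired > u.current: // increase: append
			ordered = append(ordered, u)
		case u.desired < u.current: // decrease: prepend
			ordered = append([]update{u}, ordered...)
		}
	}
	return ordered
}

func main() {
	out := orderUpdates([]update{
		{name: "c1", desired: 2000, current: 1000}, // increase
		{name: "c2", desired: 500, current: 1500},  // decrease
	})
	for _, u := range out {
		fmt.Println(u.name) // prints c2 first, then c1
	}
}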
func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions, result kubecontainer.PodSyncResult) {
pcm := m.containerManager.NewPodContainerManager()
//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
podResources := cm.ResourceConfigForPod(pod, m.cpuCFSQuota, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
if podResources == nil {
klog.ErrorS(nil, "Unable to get resource configuration", "pod", pod.Name)
result.Fail(fmt.Errorf("Unable to get resource configuration processing resize for pod %s", pod.Name))
return
}
setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error {
var err error
switch rName {
case v1.ResourceCPU:
if setLimitValue == true {
err = pcm.SetPodCgroupCpuConfig(pod, podResources.CpuQuota, podResources.CpuPeriod, nil)
} else {
err = pcm.SetPodCgroupCpuConfig(pod, nil, podResources.CpuPeriod, podResources.CpuShares)
}
case v1.ResourceMemory:
err = pcm.SetPodCgroupMemoryConfig(pod, *podResources.Memory)
}
if err != nil {
klog.ErrorS(err, "Failed to set cgroup config", "resource", rName, "pod", pod.Name)
}
return err
}
// Memory and CPU are updated separately because memory resizes may be ordered differently than CPU resizes.
// If resize results in net pod resource increase, set pod cgroup config before resizing containers.
// If resize results in net pod resource decrease, set pod cgroup config after resizing containers.
// If an error occurs at any point, abort. Let future syncpod iterations retry the unfinished stuff.
resizeContainers := func(rName v1.ResourceName, currPodCgLimValue, newPodCgLimValue, currPodCgReqValue, newPodCgReqValue int64) error {
var err error
if newPodCgLimValue > currPodCgLimValue {
if err = setPodCgroupConfig(rName, true); err != nil {
return err
}
}
if newPodCgReqValue > currPodCgReqValue {
if err = setPodCgroupConfig(rName, false); err != nil {
return err
}
}
if len(podContainerChanges.ContainersToUpdate[rName]) > 0 {
if err = m.updatePodContainerResources(pod, rName, podContainerChanges.ContainersToUpdate[rName]); err != nil {
klog.ErrorS(err, "updatePodContainerResources failed", "pod", format.Pod(pod), "resource", rName)
return err
}
}
if newPodCgLimValue < currPodCgLimValue {
err = setPodCgroupConfig(rName, true)
}
if newPodCgReqValue < currPodCgReqValue {
if err = setPodCgroupConfig(rName, false); err != nil {
return err
}
}
return err
}
if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
currentPodMemoryLimit, err := pcm.GetPodCgroupMemoryConfig(pod)
if err != nil {
klog.ErrorS(err, "GetPodCgroupMemoryConfig failed", "pod", pod.Name)
result.Fail(err)
return
}
currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
if err != nil {
klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
result.Fail(err)
return
}
if currentPodMemoryUsage >= uint64(*podResources.Memory) {
klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
result.Fail(fmt.Errorf("Aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
return
}
if errResize := resizeContainers(v1.ResourceMemory, int64(currentPodMemoryLimit), *podResources.Memory, 0, 0); errResize != nil {
result.Fail(errResize)
return
}
}
if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
currentPodCpuQuota, _, currentPodCPUShares, err := pcm.GetPodCgroupCpuConfig(pod)
if err != nil {
klog.ErrorS(err, "GetPodCgroupCpuConfig failed", "pod", pod.Name)
result.Fail(err)
return
}
if errResize := resizeContainers(v1.ResourceCPU, currentPodCpuQuota, *podResources.CpuQuota,
int64(currentPodCPUShares), int64(*podResources.CpuShares)); errResize != nil {
result.Fail(errResize)
return
}
}
}
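resizeContainers above sequences pod-level cgroup writes around the container updates: the pod cgroup limit is raised before containers grow and lowered only after they shrink, so a container never momentarily exceeds its parent cgroup. A condensed sketch of that ordering with the cgroup and CRI calls stubbed out:
package main

import "fmt"

// setPodCgroup and updateContainers stand in for the pcm and CRI calls used above.
func setPodCgroup(limit int64) error { fmt.Println("set pod cgroup limit to", limit); return nil }
func updateContainers() error        { fmt.Println("update container resources"); return nil }

// resizeOrder sketches the sequencing rule: raise the pod cgroup before growing
// containers, lower it only after they have shrunk.
func resizeOrder(currLimit, newLimit int64) error {
	if newLimit > currLimit {
		if err := setPodCgroup(newLimit); err != nil {
			return err
		}
	}
	if err := updateContainers(); err != nil {
		return err
	}
	if newLimit < currLimit {
		return setPodCgroup(newLimit)
	}
	return nil
}

func main() {
	_ = resizeOrder(1<<30, 2<<30) // increase: pod cgroup first, then containers
	_ = resizeOrder(2<<30, 1<<30) // decrease: containers first, then pod cgroup
}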
func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod))
for _, cInfo := range containersToUpdate {
container := pod.Spec.Containers[cInfo.apiContainerIdx].DeepCopy()
// If updating memory limit, use most recently configured CPU request and limit values.
// If updating CPU request and limit, use most recently configured memory request and limit values.
switch resourceName {
case v1.ResourceMemory:
container.Resources.Limits = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuLimit, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryLimit, resource.BinarySI),
}
container.Resources.Requests = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuRequest, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryRequest, resource.BinarySI),
}
case v1.ResourceCPU:
container.Resources.Limits = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuLimit, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryLimit, resource.BinarySI),
}
container.Resources.Requests = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuRequest, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryRequest, resource.BinarySI),
}
}
if err := m.updateContainerResources(pod, container, cInfo.kubeContainerID); err != nil {
// Log error and abort as container updates need to succeed in the order determined by computePodResizeAction.
// The recovery path is for SyncPod to keep retrying at later times until it succeeds.
klog.ErrorS(err, "updateContainerResources failed", "container", container.Name, "cID", cInfo.kubeContainerID,
"pod", format.Pod(pod), "resourceName", resourceName)
return err
}
// If UpdateContainerResources returns without error, the desired values for 'resourceName' were accepted by the runtime.
// So we update currentContainerResources for 'resourceName', which is our view of most recently configured resources.
// Note: We can't rely on GetPodStatus as runtime may lag in actuating the resource values it just accepted.
switch resourceName {
case v1.ResourceMemory:
cInfo.currentContainerResources.memoryLimit = cInfo.desiredContainerResources.memoryLimit
cInfo.currentContainerResources.memoryRequest = cInfo.desiredContainerResources.memoryRequest
case v1.ResourceCPU:
cInfo.currentContainerResources.cpuLimit = cInfo.desiredContainerResources.cpuLimit
cInfo.currentContainerResources.cpuRequest = cInfo.desiredContainerResources.cpuRequest
}
}
return nil
}
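When updating one resource, the other dimension is rebuilt from currentContainerResources so a CPU update cannot clobber an in-flight memory change (and vice versa). A simplified sketch of that merge, with plain int64 values standing in for resource.Quantity:
package main

import "fmt"

// res uses plain int64 values in place of resource.Quantity.
type res struct{ cpuMilli, memBytes int64 }

// mergeForUpdate follows the rule above: take the desired value only for the
// resource being updated and pin the other resource to its last-applied value.
func mergeForUpdate(resourceName string, desired, current res) res {
	switch resourceName {
	case "memory":
		return res{cpuMilli: current.cpuMilli, memBytes: desired.memBytes}
	case "cpu":
		return res{cpuMilli: desired.cpuMilli, memBytes: current.memBytes}
	}
	return current
}

func main() {
	desired := res{cpuMilli: 2000, memBytes: 2 << 30}
	current := res{cpuMilli: 1000, memBytes: 1 << 30}
	fmt.Println(mergeForUpdate("memory", desired, current)) // {1000 2147483648}
	fmt.Println(mergeForUpdate("cpu", desired, current))    // {2000 1073741824}
}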
// computePodActions checks whether the pod spec has changed and returns the changes if true.
func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
klog.V(5).InfoS("Syncing Pod", "pod", klog.KObj(pod))
@ -582,6 +871,14 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
return changes
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
changes.ContainersToUpdate = make(map[v1.ResourceName][]containerToUpdateInfo)
latestPodStatus, err := m.GetPodStatus(podStatus.ID, pod.Name, pod.Namespace)
if err == nil {
podStatus = latestPodStatus
}
}
// Number of running containers to keep.
keepCount := 0
// check the status of containers.
@ -623,7 +920,10 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
var message string
var reason containerKillReason
restart := shouldRestartOnFailure(pod)
// Do not restart if only the Resources field has changed with InPlacePodVerticalScaling enabled
if _, _, changed := containerChanged(&container, containerStatus); changed &&
(!utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) ||
kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) {
message = fmt.Sprintf("Container %s definition changed", container.Name) message = fmt.Sprintf("Container %s definition changed", container.Name)
// Restart regardless of the restart policy because the container // Restart regardless of the restart policy because the container
// spec changed. // spec changed.
@ -636,6 +936,10 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
// If the container failed the startup probe, we should kill it. // If the container failed the startup probe, we should kill it.
message = fmt.Sprintf("Container %s failed startup probe", container.Name) message = fmt.Sprintf("Container %s failed startup probe", container.Name)
reason = reasonStartupProbe reason = reasonStartupProbe
} else if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) &&
!m.computePodResizeAction(pod, idx, containerStatus, &changes) {
// computePodResizeAction updates 'changes' if resize policy requires restarting this container
continue
} else {
// Keep the container.
keepCount++
@ -674,7 +978,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
// 4. Create sandbox if necessary.
// 5. Create ephemeral containers.
// 6. Create init containers.
// 7. Resize running containers (if InPlacePodVerticalScaling==true)
// 8. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
// Step 1: Compute sandbox and container changes.
podContainerChanges := m.computePodActions(pod, podStatus)
@ -903,7 +1208,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
}
// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
}
}
// Step 8: start containers in podContainerChanges.ContainersToStart.
for _, idx := range podContainerChanges.ContainersToStart {
start(ctx, "container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
}
@ -1096,7 +1408,6 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety
}
m.logReduction.ClearID(podFullName)
return &kubecontainer.PodStatus{
ID: uid,
Name: name,


@ -37,11 +37,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/util/flowcontrol" "k8s.io/client-go/util/flowcontrol"
featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
apitest "k8s.io/cri-api/pkg/apis/testing" apitest "k8s.io/cri-api/pkg/apis/testing"
podutil "k8s.io/kubernetes/pkg/api/v1/pod" podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/credentialprovider" "k8s.io/kubernetes/pkg/credentialprovider"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
@ -861,6 +864,28 @@ func makeBasePodAndStatus() (*v1.Pod, *kubecontainer.PodStatus) {
},
},
},
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{
{
ContainerID: "://id1",
Name: "foo1",
Image: "busybox",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
{
ContainerID: "://id2",
Name: "foo2",
Image: "busybox",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
{
ContainerID: "://id3",
Name: "foo3",
Image: "busybox",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
},
},
}
status := &kubecontainer.PodStatus{
ID: pod.UID,
@ -1615,3 +1640,466 @@ func makeBasePodAndStatusWithInitAndEphemeralContainers() (*v1.Pod, *kubecontain
})
return pod, status
}
func TestComputePodActionsForPodResize(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
fakeRuntime, _, m, err := createTestRuntimeManager()
m.machineInfo.MemoryCapacity = 17179860387 // 16GB
assert.NoError(t, err)
cpu100m := resource.MustParse("100m")
cpu200m := resource.MustParse("200m")
mem100M := resource.MustParse("100Mi")
mem200M := resource.MustParse("200Mi")
cpuPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired}
memPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartNotRequired}
cpuPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartRequired}
memPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired}
for desc, test := range map[string]struct {
podResizePolicyFn func(*v1.Pod)
mutatePodFn func(*v1.Pod)
getExpectedPodActionsFn func(*v1.Pod, *kubecontainer.PodStatus) *podActions
}{
"Update container CPU and memory resources": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceMemory: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
v1.ResourceCPU: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Update container CPU resources": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceCPU: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Update container memory resources": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceMemory: {
{
apiContainerIdx: 2,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Nothing when spec.Resources and status.Resources are equal": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m},
}
pod.Status.ContainerStatuses[1].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m},
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToStart: []int{},
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
}
return &pa
},
},
"Update container CPU and memory resources with Restart policy for CPU": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[0].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[0].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[0].Name)
killMap := make(map[kubecontainer.ContainerID]containerToKillInfo)
killMap[kcs.ID] = containerToKillInfo{
container: &pod.Spec.Containers[0],
name: pod.Spec.Containers[0].Name,
}
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{0},
ContainersToKill: killMap,
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
UpdatePodResources: true,
}
return &pa
},
},
"Update container CPU and memory resources with Restart policy for memory": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name)
killMap := make(map[kubecontainer.ContainerID]containerToKillInfo)
killMap[kcs.ID] = containerToKillInfo{
container: &pod.Spec.Containers[2],
name: pod.Spec.Containers[2].Name,
}
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{2},
ContainersToKill: killMap,
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
UpdatePodResources: true,
}
return &pa
},
},
"Update container memory resources with Restart policy for CPU": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceMemory: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Update container CPU resources with Restart policy for memory": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceCPU: {
{
apiContainerIdx: 2,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
},
},
},
}
return &pa
},
},
} {
pod, kps := makeBasePodAndStatus()
for idx := range pod.Spec.Containers {
// default resize policy when pod resize feature is enabled
pod.Spec.Containers[idx].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartNotRequired}
}
if test.podResizePolicyFn != nil {
test.podResizePolicyFn(pod)
}
for idx := range pod.Spec.Containers {
// compute hash
if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil {
kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx])
kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx])
}
}
makeAndSetFakePod(t, m, fakeRuntime, pod)
status, _ := m.GetPodStatus(kps.ID, pod.Name, pod.Namespace)
for idx := range pod.Spec.Containers {
if rcs := status.FindContainerStatusByName(pod.Spec.Containers[idx].Name); rcs != nil {
if csIdx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[idx].Name); found {
pod.Status.ContainerStatuses[csIdx].ContainerID = rcs.ID.String()
}
}
}
for idx := range pod.Spec.Containers {
if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil {
kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx])
kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx])
}
}
if test.mutatePodFn != nil {
test.mutatePodFn(pod)
}
expectedActions := test.getExpectedPodActionsFn(pod, status)
actions := m.computePodActions(pod, status)
verifyActions(t, expectedActions, &actions, desc)
}
}
func TestUpdatePodContainerResources(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
fakeRuntime, _, m, err := createTestRuntimeManager()
m.machineInfo.MemoryCapacity = 17179860387 // 16GB
assert.NoError(t, err)
cpu100m := resource.MustParse("100m")
cpu150m := resource.MustParse("150m")
cpu200m := resource.MustParse("200m")
cpu250m := resource.MustParse("250m")
cpu300m := resource.MustParse("300m")
cpu350m := resource.MustParse("350m")
mem100M := resource.MustParse("100Mi")
mem150M := resource.MustParse("150Mi")
mem200M := resource.MustParse("200Mi")
mem250M := resource.MustParse("250Mi")
mem300M := resource.MustParse("300Mi")
mem350M := resource.MustParse("350Mi")
res100m100Mi := v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}
res150m100Mi := v1.ResourceList{v1.ResourceCPU: cpu150m, v1.ResourceMemory: mem100M}
res100m150Mi := v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem150M}
res150m150Mi := v1.ResourceList{v1.ResourceCPU: cpu150m, v1.ResourceMemory: mem150M}
res200m200Mi := v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}
res250m200Mi := v1.ResourceList{v1.ResourceCPU: cpu250m, v1.ResourceMemory: mem200M}
res200m250Mi := v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem250M}
res250m250Mi := v1.ResourceList{v1.ResourceCPU: cpu250m, v1.ResourceMemory: mem250M}
res300m300Mi := v1.ResourceList{v1.ResourceCPU: cpu300m, v1.ResourceMemory: mem300M}
res350m300Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem300M}
res300m350Mi := v1.ResourceList{v1.ResourceCPU: cpu300m, v1.ResourceMemory: mem350M}
res350m350Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem350M}
pod, _ := makeBasePodAndStatus()
makeAndSetFakePod(t, m, fakeRuntime, pod)
for dsc, tc := range map[string]struct {
resourceName v1.ResourceName
apiSpecResources []v1.ResourceRequirements
apiStatusResources []v1.ResourceRequirements
requiresRestart []bool
invokeUpdateResources bool
expectedCurrentLimits []v1.ResourceList
expectedCurrentRequests []v1.ResourceList
}{
"Guaranteed QoS Pod - CPU & memory resize requested, update CPU": {
resourceName: v1.ResourceCPU,
apiSpecResources: []v1.ResourceRequirements{
{Limits: res150m150Mi, Requests: res150m150Mi},
{Limits: res250m250Mi, Requests: res250m250Mi},
{Limits: res350m350Mi, Requests: res350m350Mi},
},
apiStatusResources: []v1.ResourceRequirements{
{Limits: res100m100Mi, Requests: res100m100Mi},
{Limits: res200m200Mi, Requests: res200m200Mi},
{Limits: res300m300Mi, Requests: res300m300Mi},
},
requiresRestart: []bool{false, false, false},
invokeUpdateResources: true,
expectedCurrentLimits: []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi},
expectedCurrentRequests: []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi},
},
"Guaranteed QoS Pod - CPU & memory resize requested, update memory": {
resourceName: v1.ResourceMemory,
apiSpecResources: []v1.ResourceRequirements{
{Limits: res150m150Mi, Requests: res150m150Mi},
{Limits: res250m250Mi, Requests: res250m250Mi},
{Limits: res350m350Mi, Requests: res350m350Mi},
},
apiStatusResources: []v1.ResourceRequirements{
{Limits: res100m100Mi, Requests: res100m100Mi},
{Limits: res200m200Mi, Requests: res200m200Mi},
{Limits: res300m300Mi, Requests: res300m300Mi},
},
requiresRestart: []bool{false, false, false},
invokeUpdateResources: true,
expectedCurrentLimits: []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi},
expectedCurrentRequests: []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi},
},
} {
var containersToUpdate []containerToUpdateInfo
for idx := range pod.Spec.Containers {
// set the container spec and status resources from the test case
pod.Spec.Containers[idx].Resources = tc.apiSpecResources[idx]
pod.Status.ContainerStatuses[idx].Resources = &tc.apiStatusResources[idx]
cInfo := containerToUpdateInfo{
apiContainerIdx: idx,
kubeContainerID: kubecontainer.ContainerID{},
desiredContainerResources: containerResources{
memoryLimit: tc.apiSpecResources[idx].Limits.Memory().Value(),
memoryRequest: tc.apiSpecResources[idx].Requests.Memory().Value(),
cpuLimit: tc.apiSpecResources[idx].Limits.Cpu().MilliValue(),
cpuRequest: tc.apiSpecResources[idx].Requests.Cpu().MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: tc.apiStatusResources[idx].Limits.Memory().Value(),
memoryRequest: tc.apiStatusResources[idx].Requests.Memory().Value(),
cpuLimit: tc.apiStatusResources[idx].Limits.Cpu().MilliValue(),
cpuRequest: tc.apiStatusResources[idx].Requests.Cpu().MilliValue(),
},
}
containersToUpdate = append(containersToUpdate, cInfo)
}
fakeRuntime.Called = []string{}
err := m.updatePodContainerResources(pod, tc.resourceName, containersToUpdate)
assert.NoError(t, err, dsc)
if tc.invokeUpdateResources {
assert.Contains(t, fakeRuntime.Called, "UpdateContainerResources", dsc)
}
for idx := range pod.Spec.Containers {
assert.Equal(t, tc.expectedCurrentLimits[idx].Memory().Value(), containersToUpdate[idx].currentContainerResources.memoryLimit, dsc)
assert.Equal(t, tc.expectedCurrentRequests[idx].Memory().Value(), containersToUpdate[idx].currentContainerResources.memoryRequest, dsc)
assert.Equal(t, tc.expectedCurrentLimits[idx].Cpu().MilliValue(), containersToUpdate[idx].currentContainerResources.cpuLimit, dsc)
assert.Equal(t, tc.expectedCurrentRequests[idx].Cpu().MilliValue(), containersToUpdate[idx].currentContainerResources.cpuRequest, dsc)
}
}
}


@ -22,7 +22,9 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
kubetypes "k8s.io/apimachinery/pkg/types" kubetypes "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/kubelet/types"
) )
@ -33,6 +35,7 @@ const (
podTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
containerHashLabel = "io.kubernetes.container.hash"
containerHashWithoutResourcesLabel = "io.kubernetes.container.hashWithoutResources"
containerRestartCountLabel = "io.kubernetes.container.restartCount"
containerTerminationMessagePathLabel = "io.kubernetes.container.terminationMessagePath"
containerTerminationMessagePolicyLabel = "io.kubernetes.container.terminationMessagePolicy"
@ -62,6 +65,7 @@ type labeledContainerInfo struct {
type annotatedContainerInfo struct {
Hash uint64
HashWithoutResources uint64
RestartCount int
PodDeletionGracePeriod *int64
PodTerminationGracePeriod *int64
@ -113,6 +117,9 @@ func newContainerAnnotations(container *v1.Container, pod *v1.Pod, restartCount
}
annotations[containerHashLabel] = strconv.FormatUint(kubecontainer.HashContainer(container), 16)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
annotations[containerHashWithoutResourcesLabel] = strconv.FormatUint(kubecontainer.HashContainerWithoutResources(container), 16)
}
annotations[containerRestartCountLabel] = strconv.Itoa(restartCount)
annotations[containerTerminationMessagePathLabel] = container.TerminationMessagePath
annotations[containerTerminationMessagePolicyLabel] = string(container.TerminationMessagePolicy)
@ -193,6 +200,11 @@ func getContainerInfoFromAnnotations(annotations map[string]string) *annotatedCo
if containerInfo.Hash, err = getUint64ValueFromLabel(annotations, containerHashLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashLabel, "annotations", annotations)
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if containerInfo.HashWithoutResources, err = getUint64ValueFromLabel(annotations, containerHashWithoutResourcesLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashWithoutResourcesLabel, "annotations", annotations)
}
}
if containerInfo.RestartCount, err = getIntValueFromLabel(annotations, containerRestartCountLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerRestartCountLabel, "annotations", annotations)
}
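The new annotation round-trips through the same base-16 encoding as the existing container hash; a small sketch of that encode/decode path (illustrative, using the helpers and label defined above):
// Encode the hash-without-resources into the annotation, then parse it back.
hash := kubecontainer.HashContainerWithoutResources(container)
annotations[containerHashWithoutResourcesLabel] = strconv.FormatUint(hash, 16)
parsed, err := strconv.ParseUint(annotations[containerHashWithoutResourcesLabel], 16, 64)
if err == nil && parsed == hash {
	// a resize-only change leaves this value equal to the spec's hash-without-resources,
	// so computePodActions will not restart the container
}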


@ -23,6 +23,9 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/intstr"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
) )
@ -152,11 +155,13 @@ func TestContainerAnnotations(t *testing.T) {
PodDeletionGracePeriod: pod.DeletionGracePeriodSeconds,
PodTerminationGracePeriod: pod.Spec.TerminationGracePeriodSeconds,
Hash: kubecontainer.HashContainer(container),
HashWithoutResources: kubecontainer.HashContainerWithoutResources(container),
RestartCount: restartCount,
TerminationMessagePath: container.TerminationMessagePath,
PreStopHandler: container.Lifecycle.PreStop,
}
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
// Test whether we can get right information from label
annotations := newContainerAnnotations(container, pod, restartCount, opts)
containerInfo := getContainerInfoFromAnnotations(annotations)
@ -177,6 +182,7 @@ func TestContainerAnnotations(t *testing.T) {
expected.PreStopHandler = nil
// Because container is changed, the Hash should be updated
expected.Hash = kubecontainer.HashContainer(container)
expected.HashWithoutResources = kubecontainer.HashContainerWithoutResources(container)
annotations = newContainerAnnotations(container, pod, restartCount, opts)
containerInfo = getContainerInfoFromAnnotations(annotations)
if !reflect.DeepEqual(containerInfo, expected) {


@ -76,6 +76,8 @@ type GenericPLEG struct {
runningMu sync.Mutex
// Indicates relisting related parameters
relistDuration *RelistDuration
// Mutex to serialize updateCache called by relist vs UpdateCache interface
podCacheMutex sync.Mutex
}
// plegContainerState has a one-to-one mapping to the
@ -436,6 +438,8 @@ func (g *GenericPLEG) updateCache(ctx context.Context, pod *kubecontainer.Pod, p
return nil, true
}
g.podCacheMutex.Lock()
defer g.podCacheMutex.Unlock()
timestamp := g.clock.Now()
status, err := g.runtime.GetPodStatus(ctx, pod.ID, pod.Name, pod.Namespace)
@ -478,6 +482,16 @@ func (g *GenericPLEG) updateCache(ctx context.Context, pod *kubecontainer.Pod, p
return err, g.cache.Set(pod.ID, status, err, timestamp)
}
func (g *GenericPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) error {
if !g.cacheEnabled() {
return fmt.Errorf("pod cache disabled")
}
if pod == nil {
return fmt.Errorf("pod cannot be nil")
}
return g.updateCache(context.TODO(), pod, pid)
}
func updateEvents(eventsByPodID map[types.UID][]*PodLifecycleEvent, e *PodLifecycleEvent) {
if e == nil {
return
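UpdateCache lets callers refresh a single pod's cached status outside of relist; a hedged sketch of a hypothetical caller after a successful in-place resize (kl.pleg, runningPods and the error handling are assumptions, not code from this commit):
// Hypothetical caller: refresh the PLEG cache for one pod so the next
// computePodActions sees the runtime's post-resize container resources.
runtimePod := kubecontainer.Pods(runningPods).FindPod("", pod.UID)
if err := kl.pleg.UpdateCache(&runtimePod, pod.UID); err != nil {
	klog.ErrorS(err, "Failed to update pod cache after resize", "pod", klog.KObj(pod))
}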


@ -20,6 +20,7 @@ import (
"time" "time"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
) )
// PodLifeCycleEventType define the event type of pod life cycle events. // PodLifeCycleEventType define the event type of pod life cycle events.
@ -68,4 +69,5 @@ type PodLifecycleEventGenerator interface {
Watch() chan *PodLifecycleEvent
Healthy() (bool, error)
Relist()
UpdateCache(*kubecontainer.Pod, types.UID) error
}


@ -17,6 +17,7 @@ limitations under the License.
package prober
import (
"io/ioutil"
"reflect"
"sync"
@ -109,8 +110,14 @@ func newTestManager() *manager {
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
// Add test pod to pod manager, so that status manager can get the pod from pod manager if needed.
podManager.AddPod(getTestPod())
testRootDir := ""
if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil {
return nil
} else {
testRootDir = tempDir
}
m := NewManager(
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, testRootDir),
results.NewManager(),
results.NewManager(),
results.NewManager(),


@ -19,6 +19,7 @@ package prober
import (
"context"
"fmt"
"io/ioutil"
"testing"
"time"
@ -153,7 +154,13 @@ func TestDoProbe(t *testing.T) {
}
// Clean up.
testRootDir := ""
if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil {
t.Fatalf("can't make a temp rootdir: %v", err)
} else {
testRootDir = tempDir
}
m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker(), testRootDir)
resultsManager(m, probeType).Remove(testContainerID)
}
}


@ -18,7 +18,10 @@ package qos
import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/types"
)
@ -60,6 +63,11 @@ func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapa
// targets for OOM kills.
// Note that this is a heuristic, it won't work if a container has many small processes.
memoryRequest := container.Resources.Requests.Memory().Value()
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
memoryRequest = cs.ResourcesAllocated.Memory().Value()
}
}
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure
// that burstable pods have a higher OOM score adjustment.
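With the gate enabled, the OOM score is derived from the allocated request rather than the spec; a worked example of the formula above (values assumed for illustration):
memoryRequest := int64(100 * 1024 * 1024)        // 100Mi allocated to the container
memoryCapacity := int64(16 * 1024 * 1024 * 1024) // 16Gi node
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
// 1000 - 104857600000/17179869184 = 1000 - 6 = 994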


@ -85,7 +85,7 @@ func TestRunOnce(t *testing.T) {
recorder: &record.FakeRecorder{},
cadvisor: cadvisor,
nodeLister: testNodeLister{},
statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, basePath),
podManager: podManager,
podWorkers: &fakePodWorkers{},
os: &containertest.FakeOS{},


@ -0,0 +1,93 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package status
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/status/state"
)
type fakeManager struct {
state state.State
}
func (m *fakeManager) Start() {
klog.InfoS("Start()")
return
}
func (m *fakeManager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) {
klog.InfoS("GetPodStatus()")
return v1.PodStatus{}, false
}
func (m *fakeManager) SetPodStatus(pod *v1.Pod, status v1.PodStatus) {
klog.InfoS("SetPodStatus()")
return
}
func (m *fakeManager) SetContainerReadiness(podUID types.UID, containerID kubecontainer.ContainerID, ready bool) {
klog.InfoS("SetContainerReadiness()")
return
}
func (m *fakeManager) SetContainerStartup(podUID types.UID, containerID kubecontainer.ContainerID, started bool) {
klog.InfoS("SetContainerStartup()")
return
}
func (m *fakeManager) TerminatePod(pod *v1.Pod) {
klog.InfoS("TerminatePod()")
return
}
func (m *fakeManager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) {
klog.InfoS("RemoveOrphanedStatuses()")
return
}
func (m *fakeManager) State() state.Reader {
klog.InfoS("State()")
return m.state
}
func (m *fakeManager) SetPodAllocation(pod *v1.Pod) error {
klog.InfoS("SetPodAllocation()")
for _, container := range pod.Spec.Containers {
var alloc v1.ResourceList
if container.Resources.Requests != nil {
alloc = container.Resources.Requests.DeepCopy()
}
m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc)
}
return nil
}
func (m *fakeManager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error {
klog.InfoS("SetPodResizeStatus()")
return nil
}
// NewFakeManager creates an empty/fake status manager
func NewFakeManager() Manager {
return &fakeManager{
state: state.NewStateMemory(),
}
}
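A brief sketch of how a test might use this fake to seed allocation state (pod construction elided; names follow the fake above):
fm := NewFakeManager()
_ = fm.SetPodAllocation(pod) // copies each container's requests into the in-memory state
if alloc, ok := fm.State().GetContainerResourceAllocation(string(pod.UID), pod.Spec.Containers[0].Name); ok {
	_ = alloc // the recorded requests for the first container
}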


@ -0,0 +1,65 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"encoding/json"
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)
var _ checkpointmanager.Checkpoint = &PodResourceAllocationCheckpoint{}
// PodResourceAllocationCheckpoint is used to store resources allocated to a pod in checkpoint
type PodResourceAllocationCheckpoint struct {
AllocationEntries map[string]map[string]v1.ResourceList `json:"allocationEntries,omitempty"`
ResizeStatusEntries map[string]v1.PodResizeStatus `json:"resizeStatusEntries,omitempty"`
Checksum checksum.Checksum `json:"checksum"`
}
// NewPodResourceAllocationCheckpoint returns an instance of Checkpoint
func NewPodResourceAllocationCheckpoint() *PodResourceAllocationCheckpoint {
//lint:ignore unexported-type-in-api user-facing error message
return &PodResourceAllocationCheckpoint{
AllocationEntries: make(map[string]map[string]v1.ResourceList),
ResizeStatusEntries: make(map[string]v1.PodResizeStatus),
}
}
// MarshalCheckpoint returns marshalled checkpoint
func (prc *PodResourceAllocationCheckpoint) MarshalCheckpoint() ([]byte, error) {
// make sure checksum wasn't set before so it doesn't affect output checksum
prc.Checksum = 0
prc.Checksum = checksum.New(prc)
return json.Marshal(*prc)
}
// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint
func (prc *PodResourceAllocationCheckpoint) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, prc)
}
// VerifyChecksum verifies that current checksum of checkpoint is valid
func (prc *PodResourceAllocationCheckpoint) VerifyChecksum() error {
ck := prc.Checksum
prc.Checksum = 0
err := ck.Verify(prc)
prc.Checksum = ck
return err
}
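A minimal round-trip of the checkpoint type above, assuming the usual v1 and resource imports (illustrative only):
cp := NewPodResourceAllocationCheckpoint()
cp.AllocationEntries["pod-uid"] = map[string]v1.ResourceList{
	"c1": {v1.ResourceCPU: resource.MustParse("100m")},
}
blob, _ := cp.MarshalCheckpoint() // zeroes then stamps the checksum before marshalling
restored := NewPodResourceAllocationCheckpoint()
_ = restored.UnmarshalCheckpoint(blob)
_ = restored.VerifyChecksum() // returns nil if the payload was not altered on disk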


@ -0,0 +1,62 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"k8s.io/api/core/v1"
)
// PodResourceAllocation type is used in tracking resources allocated to pod's containers
type PodResourceAllocation map[string]map[string]v1.ResourceList
// PodResizeStatus type is used in tracking the last resize decision for pod
type PodResizeStatus map[string]v1.PodResizeStatus
// Clone returns a copy of PodResourceAllocation
func (pr PodResourceAllocation) Clone() PodResourceAllocation {
prCopy := make(PodResourceAllocation)
for pod := range pr {
prCopy[pod] = make(map[string]v1.ResourceList)
for container, alloc := range pr[pod] {
prCopy[pod][container] = alloc.DeepCopy()
}
}
return prCopy
}
// Reader interface used to read current pod resource allocation state
type Reader interface {
GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool)
GetPodResourceAllocation() PodResourceAllocation
GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool)
GetResizeStatus() PodResizeStatus
}
type writer interface {
SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error
SetPodResourceAllocation(PodResourceAllocation) error
SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error
SetResizeStatus(PodResizeStatus) error
Delete(podUID string, containerName string) error
ClearState() error
}
// State interface provides methods for tracking and setting pod resource allocation
type State interface {
Reader
writer
}


@ -0,0 +1,179 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"fmt"
"path"
"sync"
"k8s.io/api/core/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)
var _ State = &stateCheckpoint{}
type stateCheckpoint struct {
mux sync.RWMutex
cache State
checkpointManager checkpointmanager.CheckpointManager
checkpointName string
}
// NewStateCheckpoint creates new State for keeping track of pod resource allocations with checkpoint backend
func NewStateCheckpoint(stateDir, checkpointName string) (State, error) {
checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
if err != nil {
return nil, fmt.Errorf("failed to initialize checkpoint manager for pod allocation tracking: %v", err)
}
stateCheckpoint := &stateCheckpoint{
cache: NewStateMemory(),
checkpointManager: checkpointManager,
checkpointName: checkpointName,
}
if err := stateCheckpoint.restoreState(); err != nil {
//lint:ignore ST1005 user-facing error message
return nil, fmt.Errorf("could not restore state from checkpoint: %v, please drain this node and delete pod allocation checkpoint file %q before restarting Kubelet", err, path.Join(stateDir, checkpointName))
}
return stateCheckpoint, nil
}
// restores state from a checkpoint and creates it if it doesn't exist
func (sc *stateCheckpoint) restoreState() error {
sc.mux.Lock()
defer sc.mux.Unlock()
var err error
checkpoint := NewPodResourceAllocationCheckpoint()
if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint); err != nil {
if err == errors.ErrCheckpointNotFound {
return sc.storeState()
}
return err
}
sc.cache.SetPodResourceAllocation(checkpoint.AllocationEntries)
sc.cache.SetResizeStatus(checkpoint.ResizeStatusEntries)
klog.V(2).InfoS("State checkpoint: restored pod resource allocation state from checkpoint")
return nil
}
// saves state to a checkpoint, caller is responsible for locking
func (sc *stateCheckpoint) storeState() error {
checkpoint := NewPodResourceAllocationCheckpoint()
podAllocation := sc.cache.GetPodResourceAllocation()
for pod := range podAllocation {
checkpoint.AllocationEntries[pod] = make(map[string]v1.ResourceList)
for container, alloc := range podAllocation[pod] {
checkpoint.AllocationEntries[pod][container] = alloc
}
}
podResizeStatus := sc.cache.GetResizeStatus()
checkpoint.ResizeStatusEntries = make(map[string]v1.PodResizeStatus)
for pUID, rStatus := range podResizeStatus {
checkpoint.ResizeStatusEntries[pUID] = rStatus
}
err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
if err != nil {
klog.ErrorS(err, "Failed to save pod allocation checkpoint")
return err
}
return nil
}
// GetContainerResourceAllocation returns current resources allocated to a pod's container
func (sc *stateCheckpoint) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetContainerResourceAllocation(podUID, containerName)
}
// GetPodResourceAllocation returns current pod resource allocation
func (sc *stateCheckpoint) GetPodResourceAllocation() PodResourceAllocation {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetPodResourceAllocation()
}
// GetPodResizeStatus returns the last resize decision for a pod
func (sc *stateCheckpoint) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetPodResizeStatus(podUID)
}
// GetResizeStatus returns the set of resize decisions made
func (sc *stateCheckpoint) GetResizeStatus() PodResizeStatus {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetResizeStatus()
}
// SetContainerResourceAllocation sets resources allocated to a pod's container
func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetContainerResourceAllocation(podUID, containerName, alloc)
return sc.storeState()
}
// SetPodResourceAllocation sets pod resource allocation
func (sc *stateCheckpoint) SetPodResourceAllocation(a PodResourceAllocation) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetPodResourceAllocation(a)
return sc.storeState()
}
// SetPodResizeStatus sets the last resize decision for a pod
func (sc *stateCheckpoint) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetPodResizeStatus(podUID, resizeStatus)
return sc.storeState()
}
// SetResizeStatus sets the resize decisions
func (sc *stateCheckpoint) SetResizeStatus(rs PodResizeStatus) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetResizeStatus(rs)
return sc.storeState()
}
// Delete deletes the allocation for the specified pod and container (all of the pod's state if containerName is empty)
func (sc *stateCheckpoint) Delete(podUID string, containerName string) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.Delete(podUID, containerName)
return sc.storeState()
}
// ClearState clears the state and saves it in a checkpoint
func (sc *stateCheckpoint) ClearState() error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.ClearState()
return sc.storeState()
}
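Usage sketch for the checkpoint-backed store (directory, checkpoint name and status string are assumptions for illustration):
st, err := NewStateCheckpoint(os.TempDir(), "pod_status_manager_state")
if err != nil {
	klog.ErrorS(err, "Failed to create pod allocation state store")
	return
}
_ = st.SetPodResizeStatus("pod-uid", v1.PodResizeStatus("InProgress")) // persists to the checkpoint file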


@ -0,0 +1,152 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"sync"
"k8s.io/api/core/v1"
"k8s.io/klog/v2"
)
type stateMemory struct {
sync.RWMutex
podAllocation PodResourceAllocation
podResizeStatus PodResizeStatus
}
var _ State = &stateMemory{}
// NewStateMemory creates new State to track resources allocated to pods
func NewStateMemory() State {
klog.V(2).InfoS("Initialized new in-memory state store for pod resource allocation tracking")
return &stateMemory{
podAllocation: PodResourceAllocation{},
podResizeStatus: PodResizeStatus{},
}
}
func (s *stateMemory) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
s.RLock()
defer s.RUnlock()
alloc, ok := s.podAllocation[podUID][containerName]
return alloc.DeepCopy(), ok
}
func (s *stateMemory) GetPodResourceAllocation() PodResourceAllocation {
s.RLock()
defer s.RUnlock()
return s.podAllocation.Clone()
}
func (s *stateMemory) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
s.RLock()
defer s.RUnlock()
resizeStatus, ok := s.podResizeStatus[podUID]
return resizeStatus, ok
}
func (s *stateMemory) GetResizeStatus() PodResizeStatus {
s.RLock()
defer s.RUnlock()
prs := make(map[string]v1.PodResizeStatus)
for k, v := range s.podResizeStatus {
prs[k] = v
}
return prs
}
func (s *stateMemory) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error {
s.Lock()
defer s.Unlock()
if _, ok := s.podAllocation[podUID]; !ok {
s.podAllocation[podUID] = make(map[string]v1.ResourceList)
}
s.podAllocation[podUID][containerName] = alloc
klog.V(3).InfoS("Updated container resource allocation", "podUID", podUID, "containerName", containerName, "alloc", alloc)
return nil
}
func (s *stateMemory) SetPodResourceAllocation(a PodResourceAllocation) error {
s.Lock()
defer s.Unlock()
s.podAllocation = a.Clone()
klog.V(3).InfoS("Updated pod resource allocation", "allocation", a)
return nil
}
func (s *stateMemory) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error {
s.Lock()
defer s.Unlock()
if resizeStatus != "" {
s.podResizeStatus[podUID] = resizeStatus
} else {
delete(s.podResizeStatus, podUID)
}
klog.V(3).InfoS("Updated pod resize state", "podUID", podUID, "resizeStatus", resizeStatus)
return nil
}
func (s *stateMemory) SetResizeStatus(rs PodResizeStatus) error {
s.Lock()
defer s.Unlock()
prs := make(map[string]v1.PodResizeStatus)
for k, v := range rs {
prs[k] = v
}
s.podResizeStatus = prs
klog.V(3).InfoS("Updated pod resize state", "resizes", rs)
return nil
}
func (s *stateMemory) deleteContainer(podUID string, containerName string) {
delete(s.podAllocation[podUID], containerName)
if len(s.podAllocation[podUID]) == 0 {
delete(s.podAllocation, podUID)
delete(s.podResizeStatus, podUID)
}
klog.V(3).InfoS("Deleted pod resource allocation", "podUID", podUID, "containerName", containerName)
}
func (s *stateMemory) Delete(podUID string, containerName string) error {
s.Lock()
defer s.Unlock()
if len(containerName) == 0 {
delete(s.podAllocation, podUID)
delete(s.podResizeStatus, podUID)
klog.V(3).InfoS("Deleted pod resource allocation and resize state", "podUID", podUID)
return nil
}
s.deleteContainer(podUID, containerName)
return nil
}
func (s *stateMemory) ClearState() error {
s.Lock()
defer s.Unlock()
s.podAllocation = make(PodResourceAllocation)
s.podResizeStatus = make(PodResizeStatus)
klog.V(3).InfoS("Cleared state")
return nil
}
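The in-memory store is what the checkpoint store wraps; a short usage sketch (UID, container name and quantity assumed):
s := NewStateMemory()
_ = s.SetContainerResourceAllocation("pod-uid", "c1",
	v1.ResourceList{v1.ResourceMemory: resource.MustParse("100Mi")})
if alloc, ok := s.GetContainerResourceAllocation("pod-uid", "c1"); ok {
	_ = alloc.Memory().Value() // 104857600 bytes
}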


@ -41,10 +41,14 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/pkg/kubelet/metrics"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod" kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/status/state"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
statusutil "k8s.io/kubernetes/pkg/util/pod" statusutil "k8s.io/kubernetes/pkg/util/pod"
) )
// podStatusManagerStateFile is the file name where status manager stores its state
const podStatusManagerStateFile = "pod_status_manager_state"
// A wrapper around v1.PodStatus that includes a version to enforce that stale pod statuses are
// not sent to the API server.
type versionedPodStatus struct {
@ -79,6 +83,10 @@ type manager struct {
podDeletionSafety PodDeletionSafetyProvider
podStartupLatencyHelper PodStartupLatencyStateHelper
// state allows to save/restore pod resource allocation and tolerate kubelet restarts.
state state.State
// stateFileDirectory holds the directory where the state file for checkpoints is held.
stateFileDirectory string
}
// PodStatusProvider knows how to provide status for a pod. It's intended to be used by other components
@ -128,12 +136,21 @@ type Manager interface {
// RemoveOrphanedStatuses scans the status cache and removes any entries for pods not included in
// the provided podUIDs.
RemoveOrphanedStatuses(podUIDs map[types.UID]bool)
// State returns a read-only interface to the internal status manager state.
State() state.Reader
// SetPodAllocation checkpoints the resources allocated to a pod's containers.
SetPodAllocation(pod *v1.Pod) error
// SetPodResizeStatus checkpoints the last resizing decision for the pod.
SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error
}
const syncPeriod = 10 * time.Second
// NewManager returns a functional Manager.
func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podDeletionSafety PodDeletionSafetyProvider, podStartupLatencyHelper PodStartupLatencyStateHelper, stateFileDirectory string) Manager {
return &manager{
kubeClient: kubeClient,
podManager: podManager,
@ -142,6 +159,7 @@ func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podD
apiStatusVersions: make(map[kubetypes.MirrorPodUID]uint64),
podDeletionSafety: podDeletionSafety,
podStartupLatencyHelper: podStartupLatencyHelper,
stateFileDirectory: stateFileDirectory,
}
}
@ -173,6 +191,15 @@ func (m *manager) Start() {
return
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
stateImpl, err := state.NewStateCheckpoint(m.stateFileDirectory, podStatusManagerStateFile)
if err != nil {
klog.ErrorS(err, "Could not initialize pod allocation checkpoint manager, please drain node and remove policy state file")
return
}
m.state = stateImpl
}
klog.InfoS("Starting to sync pod status with apiserver") klog.InfoS("Starting to sync pod status with apiserver")
//nolint:staticcheck // SA1015 Ticker can leak since this is only called once and doesn't handle termination. //nolint:staticcheck // SA1015 Ticker can leak since this is only called once and doesn't handle termination.
@ -200,6 +227,34 @@ func (m *manager) Start() {
}, 0)
}
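To make the wiring above concrete, here is a hedged sketch of the checkpoint lifecycle: the state file is created under the kubelet's state directory, survives a restart, and can be read back. Only NewStateCheckpoint and the setters appear in this diff; GetContainerResourceAllocation is assumed here for illustration and may differ from the real Reader interface.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/kubelet/status/state"
)

func main() {
	dir := "/var/lib/kubelet" // stateFileDirectory handed to NewManager
	st, err := state.NewStateCheckpoint(dir, "pod_status_manager_state")
	if err != nil {
		panic(err)
	}
	alloc := v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m")}
	_ = st.SetContainerResourceAllocation("pod-uid-1", "app", alloc)

	// After a kubelet restart, re-opening the same checkpoint restores the allocation.
	restored, _ := state.NewStateCheckpoint(dir, "pod_status_manager_state")
	got, ok := restored.GetContainerResourceAllocation("pod-uid-1", "app") // assumed getter, not shown in this diff
	fmt.Println(got, ok)
}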
// State returns the pod resources checkpoint state of the pod status manager
func (m *manager) State() state.Reader {
return m.state
}
// SetPodAllocation checkpoints the resources allocated to a pod's containers
func (m *manager) SetPodAllocation(pod *v1.Pod) error {
m.podStatusesLock.RLock()
defer m.podStatusesLock.RUnlock()
for _, container := range pod.Spec.Containers {
var alloc v1.ResourceList
if container.Resources.Requests != nil {
alloc = container.Resources.Requests.DeepCopy()
}
if err := m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc); err != nil {
return err
}
}
return nil
}
// SetPodResizeStatus checkpoints the last resizing decision for the pod.
func (m *manager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error {
m.podStatusesLock.RLock()
defer m.podStatusesLock.RUnlock()
return m.state.SetPodResizeStatus(string(podUID), resizeStatus)
}
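A hedged caller-side sketch of how kubelet code might use these new Manager methods once a resize has been admitted: checkpoint the accepted per-container requests, then record the resize decision. The constant v1.PodResizeStatusInProgress is assumed for illustration; only the v1.PodResizeStatus type itself appears in this diff.

package resizesketch

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/status"
)

// recordAcceptedResize is illustrative only; it is not part of this change.
func recordAcceptedResize(sm status.Manager, pod *v1.Pod) error {
	// Persist the container requests the kubelet has agreed to honor.
	if err := sm.SetPodAllocation(pod); err != nil {
		return err
	}
	// Record that the resize is being actuated (constant name assumed).
	return sm.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInProgress)
}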
func (m *manager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) {
m.podStatusesLock.RLock()
defer m.podStatusesLock.RUnlock()
@ -616,6 +671,9 @@ func (m *manager) deletePodStatus(uid types.UID) {
defer m.podStatusesLock.Unlock()
delete(m.podStatuses, uid)
m.podStartupLatencyHelper.DeletePodStartupState(uid)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
m.state.Delete(string(uid), "")
}
}
// TODO(filipg): It'd be cleaner if we can do this without signal from user.
@ -626,6 +684,9 @@ func (m *manager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) {
if _, ok := podUIDs[key]; !ok {
klog.V(5).InfoS("Removing pod from status map.", "podUID", key)
delete(m.podStatuses, key)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
m.state.Delete(string(key), "")
}
}
}
}

View File

@ -18,6 +18,7 @@ package status
import (
"fmt"
"io/ioutil"
"math/rand" "math/rand"
"reflect" "reflect"
"strconv" "strconv"
@ -87,7 +88,13 @@ func newTestManager(kubeClient clientset.Interface) *manager {
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient())
podManager.AddPod(getTestPod())
podStartupLatencyTracker := util.NewPodStartupLatencyTracker()
testRootDir := ""
if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil {
return nil
} else {
testRootDir = tempDir
}
return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, testRootDir).(*manager)
}
func generateRandomMessage() string {
@ -962,7 +969,7 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient())
podStartupLatencyTracker := util.NewPodStartupLatencyTracker()
syncer := NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, "").(*manager)
original := tc.pod.DeepCopy()
syncer.SetPodStatus(original, original.Status)

View File

@ -27,6 +27,7 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
types "k8s.io/apimachinery/pkg/types" types "k8s.io/apimachinery/pkg/types"
container "k8s.io/kubernetes/pkg/kubelet/container" container "k8s.io/kubernetes/pkg/kubelet/container"
state "k8s.io/kubernetes/pkg/kubelet/status/state"
)
// MockPodStatusProvider is a mock of PodStatusProvider interface.
@ -239,6 +240,34 @@ func (mr *MockManagerMockRecorder) SetContainerStartup(podUID, containerID, star
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetContainerStartup", reflect.TypeOf((*MockManager)(nil).SetContainerStartup), podUID, containerID, started)
}
// SetPodAllocation mocks base method.
func (m *MockManager) SetPodAllocation(pod *v1.Pod) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "SetPodAllocation", pod)
ret0, _ := ret[0].(error)
return ret0
}
// SetPodAllocation indicates an expected call of SetPodAllocation.
func (mr *MockManagerMockRecorder) SetPodAllocation(pod interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPodAllocation", reflect.TypeOf((*MockManager)(nil).SetPodAllocation), pod)
}
// SetPodResizeStatus mocks base method.
func (m *MockManager) SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "SetPodResizeStatus", podUID, resize)
ret0, _ := ret[0].(error)
return ret0
}
// SetPodResizeStatus indicates an expected call of SetPodResizeStatus.
func (mr *MockManagerMockRecorder) SetPodResizeStatus(podUID, resize interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPodResizeStatus", reflect.TypeOf((*MockManager)(nil).SetPodResizeStatus), podUID, resize)
}
// SetPodStatus mocks base method.
func (m *MockManager) SetPodStatus(pod *v1.Pod, status v1.PodStatus) {
m.ctrl.T.Helper()
@ -263,6 +292,20 @@ func (mr *MockManagerMockRecorder) Start() *gomock.Call {
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Start", reflect.TypeOf((*MockManager)(nil).Start))
}
// State mocks base method.
func (m *MockManager) State() state.Reader {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "State")
ret0, _ := ret[0].(state.Reader)
return ret0
}
// State indicates an expected call of State.
func (mr *MockManagerMockRecorder) State() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "State", reflect.TypeOf((*MockManager)(nil).State))
}
// TerminatePod mocks base method.
func (m *MockManager) TerminatePod(pod *v1.Pod) {
m.ctrl.T.Helper()
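The generated expectations above can be exercised from a test in the usual gomock way; a hedged sketch, assuming the NewMockManager constructor and EXPECT helper that mockgen normally emits alongside these methods:

package statustesting

import (
	"testing"

	"github.com/golang/mock/gomock"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
)

func TestResizeCheckpointingMock(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	m := NewMockManager(ctrl) // assumed to be generated in this package
	pod := &v1.Pod{}
	var resize v1.PodResizeStatus

	// Expect the two new checkpointing calls added by this change.
	m.EXPECT().SetPodAllocation(pod).Return(nil)
	m.EXPECT().SetPodResizeStatus(types.UID("uid-1"), gomock.Any()).Return(nil)

	if err := m.SetPodAllocation(pod); err != nil {
		t.Fatal(err)
	}
	if err := m.SetPodResizeStatus(types.UID("uid-1"), resize); err != nil {
		t.Fatal(err)
	}
}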

test/e2e/node/pod_resize.go (new file, 1447 lines; diff suppressed because it is too large)