diff --git a/pkg/api/pod/util.go b/pkg/api/pod/util.go index ed6d175859c..5c4558bd93e 100644 --- a/pkg/api/pod/util.go +++ b/pkg/api/pod/util.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metavalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation" + "k8s.io/apimachinery/pkg/util/diff" utilfeature "k8s.io/apiserver/pkg/util/feature" api "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/apis/core/helper" @@ -455,19 +456,24 @@ func DropDisabledTemplateFields(podTemplate, oldPodTemplate *api.PodTemplateSpec func DropDisabledPodFields(pod, oldPod *api.Pod) { var ( podSpec *api.PodSpec + podStatus *api.PodStatus podAnnotations map[string]string oldPodSpec *api.PodSpec + oldPodStatus *api.PodStatus oldPodAnnotations map[string]string ) if pod != nil { podSpec = &pod.Spec + podStatus = &pod.Status podAnnotations = pod.Annotations } if oldPod != nil { oldPodSpec = &oldPod.Spec + oldPodStatus = &oldPod.Status oldPodAnnotations = oldPod.Annotations } dropDisabledFields(podSpec, podAnnotations, oldPodSpec, oldPodAnnotations) + dropDisabledPodStatusFields(podStatus, oldPodStatus, podSpec, oldPodSpec) } // dropDisabledFields removes disabled fields from the pod metadata and spec. @@ -522,6 +528,42 @@ func dropDisabledFields( dropDisabledNodeInclusionPolicyFields(podSpec, oldPodSpec) dropDisabledMatchLabelKeysField(podSpec, oldPodSpec) dropDisabledDynamicResourceAllocationFields(podSpec, oldPodSpec) + + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && !inPlacePodVerticalScalingInUse(oldPodSpec) { + // Drop ResizePolicy fields. Don't drop updates to Resources field as template.spec.resources + // field is mutable for certain controllers. Let ValidatePodUpdate handle it. 
+ for i := range podSpec.Containers { + podSpec.Containers[i].ResizePolicy = nil + } + for i := range podSpec.InitContainers { + podSpec.InitContainers[i].ResizePolicy = nil + } + for i := range podSpec.EphemeralContainers { + podSpec.EphemeralContainers[i].ResizePolicy = nil + } + } +} + +// dropDisabledPodStatusFields removes disabled fields from the pod status +func dropDisabledPodStatusFields(podStatus, oldPodStatus *api.PodStatus, podSpec, oldPodSpec *api.PodSpec) { + // the new status is always be non-nil + if podStatus == nil { + podStatus = &api.PodStatus{} + } + + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && !inPlacePodVerticalScalingInUse(oldPodSpec) { + // Drop Resize, ResourcesAllocated, and Resources fields + dropResourcesFields := func(csl []api.ContainerStatus) { + for i := range csl { + csl[i].ResourcesAllocated = nil + csl[i].Resources = nil + } + } + dropResourcesFields(podStatus.ContainerStatuses) + dropResourcesFields(podStatus.InitContainerStatuses) + dropResourcesFields(podStatus.EphemeralContainerStatuses) + podStatus.Resize = "" + } } // dropDisabledDynamicResourceAllocationFields removes pod claim references from @@ -692,6 +734,22 @@ func hostUsersInUse(podSpec *api.PodSpec) bool { return false } +// inPlacePodVerticalScalingInUse returns true if pod spec is non-nil and ResizePolicy is set +func inPlacePodVerticalScalingInUse(podSpec *api.PodSpec) bool { + if podSpec == nil { + return false + } + var inUse bool + VisitContainers(podSpec, Containers, func(c *api.Container, containerType ContainerType) bool { + if len(c.ResizePolicy) > 0 { + inUse = true + return false + } + return true + }) + return inUse +} + // procMountInUse returns true if the pod spec is non-nil and has a SecurityContext's ProcMount field set to a non-default value func procMountInUse(podSpec *api.PodSpec) bool { if podSpec == nil { @@ -785,3 +843,28 @@ func hasInvalidLabelValueInAffinitySelector(spec *api.PodSpec) bool { } return 
false } + +func MarkPodProposedForResize(oldPod, newPod *api.Pod) { + for i, c := range newPod.Spec.Containers { + if c.Resources.Requests == nil { + continue + } + if diff.ObjectDiff(oldPod.Spec.Containers[i].Resources, c.Resources) == "" { + continue + } + findContainerStatus := func(css []api.ContainerStatus, cName string) (api.ContainerStatus, bool) { + for i := range css { + if css[i].Name == cName { + return css[i], true + } + } + return api.ContainerStatus{}, false + } + if cs, ok := findContainerStatus(newPod.Status.ContainerStatuses, c.Name); ok { + if diff.ObjectDiff(c.Resources.Requests, cs.ResourcesAllocated) != "" { + newPod.Status.Resize = api.PodResizeStatusProposed + break + } + } + } +} diff --git a/pkg/api/pod/util_test.go b/pkg/api/pod/util_test.go index 5def1ade4d8..9a05e914a75 100644 --- a/pkg/api/pod/util_test.go +++ b/pkg/api/pod/util_test.go @@ -25,7 +25,9 @@ import ( "github.com/google/go-cmp/cmp" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/diff" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation/field" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -2274,3 +2276,394 @@ func TestDropVolumesClaimField(t *testing.T) { } } } + +func TestDropInPlacePodVerticalScaling(t *testing.T) { + podWithInPlaceVerticalScaling := func() *api.Pod { + return &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + ResizePolicy: []api.ContainerResizePolicy{ + {ResourceName: api.ResourceCPU, Policy: api.RestartNotRequired}, + {ResourceName: api.ResourceMemory, Policy: api.RestartRequired}, + }, + }, + }, + }, + Status: api.PodStatus{ + Resize: api.PodResizeStatusInProgress, + ContainerStatuses: 
[]api.ContainerStatus{ + { + Name: "c1", + Image: "image", + ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Resources: &api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")}, + }, + }, + }, + }, + } + } + podWithoutInPlaceVerticalScaling := func() *api.Pod { + return &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + }, + }, + }, + } + } + + podInfo := []struct { + description string + hasInPlaceVerticalScaling bool + pod func() *api.Pod + }{ + { + description: "has in-place vertical scaling enabled with resources", + hasInPlaceVerticalScaling: true, + pod: podWithInPlaceVerticalScaling, + }, + { + description: "has in-place vertical scaling disabled", + hasInPlaceVerticalScaling: false, + pod: podWithoutInPlaceVerticalScaling, + }, + { + description: "is nil", + hasInPlaceVerticalScaling: false, + pod: func() *api.Pod { return nil }, + }, + } + + for _, enabled := range []bool{true, false} { + for _, oldPodInfo := range podInfo { + for _, newPodInfo := range podInfo { + oldPodHasInPlaceVerticalScaling, oldPod := oldPodInfo.hasInPlaceVerticalScaling, oldPodInfo.pod() + newPodHasInPlaceVerticalScaling, newPod := newPodInfo.hasInPlaceVerticalScaling, newPodInfo.pod() + if newPod == nil { + continue + } + + t.Run(fmt.Sprintf("feature enabled=%v, old pod %v, new pod %v", enabled, oldPodInfo.description, newPodInfo.description), func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, 
features.InPlacePodVerticalScaling, enabled)() + + var oldPodSpec *api.PodSpec + var oldPodStatus *api.PodStatus + if oldPod != nil { + oldPodSpec = &oldPod.Spec + oldPodStatus = &oldPod.Status + } + dropDisabledFields(&newPod.Spec, nil, oldPodSpec, nil) + dropDisabledPodStatusFields(&newPod.Status, oldPodStatus, &newPod.Spec, oldPodSpec) + + // old pod should never be changed + if !reflect.DeepEqual(oldPod, oldPodInfo.pod()) { + t.Errorf("old pod changed: %v", diff.ObjectReflectDiff(oldPod, oldPodInfo.pod())) + } + + switch { + case enabled || oldPodHasInPlaceVerticalScaling: + // new pod shouldn't change if feature enabled or if old pod has ResizePolicy set + if !reflect.DeepEqual(newPod, newPodInfo.pod()) { + t.Errorf("new pod changed: %v", diff.ObjectReflectDiff(newPod, newPodInfo.pod())) + } + case newPodHasInPlaceVerticalScaling: + // new pod should be changed + if reflect.DeepEqual(newPod, newPodInfo.pod()) { + t.Errorf("new pod was not changed") + } + // new pod should not have ResizePolicy + if !reflect.DeepEqual(newPod, podWithoutInPlaceVerticalScaling()) { + t.Errorf("new pod has ResizePolicy: %v", diff.ObjectReflectDiff(newPod, podWithoutInPlaceVerticalScaling())) + } + default: + // new pod should not need to be changed + if !reflect.DeepEqual(newPod, newPodInfo.pod()) { + t.Errorf("new pod changed: %v", diff.ObjectReflectDiff(newPod, newPodInfo.pod())) + } + } + }) + } + } + } +} + +func TestMarkPodProposedForResize(t *testing.T) { + testCases := []struct { + desc string + newPod *api.Pod + oldPod *api.Pod + expectedPod *api.Pod + }{ + { + desc: "nil requests", + newPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + }, + }, + }, + }, + oldPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + }, + }, + }, + Status: api.PodStatus{ + 
ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + }, + }, + }, + }, + expectedPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + }, + }, + }, + }, + }, + { + desc: "resources unchanged", + newPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + }, + }, + }, + }, + oldPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + }, + }, + }, + }, + expectedPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + }, + }, + }, + }, + }, + { + desc: "resize desired", + newPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: 
api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + { + Name: "c2", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("400m")}, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + }, + { + Name: "c2", + Image: "image", + ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + }, + }, + oldPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + { + Name: "c2", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")}, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + }, + { + Name: "c2", + Image: "image", + ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + }, + }, + expectedPod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "c1", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + { + Name: "c2", + Image: "image", + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")}, + 
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("400m")}, + }, + }, + }, + }, + Status: api.PodStatus{ + Resize: api.PodResizeStatusProposed, + ContainerStatuses: []api.ContainerStatus{ + { + Name: "c1", + Image: "image", + ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")}, + }, + { + Name: "c2", + Image: "image", + ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")}, + }, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + MarkPodProposedForResize(tc.oldPod, tc.newPod) + if diff := cmp.Diff(tc.expectedPod, tc.newPod); diff != "" { + t.Errorf("unexpected pod spec (-want, +got):\n%s", diff) + } + }) + } +} diff --git a/pkg/api/v1/pod/util.go b/pkg/api/v1/pod/util.go index dc526b62eb2..2bd37c5b5a1 100644 --- a/pkg/api/v1/pod/util.go +++ b/pkg/api/v1/pod/util.go @@ -257,7 +257,7 @@ func visitContainerConfigmapNames(container *v1.Container, visitor Visitor) bool } // GetContainerStatus extracts the status of container "name" from "statuses". -// It also returns if "name" exists. +// It returns true if "name" exists, else returns false. func GetContainerStatus(statuses []v1.ContainerStatus, name string) (v1.ContainerStatus, bool) { for i := range statuses { if statuses[i].Name == name { @@ -274,6 +274,17 @@ func GetExistingContainerStatus(statuses []v1.ContainerStatus, name string) v1.C return status } +// GetIndexOfContainerStatus gets the index of status of container "name" from "statuses", +// It returns (index, true) if "name" exists, else returns (0, false). +func GetIndexOfContainerStatus(statuses []v1.ContainerStatus, name string) (int, bool) { + for i := range statuses { + if statuses[i].Name == name { + return i, true + } + } + return 0, false +} + // IsPodAvailable returns true if a pod is available; false otherwise. // Precondition for an available pod is that it must be ready. 
On top // of that, there are two cases when a pod can be considered available: diff --git a/pkg/api/v1/pod/util_test.go b/pkg/api/v1/pod/util_test.go index 20b32206f20..24357f6c082 100644 --- a/pkg/api/v1/pod/util_test.go +++ b/pkg/api/v1/pod/util_test.go @@ -809,6 +809,53 @@ func TestGetContainerStatus(t *testing.T) { } } +func TestGetIndexOfContainerStatus(t *testing.T) { + testStatus := []v1.ContainerStatus{ + { + Name: "c1", + Ready: false, + Image: "image1", + }, + { + Name: "c2", + Ready: true, + Image: "image1", + }, + } + + tests := []struct { + desc string + containerName string + expectedExists bool + expectedIndex int + }{ + { + desc: "first container", + containerName: "c1", + expectedExists: true, + expectedIndex: 0, + }, + { + desc: "second container", + containerName: "c2", + expectedExists: true, + expectedIndex: 1, + }, + { + desc: "non-existent container", + containerName: "c3", + expectedExists: false, + expectedIndex: 0, + }, + } + + for _, test := range tests { + idx, exists := GetIndexOfContainerStatus(testStatus, test.containerName) + assert.Equal(t, test.expectedExists, exists, "GetIndexOfContainerStatus: "+test.desc) + assert.Equal(t, test.expectedIndex, idx, "GetIndexOfContainerStatus: "+test.desc) + } +} + func TestUpdatePodCondition(t *testing.T) { time := metav1.Now() diff --git a/pkg/apis/core/types.go b/pkg/apis/core/types.go index 06b4f9e59da..112421181f8 100644 --- a/pkg/apis/core/types.go +++ b/pkg/apis/core/types.go @@ -2138,6 +2138,33 @@ const ( PullIfNotPresent PullPolicy = "IfNotPresent" ) +// ResourceResizePolicy specifies how Kubernetes should handle resource resize. +type ResourceResizePolicy string + +// These are the valid resource resize policy values: +const ( + // RestartNotRequired tells Kubernetes to resize the container in-place + // without restarting it, if possible. Kubernetes may however choose to + // restart the container if it is unable to actuate resize without a + // restart. For e.g. 
the runtime doesn't support restart-free resizing. + RestartNotRequired ResourceResizePolicy = "RestartNotRequired" + // 'RestartRequired' tells Kubernetes to resize the container in-place + // by stopping and starting the container when new resources are applied. + // This is needed for legacy applications. For e.g. java apps using the + // -xmxN flag which are unable to use resized memory without restarting. + RestartRequired ResourceResizePolicy = "RestartRequired" +) + +// ContainerResizePolicy represents resource resize policy for a single container. +type ContainerResizePolicy struct { + // Name of the resource type to which this resource resize policy applies. + // Supported values: cpu, memory. + ResourceName ResourceName + // Resource resize policy applicable to the specified resource name. + // If not specified, it defaults to RestartNotRequired. + Policy ResourceResizePolicy +} + // PreemptionPolicy describes a policy for if/when to preempt a pod. type PreemptionPolicy string @@ -2246,6 +2273,10 @@ type Container struct { // Compute resource requirements. // +optional Resources ResourceRequirements + // Resources resize policy for the container. + // +featureGate=InPlacePodVerticalScaling + // +optional + ResizePolicy []ContainerResizePolicy // +optional VolumeMounts []VolumeMount // volumeDevices is the list of block devices to be used by the container. @@ -2430,6 +2461,17 @@ type ContainerStatus struct { // +optional ContainerID string Started *bool + // ResourcesAllocated represents the compute resources allocated for this container by the + // node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission + // and after successfully admitting desired pod resize. 
+ // +featureGate=InPlacePodVerticalScaling + // +optional + ResourcesAllocated ResourceList + // Resources represents the compute resource requests and limits that have been successfully + // enacted on the running container after it has been started or has been successfully resized. + // +featureGate=InPlacePodVerticalScaling + // +optional + Resources *ResourceRequirements } // PodPhase is a label for the condition of a pod at the current time. @@ -2495,6 +2537,20 @@ type PodCondition struct { Message string } +// PodResizeStatus shows status of desired resize of a pod's containers. +type PodResizeStatus string + +const ( + // Pod resources resize has been requested and will be evaluated by node. + PodResizeStatusProposed PodResizeStatus = "Proposed" + // Pod resources resize has been accepted by node and is being actuated. + PodResizeStatusInProgress PodResizeStatus = "InProgress" + // Node cannot resize the pod at this time and will keep retrying. + PodResizeStatusDeferred PodResizeStatus = "Deferred" + // Requested pod resize is not feasible and will not be re-evaluated. + PodResizeStatusInfeasible PodResizeStatus = "Infeasible" +) + // RestartPolicy describes how the container should be restarted. // Only one of the following restart policies may be specified. // If none of the following policies is specified, the default one @@ -3412,6 +3468,10 @@ type EphemeralContainerCommon struct { // already allocated to the pod. // +optional Resources ResourceRequirements + // Resources resize policy for the container. + // +featureGate=InPlacePodVerticalScaling + // +optional + ResizePolicy []ContainerResizePolicy // Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. // +optional VolumeMounts []VolumeMount @@ -3528,6 +3588,13 @@ type PodStatus struct { // Status for any ephemeral containers that have run in this pod. 
// +optional EphemeralContainerStatuses []ContainerStatus + + // Status of resources resize desired for pod's containers. + // It is empty if no resources resize is pending. + // Any changes to container resources will automatically set this to "Proposed" + // +featureGate=InPlacePodVerticalScaling + // +optional + Resize PodResizeStatus } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index 8ccd27ced14..2f6adf79657 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -46,6 +46,7 @@ import ( apiservice "k8s.io/kubernetes/pkg/api/service" "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/apis/core/helper" + "k8s.io/kubernetes/pkg/apis/core/helper/qos" podshelper "k8s.io/kubernetes/pkg/apis/core/pods" corev1 "k8s.io/kubernetes/pkg/apis/core/v1" "k8s.io/kubernetes/pkg/capabilities" @@ -3011,6 +3012,37 @@ func validatePullPolicy(policy core.PullPolicy, fldPath *field.Path) field.Error return allErrors } +var supportedResizeResources = sets.NewString(string(core.ResourceCPU), string(core.ResourceMemory)) +var supportedResizePolicies = sets.NewString(string(core.RestartNotRequired), string(core.RestartRequired)) + +func validateResizePolicy(policyList []core.ContainerResizePolicy, fldPath *field.Path) field.ErrorList { + allErrors := field.ErrorList{} + + // validate that resource name is not repeated, supported resource names and policy values are specified + resources := make(map[core.ResourceName]bool) + for i, p := range policyList { + if _, found := resources[p.ResourceName]; found { + allErrors = append(allErrors, field.Duplicate(fldPath.Index(i), p.ResourceName)) + } + resources[p.ResourceName] = true + switch p.ResourceName { + case core.ResourceCPU, core.ResourceMemory: + case "": + allErrors = append(allErrors, field.Required(fldPath, "")) + default: + allErrors = append(allErrors, 
field.NotSupported(fldPath, p.ResourceName, supportedResizeResources.List())) + } + switch p.Policy { + case core.RestartNotRequired, core.RestartRequired: + case "": + allErrors = append(allErrors, field.Required(fldPath, "")) + default: + allErrors = append(allErrors, field.NotSupported(fldPath, p.Policy, supportedResizePolicies.List())) + } + } + return allErrors +} + // validateEphemeralContainers is called by pod spec and template validation to validate the list of ephemeral containers. // Note that this is called for pod template even though ephemeral containers aren't allowed in pod templates. func validateEphemeralContainers(ephemeralContainers []core.EphemeralContainer, containers, initContainers []core.Container, volumes map[string]core.VolumeSource, podClaimNames sets.String, fldPath *field.Path, opts PodValidationOptions) field.ErrorList { @@ -3133,6 +3165,9 @@ func validateInitContainers(containers []core.Container, regularContainers []cor if ctr.StartupProbe != nil { allErrs = append(allErrs, field.Forbidden(idxPath.Child("startupProbe"), "may not be set for init containers")) } + if len(ctr.ResizePolicy) > 0 { + allErrs = append(allErrs, field.Invalid(idxPath.Child("resizePolicy"), ctr.ResizePolicy, "must not be set for init containers")) + } } return allErrs @@ -4479,6 +4514,24 @@ func validateSeccompAnnotationsAndFieldsMatch(annotationValue string, seccompFie return nil } +var updatablePodSpecFields = []string{ + "`spec.containers[*].image`", + "`spec.initContainers[*].image`", + "`spec.activeDeadlineSeconds`", + "`spec.tolerations` (only additions to existing tolerations)", + "`spec.terminationGracePeriodSeconds` (allow it to be set to 1 if it was previously negative)", + "`spec.containers[*].resources` (for CPU/memory only)", +} + +// TODO(vinaykul,InPlacePodVerticalScaling): Drop this var once InPlacePodVerticalScaling goes GA and featuregate is gone. 
+var updatablePodSpecFieldsNoResources = []string{ + "`spec.containers[*].image`", + "`spec.initContainers[*].image`", + "`spec.activeDeadlineSeconds`", + "`spec.tolerations` (only additions to existing tolerations)", + "`spec.terminationGracePeriodSeconds` (allow it to be set to 1 if it was previously negative)", +} + // ValidatePodUpdate tests to see if the update is legal for an end user to make. newPod is updated with fields // that cannot be changed. func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) field.ErrorList { @@ -4538,12 +4591,56 @@ func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel return allErrs } + //TODO(vinaykul,InPlacePodVerticalScaling): With KEP 2527, we can rely on persistence of PodStatus.QOSClass + // We can use PodStatus.QOSClass instead of GetPodQOS here, in kubelet, and elsewhere, as PodStatus.QOSClass + // does not change once it is bootstrapped in podCreate. This needs to be addressed before beta as a + // separate PR covering all uses of GetPodQOS. With that change, we can drop the below block. + // Ref: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r1093790446 + // Ref: https://github.com/kubernetes/kubernetes/pull/102884/#discussion_r663280487 + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // reject attempts to change pod qos + oldQoS := qos.GetPodQOS(oldPod) + newQoS := qos.GetPodQOS(newPod) + if newQoS != oldQoS { + allErrs = append(allErrs, field.Invalid(fldPath, newQoS, "Pod QoS is immutable")) + } + } + // handle updateable fields by munging those fields prior to deep equal comparison. 
mungedPodSpec := *newPod.Spec.DeepCopy() // munge spec.containers[*].image var newContainers []core.Container for ix, container := range mungedPodSpec.Containers { container.Image = oldPod.Spec.Containers[ix].Image // +k8s:verify-mutation:reason=clone + // When the feature-gate is turned off, any new requests attempting to update CPU or memory + // resource values will result in validation failure. + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // Resources are mutable for CPU & memory only + // - user can now modify Resources to express new desired Resources + mungeCpuMemResources := func(resourceList, oldResourceList core.ResourceList) core.ResourceList { + if oldResourceList == nil { + return nil + } + var mungedResourceList core.ResourceList + if resourceList == nil { + mungedResourceList = make(core.ResourceList) + } else { + mungedResourceList = resourceList.DeepCopy() + } + delete(mungedResourceList, core.ResourceCPU) + delete(mungedResourceList, core.ResourceMemory) + if cpu, found := oldResourceList[core.ResourceCPU]; found { + mungedResourceList[core.ResourceCPU] = cpu + } + if mem, found := oldResourceList[core.ResourceMemory]; found { + mungedResourceList[core.ResourceMemory] = mem + } + return mungedResourceList + } + lim := mungeCpuMemResources(container.Resources.Limits, oldPod.Spec.Containers[ix].Resources.Limits) + req := mungeCpuMemResources(container.Resources.Requests, oldPod.Spec.Containers[ix].Resources.Requests) + container.Resources = core.ResourceRequirements{Limits: lim, Requests: req} + } newContainers = append(newContainers, container) } mungedPodSpec.Containers = newContainers @@ -4575,7 +4672,11 @@ func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel // This diff isn't perfect, but it's a helluva lot better an "I'm not going to tell you what the difference is". 
// TODO: Pinpoint the specific field that causes the invalid error after we have strategic merge diff specDiff := cmp.Diff(oldPod.Spec, mungedPodSpec) - allErrs = append(allErrs, field.Forbidden(specPath, fmt.Sprintf("pod updates may not change fields other than `spec.containers[*].image`, `spec.initContainers[*].image`, `spec.activeDeadlineSeconds`, `spec.tolerations` (only additions to existing tolerations) or `spec.terminationGracePeriodSeconds` (allow it to be set to 1 if it was previously negative)\n%v", specDiff))) + errs := field.Forbidden(specPath, fmt.Sprintf("pod updates may not change fields other than %s\n%v", strings.Join(updatablePodSpecFieldsNoResources, ","), specDiff)) + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + errs = field.Forbidden(specPath, fmt.Sprintf("pod updates may not change fields other than %s\n%v", strings.Join(updatablePodSpecFields, ","), specDiff)) + } + allErrs = append(allErrs, errs) } return allErrs diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 2f453037e00..3e25e3b0f08 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -6707,6 +6707,100 @@ func TestValidatePullPolicy(t *testing.T) { } } +func TestValidateResizePolicy(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() + tSupportedResizeResources := sets.NewString(string(core.ResourceCPU), string(core.ResourceMemory)) + tSupportedResizePolicies := sets.NewString(string(core.RestartNotRequired), string(core.RestartRequired)) + type T struct { + PolicyList []core.ContainerResizePolicy + ExpectError bool + Errors field.ErrorList + } + testCases := map[string]T{ + "ValidCPUandMemoryPolicies": { + []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequired"}, + {ResourceName: "memory", Policy: "RestartRequired"}, 
+ }, + false, + nil, + }, + "ValidCPUPolicy": { + []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartRequired"}, + }, + false, + nil, + }, + "ValidMemoryPolicy": { + []core.ContainerResizePolicy{ + {ResourceName: "memory", Policy: "RestartNotRequired"}, + }, + false, + nil, + }, + "NoPolicy": { + []core.ContainerResizePolicy{}, + false, + nil, + }, + "ValidCPUandInvalidMemoryPolicy": { + []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequired"}, + {ResourceName: "memory", Policy: "Restarrrt"}, + }, + true, + field.ErrorList{field.NotSupported(field.NewPath("field"), core.ResourceResizePolicy("Restarrrt"), tSupportedResizePolicies.List())}, + }, + "ValidMemoryandInvalidCPUPolicy": { + []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequirrred"}, + {ResourceName: "memory", Policy: "RestartRequired"}, + }, + true, + field.ErrorList{field.NotSupported(field.NewPath("field"), core.ResourceResizePolicy("RestartNotRequirrred"), tSupportedResizePolicies.List())}, + }, + "InvalidResourceNameValidPolicy": { + []core.ContainerResizePolicy{ + {ResourceName: "cpuuu", Policy: "RestartNotRequired"}, + }, + true, + field.ErrorList{field.NotSupported(field.NewPath("field"), core.ResourceName("cpuuu"), tSupportedResizeResources.List())}, + }, + "ValidResourceNameMissingPolicy": { + []core.ContainerResizePolicy{ + {ResourceName: "memory", Policy: ""}, + }, + true, + field.ErrorList{field.Required(field.NewPath("field"), "")}, + }, + "RepeatedPolicies": { + []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequired"}, + {ResourceName: "memory", Policy: "RestartRequired"}, + {ResourceName: "cpu", Policy: "RestartRequired"}, + }, + true, + field.ErrorList{field.Duplicate(field.NewPath("field").Index(2), core.ResourceCPU)}, + }, + } + for k, v := range testCases { + errs := validateResizePolicy(v.PolicyList, field.NewPath("field")) + if !v.ExpectError && len(errs) > 0 { + t.Errorf("Testcase %s - 
expected success, got error: %+v", k, errs) + } + if v.ExpectError { + if len(errs) == 0 { + t.Errorf("Testcase %s - expected error, got success", k) + } + delta := cmp.Diff(errs, v.Errors) + if delta != "" { + t.Errorf("Testcase %s - expected errors '%v', got '%v', diff: '%v'", k, v.Errors, errs, delta) + } + } + } +} + func getResourceLimits(cpu, memory string) core.ResourceList { res := core.ResourceList{} res[core.ResourceCPU] = resource.MustParse(cpu) @@ -6714,6 +6808,20 @@ func getResourceLimits(cpu, memory string) core.ResourceList { return res } +func getResources(cpu, memory, storage string) core.ResourceList { + res := core.ResourceList{} + if cpu != "" { + res[core.ResourceCPU] = resource.MustParse(cpu) + } + if memory != "" { + res[core.ResourceMemory] = resource.MustParse(memory) + } + if storage != "" { + res[core.ResourceEphemeralStorage] = resource.MustParse(storage) + } + return res +} + func TestValidateEphemeralContainers(t *testing.T) { containers := []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}} initContainers := []core.Container{{Name: "ictr", Image: "iimage", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}} @@ -7057,6 +7165,24 @@ func TestValidateEphemeralContainers(t *testing.T) { }, field.ErrorList{{Type: field.ErrorTypeForbidden, Field: "ephemeralContainers[0].lifecycle"}}, }, + { + "Container uses disallowed field: ResizePolicy", + line(), + []core.EphemeralContainer{ + { + EphemeralContainerCommon: core.EphemeralContainerCommon{ + Name: "resources-resize-policy", + Image: "image", + ImagePullPolicy: "IfNotPresent", + TerminationMessagePolicy: "File", + ResizePolicy: []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequired"}, + }, + }, + }, + }, + field.ErrorList{{Type: field.ErrorTypeForbidden, Field: "ephemeralContainers[0].resizePolicy"}}, + }, } for _, tc := range tcs { @@ -7273,6 +7399,16 @@ func TestValidateContainers(t 
*testing.T) { ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File", }, + { + Name: "resources-resize-policy", + Image: "image", + ResizePolicy: []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequired"}, + {ResourceName: "memory", Policy: "RestartRequired"}, + }, + ImagePullPolicy: "IfNotPresent", + TerminationMessagePolicy: "File", + }, { Name: "same-host-port-different-protocol", Image: "image", @@ -9036,6 +9172,32 @@ func TestValidatePodSpec(t *testing.T) { RestartPolicy: core.RestartPolicyAlways, DNSPolicy: core.DNSClusterFirst, }, + "disallowed resources resize policy for init containers": { + InitContainers: []core.Container{ + { + Name: "initctr", + Image: "initimage", + ResizePolicy: []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequired"}, + }, + ImagePullPolicy: "IfNotPresent", + TerminationMessagePolicy: "File", + }, + }, + Containers: []core.Container{ + { + Name: "ctr", + Image: "image", + ResizePolicy: []core.ContainerResizePolicy{ + {ResourceName: "cpu", Policy: "RestartNotRequired"}, + }, + ImagePullPolicy: "IfNotPresent", + TerminationMessagePolicy: "File", + }, + }, + RestartPolicy: core.RestartPolicyAlways, + DNSPolicy: core.DNSClusterFirst, + }, } for k, v := range failureCases { if errs := ValidatePodSpec(&v, nil, field.NewPath("field"), PodValidationOptions{}); len(errs) == 0 { @@ -10818,6 +10980,7 @@ func TestValidatePodCreateWithSchedulingGates(t *testing.T) { } func TestValidatePodUpdate(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() var ( activeDeadlineSecondsZero = int64(0) activeDeadlineSecondsNegative = int64(-30) @@ -11272,33 +11435,586 @@ func TestValidatePodUpdate(t *testing.T) { }, { new: core.Pod{ - ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, Spec: core.PodSpec{ Containers: []core.Container{ { - Image: "foo:V1", + Name: 
"container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", Resources: core.ResourceRequirements{ - Limits: getResourceLimits("100m", "0"), + Limits: getResources("200m", "0", "1Gi"), }, }, }, }, }, old: core.Pod{ - ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, Spec: core.PodSpec{ Containers: []core.Container{ { - Image: "foo:V2", + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", Resources: core.ResourceRequirements{ - Limits: getResourceLimits("1000m", "0"), + Limits: getResources("100m", "0", "1Gi"), }, }, }, }, }, - err: "spec: Forbidden: pod updates may not change fields", - test: "cpu change", + err: "", + test: "cpu limit change", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("100m", "200Mi"), + }, + }, + }, + }, + }, + err: "", + test: "memory limit change", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: core.ResourceRequirements{ + Limits: getResources("100m", "100Mi", "1Gi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + 
TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResources("100m", "100Mi", "2Gi"), + }, + }, + }, + }, + }, + err: "Forbidden: pod updates may not change fields other than", + test: "storage limit change", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: core.ResourceRequirements{ + Requests: getResourceLimits("100m", "0"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Requests: getResourceLimits("200m", "0"), + }, + }, + }, + }, + }, + err: "", + test: "cpu request change", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: core.ResourceRequirements{ + Requests: getResourceLimits("0", "200Mi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Requests: getResourceLimits("0", "100Mi"), + }, + }, + }, + }, + }, + err: "", + test: "memory request change", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: 
core.ResourceRequirements{ + Requests: getResources("100m", "0", "2Gi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Requests: getResources("100m", "0", "1Gi"), + }, + }, + }, + }, + }, + err: "Forbidden: pod updates may not change fields other than", + test: "storage request change", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: core.ResourceRequirements{ + Limits: getResources("200m", "400Mi", "1Gi"), + Requests: getResources("200m", "400Mi", "1Gi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: core.ResourceRequirements{ + Limits: getResources("100m", "100Mi", "1Gi"), + Requests: getResources("100m", "100Mi", "1Gi"), + }, + }, + }, + }, + }, + err: "", + test: "Pod QoS unchanged, guaranteed -> guaranteed", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V1", + Resources: core.ResourceRequirements{ + Limits: getResources("200m", "200Mi", "2Gi"), + Requests: getResources("100m", "100Mi", "1Gi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + 
Image: "foo:V1", + Resources: core.ResourceRequirements{ + Limits: getResources("400m", "400Mi", "2Gi"), + Requests: getResources("200m", "200Mi", "1Gi"), + }, + }, + }, + }, + }, + err: "", + test: "Pod QoS unchanged, burstable -> burstable", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("200m", "200Mi"), + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + err: "", + test: "Pod QoS unchanged, burstable -> burstable, add limits", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("200m", "200Mi"), + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + err: "", + test: "Pod QoS unchanged, burstable -> burstable, remove limits", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + 
Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResources("400m", "", "1Gi"), + Requests: getResources("300m", "", "1Gi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResources("200m", "500Mi", "1Gi"), + }, + }, + }, + }, + }, + err: "", + test: "Pod QoS unchanged, burstable -> burstable, add requests", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResources("400m", "500Mi", "2Gi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResources("200m", "300Mi", "2Gi"), + Requests: getResourceLimits("100m", "200Mi"), + }, + }, + }, + }, + }, + err: "", + test: "Pod QoS unchanged, burstable -> burstable, remove requests", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("200m", "200Mi"), + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ 
+ { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("100m", "100Mi"), + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + err: "Pod QoS is immutable", + test: "Pod QoS change, guaranteed -> burstable", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("100m", "100Mi"), + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + err: "Pod QoS is immutable", + test: "Pod QoS change, burstable -> guaranteed", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("200m", "200Mi"), + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + }, + }, + }, + }, + err: "Pod QoS is immutable", + test: "Pod QoS change, besteffort -> burstable", + }, + { + new: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: 
[]core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + }, + }, + }, + }, + old: core.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod"}, + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container", + TerminationMessagePolicy: "File", + ImagePullPolicy: "Always", + Image: "foo:V2", + Resources: core.ResourceRequirements{ + Limits: getResourceLimits("200m", "200Mi"), + Requests: getResourceLimits("100m", "100Mi"), + }, + }, + }, + }, + }, + err: "Pod QoS is immutable", + test: "Pod QoS change, burstable -> besteffort", }, { new: core.Pod{ @@ -18511,6 +19227,8 @@ func TestValidateOSFields(t *testing.T) { "Containers[*].Ports", "Containers[*].ReadinessProbe", "Containers[*].Resources", + "Containers[*].ResizePolicy[*].Policy", + "Containers[*].ResizePolicy[*].ResourceName", "Containers[*].SecurityContext.RunAsNonRoot", "Containers[*].Stdin", "Containers[*].StdinOnce", @@ -18535,6 +19253,8 @@ func TestValidateOSFields(t *testing.T) { "EphemeralContainers[*].EphemeralContainerCommon.Ports", "EphemeralContainers[*].EphemeralContainerCommon.ReadinessProbe", "EphemeralContainers[*].EphemeralContainerCommon.Resources", + "EphemeralContainers[*].EphemeralContainerCommon.ResizePolicy[*].Policy", + "EphemeralContainers[*].EphemeralContainerCommon.ResizePolicy[*].ResourceName", "EphemeralContainers[*].EphemeralContainerCommon.Stdin", "EphemeralContainers[*].EphemeralContainerCommon.StdinOnce", "EphemeralContainers[*].EphemeralContainerCommon.TTY", @@ -18561,6 +19281,8 @@ func TestValidateOSFields(t *testing.T) { "InitContainers[*].Ports", "InitContainers[*].ReadinessProbe", "InitContainers[*].Resources", + "InitContainers[*].ResizePolicy[*].Policy", + "InitContainers[*].ResizePolicy[*].ResourceName", "InitContainers[*].Stdin", "InitContainers[*].StdinOnce", "InitContainers[*].TTY", diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index e27e12a740e..411b9318d5c 
100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -845,6 +845,13 @@ const ( // instead of changing each file on the volumes recursively. // Initial implementation focused on ReadWriteOncePod volumes. SELinuxMountReadWriteOncePod featuregate.Feature = "SELinuxMountReadWriteOncePod" + + // owner: @vinaykul + // kep: http://kep.k8s.io/1287 + // alpha: v1.27 + // + // Enables In-Place Pod Vertical Scaling + InPlacePodVerticalScaling featuregate.Feature = "InPlacePodVerticalScaling" ) func init() { @@ -1074,6 +1081,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS SELinuxMountReadWriteOncePod: {Default: false, PreRelease: featuregate.Alpha}, + InPlacePodVerticalScaling: {Default: false, PreRelease: featuregate.Alpha}, + // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/quota/v1/evaluator/core/pods.go b/pkg/quota/v1/evaluator/core/pods.go index f85fbde45d3..ab710d74fb4 100644 --- a/pkg/quota/v1/evaluator/core/pods.go +++ b/pkg/quota/v1/evaluator/core/pods.go @@ -30,10 +30,13 @@ import ( "k8s.io/apiserver/pkg/admission" quota "k8s.io/apiserver/pkg/quota/v1" "k8s.io/apiserver/pkg/quota/v1/generic" + "k8s.io/apiserver/pkg/util/feature" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" api "k8s.io/kubernetes/pkg/apis/core" k8s_api_v1 "k8s.io/kubernetes/pkg/apis/core/v1" "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" + "k8s.io/kubernetes/pkg/features" "k8s.io/utils/clock" ) @@ -155,6 +158,9 @@ func (p *podEvaluator) Handles(a admission.Attributes) bool { if op == admission.Create { return true } + if feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && op == admission.Update { + return true + } return false } @@ -356,7 +362,14 @@ func PodUsageFunc(obj runtime.Object, clock clock.Clock) (corev1.ResourceList, e limits := corev1.ResourceList{} // TODO: 
ideally, we have pod level requests and limits in the future. for i := range pod.Spec.Containers { - requests = quota.Add(requests, pod.Spec.Containers[i].Resources.Requests) + containerRequests := pod.Spec.Containers[i].Resources.Requests + if feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[i].Name) + if ok && cs.ResourcesAllocated != nil { + containerRequests = quota.Max(containerRequests, cs.ResourcesAllocated) + } + } + requests = quota.Add(requests, containerRequests) limits = quota.Add(limits, pod.Spec.Containers[i].Resources.Limits) } // InitContainers are run sequentially before other containers start, so the highest diff --git a/pkg/quota/v1/evaluator/core/pods_test.go b/pkg/quota/v1/evaluator/core/pods_test.go index d74f7392adb..d5d36ca99c6 100644 --- a/pkg/quota/v1/evaluator/core/pods_test.go +++ b/pkg/quota/v1/evaluator/core/pods_test.go @@ -27,7 +27,10 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" quota "k8s.io/apiserver/pkg/quota/v1" "k8s.io/apiserver/pkg/quota/v1/generic" + utilfeature "k8s.io/apiserver/pkg/util/feature" + featuregatetesting "k8s.io/component-base/featuregate/testing" api "k8s.io/kubernetes/pkg/apis/core" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/util/node" "k8s.io/utils/clock" testingclock "k8s.io/utils/clock/testing" @@ -750,3 +753,211 @@ func TestPodEvaluatorMatchingScopes(t *testing.T) { }) } } + +func TestPodEvaluatorUsageResourceResize(t *testing.T) { + fakeClock := testingclock.NewFakeClock(time.Now()) + evaluator := NewPodEvaluator(nil, fakeClock) + + testCases := map[string]struct { + pod *api.Pod + usageFgEnabled corev1.ResourceList + usageFgDisabled corev1.ResourceList + }{ + "verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated) for memory resource": { + pod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + 
Requests: api.ResourceList{ + api.ResourceMemory: resource.MustParse("200Mi"), + }, + Limits: api.ResourceList{ + api.ResourceMemory: resource.MustParse("400Mi"), + }, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + ResourcesAllocated: api.ResourceList{ + api.ResourceMemory: resource.MustParse("150Mi"), + }, + }, + }, + }, + }, + usageFgEnabled: corev1.ResourceList{ + corev1.ResourceRequestsMemory: resource.MustParse("200Mi"), + corev1.ResourceLimitsMemory: resource.MustParse("400Mi"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("200Mi"), + generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"), + }, + usageFgDisabled: corev1.ResourceList{ + corev1.ResourceRequestsMemory: resource.MustParse("200Mi"), + corev1.ResourceLimitsMemory: resource.MustParse("400Mi"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("200Mi"), + generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"), + }, + }, + "verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated) for CPU resource": { + pod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceCPU: resource.MustParse("100m"), + }, + Limits: api.ResourceList{ + api.ResourceCPU: resource.MustParse("200m"), + }, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + ResourcesAllocated: api.ResourceList{ + api.ResourceCPU: resource.MustParse("150m"), + }, + }, + }, + }, + }, + usageFgEnabled: corev1.ResourceList{ + corev1.ResourceRequestsCPU: resource.MustParse("150m"), + corev1.ResourceLimitsCPU: resource.MustParse("200m"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceCPU: resource.MustParse("150m"), + 
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"), + }, + usageFgDisabled: corev1.ResourceList{ + corev1.ResourceRequestsCPU: resource.MustParse("100m"), + corev1.ResourceLimitsCPU: resource.MustParse("200m"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceCPU: resource.MustParse("100m"), + generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"), + }, + }, + "verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated) for CPU and memory resource": { + pod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceCPU: resource.MustParse("100m"), + api.ResourceMemory: resource.MustParse("200Mi"), + }, + Limits: api.ResourceList{ + api.ResourceCPU: resource.MustParse("200m"), + api.ResourceMemory: resource.MustParse("400Mi"), + }, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + { + ResourcesAllocated: api.ResourceList{ + api.ResourceCPU: resource.MustParse("150m"), + api.ResourceMemory: resource.MustParse("250Mi"), + }, + }, + }, + }, + }, + usageFgEnabled: corev1.ResourceList{ + corev1.ResourceRequestsCPU: resource.MustParse("150m"), + corev1.ResourceLimitsCPU: resource.MustParse("200m"), + corev1.ResourceRequestsMemory: resource.MustParse("250Mi"), + corev1.ResourceLimitsMemory: resource.MustParse("400Mi"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceCPU: resource.MustParse("150m"), + corev1.ResourceMemory: resource.MustParse("250Mi"), + generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"), + }, + usageFgDisabled: corev1.ResourceList{ + corev1.ResourceRequestsCPU: resource.MustParse("100m"), + corev1.ResourceLimitsCPU: resource.MustParse("200m"), + corev1.ResourceRequestsMemory: resource.MustParse("200Mi"), + 
corev1.ResourceLimitsMemory: resource.MustParse("400Mi"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("200Mi"), + generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"), + }, + }, + "verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated==nil) for CPU and memory resource": { + pod: &api.Pod{ + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceCPU: resource.MustParse("100m"), + api.ResourceMemory: resource.MustParse("200Mi"), + }, + Limits: api.ResourceList{ + api.ResourceCPU: resource.MustParse("200m"), + api.ResourceMemory: resource.MustParse("400Mi"), + }, + }, + }, + }, + }, + Status: api.PodStatus{ + ContainerStatuses: []api.ContainerStatus{ + {}, + }, + }, + }, + usageFgEnabled: corev1.ResourceList{ + corev1.ResourceRequestsCPU: resource.MustParse("100m"), + corev1.ResourceLimitsCPU: resource.MustParse("200m"), + corev1.ResourceRequestsMemory: resource.MustParse("200Mi"), + corev1.ResourceLimitsMemory: resource.MustParse("400Mi"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("200Mi"), + generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"), + }, + usageFgDisabled: corev1.ResourceList{ + corev1.ResourceRequestsCPU: resource.MustParse("100m"), + corev1.ResourceLimitsCPU: resource.MustParse("200m"), + corev1.ResourceRequestsMemory: resource.MustParse("200Mi"), + corev1.ResourceLimitsMemory: resource.MustParse("400Mi"), + corev1.ResourcePods: resource.MustParse("1"), + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("200Mi"), + generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): 
resource.MustParse("1"), + }, + }, + } + t.Parallel() + for _, enabled := range []bool{true, false} { + for testName, testCase := range testCases { + t.Run(testName, func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, enabled)() + actual, err := evaluator.Usage(testCase.pod) + if err != nil { + t.Error(err) + } + usage := testCase.usageFgEnabled + if !enabled { + usage = testCase.usageFgDisabled + } + if !quota.Equals(usage, actual) { + t.Errorf("FG enabled: %v, expected: %v, actual: %v", enabled, usage, actual) + } + }) + } + } +} diff --git a/pkg/registry/core/pod/strategy.go b/pkg/registry/core/pod/strategy.go index 6b14b296b40..7c235a64742 100644 --- a/pkg/registry/core/pod/strategy.go +++ b/pkg/registry/core/pod/strategy.go @@ -97,6 +97,14 @@ func (podStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object oldPod := old.(*api.Pod) newPod.Status = oldPod.Status + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // With support for in-place pod resizing, container resources are now mutable. + // If container resources are updated with new resource requests values, a pod resize is + // desired. The status of this request is reflected by setting Resize field to "Proposed" + // as a signal to the caller that the request is being considered. 
+ podutil.MarkPodProposedForResize(oldPod, newPod) + } + podutil.DropDisabledPodFields(newPod, oldPod) } diff --git a/plugin/pkg/admission/resourcequota/admission_test.go b/plugin/pkg/admission/resourcequota/admission_test.go index dfc2308462f..a1114331e3c 100644 --- a/plugin/pkg/admission/resourcequota/admission_test.go +++ b/plugin/pkg/admission/resourcequota/admission_test.go @@ -31,12 +31,15 @@ import ( genericadmissioninitializer "k8s.io/apiserver/pkg/admission/initializer" "k8s.io/apiserver/pkg/admission/plugin/resourcequota" resourcequotaapi "k8s.io/apiserver/pkg/admission/plugin/resourcequota/apis/resourcequota" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" testcore "k8s.io/client-go/testing" "k8s.io/client-go/tools/cache" + featuregatetesting "k8s.io/component-base/featuregate/testing" api "k8s.io/kubernetes/pkg/apis/core" + "k8s.io/kubernetes/pkg/features" kubeapiserveradmission "k8s.io/kubernetes/pkg/kubeapiserver/admission" "k8s.io/kubernetes/pkg/quota/v1/install" ) @@ -2105,3 +2108,112 @@ func TestAdmitAllowDecreaseUsageWithoutCoveringQuota(t *testing.T) { t.Errorf("Expected no error for decreasing a limited resource without quota, got %v", err) } } + +func TestPodResourcesResizeWithResourceQuota(t *testing.T) { + stopCh := make(chan struct{}) + defer close(stopCh) + + resourceQuota := &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "quota", Namespace: "test", ResourceVersion: "124"}, + Status: corev1.ResourceQuotaStatus{ + Hard: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1000m"), + corev1.ResourceMemory: resource.MustParse("1000Mi"), + corev1.ResourcePods: resource.MustParse("5"), + }, + Used: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("500Mi"), + corev1.ResourcePods: resource.MustParse("1"), + }, + }, + } + + currentPod := validPod("testpod", 1, 
getResourceRequirements(getResourceList("500m", "500Mi"), getResourceList("500m", "500Mi"))) + currentPod.ResourceVersion = "1" + + type testCase struct { + newPod *api.Pod + fgEnabled bool + expectError string + expectActions sets.String + } + testCases := map[string]testCase{ + "pod resize featuregate enabled, increase CPU within quota": { + newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("990m", "500Mi"), getResourceList("990m", "500Mi"))), + fgEnabled: true, + expectError: "", + expectActions: sets.NewString(strings.Join([]string{"update", "resourcequotas", "status"}, "-")), + }, + "pod resize featuregate enabled, increase memory beyond quota": { + newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("500m", "1100Mi"), getResourceList("500m", "1100Mi"))), + fgEnabled: true, + expectError: "forbidden: exceeded quota: quota, requested: memory=600Mi, used: memory=500Mi, limited: memory=1000Mi", + expectActions: sets.NewString(strings.Join([]string{"update", "resourcequotas", "status"}, "-")), + }, + "pod resize featuregate enabled, decrease CPU within quota": { + newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("300m", "500Mi"), getResourceList("300m", "500Mi"))), + fgEnabled: true, + expectError: "", + expectActions: sets.NewString(strings.Join([]string{"update", "resourcequotas", "status"}, "-")), + }, + "pod resize featuregate disabled, decrease memory within quota": { + newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("500m", "400Mi"), getResourceList("500m", "400Mi"))), + fgEnabled: false, + expectError: "", + expectActions: nil, + }, + "pod resize featuregate disabled, increase CPU beyond quota": { + newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("1010m", "500Mi"), getResourceList("1010m", "500Mi"))), + fgEnabled: false, + expectError: "", + expectActions: nil, + }, + } + + for desc, tc := range testCases { + t.Run(desc, func(t *testing.T) { + defer 
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, tc.fgEnabled)() + kubeClient := fake.NewSimpleClientset(resourceQuota) + informerFactory := informers.NewSharedInformerFactory(kubeClient, 0) + handler, err := createHandler(kubeClient, informerFactory, stopCh) + if err != nil { + t.Errorf("Error occurred while creating admission plugin: %v", err) + } + informerFactory.Core().V1().ResourceQuotas().Informer().GetIndexer().Add(resourceQuota) + + tc.newPod.ResourceVersion = "2" + err = handler.Validate(context.TODO(), admission.NewAttributesRecord(tc.newPod, currentPod, + api.Kind("Pod").WithVersion("version"), tc.newPod.Namespace, tc.newPod.Name, + corev1.Resource("pods").WithVersion("version"), "", admission.Update, &metav1.UpdateOptions{}, + false, nil), nil) + if tc.expectError == "" { + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if tc.expectActions != nil { + if len(kubeClient.Actions()) == 0 { + t.Errorf("Expected a client action") + } + } else { + if len(kubeClient.Actions()) > 0 { + t.Errorf("Got client action(s) when not expected") + } + } + actionSet := sets.NewString() + for _, action := range kubeClient.Actions() { + actionSet.Insert(strings.Join([]string{action.GetVerb(), action.GetResource().Resource, + action.GetSubresource()}, "-")) + } + if !actionSet.HasAll(tc.expectActions.List()...) 
{ + t.Errorf("Expected actions:\n%v\n but got:\n%v\nDifference:\n%v", tc.expectActions, + actionSet, tc.expectActions.Difference(actionSet)) + } + } else { + if err == nil || !strings.Contains(err.Error(), tc.expectError) { + t.Errorf("Expected error containing '%s' got err: '%v'", tc.expectError, err) + } + } + }) + } +} diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go index a19bf23cb5d..1c83ad5c6e8 100644 --- a/staging/src/k8s.io/api/core/v1/types.go +++ b/staging/src/k8s.io/api/core/v1/types.go @@ -2263,6 +2263,33 @@ const ( PullIfNotPresent PullPolicy = "IfNotPresent" ) +// ResourceResizePolicy specifies how Kubernetes should handle resource resize. +type ResourceResizePolicy string + +// These are the valid resource resize policy values: +const ( + // RestartNotRequired tells Kubernetes to resize the container in-place + // without restarting it, if possible. Kubernetes may however choose to + // restart the container if it is unable to actuate resize without a + // restart. For example, the runtime doesn't support restart-free resizing. + RestartNotRequired ResourceResizePolicy = "RestartNotRequired" + // 'RestartRequired' tells Kubernetes to resize the container in-place + // by stopping and starting the container when new resources are applied. + // This is needed for legacy applications. For example, Java apps using the + // -XmxN flag which are unable to use resized memory without restarting. + RestartRequired ResourceResizePolicy = "RestartRequired" +) + +// ContainerResizePolicy represents resource resize policy for a single container. +type ContainerResizePolicy struct { + // Name of the resource type to which this resource resize policy applies. + // Supported values: cpu, memory. + ResourceName ResourceName `json:"resourceName" protobuf:"bytes,1,opt,name=resourceName,casttype=ResourceName"` + // Resource resize policy applicable to the specified resource name. 
+ // If not specified, it defaults to RestartNotRequired. + Policy ResourceResizePolicy `json:"policy" protobuf:"bytes,2,opt,name=policy,casttype=ResourceResizePolicy"` +} + // PreemptionPolicy describes a policy for if/when to preempt a pod. // +enum type PreemptionPolicy string @@ -2412,6 +2439,11 @@ type Container struct { // More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ // +optional Resources ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,8,opt,name=resources"` + // Resources resize policy for the container. + // +featureGate=InPlacePodVerticalScaling + // +optional + // +listType=atomic + ResizePolicy []ContainerResizePolicy `json:"resizePolicy,omitempty" protobuf:"bytes,23,rep,name=resizePolicy"` // Pod volumes to mount into the container's filesystem. // Cannot be updated. // +optional @@ -2658,6 +2690,17 @@ type ContainerStatus struct { // Is always true when no startupProbe is defined. // +optional Started *bool `json:"started,omitempty" protobuf:"varint,9,opt,name=started"` + // ResourcesAllocated represents the compute resources allocated for this container by the + // node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission + // and after successfully admitting desired pod resize. + // +featureGate=InPlacePodVerticalScaling + // +optional + ResourcesAllocated ResourceList `json:"resourcesAllocated,omitempty" protobuf:"bytes,10,rep,name=resourcesAllocated,casttype=ResourceList,castkey=ResourceName"` + // Resources represents the compute resource requests and limits that have been successfully + // enacted on the running container after it has been started or has been successfully resized. + // +featureGate=InPlacePodVerticalScaling + // +optional + Resources *ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,11,opt,name=resources"` } // PodPhase is a label for the condition of a pod at the current time. 
@@ -2750,6 +2793,20 @@ type PodCondition struct { Message string `json:"message,omitempty" protobuf:"bytes,6,opt,name=message"` } +// PodResizeStatus shows status of desired resize of a pod's containers. +type PodResizeStatus string + +const ( + // Pod resources resize has been requested and will be evaluated by node. + PodResizeStatusProposed PodResizeStatus = "Proposed" + // Pod resources resize has been accepted by node and is being actuated. + PodResizeStatusInProgress PodResizeStatus = "InProgress" + // Node cannot resize the pod at this time and will keep retrying. + PodResizeStatusDeferred PodResizeStatus = "Deferred" + // Requested pod resize is not feasible and will not be re-evaluated. + PodResizeStatusInfeasible PodResizeStatus = "Infeasible" +) + // RestartPolicy describes how the container should be restarted. // Only one of the following restart policies may be specified. // If none of the following policies is specified, the default one @@ -3888,6 +3945,11 @@ type EphemeralContainerCommon struct { // already allocated to the pod. // +optional Resources ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,8,opt,name=resources"` + // Resources resize policy for the container. + // +featureGate=InPlacePodVerticalScaling + // +optional + // +listType=atomic + ResizePolicy []ContainerResizePolicy `json:"resizePolicy,omitempty" protobuf:"bytes,23,rep,name=resizePolicy"` // Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. // Cannot be updated. // +optional @@ -4079,6 +4141,13 @@ type PodStatus struct { // Status for any ephemeral containers that have run in this pod. // +optional EphemeralContainerStatuses []ContainerStatus `json:"ephemeralContainerStatuses,omitempty" protobuf:"bytes,13,rep,name=ephemeralContainerStatuses"` + + // Status of resources resize desired for pod's containers. + // It is empty if no resources resize is pending. 
+ // Any changes to container resources will automatically set this to "Proposed" + // +featureGate=InPlacePodVerticalScaling + // +optional + Resize PodResizeStatus `json:"resize,omitempty" protobuf:"bytes,14,opt,name=resize,casttype=PodResizeStatus"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/resourcequota/controller.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/resourcequota/controller.go index be6e75fcdb5..75dcc5e3abf 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/resourcequota/controller.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/resourcequota/controller.go @@ -35,8 +35,10 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/admission" resourcequotaapi "k8s.io/apiserver/pkg/admission/plugin/resourcequota/apis/resourcequota" + "k8s.io/apiserver/pkg/features" quota "k8s.io/apiserver/pkg/quota/v1" "k8s.io/apiserver/pkg/quota/v1/generic" + "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/util/workqueue" ) @@ -516,7 +518,14 @@ func CheckRequest(quotas []corev1.ResourceQuota, a admission.Attributes, evaluat if innerErr != nil { return quotas, innerErr } - deltaUsage = quota.SubtractWithNonNegativeResult(deltaUsage, prevUsage) + if feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + // allow negative usage for pods as pod resources can increase or decrease + if a.GetResource().GroupResource() == corev1.Resource("pods") { + deltaUsage = quota.Subtract(deltaUsage, prevUsage) + } + } else { + deltaUsage = quota.SubtractWithNonNegativeResult(deltaUsage, prevUsage) + } } } diff --git a/staging/src/k8s.io/apiserver/pkg/features/kube_features.go b/staging/src/k8s.io/apiserver/pkg/features/kube_features.go index a468270649d..1e986800108 100644 --- a/staging/src/k8s.io/apiserver/pkg/features/kube_features.go +++ b/staging/src/k8s.io/apiserver/pkg/features/kube_features.go @@ -198,6 +198,13 @@ const ( 
// // Enables support for watch bookmark events. WatchBookmark featuregate.Feature = "WatchBookmark" + + // owner: @vinaykul + // kep: http://kep.k8s.io/1287 + // alpha: v1.27 + // + // Enables In-Place Pod Vertical Scaling + InPlacePodVerticalScaling featuregate.Feature = "InPlacePodVerticalScaling" ) func init() { @@ -249,4 +256,6 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS StorageVersionHash: {Default: true, PreRelease: featuregate.Beta}, WatchBookmark: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, + + InPlacePodVerticalScaling: {Default: false, PreRelease: featuregate.Alpha}, } diff --git a/test/integration/logs/benchmark/load_test.go b/test/integration/logs/benchmark/load_test.go index 3d250c845dd..5ccd2e0b1c8 100644 --- a/test/integration/logs/benchmark/load_test.go +++ b/test/integration/logs/benchmark/load_test.go @@ -152,7 +152,7 @@ if [ $count -eq 2 ]; then exit 0 fi while true; do sleep 1; done -],Args:[],WorkingDir:,Ports:[]ContainerPort{},Env:[]EnvVar{},Resources:ResourceRequirements{Limits:ResourceList{},Requests:ResourceList{},Claims:[]ResourceClaim{},},VolumeMounts:[]VolumeMount{},LivenessProbe:nil,ReadinessProbe:nil,Lifecycle:nil,TerminationMessagePath:/dev/termination-log,ImagePullPolicy:,SecurityContext:nil,Stdin:false,StdinOnce:false,TTY:false,EnvFrom:[]EnvFromSource{},TerminationMessagePolicy:,VolumeDevices:[]VolumeDevice{},StartupProbe:nil,}] +],Args:[],WorkingDir:,Ports:[]ContainerPort{},Env:[]EnvVar{},Resources:ResourceRequirements{Limits:ResourceList{},Requests:ResourceList{},Claims:[]ResourceClaim{},},VolumeMounts:[]VolumeMount{},LivenessProbe:nil,ReadinessProbe:nil,Lifecycle:nil,TerminationMessagePath:/dev/termination-log,ImagePullPolicy:,SecurityContext:nil,Stdin:false,StdinOnce:false,TTY:false,EnvFrom:[]EnvFromSource{},TerminationMessagePolicy:,VolumeDevices:[]VolumeDevice{},StartupProbe:nil,ResizePolicy:[]ContainerResizePolicy{},}] `, structured: `"Creating container in pod" 
container=< &Container{Name:terminate-cmd-rpn,Image:registry.k8s.io/e2e-test-images/busybox:1.29-2,Command:[sh -c @@ -165,13 +165,13 @@ while true; do sleep 1; done exit 0 fi while true; do sleep 1; done - ],Args:[],WorkingDir:,Ports:[]ContainerPort{},Env:[]EnvVar{},Resources:ResourceRequirements{Limits:ResourceList{},Requests:ResourceList{},Claims:[]ResourceClaim{},},VolumeMounts:[]VolumeMount{},LivenessProbe:nil,ReadinessProbe:nil,Lifecycle:nil,TerminationMessagePath:/dev/termination-log,ImagePullPolicy:,SecurityContext:nil,Stdin:false,StdinOnce:false,TTY:false,EnvFrom:[]EnvFromSource{},TerminationMessagePolicy:,VolumeDevices:[]VolumeDevice{},StartupProbe:nil,} + ],Args:[],WorkingDir:,Ports:[]ContainerPort{},Env:[]EnvVar{},Resources:ResourceRequirements{Limits:ResourceList{},Requests:ResourceList{},Claims:[]ResourceClaim{},},VolumeMounts:[]VolumeMount{},LivenessProbe:nil,ReadinessProbe:nil,Lifecycle:nil,TerminationMessagePath:/dev/termination-log,ImagePullPolicy:,SecurityContext:nil,Stdin:false,StdinOnce:false,TTY:false,EnvFrom:[]EnvFromSource{},TerminationMessagePolicy:,VolumeDevices:[]VolumeDevice{},StartupProbe:nil,ResizePolicy:[]ContainerResizePolicy{},} > `, // This is what the output would look like with JSON object. Because of https://github.com/kubernetes/kubernetes/issues/106652 we get the string instead. 
// json: `{"msg":"Creating container in pod","v":0,"container":{"name":"terminate-cmd-rpn","image":"registry.k8s.io/e2e-test-images/busybox:1.29-2","command":["sh -c \nf=/restart-count/restartCount\ncount=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})\nif [ $count -eq 1 ]; then\n\texit 1\nfi\nif [ $count -eq 2 ]; then\n\texit 0\nfi\nwhile true; do sleep 1; done\n"],"resources":{},"terminationMessagePath":"/dev/termination-log"}} // `, - json: `{"msg":"Creating container in pod","v":0,"container":"&Container{Name:terminate-cmd-rpn,Image:registry.k8s.io/e2e-test-images/busybox:1.29-2,Command:[sh -c \nf=/restart-count/restartCount\ncount=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})\nif [ $count -eq 1 ]; then\n\texit 1\nfi\nif [ $count -eq 2 ]; then\n\texit 0\nfi\nwhile true; do sleep 1; done\n],Args:[],WorkingDir:,Ports:[]ContainerPort{},Env:[]EnvVar{},Resources:ResourceRequirements{Limits:ResourceList{},Requests:ResourceList{},Claims:[]ResourceClaim{},},VolumeMounts:[]VolumeMount{},LivenessProbe:nil,ReadinessProbe:nil,Lifecycle:nil,TerminationMessagePath:/dev/termination-log,ImagePullPolicy:,SecurityContext:nil,Stdin:false,StdinOnce:false,TTY:false,EnvFrom:[]EnvFromSource{},TerminationMessagePolicy:,VolumeDevices:[]VolumeDevice{},StartupProbe:nil,}"} + json: `{"msg":"Creating container in pod","v":0,"container":"&Container{Name:terminate-cmd-rpn,Image:registry.k8s.io/e2e-test-images/busybox:1.29-2,Command:[sh -c \nf=/restart-count/restartCount\ncount=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})\nif [ $count -eq 1 ]; then\n\texit 1\nfi\nif [ $count -eq 2 ]; then\n\texit 0\nfi\nwhile true; do sleep 1; 
done\n],Args:[],WorkingDir:,Ports:[]ContainerPort{},Env:[]EnvVar{},Resources:ResourceRequirements{Limits:ResourceList{},Requests:ResourceList{},Claims:[]ResourceClaim{},},VolumeMounts:[]VolumeMount{},LivenessProbe:nil,ReadinessProbe:nil,Lifecycle:nil,TerminationMessagePath:/dev/termination-log,ImagePullPolicy:,SecurityContext:nil,Stdin:false,StdinOnce:false,TTY:false,EnvFrom:[]EnvFromSource{},TerminationMessagePolicy:,VolumeDevices:[]VolumeDevice{},StartupProbe:nil,ResizePolicy:[]ContainerResizePolicy{},}"} `, stats: logStats{ TotalLines: 2,