use original requests in NodeResourcesBalancedAllocation instead of NonZero (#105845)

2025-07-23 19:56:01 +00:00 · 2021-10-30 03:04:14 +01:00 · 2021-10-30 03:04:14 +01:00 · a2c37bfd09
commit a2c37bfd09
parent 324eff5b75
5 changed files with 70 additions and 36 deletions
--- a/pkg/scheduler/framework/plugins/noderesources/balanced_allocation.go
+++ b/pkg/scheduler/framework/plugins/noderesources/balanced_allocation.go
@ -89,6 +89,7 @@ func NewBalancedAllocation(baArgs runtime.Object, h framework.Handle, fts featur
 		resourceAllocationScorer: resourceAllocationScorer{
 			Name:                BalancedAllocationName,
 			scorer:              balancedResourceScorer,
+			useRequested:        true,
 			resourceToWeightMap: resToWeightMap,
 			enablePodOverhead:   fts.EnablePodOverhead,
 		},
--- a/pkg/scheduler/framework/plugins/noderesources/balanced_allocation_test.go
+++ b/pkg/scheduler/framework/plugins/noderesources/balanced_allocation_test.go
@ -196,18 +196,18 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
 		},
 		{
 			// Node1 scores on 0-MaxNodeScore scale
-			// CPU Fraction: 300 / 250 = 100%
-			// Memory Fraction: 600 / 10000 = 60%
-			// Node1 std: (1 - 0.6) / 2 = 0.2
-			// Node1 Score: (1 - 0.2)*MaxNodeScore = 80
+			// CPU Fraction: 0 / 250 = 0%
+			// Memory Fraction: 0 / 1000 = 0%
+			// Node1 std: (0 - 0) / 2 = 0
+			// Node1 Score: (1 - 0)*MaxNodeScore = 100
 			// Node2 scores on 0-MaxNodeScore scale
-			// CPU Fraction: 100 / 250 = 40%
-			// Memory Fraction: 200 / 10000 = 20%
-			// Node2 std: (0.4 - 0.2) / 2 = 0.1
-			// Node2 Score: (1 - 0.1)*MaxNodeScore = 90
+			// CPU Fraction: 0 / 250 = 0%
+			// Memory Fraction: 0 / 1000 = 0%
+			// Node2 std: (0 - 0) / 2 = 0
+			// Node2 Score: (1 - 0)*MaxNodeScore = 100
 			pod:          &v1.Pod{Spec: nonZeroContainer},
 			nodes:        []*v1.Node{makeNode("machine1", 250, 1000*1024*1024), makeNode("machine2", 250, 1000*1024*1024)},
-			expectedList: []framework.NodeScore{{Name: "machine1", Score: 80}, {Name: "machine2", Score: 90}},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 100}, {Name: "machine2", Score: 100}},
 			name:         "no resources requested, pods with container scheduled",
 			pods: []*v1.Pod{
 				{Spec: nonZeroContainer1},
@ -314,17 +314,17 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
 			args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
 		},
 		// Node1 scores on 0-MaxNodeScore scale
-		// CPU Fraction: 3100 / 3500 = 88.57%
+		// CPU Fraction: 3000 / 3500 = 85.71%
 		// Memory Fraction: 5000 / 40000 = 12.5%
 		// GPU Fraction: 4 / 8 = 0.5%
-		// Node1 std: sqrt(((0.8857 - 0.503) *  (0.8857 - 0.503) + (0.503 - 0.125) * (0.503 - 0.125) + (0.503 - 0.5) * (0.503 - 0.5)) / 3) = 0.3105
-		// Node1 Score: (1 - 0.3105)*MaxNodeScore = 68
+		// Node1 std: sqrt(((0.8571 - 0.503) *  (0.8571 - 0.503) + (0.503 - 0.125) * (0.503 - 0.125) + (0.503 - 0.5) * (0.503 - 0.5)) / 3) = 0.3002
+		// Node1 Score: (1 - 0.3002)*MaxNodeScore = 70
 		// Node2 scores on 0-MaxNodeScore scale
-		// CPU Fraction: 3100 / 3500 = 88.57%
+		// CPU Fraction: 3000 / 3500 = 85.71%
 		// Memory Fraction: 5000 / 40000 = 12.5%
 		// GPU Fraction: 1 / 8 = 12.5%
-		// Node2 std: sqrt(((0.8875 - 0.378) *  (0.8875 - 0.378) + (0.378 - 0.125) * (0.378 - 0.125)) + (0.378 - 0.125) * (0.378 - 0.125)) / 3) = 0.358
-		// Node2 Score: (1 - 0.358)*MaxNodeScore = 64
+		// Node2 std: sqrt(((0.8571 - 0.378) *  (0.8571 - 0.378) + (0.378 - 0.125) * (0.378 - 0.125)) + (0.378 - 0.125) * (0.378 - 0.125)) / 3) = 0.345
+		// Node2 Score: (1 - 0.358)*MaxNodeScore = 65
 		{
 			pod: &v1.Pod{
 				Spec: v1.PodSpec{
@ -341,7 +341,7 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
 				},
 			},
 			nodes:        []*v1.Node{makeNodeWithExtendedResource("machine1", 3500, 40000, scalarResource), makeNodeWithExtendedResource("machine2", 3500, 40000, scalarResource)},
-			expectedList: []framework.NodeScore{{Name: "machine1", Score: 68}, {Name: "machine2", Score: 64}},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 70}, {Name: "machine2", Score: 65}},
 			name:         "include scalar resource on a node for balanced resource allocation",
 			pods: []*v1.Pod{
 				{Spec: cpuAndMemory},
--- a/pkg/scheduler/framework/plugins/noderesources/resource_allocation.go
+++ b/pkg/scheduler/framework/plugins/noderesources/resource_allocation.go
@ -32,7 +32,10 @@ type scorer func(args *config.NodeResourcesFitArgs) *resourceAllocationScorer

 // resourceAllocationScorer contains information to calculate resource allocation score.
 type resourceAllocationScorer struct {
-	Name                string
+	Name string
+	// used to decide whether to use Requested or NonZeroRequested for
+	// cpu and memory.
+	useRequested        bool
 	scorer              func(requested, allocable resourceToValueMap) int64
 	resourceToWeightMap resourceToWeightMap

@ -53,10 +56,11 @@ func (r *resourceAllocationScorer) score(
 	if r.resourceToWeightMap == nil {
 		return 0, framework.NewStatus(framework.Error, "resources not found")
 	}
+
 	requested := make(resourceToValueMap)
 	allocatable := make(resourceToValueMap)
 	for resource := range r.resourceToWeightMap {
-		alloc, req := calculateResourceAllocatableRequest(nodeInfo, pod, resource, r.enablePodOverhead)
+		alloc, req := r.calculateResourceAllocatableRequest(nodeInfo, pod, resource)
 		if alloc != 0 {
 			// Only fill the extended resource entry when it's non-zero.
 			allocatable[resource], requested[resource] = alloc, req
@ -80,8 +84,13 @@ func (r *resourceAllocationScorer) score(
 // - 1st param: quantity of allocatable resource on the node.
 // - 2nd param: aggregated quantity of requested resource on the node.
 // Note: if it's an extended resource, and the pod doesn't request it, (0, 0) is returned.
-func calculateResourceAllocatableRequest(nodeInfo *framework.NodeInfo, pod *v1.Pod, resource v1.ResourceName, enablePodOverhead bool) (int64, int64) {
-	podRequest := calculatePodResourceRequest(pod, resource, enablePodOverhead)
+func (r *resourceAllocationScorer) calculateResourceAllocatableRequest(nodeInfo *framework.NodeInfo, pod *v1.Pod, resource v1.ResourceName) (int64, int64) {
+	requested := nodeInfo.NonZeroRequested
+	if r.useRequested {
+		requested = nodeInfo.Requested
+	}
+
+	podRequest := r.calculatePodResourceRequest(pod, resource)
 	// If it's an extended resource, and the pod doesn't request it. We return (0, 0)
 	// as an implication to bypass scoring on this resource.
 	if podRequest == 0 && schedutil.IsScalarResourceName(resource) {
@ -89,9 +98,9 @@ func calculateResourceAllocatableRequest(nodeInfo *framework.NodeInfo, pod *v1.P
 	}
 	switch resource {
 	case v1.ResourceCPU:
-		return nodeInfo.Allocatable.MilliCPU, (nodeInfo.NonZeroRequested.MilliCPU + podRequest)
+		return nodeInfo.Allocatable.MilliCPU, (requested.MilliCPU + podRequest)
 	case v1.ResourceMemory:
-		return nodeInfo.Allocatable.Memory, (nodeInfo.NonZeroRequested.Memory + podRequest)
+		return nodeInfo.Allocatable.Memory, (requested.Memory + podRequest)
 	case v1.ResourceEphemeralStorage:
 		return nodeInfo.Allocatable.EphemeralStorage, (nodeInfo.Requested.EphemeralStorage + podRequest)
 	default:
@ -108,24 +117,24 @@ func calculateResourceAllocatableRequest(nodeInfo *framework.NodeInfo, pod *v1.P
 // calculatePodResourceRequest returns the total non-zero requests. If Overhead is defined for the pod and the
 // PodOverhead feature is enabled, the Overhead is added to the result.
 // podResourceRequest = max(sum(podSpec.Containers), podSpec.InitContainers) + overHead
-func calculatePodResourceRequest(pod *v1.Pod, resource v1.ResourceName, enablePodOverhead bool) int64 {
+func (r *resourceAllocationScorer) calculatePodResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
 	var podRequest int64
 	for i := range pod.Spec.Containers {
 		container := &pod.Spec.Containers[i]
-		value := schedutil.GetNonzeroRequestForResource(resource, &container.Resources.Requests)
+		value := schedutil.GetRequestForResource(resource, &container.Resources.Requests, !r.useRequested)
 		podRequest += value
 	}

 	for i := range pod.Spec.InitContainers {
 		initContainer := &pod.Spec.InitContainers[i]
-		value := schedutil.GetNonzeroRequestForResource(resource, &initContainer.Resources.Requests)
+		value := schedutil.GetRequestForResource(resource, &initContainer.Resources.Requests, !r.useRequested)
 		if podRequest < value {
 			podRequest = value
 		}
 	}

 	// If Overhead is being utilized, add to the total requests for the pod
-	if pod.Spec.Overhead != nil && enablePodOverhead {
+	if pod.Spec.Overhead != nil && r.enablePodOverhead {
 		if quantity, found := pod.Spec.Overhead[resource]; found {
 			podRequest += quantity.Value()
 		}
--- a/pkg/scheduler/util/pod_resources.go
+++ b/pkg/scheduler/util/pod_resources.go
@ -40,26 +40,27 @@ const (
 // GetNonzeroRequests returns the default cpu and memory resource request if none is found or
 // what is provided on the request.
 func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
-	return GetNonzeroRequestForResource(v1.ResourceCPU, requests),
-		GetNonzeroRequestForResource(v1.ResourceMemory, requests)
+	return GetRequestForResource(v1.ResourceCPU, requests, true),
+		GetRequestForResource(v1.ResourceMemory, requests, true)
 }

-// GetNonzeroRequestForResource returns the default resource request if none is found or
-// what is provided on the request.
-func GetNonzeroRequestForResource(resource v1.ResourceName, requests *v1.ResourceList) int64 {
+// GetRequestForResource returns the requested values unless nonZero is true and there is no defined request
+// for CPU and memory.
+// If nonZero is true and the resource has no defined request for CPU or memory, it returns a default value.
+func GetRequestForResource(resource v1.ResourceName, requests *v1.ResourceList, nonZero bool) int64 {
 	if requests == nil {
 		return 0
 	}
 	switch resource {
 	case v1.ResourceCPU:
 		// Override if un-set, but not if explicitly set to zero
-		if _, found := (*requests)[v1.ResourceCPU]; !found {
+		if _, found := (*requests)[v1.ResourceCPU]; !found && nonZero {
 			return DefaultMilliCPURequest
 		}
 		return requests.Cpu().MilliValue()
 	case v1.ResourceMemory:
 		// Override if un-set, but not if explicitly set to zero
-		if _, found := (*requests)[v1.ResourceMemory]; !found {
+		if _, found := (*requests)[v1.ResourceMemory]; !found && nonZero {
 			return DefaultMemoryRequest
 		}
 		return requests.Memory().Value()
--- a/pkg/scheduler/util/pod_resources_test.go
+++ b/pkg/scheduler/util/pod_resources_test.go
@ -21,7 +21,7 @@ import (

 	"github.com/stretchr/testify/assert"

-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 )

@ -74,18 +74,20 @@ func TestGetNonZeroRequest(t *testing.T) {
 	}
 }

-func TestGetLeastRequestResource(t *testing.T) {
+func TestGetRequestForResource(t *testing.T) {
 	tests := []struct {
 		name             string
 		requests         v1.ResourceList
 		resource         v1.ResourceName
 		expectedQuantity int64
+		nonZero          bool
 	}{
 		{
 			"extended_resource_not_found",
 			v1.ResourceList{},
 			v1.ResourceName("intel.com/foo"),
 			0,
+			true,
 		},
 		{
 			"extended_resource_found",
@ -94,18 +96,21 @@ func TestGetLeastRequestResource(t *testing.T) {
 			},
 			v1.ResourceName("intel.com/foo"),
 			4,
+			true,
 		},
 		{
 			"cpu_not_found",
 			v1.ResourceList{},
 			v1.ResourceCPU,
 			DefaultMilliCPURequest,
+			true,
 		},
 		{
 			"memory_not_found",
 			v1.ResourceList{},
 			v1.ResourceMemory,
 			DefaultMemoryRequest,
+			true,
 		},
 		{
 			"cpu_exist",
@ -114,6 +119,7 @@ func TestGetLeastRequestResource(t *testing.T) {
 			},
 			v1.ResourceCPU,
 			200,
+			true,
 		},
 		{
 			"memory_exist",
@ -122,6 +128,7 @@ func TestGetLeastRequestResource(t *testing.T) {
 			},
 			v1.ResourceMemory,
 			400 * 1024 * 1024,
+			true,
 		},
 		{
 			"ephemeralStorage_exist",
@ -130,18 +137,34 @@ func TestGetLeastRequestResource(t *testing.T) {
 			},
 			v1.ResourceEphemeralStorage,
 			400 * 1024 * 1024,
+			true,
 		},
 		{
 			"ephemeralStorage_not_found",
 			v1.ResourceList{},
 			v1.ResourceEphemeralStorage,
 			0,
+			true,
+		},
+		{
+			"cpu_not_found, useRequested is true",
+			v1.ResourceList{},
+			v1.ResourceCPU,
+			0,
+			false,
+		},
+		{
+			"memory_not_found, useRequested is true",
+			v1.ResourceList{},
+			v1.ResourceMemory,
+			0,
+			false,
 		},
 	}

 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			realQuantity := GetNonzeroRequestForResource(test.resource, &test.requests)
+			realQuantity := GetRequestForResource(test.resource, &test.requests, test.nonZero)
 			assert.EqualValuesf(t, test.expectedQuantity, realQuantity, "Failed to test: %s", test.name)
 		})
 	}