dra test: enhance performance of test driver controller

Analyzing the CPU profile of

    go test -timeout=0 -count=5 -cpuprofile profile.out -bench=BenchmarkPerfScheduling/.*Claim.* -benchtime=1ns -run=xxx ./test/integration/scheduler_perf

showed that a significant amount of time was spent iterating over all allocated
claims just to determine how many were allocated per node. That "naive" approach
had been chosen to avoid maintaining a redundant data structure, but now that the
profile shows what it costs, introducing such a second field is no longer
"premature optimization".

The average scheduling throughput in
SchedulingWithResourceClaimTemplate/2000pods_100nodes increases from 16.4
pods/s to 19.2 pods/s.
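
A rough sketch of the idea (illustrative only; the tracker type and its
methods below are made-up names, not the controller's actual fields): keep a
per-node counter in sync with the allocation map, so that "how many claims
are allocated on this node?" becomes an O(1) map lookup instead of an O(n)
scan over all allocations.

    package main

    import "fmt"

    // tracker is a simplified stand-in for the test driver's bookkeeping:
    // perNode is updated on every allocate/deallocate so that counting
    // claims on a node no longer requires iterating over all allocations.
    type tracker struct {
        allocated map[string]string // claim UID -> node name ("" = cluster-scoped)
        perNode   map[string]int    // node name -> number of allocated claims
    }

    func newTracker() *tracker {
        return &tracker{
            allocated: make(map[string]string),
            perNode:   make(map[string]int),
        }
    }

    func (t *tracker) allocate(uid, node string) {
        if _, ok := t.allocated[uid]; ok {
            return // idempotent: already allocated
        }
        t.allocated[uid] = node
        t.perNode[node]++
    }

    func (t *tracker) deallocate(uid string) {
        node, ok := t.allocated[uid]
        if !ok {
            return
        }
        delete(t.allocated, uid)
        t.perNode[node]--
    }

    func main() {
        t := newTracker()
        t.allocate("claim-1", "node-a")
        t.allocate("claim-2", "node-a")
        fmt.Println(t.perNode["node-a"]) // 2, without scanning allocated
    }
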
Patrick Ohly 2023-08-08 13:36:35 +02:00
parent 99190634ab
commit 0e23840929


@@ -63,6 +63,9 @@ type ExampleController struct {
 	mutex sync.Mutex
 	// allocated maps claim.UID to the node (if network-attached) or empty (if not).
 	allocated map[types.UID]string
+	// claimsPerNode counts how many claims are currently allocated for a node (non-empty key)
+	// or allocated for the entire cluster (empty key). Must be kept in sync with allocated.
+	claimsPerNode map[string]int
 
 	numAllocations, numDeallocations int64
 }
@@ -73,7 +76,8 @@ func NewController(clientset kubernetes.Interface, driverName string, resources
 		resources:  resources,
 		driverName: driverName,
 		allocated:  make(map[types.UID]string),
+		claimsPerNode: make(map[string]int),
 	}
 	return c
 }
@@ -95,16 +99,6 @@ type parameters struct {
 var _ controller.Driver = &ExampleController{}
 
-func (c *ExampleController) countAllocations(node string) int {
-	total := 0
-	for _, n := range c.allocated {
-		if n == node {
-			total++
-		}
-	}
-	return total
-}
-
 // GetNumAllocations returns the number of times that a claim was allocated.
 // Idempotent calls to Allocate that do not need to allocate the claim again do
 // not contribute to that counter.
@@ -204,7 +198,7 @@ func (c *ExampleController) allocateOne(ctx context.Context, claim *resourcev1al
 		var viableNodes []string
 		for _, n := range c.resources.Nodes {
 			if c.resources.MaxAllocations == 0 ||
-				c.countAllocations(n) < c.resources.MaxAllocations {
+				c.claimsPerNode[n] < c.resources.MaxAllocations {
 				viableNodes = append(viableNodes, n)
 			}
 		}
@@ -217,7 +211,7 @@ func (c *ExampleController) allocateOne(ctx context.Context, claim *resourcev1al
 			logger.V(3).Info("picked a node ourselves", "selectedNode", selectedNode)
 		} else if !contains(c.resources.Nodes, node) ||
 			c.resources.MaxAllocations > 0 &&
-				c.countAllocations(node) >= c.resources.MaxAllocations {
+				c.claimsPerNode[node] >= c.resources.MaxAllocations {
 			return nil, fmt.Errorf("resources exhausted on node %q", node)
 		}
 	} else {
@@ -271,6 +265,7 @@ func (c *ExampleController) allocateOne(ctx context.Context, claim *resourcev1al
 	if !alreadyAllocated {
 		c.numAllocations++
 		c.allocated[claim.UID] = node
+		c.claimsPerNode[node]++
 	}
 	return allocation, nil
 }
@@ -280,7 +275,8 @@ func (c *ExampleController) Deallocate(ctx context.Context, claim *resourcev1alp
 	c.mutex.Lock()
 	defer c.mutex.Unlock()
 
-	if _, ok := c.allocated[claim.UID]; !ok {
+	node, ok := c.allocated[claim.UID]
+	if !ok {
 		logger.V(3).Info("already deallocated")
 		return nil
 	}
@@ -288,6 +284,7 @@ func (c *ExampleController) Deallocate(ctx context.Context, claim *resourcev1alp
 	logger.V(3).Info("done")
 	c.numDeallocations++
 	delete(c.allocated, claim.UID)
+	c.claimsPerNode[node]--
 
 	return nil
 }
@@ -307,10 +304,6 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
 		return nil
 	}
 	if c.resources.NodeLocal {
-		allocationsPerNode := make(map[string]int)
-		for _, node := range c.resources.Nodes {
-			allocationsPerNode[node] = c.countAllocations(node)
-		}
 		for _, claim := range claims {
 			claim.UnsuitableNodes = nil
 			for _, node := range potentialNodes {
@@ -320,7 +313,7 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
 				// for all of them. Also, nodes that the driver
 				// doesn't run on cannot be used.
 				if !contains(c.resources.Nodes, node) ||
-					allocationsPerNode[node]+len(claims) > c.resources.MaxAllocations {
+					c.claimsPerNode[node]+len(claims) > c.resources.MaxAllocations {
 					claim.UnsuitableNodes = append(claim.UnsuitableNodes, node)
 				}
 			}
@@ -328,7 +321,7 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
 		return nil
 	}
 
-	allocations := c.countAllocations("")
+	allocations := c.claimsPerNode[""]
 	for _, claim := range claims {
 		claim.UnsuitableNodes = nil
 		for _, node := range potentialNodes {