Merge pull request #128260 from macsko/add_node_update_event_test_case

Add scheduler_perf test case for NodeUpdate event handling
This commit is contained in:
Kubernetes Prow Robot 2024-10-22 22:36:58 +01:00 committed by GitHub
commit d4a0c2be3e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 368 additions and 0 deletions

View File

@ -0,0 +1,17 @@
# Node template with ample capacity (4 CPUs, 32Gi of memory).
# It shares its name with the tiny-capacity "node-low-capacity" template, so it
# is presumably the payload used to update those nodes - confirm against the
# workload config that references it.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-low-capacity-{{ .Index }}
spec:
  taints:
    # NoSchedule taint: only pods tolerating "allow-noderesources" can be placed here.
    - effect: NoSchedule
      key: allow-noderesources
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    cpu: "4"
    memory: 32Gi
    pods: "3000"

View File

@ -0,0 +1,17 @@
# Node template with almost no allocatable resources (0.0001 CPU, 1Mi of memory).
# Any pod with a meaningful CPU or memory request cannot fit on such a node.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-low-capacity-{{ .Index }}
spec:
  taints:
    # NoSchedule taint: only pods tolerating "allow-noderesources" can be placed here.
    - effect: NoSchedule
      key: allow-noderesources
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    # Deliberately tiny CPU and memory; the pod-count limit is left high.
    cpu: "0.0001"
    memory: 1Mi
    pods: "3000"

View File

@ -0,0 +1,17 @@
# Node template without the spec.unschedulable flag.
# It shares its name with the "node-unschedulable" template that sets
# unschedulable to true, so it is presumably the update payload that makes
# those nodes schedulable again - confirm against the workload config.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-unschedulable-{{ .Index }}
spec:
  taints:
    # NoSchedule taint: only pods tolerating "allow" can be placed here.
    - effect: NoSchedule
      key: allow
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    cpu: "4"
    memory: 32Gi
    pods: "3000"

View File

@ -0,0 +1,18 @@
# Node template that is marked unschedulable (cordoned) from the start.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-unschedulable-{{ .Index }}
spec:
  # Cordons the node: new pods are not scheduled here while this is true.
  unschedulable: true
  taints:
    # NoSchedule taint: only pods tolerating "allow" can be placed here.
    - effect: NoSchedule
      key: allow
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    cpu: "4"
    memory: 32Gi
    pods: "3000"

View File

@ -0,0 +1,21 @@
# Node template carrying the zone, "affinity" and "topology" labels.
# It shares its name with the label-free "node-without-labels" template, so it
# is presumably the update payload that adds the labels - confirm against the
# workload config that references it.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-without-labels-{{ .Index }}
  labels:
    # Every node gets the same label values, i.e. one zone / one topology domain.
    affinity: allow
    topology: allow
    topology.kubernetes.io/zone: zone1
spec:
  taints:
    # NoSchedule taint: only pods tolerating "allow" can be placed here.
    - effect: NoSchedule
      key: allow
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    cpu: "4"
    memory: 32Gi
    pods: "3000"

View File

@ -0,0 +1,17 @@
# Node template that defines no labels at all.
# Pods whose constraints refer to node labels (node affinity, topology keys)
# find nothing to match on such a node.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-without-labels-{{ .Index }}
spec:
  taints:
    # NoSchedule taint: only pods tolerating "allow" can be placed here.
    - effect: NoSchedule
      key: allow
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    cpu: "4"
    memory: 32Gi
    pods: "3000"

View File

@ -0,0 +1,17 @@
# Node template whose taint key is derived from the node index
# ("toleration-" followed by the Index template variable).
# It shares its name with the "node-without-taints" template, so it is
# presumably the update payload for those nodes - confirm against the
# workload config that references it.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-without-taints-{{ .Index }}
spec:
  taints:
    # Per-node taint key: only pods tolerating this exact key can be placed here.
    - effect: NoSchedule
      key: toleration-{{ .Index }}
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    cpu: "4"
    memory: 32Gi
    pods: "3000"

View File

@ -0,0 +1,17 @@
# Node template tainted with the generic "allow" key.
# Pods that tolerate only an index-specific "toleration-N" key do not tolerate
# this taint and therefore cannot be placed on such a node.
apiVersion: v1
kind: Node
metadata:
  # The Index template variable gives every generated node a unique name.
  name: node-without-taints-{{ .Index }}
spec:
  taints:
    - effect: NoSchedule
      key: allow
status:
  phase: Running
  conditions:
    - type: Ready
      status: "True"
  capacity:
    cpu: "4"
    memory: 32Gi
    pods: "3000"

View File

@ -0,0 +1,21 @@
# Pod template with a required inter-pod affinity: it must share a
# topology.kubernetes.io/zone domain with pods labelled color=green
# (a label this pod carries itself).
apiVersion: v1
kind: Pod
metadata:
  generateName: pod-unsched-
  labels:
    color: green
spec:
  containers:
    - name: pause
      image: registry.k8s.io/pause:3.10
  tolerations:
    # Tolerates the "allow" NoSchedule taint.
    - key: allow
      operator: Exists
      effect: NoSchedule
  affinity:
    podAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        # The zone label must exist on a node for this term to be satisfiable there.
        - topologyKey: topology.kubernetes.io/zone
          labelSelector:
            matchLabels:
              color: green

View File

@ -0,0 +1,21 @@
# Pod template with a required node affinity: it can only be placed on
# nodes labelled affinity=allow.
apiVersion: v1
kind: Pod
metadata:
  generateName: pod-unsched-
spec:
  containers:
    - name: pause
      image: registry.k8s.io/pause:3.10
  tolerations:
    # Tolerates the "allow" NoSchedule taint.
    - key: allow
      operator: Exists
      effect: NoSchedule
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              # Nodes without the "affinity" label never match this term.
              - key: affinity
                operator: In
                values:
                  - allow

View File

@ -0,0 +1,16 @@
# Pod template that requests 0.35 CPU and 3Gi of memory, so it only fits on
# nodes with enough free capacity.
apiVersion: v1
kind: Pod
metadata:
  generateName: pod-unsched-
spec:
  tolerations:
    # Tolerates the "allow-noderesources" NoSchedule taint.
    - key: allow-noderesources
      operator: Exists
      effect: NoSchedule
  containers:
    - image: registry.k8s.io/pause:3.10
      name: pause
      resources:
        requests:
          # Quoted so the quantity stays a string (like the node capacities)
          # instead of being typed as a YAML float.
          cpu: "0.35"
          memory: 3Gi

View File

@ -0,0 +1,12 @@
# Minimal pod template: a single pause container plus a toleration for the
# "allow" taint. It has no other scheduling constraints.
apiVersion: v1
kind: Pod
metadata:
  generateName: pod-unsched-
spec:
  containers:
    - name: pause
      image: registry.k8s.io/pause:3.10
  tolerations:
    # Tolerates the "allow" NoSchedule taint.
    - key: allow
      operator: Exists
      effect: NoSchedule

View File

@ -0,0 +1,22 @@
# Pod template with a hard topology spread constraint over the "topology"
# node label, counting pods labelled color=blue (a label this pod carries itself).
apiVersion: v1
kind: Pod
metadata:
  generateName: pod-unsched-
  labels:
    color: blue
spec:
  containers:
    - name: pause
      image: registry.k8s.io/pause:3.10
  tolerations:
    # Tolerates the "allow" NoSchedule taint.
    - key: allow
      operator: Exists
      effect: NoSchedule
  topologySpreadConstraints:
    # DoNotSchedule makes this a hard requirement rather than a preference.
    - topologyKey: topology
      whenUnsatisfiable: DoNotSchedule
      maxSkew: 1
      minDomains: 1
      labelSelector:
        matchLabels:
          color: blue

View File

@ -0,0 +1,12 @@
# Pod template that tolerates exactly one index-derived taint key:
# "toleration-" followed by the pod Index divided by 10, so every ten
# consecutive pods share the same toleration key.
apiVersion: v1
kind: Pod
metadata:
  generateName: pod-unsched-
spec:
  containers:
    - name: pause
      image: registry.k8s.io/pause:3.10
  tolerations:
    - key: toleration-{{ div .Index 10 }}
      operator: Exists
      effect: NoSchedule

View File

@ -1838,3 +1838,118 @@
labels: [performance, short]
params:
measurePods: 1000
# This test case measures the performance of queueing hints when handling NodeUpdate events.
# First, a group of nodes is created in a state that prevents any further pods from being scheduled on them.
# Then, pods are created that are filtered out by a specific plugin and are therefore unschedulable.
# Next, the nodes are updated so that the previously unschedulable pods can be scheduled on them.
# The test case is divided into several stages to make sure that the pods are filtered by a specific plugin.
# Plugins covered: InterPodAffinity, NodeAffinity, NodeResources, NodeUnschedulable, PodTopologySpread and TaintToleration.
- name: EventHandlingNodeUpdate
  featureGates:
    SchedulerQueueingHints: true
  workloadTemplate:
    # Collect metrics from all createPods ops.
    - opcode: startCollectingMetrics
      name: unschedPods
      namespaces: [nodeunschedulable, noderesources, interpodaffinity, nodeaffinity, podtopologyspread, tainttoleration]
    # Create unschedulable nodes.
    - opcode: createNodes
      countParam: $nodes
      nodeTemplatePath: config/event_handling/nodeupdate-node-unschedulable.yaml
    # Create pods blocked by the NodeUnschedulable plugin.
    - opcode: createPods
      countParam: $measurePods
      podTemplatePath: config/event_handling/nodeupdate-pod-nodeunschedulable.yaml
      skipWaitToCompletion: true
      namespace: nodeunschedulable
    # Wait for unschedulable pods to be processed by the scheduler.
    - opcode: barrier
      stageRequirement: Attempted
    # Update nodes so that they are no longer unschedulable.
    - opcode: updateAny
      countParam: $nodes
      templatePath: config/event_handling/nodeupdate-node-unschedulable-update.yaml
      updatePerSecond: 100
    # Wait on barrier for NodeUnschedulable pods to be scheduled.
    - opcode: barrier
    # Create nodes with low capacity.
    - opcode: createNodes
      countParam: $nodes
      nodeTemplatePath: config/event_handling/nodeupdate-node-low-capacity.yaml
    # Create pods blocked by the NodeResources plugin.
    - opcode: createPods
      countParam: $measurePods
      podTemplatePath: config/event_handling/nodeupdate-pod-noderesources.yaml
      skipWaitToCompletion: true
      namespace: noderesources
    # Wait for unschedulable pods to be processed by the scheduler.
    - opcode: barrier
      stageRequirement: Attempted
    # Update nodes to have enough resource capacity for pods blocked by the NodeResources plugin.
    # These nodes will still block the pods of the following stages from being scheduled.
    - opcode: updateAny
      countParam: $nodes
      templatePath: config/event_handling/nodeupdate-node-low-capacity-update.yaml
      updatePerSecond: 100
    # Wait on barrier for NodeResources pods to be scheduled.
    - opcode: barrier
    # Create nodes without any labels.
    - opcode: createNodes
      countParam: $nodes
      nodeTemplatePath: config/event_handling/nodeupdate-node-without-labels.yaml
    # Create pods blocked by the InterPodAffinity plugin.
    - opcode: createPods
      countParam: $measurePods
      podTemplatePath: config/event_handling/nodeupdate-pod-interpodaffinity.yaml
      skipWaitToCompletion: true
      namespace: interpodaffinity
    # Create pods blocked by the NodeAffinity plugin.
    - opcode: createPods
      countParam: $measurePods
      podTemplatePath: config/event_handling/nodeupdate-pod-nodeaffinity.yaml
      skipWaitToCompletion: true
      namespace: nodeaffinity
    # Create pods blocked by the PodTopologySpread plugin.
    - opcode: createPods
      countParam: $measurePods
      podTemplatePath: config/event_handling/nodeupdate-pod-podtopologyspread.yaml
      skipWaitToCompletion: true
      namespace: podtopologyspread
    # Wait for unschedulable pods to be processed by the scheduler.
    - opcode: barrier
      stageRequirement: Attempted
    # Update nodes to have the labels required by the pods above.
    - opcode: updateAny
      countParam: $nodes
      templatePath: config/event_handling/nodeupdate-node-without-labels-update.yaml
      updatePerSecond: 100
    # Wait on barrier for InterPodAffinity, NodeAffinity and PodTopologySpread pods to be scheduled.
    - opcode: barrier
    # Create nodes with taints not matching the tolerations of the pods below.
    - opcode: createNodes
      countParam: $nodes
      nodeTemplatePath: config/event_handling/nodeupdate-node-without-taints.yaml
    # Create pods blocked by the TaintToleration plugin.
    - opcode: createPods
      countParam: $measurePods
      podTemplatePath: config/event_handling/nodeupdate-pod-tainttoleration.yaml
      skipWaitToCompletion: true
      namespace: tainttoleration
    # Wait for unschedulable pods to be processed by the scheduler.
    - opcode: barrier
      stageRequirement: Attempted
    # Update nodes so that their taints match the tolerations of the pods above.
    - opcode: updateAny
      countParam: $nodes
      templatePath: config/event_handling/nodeupdate-node-without-taints-update.yaml
      updatePerSecond: 100
    # Wait on barrier for TaintToleration pods to be scheduled.
    - opcode: barrier
    - opcode: stopCollectingMetrics
  workloads:
    - name: 100Nodes_1000Pods
      labels: [performance, short]
      params:
        nodes: 100
        measurePods: 1000  # Must be nodes * 10

View File

@ -149,6 +149,10 @@ func (c *updateAny) update(tCtx ktesting.TContext, env map[string]any) error {
if err != nil {
return fmt.Errorf("failed to update object in namespace %q: %w", c.Namespace, err)
}
_, err = resourceClient.Namespace(c.Namespace).UpdateStatus(tCtx, obj, options)
if err != nil {
return fmt.Errorf("failed to update object status in namespace %q: %w", c.Namespace, err)
}
return nil
}
if c.cachedMapping.Scope.Name() != meta.RESTScopeNameRoot {
@ -158,5 +162,9 @@ func (c *updateAny) update(tCtx ktesting.TContext, env map[string]any) error {
if err != nil {
return fmt.Errorf("failed to update object: %w", err)
}
_, err = resourceClient.UpdateStatus(tCtx, obj, options)
if err != nil {
return fmt.Errorf("failed to update object status: %w", err)
}
return nil
}