Add benchmark for scheduling of pods with PVs

Jan Safranek
2019-06-26 14:07:17 +02:00
parent b84d5b8162
commit f6430c0159
5 changed files with 561 additions and 6 deletions


@@ -33,11 +33,17 @@ go_test(
embed = [":go_default_library"],
tags = ["integration"],
deps = [
"//pkg/features:go_default_library",
"//pkg/scheduler/factory:go_default_library",
"//pkg/volume/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/storage/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//staging/src/k8s.io/csi-translation-lib/plugins:go_default_library",
"//test/integration/framework:go_default_library",
"//test/utils:go_default_library",
"//vendor/k8s.io/klog:go_default_library",


@@ -22,16 +22,27 @@ import (
"time"
"k8s.io/api/core/v1"
storagev1beta1 "k8s.io/api/storage/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/csi-translation-lib/plugins"
csilibplugins "k8s.io/csi-translation-lib/plugins"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/volume/util"
"k8s.io/kubernetes/test/integration/framework"
testutils "k8s.io/kubernetes/test/utils"
"k8s.io/klog"
)
var (
defaultNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
testCSIDriver = csilibplugins.AWSEBSDriverName
// From PV controller
annBindCompleted = "pv.kubernetes.io/bind-completed"
)
// BenchmarkScheduling benchmarks the scheduling rate when the cluster has
@@ -79,6 +90,134 @@ func BenchmarkSchedulingPodAntiAffinity(b *testing.B) {
}
}
// BenchmarkSchedulingSecrets benchmarks the scheduling rate of pods with
// volumes that don't require any special handling, such as Secrets.
// It can be used to compare scheduler efficiency with the other benchmarks
// that use volume scheduling predicates.
func BenchmarkSchedulingSecrets(b *testing.B) {
tests := []struct{ nodes, existingPods, minPods int }{
{nodes: 500, existingPods: 250, minPods: 250},
{nodes: 500, existingPods: 5000, minPods: 250},
{nodes: 1000, existingPods: 1000, minPods: 500},
{nodes: 5000, existingPods: 1000, minPods: 1000},
}
// The setup strategy creates pods with no volumes.
setupStrategy := testutils.NewSimpleWithControllerCreatePodStrategy("setup")
// The test strategy creates pods with a secret.
testBasePod := makeBasePodWithSecret()
testStrategy := testutils.NewCustomCreatePodStrategy(testBasePod)
for _, test := range tests {
name := fmt.Sprintf("%vNodes/%vPods", test.nodes, test.existingPods)
b.Run(name, func(b *testing.B) {
benchmarkScheduling(test.nodes, test.existingPods, test.minPods, defaultNodeStrategy, setupStrategy, testStrategy, b)
})
}
}
// BenchmarkSchedulingInTreePVs benchmarks the scheduling rate of pods with
// in-tree volumes (used via PV/PVC). Nodes have default hardcoded attach limits
// (39 for AWS EBS).
func BenchmarkSchedulingInTreePVs(b *testing.B) {
tests := []struct{ nodes, existingPods, minPods int }{
{nodes: 500, existingPods: 250, minPods: 250},
{nodes: 500, existingPods: 5000, minPods: 250},
{nodes: 1000, existingPods: 1000, minPods: 500},
{nodes: 5000, existingPods: 1000, minPods: 1000},
}
// The setup strategy creates pods with no volumes.
setupStrategy := testutils.NewSimpleWithControllerCreatePodStrategy("setup")
// The test strategy creates pods with AWS EBS volume used via PV.
baseClaim := makeBasePersistentVolumeClaim()
basePod := makeBasePod()
testStrategy := testutils.NewCreatePodWithPersistentVolumeStrategy(baseClaim, awsVolumeFactory, basePod)
for _, test := range tests {
name := fmt.Sprintf("%vNodes/%vPods", test.nodes, test.existingPods)
b.Run(name, func(b *testing.B) {
benchmarkScheduling(test.nodes, test.existingPods, test.minPods, defaultNodeStrategy, setupStrategy, testStrategy, b)
})
}
}
// BenchmarkSchedulingMigratedInTreePVs benchmarks the scheduling rate of pods with
// in-tree volumes (used via PV/PVC) that are migrated to CSI. CSINode objects exist
// for all nodes and carry the annotation marking the AWS in-tree plugin as migrated.
func BenchmarkSchedulingMigratedInTreePVs(b *testing.B) {
tests := []struct{ nodes, existingPods, minPods int }{
{nodes: 500, existingPods: 250, minPods: 250},
{nodes: 500, existingPods: 5000, minPods: 250},
{nodes: 1000, existingPods: 1000, minPods: 500},
{nodes: 5000, existingPods: 1000, minPods: 1000},
}
// The setup strategy creates pods with no volumes.
setupStrategy := testutils.NewSimpleWithControllerCreatePodStrategy("setup")
// The test strategy creates pods with AWS EBS volume used via PV.
baseClaim := makeBasePersistentVolumeClaim()
basePod := makeBasePod()
testStrategy := testutils.NewCreatePodWithPersistentVolumeStrategy(baseClaim, awsVolumeFactory, basePod)
// Each node can use the same number of CSI volumes as the in-tree AWS volume
// plugin, so the results should be comparable with BenchmarkSchedulingInTreePVs;
// a sketch of the per-node objects assumed here follows this function.
driverKey := util.GetCSIAttachLimitKey(testCSIDriver)
allocatable := map[v1.ResourceName]string{
v1.ResourceName(driverKey): fmt.Sprintf("%d", util.DefaultMaxEBSVolumes),
}
var count int32 = util.DefaultMaxEBSVolumes
csiAllocatable := map[string]*storagev1beta1.VolumeNodeResources{
testCSIDriver: {
Count: &count,
},
}
nodeStrategy := testutils.NewNodeAllocatableStrategy(allocatable, csiAllocatable, []string{csilibplugins.AWSEBSInTreePluginName})
for _, test := range tests {
name := fmt.Sprintf("%vNodes/%vPods", test.nodes, test.existingPods)
b.Run(name, func(b *testing.B) {
defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, features.CSIMigration, true)()
defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, features.CSIMigrationAWS, true)()
benchmarkScheduling(test.nodes, test.existingPods, test.minPods, nodeStrategy, setupStrategy, testStrategy, b)
})
}
}
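For reference, here is a sketch of the per-node API objects that NewNodeAllocatableStrategy is assumed to create for the migrated benchmark (the helper itself lives in test/utils and is outside this diff): the node advertises the CSI attach-limit resource in status.allocatable, and a CSINode object carries the driver's allocatable volume count plus an annotation listing the migrated in-tree plugins. The annotation key and the exact object shape are assumptions for illustration, not the strategy's code.

// exampleMigratedNode is a hypothetical illustration of the per-node state the
// benchmark above relies on; it is not part of this change.
func exampleMigratedNode(nodeName string) (*v1.Node, *storagev1beta1.CSINode) {
	count := int32(util.DefaultMaxEBSVolumes)
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: nodeName},
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{
				// The attach-limit key has the form "attachable-volumes-csi-<driver>",
				// e.g. "attachable-volumes-csi-ebs.csi.aws.com" for AWS EBS.
				v1.ResourceName(util.GetCSIAttachLimitKey(testCSIDriver)): resource.MustParse(
					fmt.Sprintf("%d", util.DefaultMaxEBSVolumes)),
			},
		},
	}
	csiNode := &storagev1beta1.CSINode{
		ObjectMeta: metav1.ObjectMeta{
			Name: nodeName,
			Annotations: map[string]string{
				// Annotation key assumed here; it lists the in-tree plugins whose
				// migration to CSI is enabled on this node.
				"storage.alpha.kubernetes.io/migrated-plugins": csilibplugins.AWSEBSInTreePluginName,
			},
		},
		Spec: storagev1beta1.CSINodeSpec{
			Drivers: []storagev1beta1.CSINodeDriver{{
				Name:        testCSIDriver,
				NodeID:      nodeName,
				Allocatable: &storagev1beta1.VolumeNodeResources{Count: &count},
			}},
		},
	}
	return node, csiNode
}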
// BenchmarkSchedulingCSIPVs benchmarks the scheduling rate of pods with CSI
// volumes (used via PV/PVC). The number of volumes each node can attach is
// limited via node.status.allocatable.
func BenchmarkSchedulingCSIPVs(b *testing.B) {
tests := []struct{ nodes, existingPods, minPods int }{
{nodes: 500, existingPods: 250, minPods: 250},
{nodes: 500, existingPods: 5000, minPods: 250},
{nodes: 1000, existingPods: 1000, minPods: 500},
{nodes: 5000, existingPods: 1000, minPods: 1000},
}
// The setup strategy creates pods with no volumes.
setupStrategy := testutils.NewSimpleWithControllerCreatePodStrategy("setup")
// The test strategy creates pods with CSI volume via PV.
baseClaim := makeBasePersistentVolumeClaim()
basePod := makeBasePod()
testStrategy := testutils.NewCreatePodWithPersistentVolumeStrategy(baseClaim, csiVolumeFactory, basePod)
// Each node can use the same number of CSI volumes as the in-tree AWS volume
// plugin, so the results should be comparable with BenchmarkSchedulingInTreePVs.
driverKey := util.GetCSIAttachLimitKey(testCSIDriver)
allocatable := map[v1.ResourceName]string{
v1.ResourceName(driverKey): fmt.Sprintf("%d", util.DefaultMaxEBSVolumes),
}
var count int32 = util.DefaultMaxEBSVolumes
csiAllocatable := map[string]*storagev1beta1.VolumeNodeResources{
testCSIDriver: {
Count: &count,
},
}
nodeStrategy := testutils.NewNodeAllocatableStrategy(allocatable, csiAllocatable, []string{})
for _, test := range tests {
name := fmt.Sprintf("%vNodes/%vPods", test.nodes, test.existingPods)
b.Run(name, func(b *testing.B) {
benchmarkScheduling(test.nodes, test.existingPods, test.minPods, nodeStrategy, setupStrategy, testStrategy, b)
})
}
}
// BenchmarkSchedulingPodAffinity benchmarks the scheduling rate of pods with
// PodAffinity rules when the cluster has various quantities of nodes and
// scheduled pods.
@@ -265,8 +404,110 @@ func benchmarkScheduling(numNodes, numExistingPods, minPods int,
if len(scheduled) >= numExistingPods+b.N {
break
}
// Note: the polling below may introduce a slight inaccuracy in the benchmark results.
// Since the total measured time is relatively large, this should not be a concern.
time.Sleep(100 * time.Millisecond)
}
}
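The scheduled slice polled above comes from the part of benchmarkScheduling that lies outside this hunk. Below is a minimal sketch of that kind of helper, assuming an informer-backed pod lister; corelisters stands for "k8s.io/client-go/listers/core/v1" and is not among the imports shown in this diff. A pod counts as scheduled once the scheduler has set spec.nodeName.

// listScheduled is an illustrative helper, not the one benchmarkScheduling
// actually uses: it returns the pods the scheduler has already placed.
func listScheduled(podLister corelisters.PodLister) ([]*v1.Pod, error) {
	all, err := podLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}
	var scheduled []*v1.Pod
	for _, pod := range all {
		if pod.Spec.NodeName != "" {
			scheduled = append(scheduled, pod)
		}
	}
	return scheduled, nil
}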
// makeBasePodWithSecret creates a Pod object to be used as a template.
// The pod uses a single Secret volume.
func makeBasePodWithSecret() *v1.Pod {
basePod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "secret-volume-",
},
Spec: testutils.MakePodSpec(),
}
volumes := []v1.Volume{
{
Name: "secret",
VolumeSource: v1.VolumeSource{
Secret: &v1.SecretVolumeSource{
SecretName: "secret",
},
},
},
}
basePod.Spec.Volumes = volumes
return basePod
}
// makeBasePod creates a Pod object to be used as a template.
func makeBasePod() *v1.Pod {
basePod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "pod-",
},
Spec: testutils.MakePodSpec(),
}
return basePod
}
func makeBasePersistentVolumeClaim() *v1.PersistentVolumeClaim {
return &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
// Name is filled in by NewCreatePodWithPersistentVolumeStrategy.
Annotations: map[string]string{
annBindCompleted: "true",
},
},
Spec: v1.PersistentVolumeClaimSpec{
AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany},
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse("1Gi"),
},
},
},
}
}
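For illustration, here is a sketch of what a claim/volume pair looks like once NewCreatePodWithPersistentVolumeStrategy has filled in the names; the wiring shown (VolumeName on the claim, ClaimRef on the volume) is an assumption about that helper, which lives in test/utils. The annBindCompleted annotation above is what lets the scheduler's volume binding predicate treat the claim as already bound instead of trying to bind or provision it.

// exampleBoundPair is a hypothetical helper showing the assumed shape of a
// pre-bound PV/PVC pair; it is not part of this change.
func exampleBoundPair(id int) (*v1.PersistentVolume, *v1.PersistentVolumeClaim) {
	pv := awsVolumeFactory(id)
	pvc := makeBasePersistentVolumeClaim()
	pvc.Name = fmt.Sprintf("pvc-%d", id) // normally filled in by the strategy
	pvc.Spec.VolumeName = pv.Name        // the claim points at its volume
	pv.Spec.ClaimRef = &v1.ObjectReference{ // the volume points back at its claim
		Kind:      "PersistentVolumeClaim",
		Name:      pvc.Name,
		Namespace: pvc.Namespace,
	}
	return pv, pvc
}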
func awsVolumeFactory(id int) *v1.PersistentVolume {
return &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("vol-%d", id),
},
Spec: v1.PersistentVolumeSpec{
AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany},
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse("1Gi"),
},
PersistentVolumeReclaimPolicy: v1.PersistentVolumeReclaimRetain,
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{
// VolumeID must be unique for each PV, so every PV is
// counted as a separate volume by the MaxPDVolumeCountChecker
// predicate (sketched after this function).
VolumeID: fmt.Sprintf("vol-%d", id),
},
},
},
}
}
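The uniqueness of VolumeID is what makes the attach limit bind in this benchmark. Below is an illustrative stand-in for the per-node counting that the MaxPDVolumeCountChecker predicate performs (the real predicate also resolves claims to volumes): each distinct VolumeID is one attachment, and a node is rejected once a pod would push the count past util.DefaultMaxEBSVolumes (39 for AWS EBS).

// countUniqueEBSVolumes is an illustrative helper, not the scheduler's code.
func countUniqueEBSVolumes(pvs []*v1.PersistentVolume) int {
	seen := map[string]bool{}
	for _, pv := range pvs {
		if pv.Spec.AWSElasticBlockStore != nil {
			seen[pv.Spec.AWSElasticBlockStore.VolumeID] = true
		}
	}
	return len(seen)
}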
func csiVolumeFactory(id int) *v1.PersistentVolume {
return &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("vol-%d", id),
},
Spec: v1.PersistentVolumeSpec{
AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany},
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse("1Gi"),
},
PersistentVolumeReclaimPolicy: v1.PersistentVolumeReclaimRetain,
PersistentVolumeSource: v1.PersistentVolumeSource{
CSI: &v1.CSIPersistentVolumeSource{
// VolumeHandle must be unique for each PV, so every PV is
// counted as a separate volume by the CSIMaxVolumeLimitChecker
// predicate.
VolumeHandle: fmt.Sprintf("vol-%d", id),
Driver: testCSIDriver,
},
},
},
}
}
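Other in-tree plugins could be benchmarked the same way by passing a different volume factory to NewCreatePodWithPersistentVolumeStrategy. A hypothetical GCE PD variant (not part of this change) could look like the following, with PDName kept unique per PV for the same reason VolumeID is above.

// gceVolumeFactory is a hypothetical analogue of awsVolumeFactory for the
// GCE PD in-tree plugin; it is shown only as an illustration.
func gceVolumeFactory(id int) *v1.PersistentVolume {
	return &v1.PersistentVolume{
		ObjectMeta: metav1.ObjectMeta{
			Name: fmt.Sprintf("vol-%d", id),
		},
		Spec: v1.PersistentVolumeSpec{
			AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany},
			Capacity: v1.ResourceList{
				v1.ResourceName(v1.ResourceStorage): resource.MustParse("1Gi"),
			},
			PersistentVolumeReclaimPolicy: v1.PersistentVolumeReclaimRetain,
			PersistentVolumeSource: v1.PersistentVolumeSource{
				GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{
					// PDName must be unique for each PV so that each PV counts
					// as a separate attached disk.
					PDName: fmt.Sprintf("vol-%d", id),
					FSType: "ext4",
				},
			},
		},
	}
}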