Merge pull request #95971 from chrishenzie/e2e-stress-snapshots

Add E2E stress test suite for creation / deletion of VolumeSnapshot resources
commit 0bb732842a by Kubernetes Prow Robot on 2020-11-05 14:25:03 -08:00, committed via GitHub
9 changed files with 348 additions and 20 deletions

View File

@@ -90,6 +90,14 @@ func initHostPathCSIDriver(name string, capabilities map[testsuites.Capability]b
Min: "1Mi",
},
Capabilities: capabilities,
StressTestOptions: &testsuites.StressTestOptions{
NumPods: 10,
NumRestarts: 10,
},
VolumeSnapshotStressTestOptions: &testsuites.VolumeSnapshotStressTestOptions{
NumPods: 10,
NumSnapshots: 10,
},
},
manifests: manifests,
volumeAttributes: volumeAttributes,
@@ -507,6 +515,14 @@ func InitGcePDCSIDriver() testsuites.TestDriver {
NumPods: 10,
NumRestarts: 10,
},
VolumeSnapshotStressTestOptions: &testsuites.VolumeSnapshotStressTestOptions{
// GCE only allows for one snapshot per volume to be created at a time,
// which can cause test timeouts. We reduce the likelihood of test timeouts
// by increasing the number of pods (and volumes) and reducing the number
// of snapshots per volume.
NumPods: 20,
NumSnapshots: 2,
},
},
}
}
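A minimal sketch of how a test driver opts into the new suite by populating the added field. Illustrative only: the testsuites package and field names match this diff, but the "example-csi" driver and the exampleDriverInfo helper are hypothetical.

package drivers

import "k8s.io/kubernetes/test/e2e/storage/testsuites"

// exampleDriverInfo shows the two stress knobs side by side. The snapshot
// stress suite creates roughly NumPods * NumSnapshots snapshots per run
// (10 * 10 = 100 here; the GCE PD config above instead trades this off
// as 20 * 2 = 40 because GCE serializes snapshot creation per volume).
func exampleDriverInfo() testsuites.DriverInfo {
	return testsuites.DriverInfo{
		Name: "example-csi", // hypothetical driver name
		StressTestOptions: &testsuites.StressTestOptions{
			NumPods:     10,
			NumRestarts: 10,
		},
		VolumeSnapshotStressTestOptions: &testsuites.VolumeSnapshotStressTestOptions{
			NumPods:      10,
			NumSnapshots: 10,
		},
	}
}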

View File

@@ -10,12 +10,13 @@ go_library(
"multivolume.go",
"provisioning.go",
"snapshottable.go",
"stress.go",
"snapshottable_stress.go",
"subpath.go",
"testdriver.go",
"topology.go",
"volume_expand.go",
"volume_io.go",
"volume_stress.go",
"volumelimits.go",
"volumemode.go",
"volumes.go",

View File

@@ -84,13 +84,14 @@ var BaseSuites = []func() TestSuite{
InitDisruptiveTestSuite,
InitVolumeLimitsTestSuite,
InitTopologyTestSuite,
InitStressTestSuite,
InitVolumeStressTestSuite,
}
// CSISuites is a list of storage test suites that work only for CSI drivers
var CSISuites = append(BaseSuites,
InitEphemeralTestSuite,
InitSnapshottableTestSuite,
InitSnapshottableStressTestSuite,
)
// TestSuite represents an interface for a set of tests which works with TestDriver

View File

@@ -73,6 +73,7 @@ func GetStorageClass(
},
ObjectMeta: metav1.ObjectMeta{
// Name must be unique, so let's base it on namespace name and use GenerateName
// TODO(#96234): Remove unnecessary suffix.
Name: names.SimpleNameGenerator.GenerateName(ns + "-" + suffix),
},
Provisioner: provisioner,
@@ -94,8 +95,9 @@ func GetSnapshotClass(
"kind": "VolumeSnapshotClass",
"apiVersion": snapshotAPIVersion,
"metadata": map[string]interface{}{
// Name must be unique, so let's base it on namespace name
"name": ns + "-" + suffix,
// Name must be unique, so let's base it on namespace name and use GenerateName
// TODO(#96234): Remove unnecessary suffix.
"name": names.SimpleNameGenerator.GenerateName(ns + "-" + suffix),
},
"driver": snapshotter,
"parameters": parameters,

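The move to GenerateName matters here because the stress suites can create many snapshot classes within a single namespace; a fixed ns + suffix name would collide on the second creation. A small sketch of the generator's behavior, assuming SimpleNameGenerator comes from k8s.io/apiserver/pkg/storage/names:

package main

import (
	"fmt"

	"k8s.io/apiserver/pkg/storage/names"
)

func main() {
	base := "e2e-ns-vsc" // namespace + suffix, mirroring GetSnapshotClass
	// GenerateName appends a short random suffix to the base, so repeated
	// calls in the same namespace produce distinct names, e.g. "e2e-ns-vsch7x2k".
	fmt.Println(names.SimpleNameGenerator.GenerateName(base))
}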
View File

@@ -0,0 +1,289 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This suite tests volume snapshots under stress conditions.
package testsuites
import (
"context"
"sync"
"github.com/onsi/ginkgo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errors "k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
e2evolume "k8s.io/kubernetes/test/e2e/framework/volume"
"k8s.io/kubernetes/test/e2e/storage/testpatterns"
)
type snapshottableStressTestSuite struct {
tsInfo TestSuiteInfo
}
type snapshottableStressTest struct {
config *PerTestConfig
testOptions VolumeSnapshotStressTestOptions
driverCleanup func()
pods []*v1.Pod
volumes []*VolumeResource
snapshots []*SnapshotResource
// Protects the snapshots slice, which is appended to from parallel goroutines.
snapshotsMutex sync.Mutex
// Stop and wait for any async routines.
ctx context.Context
wg sync.WaitGroup
cancel context.CancelFunc
}
var _ TestSuite = &snapshottableStressTestSuite{}
// InitSnapshottableStressTestSuite returns a snapshottableStressTestSuite that implements the TestSuite interface
func InitSnapshottableStressTestSuite() TestSuite {
return &snapshottableStressTestSuite{
tsInfo: TestSuiteInfo{
Name: "snapshottable-stress",
TestPatterns: []testpatterns.TestPattern{
testpatterns.DynamicSnapshotDelete,
testpatterns.DynamicSnapshotRetain,
},
SupportedSizeRange: e2evolume.SizeRange{
Min: "1Mi",
},
FeatureTag: "[Feature:VolumeSnapshotDataSource]",
},
}
}
func (t *snapshottableStressTestSuite) GetTestSuiteInfo() TestSuiteInfo {
return t.tsInfo
}
func (t *snapshottableStressTestSuite) SkipRedundantSuite(driver TestDriver, pattern testpatterns.TestPattern) {
}
func (t *snapshottableStressTestSuite) DefineTests(driver TestDriver, pattern testpatterns.TestPattern) {
var (
driverInfo *DriverInfo
snapshottableDriver SnapshottableTestDriver
cs clientset.Interface
stressTest *snapshottableStressTest
)
// Check preconditions before setting up namespace via framework below.
ginkgo.BeforeEach(func() {
driverInfo = driver.GetDriverInfo()
if driverInfo.VolumeSnapshotStressTestOptions == nil {
e2eskipper.Skipf("Driver %s doesn't specify snapshot stress test options -- skipping", driverInfo.Name)
}
if driverInfo.VolumeSnapshotStressTestOptions.NumPods <= 0 {
framework.Failf("NumPods in snapshot stress test options must be a positive integer, received: %d", driverInfo.VolumeSnapshotStressTestOptions.NumPods)
}
if driverInfo.VolumeSnapshotStressTestOptions.NumSnapshots <= 0 {
framework.Failf("NumSnapshots in snapshot stress test options must be a positive integer, received: %d", driverInfo.VolumeSnapshotStressTestOptions.NumSnapshots)
}
// The driver must advertise the snapshot capability and implement SnapshottableTestDriver.
ok := false
snapshottableDriver, ok = driver.(SnapshottableTestDriver)
if !driverInfo.Capabilities[CapSnapshotDataSource] || !ok {
e2eskipper.Skipf("Driver %q doesn't implement SnapshottableTestDriver -- skipping", driverInfo.Name)
}
_, ok = driver.(DynamicPVTestDriver)
if !ok {
e2eskipper.Skipf("Driver %s doesn't implement DynamicPVTestDriver -- skipping", driverInfo.Name)
}
})
// This intentionally comes after checking the preconditions because it
// registers its own BeforeEach which creates the namespace. Beware that it
// also registers an AfterEach which renders f unusable. Any code using
// f must run inside an It or Context callback.
f := framework.NewDefaultFramework("snapshottable-stress")
init := func() {
cs = f.ClientSet
config, driverCleanup := driver.PrepareTest(f)
ctx, cancel := context.WithCancel(context.Background())
stressTest = &snapshottableStressTest{
config: config,
driverCleanup: driverCleanup,
volumes: []*VolumeResource{},
snapshots: []*SnapshotResource{},
pods: []*v1.Pod{},
testOptions: *driverInfo.VolumeSnapshotStressTestOptions,
ctx: ctx,
cancel: cancel,
}
}
createPodsAndVolumes := func() {
for i := 0; i < stressTest.testOptions.NumPods; i++ {
framework.Logf("Creating resources for pod %d/%d", i, stressTest.testOptions.NumPods-1)
volume := CreateVolumeResource(driver, stressTest.config, pattern, t.GetTestSuiteInfo().SupportedSizeRange)
stressTest.volumes = append(stressTest.volumes, volume)
podConfig := e2epod.Config{
NS: f.Namespace.Name,
PVCs: []*v1.PersistentVolumeClaim{volume.Pvc},
SeLinuxLabel: e2epv.SELinuxLabel,
}
pod, err := e2epod.MakeSecPod(&podConfig)
framework.ExpectNoError(err)
stressTest.pods = append(stressTest.pods, pod)
}
var wg sync.WaitGroup
for i, pod := range stressTest.pods {
wg.Add(1)
go func(i int, pod *v1.Pod) {
defer ginkgo.GinkgoRecover()
defer wg.Done()
if _, err := cs.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{}); err != nil {
stressTest.cancel()
framework.Failf("Failed to create pod-%d [%+v]. Error: %v", i, pod, err)
}
}(i, pod)
}
wg.Wait()
for i, pod := range stressTest.pods {
if err := e2epod.WaitForPodRunningInNamespace(cs, pod); err != nil {
stressTest.cancel()
framework.Failf("Failed to wait for pod-%d [%+v] to reach the Running state. Error: %v", i, pod, err)
}
}
}
cleanup := func() {
framework.Logf("Stopping and waiting for all test routines to finish")
stressTest.cancel()
stressTest.wg.Wait()
var (
errs []error
mu sync.Mutex
wg sync.WaitGroup
)
for i, snapshot := range stressTest.snapshots {
wg.Add(1)
go func(i int, snapshot *SnapshotResource) {
defer ginkgo.GinkgoRecover()
defer wg.Done()
framework.Logf("Deleting snapshot %s/%s", snapshot.Vs.GetNamespace(), snapshot.Vs.GetName())
err := snapshot.CleanupResource()
mu.Lock()
defer mu.Unlock()
errs = append(errs, err)
}(i, snapshot)
}
wg.Wait()
for i, pod := range stressTest.pods {
wg.Add(1)
go func(i int, pod *v1.Pod) {
defer ginkgo.GinkgoRecover()
defer wg.Done()
framework.Logf("Deleting pod %s", pod.Name)
err := e2epod.DeletePodWithWait(cs, pod)
mu.Lock()
defer mu.Unlock()
errs = append(errs, err)
}(i, pod)
}
wg.Wait()
for i, volume := range stressTest.volumes {
wg.Add(1)
go func(i int, volume *VolumeResource) {
defer ginkgo.GinkgoRecover()
defer wg.Done()
framework.Logf("Deleting volume %s", volume.Pvc.GetName())
err := volume.CleanupResource()
mu.Lock()
defer mu.Unlock()
errs = append(errs, err)
}(i, volume)
}
wg.Wait()
errs = append(errs, tryFunc(stressTest.driverCleanup))
framework.ExpectNoError(errors.NewAggregate(errs), "while cleaning up resources")
}
ginkgo.BeforeEach(func() {
init()
createPodsAndVolumes()
})
// See #96177: this is necessary for cleaning up resources when tests are interrupted.
f.AddAfterEach("cleanup", func(f *framework.Framework, failed bool) {
cleanup()
})
ginkgo.It("should support snapshotting of many volumes repeatedly [Slow] [Serial]", func() {
// Repeatedly create and delete snapshots of each volume.
for i := 0; i < stressTest.testOptions.NumPods; i++ {
for j := 0; j < stressTest.testOptions.NumSnapshots; j++ {
stressTest.wg.Add(1)
go func(podIndex, snapshotIndex int) {
defer ginkgo.GinkgoRecover()
defer stressTest.wg.Done()
pod := stressTest.pods[podIndex]
volume := stressTest.volumes[podIndex]
select {
case <-stressTest.ctx.Done():
return
default:
framework.Logf("Pod-%d [%s], Iteration %d/%d", podIndex, pod.Name, snapshotIndex, stressTest.testOptions.NumSnapshots-1)
snapshot := CreateSnapshotResource(snapshottableDriver, stressTest.config, pattern, volume.Pvc.GetName(), volume.Pvc.GetNamespace())
stressTest.snapshotsMutex.Lock()
defer stressTest.snapshotsMutex.Unlock()
stressTest.snapshots = append(stressTest.snapshots, snapshot)
}
}(i, j)
}
}
stressTest.wg.Wait()
})
}
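The It block above fans out one goroutine per (pod, snapshot) pair, NumPods * NumSnapshots in total, and leans on the shared context for early exit: the first failure anywhere calls cancel(), and goroutines that have not yet started their iteration observe ctx.Done() and return instead of piling on more work. A self-contained sketch of that cancel-on-first-failure pattern:

package main

import (
	"context"
	"fmt"
	"sync"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			select {
			case <-ctx.Done():
				// Another worker already failed; skip this iteration.
				return
			default:
				if i == 2 { // stand-in for a failed snapshot creation
					fmt.Printf("worker %d failed, cancelling the rest\n", i)
					cancel()
					return
				}
				fmt.Printf("worker %d created its snapshot\n", i)
			}
		}(i)
	}
	wg.Wait()
}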

View File

@@ -189,7 +189,10 @@ type DriverInfo struct {
// Example: multi-zonal disk requires at least 2 allowed topologies.
NumAllowedTopologies int
// [Optional] Scale parameters for stress tests.
// TODO(#96241): Rename this field to reflect the tests that consume it.
StressTestOptions *StressTestOptions
// [Optional] Scale parameters for volume snapshot stress tests.
VolumeSnapshotStressTestOptions *VolumeSnapshotStressTestOptions
}
// StressTestOptions contains parameters used for stress tests.
@@ -201,6 +204,15 @@ type StressTestOptions struct {
NumRestarts int
}
// VolumeSnapshotStressTestOptions contains parameters used for volume snapshot stress tests.
type VolumeSnapshotStressTestOptions struct {
// Number of pods to create in the test. The test may also create
// up to one volume per pod.
NumPods int
// Number of snapshots to create for each volume.
NumSnapshots int
}
// PerTestConfig represents parameters that control test execution.
// One instance gets allocated for each test and is then passed
// via pointer to functions involved in the test.

View File

@@ -35,11 +35,11 @@ import (
"k8s.io/kubernetes/test/e2e/storage/testpatterns"
)
type stressTestSuite struct {
type volumeStressTestSuite struct {
tsInfo TestSuiteInfo
}
type stressTest struct {
type volumeStressTest struct {
config *PerTestConfig
driverCleanup func()
@@ -55,13 +55,13 @@ type stressTest struct {
testOptions StressTestOptions
}
var _ TestSuite = &stressTestSuite{}
var _ TestSuite = &volumeStressTestSuite{}
// InitStressTestSuite returns stressTestSuite that implements TestSuite interface
func InitStressTestSuite() TestSuite {
return &stressTestSuite{
// InitVolumeStressTestSuite returns a volumeStressTestSuite that implements the TestSuite interface
func InitVolumeStressTestSuite() TestSuite {
return &volumeStressTestSuite{
tsInfo: TestSuiteInfo{
Name: "stress",
Name: "volume-stress",
TestPatterns: []testpatterns.TestPattern{
testpatterns.DefaultFsDynamicPV,
testpatterns.BlockVolModeDynamicPV,
@@ -70,18 +70,18 @@ func InitStressTestSuite() TestSuite {
}
}
func (t *stressTestSuite) GetTestSuiteInfo() TestSuiteInfo {
func (t *volumeStressTestSuite) GetTestSuiteInfo() TestSuiteInfo {
return t.tsInfo
}
func (t *stressTestSuite) SkipRedundantSuite(driver TestDriver, pattern testpatterns.TestPattern) {
func (t *volumeStressTestSuite) SkipRedundantSuite(driver TestDriver, pattern testpatterns.TestPattern) {
}
func (t *stressTestSuite) DefineTests(driver TestDriver, pattern testpatterns.TestPattern) {
func (t *volumeStressTestSuite) DefineTests(driver TestDriver, pattern testpatterns.TestPattern) {
var (
dInfo = driver.GetDriverInfo()
cs clientset.Interface
l *stressTest
l *volumeStressTest
)
// Check preconditions before setting up namespace via framework below.
@@ -89,6 +89,12 @@ func (t *stressTestSuite) DefineTests(driver TestDriver, pattern testpatterns.Te
if dInfo.StressTestOptions == nil {
e2eskipper.Skipf("Driver %s doesn't specify stress test options -- skipping", dInfo.Name)
}
if dInfo.StressTestOptions.NumPods <= 0 {
framework.Failf("NumPods in stress test options must be a positive integer, received: %d", dInfo.StressTestOptions.NumPods)
}
if dInfo.StressTestOptions.NumRestarts <= 0 {
framework.Failf("NumRestarts in stress test options must be a positive integer, received: %d", dInfo.StressTestOptions.NumRestarts)
}
if _, ok := driver.(DynamicPVTestDriver); !ok {
e2eskipper.Skipf("Driver %s doesn't implement DynamicPVTestDriver -- skipping", dInfo.Name)
@@ -103,11 +109,11 @@ func (t *stressTestSuite) DefineTests(driver TestDriver, pattern testpatterns.Te
// registers its own BeforeEach which creates the namespace. Beware that it
// also registers an AfterEach which renders f unusable. Any code using
// f must run inside an It or Context callback.
f := framework.NewDefaultFramework("stress")
f := framework.NewDefaultFramework("volume-stress")
init := func() {
cs = f.ClientSet
l = &stressTest{}
l = &volumeStressTest{}
// Now do the more expensive test initialization.
l.config, l.driverCleanup = driver.PrepareTest(f)
@@ -162,6 +168,7 @@ func (t *stressTestSuite) DefineTests(driver TestDriver, pattern testpatterns.Te
createPodsAndVolumes()
})
// See #96177: this is necessary for cleaning up resources when tests are interrupted.
f.AddAfterEach("cleanup", func(f *framework.Framework, failed bool) {
cleanup()
})

View File

@@ -21,7 +21,7 @@ spec:
serviceAccountName: csi-gce-pd-controller-sa
containers:
- name: csi-snapshotter
image: gcr.io/gke-release/csi-snapshotter:v2.1.1-gke.0
image: k8s.gcr.io/sig-storage/csi-snapshotter:v3.0.2
args:
- "--v=5"
- "--csi-address=/csi/csi.sock"
@@ -55,7 +55,7 @@ spec:
- name: socket-dir
mountPath: /csi
- name: gce-pd-driver
image: gcr.io/gke-release/gcp-compute-persistent-disk-csi-driver:v0.7.0-gke.0
image: gcr.io/gke-release/gcp-compute-persistent-disk-csi-driver:v1.0.1-gke.0
args:
- "--v=5"
- "--endpoint=unix:/csi/csi.sock"

View File

@@ -40,7 +40,7 @@ spec:
serviceAccount: csi-snapshotter
containers:
- name: csi-snapshotter
image: k8s.gcr.io/sig-storage/csi-snapshotter:v2.1.0
image: k8s.gcr.io/sig-storage/csi-snapshotter:v3.0.2
args:
- -v=5
- --csi-address=/csi/csi.sock