Start adding e2e for recovery from expansion failure

This commit is contained in:
Hemant Kumar 2022-12-01 13:30:37 -05:00
parent 68f808e6db
commit 3d33178dab
4 changed files with 346 additions and 67 deletions

View File

@ -89,14 +89,15 @@ type testParameters struct {
enableResizing bool // enable resizing for both CSI mock driver and storageClass.
enableNodeExpansion bool // enable node expansion for CSI mock driver
// just disable resizing on driver it overrides enableResizing flag for CSI mock driver
disableResizingOnDriver bool
enableSnapshot bool
enableVolumeMountGroup bool // enable the VOLUME_MOUNT_GROUP node capability in the CSI mock driver.
hooks *drivers.Hooks
tokenRequests []storagev1.TokenRequest
requiresRepublish *bool
fsGroupPolicy *storagev1.FSGroupPolicy
enableSELinuxMount *bool
disableResizingOnDriver bool
enableSnapshot bool
enableVolumeMountGroup bool // enable the VOLUME_MOUNT_GROUP node capability in the CSI mock driver.
hooks *drivers.Hooks
tokenRequests []storagev1.TokenRequest
requiresRepublish *bool
fsGroupPolicy *storagev1.FSGroupPolicy
enableSELinuxMount *bool
enableRecoverExpansionFailure bool
}
type mockDriverSetup struct {
@ -148,20 +149,21 @@ func (m *mockDriverSetup) init(tp testParameters) {
var err error
driverOpts := drivers.CSIMockDriverOpts{
RegisterDriver: tp.registerDriver,
PodInfo: tp.podInfo,
StorageCapacity: tp.storageCapacity,
EnableTopology: tp.enableTopology,
AttachLimit: tp.attachLimit,
DisableAttach: tp.disableAttach,
EnableResizing: tp.enableResizing,
EnableNodeExpansion: tp.enableNodeExpansion,
EnableSnapshot: tp.enableSnapshot,
EnableVolumeMountGroup: tp.enableVolumeMountGroup,
TokenRequests: tp.tokenRequests,
RequiresRepublish: tp.requiresRepublish,
FSGroupPolicy: tp.fsGroupPolicy,
EnableSELinuxMount: tp.enableSELinuxMount,
RegisterDriver: tp.registerDriver,
PodInfo: tp.podInfo,
StorageCapacity: tp.storageCapacity,
EnableTopology: tp.enableTopology,
AttachLimit: tp.attachLimit,
DisableAttach: tp.disableAttach,
EnableResizing: tp.enableResizing,
EnableNodeExpansion: tp.enableNodeExpansion,
EnableSnapshot: tp.enableSnapshot,
EnableVolumeMountGroup: tp.enableVolumeMountGroup,
TokenRequests: tp.tokenRequests,
RequiresRepublish: tp.requiresRepublish,
FSGroupPolicy: tp.fsGroupPolicy,
EnableSELinuxMount: tp.enableSELinuxMount,
EnableRecoverExpansionFailure: tp.enableRecoverExpansionFailure,
}
// At the moment, only tests which need hooks are

View File

@ -18,18 +18,55 @@ package csi_mock
import (
"context"
"fmt"
"time"
csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
"k8s.io/kubernetes/test/e2e/storage/drivers"
"k8s.io/kubernetes/test/e2e/storage/testsuites"
"k8s.io/kubernetes/test/e2e/storage/utils"
admissionapi "k8s.io/pod-security-admission/api"
)
// expansionStatus enumerates the simulated CSI driver outcomes that a
// recovery test can exercise during volume expansion.
type expansionStatus int
const (
// expansionSuccess: both controller and node expansion succeed.
expansionSuccess = iota
// expansionFailed: generic expansion failure (not referenced by the tests below).
expansionFailed
// expansionFailedOnController: ControllerExpandVolume returns a final error.
expansionFailedOnController
// expansionFailedOnNode: controller expansion succeeds but NodeExpandVolume fails.
expansionFailedOnNode
)
const (
// resizePollInterval is how often the wait helpers below re-fetch the PVC.
resizePollInterval = 2 * time.Second
)
var (
// maxControllerSizeLimit: requests above this make the mock driver's
// ControllerExpandVolume hook fail (see createExpansionHook).
maxControllerSizeLimit = resource.MustParse("10Gi")
// maxNodeExpansionLimit: requests above this (but under the controller
// limit) make NodeExpandVolume fail, simulating a partial expansion.
maxNodeExpansionLimit = resource.MustParse("8Gi")
)
// recoveryTest describes one expansion-failure-recovery scenario run against
// the CSI mock driver.
type recoveryTest struct {
// name is the ginkgo.It description.
name string
// pvcRequestSize is the first (possibly failing) expansion request.
pvcRequestSize string
// allocatedResource is the size expected in status.allocatedResources
// after the first expansion attempt.
allocatedResource string
// simulatedCSIDriverError selects which mock-driver hook failure to inject.
simulatedCSIDriverError expansionStatus
// expectedResizeStatus is the resize status the resizer should report
// after the first expansion attempt.
expectedResizeStatus v1.PersistentVolumeClaimResizeStatus
// recoverySize is the smaller size the test shrinks the request back to.
recoverySize resource.Quantity
}
var _ = utils.SIGDescribe("CSI Mock volume expansion", func() {
f := framework.NewDefaultFramework("csi-mock-volumes-expansion")
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
@ -210,4 +247,224 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() {
})
}
})
ginkgo.Context("Expansion with recovery[Feature:RecoverVolumeExpansionFailure]", func() {
tests := []recoveryTest{
{
name: "should record target size in allocated resources",
pvcRequestSize: "4Gi",
allocatedResource: "4Gi",
simulatedCSIDriverError: expansionSuccess,
expectedResizeStatus: v1.PersistentVolumeClaimNoExpansionInProgress,
},
{
name: "should allow recovery if controller expansion fails with final error",
pvcRequestSize: "11Gi", // expansion to 11Gi will cause expansion to fail on controller
allocatedResource: "11Gi",
simulatedCSIDriverError: expansionFailedOnController,
expectedResizeStatus: v1.PersistentVolumeClaimControllerExpansionFailed,
recoverySize: resource.MustParse("4Gi"),
},
{
name: "recovery should not be possible in partially expanded volumes",
pvcRequestSize: "9Gi", // expansion to 9Gi will cause expansion to fail on node
allocatedResource: "9Gi",
simulatedCSIDriverError: expansionFailedOnNode,
expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionFailed,
recoverySize: resource.MustParse("5Gi"),
},
}
for _, t := range tests {
test := t
ginkgo.It(test.name, func(ctx context.Context) {
var err error
params := testParameters{enableResizing: true, enableNodeExpansion: true, enableRecoverExpansionFailure: true}
if test.simulatedCSIDriverError != expansionSuccess {
params.hooks = createExpansionHook(test.simulatedCSIDriverError)
}
m.init(params)
ginkgo.DeferCleanup(m.cleanup)
sc, pvc, pod := m.createPod(pvcReference)
gomega.Expect(pod).NotTo(gomega.BeNil(), "while creating pod for resizing")
if !*sc.AllowVolumeExpansion {
framework.Fail("failed creating sc with allowed expansion")
}
err = e2epod.WaitForPodNameRunningInNamespace(m.cs, pod.Name, pod.Namespace)
framework.ExpectNoError(err, "Failed to start pod1: %v", err)
ginkgo.By("Expanding current pvc")
newSize := resource.MustParse(test.pvcRequestSize)
newPVC, err := testsuites.ExpandPVCSize(pvc, newSize, m.cs)
framework.ExpectNoError(err, "While updating pvc for more size")
pvc = newPVC
gomega.Expect(pvc).NotTo(gomega.BeNil())
pvcSize := pvc.Spec.Resources.Requests[v1.ResourceStorage]
if pvcSize.Cmp(newSize) != 0 {
framework.Failf("error updating pvc size %q", pvc.Name)
}
if test.simulatedCSIDriverError == expansionSuccess {
validateExpansionSuccess(pvc, m, test, test.allocatedResource)
} else {
validateRecoveryBehaviour(pvc, m, test)
}
})
}
})
})
// validateRecoveryBehaviour drives the recovery flow after a simulated
// expansion failure: it waits for the resizer to record the failed attempt
// (allocated resources + resize status), shrinks the PVC request back to
// test.recoverySize, and then verifies the outcome. Recovery is expected to
// succeed when the failure was on the controller, and to be impossible (the
// original allocated size sticks) when the node expansion failed.
func validateRecoveryBehaviour(pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, test recoveryTest) {
var err error
ginkgo.By("Waiting for resizer to set allocated resource")
err = waitForAllocatedResource(pvc, m, test.allocatedResource)
framework.ExpectNoError(err, "While waiting for allocated resource to be updated")
ginkgo.By("Waiting for resizer to set resize status")
err = waitForResizeStatus(pvc, m.cs, test.expectedResizeStatus)
framework.ExpectNoError(err, "While waiting for resize status to be set")
ginkgo.By("Recover pvc size")
// Shrink the request back to the recovery size; this is the user action
// RecoverVolumeExpansionFailure is meant to allow.
newPVC, err := testsuites.ExpandPVCSize(pvc, test.recoverySize, m.cs)
framework.ExpectNoError(err, "While updating pvc for more size")
pvc = newPVC
gomega.Expect(pvc).NotTo(gomega.BeNil())
pvcSize := pvc.Spec.Resources.Requests[v1.ResourceStorage]
if pvcSize.Cmp(test.recoverySize) != 0 {
framework.Failf("error updating pvc size %q", pvc.Name)
}
// if expansion failed on controller with final error, then recovery should be possible
if test.simulatedCSIDriverError == expansionFailedOnController {
validateExpansionSuccess(pvc, m, test, test.recoverySize.String())
return
}
// if expansion succeeded on controller but failed on the node
if test.simulatedCSIDriverError == expansionFailedOnNode {
ginkgo.By("Wait for expansion to fail on node again")
// The mock driver keeps rejecting node expansion, so the failure status
// must be reported again even after the recovery attempt.
err = waitForResizeStatus(pvc, m.cs, v1.PersistentVolumeClaimNodeExpansionFailed)
framework.ExpectNoError(err, "While waiting for resize status to be set to expansion-failed-on-node")
ginkgo.By("verify allocated resources after recovery")
pvc, err = m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{})
framework.ExpectNoError(err, "while fetching pvc")
actualAllocatedResource := pvc.Status.AllocatedResources.Storage()
// A partially expanded volume must NOT shrink its allocated size to the
// recovery size; it should still report the original (failed) target.
if actualAllocatedResource.Equal(test.recoverySize) {
framework.Failf("unexpected allocated resource size %s after node expansion failure", actualAllocatedResource.String())
}
if !actualAllocatedResource.Equal(resource.MustParse(test.allocatedResource)) {
framework.Failf("expected allocated resources to be %s got %s", test.allocatedResource, actualAllocatedResource.String())
}
}
}
// validateExpansionSuccess confirms that a volume expansion completed end to
// end: the PV and PVC both reach the new size, no resize conditions remain on
// the PVC, status.allocatedResources equals expectedAllocatedSize, and the
// resize status is back to "no expansion in progress".
func validateExpansionSuccess(pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, test recoveryTest, expectedAllocatedSize string) {
	ginkgo.By("Waiting for persistent volume resize to finish")
	err := testsuites.WaitForControllerVolumeResize(pvc, m.cs, csiResizeWaitPeriod)
	framework.ExpectNoError(err, "While waiting for PV resize to finish")

	ginkgo.By("Waiting for PVC resize to finish")
	pvc, err = testsuites.WaitForFSResize(pvc, m.cs)
	framework.ExpectNoError(err, "while waiting for PVC to finish")

	// A fully finished resize leaves no conditions behind on the claim.
	framework.ExpectEqual(len(pvc.Status.Conditions), 0, "pvc should not have conditions")

	gotAllocated := pvc.Status.AllocatedResources.Storage()
	gomega.Expect(gotAllocated).NotTo(gomega.BeNil())
	wantAllocated := resource.MustParse(expectedAllocatedSize)
	if gotAllocated.Cmp(wantAllocated) != 0 {
		framework.Failf("expected allocated Resources to be %s got %s", wantAllocated.String(), gotAllocated.String())
	}

	gotResizeStatus := pvc.Status.ResizeStatus
	gomega.Expect(gotResizeStatus).NotTo(gomega.BeNil(), "resize status should not be nil")
	framework.ExpectEqual(*gotResizeStatus, v1.PersistentVolumeClaimNoExpansionInProgress, "resize status should be empty")
}
// waitForResizeStatus polls the PVC until status.resizeStatus matches one of
// expectedStates, or csiResizeWaitPeriod elapses.
//
// Fix: the timeout error previously dereferenced actualResizeStatus
// unconditionally; if the PVC never reported a resize status the pointer is
// still nil and formatting the error panicked. Guard the dereference.
func waitForResizeStatus(pvc *v1.PersistentVolumeClaim, c clientset.Interface, expectedStates ...v1.PersistentVolumeClaimResizeStatus) error {
	var actualResizeStatus *v1.PersistentVolumeClaimResizeStatus

	waitErr := wait.PollImmediate(resizePollInterval, csiResizeWaitPeriod, func() (bool, error) {
		updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{})
		if err != nil {
			return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %v", pvc.Name, err)
		}

		actualResizeStatus = updatedPVC.Status.ResizeStatus
		if actualResizeStatus != nil {
			for _, s := range expectedStates {
				if s == *actualResizeStatus {
					return true, nil
				}
			}
		}
		return false, nil
	})
	if waitErr != nil {
		// actualResizeStatus may still be nil (e.g. the resizer never set a
		// status before the timeout); don't dereference a nil pointer while
		// building the error message.
		actual := "<nil>"
		if actualResizeStatus != nil {
			actual = string(*actualResizeStatus)
		}
		return fmt.Errorf("error while waiting for resize status to sync to %+v, actualStatus %s: %v", expectedStates, actual, waitErr)
	}
	return nil
}
// waitForAllocatedResource polls the PVC until status.allocatedResources
// reports exactly expectedSize, or csiResizeWaitPeriod elapses.
func waitForAllocatedResource(pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, expectedSize string) error {
	wantQuantity := resource.MustParse(expectedSize)

	// Re-fetch the claim on each tick and compare the reported allocation.
	check := func() (bool, error) {
		latest, err := m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{})
		if err != nil {
			return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %v", pvc.Name, err)
		}
		allocated := latest.Status.AllocatedResources.Storage()
		return allocated != nil && allocated.Equal(wantQuantity), nil
	}

	if waitErr := wait.PollImmediate(resizePollInterval, csiResizeWaitPeriod, check); waitErr != nil {
		return fmt.Errorf("error while waiting for allocatedSize to sync to %s: %v", expectedSize, waitErr)
	}
	return nil
}
// createExpansionHook returns mock-driver gRPC hooks that inject an expansion
// failure: for expansionFailedOnController, ControllerExpandVolume requests
// above maxControllerSizeLimit are rejected with InvalidArgument; for
// expansionFailedOnNode, NodeExpandVolume requests above maxNodeExpansionLimit
// are rejected. All other calls pass through untouched (nil, nil).
//
// Fix: guard against a nil CapacityRange before reading RequiredBytes —
// CapacityRange is an optional message in CSI requests, and dereferencing it
// unconditionally could panic inside the hook.
func createExpansionHook(expectedExpansionStatus expansionStatus) *drivers.Hooks {
	return &drivers.Hooks{
		Pre: func(ctx context.Context, method string, request interface{}) (reply interface{}, err error) {
			switch expectedExpansionStatus {
			case expansionFailedOnController:
				if req, ok := request.(*csipbv1.ControllerExpandVolumeRequest); ok && req.CapacityRange != nil {
					requestedSize := resource.NewQuantity(req.CapacityRange.RequiredBytes, resource.BinarySI)
					if requestedSize.Cmp(maxControllerSizeLimit) > 0 {
						return nil, status.Error(codes.InvalidArgument, "invalid expansion request")
					}
				}
			case expansionFailedOnNode:
				if req, ok := request.(*csipbv1.NodeExpandVolumeRequest); ok && req.CapacityRange != nil {
					requestedSize := resource.NewQuantity(req.CapacityRange.RequiredBytes, resource.BinarySI)
					if requestedSize.Cmp(maxNodeExpansionLimit) > 0 {
						return nil, status.Error(codes.InvalidArgument, "invalid node expansion request")
					}
				}
			}
			// Returning (nil, nil) lets the mock driver handle the call normally.
			return nil, nil
		},
	}
}

View File

@ -292,23 +292,24 @@ func (h *hostpathCSIDriver) PrepareTest(f *framework.Framework) *storageframewor
// mockCSI
type mockCSIDriver struct {
driverInfo storageframework.DriverInfo
manifests []string
podInfo *bool
storageCapacity *bool
attachable bool
attachLimit int
enableTopology bool
enableNodeExpansion bool
hooks Hooks
tokenRequests []storagev1.TokenRequest
requiresRepublish *bool
fsGroupPolicy *storagev1.FSGroupPolicy
enableVolumeMountGroup bool
embedded bool
calls MockCSICalls
embeddedCSIDriver *mockdriver.CSIDriver
enableSELinuxMount *bool
driverInfo storageframework.DriverInfo
manifests []string
podInfo *bool
storageCapacity *bool
attachable bool
attachLimit int
enableTopology bool
enableNodeExpansion bool
hooks Hooks
tokenRequests []storagev1.TokenRequest
requiresRepublish *bool
fsGroupPolicy *storagev1.FSGroupPolicy
enableVolumeMountGroup bool
embedded bool
calls MockCSICalls
embeddedCSIDriver *mockdriver.CSIDriver
enableSELinuxMount *bool
enableRecoverExpansionFailure bool
// Additional values set during PrepareTest
clientSet clientset.Interface
@ -342,20 +343,21 @@ type MockCSITestDriver interface {
// CSIMockDriverOpts defines options used for csi driver
type CSIMockDriverOpts struct {
RegisterDriver bool
DisableAttach bool
PodInfo *bool
StorageCapacity *bool
AttachLimit int
EnableTopology bool
EnableResizing bool
EnableNodeExpansion bool
EnableSnapshot bool
EnableVolumeMountGroup bool
TokenRequests []storagev1.TokenRequest
RequiresRepublish *bool
FSGroupPolicy *storagev1.FSGroupPolicy
EnableSELinuxMount *bool
RegisterDriver bool
DisableAttach bool
PodInfo *bool
StorageCapacity *bool
AttachLimit int
EnableTopology bool
EnableResizing bool
EnableNodeExpansion bool
EnableSnapshot bool
EnableVolumeMountGroup bool
TokenRequests []storagev1.TokenRequest
RequiresRepublish *bool
FSGroupPolicy *storagev1.FSGroupPolicy
EnableSELinuxMount *bool
EnableRecoverExpansionFailure bool
// Embedded defines whether the CSI mock driver runs
// inside the cluster (false, the default) or just a proxy
@ -497,20 +499,21 @@ func InitMockCSIDriver(driverOpts CSIMockDriverOpts) MockCSITestDriver {
storageframework.CapMultiplePVsSameID: true,
},
},
manifests: driverManifests,
podInfo: driverOpts.PodInfo,
storageCapacity: driverOpts.StorageCapacity,
enableTopology: driverOpts.EnableTopology,
attachable: !driverOpts.DisableAttach,
attachLimit: driverOpts.AttachLimit,
enableNodeExpansion: driverOpts.EnableNodeExpansion,
tokenRequests: driverOpts.TokenRequests,
requiresRepublish: driverOpts.RequiresRepublish,
fsGroupPolicy: driverOpts.FSGroupPolicy,
enableVolumeMountGroup: driverOpts.EnableVolumeMountGroup,
enableSELinuxMount: driverOpts.EnableSELinuxMount,
embedded: driverOpts.Embedded,
hooks: driverOpts.Hooks,
manifests: driverManifests,
podInfo: driverOpts.PodInfo,
storageCapacity: driverOpts.StorageCapacity,
enableTopology: driverOpts.EnableTopology,
attachable: !driverOpts.DisableAttach,
attachLimit: driverOpts.AttachLimit,
enableNodeExpansion: driverOpts.EnableNodeExpansion,
tokenRequests: driverOpts.TokenRequests,
requiresRepublish: driverOpts.RequiresRepublish,
fsGroupPolicy: driverOpts.FSGroupPolicy,
enableVolumeMountGroup: driverOpts.EnableVolumeMountGroup,
enableSELinuxMount: driverOpts.EnableSELinuxMount,
enableRecoverExpansionFailure: driverOpts.EnableRecoverExpansionFailure,
embedded: driverOpts.Embedded,
hooks: driverOpts.Hooks,
}
}
@ -660,6 +663,11 @@ func (m *mockCSIDriver) PrepareTest(f *framework.Framework) *storageframework.Pe
RequiresRepublish: m.requiresRepublish,
FSGroupPolicy: m.fsGroupPolicy,
SELinuxMount: m.enableSELinuxMount,
Features: map[string][]string{},
}
if m.enableRecoverExpansionFailure {
o.Features["csi-resizer"] = []string{"RecoverVolumeExpansionFailure=true"}
}
err = utils.CreateFromManifests(f, m.driverNamespace, func(item interface{}) error {
if err := utils.PatchCSIDeployment(config.Framework, o, item); err != nil {

View File

@ -17,6 +17,7 @@ limitations under the License.
package utils
import (
"fmt"
"path"
"strings"
@ -94,6 +95,11 @@ func PatchCSIDeployment(f *e2eframework.Framework, o PatchCSIOptions, object int
container.VolumeMounts[e].MountPath = substKubeletRootDir(container.VolumeMounts[e].MountPath)
}
if len(o.Features) > 0 && len(o.Features[container.Name]) > 0 {
featuregateString := strings.Join(o.Features[container.Name], ",")
container.Args = append(container.Args, fmt.Sprintf("--feature-gates=%s", featuregateString))
}
// Overwrite driver name resp. provider name
// by appending a parameter with the right
// value.
@ -218,4 +224,10 @@ type PatchCSIOptions struct {
// field *if* the driver deploys a CSIDriver object. Ignored
// otherwise.
SELinuxMount *bool
// If not nil, the values will be used for setting feature arguments to
// specific sidecar.
// Feature is a map - where key is sidecar name such as:
// -- key: resizer
// -- value: []string{feature-gates}
Features map[string][]string
}