From 4568cdada286e1569f31a66c0153348401194589 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 13 Oct 2021 15:57:55 +0200 Subject: [PATCH 1/4] storage e2e: test volume attach limits of generic ephemeral volumes There are unit tests for this particular code path in kube-scheduler, but no E2E tests. --- test/e2e/storage/testsuites/volumelimits.go | 127 ++++++++++---------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/test/e2e/storage/testsuites/volumelimits.go b/test/e2e/storage/testsuites/volumelimits.go index 846ecd2dc3c..dd0acbd0264 100644 --- a/test/e2e/storage/testsuites/volumelimits.go +++ b/test/e2e/storage/testsuites/volumelimits.go @@ -32,6 +32,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" + "k8s.io/component-helpers/storage/ephemeral" migrationplugins "k8s.io/csi-translation-lib/plugins" // volume plugin names are exported nicely there volumeutil "k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/test/e2e/framework" @@ -74,6 +75,7 @@ func InitCustomVolumeLimitsTestSuite(patterns []storageframework.TestPattern) st func InitVolumeLimitsTestSuite() storageframework.TestSuite { patterns := []storageframework.TestPattern{ storageframework.FsVolModeDynamicPV, + storageframework.DefaultFsGenericEphemeralVolume, } return InitCustomVolumeLimitsTestSuite(patterns) } @@ -95,14 +97,14 @@ func (t *volumeLimitsTestSuite) DefineTests(driver storageframework.TestDriver, // VolumeResource contains pv, pvc, sc, etc. of the first pod created resource *storageframework.VolumeResource - // All created PVCs, incl. the one in resource - pvcs []*v1.PersistentVolumeClaim + // All created PVCs + pvcNames []string + + // All created Pods + podNames []string // All created PVs, incl. the one in resource pvNames sets.String - - runningPod *v1.Pod - unschedulablePod *v1.Pod } var ( l local @@ -164,57 +166,64 @@ func (t *volumeLimitsTestSuite) DefineTests(driver storageframework.TestDriver, framework.ExpectNoError(err, "while cleaning up resource") }() defer func() { - cleanupTest(l.cs, l.ns.Name, l.runningPod.Name, l.unschedulablePod.Name, l.pvcs, l.pvNames, testSlowMultiplier*f.Timeouts.PVDelete) + cleanupTest(l.cs, l.ns.Name, l.podNames, l.pvcNames, l.pvNames, testSlowMultiplier*f.Timeouts.PVDelete) }() - // Create PVCs for one gigantic pod. - ginkgo.By(fmt.Sprintf("Creating %d PVC(s)", limit)) - - for i := 0; i < limit; i++ { - pvc := e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ - ClaimSize: claimSize, - StorageClassName: &l.resource.Sc.Name, - }, l.ns.Name) - pvc, err = l.cs.CoreV1().PersistentVolumeClaims(l.ns.Name).Create(context.TODO(), pvc, metav1.CreateOptions{}) - framework.ExpectNoError(err) - l.pvcs = append(l.pvcs, pvc) - } - - ginkgo.By("Creating pod to use all PVC(s)") selection := e2epod.NodeSelection{Name: nodeName} - podConfig := e2epod.Config{ - NS: l.ns.Name, - PVCs: l.pvcs, - SeLinuxLabel: e2epv.SELinuxLabel, - NodeSelection: selection, + + if pattern.VolType == storageframework.GenericEphemeralVolume { + // Create Pods. + ginkgo.By(fmt.Sprintf("Creating %d Pod(s) with one volume each", limit)) + for i := 0; i < limit; i++ { + pod := StartInPodWithVolumeSource(l.cs, *l.resource.VolSource, l.ns.Name, "volume-limits", "sleep 1000000", selection) + l.podNames = append(l.podNames, pod.Name) + l.pvcNames = append(l.pvcNames, ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0])) + } + } else { + // Create PVCs for one gigantic pod. 
+ var pvcs []*v1.PersistentVolumeClaim + ginkgo.By(fmt.Sprintf("Creating %d PVC(s)", limit)) + for i := 0; i < limit; i++ { + pvc := e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ + ClaimSize: claimSize, + StorageClassName: &l.resource.Sc.Name, + }, l.ns.Name) + pvc, err = l.cs.CoreV1().PersistentVolumeClaims(l.ns.Name).Create(context.TODO(), pvc, metav1.CreateOptions{}) + framework.ExpectNoError(err) + l.pvcNames = append(l.pvcNames, pvc.Name) + pvcs = append(pvcs, pvc) + } + + ginkgo.By("Creating pod to use all PVC(s)") + podConfig := e2epod.Config{ + NS: l.ns.Name, + PVCs: pvcs, + SeLinuxLabel: e2epv.SELinuxLabel, + NodeSelection: selection, + } + pod, err := e2epod.MakeSecPod(&podConfig) + framework.ExpectNoError(err) + pod, err = l.cs.CoreV1().Pods(l.ns.Name).Create(context.TODO(), pod, metav1.CreateOptions{}) + framework.ExpectNoError(err) + l.podNames = append(l.podNames, pod.Name) } - pod, err := e2epod.MakeSecPod(&podConfig) - framework.ExpectNoError(err) - l.runningPod, err = l.cs.CoreV1().Pods(l.ns.Name).Create(context.TODO(), pod, metav1.CreateOptions{}) - framework.ExpectNoError(err) ginkgo.By("Waiting for all PVCs to get Bound") - l.pvNames, err = waitForAllPVCsBound(l.cs, testSlowMultiplier*f.Timeouts.PVBound, l.pvcs) + l.pvNames, err = waitForAllPVCsBound(l.cs, testSlowMultiplier*f.Timeouts.PVBound, l.ns.Name, l.pvcNames) framework.ExpectNoError(err) - ginkgo.By("Waiting for the pod Running") - err = e2epod.WaitTimeoutForPodRunningInNamespace(l.cs, l.runningPod.Name, l.ns.Name, testSlowMultiplier*f.Timeouts.PodStart) - framework.ExpectNoError(err) + ginkgo.By("Waiting for the pod(s) running") + for _, podName := range l.podNames { + err = e2epod.WaitTimeoutForPodRunningInNamespace(l.cs, podName, l.ns.Name, testSlowMultiplier*f.Timeouts.PodStart) + framework.ExpectNoError(err) + } ginkgo.By("Creating an extra pod with one volume to exceed the limit") - podConfig = e2epod.Config{ - NS: l.ns.Name, - PVCs: []*v1.PersistentVolumeClaim{l.resource.Pvc}, - SeLinuxLabel: e2epv.SELinuxLabel, - NodeSelection: selection, - } - pod, err = e2epod.MakeSecPod(&podConfig) - framework.ExpectNoError(err) - l.unschedulablePod, err = l.cs.CoreV1().Pods(l.ns.Name).Create(context.TODO(), pod, metav1.CreateOptions{}) - framework.ExpectNoError(err, "Failed to create an extra pod with one volume to exceed the limit") + pod := StartInPodWithVolumeSource(l.cs, *l.resource.VolSource, l.ns.Name, "volume-limits-exceeded", "sleep 10000", selection) + l.podNames = append(l.podNames, pod.Name) ginkgo.By("Waiting for the pod to get unschedulable with the right message") - err = e2epod.WaitForPodCondition(l.cs, l.ns.Name, l.unschedulablePod.Name, "Unschedulable", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + err = e2epod.WaitForPodCondition(l.cs, l.ns.Name, pod.Name, "Unschedulable", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { if pod.Status.Phase == v1.PodPending { reg, err := regexp.Compile(`max.+volume.+count`) if err != nil { @@ -270,24 +279,18 @@ func (t *volumeLimitsTestSuite) DefineTests(driver storageframework.TestDriver, }) } -func cleanupTest(cs clientset.Interface, ns string, runningPodName, unschedulablePodName string, pvcs []*v1.PersistentVolumeClaim, pvNames sets.String, timeout time.Duration) error { +func cleanupTest(cs clientset.Interface, ns string, podNames, pvcNames []string, pvNames sets.String, timeout time.Duration) error { var cleanupErrors []string - if runningPodName != "" { - err := cs.CoreV1().Pods(ns).Delete(context.TODO(), runningPodName, 
metav1.DeleteOptions{}) + for _, podName := range podNames { + err := cs.CoreV1().Pods(ns).Delete(context.TODO(), podName, metav1.DeleteOptions{}) if err != nil { - cleanupErrors = append(cleanupErrors, fmt.Sprintf("failed to delete pod %s: %s", runningPodName, err)) + cleanupErrors = append(cleanupErrors, fmt.Sprintf("failed to delete pod %s: %s", podName, err)) } } - if unschedulablePodName != "" { - err := cs.CoreV1().Pods(ns).Delete(context.TODO(), unschedulablePodName, metav1.DeleteOptions{}) - if err != nil { - cleanupErrors = append(cleanupErrors, fmt.Sprintf("failed to delete pod %s: %s", unschedulablePodName, err)) - } - } - for _, pvc := range pvcs { - err := cs.CoreV1().PersistentVolumeClaims(ns).Delete(context.TODO(), pvc.Name, metav1.DeleteOptions{}) - if err != nil { - cleanupErrors = append(cleanupErrors, fmt.Sprintf("failed to delete PVC %s: %s", pvc.Name, err)) + for _, pvcName := range pvcNames { + err := cs.CoreV1().PersistentVolumeClaims(ns).Delete(context.TODO(), pvcName, metav1.DeleteOptions{}) + if !apierrors.IsNotFound(err) { + cleanupErrors = append(cleanupErrors, fmt.Sprintf("failed to delete PVC %s: %s", pvcName, err)) } } // Wait for the PVs to be deleted. It includes also pod and PVC deletion because of PVC protection. @@ -323,12 +326,12 @@ func cleanupTest(cs clientset.Interface, ns string, runningPodName, unschedulabl } // waitForAllPVCsBound waits until the given PVCs are all bound. It then returns the bound PVC names as a set. -func waitForAllPVCsBound(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) { +func waitForAllPVCsBound(cs clientset.Interface, timeout time.Duration, ns string, pvcNames []string) (sets.String, error) { pvNames := sets.NewString() err := wait.Poll(5*time.Second, timeout, func() (bool, error) { unbound := 0 - for _, pvc := range pvcs { - pvc, err := cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + for _, pvcName := range pvcNames { + pvc, err := cs.CoreV1().PersistentVolumeClaims(ns).Get(context.TODO(), pvcName, metav1.GetOptions{}) if err != nil { return false, err } @@ -339,7 +342,7 @@ func waitForAllPVCsBound(cs clientset.Interface, timeout time.Duration, pvcs []* } } if unbound > 0 { - framework.Logf("%d/%d of PVCs are Bound", pvNames.Len(), len(pvcs)) + framework.Logf("%d/%d of PVCs are Bound", pvNames.Len(), len(pvcNames)) return false, nil } return true, nil From 7538d089d5cb5305c3558bb9614ae82bdad7bc2b Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 14 Oct 2021 16:28:26 +0200 Subject: [PATCH 2/4] storage e2e: fix volume metric test for generic ephemeral volume This is a fix for the new test case from https://github.com/kubernetes/kubernetes/pull/105636 which had to be merged without prior testing due to not having a cluster to test on and no pull job which runs these tests. https://testgrid.k8s.io/sig-storage-kubernetes#gce-serial then showed a failure. The fix is simple: in the ephemeral case, the PVC name isn't set in advance in pvc.Name and instead must be computed. The fix now was tested on a kubetest cluster in GCE. 
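For reviewers, a short illustration (not part of the diff) of how that name is computed: the test now imports k8s.io/component-helpers/storage/ephemeral and derives the claim name from the pod and its volume, because for a generic ephemeral volume the PVC is created by the controller as "<pod name>-<volume name>". The helper function name below is hypothetical; the patch calls ephemeral.VolumeClaimName directly where the PVC name is needed.

    import (
        v1 "k8s.io/api/core/v1"
        "k8s.io/component-helpers/storage/ephemeral"
    )

    // pvcNameForFirstVolume is a hypothetical helper used only for illustration.
    func pvcNameForFirstVolume(pod *v1.Pod) string {
        // Returns "<pod name>-<volume name>" for the pod's first (ephemeral) volume.
        return ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0])
    }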
Signed-off-by: Patrick Ohly --- test/e2e/storage/volume_metrics.go | 65 +++++++++++++++++------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/test/e2e/storage/volume_metrics.go b/test/e2e/storage/volume_metrics.go index d4688a56fb3..9e2c7b3d93a 100644 --- a/test/e2e/storage/volume_metrics.go +++ b/test/e2e/storage/volume_metrics.go @@ -30,6 +30,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" "k8s.io/component-base/metrics/testutil" + "k8s.io/component-helpers/storage/ephemeral" kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/test/e2e/framework" e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" @@ -211,14 +212,15 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { verifyMetricCount(storageOpMetrics, updatedStorageMetrics, "volume_provision", true) } - filesystemMode := func(ephemeral bool) { - if !ephemeral { + filesystemMode := func(isEphemeral bool) { + pvcName := pvc.Name + if !isEphemeral { pvc, err = c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}) framework.ExpectNoError(err) framework.ExpectNotEqual(pvc, nil) } - pod := makePod(ns, pvc, ephemeral) + pod := makePod(ns, pvc, isEphemeral) pod, err = c.CoreV1().Pods(ns).Create(context.TODO(), pod, metav1.CreateOptions{}) framework.ExpectNoError(err) @@ -228,6 +230,11 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { pod, err = c.CoreV1().Pods(ns).Get(context.TODO(), pod.Name, metav1.GetOptions{}) framework.ExpectNoError(err) + if isEphemeral { + pvcName = ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0]) + } + pvcNamespace := pod.Namespace + // Verify volume stat metrics were collected for the referenced PVC volumeStatKeys := []string{ kubeletmetrics.VolumeStatsUsedBytesKey, @@ -251,31 +258,31 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { framework.Logf("Error fetching kubelet metrics") return false, err } - if !findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) { + if !findVolumeStatMetric(kubeletKeyName, pvcNamespace, pvcName, kubeMetrics) { return false, nil } return true, nil }) - framework.ExpectNoError(waitErr, "Unable to find metric %s for PVC %s/%s", kubeletKeyName, pvc.Namespace, pvc.Name) + framework.ExpectNoError(waitErr, "Unable to find metric %s for PVC %s/%s", kubeletKeyName, pvcNamespace, pvcName) for _, key := range volumeStatKeys { kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key) - found := findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) - framework.ExpectEqual(found, true, "PVC %s, Namespace %s not found for %s", pvc.Name, pvc.Namespace, kubeletKeyName) + found := findVolumeStatMetric(kubeletKeyName, pvcNamespace, pvcName, kubeMetrics) + framework.ExpectEqual(found, true, "PVC %s, Namespace %s not found for %s", pvcName, pvcNamespace, kubeletKeyName) } framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name) framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod)) } - blockmode := func(ephemeral bool) { - if !ephemeral { + blockmode := func(isEphemeral bool) { + if !isEphemeral { pvcBlock, err = c.CoreV1().PersistentVolumeClaims(pvcBlock.Namespace).Create(context.TODO(), pvcBlock, metav1.CreateOptions{}) framework.ExpectNoError(err) framework.ExpectNotEqual(pvcBlock, nil) } - pod := makePod(ns, pvcBlock, ephemeral) + pod := makePod(ns, pvcBlock, isEphemeral) pod.Spec.Containers[0].VolumeDevices = []v1.VolumeDevice{{ Name: 
pod.Spec.Volumes[0].Name, DevicePath: "/mnt/" + pvcBlock.Name, @@ -326,14 +333,14 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod)) } - totalTime := func(ephemeral bool) { - if !ephemeral { + totalTime := func(isEphemeral bool) { + if !isEphemeral { pvc, err = c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}) framework.ExpectNoError(err) framework.ExpectNotEqual(pvc, nil) } - pod := makePod(ns, pvc, ephemeral) + pod := makePod(ns, pvc, isEphemeral) pod, err = c.CoreV1().Pods(ns).Create(context.TODO(), pod, metav1.CreateOptions{}) framework.ExpectNoError(err) @@ -357,14 +364,14 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod)) } - volumeManager := func(ephemeral bool) { - if !ephemeral { + volumeManager := func(isEphemeral bool) { + if !isEphemeral { pvc, err = c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}) framework.ExpectNoError(err) framework.ExpectNotEqual(pvc, nil) } - pod := makePod(ns, pvc, ephemeral) + pod := makePod(ns, pvc, isEphemeral) pod, err = c.CoreV1().Pods(ns).Create(context.TODO(), pod, metav1.CreateOptions{}) framework.ExpectNoError(err) @@ -387,14 +394,14 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod)) } - adController := func(ephemeral bool) { - if !ephemeral { + adController := func(isEphemeral bool) { + if !isEphemeral { pvc, err = c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}) framework.ExpectNoError(err) framework.ExpectNotEqual(pvc, nil) } - pod := makePod(ns, pvc, ephemeral) + pod := makePod(ns, pvc, isEphemeral) // Get metrics controllerMetrics, err := metricsGrabber.GrabFromControllerManager() @@ -447,27 +454,27 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod)) } - testAll := func(ephemeral bool) { + testAll := func(isEphemeral bool) { ginkgo.It("should create prometheus metrics for volume provisioning and attach/detach", func() { - provisioning(ephemeral) + provisioning(isEphemeral) }) ginkgo.It("should create prometheus metrics for volume provisioning errors [Slow]", func() { - provisioningError(ephemeral) + provisioningError(isEphemeral) }) ginkgo.It("should create volume metrics with the correct FilesystemMode PVC ref", func() { - filesystemMode(ephemeral) + filesystemMode(isEphemeral) }) ginkgo.It("should create volume metrics with the correct BlockMode PVC ref", func() { - blockmode(ephemeral) + blockmode(isEphemeral) }) ginkgo.It("should create metrics for total time taken in volume operations in P/V Controller", func() { - totalTime(ephemeral) + totalTime(isEphemeral) }) ginkgo.It("should create volume metrics in Volume Manager", func() { - volumeManager(ephemeral) + volumeManager(isEphemeral) }) ginkgo.It("should create metrics for total number of volumes in A/D Controller", func() { - adController(ephemeral) + adController(isEphemeral) }) } @@ -873,10 +880,10 @@ func waitForADControllerStatesMetrics(metricsGrabber *e2emetrics.Grabber, metric // makePod creates a pod which either references the PVC or creates it via a // generic ephemeral volume claim template. 
-func makePod(ns string, pvc *v1.PersistentVolumeClaim, ephemeral bool) *v1.Pod { +func makePod(ns string, pvc *v1.PersistentVolumeClaim, isEphemeral bool) *v1.Pod { claims := []*v1.PersistentVolumeClaim{pvc} pod := e2epod.MakePod(ns, nil, claims, false, "") - if ephemeral { + if isEphemeral { volSrc := pod.Spec.Volumes[0] volSrc.PersistentVolumeClaim = nil volSrc.Ephemeral = &v1.EphemeralVolumeSource{ From 5462d97e62297cc588bc8dda2976db1b9ec1cf98 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 13 Oct 2021 14:13:48 +0200 Subject: [PATCH 3/4] storage e2e: test snapshotting of generic ephemeral volumes Conceptually, snapshots have to be taken while the pod and thus the volume exist. Snapshotting has an issue where flushing of data is not guaranteed while the volume is still staged on the node, so the test relied on deleting the pod and checking for the volume to be unused. That part of the test cannot be done for ephmeral volumes. --- test/e2e/storage/framework/testpattern.go | 16 +++ test/e2e/storage/testsuites/provisioning.go | 19 ++- test/e2e/storage/testsuites/snapshottable.go | 132 +++++++++++++------ 3 files changed, 123 insertions(+), 44 deletions(-) diff --git a/test/e2e/storage/framework/testpattern.go b/test/e2e/storage/framework/testpattern.go index 3a61d1ba1c6..32421764d49 100644 --- a/test/e2e/storage/framework/testpattern.go +++ b/test/e2e/storage/framework/testpattern.go @@ -314,6 +314,14 @@ var ( SnapshotDeletionPolicy: DeleteSnapshot, VolType: DynamicPV, } + // EphemeralSnapshotDelete is TestPattern for snapshotting of a generic ephemeral volume + // where snapshots are deleted. + EphemeralSnapshotDelete = TestPattern{ + Name: "Ephemeral Snapshot (delete policy)", + SnapshotType: DynamicCreatedSnapshot, + SnapshotDeletionPolicy: DeleteSnapshot, + VolType: GenericEphemeralVolume, + } // DynamicSnapshotRetain is TestPattern for "Dynamic snapshot" DynamicSnapshotRetain = TestPattern{ Name: "Dynamic Snapshot (retain policy)", @@ -328,6 +336,14 @@ var ( SnapshotDeletionPolicy: RetainSnapshot, VolType: DynamicPV, } + // EphemeralSnapshotDelete is TestPattern for snapshotting of a generic ephemeral volume + // where snapshots are preserved. + EphemeralSnapshotRetain = TestPattern{ + Name: "Ephemeral Snapshot (retain policy)", + SnapshotType: DynamicCreatedSnapshot, + SnapshotDeletionPolicy: RetainSnapshot, + VolType: GenericEphemeralVolume, + } // Definitions for volume expansion case diff --git a/test/e2e/storage/testsuites/provisioning.go b/test/e2e/storage/testsuites/provisioning.go index 03717e210b6..eab48a2092c 100644 --- a/test/e2e/storage/testsuites/provisioning.go +++ b/test/e2e/storage/testsuites/provisioning.go @@ -698,6 +698,16 @@ func RunInPodWithVolume(c clientset.Interface, t *framework.TimeoutContext, ns, // StartInPodWithVolume starts a command in a pod with given claim mounted to /mnt directory // The caller is responsible for checking the pod and deleting it. func StartInPodWithVolume(c clientset.Interface, ns, claimName, podName, command string, node e2epod.NodeSelection) *v1.Pod { + return StartInPodWithVolumeSource(c, v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: claimName, + }, + }, ns, podName, command, node) +} + +// StartInPodWithVolumeSource starts a command in a pod with given volume mounted to /mnt directory +// The caller is responsible for checking the pod and deleting it. 
+func StartInPodWithVolumeSource(c clientset.Interface, volSrc v1.VolumeSource, ns, podName, command string, node e2epod.NodeSelection) *v1.Pod { pod := &v1.Pod{ TypeMeta: metav1.TypeMeta{ Kind: "Pod", @@ -726,13 +736,8 @@ func StartInPodWithVolume(c clientset.Interface, ns, claimName, podName, command RestartPolicy: v1.RestartPolicyNever, Volumes: []v1.Volume{ { - Name: "my-volume", - VolumeSource: v1.VolumeSource{ - PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ - ClaimName: claimName, - ReadOnly: false, - }, - }, + Name: "my-volume", + VolumeSource: volSrc, }, }, }, diff --git a/test/e2e/storage/testsuites/snapshottable.go b/test/e2e/storage/testsuites/snapshottable.go index ab8ed97ea00..866c970b7fa 100644 --- a/test/e2e/storage/testsuites/snapshottable.go +++ b/test/e2e/storage/testsuites/snapshottable.go @@ -32,6 +32,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/client-go/dynamic" clientset "k8s.io/client-go/kubernetes" + "k8s.io/component-helpers/storage/ephemeral" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2epv "k8s.io/kubernetes/test/e2e/framework/pv" @@ -75,6 +76,8 @@ func InitSnapshottableTestSuite() storageframework.TestSuite { patterns := []storageframework.TestPattern{ storageframework.DynamicSnapshotDelete, storageframework.DynamicSnapshotRetain, + storageframework.EphemeralSnapshotDelete, + storageframework.EphemeralSnapshotRetain, storageframework.PreprovisionedSnapshotDelete, storageframework.PreprovisionedSnapshotRetain, } @@ -137,29 +140,61 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, }) volumeResource = storageframework.CreateVolumeResource(dDriver, config, pattern, s.GetTestSuiteInfo().SupportedSizeRange) - pvc = volumeResource.Pvc - sc = volumeResource.Sc - claimSize = pvc.Spec.Resources.Requests.Storage().String() + pvcName := "" + pvcNamespace := f.Namespace.Name ginkgo.By("[init] starting a pod to use the claim") - originalMntTestData = fmt.Sprintf("hello from %s namespace", pvc.GetNamespace()) + originalMntTestData = fmt.Sprintf("hello from %s namespace", pvcNamespace) command := fmt.Sprintf("echo '%s' > %s", originalMntTestData, datapath) - pod := RunInPodWithVolume(cs, f.Timeouts, pvc.Namespace, pvc.Name, "pvc-snapshottable-tester", command, config.ClientNodeSelection) - - err = e2epv.WaitForPersistentVolumeClaimPhase(v1.ClaimBound, cs, pvc.Namespace, pvc.Name, framework.Poll, f.Timeouts.ClaimProvision) - framework.ExpectNoError(err) + pod := StartInPodWithVolumeSource(cs, *volumeResource.VolSource, f.Namespace.Name, "pvc-snapshottable-tester", command, config.ClientNodeSelection) + if pattern.VolType == storageframework.GenericEphemeralVolume { + // We can test snapshotting of generic ephemeral volumes. 
It's just a bit more complicated: + // - we need to start the pod + // - wait for the pod to stop + // - don't delete the pod because once it is marked for deletion, + // the PVC also gets marked and snapshotting no longer works + // (even when a finalizer prevents actual removal of the PVC) + // - skip data validation because data flushing wasn't guaranteed + // (see comments below) + cleanupSteps = append(cleanupSteps, func() { + e2epod.DeletePodWithWait(cs, pod) + }) + } else { + defer e2epod.DeletePodWithWait(cs, pod) + } + framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespaceTimeout(cs, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow)) + pod, err = cs.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "check pod after it terminated") // Get new copy of the claim ginkgo.By("[init] checking the claim") - pvc, err = cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + if pattern.VolType == storageframework.GenericEphemeralVolume { + pvcName = ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0]) + } else { + pvcName = volumeResource.Pvc.Name + } + + err = e2epv.WaitForPersistentVolumeClaimPhase(v1.ClaimBound, cs, pvcNamespace, pvcName, framework.Poll, f.Timeouts.ClaimProvision) framework.ExpectNoError(err) + pvc, err = cs.CoreV1().PersistentVolumeClaims(pvcNamespace).Get(context.TODO(), pvcName, metav1.GetOptions{}) + framework.ExpectNoError(err, "get PVC") + claimSize = pvc.Spec.Resources.Requests.Storage().String() + sc = volumeResource.Sc + // Get the bound PV ginkgo.By("[init] checking the PV") pv, err := cs.CoreV1().PersistentVolumes().Get(context.TODO(), pvc.Spec.VolumeName, metav1.GetOptions{}) framework.ExpectNoError(err) + if pattern.VolType == storageframework.GenericEphemeralVolume { + return + } + + ginkgo.By("[init] deleting the pod") + StopPod(cs, pod) + // At this point we know that: // - a pod was created with a PV that's supposed to have data // @@ -260,6 +295,9 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, snapshotContentSpec := vscontent.Object["spec"].(map[string]interface{}) volumeSnapshotRef := snapshotContentSpec["volumeSnapshotRef"].(map[string]interface{}) + var restoredPVC *v1.PersistentVolumeClaim + var restoredPod *v1.Pod + // Check SnapshotContent properties ginkgo.By("checking the SnapshotContent") // PreprovisionedCreatedSnapshot do not need to set volume snapshot class name @@ -269,15 +307,15 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, framework.ExpectEqual(volumeSnapshotRef["name"], vs.GetName()) framework.ExpectEqual(volumeSnapshotRef["namespace"], vs.GetNamespace()) - ginkgo.By("Modifying source data test") - var restoredPVC *v1.PersistentVolumeClaim - var restoredPod *v1.Pod - modifiedMntTestData := fmt.Sprintf("modified data from %s namespace", pvc.GetNamespace()) + if pattern.VolType != storageframework.GenericEphemeralVolume { + ginkgo.By("Modifying source data test") + modifiedMntTestData := fmt.Sprintf("modified data from %s namespace", pvc.GetNamespace()) - ginkgo.By("modifying the data in the source PVC") + ginkgo.By("modifying the data in the source PVC") - command := fmt.Sprintf("echo '%s' > %s", modifiedMntTestData, datapath) - RunInPodWithVolume(cs, f.Timeouts, pvc.Namespace, pvc.Name, "pvc-snapshottable-data-tester", command, config.ClientNodeSelection) + command := fmt.Sprintf("echo '%s' > %s", modifiedMntTestData, datapath) + RunInPodWithVolume(cs, 
f.Timeouts, pvc.Namespace, pvc.Name, "pvc-snapshottable-data-tester", command, config.ClientNodeSelection) + } ginkgo.By("creating a pvc from the snapshot") restoredPVC = e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ @@ -293,27 +331,45 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, Name: vs.GetName(), } - restoredPVC, err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Create(context.TODO(), restoredPVC, metav1.CreateOptions{}) - framework.ExpectNoError(err) - cleanupSteps = append(cleanupSteps, func() { - framework.Logf("deleting claim %q/%q", restoredPVC.Namespace, restoredPVC.Name) - // typically this claim has already been deleted - err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(context.TODO(), restoredPVC.Name, metav1.DeleteOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - framework.Failf("Error deleting claim %q. Error: %v", restoredPVC.Name, err) + if pattern.VolType != storageframework.GenericEphemeralVolume { + restoredPVC, err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Create(context.TODO(), restoredPVC, metav1.CreateOptions{}) + framework.ExpectNoError(err) + cleanupSteps = append(cleanupSteps, func() { + framework.Logf("deleting claim %q/%q", restoredPVC.Namespace, restoredPVC.Name) + // typically this claim has already been deleted + err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(context.TODO(), restoredPVC.Name, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + framework.Failf("Error deleting claim %q. Error: %v", restoredPVC.Name, err) + } + }) + } + + ginkgo.By("starting a pod to use the snapshot") + volSrc := v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: restoredPVC.Name, + }, + } + if pattern.VolType == storageframework.GenericEphemeralVolume { + volSrc = v1.VolumeSource{ + Ephemeral: &v1.EphemeralVolumeSource{ + VolumeClaimTemplate: &v1.PersistentVolumeClaimTemplate{ + Spec: restoredPVC.Spec, + }, + }, } - }) + } - ginkgo.By("starting a pod to use the claim") - - restoredPod = StartInPodWithVolume(cs, restoredPVC.Namespace, restoredPVC.Name, "restored-pvc-tester", "sleep 300", config.ClientNodeSelection) + restoredPod = StartInPodWithVolumeSource(cs, volSrc, restoredPVC.Namespace, "restored-pvc-tester", "sleep 300", config.ClientNodeSelection) cleanupSteps = append(cleanupSteps, func() { StopPod(cs, restoredPod) }) framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(cs, restoredPod.Name, restoredPod.Namespace, f.Timeouts.PodStartSlow)) - commands := e2evolume.GenerateReadFileCmd(datapath) - _, err = framework.LookForStringInPodExec(restoredPod.Namespace, restoredPod.Name, commands, originalMntTestData, time.Minute) - framework.ExpectNoError(err) + if pattern.VolType != storageframework.GenericEphemeralVolume { + commands := e2evolume.GenerateReadFileCmd(datapath) + _, err = framework.LookForStringInPodExec(restoredPod.Namespace, restoredPod.Name, commands, originalMntTestData, time.Minute) + framework.ExpectNoError(err) + } ginkgo.By("should delete the VolumeSnapshotContent according to its deletion policy") @@ -322,12 +378,14 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, // Snapshot deletion and some are opposite. 
err = storageutils.DeleteSnapshotWithoutWaiting(dc, vs.GetNamespace(), vs.GetName())
 framework.ExpectNoError(err)
- framework.Logf("deleting restored pod %q/%q", restoredPod.Namespace, restoredPod.Name)
- err = cs.CoreV1().Pods(restoredPod.Namespace).Delete(context.TODO(), restoredPod.Name, metav1.DeleteOptions{})
- framework.ExpectNoError(err)
- framework.Logf("deleting restored PVC %q/%q", restoredPVC.Namespace, restoredPVC.Name)
- err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(context.TODO(), restoredPVC.Name, metav1.DeleteOptions{})
- framework.ExpectNoError(err)
+ if pattern.VolType != storageframework.GenericEphemeralVolume {
+ framework.Logf("deleting restored pod %q/%q", restoredPod.Namespace, restoredPod.Name)
+ err = cs.CoreV1().Pods(restoredPod.Namespace).Delete(context.TODO(), restoredPod.Name, metav1.DeleteOptions{})
+ framework.ExpectNoError(err)
+ framework.Logf("deleting restored PVC %q/%q", restoredPVC.Namespace, restoredPVC.Name)
+ err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(context.TODO(), restoredPVC.Name, metav1.DeleteOptions{})
+ framework.ExpectNoError(err)
+ }

 // Wait for the Snapshot to be actually deleted from API server
 err = storageutils.WaitForNamespacedGVRDeletion(dc, storageutils.SnapshotGVR, vs.GetNamespace(), vs.GetNamespace(), framework.Poll, f.Timeouts.SnapshotDelete)

From c0bdf149425d4b369fce09cd934e7734e12b2cd0 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Wed, 20 Oct 2021 11:29:33 +0200
Subject: [PATCH 4/4] storage e2e: refactor snapshottable

During PR review it was pointed out that the branches for ephemeral
vs. persistent make the test harder to read. Therefore all code that
depends on if checks gets moved into two different versions of the
test, one that runs for ephemeral volumes and one for persistent
volumes, with skip statements at the beginning.
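To illustrate that structure (condensed from the diff below, not additional code), each flavor of the test now begins by skipping the volume type it does not cover:

    // Ephemeral flavor; the persistent flavor uses the inverted check.
    ginkgo.It("should check snapshot fields, check restore correctly works, check deletion (ephemeral)", func() {
        if pattern.VolType != storageframework.GenericEphemeralVolume {
            e2eskipper.Skipf("volume type %q is not ephemeral", pattern.VolType)
        }
        init()
        // ... rest of the ephemeral test ...
    })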
--- test/e2e/storage/testsuites/snapshottable.go | 386 +++++++++++-------- 1 file changed, 235 insertions(+), 151 deletions(-) diff --git a/test/e2e/storage/testsuites/snapshottable.go b/test/e2e/storage/testsuites/snapshottable.go index 866c970b7fa..c797e3847c0 100644 --- a/test/e2e/storage/testsuites/snapshottable.go +++ b/test/e2e/storage/testsuites/snapshottable.go @@ -29,7 +29,6 @@ import ( storagev1 "k8s.io/api/storage/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/client-go/dynamic" clientset "k8s.io/client-go/kubernetes" "k8s.io/component-helpers/storage/ephemeral" @@ -119,6 +118,8 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, dc dynamic.Interface pvc *v1.PersistentVolumeClaim sc *storagev1.StorageClass + volumeResource *storageframework.VolumeResource + pod *v1.Pod claimSize string originalMntTestData string ) @@ -134,110 +135,21 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, config, driverCleanup = driver.PrepareTest(f) cleanupSteps = append(cleanupSteps, driverCleanup) - var volumeResource *storageframework.VolumeResource cleanupSteps = append(cleanupSteps, func() { framework.ExpectNoError(volumeResource.CleanupResource()) }) volumeResource = storageframework.CreateVolumeResource(dDriver, config, pattern, s.GetTestSuiteInfo().SupportedSizeRange) - pvcName := "" - pvcNamespace := f.Namespace.Name - ginkgo.By("[init] starting a pod to use the claim") - originalMntTestData = fmt.Sprintf("hello from %s namespace", pvcNamespace) + originalMntTestData = fmt.Sprintf("hello from %s namespace", f.Namespace.Name) command := fmt.Sprintf("echo '%s' > %s", originalMntTestData, datapath) - pod := StartInPodWithVolumeSource(cs, *volumeResource.VolSource, f.Namespace.Name, "pvc-snapshottable-tester", command, config.ClientNodeSelection) - if pattern.VolType == storageframework.GenericEphemeralVolume { - // We can test snapshotting of generic ephemeral volumes. 
It's just a bit more complicated: - // - we need to start the pod - // - wait for the pod to stop - // - don't delete the pod because once it is marked for deletion, - // the PVC also gets marked and snapshotting no longer works - // (even when a finalizer prevents actual removal of the PVC) - // - skip data validation because data flushing wasn't guaranteed - // (see comments below) - cleanupSteps = append(cleanupSteps, func() { - e2epod.DeletePodWithWait(cs, pod) - }) - } else { - defer e2epod.DeletePodWithWait(cs, pod) - } - framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespaceTimeout(cs, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow)) - pod, err = cs.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, metav1.GetOptions{}) - framework.ExpectNoError(err, "check pod after it terminated") - - // Get new copy of the claim - ginkgo.By("[init] checking the claim") - if pattern.VolType == storageframework.GenericEphemeralVolume { - pvcName = ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0]) - } else { - pvcName = volumeResource.Pvc.Name - } - - err = e2epv.WaitForPersistentVolumeClaimPhase(v1.ClaimBound, cs, pvcNamespace, pvcName, framework.Poll, f.Timeouts.ClaimProvision) - framework.ExpectNoError(err) - - pvc, err = cs.CoreV1().PersistentVolumeClaims(pvcNamespace).Get(context.TODO(), pvcName, metav1.GetOptions{}) - framework.ExpectNoError(err, "get PVC") - claimSize = pvc.Spec.Resources.Requests.Storage().String() - sc = volumeResource.Sc - - // Get the bound PV - ginkgo.By("[init] checking the PV") - pv, err := cs.CoreV1().PersistentVolumes().Get(context.TODO(), pvc.Spec.VolumeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - - if pattern.VolType == storageframework.GenericEphemeralVolume { - return - } - - ginkgo.By("[init] deleting the pod") - StopPod(cs, pod) - - // At this point we know that: - // - a pod was created with a PV that's supposed to have data - // - // However there's a caching issue that @jinxu97 explained and it's related with the pod & volume - // lifecycle, to understand it we first analyze what the volumemanager does: - // - when a pod is delete the volumemanager will try to cleanup the volume mounts - // - NodeUnpublishVolume: unbinds the bind mount from the container - // - Linux: the bind mount is removed, which does not flush any cache - // - Windows: we delete a symlink, data's not flushed yet to disk - // - NodeUnstageVolume: unmount the global mount - // - Linux: disk is unmounted and all caches flushed. - // - Windows: data is flushed to disk and the disk is detached - // - // Pod deletion might not guarantee a data flush to disk, however NodeUnstageVolume adds the logic - // to flush the data to disk (see #81690 for details). We need to wait for NodeUnstageVolume, as - // NodeUnpublishVolume only removes the bind mount, which doesn't force the caches to flush. - // It's possible to create empty snapshots if we don't wait (see #101279 for details). - // - // In the following code by checking if the PV is not in the node.Status.VolumesInUse field we - // ensure that the volume is not used by the node anymore (an indicator that NodeUnstageVolume has - // already finished) - nodeName := pod.Spec.NodeName - gomega.Expect(nodeName).NotTo(gomega.BeEmpty(), "pod.Spec.NodeName must not be empty") - - // Snapshot tests are only executed for CSI drivers. When CSI drivers - // are attached to the node they use VolumeHandle instead of the pv.Name. 
- volumeName := pv.Spec.PersistentVolumeSource.CSI.VolumeHandle - - ginkgo.By(fmt.Sprintf("[init] waiting until the node=%s is not using the volume=%s", nodeName, volumeName)) - success := storageutils.WaitUntil(framework.Poll, f.Timeouts.PVDelete, func() bool { - node, err := cs.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - volumesInUse := node.Status.VolumesInUse - framework.Logf("current volumes in use: %+v", volumesInUse) - for i := 0; i < len(volumesInUse); i++ { - if strings.HasSuffix(string(volumesInUse[i]), volumeName) { - return false - } - } - return true + pod = StartInPodWithVolumeSource(cs, *volumeResource.VolSource, f.Namespace.Name, "pvc-snapshottable-tester", command, config.ClientNodeSelection) + cleanupSteps = append(cleanupSteps, func() { + e2epod.DeletePodWithWait(cs, pod) }) - framework.ExpectEqual(success, true) + // At this point a pod is running with a PVC. How to proceed depends on which test is running. } cleanup := func() { @@ -255,32 +167,57 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, } } - ginkgo.BeforeEach(func() { - init() - }) ginkgo.AfterEach(func() { cleanup() }) ginkgo.Context("", func() { - var ( - vs *unstructured.Unstructured - vscontent *unstructured.Unstructured - vsc *unstructured.Unstructured - ) + ginkgo.It("should check snapshot fields, check restore correctly works, check deletion (ephemeral)", func() { + if pattern.VolType != storageframework.GenericEphemeralVolume { + e2eskipper.Skipf("volume type %q is not ephemeral", pattern.VolType) + } + init() - ginkgo.BeforeEach(func() { - var sr *storageframework.SnapshotResource + // We can test snapshotting of generic + // ephemeral volumes by creating the snapshot + // while the pod is running (online). We cannot do it after pod deletion, + // because then the PVC also gets marked and snapshotting no longer works + // (even when a finalizer prevents actual removal of the PVC). + // + // Because data consistency cannot be + // guaranteed, this flavor of the test doesn't + // check the content of the snapshot. 
+ + framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespaceTimeout(cs, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow)) + pod, err = cs.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "check pod after it terminated") + + // Get new copy of the claim + ginkgo.By("[init] checking the claim") + pvcName := ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0]) + pvcNamespace := pod.Namespace + + parameters := map[string]string{} + sr := storageframework.CreateSnapshotResource(sDriver, config, pattern, pvcName, pvcNamespace, f.Timeouts, parameters) cleanupSteps = append(cleanupSteps, func() { framework.ExpectNoError(sr.CleanupResource(f.Timeouts)) }) - parameters := map[string]string{} - sr = storageframework.CreateSnapshotResource(sDriver, config, pattern, pvc.GetName(), pvc.GetNamespace(), f.Timeouts, parameters) - vs = sr.Vs - vscontent = sr.Vscontent - vsc = sr.Vsclass - }) - ginkgo.It("should check snapshot fields, check restore correctly works after modifying source data, check deletion", func() { + vs := sr.Vs + vsc := sr.Vsclass + + err = e2epv.WaitForPersistentVolumeClaimPhase(v1.ClaimBound, cs, pvcNamespace, pvcName, framework.Poll, f.Timeouts.ClaimProvision) + framework.ExpectNoError(err) + + pvc, err = cs.CoreV1().PersistentVolumeClaims(pvcNamespace).Get(context.TODO(), pvcName, metav1.GetOptions{}) + framework.ExpectNoError(err, "get PVC") + claimSize = pvc.Spec.Resources.Requests.Storage().String() + sc = volumeResource.Sc + + // Get the bound PV + ginkgo.By("[init] checking the PV") + _, err := cs.CoreV1().PersistentVolumes().Get(context.TODO(), pvc.Spec.VolumeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + // Get new copy of the snapshot ginkgo.By("checking the snapshot") vs, err = dc.Resource(storageutils.SnapshotGVR).Namespace(vs.GetNamespace()).Get(context.TODO(), vs.GetName(), metav1.GetOptions{}) @@ -289,7 +226,7 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, // Get the bound snapshotContent snapshotStatus := vs.Object["status"].(map[string]interface{}) snapshotContentName := snapshotStatus["boundVolumeSnapshotContentName"].(string) - vscontent, err = dc.Resource(storageutils.SnapshotContentGVR).Get(context.TODO(), snapshotContentName, metav1.GetOptions{}) + vscontent, err := dc.Resource(storageutils.SnapshotContentGVR).Get(context.TODO(), snapshotContentName, metav1.GetOptions{}) framework.ExpectNoError(err) snapshotContentSpec := vscontent.Object["spec"].(map[string]interface{}) @@ -307,16 +244,6 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, framework.ExpectEqual(volumeSnapshotRef["name"], vs.GetName()) framework.ExpectEqual(volumeSnapshotRef["namespace"], vs.GetNamespace()) - if pattern.VolType != storageframework.GenericEphemeralVolume { - ginkgo.By("Modifying source data test") - modifiedMntTestData := fmt.Sprintf("modified data from %s namespace", pvc.GetNamespace()) - - ginkgo.By("modifying the data in the source PVC") - - command := fmt.Sprintf("echo '%s' > %s", modifiedMntTestData, datapath) - RunInPodWithVolume(cs, f.Timeouts, pvc.Namespace, pvc.Name, "pvc-snapshottable-data-tester", command, config.ClientNodeSelection) - } - ginkgo.By("creating a pvc from the snapshot") restoredPVC = e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ ClaimSize: claimSize, @@ -331,33 +258,13 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, Name: vs.GetName(), } - if 
pattern.VolType != storageframework.GenericEphemeralVolume { - restoredPVC, err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Create(context.TODO(), restoredPVC, metav1.CreateOptions{}) - framework.ExpectNoError(err) - cleanupSteps = append(cleanupSteps, func() { - framework.Logf("deleting claim %q/%q", restoredPVC.Namespace, restoredPVC.Name) - // typically this claim has already been deleted - err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(context.TODO(), restoredPVC.Name, metav1.DeleteOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - framework.Failf("Error deleting claim %q. Error: %v", restoredPVC.Name, err) - } - }) - } - ginkgo.By("starting a pod to use the snapshot") volSrc := v1.VolumeSource{ - PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ - ClaimName: restoredPVC.Name, - }, - } - if pattern.VolType == storageframework.GenericEphemeralVolume { - volSrc = v1.VolumeSource{ - Ephemeral: &v1.EphemeralVolumeSource{ - VolumeClaimTemplate: &v1.PersistentVolumeClaimTemplate{ - Spec: restoredPVC.Spec, - }, + Ephemeral: &v1.EphemeralVolumeSource{ + VolumeClaimTemplate: &v1.PersistentVolumeClaimTemplate{ + Spec: restoredPVC.Spec, }, - } + }, } restoredPod = StartInPodWithVolumeSource(cs, volSrc, restoredPVC.Namespace, "restored-pvc-tester", "sleep 300", config.ClientNodeSelection) @@ -378,15 +285,192 @@ func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, // Snapshot deletion and some are opposite. err = storageutils.DeleteSnapshotWithoutWaiting(dc, vs.GetNamespace(), vs.GetName()) framework.ExpectNoError(err) - if pattern.VolType != storageframework.GenericEphemeralVolume { - framework.Logf("deleting restored pod %q/%q", restoredPod.Namespace, restoredPod.Name) - err = cs.CoreV1().Pods(restoredPod.Namespace).Delete(context.TODO(), restoredPod.Name, metav1.DeleteOptions{}) + + // Wait for the Snapshot to be actually deleted from API server + err = storageutils.WaitForNamespacedGVRDeletion(dc, storageutils.SnapshotGVR, vs.GetNamespace(), vs.GetNamespace(), framework.Poll, f.Timeouts.SnapshotDelete) + framework.ExpectNoError(err) + + switch pattern.SnapshotDeletionPolicy { + case storageframework.DeleteSnapshot: + ginkgo.By("checking the SnapshotContent has been deleted") + err = utils.WaitForGVRDeletion(dc, storageutils.SnapshotContentGVR, vscontent.GetName(), framework.Poll, f.Timeouts.SnapshotDelete) framework.ExpectNoError(err) - framework.Logf("deleting restored PVC %q/%q", restoredPVC.Namespace, restoredPVC.Name) + case storageframework.RetainSnapshot: + ginkgo.By("checking the SnapshotContent has not been deleted") + err = utils.WaitForGVRDeletion(dc, storageutils.SnapshotContentGVR, vscontent.GetName(), 1*time.Second /* poll */, 30*time.Second /* timeout */) + framework.ExpectError(err) + } + }) + + ginkgo.It("should check snapshot fields, check restore correctly works after modifying source data, check deletion (persistent)", func() { + if pattern.VolType == storageframework.GenericEphemeralVolume { + e2eskipper.Skipf("volume type %q is ephemeral", pattern.VolType) + } + init() + + framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespaceTimeout(cs, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow)) + pod, err = cs.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "check pod after it terminated") + + // Get new copy of the claim + ginkgo.By("[init] checking the claim") + pvcName := volumeResource.Pvc.Name + pvcNamespace := 
volumeResource.Pvc.Namespace + + parameters := map[string]string{} + sr := storageframework.CreateSnapshotResource(sDriver, config, pattern, pvcName, pvcNamespace, f.Timeouts, parameters) + cleanupSteps = append(cleanupSteps, func() { + framework.ExpectNoError(sr.CleanupResource(f.Timeouts)) + }) + vs := sr.Vs + vsc := sr.Vsclass + + err = e2epv.WaitForPersistentVolumeClaimPhase(v1.ClaimBound, cs, pvcNamespace, pvcName, framework.Poll, f.Timeouts.ClaimProvision) + framework.ExpectNoError(err) + + pvc, err = cs.CoreV1().PersistentVolumeClaims(pvcNamespace).Get(context.TODO(), pvcName, metav1.GetOptions{}) + framework.ExpectNoError(err, "get PVC") + claimSize = pvc.Spec.Resources.Requests.Storage().String() + sc = volumeResource.Sc + + // Get the bound PV + ginkgo.By("[init] checking the PV") + pv, err := cs.CoreV1().PersistentVolumes().Get(context.TODO(), pvc.Spec.VolumeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("[init] deleting the pod") + StopPod(cs, pod) + + // At this point we know that: + // - a pod was created with a PV that's supposed to have data + // + // However there's a caching issue that @jinxu97 explained and it's related with the pod & volume + // lifecycle, to understand it we first analyze what the volumemanager does: + // - when a pod is delete the volumemanager will try to cleanup the volume mounts + // - NodeUnpublishVolume: unbinds the bind mount from the container + // - Linux: the bind mount is removed, which does not flush any cache + // - Windows: we delete a symlink, data's not flushed yet to disk + // - NodeUnstageVolume: unmount the global mount + // - Linux: disk is unmounted and all caches flushed. + // - Windows: data is flushed to disk and the disk is detached + // + // Pod deletion might not guarantee a data flush to disk, however NodeUnstageVolume adds the logic + // to flush the data to disk (see #81690 for details). We need to wait for NodeUnstageVolume, as + // NodeUnpublishVolume only removes the bind mount, which doesn't force the caches to flush. + // It's possible to create empty snapshots if we don't wait (see #101279 for details). + // + // In the following code by checking if the PV is not in the node.Status.VolumesInUse field we + // ensure that the volume is not used by the node anymore (an indicator that NodeUnstageVolume has + // already finished) + nodeName := pod.Spec.NodeName + gomega.Expect(nodeName).NotTo(gomega.BeEmpty(), "pod.Spec.NodeName must not be empty") + + // Snapshot tests are only executed for CSI drivers. When CSI drivers + // are attached to the node they use VolumeHandle instead of the pv.Name. 
+ volumeName := pv.Spec.PersistentVolumeSource.CSI.VolumeHandle + + ginkgo.By(fmt.Sprintf("[init] waiting until the node=%s is not using the volume=%s", nodeName, volumeName)) + success := storageutils.WaitUntil(framework.Poll, f.Timeouts.PVDelete, func() bool { + node, err := cs.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + volumesInUse := node.Status.VolumesInUse + framework.Logf("current volumes in use: %+v", volumesInUse) + for i := 0; i < len(volumesInUse); i++ { + if strings.HasSuffix(string(volumesInUse[i]), volumeName) { + return false + } + } + return true + }) + if !success { + framework.Failf("timed out waiting for node=%s to not use the volume=%s", nodeName, volumeName) + } + + // Get new copy of the snapshot + ginkgo.By("checking the snapshot") + vs, err = dc.Resource(storageutils.SnapshotGVR).Namespace(vs.GetNamespace()).Get(context.TODO(), vs.GetName(), metav1.GetOptions{}) + framework.ExpectNoError(err) + + // Get the bound snapshotContent + snapshotStatus := vs.Object["status"].(map[string]interface{}) + snapshotContentName := snapshotStatus["boundVolumeSnapshotContentName"].(string) + vscontent, err := dc.Resource(storageutils.SnapshotContentGVR).Get(context.TODO(), snapshotContentName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + snapshotContentSpec := vscontent.Object["spec"].(map[string]interface{}) + volumeSnapshotRef := snapshotContentSpec["volumeSnapshotRef"].(map[string]interface{}) + + var restoredPVC *v1.PersistentVolumeClaim + var restoredPod *v1.Pod + + // Check SnapshotContent properties + ginkgo.By("checking the SnapshotContent") + // PreprovisionedCreatedSnapshot do not need to set volume snapshot class name + if pattern.SnapshotType != storageframework.PreprovisionedCreatedSnapshot { + framework.ExpectEqual(snapshotContentSpec["volumeSnapshotClassName"], vsc.GetName()) + } + framework.ExpectEqual(volumeSnapshotRef["name"], vs.GetName()) + framework.ExpectEqual(volumeSnapshotRef["namespace"], vs.GetNamespace()) + + ginkgo.By("Modifying source data test") + modifiedMntTestData := fmt.Sprintf("modified data from %s namespace", pvc.GetNamespace()) + + ginkgo.By("modifying the data in the source PVC") + + command := fmt.Sprintf("echo '%s' > %s", modifiedMntTestData, datapath) + RunInPodWithVolume(cs, f.Timeouts, pvc.Namespace, pvc.Name, "pvc-snapshottable-data-tester", command, config.ClientNodeSelection) + + ginkgo.By("creating a pvc from the snapshot") + restoredPVC = e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ + ClaimSize: claimSize, + StorageClassName: &(sc.Name), + }, config.Framework.Namespace.Name) + + group := "snapshot.storage.k8s.io" + + restoredPVC.Spec.DataSource = &v1.TypedLocalObjectReference{ + APIGroup: &group, + Kind: "VolumeSnapshot", + Name: vs.GetName(), + } + + restoredPVC, err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Create(context.TODO(), restoredPVC, metav1.CreateOptions{}) + framework.ExpectNoError(err) + cleanupSteps = append(cleanupSteps, func() { + framework.Logf("deleting claim %q/%q", restoredPVC.Namespace, restoredPVC.Name) + // typically this claim has already been deleted err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(context.TODO(), restoredPVC.Name, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + framework.Failf("Error deleting claim %q. 
Error: %v", restoredPVC.Name, err) + } + }) + + ginkgo.By("starting a pod to use the snapshot") + restoredPod = StartInPodWithVolume(cs, restoredPVC.Namespace, restoredPVC.Name, "restored-pvc-tester", "sleep 300", config.ClientNodeSelection) + cleanupSteps = append(cleanupSteps, func() { + StopPod(cs, restoredPod) + }) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(cs, restoredPod.Name, restoredPod.Namespace, f.Timeouts.PodStartSlow)) + if pattern.VolType != storageframework.GenericEphemeralVolume { + commands := e2evolume.GenerateReadFileCmd(datapath) + _, err = framework.LookForStringInPodExec(restoredPod.Namespace, restoredPod.Name, commands, originalMntTestData, time.Minute) framework.ExpectNoError(err) } + ginkgo.By("should delete the VolumeSnapshotContent according to its deletion policy") + + // Delete both Snapshot and restored Pod/PVC at the same time because different storage systems + // have different ordering of deletion. Some may require delete the restored PVC first before + // Snapshot deletion and some are opposite. + err = storageutils.DeleteSnapshotWithoutWaiting(dc, vs.GetNamespace(), vs.GetName()) + framework.ExpectNoError(err) + framework.Logf("deleting restored pod %q/%q", restoredPod.Namespace, restoredPod.Name) + err = cs.CoreV1().Pods(restoredPod.Namespace).Delete(context.TODO(), restoredPod.Name, metav1.DeleteOptions{}) + framework.ExpectNoError(err) + framework.Logf("deleting restored PVC %q/%q", restoredPVC.Namespace, restoredPVC.Name) + err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(context.TODO(), restoredPVC.Name, metav1.DeleteOptions{}) + framework.ExpectNoError(err) + // Wait for the Snapshot to be actually deleted from API server err = storageutils.WaitForNamespacedGVRDeletion(dc, storageutils.SnapshotGVR, vs.GetNamespace(), vs.GetNamespace(), framework.Poll, f.Timeouts.SnapshotDelete)