From fe9bdd1dd1afd075ee103cccc53b224be3b0f143 Mon Sep 17 00:00:00 2001 From: Rafael Castillo Date: Mon, 18 Aug 2025 19:53:06 +0000 Subject: [PATCH] Mark API server errors as transient in csi raw block driver Certain failures during SetupDevice and MapPodDevice are not treated as transient in the csi raw block plugin implementation, while they are in the file mode plugin. This can lead to certain failures causing volumes to be marked as unmounted incorrectly. This patch brings the block plugin up to parity with the fs one by marking the equivalent calls as transient. This mostly covers API server and some csi driver calls. --- pkg/volume/csi/csi_block.go | 15 ++++++------ pkg/volume/csi/csi_block_test.go | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/pkg/volume/csi/csi_block.go b/pkg/volume/csi/csi_block.go index 4626751c393..5613b354c0c 100644 --- a/pkg/volume/csi/csi_block.go +++ b/pkg/volume/csi/csi_block.go @@ -68,7 +68,6 @@ package csi import ( "context" "errors" - "fmt" "os" "path/filepath" @@ -171,8 +170,8 @@ func (m *csiBlockMapper) stageVolumeForBlock( if csiSource.NodeStageSecretRef != nil { nodeStageSecrets, err = getCredentialsFromSecret(m.k8s, csiSource.NodeStageSecretRef) if err != nil { - return "", fmt.Errorf("failed to get NodeStageSecretRef %s/%s: %v", - csiSource.NodeStageSecretRef.Namespace, csiSource.NodeStageSecretRef.Name, err) + return "", volumetypes.NewTransientOperationFailure(log("failed to get NodeStageSecretRef %s/%s: %v", + csiSource.NodeStageSecretRef.Namespace, csiSource.NodeStageSecretRef.Name, err)) } } @@ -223,11 +222,11 @@ func (m *csiBlockMapper) publishVolumeForBlock( volAttribs := csiSource.VolumeAttributes podInfoEnabled, err := m.plugin.podInfoEnabled(string(m.driverName)) if err != nil { - return "", errors.New(log("blockMapper.publishVolumeForBlock failed to assemble volume attributes: %v", err)) + return "", volumetypes.NewTransientOperationFailure(log("blockMapper.publishVolumeForBlock failed to assemble volume attributes: %v", err)) } volumeLifecycleMode, err := m.plugin.getVolumeLifecycleMode(m.spec) if err != nil { - return "", errors.New(log("blockMapper.publishVolumeForBlock failed to get VolumeLifecycleMode: %v", err)) + return "", volumetypes.NewTransientOperationFailure(log("blockMapper.publishVolumeForBlock failed to get VolumeLifecycleMode: %v", err)) } if podInfoEnabled { volAttribs = mergeMap(volAttribs, getPodInfoAttrs(m.pod, volumeLifecycleMode)) @@ -237,7 +236,7 @@ func (m *csiBlockMapper) publishVolumeForBlock( if csiSource.NodePublishSecretRef != nil { nodePublishSecrets, err = getCredentialsFromSecret(m.k8s, csiSource.NodePublishSecretRef) if err != nil { - return "", errors.New(log("blockMapper.publishVolumeForBlock failed to get NodePublishSecretRef %s/%s: %v", + return "", volumetypes.NewTransientOperationFailure(log("blockMapper.publishVolumeForBlock failed to get NodePublishSecretRef %s/%s: %v", csiSource.NodePublishSecretRef.Namespace, csiSource.NodePublishSecretRef.Name, err)) } } @@ -304,7 +303,7 @@ func (m *csiBlockMapper) SetUpDevice() (string, error) { attachID := getAttachmentName(csiSource.VolumeHandle, csiSource.Driver, nodeName) attachment, err = m.k8s.StorageV1().VolumeAttachments().Get(context.TODO(), attachID, meta.GetOptions{}) if err != nil { - return "", errors.New(log("blockMapper.SetupDevice failed to get volume attachment [id=%v]: %v", attachID, err)) + return "", volumetypes.NewTransientOperationFailure(log("blockMapper.SetupDevice failed to get volume attachment [id=%v]: %v", attachID, err)) } } @@ -366,7 +365,7 @@ func (m *csiBlockMapper) MapPodDevice() (string, error) { attachID := getAttachmentName(csiSource.VolumeHandle, csiSource.Driver, nodeName) attachment, err = m.k8s.StorageV1().VolumeAttachments().Get(context.TODO(), attachID, meta.GetOptions{}) if err != nil { - return "", errors.New(log("blockMapper.MapPodDevice failed to get volume attachment [id=%v]: %v", attachID, err)) + return "", volumetypes.NewTransientOperationFailure(log("blockMapper.MapPodDevice failed to get volume attachment [id=%v]: %v", attachID, err)) } } diff --git a/pkg/volume/csi/csi_block_test.go b/pkg/volume/csi/csi_block_test.go index 3b06ff1c7c7..deffc6b39f9 100644 --- a/pkg/volume/csi/csi_block_test.go +++ b/pkg/volume/csi/csi_block_test.go @@ -18,6 +18,7 @@ package csi import ( "context" + "errors" "fmt" "os" "path/filepath" @@ -491,6 +492,46 @@ func TestBlockMapperMapPodDeviceNoClientError(t *testing.T) { } } +func TestBlockMapperMapPodDeviceGetStageSecretsError(t *testing.T) { + transientError := volumetypes.NewTransientOperationFailure("") + plug, tmpDir := newTestPlugin(t, nil) + defer func() { + if err := os.RemoveAll(tmpDir); err != nil { + t.Error(err) + } + }() + + csiMapper, _, pv, err := prepareBlockMapperTest(plug, "test-pv", t) + if err != nil { + t.Fatalf("Failed to make a new Mapper: %v", err) + } + + // set a stage secret for the pv + pv.Spec.PersistentVolumeSource.CSI.NodePublishSecretRef = &api.SecretReference{ + Name: "foo", + Namespace: "default", + } + pvName := pv.GetName() + nodeName := string(plug.host.GetNodeName()) + + csiMapper.csiClient = setupClient(t, true) + + attachID := getAttachmentName(csiMapper.volumeID, string(csiMapper.driverName), nodeName) + attachment := makeTestAttachment(attachID, nodeName, pvName) + attachment.Status.Attached = true + if _, err = csiMapper.k8s.StorageV1().VolumeAttachments().Create(context.Background(), attachment, metav1.CreateOptions{}); err != nil { + t.Fatalf("failed to setup VolumeAttachment: %v", err) + } + t.Log("created attachment ", attachID) + + _, err = csiMapper.MapPodDevice() + if err == nil { + t.Errorf("test should fail, but no error occurred") + } else if !errors.As(err, &transientError) { + t.Errorf("expected a transient error but got %v", err) + } +} + func TestBlockMapperTearDownDevice(t *testing.T) { plug, tmpDir := newTestPlugin(t, nil) defer os.RemoveAll(tmpDir)