From 195d5c1546f1800217e3bfd441b56cbc3feec05c Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Tue, 29 Aug 2017 09:47:32 +0200 Subject: [PATCH] Fix handling of APIserver errors when saving provisioned PVs. When API server crashes *after* saving a provisioned PV and before sending 200 OK, the controller tries to save the PV again. In this case, it gets AlreadyExists error, which should be interpreted as success and not as error. Especially, a volume that corresponds to the PV should not be deleted in the underlying storage. --- pkg/controller/volume/persistentvolume/BUILD | 2 ++ .../volume/persistentvolume/provision_test.go | 32 +++++++++++++++++++ .../volume/persistentvolume/pv_controller.go | 18 +++++++---- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/pkg/controller/volume/persistentvolume/BUILD b/pkg/controller/volume/persistentvolume/BUILD index d9549568937..ebeb3defe4d 100644 --- a/pkg/controller/volume/persistentvolume/BUILD +++ b/pkg/controller/volume/persistentvolume/BUILD @@ -62,12 +62,14 @@ go_test( ], library = ":go_default_library", deps = [ + "//pkg/api:go_default_library", "//pkg/api/testapi:go_default_library", "//pkg/controller:go_default_library", "//pkg/volume:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/api/storage/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", diff --git a/pkg/controller/volume/persistentvolume/provision_test.go b/pkg/controller/volume/persistentvolume/provision_test.go index 8465136074c..ef7fa7e082a 100644 --- a/pkg/controller/volume/persistentvolume/provision_test.go +++ b/pkg/controller/volume/persistentvolume/provision_test.go @@ -22,7 +22,9 @@ import ( "k8s.io/api/core/v1" storage "k8s.io/api/storage/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/pkg/api" ) var class1Parameters = map[string]string{ @@ -360,6 +362,36 @@ func TestProvisionSync(t *testing.T) { []string{"Warning ProvisioningFailed"}, noerrors, wrapTestWithProvisionCalls([]provisionCall{}, testSyncClaim), }, + { + // Provision success - first save of a PV to API server fails (API + // server has written the object to etcd, but crashed before sending + // 200 OK response to the controller). Controller retries and the + // second save of the PV returns "AlreadyExists" because the PV + // object already is in the API server. + // + "11-19 - provisioned volume saved but API server crashed", + novolumes, + // We don't actually simulate API server saving the object and + // crashing afterwards, Create() just returns error without saving + // the volume in this test. So the set of expected volumes at the + // end of the test is empty. + novolumes, + newClaimArray("claim11-19", "uid11-19", "1Gi", "", v1.ClaimPending, &classGold), + newClaimArray("claim11-19", "uid11-19", "1Gi", "", v1.ClaimPending, &classGold, annStorageProvisioner), + noevents, + []reactorError{ + // Inject errors to simulate crashed API server during + // kubeclient.PersistentVolumes.Create() + {"create", "persistentvolumes", errors.New("Mock creation error1")}, + {"create", "persistentvolumes", apierrs.NewAlreadyExists(api.Resource("persistentvolumes"), "")}, + }, + wrapTestWithPluginCalls( + nil, // recycle calls + nil, // delete calls - if Delete was called the test would fail + []provisionCall{provision1Success}, + testSyncClaim, + ), + }, } runSyncTests(t, tests, storageClasses) } diff --git a/pkg/controller/volume/persistentvolume/pv_controller.go b/pkg/controller/volume/persistentvolume/pv_controller.go index 71ce8c1fecf..69b8b627871 100644 --- a/pkg/controller/volume/persistentvolume/pv_controller.go +++ b/pkg/controller/volume/persistentvolume/pv_controller.go @@ -24,6 +24,7 @@ import ( "k8s.io/api/core/v1" storage "k8s.io/api/storage/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" @@ -1364,14 +1365,19 @@ func (ctrl *PersistentVolumeController) provisionClaimOperation(claimObj interfa for i := 0; i < ctrl.createProvisionedPVRetryCount; i++ { glog.V(4).Infof("provisionClaimOperation [%s]: trying to save volume %s", claimToClaimKey(claim), volume.Name) var newVol *v1.PersistentVolume - if newVol, err = ctrl.kubeClient.Core().PersistentVolumes().Create(volume); err == nil { + if newVol, err = ctrl.kubeClient.Core().PersistentVolumes().Create(volume); err == nil || apierrs.IsAlreadyExists(err) { // Save succeeded. - glog.V(3).Infof("volume %q for claim %q saved", volume.Name, claimToClaimKey(claim)) + if err != nil { + glog.V(3).Infof("volume %q for claim %q already exists, reusing", volume.Name, claimToClaimKey(claim)) + err = nil + } else { + glog.V(3).Infof("volume %q for claim %q saved", volume.Name, claimToClaimKey(claim)) - _, updateErr := ctrl.storeVolumeUpdate(newVol) - if updateErr != nil { - // We will get an "volume added" event soon, this is not a big error - glog.V(4).Infof("provisionClaimOperation [%s]: cannot update internal cache: %v", volume.Name, updateErr) + _, updateErr := ctrl.storeVolumeUpdate(newVol) + if updateErr != nil { + // We will get an "volume added" event soon, this is not a big error + glog.V(4).Infof("provisionClaimOperation [%s]: cannot update internal cache: %v", volume.Name, updateErr) + } } break }