Fix handling of APIserver errors when saving provisioned PVs.

When API server crashes *after* saving a provisioned PV and before sending
200 OK, the controller tries to save the PV again. In this case, it gets
AlreadyExists error, which should be interpreted as success and not as error.

Especially, a volume that corresponds to the PV should not be deleted in the
underlying storage.
This commit is contained in:
Jan Safranek 2017-08-29 09:47:32 +02:00
parent c071a77156
commit 195d5c1546
3 changed files with 46 additions and 6 deletions

View File

@ -62,12 +62,14 @@ go_test(
],
library = ":go_default_library",
deps = [
"//pkg/api:go_default_library",
"//pkg/api/testapi:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/volume:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",

View File

@ -22,7 +22,9 @@ import (
"k8s.io/api/core/v1"
storage "k8s.io/api/storage/v1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api"
)
var class1Parameters = map[string]string{
@ -360,6 +362,36 @@ func TestProvisionSync(t *testing.T) {
[]string{"Warning ProvisioningFailed"},
noerrors, wrapTestWithProvisionCalls([]provisionCall{}, testSyncClaim),
},
{
// Provision success - first save of a PV to API server fails (API
// server has written the object to etcd, but crashed before sending
// 200 OK response to the controller). Controller retries and the
// second save of the PV returns "AlreadyExists" because the PV
// object already is in the API server.
//
"11-19 - provisioned volume saved but API server crashed",
novolumes,
// We don't actually simulate API server saving the object and
// crashing afterwards, Create() just returns error without saving
// the volume in this test. So the set of expected volumes at the
// end of the test is empty.
novolumes,
newClaimArray("claim11-19", "uid11-19", "1Gi", "", v1.ClaimPending, &classGold),
newClaimArray("claim11-19", "uid11-19", "1Gi", "", v1.ClaimPending, &classGold, annStorageProvisioner),
noevents,
[]reactorError{
// Inject errors to simulate crashed API server during
// kubeclient.PersistentVolumes.Create()
{"create", "persistentvolumes", errors.New("Mock creation error1")},
{"create", "persistentvolumes", apierrs.NewAlreadyExists(api.Resource("persistentvolumes"), "")},
},
wrapTestWithPluginCalls(
nil, // recycle calls
nil, // delete calls - if Delete was called the test would fail
[]provisionCall{provision1Success},
testSyncClaim,
),
},
}
runSyncTests(t, tests, storageClasses)
}

View File

@ -24,6 +24,7 @@ import (
"k8s.io/api/core/v1"
storage "k8s.io/api/storage/v1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
@ -1364,14 +1365,19 @@ func (ctrl *PersistentVolumeController) provisionClaimOperation(claimObj interfa
for i := 0; i < ctrl.createProvisionedPVRetryCount; i++ {
glog.V(4).Infof("provisionClaimOperation [%s]: trying to save volume %s", claimToClaimKey(claim), volume.Name)
var newVol *v1.PersistentVolume
if newVol, err = ctrl.kubeClient.Core().PersistentVolumes().Create(volume); err == nil {
if newVol, err = ctrl.kubeClient.Core().PersistentVolumes().Create(volume); err == nil || apierrs.IsAlreadyExists(err) {
// Save succeeded.
glog.V(3).Infof("volume %q for claim %q saved", volume.Name, claimToClaimKey(claim))
if err != nil {
glog.V(3).Infof("volume %q for claim %q already exists, reusing", volume.Name, claimToClaimKey(claim))
err = nil
} else {
glog.V(3).Infof("volume %q for claim %q saved", volume.Name, claimToClaimKey(claim))
_, updateErr := ctrl.storeVolumeUpdate(newVol)
if updateErr != nil {
// We will get an "volume added" event soon, this is not a big error
glog.V(4).Infof("provisionClaimOperation [%s]: cannot update internal cache: %v", volume.Name, updateErr)
_, updateErr := ctrl.storeVolumeUpdate(newVol)
if updateErr != nil {
// We will get an "volume added" event soon, this is not a big error
glog.V(4).Infof("provisionClaimOperation [%s]: cannot update internal cache: %v", volume.Name, updateErr)
}
}
break
}