Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-06 02:34:03 +00:00
Merge pull request #44719 from jsafrane/fix-apiserver-crash
Automatic merge from submit-queue (batch tested with PRs 44719, 48454)

Fix handling of API server errors when saving provisioned PVs.

When the API server crashes *after* saving a provisioned PV but before sending the 200 OK response, the controller tries to save the PV again. In that case it gets an AlreadyExists error, which should be interpreted as success, not as an error. In particular, the volume that corresponds to the PV must not be deleted in the underlying storage.

Fixes #44372

```release-note
NONE
```

@kubernetes/sig-storage-pr-reviews
This commit is contained in: commit c98959e770
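For context, the change below implements an idempotent-create pattern: on retry, an AlreadyExists error from Create is taken as proof that an earlier attempt was persisted, so the loop stops and nothing is rolled back. A minimal standalone sketch of the pattern, assuming the client-go API of this period (`Core()` rather than today's `CoreV1()`); `savePV` and `retryCount` are illustrative names, not from this commit:

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	apierrs "k8s.io/apimachinery/pkg/api/errors"
	clientset "k8s.io/client-go/kubernetes"
)

// savePV retries Create and treats AlreadyExists as success: the API server
// stored the PV on an earlier attempt but crashed before answering, so the
// object is already in etcd and the backing volume must not be deleted.
func savePV(client clientset.Interface, volume *v1.PersistentVolume, retryCount int) error {
	var err error
	for i := 0; i < retryCount; i++ {
		_, err = client.Core().PersistentVolumes().Create(volume)
		if err == nil || apierrs.IsAlreadyExists(err) {
			return nil
		}
		// Any other error is considered transient; try again.
	}
	return fmt.Errorf("failed to save PersistentVolume %q: %v", volume.Name, err)
}
```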
```diff
@@ -62,12 +62,14 @@ go_test(
     ],
     library = ":go_default_library",
     deps = [
+        "//pkg/api:go_default_library",
         "//pkg/api/testapi:go_default_library",
         "//pkg/controller:go_default_library",
         "//pkg/volume:go_default_library",
         "//vendor/github.com/golang/glog:go_default_library",
         "//vendor/k8s.io/api/core/v1:go_default_library",
         "//vendor/k8s.io/api/storage/v1:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
```
```diff
@@ -22,7 +22,9 @@ import (
 
 	"k8s.io/api/core/v1"
 	storage "k8s.io/api/storage/v1"
+	apierrs "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/kubernetes/pkg/api"
 )
 
 var class1Parameters = map[string]string{
@@ -360,6 +362,36 @@ func TestProvisionSync(t *testing.T) {
 			[]string{"Warning ProvisioningFailed"},
 			noerrors, wrapTestWithProvisionCalls([]provisionCall{}, testSyncClaim),
 		},
+		{
+			// Provision success - first save of a PV to API server fails (API
+			// server has written the object to etcd, but crashed before sending
+			// 200 OK response to the controller). Controller retries and the
+			// second save of the PV returns "AlreadyExists" because the PV
+			// object already is in the API server.
+			//
+			"11-19 - provisioned volume saved but API server crashed",
+			novolumes,
+			// We don't actually simulate API server saving the object and
+			// crashing afterwards, Create() just returns error without saving
+			// the volume in this test. So the set of expected volumes at the
+			// end of the test is empty.
+			novolumes,
+			newClaimArray("claim11-19", "uid11-19", "1Gi", "", v1.ClaimPending, &classGold),
+			newClaimArray("claim11-19", "uid11-19", "1Gi", "", v1.ClaimPending, &classGold, annStorageProvisioner),
+			noevents,
+			[]reactorError{
+				// Inject errors to simulate crashed API server during
+				// kubeclient.PersistentVolumes.Create()
+				{"create", "persistentvolumes", errors.New("Mock creation error1")},
+				{"create", "persistentvolumes", apierrs.NewAlreadyExists(api.Resource("persistentvolumes"), "")},
+			},
+			wrapTestWithPluginCalls(
+				nil, // recycle calls
+				nil, // delete calls - if Delete was called the test would fail
+				[]provisionCall{provision1Success},
+				testSyncClaim,
+			),
+		},
 	}
 	runSyncTests(t, tests, storageClasses)
 }
```
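The reactorError entries above feed the test framework's fake clientset; the same failure sequence can be reproduced directly with client-go's testing reactors. A minimal sketch, assuming `k8s.io/client-go/kubernetes/fake` and the period API (`Core()`); the call-counting logic is illustrative, not part of this commit:

```go
package main

import (
	"errors"
	"fmt"

	"k8s.io/api/core/v1"
	apierrs "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/kubernetes/fake"
	k8stesting "k8s.io/client-go/testing"
)

func main() {
	client := fake.NewSimpleClientset()
	calls := 0
	// First Create fails with a generic error (what the controller sees when
	// the API server crashes); the second returns AlreadyExists (the object
	// had in fact been stored).
	client.PrependReactor("create", "persistentvolumes",
		func(action k8stesting.Action) (bool, runtime.Object, error) {
			calls++
			if calls == 1 {
				return true, nil, errors.New("mock creation error")
			}
			return true, nil, apierrs.NewAlreadyExists(
				schema.GroupResource{Resource: "persistentvolumes"}, "pv-test")
		})

	pv := &v1.PersistentVolume{ObjectMeta: metav1.ObjectMeta{Name: "pv-test"}}
	_, err1 := client.Core().PersistentVolumes().Create(pv)
	_, err2 := client.Core().PersistentVolumes().Create(pv)
	fmt.Println(err1)                          // mock creation error
	fmt.Println(apierrs.IsAlreadyExists(err2)) // true
}
```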
```diff
@@ -24,6 +24,7 @@ import (
 
 	"k8s.io/api/core/v1"
 	storage "k8s.io/api/storage/v1"
+	apierrs "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
@@ -1364,14 +1365,19 @@ func (ctrl *PersistentVolumeController) provisionClaimOperation(claimObj interfa
 	for i := 0; i < ctrl.createProvisionedPVRetryCount; i++ {
 		glog.V(4).Infof("provisionClaimOperation [%s]: trying to save volume %s", claimToClaimKey(claim), volume.Name)
 		var newVol *v1.PersistentVolume
-		if newVol, err = ctrl.kubeClient.Core().PersistentVolumes().Create(volume); err == nil {
+		if newVol, err = ctrl.kubeClient.Core().PersistentVolumes().Create(volume); err == nil || apierrs.IsAlreadyExists(err) {
 			// Save succeeded.
-			glog.V(3).Infof("volume %q for claim %q saved", volume.Name, claimToClaimKey(claim))
+			if err != nil {
+				glog.V(3).Infof("volume %q for claim %q already exists, reusing", volume.Name, claimToClaimKey(claim))
+				err = nil
+			} else {
+				glog.V(3).Infof("volume %q for claim %q saved", volume.Name, claimToClaimKey(claim))
+			}
 
 			_, updateErr := ctrl.storeVolumeUpdate(newVol)
 			if updateErr != nil {
 				// We will get an "volume added" event soon, this is not a big error
 				glog.V(4).Infof("provisionClaimOperation [%s]: cannot update internal cache: %v", volume.Name, updateErr)
 			}
 			break
 		}
```