Merge pull request #53491 from jsafrane/fix-recycler-reuse

Automatic merge from submit-queue (batch tested with PRs 52794, 54243, 54248, 53491, 53841). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

PV recycler: don't reuse old recycler pod.

It might be forged by user and block Kubernetes from recycling. Try to kill it instead.

+ report error when Kubernetes failed to delete recycler pod. PV controller will re-try recycling the PV again and delete the pod eventually.

**Which issue this PR fixes**
fixes #53413

**Release note**:
```release-note
None
```

@kubernetes/sig-storage-pr-reviews 
/assign @tallclair
This commit is contained in:
Kubernetes Submit Queue 2017-10-19 16:27:13 -07:00 committed by GitHub
commit d98911c58f
2 changed files with 42 additions and 35 deletions

View File

@ -48,8 +48,8 @@ type RecycleEventRecorder func(eventtype, message string)
// attempted before returning.
//
// In case there is a pod with the same namespace+name already running, this
// function assumes it's an older instance of the recycler pod and watches
// this old pod instead of starting a new one.
// function deletes it as it is not able to judge if it is an old recycler
// or user has forged a fake recycler to block Kubernetes from recycling.//
//
// pod - the pod designed by a volume plugin to recycle the volume. pod.Name
// will be overwritten with unique name based on PV.Name.
@ -81,20 +81,44 @@ func internalRecycleVolumeByWatchingPodUntilCompletion(pvName string, pod *v1.Po
_, err = recyclerClient.CreatePod(pod)
if err != nil {
if errors.IsAlreadyExists(err) {
glog.V(5).Infof("old recycler pod %q found for volume", pod.Name)
deleteErr := recyclerClient.DeletePod(pod.Name, pod.Namespace)
if deleteErr != nil {
return fmt.Errorf("failed to delete old recycler pod %s/%s: %s", pod.Namespace, pod.Name, deleteErr)
}
// Recycler will try again and the old pod will be hopefuly deleted
// at that time.
return fmt.Errorf("old recycler pod found, will retry later")
} else {
return fmt.Errorf("unexpected error creating recycler pod: %+v\n", err)
}
}
defer func(pod *v1.Pod) {
glog.V(2).Infof("deleting recycler pod %s/%s", pod.Namespace, pod.Name)
if err := recyclerClient.DeletePod(pod.Name, pod.Namespace); err != nil {
glog.Errorf("failed to delete recycler pod %s/%s: %v", pod.Namespace, pod.Name, err)
}
}(pod)
err = waitForPod(pod, recyclerClient, podCh)
// Now only the old pod or the new pod run. Watch it until it finishes
// and send all events on the pod to the PV
// In all cases delete the recycler pod and log its result.
glog.V(2).Infof("deleting recycler pod %s/%s", pod.Namespace, pod.Name)
deleteErr := recyclerClient.DeletePod(pod.Name, pod.Namespace)
if deleteErr != nil {
glog.Errorf("failed to delete recycler pod %s/%s: %v", pod.Namespace, pod.Name, err)
}
// Returning recycler error is preferred, the pod will be deleted again on
// the next retry.
if err != nil {
return fmt.Errorf("failed to recycle volume: %s", err)
}
// Recycle succeeded but we failed to delete the recycler pod. Report it,
// the controller will re-try recycling the PV again shortly.
if deleteErr != nil {
return fmt.Errorf("failed to delete recycler pod: %s", deleteErr)
}
return nil
}
// waitForPod watches the pod it until it finishes and send all events on the
// pod to the PV.
func waitForPod(pod *v1.Pod, recyclerClient recyclerClient, podCh <-chan watch.Event) error {
for {
event, ok := <-podCh
if !ok {

View File

@ -128,7 +128,7 @@ func TestRecyclerPod(t *testing.T) {
{v1.EventTypeWarning, "Unable to mount volumes for pod \"recycler-for-podRecyclerFailure_default(3c9809e5-347c-11e6-a79b-3c970e965218)\": timeout expired waiting for volumes to attach/mount"},
{v1.EventTypeWarning, "Error syncing pod, skipping: timeout expired waiting for volumes to attach/mount for pod \"default\"/\"recycler-for-podRecyclerFailure\". list of unattached/unmounted"},
},
expectedError: "Pod was active on the node longer than specified deadline",
expectedError: "failed to recycle volume: Pod was active on the node longer than specified deadline",
},
{
// Recycler pod gets deleted
@ -143,32 +143,15 @@ func TestRecyclerPod(t *testing.T) {
expectedEvents: []mockEvent{
{v1.EventTypeNormal, "Successfully assigned recycler-for-podRecyclerDeleted to 127.0.0.1"},
},
expectedError: "recycler pod was deleted",
expectedError: "failed to recycle volume: recycler pod was deleted",
},
{
// Another recycler pod is already running
name: "RecyclerRunning",
existingPod: newPod("podOldRecycler", v1.PodRunning, ""),
createPod: newPod("podNewRecycler", v1.PodFailed, "mock message"),
eventSequence: []watch.Event{
// Old pod succeeds
newPodEvent(watch.Modified, "podOldRecycler", v1.PodSucceeded, ""),
},
// No error = old pod succeeded. If the new pod was used, there
// would be error with "mock message".
expectedError: "",
},
{
// Another recycler pod is already running and fails
name: "FailedRecyclerRunning",
existingPod: newPod("podOldRecycler", v1.PodRunning, ""),
createPod: newPod("podNewRecycler", v1.PodFailed, "mock message"),
eventSequence: []watch.Event{
// Old pod failure
newPodEvent(watch.Modified, "podOldRecycler", v1.PodFailed, "Pod was active on the node longer than specified deadline"),
},
// If the new pod was used, there would be error with "mock message".
expectedError: "Pod was active on the node longer than specified deadline",
name: "RecyclerRunning",
existingPod: newPod("podOldRecycler", v1.PodRunning, ""),
createPod: newPod("podNewRecycler", v1.PodFailed, "mock message"),
eventSequence: []watch.Event{},
expectedError: "old recycler pod found, will retry later",
},
}