Send recycle events from pod to pv.

This allows users to diagnose what's wrong with recycler. Recycler pods are
started automatically with a cryptic name and they are deleted immediately
when they finish.

kubectl describe pods will show:

  FirstSeen     LastSeen        Count   From                            SubobjectPath   Type            Reason          Message
  ---------     --------        -----   ----                            -------------   --------        ------          -------
  59m           59m             1       {persistentvolume-controller }                  Warning         RecyclerPod     Recycler pod: Unable to mount volumes for pod "recycler-for-nfs_default(5421800e-347b-11e6-a79b-3c970e965218)": timeout expired waiting for volumes to attach/mount for pod "recycler-for-nfs"/"default". list of unattached/unmounted volumes=[vol]
  53m           53m             1       {persistentvolume-controller }                  Warning         RecyclerPod     Recycler pod: Unable to mount volumes for pod "recycler-for-nfs_default(3c9809e5-347c-11e6-a79b-3c970e965218)": timeout expired waiting for volumes to attach/mount for pod "recycler-for-nfs"/"default". list of unattached/unmounted volumes=[vol]
  46m           46m             1       {persistentvolume-controller }                  Warning         RecyclerPod     Recycler pod: Unable to mount volumes for pod "recycler-for-nfs_default(250dd2a2-347d-11e6-a79b-3c970e965218)": timeout expired waiting for volumes to attach/mount for pod "recycler-for-nfs"/"default". list of unattached/unmounted volumes=[vol]
  40m           40m             1       {persistentvolume-controller }                  Warning         RecyclerPod     Recycler pod: Unable to mount volumes for pod "recycler-for-nfs_default(0d84ea33-347e-11e6-a79b-3c970e965218)": timeout expired waiting for volumes to attach/mount for pod "recycler-for-nfs"/"default". list of unattached/unmounted volumes=[vol]
  33m           33m             1       {persistentvolume-controller }                  Warning         RecyclerPod     Recycler pod: Unable to mount volumes for pod "recycler-for-nfs_default(f5fb63bf-347e-11e6-a79b-3c970e965218)": timeout expired waiting for volumes to attach/mount for pod "recycler-for-nfs"/"default". list of unattached/unmounted volumes=[vol]
  27m           27m             1       {persistentvolume-controller }                  Warning         RecyclerPod     Recycler pod: Unable to mount volumes for pod "recycler-for-nfs_default(de7128fd-347f-11e6-a79b-3c970e965218)": timeout expired waiting for volumes to attach/mount for pod "recycler-for-nfs"/"default". list of unattached/unmounted volumes=[vol]
  1h            3m              75      {persistentvolume-controller }                  Normal          RecyclerPod     Recycler pod: Successfully assigned recycler-for-nfs to 127.0.0.1
  1h            3m              76      {persistentvolume-controller }                  Normal          RecyclerPod     Recycler pod: Pod was active on the node longer than specified deadline
  1h            1m              12      {persistentvolume-controller }                  Warning         RecyclerPod     Recycler pod: Error syncing pod, skipping: timeout expired waiting for volumes to attach/mount for pod "recycler-for-nfs"/"default". list of unattached/unmounted volumes=[vol]
  20m           1m              4       {persistentvolume-controller }                  Warning         RecyclerPod     (events with common reason combined)


These steps were necessary:

- added event watcher to volume.RecycleVolumeByWatchingPodUntilCompletion

- pass all these events through volume plugins to volume controller

- rework volume.RecycleVolumeByWatchingPodUntilCompletion unit tests to a table
  (too much copy-paste)

- fix all unit tests along the way
This commit is contained in:
Jan Safranek
2016-09-08 12:57:57 +02:00
parent bf4e9e9db8
commit d7111b282f
10 changed files with 344 additions and 163 deletions

View File

@@ -19,13 +19,10 @@ package volume
import (
"fmt"
"reflect"
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/client/cache"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/watch"
"hash/fnv"
@@ -39,6 +36,8 @@ import (
"k8s.io/kubernetes/pkg/util/sets"
)
type RecycleEventRecorder func(eventtype, message string)
// RecycleVolumeByWatchingPodUntilCompletion is intended for use with volume
// Recyclers. This function will save the given Pod to the API and watch it
// until it completes, fails, or the pod's ActiveDeadlineSeconds is exceeded,
@@ -52,8 +51,8 @@ import (
// pod - the pod designed by a volume plugin to recycle the volume. pod.Name
// will be overwritten with unique name based on PV.Name.
// client - kube client for API operations.
func RecycleVolumeByWatchingPodUntilCompletion(pvName string, pod *api.Pod, kubeClient clientset.Interface) error {
return internalRecycleVolumeByWatchingPodUntilCompletion(pvName, pod, newRecyclerClient(kubeClient))
func RecycleVolumeByWatchingPodUntilCompletion(pvName string, pod *api.Pod, kubeClient clientset.Interface, recorder RecycleEventRecorder) error {
return internalRecycleVolumeByWatchingPodUntilCompletion(pvName, pod, newRecyclerClient(kubeClient, recorder))
}
// same as above func comments, except 'recyclerClient' is a narrower pod API
@@ -67,34 +66,61 @@ func internalRecycleVolumeByWatchingPodUntilCompletion(pvName string, pod *api.P
pod.Name = "recycler-for-" + pvName
pod.GenerateName = ""
stopChannel := make(chan struct{})
defer close(stopChannel)
podCh, err := recyclerClient.WatchPod(pod.Name, pod.Namespace, stopChannel)
if err != nil {
glog.V(4).Infof("cannot start watcher for pod %s/%s: %v", pod.Namespace, pod.Name, err)
return err
}
// Start the pod
_, err := recyclerClient.CreatePod(pod)
_, err = recyclerClient.CreatePod(pod)
if err != nil {
if errors.IsAlreadyExists(err) {
glog.V(5).Infof("old recycler pod %q found for volume", pod.Name)
} else {
return fmt.Errorf("Unexpected error creating recycler pod: %+v\n", err)
return fmt.Errorf("unexpected error creating recycler pod: %+v\n", err)
}
}
defer recyclerClient.DeletePod(pod.Name, pod.Namespace)
// Now only the old pod or the new pod run. Watch it until it finishes.
stopChannel := make(chan struct{})
defer close(stopChannel)
nextPod := recyclerClient.WatchPod(pod.Name, pod.Namespace, stopChannel)
// Now only the old pod or the new pod run. Watch it until it finishes
// and send all events on the pod to the PV
for {
watchedPod := nextPod()
if watchedPod.Status.Phase == api.PodSucceeded {
// volume.Recycle() returns nil on success, else error
return nil
}
if watchedPod.Status.Phase == api.PodFailed {
// volume.Recycle() returns nil on success, else error
if watchedPod.Status.Message != "" {
return fmt.Errorf(watchedPod.Status.Message)
} else {
return fmt.Errorf("pod failed, pod.Status.Message unknown.")
event := <-podCh
switch event.Object.(type) {
case *api.Pod:
// POD changed
pod := event.Object.(*api.Pod)
glog.V(4).Infof("recycler pod update received: %s %s/%s %s", event.Type, pod.Namespace, pod.Name, pod.Status.Phase)
switch event.Type {
case watch.Added, watch.Modified:
if pod.Status.Phase == api.PodSucceeded {
// Recycle succeeded.
return nil
}
if pod.Status.Phase == api.PodFailed {
if pod.Status.Message != "" {
return fmt.Errorf(pod.Status.Message)
} else {
return fmt.Errorf("pod failed, pod.Status.Message unknown.")
}
}
case watch.Deleted:
return fmt.Errorf("recycler pod was deleted")
case watch.Error:
return fmt.Errorf("recycler pod watcher failed")
}
case *api.Event:
// Event received
podEvent := event.Object.(*api.Event)
glog.V(4).Infof("recycler event received: %s %s/%s %s/%s %s", event.Type, podEvent.Namespace, podEvent.Name, podEvent.InvolvedObject.Namespace, podEvent.InvolvedObject.Name, podEvent.Message)
if event.Type == watch.Added {
recyclerClient.Event(podEvent.Type, podEvent.Message)
}
}
}
@@ -106,15 +132,24 @@ type recyclerClient interface {
CreatePod(pod *api.Pod) (*api.Pod, error)
GetPod(name, namespace string) (*api.Pod, error)
DeletePod(name, namespace string) error
WatchPod(name, namespace string, stopChannel chan struct{}) func() *api.Pod
// WatchPod returns a ListWatch for watching a pod. The stopChannel is used
// to close the reflector backing the watch. The caller is responsible for
// derring a close on the channel to stop the reflector.
WatchPod(name, namespace string, stopChannel chan struct{}) (<-chan watch.Event, error)
// Event sends an event to the volume that is being recycled.
Event(eventtype, message string)
}
func newRecyclerClient(client clientset.Interface) recyclerClient {
return &realRecyclerClient{client}
func newRecyclerClient(client clientset.Interface, recorder RecycleEventRecorder) recyclerClient {
return &realRecyclerClient{
client,
recorder,
}
}
type realRecyclerClient struct {
client clientset.Interface
client clientset.Interface
recorder RecycleEventRecorder
}
func (c *realRecyclerClient) CreatePod(pod *api.Pod) (*api.Pod, error) {
@@ -129,28 +164,60 @@ func (c *realRecyclerClient) DeletePod(name, namespace string) error {
return c.client.Core().Pods(namespace).Delete(name, nil)
}
// WatchPod returns a ListWatch for watching a pod. The stopChannel is used
// to close the reflector backing the watch. The caller is responsible for
// derring a close on the channel to stop the reflector.
func (c *realRecyclerClient) WatchPod(name, namespace string, stopChannel chan struct{}) func() *api.Pod {
fieldSelector, _ := fields.ParseSelector("metadata.name=" + name)
func (c *realRecyclerClient) Event(eventtype, message string) {
c.recorder(eventtype, message)
}
podLW := &cache.ListWatch{
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
options.FieldSelector = fieldSelector
return c.client.Core().Pods(namespace).List(options)
},
WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
options.FieldSelector = fieldSelector
return c.client.Core().Pods(namespace).Watch(options)
},
func (c *realRecyclerClient) WatchPod(name, namespace string, stopChannel chan struct{}) (<-chan watch.Event, error) {
podSelector, _ := fields.ParseSelector("metadata.name=" + name)
options := api.ListOptions{
FieldSelector: podSelector,
Watch: true,
}
queue := cache.NewFIFO(cache.MetaNamespaceKeyFunc)
cache.NewReflector(podLW, &api.Pod{}, queue, 1*time.Minute).RunUntil(stopChannel)
return func() *api.Pod {
return cache.Pop(queue).(*api.Pod)
podWatch, err := c.client.Core().Pods(namespace).Watch(options)
if err != nil {
return nil, err
}
eventSelector, _ := fields.ParseSelector("involvedObject.name=" + name)
eventWatch, err := c.client.Core().Events(namespace).Watch(api.ListOptions{
FieldSelector: eventSelector,
Watch: true,
})
if err != nil {
podWatch.Stop()
return nil, err
}
eventCh := make(chan watch.Event, 0)
go func() {
defer eventWatch.Stop()
defer podWatch.Stop()
defer close(eventCh)
for {
select {
case _ = <-stopChannel:
return
case podEvent, ok := <-podWatch.ResultChan():
if !ok {
return
}
eventCh <- podEvent
case eventEvent, ok := <-eventWatch.ResultChan():
if !ok {
return
}
eventCh <- eventEvent
}
}
}()
return eventCh, nil
}
// CalculateTimeoutForVolume calculates time for a Recycler pod to complete a