From b276132e214677b88864ec396e2bbf232621359a Mon Sep 17 00:00:00 2001 From: Federico Simoncelli Date: Wed, 1 Apr 2015 09:23:16 -0400 Subject: [PATCH 1/3] event: improve logging on recording failure Signed-off-by: Federico Simoncelli --- pkg/client/record/event.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pkg/client/record/event.go b/pkg/client/record/event.go index 33db471816e..37247f6f480 100644 --- a/pkg/client/record/event.go +++ b/pkg/client/record/event.go @@ -155,24 +155,20 @@ func recordEvent(sink EventSink, event *api.Event, updateExistingEvent bool) boo // If we can't contact the server, then hold everything while we keep trying. // Otherwise, something about the event is malformed and we should abandon it. - giveUp := false switch err.(type) { case *client.RequestConstructionError: // We will construct the request the same next time, so don't keep trying. - giveUp = true + glog.Errorf("Unable to construct event '%#v': '%v' (will not retry!)", event, err) + return true case *errors.StatusError: - // This indicates that the server understood and rejected our request. - giveUp = true + glog.Errorf("Server rejected event '%#v': '%v' (will not retry!)", event, err) + return true case *errors.UnexpectedObjectError: // We don't expect this; it implies the server's response didn't match a // known pattern. Go ahead and retry. default: // This case includes actual http transport errors. Go ahead and retry. } - if giveUp { - glog.Errorf("Unable to write event '%#v': '%v' (will not retry!)", event, err) - return true - } glog.Errorf("Unable to write event: '%v' (may retry after sleeping)", err) return false } From f78301c5ab694a29a89c79bb440eeaa66b2c2972 Mon Sep 17 00:00:00 2001 From: Federico Simoncelli Date: Wed, 1 Apr 2015 09:27:50 -0400 Subject: [PATCH 2/3] kubelet: log offline event message recording Signed-off-by: Federico Simoncelli --- pkg/cloudprovider/controller/nodecontroller.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/cloudprovider/controller/nodecontroller.go b/pkg/cloudprovider/controller/nodecontroller.go index b1efd723b30..ba3e087e62a 100644 --- a/pkg/cloudprovider/controller/nodecontroller.go +++ b/pkg/cloudprovider/controller/nodecontroller.go @@ -454,6 +454,7 @@ func (nc *NodeController) recordNodeOfflineEvent(node *api.Node) { UID: types.UID(node.Name), Namespace: "", } + glog.V(2).Infof("Recording offline event message for node %s", node.Name) // TODO: This requires a transaction, either both node status is updated // and event is recorded or neither should happen, see issue #6055. nc.recorder.Eventf(ref, "offline", "Node %s is now offline", node.Name) From 8849cf21d3621fbe1319047480da60d139778efe Mon Sep 17 00:00:00 2001 From: Federico Simoncelli Date: Wed, 1 Apr 2015 09:28:07 -0400 Subject: [PATCH 3/3] event: fallback to creation when update fails Updating may fail because the item was removed from apiserver. This patch makes sure to retry using creation instead of update. Fixes #6289 Signed-off-by: Federico Simoncelli --- pkg/client/record/event.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pkg/client/record/event.go b/pkg/client/record/event.go index 37247f6f480..58792133d40 100644 --- a/pkg/client/record/event.go +++ b/pkg/client/record/event.go @@ -136,6 +136,17 @@ func (eventBroadcaster *eventBroadcasterImpl) StartRecordingToSink(sink EventSin }) } +func isKeyNotFoundError(err error) bool { + statusErr, _ := err.(*errors.StatusError) + // At the moment the server is returning 500 instead of a more specific + // error. When changing this remember that it should be backward compatible + // with old api servers that may be still returning 500. + if statusErr != nil && statusErr.Status().Code == 500 { + return true + } + return false +} + // recordEvent attempts to write event to a sink. It returns true if the event // was successfully recorded or discarded, false if it should be retried. // If updateExistingEvent is false, it creates a new event, otherwise it updates @@ -145,7 +156,11 @@ func recordEvent(sink EventSink, event *api.Event, updateExistingEvent bool) boo var err error if updateExistingEvent { newEvent, err = sink.Update(event) - } else { + } + // Update can fail because the event may have been removed and it no longer exists. + if !updateExistingEvent || (updateExistingEvent && isKeyNotFoundError(err)) { + // Making sure that ResourceVersion is empty on creation + event.ResourceVersion = "" newEvent, err = sink.Create(event) } if err == nil {