From f96e23e477df38c8f804824422e71c944b5011a3 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Wed, 30 Aug 2023 15:16:20 +0200
Subject: [PATCH 1/2] dra controller helper: merge log entries for informer events

Instead of emitting two log entries for new or updated ResourceClaim and
PodSchedulingContext, add the additional information for V(6) to the logger
via WithValues and emit one log entry.

This makes the log shorter and easier to read because related information is
on one line.
---
 .../controller/controller.go                  | 28 +++++++++++--------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
index 38507391303..215ecdc90aa 100644
--- a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
+++ b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
@@ -263,28 +263,34 @@ const (
 	schedulingCtxKeyPrefix = "schedulingCtx:"
 )
 
-func (ctrl *controller) add(logger *klog.Logger, obj interface{}) {
-	if logger != nil {
-		logger.Info("new object", "content", prettyPrint(obj))
+func (ctrl *controller) add(loggerV6 *klog.Logger, obj interface{}) {
+	var logger klog.Logger
+	if loggerV6 != nil {
+		logger = loggerV6.WithValues("object", prettyPrint(obj))
+	} else {
+		logger = ctrl.logger.V(5)
 	}
-	ctrl.addNewOrUpdated("Adding new work item", obj)
+	ctrl.addNewOrUpdated(logger, "Adding new work item", obj)
 }
 
-func (ctrl *controller) update(logger *klog.Logger, oldObj, newObj interface{}) {
-	if logger != nil {
+func (ctrl *controller) update(loggerV6 *klog.Logger, oldObj, newObj interface{}) {
+	var logger klog.Logger
+	if loggerV6 != nil {
 		diff := cmp.Diff(oldObj, newObj)
-		logger.Info("updated object", "content", prettyPrint(newObj), "diff", diff)
+		logger = loggerV6.WithValues("object", prettyPrint(newObj), "diff", diff)
+	} else {
+		logger = ctrl.logger.V(5)
 	}
-	ctrl.addNewOrUpdated("Adding updated work item", newObj)
+	ctrl.addNewOrUpdated(logger, "Adding updated work item", newObj)
 }
 
-func (ctrl *controller) addNewOrUpdated(msg string, obj interface{}) {
+func (ctrl *controller) addNewOrUpdated(loggerV klog.Logger, msg string, obj interface{}) {
 	objKey, err := getKey(obj)
 	if err != nil {
-		ctrl.logger.Error(err, "Failed to get key", "obj", obj)
+		loggerV.Error(err, "Failed to get key", "obj", obj)
 		return
 	}
-	ctrl.logger.V(5).Info(msg, "key", objKey)
+	loggerV.Info(msg, "key", objKey)
 	ctrl.queue.Add(objKey)
 }
 

From 062c57ae92f7bebc71a34edfd2599cb1e4f457ad Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Wed, 30 Aug 2023 15:21:30 +0200
Subject: [PATCH 2/2] dra controller helper: make allocation error message more readable

Here's what it looked like before:

    err="allocation of one or more pod claims failed. Claim test-dra5z9nh-resource-1an8wcr: failed allocating claim 4739f9a2-eedc-4702-ab17-e201e2dc0ad2. Claim test-dra5z9nh-resource-1bbdsj2: failed allocating claim e5525e5a-3397-40b4-a633-9ac354605303."

Some observations:
- Inserting the claim UID is just noise because it doesn't get logged
  anywhere else.
- Concatenating on a single line makes it hard to see the individual errors.

Joining with errors.Join leads to more readable output. The claim name only
gets inserted if not present already, to keep the individual error entries
short:

    err=<
        claim test-dralmx55-resource-1asvr5d: resources exhausted on node "scheduler-perf-dra-w62wm"
        claim test-dralmx55-resource-1bqq6c9: resources exhausted on node "scheduler-perf-dra-w62wm"
     >
---
 .../controller/controller.go                  | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
index 215ecdc90aa..da2941191cf 100644
--- a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
+++ b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go
@@ -780,16 +780,20 @@ func (ctrl *controller) syncPodSchedulingContexts(ctx context.Context, schedulin
 
 			ctrl.allocateClaims(ctx, claims, selectedNode, selectedUser)
 
-			allErrorsStr := "allocation of one or more pod claims failed."
-			allocationFailed := false
+			var allErrors []error
 			for _, delayed := range claims {
 				if delayed.Error != nil {
-					allErrorsStr = fmt.Sprintf("%s Claim %s: %s.", allErrorsStr, delayed.Claim.Name, delayed.Error)
-					allocationFailed = true
+					if strings.Contains(delayed.Error.Error(), delayed.Claim.Name) {
+						// Avoid adding redundant information.
+						allErrors = append(allErrors, delayed.Error)
+					} else {
+						// Include claim name, it's not in the underlying error.
+						allErrors = append(allErrors, fmt.Errorf("claim %s: %v", delayed.Claim.Name, delayed.Error))
+					}
 				}
 			}
-			if allocationFailed {
-				return fmt.Errorf(allErrorsStr)
+			if len(allErrors) > 0 {
+				return errors.Join(allErrors...)
 			}
 		}
 	}