diff --git a/staging/src/k8s.io/cloud-provider/controllers/node/metrics.go b/staging/src/k8s.io/cloud-provider/controllers/node/metrics.go
new file mode 100644
index 00000000000..08d574e5d92
--- /dev/null
+++ b/staging/src/k8s.io/cloud-provider/controllers/node/metrics.go
@@ -0,0 +1,59 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cloud
+
+import (
+	"sync"
+
+	"k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/legacyregistry"
+)
+
+const (
+	metricsSubsystem = "node_controller" // Subsystem value shared by both histograms declared below.
+)
+
+var (
+	removeCloudProviderTaintDelay = metrics.NewHistogram( // Observed in syncNode with the seconds elapsed since newNode's CreationTimestamp.
+		&metrics.HistogramOpts{
+			Subsystem:      metricsSubsystem,
+			Name:           "cloud_provider_taint_removal_delay_seconds",
+			Help:           "Number of seconds after node creation when NodeController removed the cloud-provider taint of a single node.",
+			Buckets:        metrics.ExponentialBuckets(1, 4, 6), // explicit upper bounds: 1s, 4s, 16s, 64s, 256s, 1024s (~17m)
+			StabilityLevel: metrics.ALPHA,
+		},
+	)
+	initialNodeSyncDelay = metrics.NewHistogram( // Observed in syncNode, just before it returns nil, with the seconds elapsed since curNode's CreationTimestamp.
+		&metrics.HistogramOpts{
+			Subsystem:      metricsSubsystem,
+			Name:           "initial_node_sync_delay_seconds",
+			Help:           "Number of seconds after node creation when NodeController finished the initial synchronization of a single node.",
+			Buckets:        metrics.ExponentialBuckets(1, 4, 6), // explicit upper bounds: 1s, 4s, 16s, 64s, 256s, 1024s (~17m)
+			StabilityLevel: metrics.ALPHA,
+		},
+	)
+)
+
+var metricRegistration sync.Once // Ensures the registration closure in registerMetrics runs at most once.
+
+// registerMetrics registers both histograms with the legacy registry; metricRegistration turns repeat calls into no-ops.
+func registerMetrics() {
+	metricRegistration.Do(func() {
+		legacyregistry.MustRegister(removeCloudProviderTaintDelay)
+		legacyregistry.MustRegister(initialNodeSyncDelay)
+	})
+}
diff --git a/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go b/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go
index ace70dd0aaf..06d2030ff96 100644
--- a/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go
+++ b/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go
@@ -48,6 +48,10 @@ import (
 	"k8s.io/klog/v2"
 )
 
+func init() { // Registers the node controller histograms when this package is initialized.
+	registerMetrics()
+}
+
 // labelReconcileInfo lists Node labels to reconcile, and how to reconcile them.
 // primaryKey and secondaryKey are keys of labels to reconcile.
 //   - If both keys exist, but their values don't match. Use the value from the
@@ -489,6 +493,8 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
 			return err
 		}
 
+		removeCloudProviderTaintDelay.Observe(time.Since(newNode.ObjectMeta.CreationTimestamp.Time).Seconds()) // seconds since newNode was created; reached only when the preceding error check passes
+
 		// After adding, call UpdateNodeAddress to set the CloudProvider provided IPAddresses
 		// So that users do not see any significant delay in IP addresses being filled into the node
 		cnc.updateNodeAddress(ctx, newNode, instanceMetadata)
@@ -501,6 +507,7 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
 	}
 
 	cnc.recorder.Event(copyNode, v1.EventTypeNormal, "Synced", "Node synced successfully")
+	initialNodeSyncDelay.Observe(time.Since(curNode.ObjectMeta.CreationTimestamp.Time).Seconds()) // seconds since curNode was created; recorded right before the success return
 	return nil
 }
 