diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 45889f0b68f..e9c1af9470b 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -834,6 +834,10 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend kubeDeps.PodStartupLatencyTracker = kubeletutil.NewPodStartupLatencyTracker() } + if kubeDeps.NodeStartupLatencyTracker == nil { + kubeDeps.NodeStartupLatencyTracker = kubeletutil.NewNodeStartupLatencyTracker() + } + // TODO(vmarmol): Do this through container config. oomAdjuster := kubeDeps.OOMAdjuster if err := oomAdjuster.ApplyOOMScoreAdj(0, int(s.OOMScoreAdj)); err != nil { diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index c8708c0520d..e6040d1b256 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -248,29 +248,30 @@ type Dependencies struct { Options []Option // Injected Dependencies - Auth server.AuthInterface - CAdvisorInterface cadvisor.Interface - Cloud cloudprovider.Interface - ContainerManager cm.ContainerManager - EventClient v1core.EventsGetter - HeartbeatClient clientset.Interface - OnHeartbeatFailure func() - KubeClient clientset.Interface - Mounter mount.Interface - HostUtil hostutil.HostUtils - OOMAdjuster *oom.OOMAdjuster - OSInterface kubecontainer.OSInterface - PodConfig *config.PodConfig - ProbeManager prober.Manager - Recorder record.EventRecorder - Subpather subpath.Interface - TracerProvider trace.TracerProvider - VolumePlugins []volume.VolumePlugin - DynamicPluginProber volume.DynamicPluginProber - TLSOptions *server.TLSOptions - RemoteRuntimeService internalapi.RuntimeService - RemoteImageService internalapi.ImageManagerService - PodStartupLatencyTracker util.PodStartupLatencyTracker + Auth server.AuthInterface + CAdvisorInterface cadvisor.Interface + Cloud cloudprovider.Interface + ContainerManager cm.ContainerManager + EventClient v1core.EventsGetter + HeartbeatClient clientset.Interface + OnHeartbeatFailure func() + KubeClient clientset.Interface + Mounter mount.Interface + HostUtil hostutil.HostUtils + OOMAdjuster *oom.OOMAdjuster + OSInterface kubecontainer.OSInterface + PodConfig *config.PodConfig + ProbeManager prober.Manager + Recorder record.EventRecorder + Subpather subpath.Interface + TracerProvider trace.TracerProvider + VolumePlugins []volume.VolumePlugin + DynamicPluginProber volume.DynamicPluginProber + TLSOptions *server.TLSOptions + RemoteRuntimeService internalapi.RuntimeService + RemoteImageService internalapi.ImageManagerService + PodStartupLatencyTracker util.PodStartupLatencyTracker + NodeStartupLatencyTracker util.NodeStartupLatencyTracker // remove it after cadvisor.UsingLegacyCadvisorStats dropped. useLegacyCadvisorStats bool } @@ -552,6 +553,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, keepTerminatedPodVolumes: keepTerminatedPodVolumes, nodeStatusMaxImages: nodeStatusMaxImages, tracer: tracer, + nodeStartupLatencyTracker: kubeDeps.NodeStartupLatencyTracker, } if klet.cloud != nil { @@ -1293,6 +1295,9 @@ type Kubelet struct { // OpenTelemetry Tracer tracer trace.Tracer + + // Track node startup latencies + nodeStartupLatencyTracker util.NodeStartupLatencyTracker } // ListPodStats is delegated to StatsProvider, which implements stats.Provider interface diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go index d598c55b79a..7723abc64b2 100644 --- a/pkg/kubelet/kubelet_node_status.go +++ b/pkg/kubelet/kubelet_node_status.go @@ -52,6 +52,9 @@ func (kl *Kubelet) registerWithAPIServer() { if kl.registrationCompleted { return } + + kl.nodeStartupLatencyTracker.RecordAttemptRegisterNode() + step := 100 * time.Millisecond for { @@ -85,6 +88,7 @@ func (kl *Kubelet) registerWithAPIServer() { func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool { _, err := kl.kubeClient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) if err == nil { + kl.nodeStartupLatencyTracker.RecordRegisteredNewNode() return true } @@ -633,6 +637,12 @@ func (kl *Kubelet) patchNodeStatus(originalNode, node *v1.Node) (*v1.Node, error } kl.lastStatusReportTime = kl.clock.Now() kl.setLastObservedNodeAddresses(updatedNode.Status.Addresses) + + readyIdx, readyCondition := nodeutil.GetNodeCondition(&updatedNode.Status, v1.NodeReady) + if readyIdx >= 0 && readyCondition.Status == v1.ConditionTrue { + kl.nodeStartupLatencyTracker.RecordNodeReady() + } + return updatedNode, nil } diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 18f225aade5..42866dc6df6 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -268,6 +268,7 @@ func newTestKubeletWithImageList( kubelet.podManager = kubepod.NewBasicPodManager() podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker() kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, kubelet.getRootDir()) + kubelet.nodeStartupLatencyTracker = kubeletutil.NewNodeStartupLatencyTracker() kubelet.containerRuntime = fakeRuntime kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime) diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 0897b59eb21..cd80e63c5f7 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -33,6 +33,11 @@ const ( KubeletSubsystem = "kubelet" NodeNameKey = "node_name" NodeLabelKey = "node" + NodeStartupPreKubeletKey = "node_startup_pre_kubelet_duration_seconds" + NodeStartupPreRegistrationKey = "node_startup_pre_registration_duration_seconds" + NodeStartupRegistrationKey = "node_startup_registration_duration_seconds" + NodeStartupPostRegistrationKey = "node_startup_post_registration_duration_seconds" + NodeStartupKey = "node_startup_duration_seconds" PodWorkerDurationKey = "pod_worker_duration_seconds" PodStartDurationKey = "pod_start_duration_seconds" PodStartSLIDurationKey = "pod_start_sli_duration_seconds" @@ -736,6 +741,51 @@ var ( StabilityLevel: metrics.ALPHA, }, ) + + NodeStartupPreKubeletDuration = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: NodeStartupPreKubeletKey, + Help: "Duration in seconds of node startup before kubelet starts.", + StabilityLevel: metrics.ALPHA, + }, + ) + + NodeStartupPreRegistrationDuration = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: NodeStartupPreRegistrationKey, + Help: "Duration in seconds of node startup before registration.", + StabilityLevel: metrics.ALPHA, + }, + ) + + NodeStartupRegistrationDuration = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: NodeStartupRegistrationKey, + Help: "Duration in seconds of node startup during registration.", + StabilityLevel: metrics.ALPHA, + }, + ) + + NodeStartupPostRegistrationDuration = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: NodeStartupPostRegistrationKey, + Help: "Duration in seconds of node startup after registration.", + StabilityLevel: metrics.ALPHA, + }, + ) + + NodeStartupDuration = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: NodeStartupKey, + Help: "Duration in seconds of node startup in total.", + StabilityLevel: metrics.ALPHA, + }, + ) ) var registerMetrics sync.Once @@ -748,6 +798,11 @@ func Register(collectors ...metrics.StableCollector) { legacyregistry.MustRegister(PodWorkerDuration) legacyregistry.MustRegister(PodStartDuration) legacyregistry.MustRegister(PodStartSLIDuration) + legacyregistry.MustRegister(NodeStartupPreKubeletDuration) + legacyregistry.MustRegister(NodeStartupPreRegistrationDuration) + legacyregistry.MustRegister(NodeStartupRegistrationDuration) + legacyregistry.MustRegister(NodeStartupPostRegistrationDuration) + legacyregistry.MustRegister(NodeStartupDuration) legacyregistry.MustRegister(CgroupManagerDuration) legacyregistry.MustRegister(PodWorkerStartDuration) legacyregistry.MustRegister(PodStatusSyncDuration) diff --git a/pkg/kubelet/util/node_startup_latency_tracker.go b/pkg/kubelet/util/node_startup_latency_tracker.go new file mode 100644 index 00000000000..815e4e81eaf --- /dev/null +++ b/pkg/kubelet/util/node_startup_latency_tracker.go @@ -0,0 +1,103 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "sync" + "time" + + "k8s.io/kubernetes/pkg/kubelet/metrics" + "k8s.io/utils/clock" +) + +type NodeStartupLatencyTracker interface { + // This function may be called across Kubelet restart. + RecordAttemptRegisterNode() + // This function should not be called across Kubelet restart. + RecordRegisteredNewNode() + // This function may be called across Kubelet restart. + RecordNodeReady() +} + +type basicNodeStartupLatencyTracker struct { + lock sync.Mutex + + bootTime time.Time + kubeletStartTime time.Time + firstRegistrationAttemptTime time.Time + firstRegisteredNewNodeTime time.Time + firstNodeReadyTime time.Time + + // For testability + clock clock.Clock +} + +func NewNodeStartupLatencyTracker() NodeStartupLatencyTracker { + bootTime, err := GetBootTime() + if err != nil { + bootTime = time.Time{} + } + return &basicNodeStartupLatencyTracker{ + bootTime: bootTime, + kubeletStartTime: time.Now(), + clock: clock.RealClock{}, + } +} + +func (n *basicNodeStartupLatencyTracker) RecordAttemptRegisterNode() { + n.lock.Lock() + defer n.lock.Unlock() + + if !n.firstRegistrationAttemptTime.IsZero() { + return + } + + n.firstRegistrationAttemptTime = n.clock.Now() +} + +func (n *basicNodeStartupLatencyTracker) RecordRegisteredNewNode() { + n.lock.Lock() + defer n.lock.Unlock() + + if n.firstRegistrationAttemptTime.IsZero() || !n.firstRegisteredNewNodeTime.IsZero() { + return + } + + n.firstRegisteredNewNodeTime = n.clock.Now() + + if !n.bootTime.IsZero() { + metrics.NodeStartupPreKubeletDuration.Set(n.kubeletStartTime.Sub(n.bootTime).Seconds()) + } + metrics.NodeStartupPreRegistrationDuration.Set(n.firstRegistrationAttemptTime.Sub(n.kubeletStartTime).Seconds()) + metrics.NodeStartupRegistrationDuration.Set(n.firstRegisteredNewNodeTime.Sub(n.firstRegistrationAttemptTime).Seconds()) +} + +func (n *basicNodeStartupLatencyTracker) RecordNodeReady() { + n.lock.Lock() + defer n.lock.Unlock() + + if n.firstRegisteredNewNodeTime.IsZero() || !n.firstNodeReadyTime.IsZero() { + return + } + + n.firstNodeReadyTime = n.clock.Now() + + metrics.NodeStartupPostRegistrationDuration.Set(n.firstNodeReadyTime.Sub(n.firstRegisteredNewNodeTime).Seconds()) + if !n.bootTime.IsZero() { + metrics.NodeStartupDuration.Set(n.firstNodeReadyTime.Sub(n.bootTime).Seconds()) + } +} diff --git a/pkg/kubelet/util/node_startup_latency_tracker_test.go b/pkg/kubelet/util/node_startup_latency_tracker_test.go new file mode 100644 index 00000000000..bcb341100b3 --- /dev/null +++ b/pkg/kubelet/util/node_startup_latency_tracker_test.go @@ -0,0 +1,418 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "k8s.io/component-base/metrics/testutil" + "k8s.io/kubernetes/pkg/kubelet/metrics" + "k8s.io/utils/clock" + testingclock "k8s.io/utils/clock/testing" +) + +const ( + metricsNameNodeStartupPreKubelet = "kubelet_node_startup_pre_kubelet_duration_seconds" + metricsNameNodeStartupPreRegistration = "kubelet_node_startup_pre_registration_duration_seconds" + metricsNameNodeStartupRegistration = "kubelet_node_startup_registration_duration_seconds" + metricsNameNodeStartupPostRegistration = "kubelet_node_startup_post_registration_duration_seconds" + metricsNameNodeStartup = "kubelet_node_startup_duration_seconds" +) + +func TestNodeStartupLatencyNoEvents(t *testing.T) { + t.Run("metrics registered; no incoming events", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: clock.RealClock{}, + } + + wants := ` + # HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total. + # TYPE kubelet_node_startup_duration_seconds gauge + kubelet_node_startup_duration_seconds 0 + # HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration. + # TYPE kubelet_node_startup_post_registration_duration_seconds gauge + kubelet_node_startup_post_registration_duration_seconds 0 + # HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts. + # TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge + kubelet_node_startup_pre_kubelet_duration_seconds 0 + # HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration. + # TYPE kubelet_node_startup_pre_registration_duration_seconds gauge + kubelet_node_startup_pre_registration_duration_seconds 0 + # HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration. + # TYPE kubelet_node_startup_registration_duration_seconds gauge + kubelet_node_startup_registration_duration_seconds 0 + ` + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), + metricsNameNodeStartupPreKubelet, + metricsNameNodeStartupPreRegistration, + metricsNameNodeStartupRegistration, + metricsNameNodeStartupPostRegistration, + metricsNameNodeStartup, + ); err != nil { + t.Error(err) + } + + assert.Equal(t, frozenTime.Add(-100*time.Millisecond), tracker.bootTime) + assert.Equal(t, frozenTime, tracker.kubeletStartTime) + assert.True(t, tracker.firstRegistrationAttemptTime.IsZero()) + assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero()) + assert.True(t, tracker.firstNodeReadyTime.IsZero()) + }) +} + +func TestRecordAllTimestamps(t *testing.T) { + t.Run("all timestamps are recorded", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + fakeClock := testingclock.NewFakeClock(frozenTime) + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: fakeClock, + } + + fakeClock.Step(800 * time.Millisecond) + tracker.RecordAttemptRegisterNode() + + assert.Equal(t, frozenTime.Add(800*time.Millisecond), tracker.firstRegistrationAttemptTime) + + fakeClock.Step(400 * time.Millisecond) + tracker.RecordRegisteredNewNode() + + assert.Equal(t, frozenTime.Add(1200*time.Millisecond), tracker.firstRegisteredNewNodeTime) + + fakeClock.Step(1100 * time.Millisecond) + tracker.RecordNodeReady() + + assert.Equal(t, frozenTime.Add(2300*time.Millisecond), tracker.firstNodeReadyTime) + + wants := ` + # HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total. + # TYPE kubelet_node_startup_duration_seconds gauge + kubelet_node_startup_duration_seconds 2.4 + # HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration. + # TYPE kubelet_node_startup_post_registration_duration_seconds gauge + kubelet_node_startup_post_registration_duration_seconds 1.1 + # HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts. + # TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge + kubelet_node_startup_pre_kubelet_duration_seconds 0.1 + # HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration. + # TYPE kubelet_node_startup_pre_registration_duration_seconds gauge + kubelet_node_startup_pre_registration_duration_seconds 0.8 + # HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration. + # TYPE kubelet_node_startup_registration_duration_seconds gauge + kubelet_node_startup_registration_duration_seconds 0.4 + ` + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), + metricsNameNodeStartupPreKubelet, + metricsNameNodeStartupPreRegistration, + metricsNameNodeStartupRegistration, + metricsNameNodeStartupPostRegistration, + metricsNameNodeStartup, + ); err != nil { + t.Error(err) + } + }) +} + +func TestRecordAttemptRegister(t *testing.T) { + t.Run("record attempt register node", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + fakeClock := testingclock.NewFakeClock(frozenTime) + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: fakeClock, + } + + fakeClock.Step(600 * time.Millisecond) + tracker.RecordAttemptRegisterNode() + + assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstRegistrationAttemptTime) + assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero()) + assert.True(t, tracker.firstNodeReadyTime.IsZero()) + + wants := ` + # HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total. + # TYPE kubelet_node_startup_duration_seconds gauge + kubelet_node_startup_duration_seconds 0 + # HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration. + # TYPE kubelet_node_startup_post_registration_duration_seconds gauge + kubelet_node_startup_post_registration_duration_seconds 0 + # HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts. + # TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge + kubelet_node_startup_pre_kubelet_duration_seconds 0 + # HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration. + # TYPE kubelet_node_startup_pre_registration_duration_seconds gauge + kubelet_node_startup_pre_registration_duration_seconds 0 + # HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration. + # TYPE kubelet_node_startup_registration_duration_seconds gauge + kubelet_node_startup_registration_duration_seconds 0 + ` + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), + metricsNameNodeStartupPreKubelet, + metricsNameNodeStartupPreRegistration, + metricsNameNodeStartupRegistration, + metricsNameNodeStartupPostRegistration, + metricsNameNodeStartup, + ); err != nil { + t.Error(err) + } + }) +} + +func TestRecordAttemptRegisterTwice(t *testing.T) { + t.Run("record attempt register node twice", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + fakeClock := testingclock.NewFakeClock(frozenTime) + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: fakeClock, + } + + fakeClock.Step(600 * time.Millisecond) + tracker.RecordAttemptRegisterNode() + + fakeClock.Step(300 * time.Millisecond) + tracker.RecordAttemptRegisterNode() + + assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstRegistrationAttemptTime) + assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero()) + assert.True(t, tracker.firstNodeReadyTime.IsZero()) + }) +} + +func TestSkippingRecordRegisteredNewNode(t *testing.T) { + t.Run("record register new node twice", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + fakeClock := testingclock.NewFakeClock(frozenTime) + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: fakeClock, + } + + fakeClock.Step(100 * time.Millisecond) + tracker.RecordAttemptRegisterNode() + + fakeClock.Step(500 * time.Millisecond) + tracker.RecordRegisteredNewNode() + + fakeClock.Step(300 * time.Millisecond) + tracker.RecordRegisteredNewNode() + + assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstRegisteredNewNodeTime) + + wants := ` + # HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total. + # TYPE kubelet_node_startup_duration_seconds gauge + kubelet_node_startup_duration_seconds 0 + # HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration. + # TYPE kubelet_node_startup_post_registration_duration_seconds gauge + kubelet_node_startup_post_registration_duration_seconds 0 + # HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts. + # TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge + kubelet_node_startup_pre_kubelet_duration_seconds 0.1 + # HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration. + # TYPE kubelet_node_startup_pre_registration_duration_seconds gauge + kubelet_node_startup_pre_registration_duration_seconds 0.1 + # HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration. + # TYPE kubelet_node_startup_registration_duration_seconds gauge + kubelet_node_startup_registration_duration_seconds 0.5 + ` + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), + metricsNameNodeStartupPreKubelet, + metricsNameNodeStartupPreRegistration, + metricsNameNodeStartupRegistration, + metricsNameNodeStartupPostRegistration, + metricsNameNodeStartup, + ); err != nil { + t.Error(err) + } + }) + + t.Run("record register new node without previous step", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + fakeClock := testingclock.NewFakeClock(frozenTime) + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: fakeClock, + } + + fakeClock.Step(700 * time.Millisecond) + tracker.RecordRegisteredNewNode() + + assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero()) + + wants := ` + # HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total. + # TYPE kubelet_node_startup_duration_seconds gauge + kubelet_node_startup_duration_seconds 0 + # HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration. + # TYPE kubelet_node_startup_post_registration_duration_seconds gauge + kubelet_node_startup_post_registration_duration_seconds 0 + # HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts. + # TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge + kubelet_node_startup_pre_kubelet_duration_seconds 0 + # HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration. + # TYPE kubelet_node_startup_pre_registration_duration_seconds gauge + kubelet_node_startup_pre_registration_duration_seconds 0 + # HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration. + # TYPE kubelet_node_startup_registration_duration_seconds gauge + kubelet_node_startup_registration_duration_seconds 0 + ` + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), + metricsNameNodeStartupPreKubelet, + metricsNameNodeStartupPreRegistration, + metricsNameNodeStartupRegistration, + metricsNameNodeStartupPostRegistration, + metricsNameNodeStartup, + ); err != nil { + t.Error(err) + } + }) +} + +func TestSkippingRecordNodeReady(t *testing.T) { + t.Run("record node ready twice", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + fakeClock := testingclock.NewFakeClock(frozenTime) + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: fakeClock, + } + + fakeClock.Step(100 * time.Millisecond) + tracker.RecordAttemptRegisterNode() + + fakeClock.Step(200 * time.Millisecond) + tracker.RecordRegisteredNewNode() + + fakeClock.Step(300 * time.Millisecond) + tracker.RecordNodeReady() + + fakeClock.Step(700 * time.Millisecond) + tracker.RecordNodeReady() + + assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstNodeReadyTime) + + wants := ` + # HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total. + # TYPE kubelet_node_startup_duration_seconds gauge + kubelet_node_startup_duration_seconds 0.7 + # HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration. + # TYPE kubelet_node_startup_post_registration_duration_seconds gauge + kubelet_node_startup_post_registration_duration_seconds 0.3 + # HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts. + # TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge + kubelet_node_startup_pre_kubelet_duration_seconds 0.1 + # HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration. + # TYPE kubelet_node_startup_pre_registration_duration_seconds gauge + kubelet_node_startup_pre_registration_duration_seconds 0.1 + # HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration. + # TYPE kubelet_node_startup_registration_duration_seconds gauge + kubelet_node_startup_registration_duration_seconds 0.2 + ` + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), + metricsNameNodeStartupPreKubelet, + metricsNameNodeStartupPreRegistration, + metricsNameNodeStartupRegistration, + metricsNameNodeStartupPostRegistration, + metricsNameNodeStartup, + ); err != nil { + t.Error(err) + } + }) + + t.Run("record node ready without previous step", func(t *testing.T) { + metrics.Register() + defer clearMetrics() + + fakeClock := testingclock.NewFakeClock(frozenTime) + tracker := &basicNodeStartupLatencyTracker{ + bootTime: frozenTime.Add(-100 * time.Millisecond), + kubeletStartTime: frozenTime, + clock: fakeClock, + } + + fakeClock.Step(100 * time.Millisecond) + tracker.RecordAttemptRegisterNode() + + fakeClock.Step(700 * time.Millisecond) + tracker.RecordNodeReady() + + assert.True(t, tracker.firstNodeReadyTime.IsZero()) + + wants := ` + # HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total. + # TYPE kubelet_node_startup_duration_seconds gauge + kubelet_node_startup_duration_seconds 0 + # HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration. + # TYPE kubelet_node_startup_post_registration_duration_seconds gauge + kubelet_node_startup_post_registration_duration_seconds 0 + # HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts. + # TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge + kubelet_node_startup_pre_kubelet_duration_seconds 0 + # HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration. + # TYPE kubelet_node_startup_pre_registration_duration_seconds gauge + kubelet_node_startup_pre_registration_duration_seconds 0 + # HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration. + # TYPE kubelet_node_startup_registration_duration_seconds gauge + kubelet_node_startup_registration_duration_seconds 0 + ` + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), + metricsNameNodeStartupPreKubelet, + metricsNameNodeStartupPreRegistration, + metricsNameNodeStartupRegistration, + metricsNameNodeStartupPostRegistration, + metricsNameNodeStartup, + ); err != nil { + t.Error(err) + } + }) +} + +func clearMetrics() { + metrics.NodeStartupPreKubeletDuration.Set(0) + metrics.NodeStartupPreRegistrationDuration.Set(0) + metrics.NodeStartupRegistrationDuration.Set(0) + metrics.NodeStartupPostRegistrationDuration.Set(0) + metrics.NodeStartupDuration.Set(0) +} diff --git a/pkg/kubemark/hollow_kubelet.go b/pkg/kubemark/hollow_kubelet.go index 29ba3d4e1b9..74db6c95c37 100644 --- a/pkg/kubemark/hollow_kubelet.go +++ b/pkg/kubemark/hollow_kubelet.go @@ -97,24 +97,25 @@ func NewHollowKubelet( runtimeService internalapi.RuntimeService, containerManager cm.ContainerManager) *HollowKubelet { d := &kubelet.Dependencies{ - KubeClient: client, - HeartbeatClient: heartbeatClient, - ProbeManager: probetest.FakeManager{}, - RemoteRuntimeService: runtimeService, - RemoteImageService: imageService, - CAdvisorInterface: cadvisorInterface, - Cloud: nil, - OSInterface: &containertest.FakeOS{}, - ContainerManager: containerManager, - VolumePlugins: volumePlugins(), - TLSOptions: nil, - OOMAdjuster: oom.NewFakeOOMAdjuster(), - Mounter: &mount.FakeMounter{}, - Subpather: &subpath.FakeSubpath{}, - HostUtil: hostutil.NewFakeHostUtil(nil), - PodStartupLatencyTracker: kubeletutil.NewPodStartupLatencyTracker(), - TracerProvider: trace.NewNoopTracerProvider(), - Recorder: &record.FakeRecorder{}, // With real recorder we attempt to read /dev/kmsg. + KubeClient: client, + HeartbeatClient: heartbeatClient, + ProbeManager: probetest.FakeManager{}, + RemoteRuntimeService: runtimeService, + RemoteImageService: imageService, + CAdvisorInterface: cadvisorInterface, + Cloud: nil, + OSInterface: &containertest.FakeOS{}, + ContainerManager: containerManager, + VolumePlugins: volumePlugins(), + TLSOptions: nil, + OOMAdjuster: oom.NewFakeOOMAdjuster(), + Mounter: &mount.FakeMounter{}, + Subpather: &subpath.FakeSubpath{}, + HostUtil: hostutil.NewFakeHostUtil(nil), + PodStartupLatencyTracker: kubeletutil.NewPodStartupLatencyTracker(), + NodeStartupLatencyTracker: kubeletutil.NewNodeStartupLatencyTracker(), + TracerProvider: trace.NewNoopTracerProvider(), + Recorder: &record.FakeRecorder{}, // With real recorder we attempt to read /dev/kmsg. } return &HollowKubelet{