Create a node startup latency tracker

Signed-off-by: Qiutong Song <songqt01@gmail.com>
This commit is contained in:
Qiutong Song 2023-06-08 09:37:40 +00:00
parent 2c6c4566ef
commit d3eb082568
8 changed files with 638 additions and 41 deletions

View File

@ -833,6 +833,10 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
kubeDeps.PodStartupLatencyTracker = kubeletutil.NewPodStartupLatencyTracker() kubeDeps.PodStartupLatencyTracker = kubeletutil.NewPodStartupLatencyTracker()
} }
if kubeDeps.NodeStartupLatencyTracker == nil {
kubeDeps.NodeStartupLatencyTracker = kubeletutil.NewNodeStartupLatencyTracker()
}
// TODO(vmarmol): Do this through container config. // TODO(vmarmol): Do this through container config.
oomAdjuster := kubeDeps.OOMAdjuster oomAdjuster := kubeDeps.OOMAdjuster
if err := oomAdjuster.ApplyOOMScoreAdj(0, int(s.OOMScoreAdj)); err != nil { if err := oomAdjuster.ApplyOOMScoreAdj(0, int(s.OOMScoreAdj)); err != nil {

View File

@ -248,29 +248,30 @@ type Dependencies struct {
Options []Option Options []Option
// Injected Dependencies // Injected Dependencies
Auth server.AuthInterface Auth server.AuthInterface
CAdvisorInterface cadvisor.Interface CAdvisorInterface cadvisor.Interface
Cloud cloudprovider.Interface Cloud cloudprovider.Interface
ContainerManager cm.ContainerManager ContainerManager cm.ContainerManager
EventClient v1core.EventsGetter EventClient v1core.EventsGetter
HeartbeatClient clientset.Interface HeartbeatClient clientset.Interface
OnHeartbeatFailure func() OnHeartbeatFailure func()
KubeClient clientset.Interface KubeClient clientset.Interface
Mounter mount.Interface Mounter mount.Interface
HostUtil hostutil.HostUtils HostUtil hostutil.HostUtils
OOMAdjuster *oom.OOMAdjuster OOMAdjuster *oom.OOMAdjuster
OSInterface kubecontainer.OSInterface OSInterface kubecontainer.OSInterface
PodConfig *config.PodConfig PodConfig *config.PodConfig
ProbeManager prober.Manager ProbeManager prober.Manager
Recorder record.EventRecorder Recorder record.EventRecorder
Subpather subpath.Interface Subpather subpath.Interface
TracerProvider trace.TracerProvider TracerProvider trace.TracerProvider
VolumePlugins []volume.VolumePlugin VolumePlugins []volume.VolumePlugin
DynamicPluginProber volume.DynamicPluginProber DynamicPluginProber volume.DynamicPluginProber
TLSOptions *server.TLSOptions TLSOptions *server.TLSOptions
RemoteRuntimeService internalapi.RuntimeService RemoteRuntimeService internalapi.RuntimeService
RemoteImageService internalapi.ImageManagerService RemoteImageService internalapi.ImageManagerService
PodStartupLatencyTracker util.PodStartupLatencyTracker PodStartupLatencyTracker util.PodStartupLatencyTracker
NodeStartupLatencyTracker util.NodeStartupLatencyTracker
// remove it after cadvisor.UsingLegacyCadvisorStats dropped. // remove it after cadvisor.UsingLegacyCadvisorStats dropped.
useLegacyCadvisorStats bool useLegacyCadvisorStats bool
} }
@ -552,6 +553,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
keepTerminatedPodVolumes: keepTerminatedPodVolumes, keepTerminatedPodVolumes: keepTerminatedPodVolumes,
nodeStatusMaxImages: nodeStatusMaxImages, nodeStatusMaxImages: nodeStatusMaxImages,
tracer: tracer, tracer: tracer,
nodeStartupLatencyTracker: kubeDeps.NodeStartupLatencyTracker,
} }
if klet.cloud != nil { if klet.cloud != nil {
@ -1293,6 +1295,9 @@ type Kubelet struct {
// OpenTelemetry Tracer // OpenTelemetry Tracer
tracer trace.Tracer tracer trace.Tracer
// Track node startup latencies
nodeStartupLatencyTracker util.NodeStartupLatencyTracker
} }
// ListPodStats is delegated to StatsProvider, which implements stats.Provider interface // ListPodStats is delegated to StatsProvider, which implements stats.Provider interface

View File

@ -52,6 +52,9 @@ func (kl *Kubelet) registerWithAPIServer() {
if kl.registrationCompleted { if kl.registrationCompleted {
return return
} }
kl.nodeStartupLatencyTracker.RecordAttemptRegisterNode()
step := 100 * time.Millisecond step := 100 * time.Millisecond
for { for {
@ -85,6 +88,7 @@ func (kl *Kubelet) registerWithAPIServer() {
func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool { func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
_, err := kl.kubeClient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) _, err := kl.kubeClient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
if err == nil { if err == nil {
kl.nodeStartupLatencyTracker.RecordRegisteredNewNode()
return true return true
} }
@ -633,6 +637,12 @@ func (kl *Kubelet) patchNodeStatus(originalNode, node *v1.Node) (*v1.Node, error
} }
kl.lastStatusReportTime = kl.clock.Now() kl.lastStatusReportTime = kl.clock.Now()
kl.setLastObservedNodeAddresses(updatedNode.Status.Addresses) kl.setLastObservedNodeAddresses(updatedNode.Status.Addresses)
readyIdx, readyCondition := nodeutil.GetNodeCondition(&updatedNode.Status, v1.NodeReady)
if readyIdx >= 0 && readyCondition.Status == v1.ConditionTrue {
kl.nodeStartupLatencyTracker.RecordNodeReady()
}
return updatedNode, nil return updatedNode, nil
} }

View File

@ -268,6 +268,7 @@ func newTestKubeletWithImageList(
kubelet.podManager = kubepod.NewBasicPodManager() kubelet.podManager = kubepod.NewBasicPodManager()
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker() podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, kubelet.getRootDir()) kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, kubelet.getRootDir())
kubelet.nodeStartupLatencyTracker = kubeletutil.NewNodeStartupLatencyTracker()
kubelet.containerRuntime = fakeRuntime kubelet.containerRuntime = fakeRuntime
kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime) kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime)

View File

@ -33,6 +33,11 @@ const (
KubeletSubsystem = "kubelet" KubeletSubsystem = "kubelet"
NodeNameKey = "node_name" NodeNameKey = "node_name"
NodeLabelKey = "node" NodeLabelKey = "node"
NodeStartupPreKubeletKey = "node_startup_pre_kubelet_duration_seconds"
NodeStartupPreRegistrationKey = "node_startup_pre_registration_duration_seconds"
NodeStartupRegistrationKey = "node_startup_registration_duration_seconds"
NodeStartupPostRegistrationKey = "node_startup_post_registration_duration_seconds"
NodeStartupKey = "node_startup_duration_seconds"
PodWorkerDurationKey = "pod_worker_duration_seconds" PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds" PodStartDurationKey = "pod_start_duration_seconds"
PodStartSLIDurationKey = "pod_start_sli_duration_seconds" PodStartSLIDurationKey = "pod_start_sli_duration_seconds"
@ -736,6 +741,51 @@ var (
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
) )
NodeStartupPreKubeletDuration = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: NodeStartupPreKubeletKey,
Help: "Duration in seconds of node startup before kubelet starts.",
StabilityLevel: metrics.ALPHA,
},
)
NodeStartupPreRegistrationDuration = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: NodeStartupPreRegistrationKey,
Help: "Duration in seconds of node startup before registration.",
StabilityLevel: metrics.ALPHA,
},
)
NodeStartupRegistrationDuration = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: NodeStartupRegistrationKey,
Help: "Duration in seconds of node startup during registration.",
StabilityLevel: metrics.ALPHA,
},
)
NodeStartupPostRegistrationDuration = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: NodeStartupPostRegistrationKey,
Help: "Duration in seconds of node startup after registration.",
StabilityLevel: metrics.ALPHA,
},
)
NodeStartupDuration = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: NodeStartupKey,
Help: "Duration in seconds of node startup in total.",
StabilityLevel: metrics.ALPHA,
},
)
) )
var registerMetrics sync.Once var registerMetrics sync.Once
@ -748,6 +798,11 @@ func Register(collectors ...metrics.StableCollector) {
legacyregistry.MustRegister(PodWorkerDuration) legacyregistry.MustRegister(PodWorkerDuration)
legacyregistry.MustRegister(PodStartDuration) legacyregistry.MustRegister(PodStartDuration)
legacyregistry.MustRegister(PodStartSLIDuration) legacyregistry.MustRegister(PodStartSLIDuration)
legacyregistry.MustRegister(NodeStartupPreKubeletDuration)
legacyregistry.MustRegister(NodeStartupPreRegistrationDuration)
legacyregistry.MustRegister(NodeStartupRegistrationDuration)
legacyregistry.MustRegister(NodeStartupPostRegistrationDuration)
legacyregistry.MustRegister(NodeStartupDuration)
legacyregistry.MustRegister(CgroupManagerDuration) legacyregistry.MustRegister(CgroupManagerDuration)
legacyregistry.MustRegister(PodWorkerStartDuration) legacyregistry.MustRegister(PodWorkerStartDuration)
legacyregistry.MustRegister(PodStatusSyncDuration) legacyregistry.MustRegister(PodStatusSyncDuration)

View File

@ -0,0 +1,103 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"sync"
"time"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/utils/clock"
)
type NodeStartupLatencyTracker interface {
// This function may be called across Kubelet restart.
RecordAttemptRegisterNode()
// This function should not be called across Kubelet restart.
RecordRegisteredNewNode()
// This function may be called across Kubelet restart.
RecordNodeReady()
}
type basicNodeStartupLatencyTracker struct {
lock sync.Mutex
bootTime time.Time
kubeletStartTime time.Time
firstRegistrationAttemptTime time.Time
firstRegisteredNewNodeTime time.Time
firstNodeReadyTime time.Time
// For testability
clock clock.Clock
}
func NewNodeStartupLatencyTracker() NodeStartupLatencyTracker {
bootTime, err := GetBootTime()
if err != nil {
bootTime = time.Time{}
}
return &basicNodeStartupLatencyTracker{
bootTime: bootTime,
kubeletStartTime: time.Now(),
clock: clock.RealClock{},
}
}
func (n *basicNodeStartupLatencyTracker) RecordAttemptRegisterNode() {
n.lock.Lock()
defer n.lock.Unlock()
if !n.firstRegistrationAttemptTime.IsZero() {
return
}
n.firstRegistrationAttemptTime = n.clock.Now()
}
func (n *basicNodeStartupLatencyTracker) RecordRegisteredNewNode() {
n.lock.Lock()
defer n.lock.Unlock()
if n.firstRegistrationAttemptTime.IsZero() || !n.firstRegisteredNewNodeTime.IsZero() {
return
}
n.firstRegisteredNewNodeTime = n.clock.Now()
if !n.bootTime.IsZero() {
metrics.NodeStartupPreKubeletDuration.Set(n.kubeletStartTime.Sub(n.bootTime).Seconds())
}
metrics.NodeStartupPreRegistrationDuration.Set(n.firstRegistrationAttemptTime.Sub(n.kubeletStartTime).Seconds())
metrics.NodeStartupRegistrationDuration.Set(n.firstRegisteredNewNodeTime.Sub(n.firstRegistrationAttemptTime).Seconds())
}
func (n *basicNodeStartupLatencyTracker) RecordNodeReady() {
n.lock.Lock()
defer n.lock.Unlock()
if n.firstRegisteredNewNodeTime.IsZero() || !n.firstNodeReadyTime.IsZero() {
return
}
n.firstNodeReadyTime = n.clock.Now()
metrics.NodeStartupPostRegistrationDuration.Set(n.firstNodeReadyTime.Sub(n.firstRegisteredNewNodeTime).Seconds())
if !n.bootTime.IsZero() {
metrics.NodeStartupDuration.Set(n.firstNodeReadyTime.Sub(n.bootTime).Seconds())
}
}

View File

@ -0,0 +1,418 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"k8s.io/component-base/metrics/testutil"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/utils/clock"
testingclock "k8s.io/utils/clock/testing"
)
const (
metricsNameNodeStartupPreKubelet = "kubelet_node_startup_pre_kubelet_duration_seconds"
metricsNameNodeStartupPreRegistration = "kubelet_node_startup_pre_registration_duration_seconds"
metricsNameNodeStartupRegistration = "kubelet_node_startup_registration_duration_seconds"
metricsNameNodeStartupPostRegistration = "kubelet_node_startup_post_registration_duration_seconds"
metricsNameNodeStartup = "kubelet_node_startup_duration_seconds"
)
func TestNodeStartupLatencyNoEvents(t *testing.T) {
t.Run("metrics registered; no incoming events", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: clock.RealClock{},
}
wants := `
# HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total.
# TYPE kubelet_node_startup_duration_seconds gauge
kubelet_node_startup_duration_seconds 0
# HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration.
# TYPE kubelet_node_startup_post_registration_duration_seconds gauge
kubelet_node_startup_post_registration_duration_seconds 0
# HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts.
# TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge
kubelet_node_startup_pre_kubelet_duration_seconds 0
# HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration.
# TYPE kubelet_node_startup_pre_registration_duration_seconds gauge
kubelet_node_startup_pre_registration_duration_seconds 0
# HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration.
# TYPE kubelet_node_startup_registration_duration_seconds gauge
kubelet_node_startup_registration_duration_seconds 0
`
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants),
metricsNameNodeStartupPreKubelet,
metricsNameNodeStartupPreRegistration,
metricsNameNodeStartupRegistration,
metricsNameNodeStartupPostRegistration,
metricsNameNodeStartup,
); err != nil {
t.Error(err)
}
assert.Equal(t, frozenTime.Add(-100*time.Millisecond), tracker.bootTime)
assert.Equal(t, frozenTime, tracker.kubeletStartTime)
assert.True(t, tracker.firstRegistrationAttemptTime.IsZero())
assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero())
assert.True(t, tracker.firstNodeReadyTime.IsZero())
})
}
func TestRecordAllTimestamps(t *testing.T) {
t.Run("all timestamps are recorded", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
fakeClock := testingclock.NewFakeClock(frozenTime)
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: fakeClock,
}
fakeClock.Step(800 * time.Millisecond)
tracker.RecordAttemptRegisterNode()
assert.Equal(t, frozenTime.Add(800*time.Millisecond), tracker.firstRegistrationAttemptTime)
fakeClock.Step(400 * time.Millisecond)
tracker.RecordRegisteredNewNode()
assert.Equal(t, frozenTime.Add(1200*time.Millisecond), tracker.firstRegisteredNewNodeTime)
fakeClock.Step(1100 * time.Millisecond)
tracker.RecordNodeReady()
assert.Equal(t, frozenTime.Add(2300*time.Millisecond), tracker.firstNodeReadyTime)
wants := `
# HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total.
# TYPE kubelet_node_startup_duration_seconds gauge
kubelet_node_startup_duration_seconds 2.4
# HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration.
# TYPE kubelet_node_startup_post_registration_duration_seconds gauge
kubelet_node_startup_post_registration_duration_seconds 1.1
# HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts.
# TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge
kubelet_node_startup_pre_kubelet_duration_seconds 0.1
# HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration.
# TYPE kubelet_node_startup_pre_registration_duration_seconds gauge
kubelet_node_startup_pre_registration_duration_seconds 0.8
# HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration.
# TYPE kubelet_node_startup_registration_duration_seconds gauge
kubelet_node_startup_registration_duration_seconds 0.4
`
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants),
metricsNameNodeStartupPreKubelet,
metricsNameNodeStartupPreRegistration,
metricsNameNodeStartupRegistration,
metricsNameNodeStartupPostRegistration,
metricsNameNodeStartup,
); err != nil {
t.Error(err)
}
})
}
func TestRecordAttemptRegister(t *testing.T) {
t.Run("record attempt register node", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
fakeClock := testingclock.NewFakeClock(frozenTime)
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: fakeClock,
}
fakeClock.Step(600 * time.Millisecond)
tracker.RecordAttemptRegisterNode()
assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstRegistrationAttemptTime)
assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero())
assert.True(t, tracker.firstNodeReadyTime.IsZero())
wants := `
# HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total.
# TYPE kubelet_node_startup_duration_seconds gauge
kubelet_node_startup_duration_seconds 0
# HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration.
# TYPE kubelet_node_startup_post_registration_duration_seconds gauge
kubelet_node_startup_post_registration_duration_seconds 0
# HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts.
# TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge
kubelet_node_startup_pre_kubelet_duration_seconds 0
# HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration.
# TYPE kubelet_node_startup_pre_registration_duration_seconds gauge
kubelet_node_startup_pre_registration_duration_seconds 0
# HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration.
# TYPE kubelet_node_startup_registration_duration_seconds gauge
kubelet_node_startup_registration_duration_seconds 0
`
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants),
metricsNameNodeStartupPreKubelet,
metricsNameNodeStartupPreRegistration,
metricsNameNodeStartupRegistration,
metricsNameNodeStartupPostRegistration,
metricsNameNodeStartup,
); err != nil {
t.Error(err)
}
})
}
func TestRecordAttemptRegisterTwice(t *testing.T) {
t.Run("record attempt register node twice", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
fakeClock := testingclock.NewFakeClock(frozenTime)
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: fakeClock,
}
fakeClock.Step(600 * time.Millisecond)
tracker.RecordAttemptRegisterNode()
fakeClock.Step(300 * time.Millisecond)
tracker.RecordAttemptRegisterNode()
assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstRegistrationAttemptTime)
assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero())
assert.True(t, tracker.firstNodeReadyTime.IsZero())
})
}
func TestSkippingRecordRegisteredNewNode(t *testing.T) {
t.Run("record register new node twice", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
fakeClock := testingclock.NewFakeClock(frozenTime)
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: fakeClock,
}
fakeClock.Step(100 * time.Millisecond)
tracker.RecordAttemptRegisterNode()
fakeClock.Step(500 * time.Millisecond)
tracker.RecordRegisteredNewNode()
fakeClock.Step(300 * time.Millisecond)
tracker.RecordRegisteredNewNode()
assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstRegisteredNewNodeTime)
wants := `
# HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total.
# TYPE kubelet_node_startup_duration_seconds gauge
kubelet_node_startup_duration_seconds 0
# HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration.
# TYPE kubelet_node_startup_post_registration_duration_seconds gauge
kubelet_node_startup_post_registration_duration_seconds 0
# HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts.
# TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge
kubelet_node_startup_pre_kubelet_duration_seconds 0.1
# HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration.
# TYPE kubelet_node_startup_pre_registration_duration_seconds gauge
kubelet_node_startup_pre_registration_duration_seconds 0.1
# HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration.
# TYPE kubelet_node_startup_registration_duration_seconds gauge
kubelet_node_startup_registration_duration_seconds 0.5
`
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants),
metricsNameNodeStartupPreKubelet,
metricsNameNodeStartupPreRegistration,
metricsNameNodeStartupRegistration,
metricsNameNodeStartupPostRegistration,
metricsNameNodeStartup,
); err != nil {
t.Error(err)
}
})
t.Run("record register new node without previous step", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
fakeClock := testingclock.NewFakeClock(frozenTime)
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: fakeClock,
}
fakeClock.Step(700 * time.Millisecond)
tracker.RecordRegisteredNewNode()
assert.True(t, tracker.firstRegisteredNewNodeTime.IsZero())
wants := `
# HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total.
# TYPE kubelet_node_startup_duration_seconds gauge
kubelet_node_startup_duration_seconds 0
# HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration.
# TYPE kubelet_node_startup_post_registration_duration_seconds gauge
kubelet_node_startup_post_registration_duration_seconds 0
# HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts.
# TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge
kubelet_node_startup_pre_kubelet_duration_seconds 0
# HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration.
# TYPE kubelet_node_startup_pre_registration_duration_seconds gauge
kubelet_node_startup_pre_registration_duration_seconds 0
# HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration.
# TYPE kubelet_node_startup_registration_duration_seconds gauge
kubelet_node_startup_registration_duration_seconds 0
`
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants),
metricsNameNodeStartupPreKubelet,
metricsNameNodeStartupPreRegistration,
metricsNameNodeStartupRegistration,
metricsNameNodeStartupPostRegistration,
metricsNameNodeStartup,
); err != nil {
t.Error(err)
}
})
}
func TestSkippingRecordNodeReady(t *testing.T) {
t.Run("record node ready twice", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
fakeClock := testingclock.NewFakeClock(frozenTime)
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: fakeClock,
}
fakeClock.Step(100 * time.Millisecond)
tracker.RecordAttemptRegisterNode()
fakeClock.Step(200 * time.Millisecond)
tracker.RecordRegisteredNewNode()
fakeClock.Step(300 * time.Millisecond)
tracker.RecordNodeReady()
fakeClock.Step(700 * time.Millisecond)
tracker.RecordNodeReady()
assert.Equal(t, frozenTime.Add(600*time.Millisecond), tracker.firstNodeReadyTime)
wants := `
# HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total.
# TYPE kubelet_node_startup_duration_seconds gauge
kubelet_node_startup_duration_seconds 0.7
# HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration.
# TYPE kubelet_node_startup_post_registration_duration_seconds gauge
kubelet_node_startup_post_registration_duration_seconds 0.3
# HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts.
# TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge
kubelet_node_startup_pre_kubelet_duration_seconds 0.1
# HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration.
# TYPE kubelet_node_startup_pre_registration_duration_seconds gauge
kubelet_node_startup_pre_registration_duration_seconds 0.1
# HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration.
# TYPE kubelet_node_startup_registration_duration_seconds gauge
kubelet_node_startup_registration_duration_seconds 0.2
`
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants),
metricsNameNodeStartupPreKubelet,
metricsNameNodeStartupPreRegistration,
metricsNameNodeStartupRegistration,
metricsNameNodeStartupPostRegistration,
metricsNameNodeStartup,
); err != nil {
t.Error(err)
}
})
t.Run("record node ready without previous step", func(t *testing.T) {
metrics.Register()
defer clearMetrics()
fakeClock := testingclock.NewFakeClock(frozenTime)
tracker := &basicNodeStartupLatencyTracker{
bootTime: frozenTime.Add(-100 * time.Millisecond),
kubeletStartTime: frozenTime,
clock: fakeClock,
}
fakeClock.Step(100 * time.Millisecond)
tracker.RecordAttemptRegisterNode()
fakeClock.Step(700 * time.Millisecond)
tracker.RecordNodeReady()
assert.True(t, tracker.firstNodeReadyTime.IsZero())
wants := `
# HELP kubelet_node_startup_duration_seconds [ALPHA] Duration in seconds of node startup in total.
# TYPE kubelet_node_startup_duration_seconds gauge
kubelet_node_startup_duration_seconds 0
# HELP kubelet_node_startup_post_registration_duration_seconds [ALPHA] Duration in seconds of node startup after registration.
# TYPE kubelet_node_startup_post_registration_duration_seconds gauge
kubelet_node_startup_post_registration_duration_seconds 0
# HELP kubelet_node_startup_pre_kubelet_duration_seconds [ALPHA] Duration in seconds of node startup before kubelet starts.
# TYPE kubelet_node_startup_pre_kubelet_duration_seconds gauge
kubelet_node_startup_pre_kubelet_duration_seconds 0
# HELP kubelet_node_startup_pre_registration_duration_seconds [ALPHA] Duration in seconds of node startup before registration.
# TYPE kubelet_node_startup_pre_registration_duration_seconds gauge
kubelet_node_startup_pre_registration_duration_seconds 0
# HELP kubelet_node_startup_registration_duration_seconds [ALPHA] Duration in seconds of node startup during registration.
# TYPE kubelet_node_startup_registration_duration_seconds gauge
kubelet_node_startup_registration_duration_seconds 0
`
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants),
metricsNameNodeStartupPreKubelet,
metricsNameNodeStartupPreRegistration,
metricsNameNodeStartupRegistration,
metricsNameNodeStartupPostRegistration,
metricsNameNodeStartup,
); err != nil {
t.Error(err)
}
})
}
func clearMetrics() {
metrics.NodeStartupPreKubeletDuration.Set(0)
metrics.NodeStartupPreRegistrationDuration.Set(0)
metrics.NodeStartupRegistrationDuration.Set(0)
metrics.NodeStartupPostRegistrationDuration.Set(0)
metrics.NodeStartupDuration.Set(0)
}

View File

@ -97,24 +97,25 @@ func NewHollowKubelet(
runtimeService internalapi.RuntimeService, runtimeService internalapi.RuntimeService,
containerManager cm.ContainerManager) *HollowKubelet { containerManager cm.ContainerManager) *HollowKubelet {
d := &kubelet.Dependencies{ d := &kubelet.Dependencies{
KubeClient: client, KubeClient: client,
HeartbeatClient: heartbeatClient, HeartbeatClient: heartbeatClient,
ProbeManager: probetest.FakeManager{}, ProbeManager: probetest.FakeManager{},
RemoteRuntimeService: runtimeService, RemoteRuntimeService: runtimeService,
RemoteImageService: imageService, RemoteImageService: imageService,
CAdvisorInterface: cadvisorInterface, CAdvisorInterface: cadvisorInterface,
Cloud: nil, Cloud: nil,
OSInterface: &containertest.FakeOS{}, OSInterface: &containertest.FakeOS{},
ContainerManager: containerManager, ContainerManager: containerManager,
VolumePlugins: volumePlugins(), VolumePlugins: volumePlugins(),
TLSOptions: nil, TLSOptions: nil,
OOMAdjuster: oom.NewFakeOOMAdjuster(), OOMAdjuster: oom.NewFakeOOMAdjuster(),
Mounter: &mount.FakeMounter{}, Mounter: &mount.FakeMounter{},
Subpather: &subpath.FakeSubpath{}, Subpather: &subpath.FakeSubpath{},
HostUtil: hostutil.NewFakeHostUtil(nil), HostUtil: hostutil.NewFakeHostUtil(nil),
PodStartupLatencyTracker: kubeletutil.NewPodStartupLatencyTracker(), PodStartupLatencyTracker: kubeletutil.NewPodStartupLatencyTracker(),
TracerProvider: trace.NewNoopTracerProvider(), NodeStartupLatencyTracker: kubeletutil.NewNodeStartupLatencyTracker(),
Recorder: &record.FakeRecorder{}, // With real recorder we attempt to read /dev/kmsg. TracerProvider: trace.NewNoopTracerProvider(),
Recorder: &record.FakeRecorder{}, // With real recorder we attempt to read /dev/kmsg.
} }
return &HollowKubelet{ return &HollowKubelet{