From 54e581930c719460263aef501a8b92239a577307 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Wed, 5 Dec 2018 15:07:52 -0800 Subject: [PATCH] fix node and kubelet start times --- pkg/kubelet/server/stats/BUILD | 1 + pkg/kubelet/server/stats/summary.go | 24 +++++++++- .../server/stats/summary_sys_containers.go | 29 ++++++++---- pkg/kubelet/server/stats/summary_test.go | 8 ++-- pkg/kubelet/util/BUILD | 2 + pkg/kubelet/util/boottime_util_darwin.go | 44 +++++++++++++++++++ pkg/kubelet/util/boottime_util_linux.go | 36 +++++++++++++++ pkg/kubelet/util/util_unsupported.go | 5 +++ pkg/kubelet/util/util_windows.go | 13 ++++++ test/e2e_node/node_problem_detector_linux.go | 25 +++-------- 10 files changed, 154 insertions(+), 33 deletions(-) create mode 100644 pkg/kubelet/util/boottime_util_darwin.go create mode 100644 pkg/kubelet/util/boottime_util_linux.go diff --git a/pkg/kubelet/server/stats/BUILD b/pkg/kubelet/server/stats/BUILD index 6fec6f2eb99..dde9b3cbc7d 100644 --- a/pkg/kubelet/server/stats/BUILD +++ b/pkg/kubelet/server/stats/BUILD @@ -18,6 +18,7 @@ go_library( "//pkg/kubelet/apis/stats/v1alpha1:go_default_library", "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/container:go_default_library", + "//pkg/kubelet/util:go_default_library", "//pkg/kubelet/util/format:go_default_library", "//pkg/volume:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", diff --git a/pkg/kubelet/server/stats/summary.go b/pkg/kubelet/server/stats/summary.go index fb646c5d2f3..93aef69ed87 100644 --- a/pkg/kubelet/server/stats/summary.go +++ b/pkg/kubelet/server/stats/summary.go @@ -19,7 +19,11 @@ package stats import ( "fmt" + "k8s.io/klog" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" + "k8s.io/kubernetes/pkg/kubelet/util" ) type SummaryProvider interface { @@ -32,6 +36,11 @@ type SummaryProvider interface { // summaryProviderImpl implements the SummaryProvider interface. type summaryProviderImpl struct { + // kubeletCreationTime is the time at which the summaryProvider was created. + kubeletCreationTime metav1.Time + // systemBootTime is the time at which the system was started + systemBootTime metav1.Time + provider StatsProvider } @@ -40,7 +49,18 @@ var _ SummaryProvider = &summaryProviderImpl{} // NewSummaryProvider returns a SummaryProvider using the stats provided by the // specified statsProvider. func NewSummaryProvider(statsProvider StatsProvider) SummaryProvider { - return &summaryProviderImpl{statsProvider} + kubeletCreationTime := metav1.Now() + bootTime, err := util.GetBootTime() + if err != nil { + // bootTime will be zero if we encounter an error getting the boot time. + klog.Warningf("Error getting system boot time. Node metrics will have an incorrect start time: %v", err) + } + + return &summaryProviderImpl{ + kubeletCreationTime: kubeletCreationTime, + systemBootTime: metav1.NewTime(bootTime), + provider: statsProvider, + } } func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error) { @@ -77,7 +97,7 @@ func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error) CPU: rootStats.CPU, Memory: rootStats.Memory, Network: networkStats, - StartTime: rootStats.StartTime, + StartTime: sp.systemBootTime, Fs: rootFsStats, Runtime: &statsapi.RuntimeStats{ImageFs: imageFsStats}, Rlimit: rlimit, diff --git a/pkg/kubelet/server/stats/summary_sys_containers.go b/pkg/kubelet/server/stats/summary_sys_containers.go index baaff0ab1bd..4526f2a7695 100644 --- a/pkg/kubelet/server/stats/summary_sys_containers.go +++ b/pkg/kubelet/server/stats/summary_sys_containers.go @@ -21,6 +21,7 @@ package stats import ( "k8s.io/klog" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" "k8s.io/kubernetes/pkg/kubelet/cm" ) @@ -29,11 +30,12 @@ func (sp *summaryProviderImpl) GetSystemContainersStats(nodeConfig cm.NodeConfig systemContainers := map[string]struct { name string forceStatsUpdate bool + startTime metav1.Time }{ - statsapi.SystemContainerKubelet: {nodeConfig.KubeletCgroupsName, false}, - statsapi.SystemContainerRuntime: {nodeConfig.RuntimeCgroupsName, false}, - statsapi.SystemContainerMisc: {nodeConfig.SystemCgroupsName, false}, - statsapi.SystemContainerPods: {sp.provider.GetPodCgroupRoot(), updateStats}, + statsapi.SystemContainerKubelet: {name: nodeConfig.KubeletCgroupsName, forceStatsUpdate: false, startTime: sp.kubeletCreationTime}, + statsapi.SystemContainerRuntime: {name: nodeConfig.RuntimeCgroupsName, forceStatsUpdate: false}, + statsapi.SystemContainerMisc: {name: nodeConfig.SystemCgroupsName, forceStatsUpdate: false}, + statsapi.SystemContainerPods: {name: sp.provider.GetPodCgroupRoot(), forceStatsUpdate: updateStats}, } for sys, cont := range systemContainers { // skip if cgroup name is undefined (not all system containers are required) @@ -48,6 +50,11 @@ func (sp *summaryProviderImpl) GetSystemContainersStats(nodeConfig cm.NodeConfig // System containers don't have a filesystem associated with them. s.Logs, s.Rootfs = nil, nil s.Name = sys + + // if we know the start time of a system container, use that instead of the start time provided by cAdvisor + if !cont.startTime.IsZero() { + s.StartTime = cont.startTime + } stats = append(stats, *s) } @@ -58,11 +65,12 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c systemContainers := map[string]struct { name string forceStatsUpdate bool + startTime metav1.Time }{ - statsapi.SystemContainerKubelet: {nodeConfig.KubeletCgroupsName, false}, - statsapi.SystemContainerRuntime: {nodeConfig.RuntimeCgroupsName, false}, - statsapi.SystemContainerMisc: {nodeConfig.SystemCgroupsName, false}, - statsapi.SystemContainerPods: {sp.provider.GetPodCgroupRoot(), updateStats}, + statsapi.SystemContainerKubelet: {name: nodeConfig.KubeletCgroupsName, forceStatsUpdate: false, startTime: sp.kubeletCreationTime}, + statsapi.SystemContainerRuntime: {name: nodeConfig.RuntimeCgroupsName, forceStatsUpdate: false}, + statsapi.SystemContainerMisc: {name: nodeConfig.SystemCgroupsName, forceStatsUpdate: false}, + statsapi.SystemContainerPods: {name: sp.provider.GetPodCgroupRoot(), forceStatsUpdate: updateStats}, } for sys, cont := range systemContainers { // skip if cgroup name is undefined (not all system containers are required) @@ -75,6 +83,11 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c continue } s.Name = sys + + // if we know the start time of a system container, use that instead of the start time provided by cAdvisor + if !cont.startTime.IsZero() { + s.StartTime = cont.startTime + } stats = append(stats, *s) } diff --git a/pkg/kubelet/server/stats/summary_test.go b/pkg/kubelet/server/stats/summary_test.go index d210298f59f..081036855a2 100644 --- a/pkg/kubelet/server/stats/summary_test.go +++ b/pkg/kubelet/server/stats/summary_test.go @@ -83,12 +83,14 @@ func TestSummaryProviderGetStats(t *testing.T) { On("GetCgroupStats", "/kubelet", false).Return(cgroupStatsMap["/kubelet"].cs, cgroupStatsMap["/kubelet"].ns, nil). On("GetCgroupStats", "/kubepods", true).Return(cgroupStatsMap["/pods"].cs, cgroupStatsMap["/pods"].ns, nil) - provider := NewSummaryProvider(mockStatsProvider) + kubeletCreationTime := metav1.Now() + systemBootTime := metav1.Now() + provider := summaryProviderImpl{kubeletCreationTime: kubeletCreationTime, systemBootTime: systemBootTime, provider: mockStatsProvider} summary, err := provider.Get(true) assert.NoError(err) assert.Equal(summary.Node.NodeName, "test-node") - assert.Equal(summary.Node.StartTime, cgroupStatsMap["/"].cs.StartTime) + assert.Equal(summary.Node.StartTime, systemBootTime) assert.Equal(summary.Node.CPU, cgroupStatsMap["/"].cs.CPU) assert.Equal(summary.Node.Memory, cgroupStatsMap["/"].cs.Memory) assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns) @@ -98,7 +100,7 @@ func TestSummaryProviderGetStats(t *testing.T) { assert.Equal(len(summary.Node.SystemContainers), 4) assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{ Name: "kubelet", - StartTime: cgroupStatsMap["/kubelet"].cs.StartTime, + StartTime: kubeletCreationTime, CPU: cgroupStatsMap["/kubelet"].cs.CPU, Memory: cgroupStatsMap["/kubelet"].cs.Memory, Accelerators: cgroupStatsMap["/kubelet"].cs.Accelerators, diff --git a/pkg/kubelet/util/BUILD b/pkg/kubelet/util/BUILD index 4165864e0a3..7b590024861 100644 --- a/pkg/kubelet/util/BUILD +++ b/pkg/kubelet/util/BUILD @@ -34,6 +34,8 @@ go_test( go_library( name = "go_default_library", srcs = [ + "boottime_util_darwin.go", + "boottime_util_linux.go", "doc.go", "util.go", "util_unix.go", diff --git a/pkg/kubelet/util/boottime_util_darwin.go b/pkg/kubelet/util/boottime_util_darwin.go new file mode 100644 index 00000000000..09d3b8865da --- /dev/null +++ b/pkg/kubelet/util/boottime_util_darwin.go @@ -0,0 +1,44 @@ +// +build darwin + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "fmt" + "syscall" + "time" + "unsafe" + + "golang.org/x/sys/unix" +) + +// GetBootTime returns the time at which the machine was started, truncated to the nearest second +func GetBootTime() (time.Time, error) { + output, err := unix.SysctlRaw("kern.boottime") + if err != nil { + return time.Time{}, err + } + var timeval syscall.Timeval + if len(output) != int(unsafe.Sizeof(timeval)) { + return time.Time{}, fmt.Errorf("unexpected output when calling syscall kern.bootime. Expected len(output) to be %v, but got %v", + int(unsafe.Sizeof(timeval)), len(output)) + } + timeval = *(*syscall.Timeval)(unsafe.Pointer(&output[0])) + sec, nsec := timeval.Unix() + return time.Unix(sec, nsec).Truncate(time.Second), nil +} diff --git a/pkg/kubelet/util/boottime_util_linux.go b/pkg/kubelet/util/boottime_util_linux.go new file mode 100644 index 00000000000..f00e7c06bfa --- /dev/null +++ b/pkg/kubelet/util/boottime_util_linux.go @@ -0,0 +1,36 @@ +// +build freebsd linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "fmt" + "time" + + "golang.org/x/sys/unix" +) + +// GetBootTime returns the time at which the machine was started, truncated to the nearest second +func GetBootTime() (time.Time, error) { + currentTime := time.Now() + var info unix.Sysinfo_t + if err := unix.Sysinfo(&info); err != nil { + return time.Time{}, fmt.Errorf("error getting system uptime: %s", err) + } + return currentTime.Add(-time.Duration(info.Uptime) * time.Second).Truncate(time.Second), nil +} diff --git a/pkg/kubelet/util/util_unsupported.go b/pkg/kubelet/util/util_unsupported.go index 6661678aced..68a2bdf01b7 100644 --- a/pkg/kubelet/util/util_unsupported.go +++ b/pkg/kubelet/util/util_unsupported.go @@ -45,3 +45,8 @@ func UnlockPath(fileHandles []uintptr) { func LocalEndpoint(path, file string) string { return "" } + +// GetBootTime empty implementation +func GetBootTime() (time.Time, error) { + return time.Time{}, fmt.Errorf("GetBootTime is unsupported in this build") +} diff --git a/pkg/kubelet/util/util_windows.go b/pkg/kubelet/util/util_windows.go index 7123728ff94..92accc55e14 100644 --- a/pkg/kubelet/util/util_windows.go +++ b/pkg/kubelet/util/util_windows.go @@ -23,6 +23,7 @@ import ( "net" "net/url" "strings" + "syscall" "time" "github.com/Microsoft/go-winio" @@ -112,3 +113,15 @@ func LocalEndpoint(path, file string) string { } return u.String() + "//./pipe/" + file } + +var tickCount = syscall.NewLazyDLL("kernel32.dll").NewProc("GetTickCount64") + +// GetBootTime returns the time at which the machine was started, truncated to the nearest second +func GetBootTime() (time.Time, error) { + currentTime := time.Now() + output, _, err := tickCount.Call() + if errno, ok := err.(syscall.Errno); !ok || errno != 0 { + return time.Time{}, err + } + return currentTime.Add(-time.Duration(output) * time.Millisecond).Truncate(time.Second), nil +} diff --git a/test/e2e_node/node_problem_detector_linux.go b/test/e2e_node/node_problem_detector_linux.go index 64aa7ac7207..36a63193c9e 100644 --- a/test/e2e_node/node_problem_detector_linux.go +++ b/test/e2e_node/node_problem_detector_linux.go @@ -22,7 +22,6 @@ import ( "fmt" "os" "path" - "syscall" "time" "k8s.io/api/core/v1" @@ -34,6 +33,7 @@ import ( clientset "k8s.io/client-go/kubernetes" coreclientset "k8s.io/client-go/kubernetes/typed/core/v1" nodeutil "k8s.io/kubernetes/pkg/api/v1/node" + "k8s.io/kubernetes/pkg/kubelet/util" "k8s.io/kubernetes/test/e2e/framework" . "github.com/onsi/ginkgo" @@ -97,8 +97,11 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete BeforeEach(func() { By("Calculate Lookback duration") var err error - nodeTime, bootTime, err = getNodeTime() + + nodeTime = time.Now() + bootTime, err = util.GetBootTime() Expect(err).To(BeNil()) + // Set lookback duration longer than node up time. // Assume the test won't take more than 1 hour, in fact it usually only takes 90 seconds. lookback = nodeTime.Sub(bootTime) + time.Hour @@ -387,24 +390,6 @@ func injectLog(file string, timestamp time.Time, log string, num int) error { return nil } -// getNodeTime gets node boot time and current time. -func getNodeTime() (time.Time, time.Time, error) { - // Get node current time. - nodeTime := time.Now() - - // Get system uptime. - var info syscall.Sysinfo_t - if err := syscall.Sysinfo(&info); err != nil { - return time.Time{}, time.Time{}, err - } - // Get node boot time. NOTE that because we get node current time before uptime, the boot time - // calculated will be a little earlier than the real boot time. This won't affect the correctness - // of the test result. - bootTime := nodeTime.Add(-time.Duration(info.Uptime) * time.Second) - - return nodeTime, bootTime, nil -} - // verifyEvents verifies there are num specific events generated func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error { events, err := e.List(options)