Merge pull request #80176 from liyanhui1228/cpuusage

Add CPU usage nano cores for windows nodes
Kubernetes Prow Robot 2019-07-22 13:52:53 -07:00 committed by GitHub
commit 04c9bd9bfa
4 changed files with 90 additions and 4 deletions

View File

@@ -53,7 +53,11 @@ var (
 // NewPerfCounterClient creates a client using perf counters
 func NewPerfCounterClient() (Client, error) {
-	return newClient(&perfCounterNodeStatsClient{})
+	// Initialize the cache
+	initCache := cpuUsageCoreNanoSecondsCache{0, 0}
+	return newClient(&perfCounterNodeStatsClient{
+		cpuUsageCoreNanoSecondsCache: initCache,
+	})
 }
 
 // perfCounterNodeStatsClient is a client that provides Windows Stats via PerfCounters
@@ -61,6 +65,8 @@ type perfCounterNodeStatsClient struct {
 	nodeMetrics
 	mu sync.RWMutex // mu protects nodeMetrics
 	nodeInfo
+	// cpuUsageCoreNanoSecondsCache caches the cpu usage for nodes.
+	cpuUsageCoreNanoSecondsCache
 }
 
 func (p *perfCounterNodeStatsClient) startMonitoring() error {
@@ -110,6 +116,17 @@ func (p *perfCounterNodeStatsClient) startMonitoring() error {
 		p.collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter, networkAdapterCounter)
 	}, perfCounterUpdatePeriod)
+
+	// Cache the CPU usage every defaultCachePeriod
+	go wait.Forever(func() {
+		newValue := p.nodeMetrics.cpuUsageCoreNanoSeconds
+		p.mu.Lock()
+		defer p.mu.Unlock()
+		p.cpuUsageCoreNanoSecondsCache = cpuUsageCoreNanoSecondsCache{
+			previousValue: p.cpuUsageCoreNanoSecondsCache.latestValue,
+			latestValue:   newValue,
+		}
+	}, defaultCachePeriod)
 	return nil
 }
@@ -145,6 +162,7 @@ func (p *perfCounterNodeStatsClient) getNodeInfo() nodeInfo {
 func (p *perfCounterNodeStatsClient) collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter *perfCounter, networkAdapterCounter *networkCounter) {
 	cpuValue, err := cpuCounter.getData()
+	cpuCores := runtime.NumCPU()
 	if err != nil {
 		klog.Errorf("Unable to get cpu perf counter data; err: %v", err)
 		return
@@ -171,7 +189,8 @@ func (p *perfCounterNodeStatsClient) collectMetricsData(cpuCounter, memWorkingSe
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	p.nodeMetrics = nodeMetrics{
-		cpuUsageCoreNanoSeconds:   p.convertCPUValue(cpuValue),
+		cpuUsageCoreNanoSeconds:   p.convertCPUValue(cpuCores, cpuValue),
+		cpuUsageNanoCores:         p.getCPUUsageNanoCores(),
 		memoryPrivWorkingSetBytes: memWorkingSetValue,
 		memoryCommittedBytes:      memCommittedBytesValue,
 		interfaceStats:            networkAdapterStats,
@@ -179,8 +198,7 @@ func (p *perfCounterNodeStatsClient) collectMetricsData(cpuCounter, memWorkingSe
 	}
 }
 
-func (p *perfCounterNodeStatsClient) convertCPUValue(cpuValue uint64) uint64 {
-	cpuCores := runtime.NumCPU()
+func (p *perfCounterNodeStatsClient) convertCPUValue(cpuCores int, cpuValue uint64) uint64 {
 	// This converts perf counter data which is cpu percentage for all cores into nanoseconds.
 	// The formula is (cpuPercentage / 100.0) * #cores * 1e+9 (nano seconds). More info here:
 	// https://github.com/kubernetes/heapster/issues/650
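A quick worked example of the formula in the comment above (an illustrative sketch only; the helper name is made up, and the accumulation onto the previous counter value is inferred from the running cpuUsageCoreNanoSeconds counter and the tests below):

    // Each perf counter sample covers perfCounterUpdatePeriod (1 second).
    // 50% across 4 cores => (50 / 100.0) * 4 * 1e9 = 2e9 core-nanoseconds for that second.
    func addCPUSample(prevCoreNanoSeconds uint64, cpuCores int, cpuPercentage uint64) uint64 {
    	return prevCoreNanoSeconds + uint64((float64(cpuPercentage)/100.0)*float64(cpuCores)*1e9)
    }

This is the same arithmetic TestConvertCPUValue exercises: 50% on 4 cores yields 2000000000.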
@@ -188,6 +206,12 @@ func (p *perfCounterNodeStatsClient) convertCPUValue(cpuValue uint64) uint64 {
 	return newValue
 }
 
+func (p *perfCounterNodeStatsClient) getCPUUsageNanoCores() uint64 {
+	cachePeriodSeconds := uint64(defaultCachePeriod / time.Second)
+	cpuUsageNanoCores := (p.cpuUsageCoreNanoSecondsCache.latestValue - p.cpuUsageCoreNanoSecondsCache.previousValue) / cachePeriodSeconds
+	return cpuUsageNanoCores
+}
+
 func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) {
 	// We use GlobalMemoryStatusEx instead of GetPhysicallyInstalledSystemMemory
 	// on Windows node for the following reasons:
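Taken together, the caching goroutine in startMonitoring and getCPUUsageNanoCores form a simple rate calculation: every defaultCachePeriod the cumulative cpuUsageCoreNanoSeconds counter is snapshotted, and the instantaneous usage in nano-cores is the difference between the two most recent snapshots divided by the window length in seconds. A standalone sketch of that computation (hypothetical function name, shown only to make the units explicit):

    // latest and previous are cumulative core-nanosecond counters taken
    // defaultCachePeriod (10 seconds) apart.
    // Example: (5e9 - 2e9) core-ns over 10 s = 3e8 nano-cores, i.e. 0.3 cores on average.
    func nanoCoresFromSnapshots(latest, previous, periodSeconds uint64) uint64 {
    	return (latest - previous) / periodSeconds
    }

TestGetCPUUsageNanoCores below checks exactly these values.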

View File

@@ -34,6 +34,10 @@ const (
 	// Perf counters are updated every second. This is the same as the default cadvisor collection period
 	// see https://github.com/google/cadvisor/blob/master/docs/runtime_options.md#housekeeping
 	perfCounterUpdatePeriod = 1 * time.Second
+	// defaultCachePeriod is the default cache period for each cpuUsage.
+	// This matches with the cadvisor setting and the time interval we use for containers.
+	// see https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/cadvisor/cadvisor_linux.go#L63
+	defaultCachePeriod = 10 * time.Second
 )
 
 type perfCounter struct {

View File

@@ -55,6 +55,7 @@ type winNodeStatsClient interface {
 type nodeMetrics struct {
 	cpuUsageCoreNanoSeconds   uint64
+	cpuUsageNanoCores         uint64
 	memoryPrivWorkingSetBytes uint64
 	memoryCommittedBytes      uint64
 	timeStamp                 time.Time
@@ -69,6 +70,11 @@ type nodeInfo struct {
 	startTime                time.Time
 }
 
+type cpuUsageCoreNanoSecondsCache struct {
+	latestValue   uint64
+	previousValue uint64
+}
+
 // newClient constructs a Client.
 func newClient(statsNodeClient winNodeStatsClient) (Client, error) {
 	statsClient := new(StatsClient)
@@ -122,6 +128,11 @@ func (c *StatsClient) createRootContainerInfo() (*cadvisorapiv2.ContainerInfo, e
 					Total: nodeMetrics.cpuUsageCoreNanoSeconds,
 				},
 			},
+			CpuInst: &cadvisorapiv2.CpuInstStats{
+				Usage: cadvisorapiv2.CpuInstUsage{
+					Total: nodeMetrics.cpuUsageNanoCores,
+				},
+			},
 			Memory: &cadvisorapi.MemoryStats{
 				WorkingSet: nodeMetrics.memoryPrivWorkingSetBytes,
 				Usage:      nodeMetrics.memoryCommittedBytes,
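The new CpuInst field is attached to the same cadvisorapiv2.ContainerStats that already carries the cumulative Cpu counter for the root container, so existing consumers can pick up the instantaneous value without a new code path. A hedged sketch of what reading it might look like (variable names are illustrative):

    // stats is a *cadvisorapiv2.ContainerStats as assembled above.
    if stats.CpuInst != nil {
    	nanoCores := stats.CpuInst.Usage.Total // e.g. 300000000 means ~0.3 cores
    	_ = nanoCores
    }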

View File

@@ -38,6 +38,7 @@ func (f fakeWinNodeStatsClient) startMonitoring() error {
 func (f fakeWinNodeStatsClient) getNodeMetrics() (nodeMetrics, error) {
 	return nodeMetrics{
 		cpuUsageCoreNanoSeconds:   123,
+		cpuUsageNanoCores:         23,
 		memoryPrivWorkingSetBytes: 1234,
 		memoryCommittedBytes:      12345,
 		timeStamp:                 timeStamp,
@@ -78,6 +79,11 @@ func TestWinContainerInfos(t *testing.T) {
 					Total: 123,
 				},
 			},
+			CpuInst: &cadvisorapiv2.CpuInstStats{
+				Usage: cadvisorapiv2.CpuInstUsage{
+					Total: 23,
+				},
+			},
 			Memory: &cadvisorapi.MemoryStats{
 				WorkingSet: 1234,
 				Usage:      12345,
@@ -100,6 +106,7 @@ func TestWinContainerInfos(t *testing.T) {
 	assert.Equal(t, actualRootInfos["/"].Spec, infos["/"].Spec)
 	assert.Equal(t, len(actualRootInfos["/"].Stats), len(infos["/"].Stats))
 	assert.Equal(t, actualRootInfos["/"].Stats[0].Cpu, infos["/"].Stats[0].Cpu)
+	assert.Equal(t, actualRootInfos["/"].Stats[0].CpuInst, infos["/"].Stats[0].CpuInst)
 	assert.Equal(t, actualRootInfos["/"].Stats[0].Memory, infos["/"].Stats[0].Memory)
 }
@@ -123,6 +130,46 @@ func TestWinVersionInfo(t *testing.T) {
 		KernelVersion: "v42"})
 }
 
+func TestConvertCPUValue(t *testing.T) {
+	testCases := []struct {
+		cpuValue uint64
+		expected uint64
+	}{
+		{cpuValue: uint64(50), expected: uint64(2000000000)},
+		{cpuValue: uint64(0), expected: uint64(0)},
+		{cpuValue: uint64(100), expected: uint64(4000000000)},
+	}
+	var cpuCores = 4
+
+	for _, tc := range testCases {
+		p := perfCounterNodeStatsClient{}
+		newValue := p.convertCPUValue(cpuCores, tc.cpuValue)
+		assert.Equal(t, newValue, tc.expected)
+	}
+}
+
+func TestGetCPUUsageNanoCores(t *testing.T) {
+	testCases := []struct {
+		latestValue   uint64
+		previousValue uint64
+		expected      uint64
+	}{
+		{latestValue: uint64(0), previousValue: uint64(0), expected: uint64(0)},
+		{latestValue: uint64(2000000000), previousValue: uint64(0), expected: uint64(200000000)},
+		{latestValue: uint64(5000000000), previousValue: uint64(2000000000), expected: uint64(300000000)},
+	}
+
+	for _, tc := range testCases {
+		p := perfCounterNodeStatsClient{}
+		p.cpuUsageCoreNanoSecondsCache = cpuUsageCoreNanoSecondsCache{
+			latestValue:   tc.latestValue,
+			previousValue: tc.previousValue,
+		}
+		cpuUsageNanoCores := p.getCPUUsageNanoCores()
+		assert.Equal(t, cpuUsageNanoCores, tc.expected)
+	}
+}
+
 func getClient(t *testing.T) Client {
 	f := fakeWinNodeStatsClient{}
 	c, err := newClient(f)