// +build linux

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
	"fmt"
	"strings"
	"time"

	clientset "k8s.io/client-go/kubernetes"
	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
	"k8s.io/kubernetes/test/e2e/framework"
	imageutils "k8s.io/kubernetes/test/utils/image"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = SIGDescribe("Resource-usage [Serial] [Slow]", func() {
	const (
		// Interval to poll /stats/container on a node
		containerStatsPollingPeriod = 10 * time.Second
	)

	var (
		rc *ResourceCollector
		om *framework.RuntimeOperationMonitor
	)

	f := framework.NewDefaultFramework("resource-usage")

	BeforeEach(func() {
		om = framework.NewRuntimeOperationMonitor(f.ClientSet)
		// The test collects resource usage from a standalone cAdvisor pod.
		// The kubelet's built-in cAdvisor has a housekeeping interval of 10s, which is too
		// coarse to show resource usage spikes, but shortening that interval would increase
		// the kubelet's overhead. Hence we use a standalone cAdvisor pod.
		f.PodClient().CreateSync(getCadvisorPod())
		rc = NewResourceCollector(containerStatsPollingPeriod)
	})

	AfterEach(func() {
		result := om.GetLatestRuntimeOperationErrorRate()
		framework.Logf("runtime operation error metrics:\n%s", framework.FormatRuntimeOperationErrorRate(result))
	})

	// This test measures and verifies that the node's steady-state resource usage stays
	// within the given limits. It collects data from a standalone cAdvisor pod with a 1s
	// housekeeping interval, and verifies CPU usage percentiles and the latest memory usage.
	Context("regular resource usage tracking", func() {
		rTests := []resourceTest{
			{
				podsNr: 10,
				cpuLimits: framework.ContainersCPUSummary{
					stats.SystemContainerKubelet: {0.50: 0.30, 0.95: 0.35},
					stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
				},
				memLimits: framework.ResourceUsagePerContainer{
					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 200 * 1024 * 1024},
					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
				},
			},
		}

		for _, testArg := range rTests {
			itArg := testArg
			desc := fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr)
			It(desc, func() {
				testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)

				runResourceUsageTest(f, rc, itArg)

				// Log and verify resource usage
				logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, true)
			})
		}
	})
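
	// This Context mirrors the one above but runs as a benchmark over several pod
	// densities: it only logs the resource usage perf data and does not verify it
	// against limits (logAndVerifyResource is called with isVerify set to false).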
	Context("regular resource usage tracking", func() {
		rTests := []resourceTest{
			{
				podsNr: 0,
			},
			{
				podsNr: 10,
			},
			{
				podsNr: 35,
			},
			{
				podsNr: 105,
			},
		}

		for _, testArg := range rTests {
			itArg := testArg
			desc := fmt.Sprintf("resource tracking for %d pods per node [Benchmark]", itArg.podsNr)
			It(desc, func() {
				testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)

				runResourceUsageTest(f, rc, itArg)

				// Log and verify resource usage
				logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false)
			})
		}
	})
})
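
// resourceTest describes one resource-usage test case: how many pods to run on the
// node and, optionally, the CPU and memory limits to verify the measured usage against.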
type resourceTest struct {
	podsNr    int
	cpuLimits framework.ContainersCPUSummary
	memLimits framework.ResourceUsagePerContainer
}
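
// getTestName returns the name used to label the perf data for this test case,
// e.g. "resource_10" for a 10-pod run.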
func (rt *resourceTest) getTestName() string {
	return fmt.Sprintf("resource_%d", rt.podsNr)
}

// runResourceUsageTest creates a batch of pause pods and monitors the node's resource
// usage for monitoringTime, logging intermediate results every reportingPeriod.
func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg resourceTest) {
	const (
		// The monitoring time for one test
		monitoringTime = 10 * time.Minute
		// The periodic reporting period
		reportingPeriod = 5 * time.Minute
		// Sleep for this long after creating the pods so that measurements reflect steady state.
		sleepAfterCreatePods = 10 * time.Second
	)
	pods := newTestPods(testArg.podsNr, true, imageutils.GetPauseImageName(), "test_pod")

	rc.Start()
	// Explicitly delete the pods so that the namespace controller cleanup does not time out.
	defer deletePodsSync(f, append(pods, getCadvisorPod()))
	defer rc.Stop()

	By("Creating a batch of Pods")
	f.PodClient().CreateBatch(pods)

	// Wait for a while to let the node reach a steady state.
	time.Sleep(sleepAfterCreatePods)

	// Log once and flush the stats.
	rc.LogLatest()
	rc.Reset()

	By("Start monitoring resource usage")
	// Periodically dump the cpu summary until the deadline is met.
	// Note that without calling framework.ResourceMonitor.Reset(), the stats
	// would occupy increasingly more memory. This should be fine
	// for the current test duration, but we should reclaim the
	// entries if we plan to monitor longer (e.g., 8 hours).
	deadline := time.Now().Add(monitoringTime)
	for time.Now().Before(deadline) {
		timeLeft := deadline.Sub(time.Now())
		framework.Logf("Still running...%v left", timeLeft)
		if timeLeft < reportingPeriod {
			time.Sleep(timeLeft)
		} else {
			time.Sleep(reportingPeriod)
		}
		logPods(f.ClientSet)
	}

	By("Reporting overall resource usage")
	logPods(f.ClientSet)
}

// logAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit.
func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary,
	memLimits framework.ResourceUsagePerContainer, testInfo map[string]string, isVerify bool) {
	nodeName := framework.TestContext.NodeName

	// Obtain memory PerfData
	usagePerContainer, err := rc.GetLatest()
	Expect(err).NotTo(HaveOccurred())
	framework.Logf("%s", formatResourceUsageStats(usagePerContainer))

	usagePerNode := make(framework.ResourceUsagePerNode)
	usagePerNode[nodeName] = usagePerContainer

	// Obtain CPU PerfData
	cpuSummary := rc.GetCPUSummary()
	framework.Logf("%s", formatCPUSummary(cpuSummary))

	cpuSummaryPerNode := make(framework.NodesCPUSummary)
	cpuSummaryPerNode[nodeName] = cpuSummary

	// Print resource usage
	logPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo), "memory")
	logPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo), "cpu")

	// Verify resource usage
	if isVerify {
		verifyMemoryLimits(f.ClientSet, memLimits, usagePerNode)
		verifyCPULimits(cpuLimits, cpuSummaryPerNode)
	}
}
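
// verifyMemoryLimits compares the latest RSS of each system container against the
// expected limits and fails the test if any limit is exceeded. Kubelet heap stats are
// logged for any node that exceeds its limits to aid debugging.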
func verifyMemoryLimits(c clientset.Interface, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) {
	if expected == nil {
		return
	}
	var errList []string
	for nodeName, nodeSummary := range actual {
		var nodeErrs []string
		for cName, expectedResult := range expected {
			container, ok := nodeSummary[cName]
			if !ok {
				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
				continue
			}

			expectedValue := expectedResult.MemoryRSSInBytes
			actualValue := container.MemoryRSSInBytes
			if expectedValue != 0 && actualValue > expectedValue {
				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected RSS memory (bytes) < %d; got %d",
					cName, expectedValue, actualValue))
			}
		}
		if len(nodeErrs) > 0 {
			errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
			heapStats, err := framework.GetKubeletHeapStats(c, nodeName)
			if err != nil {
				framework.Logf("Unable to get heap stats from %q", nodeName)
			} else {
				framework.Logf("Heap stats on %q:\n%v", nodeName, heapStats)
			}
		}
	}
	if len(errList) > 0 {
		framework.Failf("Memory usage exceeding limits:\n %s", strings.Join(errList, "\n"))
	}
}
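
// verifyCPULimits compares the measured CPU usage percentiles of each system container
// against the expected limits and fails the test if any limit is exceeded.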
func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.NodesCPUSummary) {
	if expected == nil {
		return
	}
	var errList []string
	for nodeName, perNodeSummary := range actual {
		var nodeErrs []string
		for cName, expectedResult := range expected {
			perContainerSummary, ok := perNodeSummary[cName]
			if !ok {
				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
				continue
			}
			for p, expectedValue := range expectedResult {
				actualValue, ok := perContainerSummary[p]
				if !ok {
					nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing percentile %v", cName, p))
					continue
				}
				if actualValue > expectedValue {
					nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected %.0fth%% usage < %.3f; got %.3f",
						cName, p*100, expectedValue, actualValue))
				}
			}
		}
		if len(nodeErrs) > 0 {
			errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
		}
	}
	if len(errList) > 0 {
		framework.Failf("CPU usage exceeding limits:\n %s", strings.Join(errList, "\n"))
	}
}
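
// logPods logs how many pods are currently running on the test node, as reported by
// the kubelet.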
func logPods(c clientset.Interface) {
	nodeName := framework.TestContext.NodeName
	podList, err := framework.GetKubeletRunningPods(c, nodeName)
	if err != nil {
		framework.Logf("Unable to retrieve kubelet pods for node %v", nodeName)
		return
	}
	framework.Logf("%d pods are running on node %v", len(podList.Items), nodeName)
}