Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-25 04:33:26 +00:00
e2e: adapt kubelet_perf.go to use the new summary metrics API
This commit switches most functions in kubelet_stats.go to use the new API. However, the functions that perform one-time resource usage retrieval remain unchanged for compatibility with resource_usage_gatherer.go; they should be handled separately. Also, the new summary API does not yet provide RSS memory, so all memory-checking tests will *always* pass. We plan to add this metric to the API and restore the functionality of the tests.
commit a8c685921f
parent e93c0d727f
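For context, the "summary metrics API" referred to above is the kubelet's /stats/summary endpoint, which serves a stats.Summary object as JSON. A minimal standalone sketch of what the new code path consumes; this assumes the kubelet's read-only port (10255) is reachable directly, whereas the e2e code below reaches the same endpoint through the apiserver node proxy:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"

	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
)

// fetchSummary GETs the kubelet summary endpoint directly and decodes
// the stats.Summary payload. Direct access on port 10255 is an
// assumption for illustration only.
func fetchSummary(nodeAddr string) (*stats.Summary, error) {
	resp, err := http.Get(fmt.Sprintf("http://%s:10255/stats/summary", nodeAddr))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	summary := &stats.Summary{}
	if err := json.NewDecoder(resp.Body).Decode(summary); err != nil {
		return nil, err
	}
	return summary, nil
}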
test/e2e/kubelet_perf.go:

@@ -22,6 +22,7 @@ import (
 	"time"

 	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
 	"k8s.io/kubernetes/pkg/util"
 	"k8s.io/kubernetes/pkg/util/sets"
@@ -31,7 +32,7 @@ import (
 const (
 	// Interval to poll /stats/container on a node
-	containerStatsPollingPeriod = 3 * time.Second
+	containerStatsPollingPeriod = 10 * time.Second
 	// The monitoring time for one test.
 	monitoringTime = 20 * time.Minute
 	// The periodic reporting period.
@@ -210,27 +211,26 @@ var _ = KubeDescribe("Kubelet [Serial] [Slow]", func() {
 		{
 			podsPerNode: 0,
 			cpuLimits: containersCPUSummary{
-				"/kubelet":       {0.50: 0.06, 0.95: 0.08},
-				"/docker-daemon": {0.50: 0.05, 0.95: 0.06},
+				stats.SystemContainerKubelet: {0.50: 0.06, 0.95: 0.08},
+				stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.06},
 			},
 			// We set the memory limits generously because the distribution
 			// of the addon pods affect the memory usage on each node.
 			memLimits: resourceUsagePerContainer{
-				"/kubelet":       &containerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
-				"/docker-daemon": &containerResourceUsage{MemoryRSSInBytes: 85 * 1024 * 1024},
+				stats.SystemContainerKubelet: &containerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
+				stats.SystemContainerRuntime: &containerResourceUsage{MemoryRSSInBytes: 85 * 1024 * 1024},
 			},
 		},
 		{
 			podsPerNode: 35,
 			cpuLimits: containersCPUSummary{
-				"/kubelet":       {0.50: 0.12, 0.95: 0.14},
-				"/docker-daemon": {0.50: 0.06, 0.95: 0.08},
+				stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.14},
+				stats.SystemContainerRuntime: {0.50: 0.06, 0.95: 0.08},
 			},
 			// We set the memory limits generously because the distribution
 			// of the addon pods affect the memory usage on each node.
 			memLimits: resourceUsagePerContainer{
-				"/kubelet":       &containerResourceUsage{MemoryRSSInBytes: 75 * 1024 * 1024},
-				"/docker-daemon": &containerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
+				stats.SystemContainerRuntime: &containerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
 			},
 		},
 		{
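The stats.SystemContainer* keys used above are string constants from the new v1alpha1 summary API, which name system containers logically instead of by raw cgroup path. At the time of this commit they were defined in pkg/kubelet/api/v1alpha1/stats roughly as follows (the comments here are paraphrased, not verbatim):

const (
	// SystemContainerKubelet names the system container tracking the kubelet itself.
	SystemContainerKubelet = "kubelet"
	// SystemContainerRuntime names the container runtime (e.g. docker),
	// replacing the raw "/docker-daemon" cgroup name used before.
	SystemContainerRuntime = "runtime"
	// SystemContainerMisc names the container tracking non-kubernetes processes.
	SystemContainerMisc = "misc"
)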
test/e2e/kubelet_stats.go:

@@ -33,8 +33,9 @@ import (
 	"github.com/prometheus/common/model"
 	"k8s.io/kubernetes/pkg/api"
 	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
 	"k8s.io/kubernetes/pkg/kubelet/metrics"
-	"k8s.io/kubernetes/pkg/kubelet/server/stats"
+	kubeletstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
 	"k8s.io/kubernetes/pkg/master/ports"
 	utilerrors "k8s.io/kubernetes/pkg/util/errors"
 	"k8s.io/kubernetes/pkg/util/sets"
@@ -148,7 +149,9 @@ func HighLatencyKubeletOperations(c *client.Client, threshold time.Duration, nod

 // getContainerInfo contacts kubelet for the container information. The "Stats"
 // in the returned ContainerInfo is subject to the requirements in statsRequest.
-func getContainerInfo(c *client.Client, nodeName string, req *stats.StatsRequest) (map[string]cadvisorapi.ContainerInfo, error) {
+// TODO: This function uses the deprecated kubelet stats API; it should be
+// removed.
+func getContainerInfo(c *client.Client, nodeName string, req *kubeletstats.StatsRequest) (map[string]cadvisorapi.ContainerInfo, error) {
 	reqBody, err := json.Marshal(req)
 	if err != nil {
 		return nil, err
@@ -191,47 +194,6 @@ func getContainerInfo(c *client.Client, nodeName string, req *stats.StatsRequest
 	return containers, nil
 }

-const (
-	// cadvisor records stats about every second.
-	cadvisorStatsPollingIntervalInSeconds float64 = 1.0
-	// cadvisor caches up to 2 minutes of stats (configured by kubelet).
-	maxNumStatsToRequest int = 120
-)
-
-// A list of containers for which we want to collect resource usage.
-func targetContainers() []string {
-	if providerIs("gce", "gke") {
-		return []string{
-			"/",
-			"/docker-daemon",
-			"/kubelet",
-			"/system",
-		}
-	} else {
-		return []string{
-			"/",
-		}
-	}
-}
-
-type containerResourceUsage struct {
-	Name                    string
-	Timestamp               time.Time
-	CPUUsageInCores         float64
-	MemoryUsageInBytes      uint64
-	MemoryWorkingSetInBytes uint64
-	MemoryRSSInBytes        uint64
-	// The interval used to calculate CPUUsageInCores.
-	CPUInterval time.Duration
-}
-
-func (r *containerResourceUsage) isStrictlyGreaterThan(rhs *containerResourceUsage) bool {
-	return r.CPUUsageInCores > rhs.CPUUsageInCores && r.MemoryWorkingSetInBytes > rhs.MemoryWorkingSetInBytes
-}
-
-type resourceUsagePerContainer map[string]*containerResourceUsage
-type resourceUsagePerNode map[string]resourceUsagePerContainer
-
 // getOneTimeResourceUsageOnNode queries the node's /stats/container endpoint
 // and returns the resource usage of all containerNames for the past
 // cpuInterval.
@@ -252,6 +214,8 @@ type resourceUsagePerNode map[string]resourceUsagePerContainer
 // should fail if one of containers listed by containerNames is missing on any node
 // (useful e.g. when looking for system containers or daemons). If set to true function
 // is more forgiving and ignores missing containers.
+// TODO: This function relies on the deprecated kubelet stats API and should be
+// removed and/or rewritten.
 func getOneTimeResourceUsageOnNode(
 	c *client.Client,
 	nodeName string,
@@ -259,12 +223,19 @@ func getOneTimeResourceUsageOnNode(
 	containerNames func() []string,
 	expectMissingContainers bool,
 ) (resourceUsagePerContainer, error) {
+	const (
+		// cadvisor records stats about every second.
+		cadvisorStatsPollingIntervalInSeconds float64 = 1.0
+		// cadvisor caches up to 2 minutes of stats (configured by kubelet).
+		maxNumStatsToRequest int = 120
+	)
+
 	numStats := int(float64(cpuInterval.Seconds()) / cadvisorStatsPollingIntervalInSeconds)
 	if numStats < 2 || numStats > maxNumStatsToRequest {
 		return nil, fmt.Errorf("numStats needs to be > 1 and < %d", maxNumStatsToRequest)
 	}
 	// Get information of all containers on the node.
-	containerInfos, err := getContainerInfo(c, nodeName, &stats.StatsRequest{
+	containerInfos, err := getContainerInfo(c, nodeName, &kubeletstats.StatsRequest{
 		ContainerName: "/",
 		NumStats:      numStats,
 		Subcontainers: true,
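The numStats arithmetic above is worth spelling out: since cadvisor samples about once per second, the number of samples to request equals the CPU-averaging window in seconds, bounded by cadvisor's roughly two-minute cache. A worked example using the constants from this hunk (the chosen interval is illustrative):

package main

import (
	"fmt"
	"time"
)

func main() {
	const cadvisorStatsPollingIntervalInSeconds = 1.0 // cadvisor samples ~1/s
	const maxNumStatsToRequest = 120                  // ~2 minutes of cached samples

	cpuInterval := 30 * time.Second
	numStats := int(cpuInterval.Seconds() / cadvisorStatsPollingIntervalInSeconds)
	// 30 samples; valid because 2 <= 30 <= 120. An interval above
	// two minutes would exceed cadvisor's cache and fail the check.
	fmt.Println(numStats, numStats >= 2 && numStats <= maxNumStatsToRequest)
}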
@@ -272,6 +243,18 @@ func getOneTimeResourceUsageOnNode(
 	if err != nil {
 		return nil, err
 	}
+
+	f := func(name string, oldStats, newStats *cadvisorapi.ContainerStats) *containerResourceUsage {
+		return &containerResourceUsage{
+			Name:                    name,
+			Timestamp:               newStats.Timestamp,
+			CPUUsageInCores:         float64(newStats.Cpu.Usage.Total-oldStats.Cpu.Usage.Total) / float64(newStats.Timestamp.Sub(oldStats.Timestamp).Nanoseconds()),
+			MemoryUsageInBytes:      newStats.Memory.Usage,
+			MemoryWorkingSetInBytes: newStats.Memory.WorkingSet,
+			MemoryRSSInBytes:        newStats.Memory.RSS,
+			CPUInterval:             newStats.Timestamp.Sub(oldStats.Timestamp),
+		}
+	}
 	// Process container infos that are relevant to us.
 	containers := containerNames()
 	usageMap := make(resourceUsagePerContainer, len(containers))
@@ -285,26 +268,96 @@ func getOneTimeResourceUsageOnNode(
 		}
 		first := info.Stats[0]
 		last := info.Stats[len(info.Stats)-1]
-		usageMap[name] = computeContainerResourceUsage(name, first, last)
+		usageMap[name] = f(name, first, last)
 	}
 	return usageMap, nil
 }

-// logOneTimeResourceUsageSummary collects container resource for the list of
-// nodes, formats and logs the stats.
-func logOneTimeResourceUsageSummary(c *client.Client, nodeNames []string, cpuInterval time.Duration) {
-	var summary []string
-	for _, nodeName := range nodeNames {
-		stats, err := getOneTimeResourceUsageOnNode(c, nodeName, cpuInterval, targetContainers, false)
-		if err != nil {
-			summary = append(summary, fmt.Sprintf("Error getting resource usage from node %q, err: %v", nodeName, err))
-		} else {
-			summary = append(summary, formatResourceUsageStats(nodeName, stats))
-		}
-	}
-	Logf("\n%s", strings.Join(summary, "\n"))
+func getNodeStatsSummary(c *client.Client, nodeName string) (*stats.Summary, error) {
+	subResourceProxyAvailable, err := serverVersionGTE(subResourceServiceAndNodeProxyVersion, c)
+	if err != nil {
+		return nil, err
+	}
+
+	var data []byte
+	if subResourceProxyAvailable {
+		data, err = c.Get().
+			Resource("nodes").
+			SubResource("proxy").
+			Name(fmt.Sprintf("%v:%v", nodeName, ports.KubeletPort)).
+			Suffix("stats/summary").
+			SetHeader("Content-Type", "application/json").
+			Do().Raw()
+
+	} else {
+		data, err = c.Get().
+			Prefix("proxy").
+			Resource("nodes").
+			Name(fmt.Sprintf("%v:%v", nodeName, ports.KubeletPort)).
+			Suffix("stats/summary").
+			SetHeader("Content-Type", "application/json").
+			Do().Raw()
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	var summary *stats.Summary
+	err = json.Unmarshal(data, &summary)
+	if err != nil {
+		return nil, err
+	}
+	return summary, nil
+}
+
+func getSystemContainerStats(summary *stats.Summary) map[string]*stats.ContainerStats {
+	statsList := summary.Node.SystemContainers
+	statsMap := make(map[string]*stats.ContainerStats)
+	for i := range statsList {
+		statsMap[statsList[i].Name] = &statsList[i]
+	}
+
+	// Create a root container stats using information available in
+	// stats.NodeStats. This is necessary since it is a different type.
+	statsMap[rootContainerName] = &stats.ContainerStats{
+		CPU:    summary.Node.CPU,
+		Memory: summary.Node.Memory,
+	}
+	return statsMap
+}
+
+const (
+	rootContainerName = "/"
+)
+
+// A list of containers for which we want to collect resource usage.
+func targetContainers() []string {
+	return []string{
+		rootContainerName,
+		stats.SystemContainerRuntime,
+		stats.SystemContainerKubelet,
+		stats.SystemContainerMisc,
+	}
+}
+
+type containerResourceUsage struct {
+	Name                    string
+	Timestamp               time.Time
+	CPUUsageInCores         float64
+	MemoryUsageInBytes      uint64
+	MemoryWorkingSetInBytes uint64
+	MemoryRSSInBytes        uint64
+	// The interval used to calculate CPUUsageInCores.
+	CPUInterval time.Duration
+}
+
+func (r *containerResourceUsage) isStrictlyGreaterThan(rhs *containerResourceUsage) bool {
+	return r.CPUUsageInCores > rhs.CPUUsageInCores && r.MemoryWorkingSetInBytes > rhs.MemoryWorkingSetInBytes
+}
+
+type resourceUsagePerContainer map[string]*containerResourceUsage
+type resourceUsagePerNode map[string]resourceUsagePerContainer

 func formatResourceUsageStats(nodeName string, containerStats resourceUsagePerContainer) string {
 	// Example output:
 	//
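For reference, the two branches in getNodeStatsSummary differ only in where the proxy segment lives in the request path, since node proxying moved to a subresource. The resulting paths look roughly like this (node name and kubelet port are placeholders):

// Newer apiservers (node proxy as a subresource):
//   GET /api/v1/nodes/<node-name>:10250/proxy/stats/summary
// Older apiservers (top-level proxy prefix):
//   GET /api/v1/proxy/nodes/<node-name>:10250/stats/summary

Note also that getSystemContainerStats indexes the slice directly (&statsList[i]) rather than taking the address of a range variable, so each map entry points at a distinct element of the summary's SystemContainers slice.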
@@ -395,15 +448,15 @@ func PrintAllKubeletPods(c *client.Client, nodeName string) {
 	}
 }

-func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapi.ContainerStats) *containerResourceUsage {
+func computeContainerResourceUsage(name string, oldStats, newStats *stats.ContainerStats) *containerResourceUsage {
 	return &containerResourceUsage{
 		Name:                    name,
-		Timestamp:               newStats.Timestamp,
-		CPUUsageInCores:         float64(newStats.Cpu.Usage.Total-oldStats.Cpu.Usage.Total) / float64(newStats.Timestamp.Sub(oldStats.Timestamp).Nanoseconds()),
-		MemoryUsageInBytes:      newStats.Memory.Usage,
-		MemoryWorkingSetInBytes: newStats.Memory.WorkingSet,
-		MemoryRSSInBytes:        newStats.Memory.RSS,
-		CPUInterval:             newStats.Timestamp.Sub(oldStats.Timestamp),
+		Timestamp:               newStats.CPU.Time.Time,
+		CPUUsageInCores:         float64(*newStats.CPU.UsageCoreNanoSeconds-*oldStats.CPU.UsageCoreNanoSeconds) / float64(newStats.CPU.Time.Time.Sub(oldStats.CPU.Time.Time).Nanoseconds()),
+		MemoryUsageInBytes:      *newStats.Memory.UsageBytes,
+		MemoryWorkingSetInBytes: *newStats.Memory.WorkingSetBytes,
+		MemoryRSSInBytes:        *newStats.Memory.RSSBytes,
+		CPUInterval:             newStats.CPU.Time.Time.Sub(oldStats.CPU.Time.Time),
 	}
 }
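Both the old cadvisor-based version and the new summary-based version compute average CPU usage the same way: the delta of a cumulative core-nanosecond counter divided by the wall-clock nanoseconds between the two samples yields average cores. A worked example with assumed sample values:

package main

import "fmt"

func main() {
	// Two hypothetical summary samples taken 10 seconds apart.
	oldUsage := uint64(40000000000) // cumulative core-nanoseconds at t0
	newUsage := uint64(45000000000) // cumulative core-nanoseconds at t0+10s
	intervalNs := int64(10000000000)

	cores := float64(newUsage-oldUsage) / float64(intervalNs)
	fmt.Println(cores) // 0.5: the container averaged half a core over the window
}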
@@ -435,7 +488,7 @@ func newResourceCollector(c *client.Client, nodeName string, containerNames []st
 func (r *resourceCollector) Start() {
 	r.stopCh = make(chan struct{}, 1)
 	// Keep the last observed stats for comparison.
-	oldStats := make(map[string]*cadvisorapi.ContainerStats)
+	oldStats := make(map[string]*stats.ContainerStats)
 	go wait.Until(func() { r.collectStats(oldStats) }, r.pollingInterval, r.stopCh)
 }
@@ -444,35 +497,33 @@ func (r *resourceCollector) Stop() {
 	close(r.stopCh)
 }

-// collectStats gets the latest stats from kubelet's /stats/container, computes
+// collectStats gets the latest stats from kubelet stats summary API, computes
 // the resource usage, and pushes it to the buffer.
-func (r *resourceCollector) collectStats(oldStats map[string]*cadvisorapi.ContainerStats) {
-	infos, err := getContainerInfo(r.client, r.node, &stats.StatsRequest{
-		ContainerName: "/",
-		NumStats:      1,
-		Subcontainers: true,
-	})
+func (r *resourceCollector) collectStats(oldStatsMap map[string]*stats.ContainerStats) {
+	summary, err := getNodeStatsSummary(r.client, r.node)
+	cStatsMap := getSystemContainerStats(summary)
 	if err != nil {
-		Logf("Error getting container info on %q, err: %v", r.node, err)
+		Logf("Error getting node stats summary on %q, err: %v", r.node, err)
 		return
 	}
 	r.lock.Lock()
 	defer r.lock.Unlock()
 	for _, name := range r.containers {
-		info, ok := infos[name]
-		if !ok || len(info.Stats) < 1 {
+		cStats, ok := cStatsMap[name]
+		if !ok {
 			Logf("Missing info/stats for container %q on node %q", name, r.node)
 			return
 		}
-		if oldInfo, ok := oldStats[name]; ok {
-			newInfo := info.Stats[0]
-			if oldInfo.Timestamp.Equal(newInfo.Timestamp) {
+
+		if oldStats, ok := oldStatsMap[name]; ok {
+			if oldStats.CPU.Time.Equal(cStats.CPU.Time) {
 				// No change -> skip this stat.
 				continue
 			}
-			r.buffers[name] = append(r.buffers[name], computeContainerResourceUsage(name, oldInfo, newInfo))
+			r.buffers[name] = append(r.buffers[name], computeContainerResourceUsage(name, oldStats, cStats))
 		}
-		oldStats[name] = info.Stats[0]
+		// Update the old stats.
+		oldStatsMap[name] = cStats
 	}
 }
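The collection loop that drives collectStats is wait.Until, which runs the function immediately and then once per period until the stop channel is closed; the timestamp-equality check above exists because the summary API serves cached cadvisor samples, so a 10-second poll can see the same sample twice. A minimal sketch of the same pattern (the printed message and timings are illustrative):

package main

import (
	"fmt"
	"time"

	"k8s.io/kubernetes/pkg/util/wait"
)

func main() {
	stopCh := make(chan struct{})
	// Runs the closure at t=0s, 10s, 20s, ... until stopCh closes.
	go wait.Until(func() { fmt.Println("poll") }, 10*time.Second, stopCh)

	time.Sleep(25 * time.Second) // expect ~3 polls
	close(stopCh)
}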
@@ -613,9 +664,9 @@ func (r *resourceMonitor) FormatCPUSummary(summary nodesCPUSummary) string {
 	// CPU usage of containers on node "e2e-test-foo-minion-0vj7":
 	// container        5th%  50th% 90th% 95th%
 	// "/"              0.051 0.159 0.387 0.455
-	// "/docker-daemon" 0.000 0.000 0.146 0.166
+	// "/runtime"       0.000 0.000 0.146 0.166
 	// "/kubelet"       0.036 0.053 0.091 0.154
-	// "/system"        0.001 0.001 0.001 0.002
+	// "/misc"          0.001 0.001 0.001 0.002
 	var summaryStrings []string
 	var header []string
 	header = append(header, "container")