Fix resource gatherer

gmarek 2017-04-13 18:29:14 +02:00
parent 3b9eb1a875
commit fbeac32a70
3 changed files with 68 additions and 87 deletions

View File

@@ -69,7 +69,6 @@ go_library(
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/sysctl:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/master/ports:go_default_library",
@@ -92,7 +91,6 @@ go_library(
"//vendor:github.com/aws/aws-sdk-go/service/autoscaling",
"//vendor:github.com/aws/aws-sdk-go/service/ec2",
"//vendor:github.com/golang/glog",
"//vendor:github.com/google/cadvisor/info/v1",
"//vendor:github.com/onsi/ginkgo",
"//vendor:github.com/onsi/ginkgo/config",
"//vendor:github.com/onsi/gomega",

View File

@@ -28,7 +28,6 @@ import (
"text/tabwriter"
"time"
cadvisorapi "github.com/google/cadvisor/info/v1"
"github.com/prometheus/common/model"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
@@ -37,7 +36,6 @@ import (
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
kubeletstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
"k8s.io/kubernetes/pkg/master/ports"
"k8s.io/kubernetes/pkg/metrics"
)
@@ -276,15 +274,8 @@ func HighLatencyKubeletOperations(c clientset.Interface, threshold time.Duration
return badMetrics, nil
}
// getContainerInfo contacts kubelet for the container information. The "Stats"
// in the returned ContainerInfo is subject to the requirements in statsRequest.
// TODO: This function uses the deprecated kubelet stats API; it should be
// removed.
func getContainerInfo(c clientset.Interface, nodeName string, req *kubeletstats.StatsRequest) (map[string]cadvisorapi.ContainerInfo, error) {
reqBody, err := json.Marshal(req)
if err != nil {
return nil, err
}
// getStatsSummary contacts kubelet for the container information.
func getStatsSummary(c clientset.Interface, nodeName string) (*stats.Summary, error) {
subResourceProxyAvailable, err := ServerVersionGTE(SubResourceServiceAndNodeProxyVersion, c.Discovery())
if err != nil {
return nil, err
@@ -295,40 +286,43 @@ func getContainerInfo(c clientset.Interface, nodeName string, req *kubeletstats.
var data []byte
if subResourceProxyAvailable {
data, err = c.Core().RESTClient().Post().
data, err = c.Core().RESTClient().Get().
Context(ctx).
Resource("nodes").
SubResource("proxy").
Name(fmt.Sprintf("%v:%v", nodeName, ports.KubeletPort)).
Suffix("stats/container").
SetHeader("Content-Type", "application/json").
Body(reqBody).
Suffix("stats/summary").
Do().Raw()
} else {
data, err = c.Core().RESTClient().Post().
data, err = c.Core().RESTClient().Get().
Context(ctx).
Prefix("proxy").
Resource("nodes").
Name(fmt.Sprintf("%v:%v", nodeName, ports.KubeletPort)).
Suffix("stats/container").
SetHeader("Content-Type", "application/json").
Body(reqBody).
Suffix("stats/summary").
Do().Raw()
}
if err != nil {
return nil, err
}
var containers map[string]cadvisorapi.ContainerInfo
err = json.Unmarshal(data, &containers)
summary := stats.Summary{}
err = json.Unmarshal(data, &summary)
if err != nil {
return nil, err
}
return containers, nil
return &summary, nil
}
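
For orientation, here is a minimal standalone sketch (not part of this commit) of consuming the same stats/summary endpoint that the new getStatsSummary targets. The node IP and the kubelet read-only port 10255 are assumptions about the cluster; the framework itself routes the request through the apiserver proxy, as shown above.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"

	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
)

func main() {
	// Assumed node address; the kubelet read-only port serves /stats/summary.
	resp, err := http.Get("http://10.0.0.1:10255/stats/summary")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode into the same stats.Summary type the new code uses.
	summary := stats.Summary{}
	if err := json.NewDecoder(resp.Body).Decode(&summary); err != nil {
		panic(err)
	}

	// The summary is organized per pod, then per container, which is the
	// shape getOneTimeResourceUsageOnNode walks below.
	for _, pod := range summary.Pods {
		for _, container := range pod.Containers {
			fmt.Printf("%s/%s\n", pod.PodRef.Name, container.Name)
		}
	}
}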
// getOneTimeResourceUsageOnNode queries the node's /stats/container endpoint
func removeUint64Ptr(ptr *uint64) uint64 {
if ptr == nil {
return 0
}
return *ptr
}
// getOneTimeResourceUsageOnNode queries the node's /stats/summary endpoint
// and returns the resource usage of all containerNames for the past
// cpuInterval.
// The acceptable range of the interval is 2s~120s. Be warned that as the
@@ -344,18 +338,12 @@ func getContainerInfo(c clientset.Interface, nodeName string, req *kubeletstats.
// the stats points in ContainerResourceUsage.CPUInterval.
//
// containerNames is a function returning a collection of container names in which
// user is interested in. ExpectMissingContainers is a flag which says if the test
// should fail if one of containers listed by containerNames is missing on any node
// (useful e.g. when looking for system containers or daemons). If set to true function
// is more forgiving and ignores missing containers.
// TODO: This function relies on the deprecated kubelet stats API and should be
// removed and/or rewritten.
// user is interested in.
func getOneTimeResourceUsageOnNode(
c clientset.Interface,
nodeName string,
cpuInterval time.Duration,
containerNames func() []string,
expectMissingContainers bool,
) (ResourceUsagePerContainer, error) {
const (
// cadvisor records stats about every second.
@@ -369,40 +357,41 @@ func getOneTimeResourceUsageOnNode(
return nil, fmt.Errorf("numStats needs to be > 1 and < %d", maxNumStatsToRequest)
}
// Get information of all containers on the node.
containerInfos, err := getContainerInfo(c, nodeName, &kubeletstats.StatsRequest{
ContainerName: "/",
NumStats: numStats,
Subcontainers: true,
})
summary, err := getStatsSummary(c, nodeName)
if err != nil {
return nil, err
}
f := func(name string, oldStats, newStats *cadvisorapi.ContainerStats) *ContainerResourceUsage {
f := func(name string, newStats *stats.ContainerStats) *ContainerResourceUsage {
return &ContainerResourceUsage{
Name: name,
Timestamp: newStats.Timestamp,
CPUUsageInCores: float64(newStats.Cpu.Usage.Total-oldStats.Cpu.Usage.Total) / float64(newStats.Timestamp.Sub(oldStats.Timestamp).Nanoseconds()),
MemoryUsageInBytes: newStats.Memory.Usage,
MemoryWorkingSetInBytes: newStats.Memory.WorkingSet,
MemoryRSSInBytes: newStats.Memory.RSS,
CPUInterval: newStats.Timestamp.Sub(oldStats.Timestamp),
Timestamp: newStats.StartTime.Time,
CPUUsageInCores: float64(removeUint64Ptr(newStats.CPU.UsageNanoCores)) / 1000000000,
MemoryUsageInBytes: removeUint64Ptr(newStats.Memory.UsageBytes),
MemoryWorkingSetInBytes: removeUint64Ptr(newStats.Memory.WorkingSetBytes),
MemoryRSSInBytes: removeUint64Ptr(newStats.Memory.RSSBytes),
CPUInterval: 0,
}
}
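
A note on the CPU numbers above: the removed code derived CPUUsageInCores from two cumulative cadvisor samples (the delta in total CPU time divided by the wall-clock interval between them), while the summary API reports an instantaneous rate in UsageNanoCores, so a single sample converts directly and CPUInterval is now zero. A worked sketch with made-up sample values:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Old scheme: cumulative CPU time in nanoseconds, sampled cpuInterval
	// apart; cores = delta / wall-clock nanoseconds between the samples.
	oldTotal, newTotal := uint64(4e9), uint64(7e9) // made-up cumulative samples
	interval := 10 * time.Second
	fmt.Println(float64(newTotal-oldTotal) / float64(interval.Nanoseconds())) // 0.3 cores

	// New scheme: UsageNanoCores is already a rate (1 core = 1e9 nanocores).
	usageNanoCores := uint64(3e8) // made-up instantaneous sample
	fmt.Println(float64(usageNanoCores) / 1e9) // 0.3 cores
}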
// Process container infos that are relevant to us.
containers := containerNames()
usageMap := make(ResourceUsagePerContainer, len(containers))
for _, name := range containers {
info, ok := containerInfos[name]
if !ok {
if !expectMissingContainers {
return nil, fmt.Errorf("missing info for container %q on node %q", name, nodeName)
observedContainers := []string{}
for _, pod := range summary.Pods {
for _, container := range pod.Containers {
isInteresting := false
for _, interestingContainerName := range containers {
if container.Name == interestingContainerName {
isInteresting = true
observedContainers = append(observedContainers, container.Name)
break
}
}
if !isInteresting {
continue
}
first := info.Stats[0]
last := info.Stats[len(info.Stats)-1]
usageMap[name] = f(name, first, last)
usageMap[pod.PodRef.Name+"/"+container.Name] = f(pod.PodRef.Name+"/"+container.Name, &container)
}
}
return usageMap, nil
}
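
A hypothetical caller, sketched to show the rewritten contract; the clientset c, the node name, and the container name "my-container" are placeholders, not values this commit defines. Note that the returned map is now keyed "podName/containerName", which is what lets the gatherer below drop containerIDToNameMap.

	// Sketch only: assumes c (clientset.Interface) and nodeName are in scope.
	usage, err := getOneTimeResourceUsageOnNode(c, nodeName, 30*time.Second,
		func() []string { return []string{"my-container"} })
	if err != nil {
		Logf("failed to gather usage on %v: %v", nodeName, err)
		return
	}
	for name, u := range usage {
		Logf("%s: %.3f cores, working set %d bytes", name, u.CPUUsageInCores, u.MemoryWorkingSetInBytes)
	}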

View File

@@ -130,7 +130,6 @@ type resourceGatherWorker struct {
c clientset.Interface
nodeName string
wg *sync.WaitGroup
containerIDToNameMap map[string]string
containerIDs []string
stopCh chan struct{}
dataSeries []ResourceUsagePerContainer
@@ -153,13 +152,13 @@ func (w *resourceGatherWorker) singleProbe() {
}
}
} else {
nodeUsage, err := getOneTimeResourceUsageOnNode(w.c, w.nodeName, probeDuration, func() []string { return w.containerIDs }, true)
nodeUsage, err := getOneTimeResourceUsageOnNode(w.c, w.nodeName, probeDuration, func() []string { return w.containerIDs })
if err != nil {
Logf("Error while reading data from %v: %v", w.nodeName, err)
return
}
for k, v := range nodeUsage {
data[w.containerIDToNameMap[k]] = v
data[k] = v
}
}
w.dataSeries = append(w.dataSeries, data)
@@ -204,7 +203,6 @@ type containerResourceGatherer struct {
stopCh chan struct{}
workers []resourceGatherWorker
workerWg sync.WaitGroup
containerIDToNameMap map[string]string
containerIDs []string
options ResourceGathererOptions
}
@@ -218,7 +216,6 @@ func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOpt
g := containerResourceGatherer{
client: c,
stopCh: make(chan struct{}),
containerIDToNameMap: make(map[string]string),
containerIDs: make([]string, 0),
options: options,
}
@@ -239,9 +236,7 @@ func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOpt
}
for _, pod := range pods.Items {
for _, container := range pod.Status.ContainerStatuses {
containerID := strings.TrimPrefix(container.ContainerID, "docker:/")
g.containerIDToNameMap[containerID] = pod.Name + "/" + container.Name
g.containerIDs = append(g.containerIDs, containerID)
g.containerIDs = append(g.containerIDs, container.Name)
}
}
nodeList, err := c.Core().Nodes().List(metav1.ListOptions{})
@@ -257,7 +252,6 @@ func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOpt
c: c,
nodeName: node.Name,
wg: &g.workerWg,
containerIDToNameMap: g.containerIDToNameMap,
containerIDs: g.containerIDs,
stopCh: g.stopCh,
finished: false,
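
The worker's polling loop itself is outside this diff. As a rough sketch of how the pieces shown here fit together, a hypothetical loop could drive singleProbe until stopCh closes; the real framework's loop, including its name and any initial sleep, may differ.

	// Hypothetical; only singleProbe, stopCh, and wg come from this diff.
	func (w *resourceGatherWorker) run(period time.Duration) {
		defer w.wg.Done()
		ticker := time.NewTicker(period)
		defer ticker.Stop()
		for {
			select {
			case <-w.stopCh:
				return
			case <-ticker.C:
				w.singleProbe()
			}
		}
	}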