Change StatsProvider interface to provide container stats from either cadvisor or CRI and implement this interface using cadvisor

Yang Guo
2017-08-18 15:08:44 -07:00
parent acdf625e46
commit f9767d2f71
32 changed files with 2722 additions and 1615 deletions
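For orientation before the diff: the rewritten summary.go consumes a StatsProvider instead of querying cadvisor directly. The sketch below lists only the methods the new Get() actually calls and infers their signatures from those call sites (using the same import aliases as the file: v1 for k8s.io/api/core/v1, cm for pkg/kubelet/cm, statsapi for the stats v1alpha1 API); the full interface added by this commit may declare more methods, so treat the exact signatures as an assumption rather than a verbatim copy.

    // Sketch of the StatsProvider surface used by summary.go, inferred from
    // the call sites in the new Get() below; not the commit's literal interface.
    type StatsProvider interface {
        // Node-level metadata and configuration.
        GetNode() (*v1.Node, error)
        GetNodeConfig() cm.NodeConfig

        // Stats for an arbitrary cgroup (e.g. "/" or the kubelet/runtime
        // cgroups), plus the network stats observed for that cgroup.
        GetCgroupStats(cgroupName string) (*statsapi.ContainerStats, *statsapi.NetworkStats, error)

        // Filesystem stats for the node rootfs and the image filesystem.
        RootFsStats() (*statsapi.FsStats, error)
        ImageFsStats() (*statsapi.FsStats, error)

        // Per-pod stats, sourced from either cadvisor or the CRI.
        ListPodStats() ([]statsapi.PodStats, error)
    }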


@@ -18,513 +18,85 @@ package stats
import (
    "fmt"

    "github.com/golang/glog"

    statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

type SummaryProvider interface {
    Get() (*statsapi.Summary, error)
}

// summaryProviderImpl implements the SummaryProvider interface.
type summaryProviderImpl struct {
    provider StatsProvider
}

var _ SummaryProvider = &summaryProviderImpl{}

// NewSummaryProvider returns a SummaryProvider using the stats provided by the
// specified statsProvider.
func NewSummaryProvider(statsProvider StatsProvider) SummaryProvider {
    return &summaryProviderImpl{statsProvider}
}

// Get provides a new Summary with the stats from Kubelet.
func (sp *summaryProviderImpl) Get() (*statsapi.Summary, error) {
    // TODO(timstclair): Consider returning a best-effort response if any of
    // the following errors occur.
    node, err := sp.provider.GetNode()
    if err != nil {
        return nil, fmt.Errorf("failed to get node info: %v", err)
    }
    nodeConfig := sp.provider.GetNodeConfig()
    rootStats, networkStats, err := sp.provider.GetCgroupStats("/")
    if err != nil {
        return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
    }
    rootFsStats, err := sp.provider.RootFsStats()
    if err != nil {
        return nil, fmt.Errorf("failed to get rootFs stats: %v", err)
    }
    imageFsStats, err := sp.provider.ImageFsStats()
    if err != nil {
        return nil, fmt.Errorf("failed to get imageFs stats: %v", err)
    }
    podStats, err := sp.provider.ListPodStats()
    if err != nil {
        return nil, fmt.Errorf("failed to list pod stats: %v", err)
    }

    nodeStats := statsapi.NodeStats{
        NodeName:  node.Name,
        CPU:       rootStats.CPU,
        Memory:    rootStats.Memory,
        Network:   networkStats,
        StartTime: rootStats.StartTime,
        Fs:        rootFsStats,
        Runtime:   &statsapi.RuntimeStats{ImageFs: imageFsStats},
    }

    systemContainers := map[string]string{
        statsapi.SystemContainerKubelet: nodeConfig.KubeletCgroupsName,
        statsapi.SystemContainerRuntime: nodeConfig.RuntimeCgroupsName,
        statsapi.SystemContainerMisc:    nodeConfig.SystemCgroupsName,
    }
    for sys, name := range systemContainers {
        s, _, err := sp.provider.GetCgroupStats(name)
        if err != nil {
            glog.Errorf("Failed to get system container stats for %q: %v", name, err)
            continue
        }
        // System containers don't have a filesystem associated with them.
        s.Logs, s.Rootfs = nil, nil
        s.Name = sys
        nodeStats.SystemContainers = append(nodeStats.SystemContainers, *s)
    }

    summary := statsapi.Summary{
        Node: nodeStats,
        Pods: podStats,
    }
    return &summary, nil
}

import (
    "fmt"
    "sort"
    "strings"
    "time"

    "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    kubetypes "k8s.io/apimachinery/pkg/types"
    stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
    "k8s.io/kubernetes/pkg/kubelet/cm"
    "k8s.io/kubernetes/pkg/kubelet/container"
    "k8s.io/kubernetes/pkg/kubelet/leaky"
    "k8s.io/kubernetes/pkg/kubelet/network"
    "k8s.io/kubernetes/pkg/kubelet/types"

    "github.com/golang/glog"
    cadvisorapiv1 "github.com/google/cadvisor/info/v1"
    cadvisorapiv2 "github.com/google/cadvisor/info/v2"
)

type SummaryProvider interface {
    // Get provides a new Summary using the latest results from cadvisor
    Get() (*stats.Summary, error)
}

// summaryProviderImpl implements the SummaryProvider interface.
type summaryProviderImpl struct {
    provider           StatsProvider
    fsResourceAnalyzer fsResourceAnalyzerInterface
    runtime            container.Runtime
}

var _ SummaryProvider = &summaryProviderImpl{}

// NewSummaryProvider returns a new SummaryProvider
func NewSummaryProvider(statsProvider StatsProvider, fsResourceAnalyzer fsResourceAnalyzerInterface, cruntime container.Runtime) SummaryProvider {
    return &summaryProviderImpl{statsProvider, fsResourceAnalyzer, cruntime}
}

// Get implements the SummaryProvider interface
// Query cadvisor for the latest resource metrics and build into a summary
func (sp *summaryProviderImpl) Get() (*stats.Summary, error) {
    options := cadvisorapiv2.RequestOptions{
        IdType:    cadvisorapiv2.TypeName,
        Count:     2, // 2 samples are needed to compute "instantaneous" CPU
        Recursive: true,
    }
    infos, err := sp.provider.GetContainerInfoV2("/", options)
    if err != nil {
        if _, ok := infos["/"]; ok {
            // If the failure is partial, log it and return a best-effort response.
            glog.Errorf("Partial failure issuing GetContainerInfoV2: %v", err)
        } else {
            return nil, fmt.Errorf("failed GetContainerInfoV2: %v", err)
        }
    }
    // TODO(tallclair): Consider returning a best-effort response if any of the following errors
    // occur.
    node, err := sp.provider.GetNode()
    if err != nil {
        return nil, fmt.Errorf("failed GetNode: %v", err)
    }
    nodeConfig := sp.provider.GetNodeConfig()
    rootFsInfo, err := sp.provider.RootFsInfo()
    if err != nil {
        return nil, fmt.Errorf("failed RootFsInfo: %v", err)
    }
    imageFsInfo, err := sp.provider.ImagesFsInfo()
    if err != nil {
        return nil, fmt.Errorf("failed DockerImagesFsInfo: %v", err)
    }
    imageStats, err := sp.runtime.ImageStats()
    if err != nil || imageStats == nil {
        return nil, fmt.Errorf("failed ImageStats: %v", err)
    }
    sb := &summaryBuilder{sp.fsResourceAnalyzer, node, nodeConfig, rootFsInfo, imageFsInfo, *imageStats, infos}
    return sb.build()
}

// summaryBuilder aggregates the datastructures provided by cadvisor into a Summary result
type summaryBuilder struct {
    fsResourceAnalyzer fsResourceAnalyzerInterface
    node               *v1.Node
    nodeConfig         cm.NodeConfig
    rootFsInfo         cadvisorapiv2.FsInfo
    imageFsInfo        cadvisorapiv2.FsInfo
    imageStats         container.ImageStats
    infos              map[string]cadvisorapiv2.ContainerInfo
}

// build returns a Summary from aggregating the input data
func (sb *summaryBuilder) build() (*stats.Summary, error) {
    rootInfo, found := sb.infos["/"]
    if !found {
        return nil, fmt.Errorf("Missing stats for root container")
    }

    var nodeFsInodesUsed *uint64
    if sb.rootFsInfo.Inodes != nil && sb.rootFsInfo.InodesFree != nil {
        nodeFsIU := *sb.rootFsInfo.Inodes - *sb.rootFsInfo.InodesFree
        nodeFsInodesUsed = &nodeFsIU
    }

    var imageFsInodesUsed *uint64
    if sb.imageFsInfo.Inodes != nil && sb.imageFsInfo.InodesFree != nil {
        imageFsIU := *sb.imageFsInfo.Inodes - *sb.imageFsInfo.InodesFree
        imageFsInodesUsed = &imageFsIU
    }

    rootStats := sb.containerInfoV2ToStats("", &rootInfo)
    cStats, _ := latestContainerStats(&rootInfo)
    nodeStats := stats.NodeStats{
        NodeName: sb.node.Name,
        CPU:      rootStats.CPU,
        Memory:   rootStats.Memory,
        Network:  sb.containerInfoV2ToNetworkStats("node:"+sb.node.Name, &rootInfo),
        Fs: &stats.FsStats{
            Time:           metav1.NewTime(cStats.Timestamp),
            AvailableBytes: &sb.rootFsInfo.Available,
            CapacityBytes:  &sb.rootFsInfo.Capacity,
            UsedBytes:      &sb.rootFsInfo.Usage,
            InodesFree:     sb.rootFsInfo.InodesFree,
            Inodes:         sb.rootFsInfo.Inodes,
            InodesUsed:     nodeFsInodesUsed,
        },
        StartTime: rootStats.StartTime,
        Runtime: &stats.RuntimeStats{
            ImageFs: &stats.FsStats{
                Time:           metav1.NewTime(cStats.Timestamp),
                AvailableBytes: &sb.imageFsInfo.Available,
                CapacityBytes:  &sb.imageFsInfo.Capacity,
                UsedBytes:      &sb.imageStats.TotalStorageBytes,
                InodesFree:     sb.imageFsInfo.InodesFree,
                Inodes:         sb.imageFsInfo.Inodes,
                InodesUsed:     imageFsInodesUsed,
            },
        },
    }

    systemContainers := map[string]string{
        stats.SystemContainerKubelet: sb.nodeConfig.KubeletCgroupsName,
        stats.SystemContainerRuntime: sb.nodeConfig.RuntimeCgroupsName,
        stats.SystemContainerMisc:    sb.nodeConfig.SystemCgroupsName,
    }
    for sys, name := range systemContainers {
        if info, ok := sb.infos[name]; ok {
            sysCont := sb.containerInfoV2ToStats(sys, &info)
            // System containers don't have a filesystem associated with them.
            sysCont.Rootfs = nil
            sysCont.Logs = nil
            nodeStats.SystemContainers = append(nodeStats.SystemContainers, sysCont)
        }
    }

    summary := stats.Summary{
        Node: nodeStats,
        Pods: sb.buildSummaryPods(),
    }
    return &summary, nil
}
// containerInfoV2FsStats populates the container fs stats
func (sb *summaryBuilder) containerInfoV2FsStats(
info *cadvisorapiv2.ContainerInfo,
cs *stats.ContainerStats) {
lcs, found := latestContainerStats(info)
if !found {
return
}
// The container logs live on the node rootfs device
cs.Logs = &stats.FsStats{
Time: metav1.NewTime(lcs.Timestamp),
AvailableBytes: &sb.rootFsInfo.Available,
CapacityBytes: &sb.rootFsInfo.Capacity,
InodesFree: sb.rootFsInfo.InodesFree,
Inodes: sb.rootFsInfo.Inodes,
}
if sb.rootFsInfo.Inodes != nil && sb.rootFsInfo.InodesFree != nil {
logsInodesUsed := *sb.rootFsInfo.Inodes - *sb.rootFsInfo.InodesFree
cs.Logs.InodesUsed = &logsInodesUsed
}
// The container rootFs lives on the imageFs devices (which may not be the node root fs)
cs.Rootfs = &stats.FsStats{
Time: metav1.NewTime(lcs.Timestamp),
AvailableBytes: &sb.imageFsInfo.Available,
CapacityBytes: &sb.imageFsInfo.Capacity,
InodesFree: sb.imageFsInfo.InodesFree,
Inodes: sb.imageFsInfo.Inodes,
}
cfs := lcs.Filesystem
if cfs != nil {
if cfs.BaseUsageBytes != nil {
rootfsUsage := *cfs.BaseUsageBytes
cs.Rootfs.UsedBytes = &rootfsUsage
if cfs.TotalUsageBytes != nil {
logsUsage := *cfs.TotalUsageBytes - *cfs.BaseUsageBytes
cs.Logs.UsedBytes = &logsUsage
}
}
if cfs.InodeUsage != nil {
rootInodes := *cfs.InodeUsage
cs.Rootfs.InodesUsed = &rootInodes
}
}
}
// latestContainerStats returns the latest container stats from cadvisor, or nil if none exist
func latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) {
stats := info.Stats
if len(stats) < 1 {
return nil, false
}
latest := stats[len(stats)-1]
if latest == nil {
return nil, false
}
return latest, true
}
// hasMemoryAndCPUInstUsage returns true if the specified container info has
// both non-zero CPU instantaneous usage and non-zero memory RSS usage, and
// false otherwise.
func hasMemoryAndCPUInstUsage(info *cadvisorapiv2.ContainerInfo) bool {
if !info.Spec.HasCpu || !info.Spec.HasMemory {
return false
}
cstat, found := latestContainerStats(info)
if !found {
return false
}
if cstat.CpuInst == nil {
return false
}
return cstat.CpuInst.Usage.Total != 0 && cstat.Memory.RSS != 0
}
// ByCreationTime implements sort.Interface for []containerInfoWithCgroup based
// on the cinfo.Spec.CreationTime field.
type ByCreationTime []containerInfoWithCgroup
func (a ByCreationTime) Len() int { return len(a) }
func (a ByCreationTime) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByCreationTime) Less(i, j int) bool {
if a[i].cinfo.Spec.CreationTime.Equal(a[j].cinfo.Spec.CreationTime) {
// There shouldn't be two containers with the same name and/or the same
// creation time. However, to make the logic here robust, we break the
// tie by moving the one without CPU instantaneous or memory RSS usage
// to the beginning.
return hasMemoryAndCPUInstUsage(&a[j].cinfo)
}
return a[i].cinfo.Spec.CreationTime.Before(a[j].cinfo.Spec.CreationTime)
}
// containerID is the identity of a container in a pod.
type containerID struct {
podRef stats.PodReference
containerName string
}
// containerInfoWithCgroup contains the ContainerInfo and its cgroup name.
type containerInfoWithCgroup struct {
cinfo cadvisorapiv2.ContainerInfo
cgroup string
}
// removeTerminatedContainerInfo returns the specified containerInfo but with
// the stats of the terminated containers removed.
//
// A ContainerInfo is considered to be of a terminated container if it has an
// older CreationTime and zero CPU instantaneous and memory RSS usage.
func removeTerminatedContainerInfo(containerInfo map[string]cadvisorapiv2.ContainerInfo) map[string]cadvisorapiv2.ContainerInfo {
cinfoMap := make(map[containerID][]containerInfoWithCgroup)
for key, cinfo := range containerInfo {
if !isPodManagedContainer(&cinfo) {
continue
}
cinfoID := containerID{
podRef: buildPodRef(&cinfo),
containerName: types.GetContainerName(cinfo.Spec.Labels),
}
cinfoMap[cinfoID] = append(cinfoMap[cinfoID], containerInfoWithCgroup{
cinfo: cinfo,
cgroup: key,
})
}
result := make(map[string]cadvisorapiv2.ContainerInfo)
for _, refs := range cinfoMap {
if len(refs) == 1 {
result[refs[0].cgroup] = refs[0].cinfo
continue
}
sort.Sort(ByCreationTime(refs))
i := 0
for ; i < len(refs); i++ {
if hasMemoryAndCPUInstUsage(&refs[i].cinfo) {
// Stops removing when we first see an info with non-zero
// CPU/Memory usage.
break
}
}
for ; i < len(refs); i++ {
result[refs[i].cgroup] = refs[i].cinfo
}
}
return result
}
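To make the dedup rule above concrete, here is a hypothetical illustration (not part of the commit) using the old file's imports: two cgroups report stats for the same pod container, as can happen after a container restart. The label keys assume the standard io.kubernetes.* labels read by the kubelet types helpers, and all values are illustrative.

    labels := map[string]string{
        "io.kubernetes.pod.name":       "nginx",
        "io.kubernetes.pod.namespace":  "default",
        "io.kubernetes.pod.uid":        "1234-5678",
        "io.kubernetes.container.name": "nginx",
    }
    now := time.Now()
    // Older cgroup of a terminated container: zero instantaneous CPU, zero RSS.
    terminated := cadvisorapiv2.ContainerInfo{
        Spec: cadvisorapiv2.ContainerSpec{
            CreationTime: now.Add(-time.Hour),
            Labels:       labels,
            HasCpu:       true,
            HasMemory:    true,
        },
        Stats: []*cadvisorapiv2.ContainerStats{{
            Timestamp: now,
            CpuInst:   &cadvisorapiv2.CpuInstStats{},
            Memory:    &cadvisorapiv1.MemoryStats{},
        }},
    }
    // Newer cgroup of the running replacement: non-zero usage on both axes.
    running := terminated
    running.Spec.CreationTime = now.Add(-time.Minute)
    running.Stats = []*cadvisorapiv2.ContainerStats{{
        Timestamp: now,
        CpuInst:   &cadvisorapiv2.CpuInstStats{Usage: cadvisorapiv2.CpuInstUsage{Total: 1000000000}},
        Memory:    &cadvisorapiv1.MemoryStats{RSS: 4 << 20},
    }}

    filtered := removeTerminatedContainerInfo(map[string]cadvisorapiv2.ContainerInfo{
        "/kubepods/pod1234/old": terminated,
        "/kubepods/pod1234/new": running,
    })
    // Only the "/kubepods/pod1234/new" entry survives the filtering.
    fmt.Printf("%d entries remain\n", len(filtered))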
// buildSummaryPods aggregates and returns the container stats in cinfos by the Pod managing the container.
// Containers not managed by a Pod are omitted.
func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats {
// Map each container to a pod and update the PodStats with container data
podToStats := map[stats.PodReference]*stats.PodStats{}
infos := removeTerminatedContainerInfo(sb.infos)
for key, cinfo := range infos {
// on systemd using devicemapper each mount into the container has an associated cgroup.
// we ignore them to ensure we do not get duplicate entries in our summary.
// for details on .mount units: http://man7.org/linux/man-pages/man5/systemd.mount.5.html
if strings.HasSuffix(key, ".mount") {
continue
}
// Build the Pod key if this container is managed by a Pod
if !isPodManagedContainer(&cinfo) {
continue
}
ref := buildPodRef(&cinfo)
// Lookup the PodStats for the pod using the PodRef. If none exists, initialize a new entry.
podStats, found := podToStats[ref]
if !found {
podStats = &stats.PodStats{PodRef: ref}
podToStats[ref] = podStats
}
// Update the PodStats entry with the stats from the container by adding it to stats.Containers
containerName := types.GetContainerName(cinfo.Spec.Labels)
if containerName == leaky.PodInfraContainerName {
// Special case for infrastructure container which is hidden from the user and has network stats
podStats.Network = sb.containerInfoV2ToNetworkStats("pod:"+ref.Namespace+"_"+ref.Name, &cinfo)
podStats.StartTime = metav1.NewTime(cinfo.Spec.CreationTime)
} else {
podStats.Containers = append(podStats.Containers, sb.containerInfoV2ToStats(containerName, &cinfo))
}
}
// Add each PodStats to the result
result := make([]stats.PodStats, 0, len(podToStats))
for _, podStats := range podToStats {
// Lookup the volume stats for each pod
podUID := kubetypes.UID(podStats.PodRef.UID)
if vstats, found := sb.fsResourceAnalyzer.GetPodVolumeStats(podUID); found {
podStats.VolumeStats = vstats.Volumes
}
result = append(result, *podStats)
}
return result
}
// buildPodRef returns a PodReference that identifies the Pod managing cinfo
func buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats.PodReference {
podName := types.GetPodName(cinfo.Spec.Labels)
podNamespace := types.GetPodNamespace(cinfo.Spec.Labels)
podUID := types.GetPodUID(cinfo.Spec.Labels)
return stats.PodReference{Name: podName, Namespace: podNamespace, UID: podUID}
}
// isPodManagedContainer returns true if the cinfo container is managed by a Pod
func isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool {
podName := types.GetPodName(cinfo.Spec.Labels)
podNamespace := types.GetPodNamespace(cinfo.Spec.Labels)
managed := podName != "" && podNamespace != ""
if !managed && podName != podNamespace {
glog.Warningf(
"Expect container to have either both podName (%s) and podNamespace (%s) labels, or neither.",
podName, podNamespace)
}
return managed
}
func (sb *summaryBuilder) containerInfoV2ToStats(
name string,
info *cadvisorapiv2.ContainerInfo) stats.ContainerStats {
cStats := stats.ContainerStats{
StartTime: metav1.NewTime(info.Spec.CreationTime),
Name: name,
}
cstat, found := latestContainerStats(info)
if !found {
return cStats
}
if info.Spec.HasCpu {
cpuStats := stats.CPUStats{
Time: metav1.NewTime(cstat.Timestamp),
}
if cstat.CpuInst != nil {
cpuStats.UsageNanoCores = &cstat.CpuInst.Usage.Total
}
if cstat.Cpu != nil {
cpuStats.UsageCoreNanoSeconds = &cstat.Cpu.Usage.Total
}
cStats.CPU = &cpuStats
}
if info.Spec.HasMemory {
pageFaults := cstat.Memory.ContainerData.Pgfault
majorPageFaults := cstat.Memory.ContainerData.Pgmajfault
cStats.Memory = &stats.MemoryStats{
Time: metav1.NewTime(cstat.Timestamp),
UsageBytes: &cstat.Memory.Usage,
WorkingSetBytes: &cstat.Memory.WorkingSet,
RSSBytes: &cstat.Memory.RSS,
PageFaults: &pageFaults,
MajorPageFaults: &majorPageFaults,
}
// availableBytes = memory limit (if known) - workingset
if !isMemoryUnlimited(info.Spec.Memory.Limit) {
availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet
cStats.Memory.AvailableBytes = &availableBytes
}
}
sb.containerInfoV2FsStats(info, &cStats)
cStats.UserDefinedMetrics = sb.containerInfoV2ToUserDefinedMetrics(info)
return cStats
}
// Size after which we consider memory to be "unlimited". This is not
// MaxInt64 due to rounding by the kernel.
// TODO: cadvisor should export this https://github.com/google/cadvisor/blob/master/metrics/prometheus.go#L596
const maxMemorySize = uint64(1 << 62)
func isMemoryUnlimited(v uint64) bool {
return v > maxMemorySize
}
func (sb *summaryBuilder) containerInfoV2ToNetworkStats(name string, info *cadvisorapiv2.ContainerInfo) *stats.NetworkStats {
if !info.Spec.HasNetwork {
return nil
}
cstat, found := latestContainerStats(info)
if !found {
return nil
}
for _, inter := range cstat.Network.Interfaces {
if inter.Name == network.DefaultInterfaceName {
return &stats.NetworkStats{
Time: metav1.NewTime(cstat.Timestamp),
RxBytes: &inter.RxBytes,
RxErrors: &inter.RxErrors,
TxBytes: &inter.TxBytes,
TxErrors: &inter.TxErrors,
}
}
}
glog.V(4).Infof("Missing default interface %q for %s", network.DefaultInterfaceName, name)
return nil
}
func (sb *summaryBuilder) containerInfoV2ToUserDefinedMetrics(info *cadvisorapiv2.ContainerInfo) []stats.UserDefinedMetric {
type specVal struct {
ref stats.UserDefinedMetricDescriptor
valType cadvisorapiv1.DataType
time time.Time
value float64
}
udmMap := map[string]*specVal{}
for _, spec := range info.Spec.CustomMetrics {
udmMap[spec.Name] = &specVal{
ref: stats.UserDefinedMetricDescriptor{
Name: spec.Name,
Type: stats.UserDefinedMetricType(spec.Type),
Units: spec.Units,
},
valType: spec.Format,
}
}
for _, stat := range info.Stats {
for name, values := range stat.CustomMetrics {
specVal, ok := udmMap[name]
if !ok {
glog.Warningf("spec for custom metric %q is missing from cAdvisor output. Spec: %+v, Metrics: %+v", name, info.Spec, stat.CustomMetrics)
continue
}
for _, value := range values {
// Pick the most recent value
if value.Timestamp.Before(specVal.time) {
continue
}
specVal.time = value.Timestamp
specVal.value = value.FloatValue
if specVal.valType == cadvisorapiv1.IntType {
specVal.value = float64(value.IntValue)
}
}
}
}
var udm []stats.UserDefinedMetric
for _, specVal := range udmMap {
udm = append(udm, stats.UserDefinedMetric{
UserDefinedMetricDescriptor: specVal.ref,
Time: metav1.NewTime(specVal.time),
Value: specVal.value,
})
}
return udm
}
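Finally, a minimal, hypothetical view of how the rewritten SummaryProvider is consumed. The concrete StatsProvider construction is elided because it lives in the other files of this commit (the cadvisor- and CRI-backed implementations), so the provider argument below stands in for whichever of those the kubelet wires up.

    // Hypothetical caller; `provider` would be one of the StatsProvider
    // implementations added elsewhere in this commit.
    func logSummary(provider StatsProvider) {
        summaryProvider := NewSummaryProvider(provider)
        summary, err := summaryProvider.Get()
        if err != nil {
            glog.Errorf("failed to get stats summary: %v", err)
            return
        }
        glog.V(4).Infof("node %q is running %d pods", summary.Node.NodeName, len(summary.Pods))
    }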