Integrating cadvisor stats to CRI Pod stats collection

This commit addresses the issue described here
https://github.com/kubernetes-incubator/cri-containerd/issues/341
The changes include using cadvisor stats in addition to CRI stats
for CRI runtimes. As described in the issue above, the CRI stats
currently don't provide all the necessary stats for the kubelet.
This commit addresses the need to extract stats from cadvisor that
are not available as CRI stats.

Signed-off-by: abhi <abhi@docker.com>
This commit is contained in:
abhi 2017-11-01 07:06:01 -07:00
parent ccb15fb498
commit 84b0e82db4
4 changed files with 122 additions and 38 deletions

View File

@ -76,20 +76,9 @@ func (p *cadvisorStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
if err != nil {
return nil, fmt.Errorf("failed to get imageFs info: %v", err)
}
infos, err := p.cadvisor.ContainerInfoV2("/", cadvisorapiv2.RequestOptions{
IdType: cadvisorapiv2.TypeName,
Count: 2, // 2 samples are needed to compute "instantaneous" CPU
Recursive: true,
})
infos, err := getCadvisorContainerInfo(p.cadvisor)
if err != nil {
if _, ok := infos["/"]; ok {
// If the failure is partial, log it and return a best-effort
// response.
glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
} else {
return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
}
return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
}
// removeTerminatedContainerInfo will also remove pod level cgroups, so save the infos into allInfos first
allInfos := infos
@ -352,3 +341,21 @@ func hasMemoryAndCPUInstUsage(info *cadvisorapiv2.ContainerInfo) bool {
}
return cstat.CpuInst.Usage.Total != 0 && cstat.Memory.RSS != 0
}
// getCadvisorContainerInfo asks cadvisor for the info of "/" and, recursively,
// everything below it, requesting two samples per container.
//
// When cadvisor reports an error but the returned map still contains an entry
// for "/", the error is only logged and the partial map is returned with a nil
// error. When the "/" entry is missing, the error is returned to the caller.
func getCadvisorContainerInfo(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
	opts := cadvisorapiv2.RequestOptions{
		IdType:    cadvisorapiv2.TypeName,
		Count:     2, // 2 samples are needed to compute "instantaneous" CPU
		Recursive: true,
	}

	infos, err := ca.ContainerInfoV2("/", opts)
	if err == nil {
		return infos, nil
	}

	// Without the root entry there is nothing useful to hand back.
	if _, hasRoot := infos["/"]; !hasRoot {
		return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
	}

	// The failure is partial: log it and return a best-effort response.
	glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
	return infos, nil
}

View File

@ -18,7 +18,9 @@ package stats
import (
"fmt"
"path"
"sort"
"strings"
"time"
"github.com/golang/glog"
@ -112,6 +114,11 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
containerMap[c.Id] = c
}
caInfos, err := getCRICadvisorStats(p.cadvisor)
if err != nil {
return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
}
for _, stats := range resp {
containerID := stats.Attributes.Id
container, found := containerMap[containerID]
@ -132,10 +139,25 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
ps, found := sandboxIDToPodStats[podSandboxID]
if !found {
ps = buildPodStats(podSandbox)
// Fill in stats from cadvisor, if available, for the full set of required pod stats
caPodSandbox, found := caInfos[podSandboxID]
if !found {
glog.V(4).Info("Unable to find cadvisor stats for sandbox %q", podSandboxID)
} else {
p.addCadvisorPodStats(ps, &caPodSandbox)
}
sandboxIDToPodStats[podSandboxID] = ps
}
containerStats := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
ps.Containers = append(ps.Containers, *containerStats)
cs := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
// If cadvisor stats is available for the container, use it to populate
// container stats
caStats, caFound := caInfos[containerID]
if !caFound {
glog.V(4).Info("Unable to find cadvisor stats for %q", containerID)
} else {
p.addCadvisorContainerStats(cs, &caStats)
}
ps.Containers = append(ps.Containers, *cs)
}
result := make([]statsapi.PodStats, 0, len(sandboxIDToPodStats))
@ -201,7 +223,7 @@ func (p *criStatsProvider) getFsInfo(storageID *runtimeapi.StorageIdentifier) *c
return &fsInfo
}
// buildPodRef returns a PodStats that identifies the Pod managing cinfo
// buildPodStats returns a PodStats that identifies the Pod managing cinfo
func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
return &statsapi.PodStats{
PodRef: statsapi.PodReference{
@ -211,7 +233,6 @@ func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
},
// The StartTime in the summary API is the pod creation time.
StartTime: metav1.NewTime(time.Unix(0, podSandbox.CreatedAt)),
// Network stats are not supported by CRI.
}
}
@ -226,6 +247,13 @@ func (p *criStatsProvider) makePodStorageStats(s *statsapi.PodStats, rootFsInfo
return s
}
// addCadvisorPodStats sets ps.Network from the cadvisor container info passed
// in as caPodSandbox, using the pod name already recorded in ps.PodRef.
func (p *criStatsProvider) addCadvisorPodStats(
	ps *statsapi.PodStats,
	caPodSandbox *cadvisorapiv2.ContainerInfo,
) {
	podName := ps.PodRef.Name
	ps.Network = cadvisorInfoToNetworkStats(podName, caPodSandbox)
}
func (p *criStatsProvider) makeContainerStats(
stats *runtimeapi.ContainerStats,
container *runtimeapi.Container,
@ -336,3 +364,44 @@ func removeTerminatedContainer(containers []*runtimeapi.Container) []*runtimeapi
}
return result
}
// addCadvisorContainerStats copies stats derived from the given cadvisor
// container info into cs:
//   - UserDefinedMetrics is set only when the cadvisor spec reports custom
//     metrics;
//   - CPU and Memory are overwritten only when the cadvisor-derived value is
//     non-nil, so whatever cs already holds is kept otherwise.
func (p *criStatsProvider) addCadvisorContainerStats(
	cs *statsapi.ContainerStats,
	caPodStats *cadvisorapiv2.ContainerInfo,
) {
	if hasCustom := caPodStats.Spec.HasCustomMetrics; hasCustom {
		cs.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(caPodStats)
	}

	cpuStats, memStats := cadvisorInfoToCPUandMemoryStats(caPodStats)
	if cpuStats != nil {
		cs.CPU = cpuStats
	}
	if memStats != nil {
		cs.Memory = memStats
	}
}
// getCRICadvisorStats fetches container info from cadvisor, passes it through
// removeTerminatedContainerInfo, and returns the remaining pod-managed entries
// keyed by the last path element of their cadvisor key.
//
// Entries whose key ends in ".mount" and entries for which
// isPodManagedContainer reports false are left out of the result.
func getCRICadvisorStats(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
	allInfos, err := getCadvisorContainerInfo(ca)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err)
	}

	result := make(map[string]cadvisorapiv2.ContainerInfo)
	for name, info := range removeTerminatedContainerInfo(allInfos) {
		// On systemd using devicemapper each mount into the container has an
		// associated cgroup. We ignore them to ensure we do not get duplicate
		// entries in our summary. For details on .mount units:
		// http://man7.org/linux/man-pages/man5/systemd.mount.5.html
		//
		// Containers that are not managed by a Pod are skipped as well.
		if strings.HasSuffix(name, ".mount") || !isPodManagedContainer(&info) {
			continue
		}
		result[path.Base(name)] = info
	}
	return result, nil
}

View File

@ -22,9 +22,8 @@ import (
"time"
cadvisorfs "github.com/google/cadvisor/fs"
"github.com/stretchr/testify/assert"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
critest "k8s.io/kubernetes/pkg/kubelet/apis/cri/testing"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"

View File

@ -86,29 +86,12 @@ func cadvisorInfoToContainerStats(name string, info *cadvisorapiv2.ContainerInfo
if rootFs != nil {
// The container logs live on the node rootfs device
result.Logs = &statsapi.FsStats{
Time: metav1.NewTime(cstat.Timestamp),
AvailableBytes: &rootFs.Available,
CapacityBytes: &rootFs.Capacity,
InodesFree: rootFs.InodesFree,
Inodes: rootFs.Inodes,
}
if rootFs.Inodes != nil && rootFs.InodesFree != nil {
logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree
result.Logs.InodesUsed = &logsInodesUsed
}
result.Logs = buildLogsStats(cstat, rootFs)
}
if imageFs != nil {
// The container rootFs lives on the imageFs devices (which may not be the node root fs)
result.Rootfs = &statsapi.FsStats{
Time: metav1.NewTime(cstat.Timestamp),
AvailableBytes: &imageFs.Available,
CapacityBytes: &imageFs.Capacity,
InodesFree: imageFs.InodesFree,
Inodes: imageFs.Inodes,
}
result.Rootfs = buildRootfsStats(cstat, imageFs)
}
cfs := cstat.Filesystem
@ -274,3 +257,29 @@ func getCgroupStats(cadvisor cadvisor.Interface, containerName string) (*cadviso
}
return stats, nil
}
// buildLogsStats builds the FsStats reported for container logs from the given
// filesystem info, timestamped with cstat.Timestamp.
//
// AvailableBytes and CapacityBytes point into rootFs; Inodes and InodesFree
// are copied as-is. InodesUsed is filled in (Inodes - InodesFree) only when
// both inode counters are present.
func buildLogsStats(cstat *cadvisorapiv2.ContainerStats, rootFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
	logs := new(statsapi.FsStats)
	logs.Time = metav1.NewTime(cstat.Timestamp)
	logs.AvailableBytes = &rootFs.Available
	logs.CapacityBytes = &rootFs.Capacity
	logs.InodesFree = rootFs.InodesFree
	logs.Inodes = rootFs.Inodes

	// Without both counters the used-inode figure cannot be derived.
	if rootFs.Inodes == nil || rootFs.InodesFree == nil {
		return logs
	}

	used := *rootFs.Inodes - *rootFs.InodesFree
	logs.InodesUsed = &used
	return logs
}
// buildRootfsStats builds the FsStats reported for a container's rootfs from
// the given image filesystem info, timestamped with cstat.Timestamp.
//
// AvailableBytes and CapacityBytes point into imageFs; Inodes and InodesFree
// are copied as-is. InodesUsed is not set here.
func buildRootfsStats(cstat *cadvisorapiv2.ContainerStats, imageFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
	rootfs := new(statsapi.FsStats)
	rootfs.Time = metav1.NewTime(cstat.Timestamp)
	rootfs.AvailableBytes = &imageFs.Available
	rootfs.CapacityBytes = &imageFs.Capacity
	rootfs.InodesFree = imageFs.InodesFree
	rootfs.Inodes = imageFs.Inodes
	return rootfs
}