mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-02 00:07:50 +00:00
Integrating cadvisor stats to CRI Pod stats collection
This commit addresses the issue described here https://github.com/kubernetes-incubator/cri-containerd/issues/341 The changes include using cadvisor stats in addition to CRI stats for CRI runtimes. As described in the issue above, the CRI stats currently don't provide all the stats the kubelet needs. This commit addresses the need to extract, from cadvisor, the stats that are not available as CRI stats. Signed-off-by: abhi <abhi@docker.com>
This commit is contained in:
parent
ccb15fb498
commit
84b0e82db4
@ -76,20 +76,9 @@ func (p *cadvisorStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get imageFs info: %v", err)
|
||||
}
|
||||
|
||||
infos, err := p.cadvisor.ContainerInfoV2("/", cadvisorapiv2.RequestOptions{
|
||||
IdType: cadvisorapiv2.TypeName,
|
||||
Count: 2, // 2 samples are needed to compute "instantaneous" CPU
|
||||
Recursive: true,
|
||||
})
|
||||
infos, err := getCadvisorContainerInfo(p.cadvisor)
|
||||
if err != nil {
|
||||
if _, ok := infos["/"]; ok {
|
||||
// If the failure is partial, log it and return a best-effort
|
||||
// response.
|
||||
glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
|
||||
} else {
|
||||
return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
|
||||
}
|
||||
return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
|
||||
}
|
||||
// removeTerminatedContainerInfo will also remove pod level cgroups, so save the infos into allInfos first
|
||||
allInfos := infos
|
||||
@ -352,3 +341,21 @@ func hasMemoryAndCPUInstUsage(info *cadvisorapiv2.ContainerInfo) bool {
|
||||
}
|
||||
return cstat.CpuInst.Usage.Total != 0 && cstat.Memory.RSS != 0
|
||||
}
|
||||
|
||||
func getCadvisorContainerInfo(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
|
||||
infos, err := ca.ContainerInfoV2("/", cadvisorapiv2.RequestOptions{
|
||||
IdType: cadvisorapiv2.TypeName,
|
||||
Count: 2, // 2 samples are needed to compute "instantaneous" CPU
|
||||
Recursive: true,
|
||||
})
|
||||
if err != nil {
|
||||
if _, ok := infos["/"]; ok {
|
||||
// If the failure is partial, log it and return a best-effort
|
||||
// response.
|
||||
glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
|
||||
} else {
|
||||
return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
|
||||
}
|
||||
}
|
||||
return infos, nil
|
||||
}
|
||||
|
@ -18,7 +18,9 @@ package stats
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
@ -112,6 +114,11 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
|
||||
containerMap[c.Id] = c
|
||||
}
|
||||
|
||||
caInfos, err := getCRICadvisorStats(p.cadvisor)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
|
||||
}
|
||||
|
||||
for _, stats := range resp {
|
||||
containerID := stats.Attributes.Id
|
||||
container, found := containerMap[containerID]
|
||||
@ -132,10 +139,25 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
|
||||
ps, found := sandboxIDToPodStats[podSandboxID]
|
||||
if !found {
|
||||
ps = buildPodStats(podSandbox)
|
||||
// Fill in stats from cadvisor, if available, to get the full set of required pod stats
|
||||
caPodSandbox, found := caInfos[podSandboxID]
|
||||
if !found {
|
||||
glog.V(4).Info("Unable to find cadvisor stats for sandbox %q", podSandboxID)
|
||||
} else {
|
||||
p.addCadvisorPodStats(ps, &caPodSandbox)
|
||||
}
|
||||
sandboxIDToPodStats[podSandboxID] = ps
|
||||
}
|
||||
containerStats := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
|
||||
ps.Containers = append(ps.Containers, *containerStats)
|
||||
cs := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
|
||||
// If cadvisor stats is available for the container, use it to populate
|
||||
// container stats
|
||||
caStats, caFound := caInfos[containerID]
|
||||
if !caFound {
|
||||
glog.V(4).Info("Unable to find cadvisor stats for %q", containerID)
|
||||
} else {
|
||||
p.addCadvisorContainerStats(cs, &caStats)
|
||||
}
|
||||
ps.Containers = append(ps.Containers, *cs)
|
||||
}
|
||||
|
||||
result := make([]statsapi.PodStats, 0, len(sandboxIDToPodStats))
|
||||
@ -201,7 +223,7 @@ func (p *criStatsProvider) getFsInfo(storageID *runtimeapi.StorageIdentifier) *c
|
||||
return &fsInfo
|
||||
}
|
||||
|
||||
// buildPodRef returns a PodStats that identifies the Pod managing cinfo
|
||||
// buildPodStats returns a PodStats that identifies the Pod managing cinfo
|
||||
func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
|
||||
return &statsapi.PodStats{
|
||||
PodRef: statsapi.PodReference{
|
||||
@ -211,7 +233,6 @@ func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
|
||||
},
|
||||
// The StartTime in the summary API is the pod creation time.
|
||||
StartTime: metav1.NewTime(time.Unix(0, podSandbox.CreatedAt)),
|
||||
// Network stats are not supported by CRI.
|
||||
}
|
||||
}
|
||||
|
||||
@ -226,6 +247,13 @@ func (p *criStatsProvider) makePodStorageStats(s *statsapi.PodStats, rootFsInfo
|
||||
return s
|
||||
}
|
||||
|
||||
func (p *criStatsProvider) addCadvisorPodStats(
|
||||
ps *statsapi.PodStats,
|
||||
caPodSandbox *cadvisorapiv2.ContainerInfo,
|
||||
) {
|
||||
ps.Network = cadvisorInfoToNetworkStats(ps.PodRef.Name, caPodSandbox)
|
||||
}
|
||||
|
||||
func (p *criStatsProvider) makeContainerStats(
|
||||
stats *runtimeapi.ContainerStats,
|
||||
container *runtimeapi.Container,
|
||||
@ -336,3 +364,44 @@ func removeTerminatedContainer(containers []*runtimeapi.Container) []*runtimeapi
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (p *criStatsProvider) addCadvisorContainerStats(
|
||||
cs *statsapi.ContainerStats,
|
||||
caPodStats *cadvisorapiv2.ContainerInfo,
|
||||
) {
|
||||
if caPodStats.Spec.HasCustomMetrics {
|
||||
cs.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(caPodStats)
|
||||
}
|
||||
|
||||
cpu, memory := cadvisorInfoToCPUandMemoryStats(caPodStats)
|
||||
if cpu != nil {
|
||||
cs.CPU = cpu
|
||||
}
|
||||
if memory != nil {
|
||||
cs.Memory = memory
|
||||
}
|
||||
}
|
||||
|
||||
func getCRICadvisorStats(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
|
||||
stats := make(map[string]cadvisorapiv2.ContainerInfo)
|
||||
infos, err := getCadvisorContainerInfo(ca)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err)
|
||||
}
|
||||
infos = removeTerminatedContainerInfo(infos)
|
||||
for key, info := range infos {
|
||||
// On systemd using devicemapper each mount into the container has an
|
||||
// associated cgroup. We ignore them to ensure we do not get duplicate
|
||||
// entries in our summary. For details on .mount units:
|
||||
// http://man7.org/linux/man-pages/man5/systemd.mount.5.html
|
||||
if strings.HasSuffix(key, ".mount") {
|
||||
continue
|
||||
}
|
||||
// Build the Pod key if this container is managed by a Pod
|
||||
if !isPodManagedContainer(&info) {
|
||||
continue
|
||||
}
|
||||
stats[path.Base(key)] = info
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
@ -22,9 +22,8 @@ import (
|
||||
"time"
|
||||
|
||||
cadvisorfs "github.com/google/cadvisor/fs"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
|
||||
"github.com/stretchr/testify/assert"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
critest "k8s.io/kubernetes/pkg/kubelet/apis/cri/testing"
|
||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
|
||||
|
@ -86,29 +86,12 @@ func cadvisorInfoToContainerStats(name string, info *cadvisorapiv2.ContainerInfo
|
||||
|
||||
if rootFs != nil {
|
||||
// The container logs live on the node rootfs device
|
||||
result.Logs = &statsapi.FsStats{
|
||||
Time: metav1.NewTime(cstat.Timestamp),
|
||||
AvailableBytes: &rootFs.Available,
|
||||
CapacityBytes: &rootFs.Capacity,
|
||||
InodesFree: rootFs.InodesFree,
|
||||
Inodes: rootFs.Inodes,
|
||||
}
|
||||
|
||||
if rootFs.Inodes != nil && rootFs.InodesFree != nil {
|
||||
logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree
|
||||
result.Logs.InodesUsed = &logsInodesUsed
|
||||
}
|
||||
result.Logs = buildLogsStats(cstat, rootFs)
|
||||
}
|
||||
|
||||
if imageFs != nil {
|
||||
// The container rootFs lives on the imageFs devices (which may not be the node root fs)
|
||||
result.Rootfs = &statsapi.FsStats{
|
||||
Time: metav1.NewTime(cstat.Timestamp),
|
||||
AvailableBytes: &imageFs.Available,
|
||||
CapacityBytes: &imageFs.Capacity,
|
||||
InodesFree: imageFs.InodesFree,
|
||||
Inodes: imageFs.Inodes,
|
||||
}
|
||||
result.Rootfs = buildRootfsStats(cstat, imageFs)
|
||||
}
|
||||
|
||||
cfs := cstat.Filesystem
|
||||
@ -274,3 +257,29 @@ func getCgroupStats(cadvisor cadvisor.Interface, containerName string) (*cadviso
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func buildLogsStats(cstat *cadvisorapiv2.ContainerStats, rootFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
|
||||
fsStats := &statsapi.FsStats{
|
||||
Time: metav1.NewTime(cstat.Timestamp),
|
||||
AvailableBytes: &rootFs.Available,
|
||||
CapacityBytes: &rootFs.Capacity,
|
||||
InodesFree: rootFs.InodesFree,
|
||||
Inodes: rootFs.Inodes,
|
||||
}
|
||||
|
||||
if rootFs.Inodes != nil && rootFs.InodesFree != nil {
|
||||
logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree
|
||||
fsStats.InodesUsed = &logsInodesUsed
|
||||
}
|
||||
return fsStats
|
||||
}
|
||||
|
||||
func buildRootfsStats(cstat *cadvisorapiv2.ContainerStats, imageFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
|
||||
return &statsapi.FsStats{
|
||||
Time: metav1.NewTime(cstat.Timestamp),
|
||||
AvailableBytes: &imageFs.Available,
|
||||
CapacityBytes: &imageFs.Capacity,
|
||||
InodesFree: imageFs.InodesFree,
|
||||
Inodes: imageFs.Inodes,
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user