Merge pull request #113291 from mengjiao-liu/fix-podAndContainerStatsFromCRI

Fix stats for podAndContainerStatsFromCRI
This commit is contained in:
Kubernetes Prow Robot 2022-11-07 19:48:17 -08:00 committed by GitHub
commit f328d3dc3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 341 additions and 11 deletions

View File

@ -137,18 +137,20 @@ func (p *criStatsProvider) listPodStats(ctx context.Context, updateCPUNanoCoreUs
}
if p.podAndContainerStatsFromCRI {
_, err := p.listPodStatsStrictlyFromCRI(ctx, updateCPUNanoCoreUsage, containerMap, podSandboxMap, &rootFsInfo)
if err != nil {
s, ok := status.FromError(err)
// Legitimate failure, rather than the CRI implementation does not support ListPodSandboxStats.
if !ok || s.Code() != codes.Unimplemented {
return nil, err
}
// CRI implementation doesn't support ListPodSandboxStats, warn and fallback.
klog.V(5).ErrorS(err,
"CRI implementation must be updated to support ListPodSandboxStats if PodAndContainerStatsFromCRI feature gate is enabled. Falling back to populating with cAdvisor; this call will fail in the future.",
)
result, err := p.listPodStatsStrictlyFromCRI(ctx, updateCPUNanoCoreUsage, containerMap, podSandboxMap, &rootFsInfo)
if err == nil {
// Call succeeded
return result, nil
}
s, ok := status.FromError(err)
// Legitimate failure, rather than the CRI implementation does not support ListPodSandboxStats.
if !ok || s.Code() != codes.Unimplemented {
return nil, err
}
// CRI implementation doesn't support ListPodSandboxStats, warn and fallback.
klog.V(5).ErrorS(err,
"CRI implementation must be updated to support ListPodSandboxStats if PodAndContainerStatsFromCRI feature gate is enabled. Falling back to populating with cAdvisor; this call will fail in the future.",
)
}
return p.listPodStatsPartiallyFromCRI(ctx, updateCPUNanoCoreUsage, containerMap, podSandboxMap, &rootFsInfo)
}

View File

@ -52,6 +52,11 @@ const (
offsetUsage
)
const (
// This offset offsetCRI is to distinguish it from Cadvisor stats
offsetCRI = 1000
)
const (
seedRoot = 0
seedKubelet = 200
@ -324,6 +329,201 @@ func TestCRIListPodStats(t *testing.T) {
checkCRIPodCPUAndMemoryStats(assert, p3, infos[sandbox3Cgroup].Stats[0])
}
func TestListPodStatsStrictlyFromCRI(t *testing.T) {
ctx := context.Background()
var (
imageFsMountpoint = "/test/mount/point"
unknownMountpoint = "/unknown/mount/point"
imageFsInfo = getTestFsInfo(2000)
rootFsInfo = getTestFsInfo(1000)
// A pod that CRI returns stats and cadvisor returns stats
// The pod stats from CRI stats
sandbox0 = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns", false)
sandbox0Cgroup = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox0.PodSandboxStatus.Metadata.Uid))
container0 = makeFakeContainer(sandbox0, cName0, 0, false)
containerStats0 = makeFakeContainerStatsStrictlyFromCRI(seedContainer0, container0, imageFsMountpoint)
containerLogStats0 = makeFakeLogStats(1000)
container1 = makeFakeContainer(sandbox0, cName1, 0, false)
containerStats1 = makeFakeContainerStatsStrictlyFromCRI(seedContainer1, container1, unknownMountpoint)
containerLogStats1 = makeFakeLogStats(2000)
sandboxPodStats0 = makeFakePodSandboxStatsStrictlyFromCRI(seedSandbox0, sandbox0, containerStats0, containerStats1)
// A pod that CRI returns stats and cadvisor returns no stats
// The pod stats from CRI stats
sandbox1 = makeFakePodSandbox("sandbox1-name", "sandbox1-uid", "sandbox1-ns", false)
sandbox1Cgroup = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox1.PodSandboxStatus.Metadata.Uid))
container2 = makeFakeContainer(sandbox1, cName2, 0, false)
containerStats2 = makeFakeContainerStatsStrictlyFromCRI(seedContainer2, container2, imageFsMountpoint)
containerLogStats2 = makeFakeLogStats(3000)
sandboxPodStats1 = makeFakePodSandboxStatsStrictlyFromCRI(seedSandbox1, sandbox1, containerStats2)
podLogName0 = "pod-log-0"
podLogName1 = "pod-log-1"
podLogStats0 = makeFakeLogStats(5000)
podLogStats1 = makeFakeLogStats(6000)
)
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
var (
mockCadvisor = cadvisortest.NewMockInterface(mockCtrl)
mockRuntimeCache = new(kubecontainertest.MockRuntimeCache)
mockPodManager = new(kubepodtest.MockManager)
resourceAnalyzer = new(fakeResourceAnalyzer)
fakeRuntimeService = critest.NewFakeRuntimeService()
fakeImageService = critest.NewFakeImageService()
)
infos := map[string]cadvisorapiv2.ContainerInfo{
"/": getTestContainerInfo(seedRoot, "", "", ""),
"/kubelet": getTestContainerInfo(seedKubelet, "", "", ""),
"/system": getTestContainerInfo(seedMisc, "", "", ""),
sandbox0.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
sandbox0Cgroup: getTestContainerInfo(seedSandbox0, "", "", ""),
container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0),
container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1),
}
exceptedContainerStatsMap := map[string]statsapi.ContainerStats{
cName0: getCRIContainerStatsStrictlyFromCRI(seedContainer0, cName0),
cName1: getCRIContainerStatsStrictlyFromCRI(seedContainer1, cName1),
cName2: getCRIContainerStatsStrictlyFromCRI(seedContainer2, cName2),
}
prf0 := statsapi.PodReference{Name: "sandbox0-name", UID: "sandbox0-uid", Namespace: "sandbox0-ns"}
prf1 := statsapi.PodReference{Name: "sandbox1-name", UID: "sandbox1-uid", Namespace: "sandbox1-ns"}
exceptedPodStatsMap := map[statsapi.PodReference]statsapi.PodStats{
prf0: getPodSandboxStatsStrictlyFromCRI(seedSandbox0, sandbox0),
prf1: getPodSandboxStatsStrictlyFromCRI(seedSandbox1, sandbox1),
}
options := cadvisorapiv2.RequestOptions{
IdType: cadvisorapiv2.TypeName,
Count: 2,
Recursive: true,
}
mockCadvisor.EXPECT().ContainerInfoV2("/", options).Return(infos, nil)
mockCadvisor.EXPECT().RootFsInfo().Return(rootFsInfo, nil)
mockCadvisor.EXPECT().GetDirFsInfo(imageFsMountpoint).Return(imageFsInfo, nil)
mockCadvisor.EXPECT().GetDirFsInfo(unknownMountpoint).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice)
fakeRuntimeService.SetFakeSandboxes([]*critest.FakePodSandbox{
sandbox0, sandbox1,
})
fakeRuntimeService.SetFakeContainers([]*critest.FakeContainer{
container0, container1, container2,
})
fakeRuntimeService.SetFakeContainerStats([]*runtimeapi.ContainerStats{
containerStats0, containerStats1, containerStats2,
})
fakeRuntimeService.SetFakePodSandboxStats([]*runtimeapi.PodSandboxStats{
sandboxPodStats0, sandboxPodStats1,
})
ephemeralVolumes := makeFakeVolumeStats([]string{"ephVolume1, ephVolumes2"})
persistentVolumes := makeFakeVolumeStats([]string{"persisVolume1, persisVolumes2"})
resourceAnalyzer.podVolumeStats = serverstats.PodVolumeStats{
EphemeralVolumes: ephemeralVolumes,
PersistentVolumes: persistentVolumes,
}
fakeStats := map[string]*volume.Metrics{
kuberuntime.BuildContainerLogsDirectory("sandbox0-ns", "sandbox0-name", types.UID("sandbox0-uid"), cName0): containerLogStats0,
kuberuntime.BuildContainerLogsDirectory("sandbox0-ns", "sandbox0-name", types.UID("sandbox0-uid"), cName1): containerLogStats1,
kuberuntime.BuildContainerLogsDirectory("sandbox1-ns", "sandbox1-name", types.UID("sandbox1-uid"), cName2): containerLogStats2,
filepath.Join(kuberuntime.BuildPodLogsDirectory("sandbox0-ns", "sandbox0-name", types.UID("sandbox0-uid")), podLogName0): podLogStats0,
filepath.Join(kuberuntime.BuildPodLogsDirectory("sandbox1-ns", "sandbox1-name", types.UID("sandbox1-uid")), podLogName1): podLogStats1,
}
ctrl := gomock.NewController(t)
defer ctrl.Finish()
fakeOS := &kubecontainertest.FakeOS{}
fakeOS.ReadDirFn = func(path string) ([]os.DirEntry, error) {
var dirEntries []os.DirEntry
mockDE := kubecontainertest.NewMockDirEntry(ctrl)
switch path {
case kuberuntime.BuildPodLogsDirectory("sandbox0-ns", "sandbox0-name", types.UID("sandbox0-uid")):
mockDE.EXPECT().Name().Return(podLogName0)
case kuberuntime.BuildPodLogsDirectory("sandbox1-ns", "sandbox1-name", types.UID("sandbox1-uid")):
mockDE.EXPECT().Name().Return(podLogName1)
default:
return nil, nil
}
mockDE.EXPECT().IsDir().Return(false)
dirEntries = append(dirEntries, mockDE)
return dirEntries, nil
}
provider := NewCRIStatsProvider(
mockCadvisor,
resourceAnalyzer,
mockPodManager,
mockRuntimeCache,
fakeRuntimeService,
fakeImageService,
NewFakeHostStatsProviderWithData(fakeStats, fakeOS),
true,
)
cadvisorInfos, err := getCadvisorContainerInfo(mockCadvisor)
if err != nil {
t.Errorf("failed to get container info from cadvisor: %v", err)
}
stats, err := provider.ListPodStats(ctx)
assert := assert.New(t)
assert.NoError(err)
assert.Equal(2, len(stats))
podStatsMap := make(map[statsapi.PodReference]statsapi.PodStats)
for _, s := range stats {
podStatsMap[s.PodRef] = s
}
p0 := podStatsMap[prf0]
assert.Equal(sandbox0.CreatedAt, p0.StartTime.UnixNano())
assert.Equal(2, len(p0.Containers))
checkEphemeralStorageStats(assert, p0, ephemeralVolumes, []*runtimeapi.ContainerStats{containerStats0, containerStats1},
[]*volume.Metrics{containerLogStats0, containerLogStats1}, podLogStats0)
containerStatsMap := make(map[string]statsapi.ContainerStats)
for _, s := range p0.Containers {
containerStatsMap[s.Name] = s
}
c0 := containerStatsMap[cName0]
assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano())
checkCRICPUAndMemoryStatsForStrictlyFromCRI(assert, c0, exceptedContainerStatsMap[cName0])
assert.Nil(c0.Accelerators)
checkCRIRootfsStats(assert, c0, containerStats0, &imageFsInfo)
checkCRILogsStats(assert, c0, &rootFsInfo, containerLogStats0)
c1 := containerStatsMap[cName1]
assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano())
checkCRICPUAndMemoryStatsForStrictlyFromCRI(assert, c1, exceptedContainerStatsMap[cName1])
assert.Nil(c0.Accelerators)
checkCRIRootfsStats(assert, c1, containerStats1, nil)
checkCRILogsStats(assert, c1, &rootFsInfo, containerLogStats1)
checkCRIPodCPUAndMemoryStatsStrictlyFromCRI(assert, p0, exceptedPodStatsMap[prf0])
assert.NotNil(cadvisorInfos[sandbox0Cgroup].Stats[0].Cpu)
assert.NotNil(cadvisorInfos[sandbox0Cgroup].Stats[0].Memory)
p1 := podStatsMap[prf1]
assert.Equal(sandbox1.CreatedAt, p1.StartTime.UnixNano())
assert.Equal(1, len(p1.Containers))
checkEphemeralStorageStats(assert, p1, ephemeralVolumes, []*runtimeapi.ContainerStats{containerStats2},
[]*volume.Metrics{containerLogStats2}, podLogStats1)
c2 := p1.Containers[0]
assert.Equal(cName2, c2.Name)
assert.Equal(container2.CreatedAt, c2.StartTime.UnixNano())
checkCRICPUAndMemoryStatsForStrictlyFromCRI(assert, c2, exceptedContainerStatsMap[cName2])
assert.Nil(c0.Accelerators)
checkCRIRootfsStats(assert, c2, containerStats2, &imageFsInfo)
checkCRILogsStats(assert, c2, &rootFsInfo, containerLogStats2)
checkCRIPodCPUAndMemoryStatsStrictlyFromCRI(assert, p1, exceptedPodStatsMap[prf1])
if runtime.GOOS == "linux" {
if _, ok := cadvisorInfos[sandbox1Cgroup]; ok {
t.Errorf("expect no cadvisor stats for pod %v", prf1)
}
}
}
func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
ctx := context.Background()
@ -656,6 +856,91 @@ func makeFakeContainerStats(container *critest.FakeContainer, imageFsMountpoint
return containerStats
}
// makeFakeContainerStatsStrictlyFromCRI use CRI offset to fake CRI container stats to distinguish cadvisor stats.
func makeFakeContainerStatsStrictlyFromCRI(seed int, container *critest.FakeContainer, imageFsMountpoint string) *runtimeapi.ContainerStats {
containerStats := &runtimeapi.ContainerStats{
Attributes: &runtimeapi.ContainerAttributes{
Id: container.ContainerStatus.Id,
Metadata: container.ContainerStatus.Metadata,
},
WritableLayer: &runtimeapi.FilesystemUsage{
Timestamp: timestamp.UnixNano(),
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: imageFsMountpoint},
UsedBytes: &runtimeapi.UInt64Value{Value: uint64(seed + offsetCRI + offsetFsUsage)},
InodesUsed: &runtimeapi.UInt64Value{Value: uint64(seed + offsetCRI + offsetFsInodeUsage)},
},
}
if container.State == runtimeapi.ContainerState_CONTAINER_EXITED {
containerStats.Cpu = nil
containerStats.Memory = nil
} else {
containerStats.Cpu = &runtimeapi.CpuUsage{
Timestamp: timestamp.UnixNano(),
UsageCoreNanoSeconds: &runtimeapi.UInt64Value{Value: uint64(seed + offsetCRI + offsetCPUUsageCoreSeconds)},
}
containerStats.Memory = &runtimeapi.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtimeapi.UInt64Value{Value: uint64(seed + offsetCRI + offsetMemWorkingSetBytes)},
}
}
return containerStats
}
func makeFakePodSandboxStatsStrictlyFromCRI(seed int, podSandbox *critest.FakePodSandbox, podContainerStats ...*runtimeapi.ContainerStats) *runtimeapi.PodSandboxStats {
podSandboxStats := &runtimeapi.PodSandboxStats{
Attributes: &runtimeapi.PodSandboxAttributes{
Id: podSandbox.Id,
Metadata: podSandbox.Metadata,
},
Linux: &runtimeapi.LinuxPodSandboxStats{},
}
for _, cs := range podContainerStats {
podSandboxStats.Linux.Containers = append(podSandboxStats.Linux.Containers, cs)
}
if podSandbox.State == runtimeapi.PodSandboxState_SANDBOX_NOTREADY {
podSandboxStats.Linux.Cpu = nil
podSandboxStats.Linux.Memory = nil
} else {
podSandboxStats.Linux.Cpu = &runtimeapi.CpuUsage{
Timestamp: timestamp.UnixNano(),
UsageCoreNanoSeconds: &runtimeapi.UInt64Value{Value: uint64(seed + offsetCRI + offsetCPUUsageCoreSeconds)},
}
podSandboxStats.Linux.Memory = &runtimeapi.MemoryUsage{
Timestamp: timestamp.UnixNano(),
WorkingSetBytes: &runtimeapi.UInt64Value{Value: uint64(seed + offsetCRI + offsetMemWorkingSetBytes)},
}
}
return podSandboxStats
}
func getPodSandboxStatsStrictlyFromCRI(seed int, podSandbox *critest.FakePodSandbox) statsapi.PodStats {
podStats := statsapi.PodStats{
PodRef: statsapi.PodReference{
Name: podSandbox.Metadata.Name,
UID: podSandbox.Metadata.Uid,
Namespace: podSandbox.Metadata.Namespace,
},
// The StartTime in the summary API is the pod creation time.
StartTime: metav1.NewTime(time.Unix(0, podSandbox.CreatedAt)),
}
if podSandbox.State == runtimeapi.PodSandboxState_SANDBOX_NOTREADY {
podStats.CPU = nil
podStats.Memory = nil
} else {
usageCoreNanoSeconds := uint64(seed + offsetCRI + offsetCPUUsageCoreSeconds)
workingSetBytes := uint64(seed + offsetCRI + offsetMemWorkingSetBytes)
podStats.CPU = &statsapi.CPUStats{
Time: metav1.NewTime(timestamp),
UsageCoreNanoSeconds: &usageCoreNanoSeconds,
}
podStats.Memory = &statsapi.MemoryStats{
Time: metav1.NewTime(timestamp),
WorkingSetBytes: &workingSetBytes,
}
}
return podStats
}
func makeFakeImageFsUsage(fsMountpoint string) *runtimeapi.FilesystemUsage {
return &runtimeapi.FilesystemUsage{
Timestamp: time.Now().UnixNano(),
@ -703,6 +988,12 @@ func checkCRICPUAndMemoryStats(assert *assert.Assertions, actual statsapi.Contai
assert.Equal(cs.Memory.ContainerData.Pgmajfault, *actual.Memory.MajorPageFaults)
}
func checkCRICPUAndMemoryStatsForStrictlyFromCRI(assert *assert.Assertions, actual statsapi.ContainerStats, excepted statsapi.ContainerStats) {
assert.Equal(excepted.CPU.Time.UnixNano(), actual.CPU.Time.UnixNano())
assert.Equal(*excepted.CPU.UsageCoreNanoSeconds, *actual.CPU.UsageCoreNanoSeconds)
assert.Equal(*excepted.Memory.WorkingSetBytes, *actual.Memory.WorkingSetBytes)
}
func checkCRIRootfsStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *runtimeapi.ContainerStats, imageFsInfo *cadvisorapiv2.FsInfo) {
assert.Equal(cs.WritableLayer.Timestamp, actual.Rootfs.Time.UnixNano())
if imageFsInfo != nil {
@ -783,6 +1074,15 @@ func checkCRIPodCPUAndMemoryStats(assert *assert.Assertions, actual statsapi.Pod
assert.Equal(cs.Memory.ContainerData.Pgmajfault, *actual.Memory.MajorPageFaults)
}
func checkCRIPodCPUAndMemoryStatsStrictlyFromCRI(assert *assert.Assertions, actual statsapi.PodStats, excepted statsapi.PodStats) {
if runtime.GOOS != "linux" {
return
}
assert.Equal(excepted.CPU.Time.UnixNano(), actual.CPU.Time.UnixNano())
assert.Equal(*excepted.CPU.UsageCoreNanoSeconds, *actual.CPU.UsageCoreNanoSeconds)
assert.Equal(*excepted.Memory.WorkingSetBytes, *actual.Memory.WorkingSetBytes)
}
func makeFakeLogStats(seed int) *volume.Metrics {
m := &volume.Metrics{}
m.Used = resource.NewQuantity(int64(seed+offsetUsage), resource.BinarySI)
@ -986,3 +1286,31 @@ func TestExtractIDFromCgroupPath(t *testing.T) {
assert.Equal(t, test.expected, id)
}
}
func getCRIContainerStatsStrictlyFromCRI(seed int, containerName string) statsapi.ContainerStats {
result := statsapi.ContainerStats{
Name: containerName,
StartTime: metav1.NewTime(timestamp),
CPU: &statsapi.CPUStats{},
Memory: &statsapi.MemoryStats{},
// UserDefinedMetrics is not supported by CRI.
Rootfs: &statsapi.FsStats{},
}
result.CPU.Time = metav1.NewTime(timestamp)
usageCoreNanoSeconds := uint64(seed + offsetCRI + offsetCPUUsageCoreSeconds)
result.CPU.UsageCoreNanoSeconds = &usageCoreNanoSeconds
result.Memory.Time = metav1.NewTime(timestamp)
workingSetBytes := uint64(seed + offsetCRI + offsetMemWorkingSetBytes)
result.Memory.WorkingSetBytes = &workingSetBytes
result.Rootfs.Time = metav1.NewTime(timestamp)
usedBytes := uint64(seed + offsetCRI + offsetFsUsage)
result.Rootfs.UsedBytes = &usedBytes
inodesUsed := uint64(seed + offsetCRI + offsetFsInodeUsage)
result.Rootfs.InodesUsed = &inodesUsed
return result
}