Merge pull request #129486 from iholder101/bugfix/swap-container-cri-stats

[KEP-2400] [Bugfix]: Ensure container-level swap metrics are collected
This commit is contained in:
Kubernetes Prow Robot 2025-02-04 08:14:59 -08:00 committed by GitHub
commit f82439f536
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 126 additions and 1 deletions

View File

@ -313,6 +313,7 @@ func TestCadvisorListPodStats(t *testing.T) {
}
if ps.Swap != nil {
checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap)
checkContainersSwapStats(t, ps, infos["/pod0-c0"], infos["/pod0-c1"])
}
// Validate Pod1 Results
@ -325,6 +326,7 @@ func TestCadvisorListPodStats(t *testing.T) {
checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory)
checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap)
checkNetworkStats(t, "Pod1", seedPod1Infra, ps.Network)
checkContainersSwapStats(t, ps, infos["/pod1-c0"])
// Validate Pod2 Results
ps, found = indexPods[prf2]
@ -336,6 +338,7 @@ func TestCadvisorListPodStats(t *testing.T) {
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap)
checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network)
checkContainersSwapStats(t, ps, infos["/pod2-c0"])
// Validate Pod3 Results
@ -352,6 +355,7 @@ func TestCadvisorListPodStats(t *testing.T) {
checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU)
checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory)
checkSwapStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Swap)
checkContainersSwapStats(t, ps, infos["/pod3-c1"])
}
func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
@ -467,6 +471,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
assert.EqualValues(t, testTime(creationTime, seedPod0Container0).Unix(), con.StartTime.Time.Unix())
checkCPUStats(t, "Pod0Container0", seedPod0Container0, con.CPU)
checkMemoryStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Memory)
checkSwapStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Swap)
assert.Nil(t, con.Rootfs)
assert.Nil(t, con.Logs)
assert.Nil(t, con.Accelerators)
@ -476,6 +481,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
assert.EqualValues(t, testTime(creationTime, seedPod0Container1).Unix(), con.StartTime.Time.Unix())
checkCPUStats(t, "Pod0Container1", seedPod0Container1, con.CPU)
checkMemoryStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Memory)
checkSwapStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Swap)
assert.Nil(t, con.Rootfs)
assert.Nil(t, con.Logs)
assert.Nil(t, con.Accelerators)
@ -491,6 +497,10 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
if ps.Memory != nil {
checkMemoryStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Memory)
}
if ps.Swap != nil {
checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap)
checkContainersSwapStats(t, ps, infos["/pod0-c0"], infos["/pod0-c1"])
}
// Validate Pod1 Results
ps, found = indexPods[prf1]
@ -500,6 +510,8 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
assert.Equal(t, cName10, con.Name)
checkCPUStats(t, "Pod1Container0", seedPod1Container, con.CPU)
checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory)
checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap)
checkContainersSwapStats(t, ps, infos["/pod1-c0"])
assert.Nil(t, ps.EphemeralStorage)
assert.Nil(t, ps.VolumeStats)
assert.Nil(t, ps.Network)
@ -512,6 +524,8 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
assert.Equal(t, cName20, con.Name)
checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU)
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap)
checkContainersSwapStats(t, ps, infos["/pod2-c0"])
assert.Nil(t, ps.EphemeralStorage)
assert.Nil(t, ps.VolumeStats)
assert.Nil(t, ps.Network)

View File

@ -905,6 +905,11 @@ func (p *criStatsProvider) addCadvisorContainerStats(
if memory != nil {
cs.Memory = memory
}
swap := cadvisorInfoToSwapStats(caPodStats)
if swap != nil {
cs.Swap = swap
}
}
func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(

View File

@ -279,6 +279,8 @@ func TestCRIListPodStats(t *testing.T) {
checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0])
checkCRIPodSwapStats(assert, p0, infos[sandbox0Cgroup].Stats[0])
checkContainersSwapStats(t, p0, infos[container0.Id], infos[container1.Id])
p1 := podStatsMap[statsapi.PodReference{Name: "sandbox1-name", UID: "sandbox1-uid", Namespace: "sandbox1-ns"}]
assert.Equal(sandbox1.CreatedAt, p1.StartTime.UnixNano())
assert.Len(p1.Containers, 1)
@ -296,6 +298,8 @@ func TestCRIListPodStats(t *testing.T) {
checkCRIPodCPUAndMemoryStats(assert, p1, infos[sandbox1Cgroup].Stats[0])
checkCRIPodSwapStats(assert, p1, infos[sandbox1Cgroup].Stats[0])
checkContainersSwapStats(t, p1, infos[container2.Id])
p2 := podStatsMap[statsapi.PodReference{Name: "sandbox2-name", UID: "sandbox2-uid", Namespace: "sandbox2-ns"}]
assert.Equal(sandbox2.CreatedAt, p2.StartTime.UnixNano())
assert.Len(p2.Containers, 1)
@ -315,6 +319,8 @@ func TestCRIListPodStats(t *testing.T) {
checkCRIPodCPUAndMemoryStats(assert, p2, infos[sandbox2Cgroup].Stats[0])
checkCRIPodSwapStats(assert, p2, infos[sandbox2Cgroup].Stats[0])
checkContainersSwapStats(t, p2, infos[container4.Id])
p3 := podStatsMap[statsapi.PodReference{Name: "sandbox3-name", UID: "sandbox3-uid", Namespace: "sandbox3-ns"}]
assert.Equal(sandbox3.CreatedAt, p3.StartTime.UnixNano())
assert.Len(p3.Containers, 1)

View File

@ -155,6 +155,7 @@ func cadvisorInfoToContainerCPUAndMemoryStats(name string, info *cadvisorapiv2.C
cpu, memory := cadvisorInfoToCPUandMemoryStats(info)
result.CPU = cpu
result.Memory = memory
result.Swap = cadvisorInfoToSwapStats(info)
return result
}

View File

@ -19,6 +19,8 @@ package stats
import (
"context"
"fmt"
"runtime"
"strings"
"testing"
"time"
@ -505,6 +507,39 @@ func checkFsStats(t *testing.T, label string, seed int, stats *statsapi.FsStats)
assert.EqualValues(t, seed+offsetFsInodesFree, *stats.InodesFree, label+".InodesFree")
}
func checkContainersSwapStats(t *testing.T, podStats statsapi.PodStats, containerStats ...cadvisorapiv2.ContainerInfo) {
if runtime.GOOS != "linux" {
return
}
podContainers := make(map[string]struct{}, len(podStats.Containers))
for _, container := range podStats.Containers {
podContainers[container.Name] = struct{}{}
}
for _, container := range containerStats {
found := false
containerName := container.Spec.Labels["io.kubernetes.container.name"]
for _, containerPodStats := range podStats.Containers {
if containerPodStats.Name == containerName {
assert.Equal(t, container.Stats[0].Memory.Swap, *containerPodStats.Swap.SwapUsageBytes)
found = true
}
}
assert.True(t, found, "container %s not found in pod stats", container.Spec.Labels["io.kubernetes.container.name"])
delete(podContainers, containerName)
}
var missingContainerNames []string
for containerName := range podContainers {
missingContainerNames = append(missingContainerNames, containerName)
}
assert.Emptyf(t, podContainers, "containers not found in pod stats: %v", strings.Join(missingContainerNames, " "))
if len(missingContainerNames) > 0 {
assert.FailNow(t, "containers not found in pod stats")
}
}
func checkEphemeralStats(t *testing.T, label string, containerSeeds []int, volumeSeeds []int, containerLogStats []*volume.Metrics, stats *statsapi.FsStats) {
var usedBytes, inodeUsage int
for _, cseed := range containerSeeds {

View File

@ -33,6 +33,7 @@ import (
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/onsi/gomega/gstruct"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -56,7 +57,7 @@ var (
noLimits *resource.Quantity = nil
)
var _ = SIGDescribe("Swap", "[LinuxOnly]", feature.Swap, framework.WithSerial(), func() {
var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framework.WithSerial(), func() {
f := framework.NewDefaultFramework("swap-qos")
addAfterEachForCleaningUpPods(f)
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
@ -287,6 +288,69 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", feature.Swap, framework.WithSerial(),
err := podClient.Delete(context.Background(), stressPod.Name, metav1.DeleteOptions{})
framework.ExpectNoError(err)
})
ginkgo.It("ensure summary API properly reports swap", func() {
stressSize := divideQuantity(nodeTotalMemory, 5)
ginkgo.By("Creating a stress pod with stress size: " + stressSize.String())
stressPod := getStressPod(stressSize)
memoryLimit := cloneQuantity(stressSize)
memoryLimit.Sub(resource.MustParse("50Mi"))
memoryRequest := divideQuantity(memoryLimit, 2)
ginkgo.By("Adding memory request of " + memoryRequest.String() + " and memory limit of " + memoryLimit.String())
setPodMemoryResources(stressPod, memoryRequest, memoryLimit)
gomega.Expect(qos.GetPodQOS(stressPod)).To(gomega.Equal(v1.PodQOSBurstable))
stressPod = runPodAndWaitUntilScheduled(f, stressPod)
ginkgo.By("Ensuring the pod is using swap")
var swapUsage *resource.Quantity
gomega.Eventually(func() error {
stressPod = getUpdatedPod(f, stressPod)
gomega.Expect(stressPod.Status.Phase).To(gomega.Equal(v1.PodRunning), "pod should be running")
var err error
swapUsage, err = getSwapUsage(f, stressPod)
if err != nil {
return err
}
if swapUsage.IsZero() {
return fmt.Errorf("swap usage is zero")
}
return nil
}, 5*time.Minute, 1*time.Second).Should(gomega.Succeed())
ginkgo.By("Waiting 15 seconds for cAdvisor to collect 2 stats points")
time.Sleep(15 * time.Second)
getSwapExpectation := func() gomega.OmegaMatcher {
return gstruct.PointTo(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
"Time": recent(maxStatsAge),
"SwapUsageBytes": bounded(1, memoryLimit.Value()),
}))
}
matchExpectations := gstruct.PointTo(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
"Pods": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{
fmt.Sprintf("%s::%s", f.Namespace.Name, stressPod.Name): gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
"Containers": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{
stressPod.Spec.Containers[0].Name: gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
"Swap": getSwapExpectation(),
}),
}),
"Swap": getSwapExpectation(),
}),
}),
}))
ginkgo.By("Validating /stats/summary")
// Give pods a minute to actually start up.
gomega.Eventually(context.Background(), getNodeSummary, 180*time.Second, 15*time.Second).Should(matchExpectations)
// Then the summary should match the expectations a few more times.
gomega.Consistently(context.Background(), getNodeSummary, 30*time.Second, 15*time.Second).Should(matchExpectations)
})
})
})
})