From 748b52a130826fa23c1133b8a6ee62a10ef5245a Mon Sep 17 00:00:00 2001 From: Itamar Holder Date: Sun, 5 Jan 2025 15:25:01 +0200 Subject: [PATCH 1/6] cri_provider, bugfix: Add cadvisor container stats Without this fix, when CRI stats provided collects cadvisor stats, pod swap stats are being collected but corresponding container swap stats are not. This commit fixes this. Signed-off-by: Itamar Holder --- pkg/kubelet/stats/cri_stats_provider.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index 9d8091e62c1..bb42e8470d4 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -905,6 +905,11 @@ func (p *criStatsProvider) addCadvisorContainerStats( if memory != nil { cs.Memory = memory } + + swap := cadvisorInfoToSwapStats(caPodStats) + if swap != nil { + cs.Swap = swap + } } func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats( From e6c19f315f922f8c59be785059fedabc30241147 Mon Sep 17 00:00:00 2001 From: Itamar Holder Date: Mon, 6 Jan 2025 10:42:40 +0200 Subject: [PATCH 2/6] cri_provider, unit tests: ensure container-level metrics are collected Signed-off-by: Itamar Holder --- pkg/kubelet/stats/cri_stats_provider_test.go | 6 ++++ pkg/kubelet/stats/provider_test.go | 35 ++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/pkg/kubelet/stats/cri_stats_provider_test.go b/pkg/kubelet/stats/cri_stats_provider_test.go index cfe47faff72..1011b05f0a5 100644 --- a/pkg/kubelet/stats/cri_stats_provider_test.go +++ b/pkg/kubelet/stats/cri_stats_provider_test.go @@ -279,6 +279,8 @@ func TestCRIListPodStats(t *testing.T) { checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0]) checkCRIPodSwapStats(assert, p0, infos[sandbox0Cgroup].Stats[0]) + checkContainersSwapStats(t, p0, infos[container0.Id], infos[container1.Id]) + p1 := podStatsMap[statsapi.PodReference{Name: "sandbox1-name", UID: "sandbox1-uid", Namespace: "sandbox1-ns"}] assert.Equal(sandbox1.CreatedAt, p1.StartTime.UnixNano()) assert.Len(p1.Containers, 1) @@ -296,6 +298,8 @@ func TestCRIListPodStats(t *testing.T) { checkCRIPodCPUAndMemoryStats(assert, p1, infos[sandbox1Cgroup].Stats[0]) checkCRIPodSwapStats(assert, p1, infos[sandbox1Cgroup].Stats[0]) + checkContainersSwapStats(t, p1, infos[container2.Id]) + p2 := podStatsMap[statsapi.PodReference{Name: "sandbox2-name", UID: "sandbox2-uid", Namespace: "sandbox2-ns"}] assert.Equal(sandbox2.CreatedAt, p2.StartTime.UnixNano()) assert.Len(p2.Containers, 1) @@ -315,6 +319,8 @@ func TestCRIListPodStats(t *testing.T) { checkCRIPodCPUAndMemoryStats(assert, p2, infos[sandbox2Cgroup].Stats[0]) checkCRIPodSwapStats(assert, p2, infos[sandbox2Cgroup].Stats[0]) + checkContainersSwapStats(t, p2, infos[container4.Id]) + p3 := podStatsMap[statsapi.PodReference{Name: "sandbox3-name", UID: "sandbox3-uid", Namespace: "sandbox3-ns"}] assert.Equal(sandbox3.CreatedAt, p3.StartTime.UnixNano()) assert.Len(p3.Containers, 1) diff --git a/pkg/kubelet/stats/provider_test.go b/pkg/kubelet/stats/provider_test.go index 978537fe106..c58830ff5ef 100644 --- a/pkg/kubelet/stats/provider_test.go +++ b/pkg/kubelet/stats/provider_test.go @@ -19,6 +19,8 @@ package stats import ( "context" "fmt" + "runtime" + "strings" "testing" "time" @@ -505,6 +507,39 @@ func checkFsStats(t *testing.T, label string, seed int, stats *statsapi.FsStats) assert.EqualValues(t, seed+offsetFsInodesFree, *stats.InodesFree, label+".InodesFree") } +func checkContainersSwapStats(t *testing.T, podStats statsapi.PodStats, containerStats ...cadvisorapiv2.ContainerInfo) { + if runtime.GOOS != "linux" { + return + } + + podContainers := make(map[string]struct{}, len(podStats.Containers)) + for _, container := range podStats.Containers { + podContainers[container.Name] = struct{}{} + } + + for _, container := range containerStats { + found := false + containerName := container.Spec.Labels["io.kubernetes.container.name"] + for _, containerPodStats := range podStats.Containers { + if containerPodStats.Name == containerName { + assert.Equal(t, container.Stats[0].Memory.Swap, *containerPodStats.Swap.SwapUsageBytes) + found = true + } + } + assert.True(t, found, "container %s not found in pod stats", container.Spec.Labels["io.kubernetes.container.name"]) + delete(podContainers, containerName) + } + + var missingContainerNames []string + for containerName := range podContainers { + missingContainerNames = append(missingContainerNames, containerName) + } + assert.Emptyf(t, podContainers, "containers not found in pod stats: %v", strings.Join(missingContainerNames, " ")) + if len(missingContainerNames) > 0 { + assert.FailNow(t, "containers not found in pod stats") + } +} + func checkEphemeralStats(t *testing.T, label string, containerSeeds []int, volumeSeeds []int, containerLogStats []*volume.Metrics, stats *statsapi.FsStats) { var usedBytes, inodeUsage int for _, cseed := range containerSeeds { From c111266609872e396e27cf7cc0780ce9b4393815 Mon Sep 17 00:00:00 2001 From: Itamar Holder Date: Mon, 6 Jan 2025 11:04:22 +0200 Subject: [PATCH 3/6] cadvisor_provider, bugfix: Add swap stats to CPU and Memory stats Signed-off-by: Itamar Holder --- pkg/kubelet/stats/helper.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go index e90d96d6aec..c51d6e4f276 100644 --- a/pkg/kubelet/stats/helper.go +++ b/pkg/kubelet/stats/helper.go @@ -155,6 +155,7 @@ func cadvisorInfoToContainerCPUAndMemoryStats(name string, info *cadvisorapiv2.C cpu, memory := cadvisorInfoToCPUandMemoryStats(info) result.CPU = cpu result.Memory = memory + result.Swap = cadvisorInfoToSwapStats(info) return result } From ceeba21d3d0b0bec1795bef06b3c9d29b10f27dc Mon Sep 17 00:00:00 2001 From: Itamar Holder Date: Mon, 6 Jan 2025 11:04:28 +0200 Subject: [PATCH 4/6] cadvisor_provider, unit test: Add swap stats to cadvisor CPU and Memory stats Signed-off-by: Itamar Holder --- pkg/kubelet/stats/cadvisor_stats_provider_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/kubelet/stats/cadvisor_stats_provider_test.go b/pkg/kubelet/stats/cadvisor_stats_provider_test.go index ed7c7f7e4e1..32cba277e0f 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider_test.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider_test.go @@ -467,6 +467,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { assert.EqualValues(t, testTime(creationTime, seedPod0Container0).Unix(), con.StartTime.Time.Unix()) checkCPUStats(t, "Pod0Container0", seedPod0Container0, con.CPU) checkMemoryStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Memory) + checkSwapStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Swap) assert.Nil(t, con.Rootfs) assert.Nil(t, con.Logs) assert.Nil(t, con.Accelerators) @@ -476,6 +477,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { assert.EqualValues(t, testTime(creationTime, seedPod0Container1).Unix(), con.StartTime.Time.Unix()) checkCPUStats(t, "Pod0Container1", seedPod0Container1, con.CPU) checkMemoryStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Memory) + checkSwapStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Swap) assert.Nil(t, con.Rootfs) assert.Nil(t, con.Logs) assert.Nil(t, con.Accelerators) @@ -491,6 +493,9 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { if ps.Memory != nil { checkMemoryStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Memory) } + if ps.Swap != nil { + checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap) + } // Validate Pod1 Results ps, found = indexPods[prf1] @@ -500,6 +505,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { assert.Equal(t, cName10, con.Name) checkCPUStats(t, "Pod1Container0", seedPod1Container, con.CPU) checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory) + checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap) assert.Nil(t, ps.EphemeralStorage) assert.Nil(t, ps.VolumeStats) assert.Nil(t, ps.Network) @@ -512,6 +518,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { assert.Equal(t, cName20, con.Name) checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU) checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory) + checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap) assert.Nil(t, ps.EphemeralStorage) assert.Nil(t, ps.VolumeStats) assert.Nil(t, ps.Network) From 54500bfe692109b0e59c0584cc0d9faadb7c0499 Mon Sep 17 00:00:00 2001 From: Itamar Holder Date: Mon, 6 Jan 2025 11:30:26 +0200 Subject: [PATCH 5/6] cadvisor_provider, unit tests: ensure container-level metrics are collected Signed-off-by: Itamar Holder --- pkg/kubelet/stats/cadvisor_stats_provider_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/kubelet/stats/cadvisor_stats_provider_test.go b/pkg/kubelet/stats/cadvisor_stats_provider_test.go index 32cba277e0f..11a0bc8dd1a 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider_test.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider_test.go @@ -313,6 +313,7 @@ func TestCadvisorListPodStats(t *testing.T) { } if ps.Swap != nil { checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap) + checkContainersSwapStats(t, ps, infos["/pod0-c0"], infos["/pod0-c1"]) } // Validate Pod1 Results @@ -325,6 +326,7 @@ func TestCadvisorListPodStats(t *testing.T) { checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory) checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap) checkNetworkStats(t, "Pod1", seedPod1Infra, ps.Network) + checkContainersSwapStats(t, ps, infos["/pod1-c0"]) // Validate Pod2 Results ps, found = indexPods[prf2] @@ -336,6 +338,7 @@ func TestCadvisorListPodStats(t *testing.T) { checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory) checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap) checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network) + checkContainersSwapStats(t, ps, infos["/pod2-c0"]) // Validate Pod3 Results @@ -352,6 +355,7 @@ func TestCadvisorListPodStats(t *testing.T) { checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU) checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory) checkSwapStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Swap) + checkContainersSwapStats(t, ps, infos["/pod3-c1"]) } func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { @@ -495,6 +499,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { } if ps.Swap != nil { checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap) + checkContainersSwapStats(t, ps, infos["/pod0-c0"], infos["/pod0-c1"]) } // Validate Pod1 Results @@ -506,6 +511,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { checkCPUStats(t, "Pod1Container0", seedPod1Container, con.CPU) checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory) checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap) + checkContainersSwapStats(t, ps, infos["/pod1-c0"]) assert.Nil(t, ps.EphemeralStorage) assert.Nil(t, ps.VolumeStats) assert.Nil(t, ps.Network) @@ -519,6 +525,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU) checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory) checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap) + checkContainersSwapStats(t, ps, infos["/pod2-c0"]) assert.Nil(t, ps.EphemeralStorage) assert.Nil(t, ps.VolumeStats) assert.Nil(t, ps.Network) From 617c094435436b0d392cbea3475db596ca3231f7 Mon Sep 17 00:00:00 2001 From: Itamar Holder Date: Tue, 7 Jan 2025 13:14:42 +0200 Subject: [PATCH 6/6] Add an e2e test Signed-off-by: Itamar Holder --- test/e2e_node/swap_test.go | 66 +++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/test/e2e_node/swap_test.go b/test/e2e_node/swap_test.go index aefd6c2ebb4..eb1cd3d5367 100644 --- a/test/e2e_node/swap_test.go +++ b/test/e2e_node/swap_test.go @@ -33,6 +33,7 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/onsi/gomega/gstruct" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -56,7 +57,7 @@ var ( noLimits *resource.Quantity = nil ) -var _ = SIGDescribe("Swap", "[LinuxOnly]", feature.Swap, framework.WithSerial(), func() { +var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framework.WithSerial(), func() { f := framework.NewDefaultFramework("swap-qos") addAfterEachForCleaningUpPods(f) f.NamespacePodSecurityLevel = admissionapi.LevelBaseline @@ -287,6 +288,69 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", feature.Swap, framework.WithSerial(), err := podClient.Delete(context.Background(), stressPod.Name, metav1.DeleteOptions{}) framework.ExpectNoError(err) }) + + ginkgo.It("ensure summary API properly reports swap", func() { + stressSize := divideQuantity(nodeTotalMemory, 5) + ginkgo.By("Creating a stress pod with stress size: " + stressSize.String()) + stressPod := getStressPod(stressSize) + + memoryLimit := cloneQuantity(stressSize) + memoryLimit.Sub(resource.MustParse("50Mi")) + memoryRequest := divideQuantity(memoryLimit, 2) + ginkgo.By("Adding memory request of " + memoryRequest.String() + " and memory limit of " + memoryLimit.String()) + setPodMemoryResources(stressPod, memoryRequest, memoryLimit) + gomega.Expect(qos.GetPodQOS(stressPod)).To(gomega.Equal(v1.PodQOSBurstable)) + + stressPod = runPodAndWaitUntilScheduled(f, stressPod) + + ginkgo.By("Ensuring the pod is using swap") + var swapUsage *resource.Quantity + gomega.Eventually(func() error { + stressPod = getUpdatedPod(f, stressPod) + gomega.Expect(stressPod.Status.Phase).To(gomega.Equal(v1.PodRunning), "pod should be running") + + var err error + swapUsage, err = getSwapUsage(f, stressPod) + if err != nil { + return err + } + + if swapUsage.IsZero() { + return fmt.Errorf("swap usage is zero") + } + + return nil + }, 5*time.Minute, 1*time.Second).Should(gomega.Succeed()) + + ginkgo.By("Waiting 15 seconds for cAdvisor to collect 2 stats points") + time.Sleep(15 * time.Second) + + getSwapExpectation := func() gomega.OmegaMatcher { + return gstruct.PointTo(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{ + "Time": recent(maxStatsAge), + "SwapUsageBytes": bounded(1, memoryLimit.Value()), + })) + } + + matchExpectations := gstruct.PointTo(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{ + "Pods": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{ + fmt.Sprintf("%s::%s", f.Namespace.Name, stressPod.Name): gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{ + "Containers": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{ + stressPod.Spec.Containers[0].Name: gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{ + "Swap": getSwapExpectation(), + }), + }), + "Swap": getSwapExpectation(), + }), + }), + })) + + ginkgo.By("Validating /stats/summary") + // Give pods a minute to actually start up. + gomega.Eventually(context.Background(), getNodeSummary, 180*time.Second, 15*time.Second).Should(matchExpectations) + // Then the summary should match the expectations a few more times. + gomega.Consistently(context.Background(), getNodeSummary, 30*time.Second, 15*time.Second).Should(matchExpectations) + }) }) }) })