From b00f221a85c34678242ecb9afc18e59f8a2e0cb7 Mon Sep 17 00:00:00 2001 From: yunwang Date: Sat, 10 Aug 2024 18:11:25 +0800 Subject: [PATCH] fix InPlacePodVerticalScaling restore bug: the content wrote to and read from file pod_status are different --- pkg/kubelet/status/state/checkpoint.go | 5 +- .../status/state/state_checkpoint_test.go | 76 ++++++++++--------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/pkg/kubelet/status/state/checkpoint.go b/pkg/kubelet/status/state/checkpoint.go index 2205953cc8d..957fed102cd 100644 --- a/pkg/kubelet/status/state/checkpoint.go +++ b/pkg/kubelet/status/state/checkpoint.go @@ -18,6 +18,7 @@ package state import ( "encoding/json" + "fmt" "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" @@ -32,7 +33,7 @@ type PodResourceAllocationInfo struct { // Checkpoint represents a structure to store pod resource allocation checkpoint data type Checkpoint struct { - // Data is a JSON serialized checkpoint data + // Data is a serialized PodResourceAllocationInfo Data string `json:"data"` // Checksum is a checksum of Data Checksum checksum.Checksum `json:"checksum"` @@ -47,7 +48,7 @@ func NewCheckpoint(allocations *PodResourceAllocationInfo) (*Checkpoint, error) } cp := &Checkpoint{ - Data: string(praData), + Data: string(serializedAllocations), } cp.Checksum = checksum.New(cp.Data) return cp, nil diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go index aaea1cb08c7..e91dc3c40bf 100644 --- a/pkg/kubelet/status/state/state_checkpoint_test.go +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -28,17 +28,19 @@ import ( "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" ) +const testCheckpoint = "pod_status_manager_state" + func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { // create temp dir testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") if err != nil { t.Fatal(err) } - defer func() { + t.Cleanup(func() { if err := os.RemoveAll(testingDir); err != nil { t.Fatal(err) } - }() + }) cache := NewStateMemory() checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir) require.NoError(t, err, "failed to create checkpoint manager") @@ -51,8 +53,31 @@ func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { return sc } +func getTestDir(t *testing.T) string { + testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + if err := os.RemoveAll(testingDir); err != nil { + t.Fatal(err) + } + }) + return testingDir +} + +func verifyPodResourceAllocation(t *testing.T, expected, actual *PodResourceAllocation, msgAndArgs string) { + for podUID, containerResourceList := range *expected { + require.Equal(t, len(containerResourceList), len((*actual)[podUID]), msgAndArgs) + for containerName, resourceList := range containerResourceList { + for name, quantity := range resourceList.Requests { + require.True(t, quantity.Equal((*actual)[podUID][containerName].Requests[name]), msgAndArgs) + } + } + } +} + func Test_stateCheckpoint_storeState(t *testing.T) { - sc := newTestStateCheckpoint(t) type args struct { podResourceAllocation PodResourceAllocation } @@ -92,44 +117,21 @@ func Test_stateCheckpoint_storeState(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := sc.cache.ClearState() - require.NoError(t, err, "failed to clear state") + testDir := getTestDir(t) + originalSC, err := NewStateCheckpoint(testDir, testCheckpoint) + require.NoError(t, err) - defer func() { - err = sc.checkpointManager.RemoveCheckpoint(sc.checkpointName) - require.NoError(t, err, "failed to remove checkpoint") - }() + err = originalSC.SetPodResourceAllocation(tt.args.podResourceAllocation) + require.NoError(t, err) - err = sc.cache.SetPodResourceAllocation(tt.args.podResourceAllocation) - require.NoError(t, err, "failed to set pod resource allocation") + actual := originalSC.GetPodResourceAllocation() + verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation") - err = sc.storeState() - require.NoError(t, err, "failed to store state") + newSC, err := NewStateCheckpoint(testDir, testCheckpoint) + require.NoError(t, err) - // deep copy cache - originCache := NewStateMemory() - podAllocation := sc.cache.GetPodResourceAllocation() - err = originCache.SetPodResourceAllocation(podAllocation) - require.NoError(t, err, "failed to set pod resource allocation") - - err = sc.cache.ClearState() - require.NoError(t, err, "failed to clear state") - - err = sc.restoreState() - require.NoError(t, err, "failed to restore state") - - restoredCache := sc.cache - require.Equal(t, len(originCache.GetPodResourceAllocation()), len(restoredCache.GetPodResourceAllocation()), "restored pod resource allocation is not equal to original pod resource allocation") - for podUID, containerResourceList := range originCache.GetPodResourceAllocation() { - require.Equal(t, len(containerResourceList), len(restoredCache.GetPodResourceAllocation()[podUID]), "restored pod resource allocation is not equal to original pod resource allocation") - for containerName, resourceList := range containerResourceList { - for name, quantity := range resourceList.Requests { - if !quantity.Equal(restoredCache.GetPodResourceAllocation()[podUID][containerName].Requests[name]) { - t.Errorf("restored pod resource allocation is not equal to original pod resource allocation") - } - } - } - } + actual = newSC.GetPodResourceAllocation() + verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation") }) } }