From cf8cdaf5a7441dab7d765df605d62d4c6218c7cf Mon Sep 17 00:00:00 2001 From: yunwang Date: Sat, 10 Aug 2024 18:11:25 +0800 Subject: [PATCH 1/9] fix InPlacePodVerticalScaling restore bug: the content wrote to and read from file pod_status are different --- pkg/kubelet/status/state/checkpoint.go | 70 ++++--- pkg/kubelet/status/state/state_checkpoint.go | 41 ++-- .../status/state/state_checkpoint_test.go | 175 ++++++++++++++++++ 3 files changed, 241 insertions(+), 45 deletions(-) create mode 100644 pkg/kubelet/status/state/state_checkpoint_test.go diff --git a/pkg/kubelet/status/state/checkpoint.go b/pkg/kubelet/status/state/checkpoint.go index 525bf675860..31b955d7631 100644 --- a/pkg/kubelet/status/state/checkpoint.go +++ b/pkg/kubelet/status/state/checkpoint.go @@ -18,48 +18,62 @@ package state import ( "encoding/json" - - v1 "k8s.io/api/core/v1" + "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" ) -var _ checkpointmanager.Checkpoint = &PodResourceAllocationCheckpoint{} +var _ checkpointmanager.Checkpoint = &Checkpoint{} -// PodResourceAllocationCheckpoint is used to store resources allocated to a pod in checkpoint -type PodResourceAllocationCheckpoint struct { +type PodResourceAllocationInfo struct { AllocationEntries map[string]map[string]v1.ResourceRequirements `json:"allocationEntries,omitempty"` ResizeStatusEntries map[string]v1.PodResizeStatus `json:"resizeStatusEntries,omitempty"` - Checksum checksum.Checksum `json:"checksum"` } -// NewPodResourceAllocationCheckpoint returns an instance of Checkpoint -func NewPodResourceAllocationCheckpoint() *PodResourceAllocationCheckpoint { - //lint:ignore unexported-type-in-api user-facing error message - return &PodResourceAllocationCheckpoint{ - AllocationEntries: make(map[string]map[string]v1.ResourceRequirements), - ResizeStatusEntries: make(map[string]v1.PodResizeStatus), +// Checkpoint represents a structure to store pod resource allocation checkpoint data +type Checkpoint struct { + // Data is a JSON serialized checkpoint data + Data string `json:"data"` + // Checksum is a checksum of Data + Checksum checksum.Checksum `json:"checksum"` +} + +// NewCheckpoint creates a new checkpoint from a list of claim info states +func NewCheckpoint(data *PodResourceAllocationInfo) (*Checkpoint, error) { + + praData, err := json.Marshal(data) + if err != nil { + return nil, err } + + cp := &Checkpoint{ + Data: string(praData), + } + cp.Checksum = checksum.New(cp.Data) + return cp, nil } -// MarshalCheckpoint returns marshalled checkpoint -func (prc *PodResourceAllocationCheckpoint) MarshalCheckpoint() ([]byte, error) { - // make sure checksum wasn't set before so it doesn't affect output checksum - prc.Checksum = 0 - prc.Checksum = checksum.New(prc) - return json.Marshal(*prc) +func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) { + return json.Marshal(cp) } -// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint -func (prc *PodResourceAllocationCheckpoint) UnmarshalCheckpoint(blob []byte) error { - return json.Unmarshal(blob, prc) +// UnmarshalCheckpoint unmarshals checkpoint from JSON +func (cp *Checkpoint) UnmarshalCheckpoint(blob []byte) error { + return json.Unmarshal(blob, cp) } -// VerifyChecksum verifies that current checksum of checkpoint is valid -func (prc *PodResourceAllocationCheckpoint) VerifyChecksum() error { - ck := prc.Checksum - prc.Checksum = 0 - err := ck.Verify(prc) - prc.Checksum = ck - return err +// VerifyChecksum verifies that current checksum +// of checkpointed Data is valid +func (cp *Checkpoint) VerifyChecksum() error { + return cp.Checksum.Verify(cp.Data) +} + +// GetPodResourceAllocationInfo returns Pod Resource Allocation info states from checkpoint +func (cp *Checkpoint) GetPodResourceAllocationInfo() (*PodResourceAllocationInfo, error) { + var data PodResourceAllocationInfo + if err := json.Unmarshal([]byte(cp.Data), &data); err != nil { + return nil, err + } + + return &data, nil } diff --git a/pkg/kubelet/status/state/state_checkpoint.go b/pkg/kubelet/status/state/state_checkpoint.go index 3230b2adcc4..e4e888d81f4 100644 --- a/pkg/kubelet/status/state/state_checkpoint.go +++ b/pkg/kubelet/status/state/state_checkpoint.go @@ -61,7 +61,10 @@ func (sc *stateCheckpoint) restoreState() error { defer sc.mux.Unlock() var err error - checkpoint := NewPodResourceAllocationCheckpoint() + checkpoint, err := NewCheckpoint(nil) + if err != nil { + return fmt.Errorf("failed to create new checkpoint: %w", err) + } if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint); err != nil { if err == errors.ErrCheckpointNotFound { @@ -69,32 +72,36 @@ func (sc *stateCheckpoint) restoreState() error { } return err } - - sc.cache.SetPodResourceAllocation(checkpoint.AllocationEntries) - sc.cache.SetResizeStatus(checkpoint.ResizeStatusEntries) + praInfo, err := checkpoint.GetPodResourceAllocationInfo() + if err != nil { + return fmt.Errorf("failed to get pod resource allocation info: %w", err) + } + err = sc.cache.SetPodResourceAllocation(praInfo.AllocationEntries) + if err != nil { + return fmt.Errorf("failed to set pod resource allocation: %w", err) + } + err = sc.cache.SetResizeStatus(praInfo.ResizeStatusEntries) + if err != nil { + return fmt.Errorf("failed to set resize status: %w", err) + } klog.V(2).InfoS("State checkpoint: restored pod resource allocation state from checkpoint") return nil } // saves state to a checkpoint, caller is responsible for locking func (sc *stateCheckpoint) storeState() error { - checkpoint := NewPodResourceAllocationCheckpoint() - podAllocation := sc.cache.GetPodResourceAllocation() - for pod := range podAllocation { - checkpoint.AllocationEntries[pod] = make(map[string]v1.ResourceRequirements) - for container, alloc := range podAllocation[pod] { - checkpoint.AllocationEntries[pod][container] = alloc - } - } podResizeStatus := sc.cache.GetResizeStatus() - checkpoint.ResizeStatusEntries = make(map[string]v1.PodResizeStatus) - for pUID, rStatus := range podResizeStatus { - checkpoint.ResizeStatusEntries[pUID] = rStatus + checkpoint, err := NewCheckpoint(&PodResourceAllocationInfo{ + AllocationEntries: podAllocation, + ResizeStatusEntries: podResizeStatus, + }) + if err != nil { + klog.ErrorS(err, "Failed to create checkpoint") + return err } - - err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) + err = sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) if err != nil { klog.ErrorS(err, "Failed to save pod allocation checkpoint") return err diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go new file mode 100644 index 00000000000..572bb13676b --- /dev/null +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -0,0 +1,175 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/require" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" +) + +func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { + // create temp dir + testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") + if err != nil { + t.Fatal(err) + } + defer func() { + if err := os.RemoveAll(testingDir); err != nil { + t.Fatal(err) + } + }() + cache := NewStateMemory() + checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir) + require.NoError(t, err, "failed to create checkpoint manager") + checkpointName := "pod_state_checkpoint" + sc := &stateCheckpoint{ + cache: cache, + checkpointManager: checkpointManager, + checkpointName: checkpointName, + } + return sc +} + +func Test_stateCheckpoint_storeState(t *testing.T) { + sc := newTestStateCheckpoint(t) + type args struct { + podResourceAllocation PodResourceAllocation + } + + tests := []struct { + name string + args args + }{} + suffix := []string{"Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "n", "u", "m", "k", "M", "G", "T", "P", "E", ""} + factor := []string{"1", "0.1", "0.03", "10", "100", "512", "1000", "1024", "700", "10000"} + for _, fact := range factor { + for _, suf := range suffix { + if (suf == "E" || suf == "Ei") && (fact == "1000" || fact == "10000") { + // when fact is 1000 or 10000, suffix "E" or "Ei", the quantity value is overflow + // see detail https://github.com/kubernetes/apimachinery/blob/95b78024e3feada7739b40426690b4f287933fd8/pkg/api/resource/quantity.go#L301 + continue + } + tests = append(tests, struct { + name string + args args + }{ + name: fmt.Sprintf("resource - %s%s", fact, suf), + args: args{ + podResourceAllocation: PodResourceAllocation{ + "pod1": { + "container1": { + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(fmt.Sprintf("%s%s", fact, suf)), + v1.ResourceMemory: resource.MustParse(fmt.Sprintf("%s%s", fact, suf)), + }, + }, + }, + }, + }, + }) + } + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := sc.cache.ClearState() + require.NoError(t, err, "failed to clear state") + + defer func() { + err = sc.checkpointManager.RemoveCheckpoint(sc.checkpointName) + require.NoError(t, err, "failed to remove checkpoint") + }() + + err = sc.cache.SetPodResourceAllocation(tt.args.podResourceAllocation) + require.NoError(t, err, "failed to set pod resource allocation") + + err = sc.storeState() + require.NoError(t, err, "failed to store state") + + // deep copy cache + originCache := NewStateMemory() + podAllocation := sc.cache.GetPodResourceAllocation() + err = originCache.SetPodResourceAllocation(podAllocation) + require.NoError(t, err, "failed to set pod resource allocation") + + err = sc.cache.ClearState() + require.NoError(t, err, "failed to clear state") + + err = sc.restoreState() + require.NoError(t, err, "failed to restore state") + + restoredCache := sc.cache + require.Equal(t, len(originCache.GetPodResourceAllocation()), len(restoredCache.GetPodResourceAllocation()), "restored pod resource allocation is not equal to original pod resource allocation") + for podUID, containerResourceList := range originCache.GetPodResourceAllocation() { + require.Equal(t, len(containerResourceList), len(restoredCache.GetPodResourceAllocation()[podUID]), "restored pod resource allocation is not equal to original pod resource allocation") + for containerName, resourceList := range containerResourceList { + for name, quantity := range resourceList.Requests { + if !quantity.Equal(restoredCache.GetPodResourceAllocation()[podUID][containerName].Requests[name]) { + t.Errorf("restored pod resource allocation is not equal to original pod resource allocation") + } + } + } + } + }) + } +} + +func Test_stateCheckpoint_formatUpgraded(t *testing.T) { + // Based on the PodResourceAllocationInfo struct, it's mostly possible that new field will be added + // in struct PodResourceAllocationInfo, rather than in struct PodResourceAllocationInfo.AllocationEntries. + // Emulate upgrade scenario by pretending that `ResizeStatusEntries` is a new field. + // The checkpoint content doesn't have it and that shouldn't prevent the checkpoint from being loaded. + sc := newTestStateCheckpoint(t) + + // prepare old checkpoint, ResizeStatusEntries is unset, + // pretend that the old checkpoint is unaware for the field ResizeStatusEntries + checkpointContent := `{"data":"{\"allocationEntries\":{\"pod1\":{\"container1\":{\"requests\":{\"cpu\":\"1Ki\",\"memory\":\"1Ki\"}}}}}","checksum":1555601526}` + checkpoint := &Checkpoint{} + err := checkpoint.UnmarshalCheckpoint([]byte(checkpointContent)) + require.NoError(t, err, "failed to unmarshal checkpoint") + + err = sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) + require.NoError(t, err, "failed to create old checkpoint") + + err = sc.restoreState() + require.NoError(t, err, "failed to restore state") + + expectedPodResourceAllocationInfo := &PodResourceAllocationInfo{ + AllocationEntries: map[string]map[string]v1.ResourceRequirements{ + "pod1": { + "container1": { + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1Ki"), + v1.ResourceMemory: resource.MustParse("1Ki"), + }, + }, + }, + }, + ResizeStatusEntries: map[string]v1.PodResizeStatus{}, + } + actualPodResourceAllocationInfo := &PodResourceAllocationInfo{} + actualPodResourceAllocationInfo.AllocationEntries = sc.cache.GetPodResourceAllocation() + actualPodResourceAllocationInfo.ResizeStatusEntries = sc.cache.GetResizeStatus() + require.NoError(t, err, "failed to get pod resource allocation info") + require.Equal(t, expectedPodResourceAllocationInfo, actualPodResourceAllocationInfo, "pod resource allocation info is not equal") +} From b67a5623b17a44e5ba5d2160ae7e4d18136a003c Mon Sep 17 00:00:00 2001 From: yunwang0911 <112385953+yunwang0911@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:39:18 +0800 Subject: [PATCH 2/9] Update pkg/kubelet/status/state/checkpoint.go Co-authored-by: Tim Allclair --- pkg/kubelet/status/state/checkpoint.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/status/state/checkpoint.go b/pkg/kubelet/status/state/checkpoint.go index 31b955d7631..940a70d3e7f 100644 --- a/pkg/kubelet/status/state/checkpoint.go +++ b/pkg/kubelet/status/state/checkpoint.go @@ -39,9 +39,9 @@ type Checkpoint struct { } // NewCheckpoint creates a new checkpoint from a list of claim info states -func NewCheckpoint(data *PodResourceAllocationInfo) (*Checkpoint, error) { +func NewCheckpoint(allocations *PodResourceAllocationInfo) (*Checkpoint, error) { - praData, err := json.Marshal(data) + serializedAllocations, err := json.Marshal(allocations) if err != nil { return nil, err } From 9d18e900c8232e5441b04c1567854e8e49f6a0f2 Mon Sep 17 00:00:00 2001 From: yunwang0911 <112385953+yunwang0911@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:39:32 +0800 Subject: [PATCH 3/9] Update pkg/kubelet/status/state/checkpoint.go Co-authored-by: Tim Allclair --- pkg/kubelet/status/state/checkpoint.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubelet/status/state/checkpoint.go b/pkg/kubelet/status/state/checkpoint.go index 940a70d3e7f..2205953cc8d 100644 --- a/pkg/kubelet/status/state/checkpoint.go +++ b/pkg/kubelet/status/state/checkpoint.go @@ -43,7 +43,7 @@ func NewCheckpoint(allocations *PodResourceAllocationInfo) (*Checkpoint, error) serializedAllocations, err := json.Marshal(allocations) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to serialize allocations for checkpointing: %w", err) } cp := &Checkpoint{ From 8edc80c4709b10d4791722ef6d96356c64e10dd0 Mon Sep 17 00:00:00 2001 From: yunwang0911 <112385953+yunwang0911@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:39:41 +0800 Subject: [PATCH 4/9] Update pkg/kubelet/status/state/state_checkpoint_test.go Co-authored-by: Tim Allclair --- pkg/kubelet/status/state/state_checkpoint_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go index 572bb13676b..aaea1cb08c7 100644 --- a/pkg/kubelet/status/state/state_checkpoint_test.go +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -1,5 +1,5 @@ /* -Copyright 2016 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From 114d4df4b4d25a3f4c4a8c9718fb2767491f8923 Mon Sep 17 00:00:00 2001 From: yunwang0911 <112385953+yunwang0911@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:39:54 +0800 Subject: [PATCH 5/9] Update pkg/kubelet/status/state/state_checkpoint.go Co-authored-by: Tim Allclair --- pkg/kubelet/status/state/state_checkpoint.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/kubelet/status/state/state_checkpoint.go b/pkg/kubelet/status/state/state_checkpoint.go index e4e888d81f4..2cd17ba926f 100644 --- a/pkg/kubelet/status/state/state_checkpoint.go +++ b/pkg/kubelet/status/state/state_checkpoint.go @@ -98,8 +98,7 @@ func (sc *stateCheckpoint) storeState() error { ResizeStatusEntries: podResizeStatus, }) if err != nil { - klog.ErrorS(err, "Failed to create checkpoint") - return err + return fmt.Errorf("failed to create checkpoint: %w", err) } err = sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) if err != nil { From b00f221a85c34678242ecb9afc18e59f8a2e0cb7 Mon Sep 17 00:00:00 2001 From: yunwang Date: Sat, 10 Aug 2024 18:11:25 +0800 Subject: [PATCH 6/9] fix InPlacePodVerticalScaling restore bug: the content wrote to and read from file pod_status are different --- pkg/kubelet/status/state/checkpoint.go | 5 +- .../status/state/state_checkpoint_test.go | 76 ++++++++++--------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/pkg/kubelet/status/state/checkpoint.go b/pkg/kubelet/status/state/checkpoint.go index 2205953cc8d..957fed102cd 100644 --- a/pkg/kubelet/status/state/checkpoint.go +++ b/pkg/kubelet/status/state/checkpoint.go @@ -18,6 +18,7 @@ package state import ( "encoding/json" + "fmt" "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" @@ -32,7 +33,7 @@ type PodResourceAllocationInfo struct { // Checkpoint represents a structure to store pod resource allocation checkpoint data type Checkpoint struct { - // Data is a JSON serialized checkpoint data + // Data is a serialized PodResourceAllocationInfo Data string `json:"data"` // Checksum is a checksum of Data Checksum checksum.Checksum `json:"checksum"` @@ -47,7 +48,7 @@ func NewCheckpoint(allocations *PodResourceAllocationInfo) (*Checkpoint, error) } cp := &Checkpoint{ - Data: string(praData), + Data: string(serializedAllocations), } cp.Checksum = checksum.New(cp.Data) return cp, nil diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go index aaea1cb08c7..e91dc3c40bf 100644 --- a/pkg/kubelet/status/state/state_checkpoint_test.go +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -28,17 +28,19 @@ import ( "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" ) +const testCheckpoint = "pod_status_manager_state" + func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { // create temp dir testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") if err != nil { t.Fatal(err) } - defer func() { + t.Cleanup(func() { if err := os.RemoveAll(testingDir); err != nil { t.Fatal(err) } - }() + }) cache := NewStateMemory() checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir) require.NoError(t, err, "failed to create checkpoint manager") @@ -51,8 +53,31 @@ func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { return sc } +func getTestDir(t *testing.T) string { + testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + if err := os.RemoveAll(testingDir); err != nil { + t.Fatal(err) + } + }) + return testingDir +} + +func verifyPodResourceAllocation(t *testing.T, expected, actual *PodResourceAllocation, msgAndArgs string) { + for podUID, containerResourceList := range *expected { + require.Equal(t, len(containerResourceList), len((*actual)[podUID]), msgAndArgs) + for containerName, resourceList := range containerResourceList { + for name, quantity := range resourceList.Requests { + require.True(t, quantity.Equal((*actual)[podUID][containerName].Requests[name]), msgAndArgs) + } + } + } +} + func Test_stateCheckpoint_storeState(t *testing.T) { - sc := newTestStateCheckpoint(t) type args struct { podResourceAllocation PodResourceAllocation } @@ -92,44 +117,21 @@ func Test_stateCheckpoint_storeState(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := sc.cache.ClearState() - require.NoError(t, err, "failed to clear state") + testDir := getTestDir(t) + originalSC, err := NewStateCheckpoint(testDir, testCheckpoint) + require.NoError(t, err) - defer func() { - err = sc.checkpointManager.RemoveCheckpoint(sc.checkpointName) - require.NoError(t, err, "failed to remove checkpoint") - }() + err = originalSC.SetPodResourceAllocation(tt.args.podResourceAllocation) + require.NoError(t, err) - err = sc.cache.SetPodResourceAllocation(tt.args.podResourceAllocation) - require.NoError(t, err, "failed to set pod resource allocation") + actual := originalSC.GetPodResourceAllocation() + verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation") - err = sc.storeState() - require.NoError(t, err, "failed to store state") + newSC, err := NewStateCheckpoint(testDir, testCheckpoint) + require.NoError(t, err) - // deep copy cache - originCache := NewStateMemory() - podAllocation := sc.cache.GetPodResourceAllocation() - err = originCache.SetPodResourceAllocation(podAllocation) - require.NoError(t, err, "failed to set pod resource allocation") - - err = sc.cache.ClearState() - require.NoError(t, err, "failed to clear state") - - err = sc.restoreState() - require.NoError(t, err, "failed to restore state") - - restoredCache := sc.cache - require.Equal(t, len(originCache.GetPodResourceAllocation()), len(restoredCache.GetPodResourceAllocation()), "restored pod resource allocation is not equal to original pod resource allocation") - for podUID, containerResourceList := range originCache.GetPodResourceAllocation() { - require.Equal(t, len(containerResourceList), len(restoredCache.GetPodResourceAllocation()[podUID]), "restored pod resource allocation is not equal to original pod resource allocation") - for containerName, resourceList := range containerResourceList { - for name, quantity := range resourceList.Requests { - if !quantity.Equal(restoredCache.GetPodResourceAllocation()[podUID][containerName].Requests[name]) { - t.Errorf("restored pod resource allocation is not equal to original pod resource allocation") - } - } - } - } + actual = newSC.GetPodResourceAllocation() + verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation") }) } } From e4c8eefeb2c968fec0c280dd267dec9021c5f01b Mon Sep 17 00:00:00 2001 From: yunwang0911 <112385953+yunwang0911@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:08:53 +0800 Subject: [PATCH 7/9] Update pkg/kubelet/status/state/state_checkpoint_test.go Co-authored-by: Tim Allclair --- pkg/kubelet/status/state/state_checkpoint_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go index e91dc3c40bf..44fdb4fb541 100644 --- a/pkg/kubelet/status/state/state_checkpoint_test.go +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -131,7 +131,7 @@ func Test_stateCheckpoint_storeState(t *testing.T) { require.NoError(t, err) actual = newSC.GetPodResourceAllocation() - verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation") + verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "restored pod resource allocation is not equal to original pod resource allocation") }) } } From 05493c0924185b9cb932915a696cb9b0b487e3a4 Mon Sep 17 00:00:00 2001 From: yunwang0911 <112385953+yunwang0911@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:11:10 +0800 Subject: [PATCH 8/9] Update pkg/kubelet/status/state/state_checkpoint_test.go Co-authored-by: Tim Allclair --- pkg/kubelet/status/state/state_checkpoint_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go index 44fdb4fb541..4d7c1ca86de 100644 --- a/pkg/kubelet/status/state/state_checkpoint_test.go +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -145,7 +145,7 @@ func Test_stateCheckpoint_formatUpgraded(t *testing.T) { // prepare old checkpoint, ResizeStatusEntries is unset, // pretend that the old checkpoint is unaware for the field ResizeStatusEntries - checkpointContent := `{"data":"{\"allocationEntries\":{\"pod1\":{\"container1\":{\"requests\":{\"cpu\":\"1Ki\",\"memory\":\"1Ki\"}}}}}","checksum":1555601526}` + const checkpointContent = `{"data":"{\"allocationEntries\":{\"pod1\":{\"container1\":{\"requests\":{\"cpu\":\"1Ki\",\"memory\":\"1Ki\"}}}}}","checksum":1555601526}` checkpoint := &Checkpoint{} err := checkpoint.UnmarshalCheckpoint([]byte(checkpointContent)) require.NoError(t, err, "failed to unmarshal checkpoint") From f428881ec05989b2c62b2f91279c202a974ebf9f Mon Sep 17 00:00:00 2001 From: yunwang0911 Date: Thu, 31 Oct 2024 14:01:16 +0800 Subject: [PATCH 9/9] Update pkg/kubelet/status/state/state_checkpoint_test.go --- pkg/kubelet/status/state/checkpoint.go | 3 +- .../status/state/state_checkpoint_test.go | 35 +++++++------------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/pkg/kubelet/status/state/checkpoint.go b/pkg/kubelet/status/state/checkpoint.go index 957fed102cd..68692f8c1f3 100644 --- a/pkg/kubelet/status/state/checkpoint.go +++ b/pkg/kubelet/status/state/checkpoint.go @@ -19,7 +19,8 @@ package state import ( "encoding/json" "fmt" - "k8s.io/api/core/v1" + + v1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" ) diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go index 4d7c1ca86de..ca3546553ff 100644 --- a/pkg/kubelet/status/state/state_checkpoint_test.go +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -31,16 +31,7 @@ import ( const testCheckpoint = "pod_status_manager_state" func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { - // create temp dir - testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") - if err != nil { - t.Fatal(err) - } - t.Cleanup(func() { - if err := os.RemoveAll(testingDir); err != nil { - t.Fatal(err) - } - }) + testingDir := getTestDir(t) cache := NewStateMemory() checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir) require.NoError(t, err, "failed to create checkpoint manager") @@ -55,9 +46,7 @@ func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { func getTestDir(t *testing.T) string { testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err, "failed to create temp dir") t.Cleanup(func() { if err := os.RemoveAll(testingDir); err != nil { t.Fatal(err) @@ -146,16 +135,6 @@ func Test_stateCheckpoint_formatUpgraded(t *testing.T) { // prepare old checkpoint, ResizeStatusEntries is unset, // pretend that the old checkpoint is unaware for the field ResizeStatusEntries const checkpointContent = `{"data":"{\"allocationEntries\":{\"pod1\":{\"container1\":{\"requests\":{\"cpu\":\"1Ki\",\"memory\":\"1Ki\"}}}}}","checksum":1555601526}` - checkpoint := &Checkpoint{} - err := checkpoint.UnmarshalCheckpoint([]byte(checkpointContent)) - require.NoError(t, err, "failed to unmarshal checkpoint") - - err = sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) - require.NoError(t, err, "failed to create old checkpoint") - - err = sc.restoreState() - require.NoError(t, err, "failed to restore state") - expectedPodResourceAllocationInfo := &PodResourceAllocationInfo{ AllocationEntries: map[string]map[string]v1.ResourceRequirements{ "pod1": { @@ -169,6 +148,16 @@ func Test_stateCheckpoint_formatUpgraded(t *testing.T) { }, ResizeStatusEntries: map[string]v1.PodResizeStatus{}, } + checkpoint := &Checkpoint{} + err := checkpoint.UnmarshalCheckpoint([]byte(checkpointContent)) + require.NoError(t, err, "failed to unmarshal checkpoint") + + err = sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) + require.NoError(t, err, "failed to create old checkpoint") + + err = sc.restoreState() + require.NoError(t, err, "failed to restore state") + actualPodResourceAllocationInfo := &PodResourceAllocationInfo{} actualPodResourceAllocationInfo.AllocationEntries = sc.cache.GetPodResourceAllocation() actualPodResourceAllocationInfo.ResizeStatusEntries = sc.cache.GetResizeStatus()