Fix InPlacePodVerticalScaling restore bug: the content written to the pod_status file differs from the content read back from it

This commit is contained in:
yunwang 2024-08-10 18:11:25 +08:00 committed by yunwang0911
parent 114d4df4b4
commit b00f221a85
2 changed files with 42 additions and 39 deletions

View File

@ -18,6 +18,7 @@ package state
import ( import (
"encoding/json" "encoding/json"
"fmt"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
@ -32,7 +33,7 @@ type PodResourceAllocationInfo struct {
// Checkpoint represents a structure to store pod resource allocation checkpoint data // Checkpoint represents a structure to store pod resource allocation checkpoint data
type Checkpoint struct { type Checkpoint struct {
// Data is a JSON serialized checkpoint data // Data is a serialized PodResourceAllocationInfo
Data string `json:"data"` Data string `json:"data"`
// Checksum is a checksum of Data // Checksum is a checksum of Data
Checksum checksum.Checksum `json:"checksum"` Checksum checksum.Checksum `json:"checksum"`
@ -47,7 +48,7 @@ func NewCheckpoint(allocations *PodResourceAllocationInfo) (*Checkpoint, error)
} }
cp := &Checkpoint{ cp := &Checkpoint{
Data: string(praData), Data: string(serializedAllocations),
} }
cp.Checksum = checksum.New(cp.Data) cp.Checksum = checksum.New(cp.Data)
return cp, nil return cp, nil

View File

@ -28,17 +28,19 @@ import (
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
) )
const testCheckpoint = "pod_status_manager_state"
func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { func newTestStateCheckpoint(t *testing.T) *stateCheckpoint {
// create temp dir // create temp dir
testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test") testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
defer func() { t.Cleanup(func() {
if err := os.RemoveAll(testingDir); err != nil { if err := os.RemoveAll(testingDir); err != nil {
t.Fatal(err) t.Fatal(err)
} }
}() })
cache := NewStateMemory() cache := NewStateMemory()
checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir) checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir)
require.NoError(t, err, "failed to create checkpoint manager") require.NoError(t, err, "failed to create checkpoint manager")
@ -51,8 +53,31 @@ func newTestStateCheckpoint(t *testing.T) *stateCheckpoint {
return sc return sc
} }
func getTestDir(t *testing.T) string {
testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test")
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
if err := os.RemoveAll(testingDir); err != nil {
t.Fatal(err)
}
})
return testingDir
}
func verifyPodResourceAllocation(t *testing.T, expected, actual *PodResourceAllocation, msgAndArgs string) {
for podUID, containerResourceList := range *expected {
require.Equal(t, len(containerResourceList), len((*actual)[podUID]), msgAndArgs)
for containerName, resourceList := range containerResourceList {
for name, quantity := range resourceList.Requests {
require.True(t, quantity.Equal((*actual)[podUID][containerName].Requests[name]), msgAndArgs)
}
}
}
}
func Test_stateCheckpoint_storeState(t *testing.T) { func Test_stateCheckpoint_storeState(t *testing.T) {
sc := newTestStateCheckpoint(t)
type args struct { type args struct {
podResourceAllocation PodResourceAllocation podResourceAllocation PodResourceAllocation
} }
@ -92,44 +117,21 @@ func Test_stateCheckpoint_storeState(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
err := sc.cache.ClearState() testDir := getTestDir(t)
require.NoError(t, err, "failed to clear state") originalSC, err := NewStateCheckpoint(testDir, testCheckpoint)
require.NoError(t, err)
defer func() { err = originalSC.SetPodResourceAllocation(tt.args.podResourceAllocation)
err = sc.checkpointManager.RemoveCheckpoint(sc.checkpointName) require.NoError(t, err)
require.NoError(t, err, "failed to remove checkpoint")
}()
err = sc.cache.SetPodResourceAllocation(tt.args.podResourceAllocation) actual := originalSC.GetPodResourceAllocation()
require.NoError(t, err, "failed to set pod resource allocation") verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation")
err = sc.storeState() newSC, err := NewStateCheckpoint(testDir, testCheckpoint)
require.NoError(t, err, "failed to store state") require.NoError(t, err)
// deep copy cache actual = newSC.GetPodResourceAllocation()
originCache := NewStateMemory() verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation")
podAllocation := sc.cache.GetPodResourceAllocation()
err = originCache.SetPodResourceAllocation(podAllocation)
require.NoError(t, err, "failed to set pod resource allocation")
err = sc.cache.ClearState()
require.NoError(t, err, "failed to clear state")
err = sc.restoreState()
require.NoError(t, err, "failed to restore state")
restoredCache := sc.cache
require.Equal(t, len(originCache.GetPodResourceAllocation()), len(restoredCache.GetPodResourceAllocation()), "restored pod resource allocation is not equal to original pod resource allocation")
for podUID, containerResourceList := range originCache.GetPodResourceAllocation() {
require.Equal(t, len(containerResourceList), len(restoredCache.GetPodResourceAllocation()[podUID]), "restored pod resource allocation is not equal to original pod resource allocation")
for containerName, resourceList := range containerResourceList {
for name, quantity := range resourceList.Requests {
if !quantity.Equal(restoredCache.GetPodResourceAllocation()[podUID][containerName].Requests[name]) {
t.Errorf("restored pod resource allocation is not equal to original pod resource allocation")
}
}
}
}
}) })
} }
} }