diff --git a/pkg/kubelet/status/fake_status_manager.go b/pkg/kubelet/status/fake_status_manager.go index 6d2031419df..9590b230be9 100644 --- a/pkg/kubelet/status/fake_status_manager.go +++ b/pkg/kubelet/status/fake_status_manager.go @@ -93,6 +93,6 @@ func (m *fakeManager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodRe // NewFakeManager creates empty/fake memory manager func NewFakeManager() Manager { return &fakeManager{ - state: state.NewStateMemory(), + state: state.NewStateMemory(state.PodResourceAllocation{}, state.PodResizeStatus{}), } } diff --git a/pkg/kubelet/status/state/state.go b/pkg/kubelet/status/state/state.go index 10a66be46c3..d980c96dea7 100644 --- a/pkg/kubelet/status/state/state.go +++ b/pkg/kubelet/status/state/state.go @@ -47,7 +47,7 @@ type Reader interface { type writer interface { SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceRequirements) error - SetPodResourceAllocation(PodResourceAllocation) error + SetPodResourceAllocation(podUID string, alloc map[string]v1.ResourceRequirements) error SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) Delete(podUID string, containerName string) error ClearState() error diff --git a/pkg/kubelet/status/state/state_checkpoint.go b/pkg/kubelet/status/state/state_checkpoint.go index 2e1ca729100..faa53db0525 100644 --- a/pkg/kubelet/status/state/state_checkpoint.go +++ b/pkg/kubelet/status/state/state_checkpoint.go @@ -43,7 +43,7 @@ func NewStateCheckpoint(stateDir, checkpointName string) (State, error) { return nil, fmt.Errorf("failed to initialize checkpoint manager for pod allocation tracking: %v", err) } stateCheckpoint := &stateCheckpoint{ - cache: NewStateMemory(), + cache: NewStateMemory(PodResourceAllocation{}, PodResizeStatus{}), checkpointManager: checkpointManager, checkpointName: checkpointName, } @@ -76,10 +76,14 @@ func (sc *stateCheckpoint) restoreState() error { if err != nil { return fmt.Errorf("failed to get pod resource allocation info: %w", err) } - err = sc.cache.SetPodResourceAllocation(praInfo.AllocationEntries) - if err != nil { - return fmt.Errorf("failed to set pod resource allocation: %w", err) + + for podUID, alloc := range praInfo.AllocationEntries { + err = sc.cache.SetPodResourceAllocation(podUID, alloc) + if err != nil { + klog.ErrorS(err, "failed to set pod resource allocation") + } } + klog.V(2).InfoS("State checkpoint: restored pod resource allocation state from checkpoint") return nil } @@ -132,10 +136,15 @@ func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, contain } // SetPodResourceAllocation sets pod resource allocation -func (sc *stateCheckpoint) SetPodResourceAllocation(a PodResourceAllocation) error { +func (sc *stateCheckpoint) SetPodResourceAllocation(podUID string, alloc map[string]v1.ResourceRequirements) error { sc.mux.Lock() defer sc.mux.Unlock() - sc.cache.SetPodResourceAllocation(a) + + err := sc.cache.SetPodResourceAllocation(podUID, alloc) + if err != nil { + return err + } + return sc.storeState() } @@ -185,7 +194,7 @@ func (sc *noopStateCheckpoint) SetContainerResourceAllocation(_ string, _ string return nil } -func (sc *noopStateCheckpoint) SetPodResourceAllocation(_ PodResourceAllocation) error { +func (sc *noopStateCheckpoint) SetPodResourceAllocation(_ string, _ map[string]v1.ResourceRequirements) error { return nil } diff --git a/pkg/kubelet/status/state/state_checkpoint_test.go b/pkg/kubelet/status/state/state_checkpoint_test.go index 03be8c1c46d..f5467542afa 100644 --- a/pkg/kubelet/status/state/state_checkpoint_test.go +++ b/pkg/kubelet/status/state/state_checkpoint_test.go @@ -32,7 +32,7 @@ const testCheckpoint = "pod_status_manager_state" func newTestStateCheckpoint(t *testing.T) *stateCheckpoint { testingDir := getTestDir(t) - cache := NewStateMemory() + cache := NewStateMemory(PodResourceAllocation{}, PodResizeStatus{}) checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir) require.NoError(t, err, "failed to create checkpoint manager") checkpointName := "pod_state_checkpoint" @@ -110,8 +110,10 @@ func Test_stateCheckpoint_storeState(t *testing.T) { originalSC, err := NewStateCheckpoint(testDir, testCheckpoint) require.NoError(t, err) - err = originalSC.SetPodResourceAllocation(tt.args.podResourceAllocation) - require.NoError(t, err) + for podUID, alloc := range tt.args.podResourceAllocation { + err = originalSC.SetPodResourceAllocation(podUID, alloc) + require.NoError(t, err) + } actual := originalSC.GetPodResourceAllocation() verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation") diff --git a/pkg/kubelet/status/state/state_mem.go b/pkg/kubelet/status/state/state_mem.go index c8107dacbe2..8730f0ea1d3 100644 --- a/pkg/kubelet/status/state/state_mem.go +++ b/pkg/kubelet/status/state/state_mem.go @@ -32,11 +32,11 @@ type stateMemory struct { var _ State = &stateMemory{} // NewStateMemory creates new State to track resources allocated to pods -func NewStateMemory() State { +func NewStateMemory(alloc PodResourceAllocation, stats PodResizeStatus) State { klog.V(2).InfoS("Initialized new in-memory state store for pod resource allocation tracking") return &stateMemory{ - podAllocation: PodResourceAllocation{}, - podResizeStatus: PodResizeStatus{}, + podAllocation: alloc, + podResizeStatus: stats, } } @@ -74,12 +74,15 @@ func (s *stateMemory) SetContainerResourceAllocation(podUID string, containerNam return nil } -func (s *stateMemory) SetPodResourceAllocation(a PodResourceAllocation) error { +func (s *stateMemory) SetPodResourceAllocation(podUID string, alloc map[string]v1.ResourceRequirements) error { s.Lock() defer s.Unlock() - s.podAllocation = a.Clone() - klog.V(3).InfoS("Updated pod resource allocation", "allocation", a) + for containerName, containerAlloc := range alloc { + s.podAllocation[podUID][containerName] = containerAlloc + } + + klog.V(3).InfoS("Updated pod resource allocation", "podUID", podUID, "allocation", alloc) return nil } diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go index 0156efa2cbf..7937d27bb53 100644 --- a/pkg/kubelet/status/status_manager.go +++ b/pkg/kubelet/status/status_manager.go @@ -297,16 +297,14 @@ func (m *manager) SetPodAllocation(pod *v1.Pod) error { m.podStatusesLock.RLock() defer m.podStatusesLock.RUnlock() - podUID := string(pod.UID) - podAlloc := state.PodResourceAllocation{} - podAlloc[podUID] = make(map[string]v1.ResourceRequirements) + podAlloc := make(map[string]v1.ResourceRequirements) for _, container := range pod.Spec.Containers { alloc := *container.Resources.DeepCopy() - podAlloc[podUID][container.Name] = alloc + podAlloc[container.Name] = alloc } - return m.state.SetPodResourceAllocation(podUID, podAlloc) + return m.state.SetPodResourceAllocation(string(pod.UID), podAlloc) } // SetPodResizeStatus checkpoints the last resizing decision for the pod.