Write checkpoint only when allocated devices are updated.

2025-08-31 16:46:54 +00:00 · 2020-10-15 21:20:15 +08:00
parent 28b46be97b
commit 009d46f834
1 changed files with 8 additions and 2 deletions
--- a/pkg/kubelet/cm/devicemanager/manager.go
+++ b/pkg/kubelet/cm/devicemanager/manager.go
@@ -833,6 +833,7 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
 	podUID := string(pod.UID)
 	contName := container.Name
 	allocatedDevicesUpdated := false
+	needsUpdateCheckpoint := false
 	// Extended resources are not allowed to be overcommitted.
 	// Since device plugin advertises extended resources,
 	// therefore Requests must be equal to Limits and iterating
@@ -858,6 +859,8 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
 			continue
 		}

+		needsUpdateCheckpoint = true
+
 		startRPCTime := time.Now()
 		// Manager.Allocate involves RPC calls to device plugin, which
 		// could be heavy-weight. Therefore we want to perform this operation outside
@@ -906,10 +909,13 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
 		m.mutex.Unlock()
 	}

-	// Checkpoints device to container allocation information.
+	if needsUpdateCheckpoint {
 		return m.writeCheckpoint()
 	}

+	return nil
+}
+
 // GetDeviceRunContainerOptions checks whether we have cached containerDevices
 // for the passed-in <pod, container> and returns its DeviceRunContainerOptions
 // for the found one. An empty struct is returned in case no cached state is found.