write checkpoint only when allocated devices updated.

This commit is contained in:
chenyw1990
2020-10-15 21:20:15 +08:00
parent 28b46be97b
commit 009d46f834

View File

@@ -833,6 +833,7 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
podUID := string(pod.UID)
contName := container.Name
allocatedDevicesUpdated := false
needsUpdateCheckpoint := false
// Extended resources are not allowed to be overcommitted.
// Since device plugin advertises extended resources,
// therefore Requests must be equal to Limits and iterating
@@ -858,6 +859,8 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
continue
}
needsUpdateCheckpoint = true
startRPCTime := time.Now()
// Manager.Allocate involves RPC calls to device plugin, which
// could be heavy-weight. Therefore we want to perform this operation outside
@@ -906,10 +909,13 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
m.mutex.Unlock()
}
// Checkpoints device to container allocation information.
if needsUpdateCheckpoint {
return m.writeCheckpoint()
}
return nil
}
// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.