mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 11:13:48 +00:00
node: devicemgr: topomgr: add logs
One of the factors that makes issues #118559 and #109595 hard to debug and fix is that the devicemanager has very few logs in important flows, so it's unnecessarily hard to reconstruct the state from logs. We add a few logs to improve troubleshooting. We keep the additions minimal to be backport-friendly, deferring a more comprehensive review of logging to later PRs. Signed-off-by: Francesco Romani <fromani@redhat.com>
This commit is contained in:
parent
d78671447f
commit
c635a7e7d8
@ -226,6 +226,7 @@ func (m *ManagerImpl) PluginConnected(resourceName string, p plugin.DevicePlugin
|
||||
defer m.mutex.Unlock()
|
||||
m.endpoints[resourceName] = endpointInfo{e, options}
|
||||
|
||||
klog.V(2).InfoS("Device plugin connected", "resourceName", resourceName)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -256,6 +257,7 @@ func (m *ManagerImpl) PluginListAndWatchReceiver(resourceName string, resp *plug
|
||||
}
|
||||
|
||||
func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices []pluginapi.Device) {
|
||||
healthyCount := 0
|
||||
m.mutex.Lock()
|
||||
m.healthyDevices[resourceName] = sets.NewString()
|
||||
m.unhealthyDevices[resourceName] = sets.NewString()
|
||||
@ -264,6 +266,7 @@ func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices [
|
||||
m.allDevices[resourceName][dev.ID] = dev
|
||||
if dev.Health == pluginapi.Healthy {
|
||||
m.healthyDevices[resourceName].Insert(dev.ID)
|
||||
healthyCount++
|
||||
} else {
|
||||
m.unhealthyDevices[resourceName].Insert(dev.ID)
|
||||
}
|
||||
@ -272,6 +275,7 @@ func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices [
|
||||
if err := m.writeCheckpoint(); err != nil {
|
||||
klog.ErrorS(err, "Writing checkpoint encountered")
|
||||
}
|
||||
klog.V(2).InfoS("Processed device updates for resource", "resourceName", resourceName, "totalCount", len(devices), "healthyCount", healthyCount)
|
||||
}
|
||||
|
||||
// GetWatcherHandler returns the plugin handler
|
||||
|
@ -209,7 +209,7 @@ func (m *manager) RemoveContainer(containerID string) error {
|
||||
}
|
||||
|
||||
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
|
||||
klog.InfoS("Topology Admit Handler")
|
||||
klog.InfoS("Topology Admit Handler", "podUID", attrs.Pod.UID, "podNamespace", attrs.Pod.Namespace, "podName", attrs.Pod.Name)
|
||||
metrics.TopologyManagerAdmissionRequestsTotal.Inc()
|
||||
|
||||
startTime := time.Now()
|
||||
|
Loading…
Reference in New Issue
Block a user