Merge pull request #120569 from ffromani/cpumanager-extra-logs

enhance the cpumanager logs
This commit is contained in:
Kubernetes Prow Robot 2024-09-12 00:25:18 +01:00 committed by GitHub
commit 11e8169a16
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 19 deletions

View File

@ -380,23 +380,27 @@ func (m *manager) removeStaleState() {
assignments := m.state.GetCPUAssignments()
for podUID := range assignments {
for containerName := range assignments[podUID] {
if _, ok := activeContainers[podUID][containerName]; !ok {
klog.ErrorS(nil, "RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil {
klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName)
}
if _, ok := activeContainers[podUID][containerName]; ok {
klog.V(5).InfoS("RemoveStaleState: container still active", "podUID", podUID, "containerName", containerName)
continue
}
klog.V(2).InfoS("RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil {
klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName)
}
}
}
m.containerMap.Visit(func(podUID, containerName, containerID string) {
if _, ok := activeContainers[podUID][containerName]; !ok {
klog.ErrorS(nil, "RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil {
klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName)
}
if _, ok := activeContainers[podUID][containerName]; ok {
klog.V(5).InfoS("RemoveStaleState: containerMap: container still active", "podUID", podUID, "containerName", containerName)
return
}
klog.V(2).InfoS("RemoveStaleState: containerMap: removing container", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil {
klog.ErrorS(err, "RemoveStaleState: containerMap: failed to remove container", "podUID", podUID, "containerName", containerName)
}
})
}
@ -410,7 +414,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
for _, pod := range m.activePods() {
pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID)
if !ok {
klog.V(4).InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod))
klog.V(5).InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod))
failure = append(failure, reconciledContainer{pod.Name, "", ""})
continue
}
@ -420,14 +424,14 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
for _, container := range allContainers {
containerID, err := findContainerIDByName(&pstatus, container.Name)
if err != nil {
klog.V(4).InfoS("ReconcileState: skipping container; ID not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
klog.V(5).InfoS("ReconcileState: skipping container; ID not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
continue
}
cstatus, err := findContainerStatusByName(&pstatus, container.Name)
if err != nil {
klog.V(4).InfoS("ReconcileState: skipping container; container status not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
klog.V(5).InfoS("ReconcileState: skipping container; container status not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
continue
}
@ -463,14 +467,14 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name)
if cset.IsEmpty() {
// NOTE: This should not happen outside of tests.
klog.V(4).InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name)
klog.V(2).InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
continue
}
lcset := m.lastUpdateState.GetCPUSetOrDefault(string(pod.UID), container.Name)
if !cset.Equals(lcset) {
klog.V(4).InfoS("ReconcileState: updating container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
klog.V(5).InfoS("ReconcileState: updating container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
err = m.updateContainerCPUSet(ctx, containerID, cset)
if err != nil {
klog.ErrorS(err, "ReconcileState: failed to update container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)

View File

@ -515,7 +515,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
// kubelet restart, for example.
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
if allocated.Size() != requested {
klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size())
klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size())
// An empty list of hints will be treated as a preference that cannot be satisfied.
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
@ -565,7 +565,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
// kubelet restart, for example.
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
if allocated.Size() != requestedByContainer {
klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size())
klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size())
// An empty list of hints will be treated as a preference that cannot be satisfied.
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.