Merge pull request #120569 from ffromani/cpumanager-extra-logs

enhance the cpumanager logs
This commit is contained in:
Kubernetes Prow Robot 2024-09-12 00:25:18 +01:00 committed by GitHub
commit 11e8169a16
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 19 deletions

View File

@@ -380,23 +380,27 @@ func (m *manager) removeStaleState() {
assignments := m.state.GetCPUAssignments() assignments := m.state.GetCPUAssignments()
for podUID := range assignments { for podUID := range assignments {
for containerName := range assignments[podUID] { for containerName := range assignments[podUID] {
if _, ok := activeContainers[podUID][containerName]; !ok { if _, ok := activeContainers[podUID][containerName]; ok {
klog.ErrorS(nil, "RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName) klog.V(5).InfoS("RemoveStaleState: container still active", "podUID", podUID, "containerName", containerName)
continue
}
klog.V(2).InfoS("RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName) err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil { if err != nil {
klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName) klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName)
} }
} }
} }
}
m.containerMap.Visit(func(podUID, containerName, containerID string) { m.containerMap.Visit(func(podUID, containerName, containerID string) {
if _, ok := activeContainers[podUID][containerName]; !ok { if _, ok := activeContainers[podUID][containerName]; ok {
klog.ErrorS(nil, "RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName) klog.V(5).InfoS("RemoveStaleState: containerMap: container still active", "podUID", podUID, "containerName", containerName)
return
}
klog.V(2).InfoS("RemoveStaleState: containerMap: removing container", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName) err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil { if err != nil {
klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName) klog.ErrorS(err, "RemoveStaleState: containerMap: failed to remove container", "podUID", podUID, "containerName", containerName)
}
} }
}) })
} }
@@ -410,7 +414,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
for _, pod := range m.activePods() { for _, pod := range m.activePods() {
pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID) pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID)
if !ok { if !ok {
klog.V(4).InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod)) klog.V(5).InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod))
failure = append(failure, reconciledContainer{pod.Name, "", ""}) failure = append(failure, reconciledContainer{pod.Name, "", ""})
continue continue
} }
@@ -420,14 +424,14 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
for _, container := range allContainers { for _, container := range allContainers {
containerID, err := findContainerIDByName(&pstatus, container.Name) containerID, err := findContainerIDByName(&pstatus, container.Name)
if err != nil { if err != nil {
klog.V(4).InfoS("ReconcileState: skipping container; ID not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err) klog.V(5).InfoS("ReconcileState: skipping container; ID not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
continue continue
} }
cstatus, err := findContainerStatusByName(&pstatus, container.Name) cstatus, err := findContainerStatusByName(&pstatus, container.Name)
if err != nil { if err != nil {
klog.V(4).InfoS("ReconcileState: skipping container; container status not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err) klog.V(5).InfoS("ReconcileState: skipping container; container status not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""}) failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
continue continue
} }
@@ -463,14 +467,14 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name) cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name)
if cset.IsEmpty() { if cset.IsEmpty() {
// NOTE: This should not happen outside of tests. // NOTE: This should not happen outside of tests.
klog.V(4).InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name) klog.V(2).InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
continue continue
} }
lcset := m.lastUpdateState.GetCPUSetOrDefault(string(pod.UID), container.Name) lcset := m.lastUpdateState.GetCPUSetOrDefault(string(pod.UID), container.Name)
if !cset.Equals(lcset) { if !cset.Equals(lcset) {
klog.V(4).InfoS("ReconcileState: updating container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset) klog.V(5).InfoS("ReconcileState: updating container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
err = m.updateContainerCPUSet(ctx, containerID, cset) err = m.updateContainerCPUSet(ctx, containerID, cset)
if err != nil { if err != nil {
klog.ErrorS(err, "ReconcileState: failed to update container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset) klog.ErrorS(err, "ReconcileState: failed to update container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)

View File

@@ -515,7 +515,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
// kubelet restart, for example. // kubelet restart, for example.
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
if allocated.Size() != requested { if allocated.Size() != requested {
klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size()) klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size())
// An empty list of hints will be treated as a preference that cannot be satisfied. // An empty list of hints will be treated as a preference that cannot be satisfied.
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error. // For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
@@ -565,7 +565,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
// kubelet restart, for example. // kubelet restart, for example.
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists { if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
if allocated.Size() != requestedByContainer { if allocated.Size() != requestedByContainer {
klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size()) klog.InfoS("CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size())
// An empty list of hints will be treated as a preference that cannot be satisfied. // An empty list of hints will be treated as a preference that cannot be satisfied.
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false]. // In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error. // For all but the best-effort policy, the Topology Manager will throw a pod-admission error.