From 8d24c8d0ab65974752e55b3caac8c81b9e9630e6 Mon Sep 17 00:00:00 2001
From: pacoxu
Date: Fri, 12 Mar 2021 16:40:48 +0800
Subject: [PATCH] update structured log for cpumanager/cpu_manager.go

---
 pkg/kubelet/cm/cpumanager/cpu_manager.go      | 34 +++++++++----------
 pkg/kubelet/cm/cpumanager/fake_cpu_manager.go |  8 ++---
 pkg/kubelet/cm/cpumanager/policy_static.go    | 33 +++++++++---------
 3 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 673e627db08..5a6e5082f15 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -155,7 +155,7 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
 	if err != nil {
 		return nil, err
 	}
-	klog.Infof("[cpumanager] detected CPU topology: %v", topo)
+	klog.InfoS("Detected CPU topology", "topology", topo)
 
 	reservedCPUs, ok := nodeAllocatableReservation[v1.ResourceCPU]
 	if !ok {
@@ -196,8 +196,8 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
 }
 
 func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
-	klog.Infof("[cpumanager] starting with %s policy", m.policy.Name())
-	klog.Infof("[cpumanager] reconciling every %v", m.reconcilePeriod)
+	klog.InfoS("Starting CPU manager", "policy", m.policy.Name())
+	klog.InfoS("Reconciling", "reconcilePeriod", m.reconcilePeriod)
 	m.sourcesReady = sourcesReady
 	m.activePods = activePods
 	m.podStatusProvider = podStatusProvider
@@ -206,14 +206,14 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
 
 	stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, cpuManagerStateFileName, m.policy.Name(), m.containerMap)
 	if err != nil {
-		klog.Errorf("[cpumanager] could not initialize checkpoint manager: %v, please drain node and remove policy state file", err)
+		klog.ErrorS(err, "Could not initialize checkpoint manager, please drain node and remove policy state file")
 		return err
 	}
 	m.state = stateImpl
 
 	err = m.policy.Start(m.state)
 	if err != nil {
-		klog.Errorf("[cpumanager] policy start error: %v", err)
+		klog.ErrorS(err, "Policy start error")
 		return err
 	}
 
@@ -238,7 +238,7 @@ func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error {
 	// Call down into the policy to assign this container CPUs if required.
 	err := m.policy.Allocate(m.state, p, c)
 	if err != nil {
-		klog.Errorf("[cpumanager] Allocate error: %v", err)
+		klog.ErrorS(err, "Allocate error")
 		return err
 	}
 
@@ -257,7 +257,7 @@ func (m *manager) RemoveContainer(containerID string) error {
 
 	err := m.policyRemoveContainerByID(containerID)
 	if err != nil {
-		klog.Errorf("[cpumanager] RemoveContainer error: %v", err)
+		klog.ErrorS(err, "RemoveContainer error")
 		return err
 	}
 
@@ -348,10 +348,10 @@ func (m *manager) removeStaleState() {
 	for podUID := range assignments {
 		for containerName := range assignments[podUID] {
 			if _, ok := activeContainers[podUID][containerName]; !ok {
-				klog.Errorf("[cpumanager] removeStaleState: removing (pod %s, container: %s)", podUID, containerName)
+				klog.ErrorS(nil, "RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName)
 				err := m.policyRemoveContainerByRef(podUID, containerName)
 				if err != nil {
-					klog.Errorf("[cpumanager] removeStaleState: failed to remove (pod %s, container %s), error: %v)", podUID, containerName, err)
+					klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName)
 				}
 			}
 		}
@@ -366,7 +366,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
 	for _, pod := range m.activePods() {
 		pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID)
 		if !ok {
-			klog.Warningf("[cpumanager] reconcileState: skipping pod; status not found (pod: %s)", pod.Name)
+			klog.InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod))
 			failure = append(failure, reconciledContainer{pod.Name, "", ""})
 			continue
 		}
@@ -376,21 +376,21 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
 		for _, container := range allContainers {
 			containerID, err := findContainerIDByName(&pstatus, container.Name)
 			if err != nil {
-				klog.Warningf("[cpumanager] reconcileState: skipping container; ID not found in pod status (pod: %s, container: %s, error: %v)", pod.Name, container.Name, err)
+				klog.InfoS("ReconcileState: skipping container; ID not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
 				failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
 				continue
 			}
 
 			cstatus, err := findContainerStatusByName(&pstatus, container.Name)
 			if err != nil {
-				klog.Warningf("[cpumanager] reconcileState: skipping container; container status not found in pod status (pod: %s, container: %s, error: %v)", pod.Name, container.Name, err)
+				klog.InfoS("ReconcileState: skipping container; container status not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
 				failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
 				continue
 			}
 
 			if cstatus.State.Waiting != nil || (cstatus.State.Waiting == nil && cstatus.State.Running == nil && cstatus.State.Terminated == nil) {
-				klog.Warningf("[cpumanager] reconcileState: skipping container; container still in the waiting state (pod: %s, container: %s, error: %v)", pod.Name, container.Name, err)
+				klog.InfoS("ReconcileState: skipping container; container still in the waiting state", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
 				failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
 				continue
 			}
@@ -404,7 +404,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
 				// was allocated.
 				_, _, err := m.containerMap.GetContainerRef(containerID)
 				if err == nil {
-					klog.Warningf("[cpumanager] reconcileState: ignoring terminated container (pod: %s, container id: %s)", pod.Name, containerID)
+					klog.InfoS("ReconcileState: ignoring terminated container", "pod", klog.KObj(pod), "containerID", containerID)
 				}
 				m.Unlock()
 				continue
@@ -419,15 +419,15 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
 			cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name)
 			if cset.IsEmpty() {
 				// NOTE: This should not happen outside of tests.
-				klog.Infof("[cpumanager] reconcileState: skipping container; assigned cpuset is empty (pod: %s, container: %s)", pod.Name, container.Name)
+				klog.InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name)
 				failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
 				continue
 			}
 
-			klog.V(4).Infof("[cpumanager] reconcileState: updating container (pod: %s, container: %s, container id: %s, cpuset: \"%v\")", pod.Name, container.Name, containerID, cset)
+			klog.V(4).InfoS("ReconcileState: updating container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
 			err = m.updateContainerCPUSet(containerID, cset)
 			if err != nil {
-				klog.Errorf("[cpumanager] reconcileState: failed to update container (pod: %s, container: %s, container id: %s, cpuset: \"%v\", error: %v)", pod.Name, container.Name, containerID, cset, err)
+				klog.ErrorS(err, "ReconcileState: failed to update container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
 				failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
 				continue
 			}
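Note for reviewers: every cpu_manager.go hunk above follows the same klog v2 structured pattern: a constant message, then alternating key/value pairs, with klog.KObj(pod) replacing hand-formatted pod names and klog.ErrorS taking the error as its first argument (nil is allowed when the original Errorf had no error value, as in removeStaleState). A minimal, self-contained sketch of these call shapes; the fakePod type and all values are illustrative, not kubelet code:

    package main

    import (
    	"errors"
    	"flag"

    	"k8s.io/klog/v2"
    )

    // fakePod satisfies klog.KMetadata (GetName/GetNamespace), so
    // klog.KObj can render it as a namespace/name reference.
    type fakePod struct{ name, namespace string }

    func (p fakePod) GetName() string      { return p.name }
    func (p fakePod) GetNamespace() string { return p.namespace }

    func main() {
    	klog.InitFlags(nil)
    	flag.Parse() // run with -v=4 to see the V(4) line below
    	defer klog.Flush()

    	pod := fakePod{name: "nginx", namespace: "default"}

    	// InfoS: constant message plus key/value pairs.
    	klog.InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod))

    	// ErrorS: error first; nil is legal for error-level lines that
    	// have no error object.
    	klog.ErrorS(errors.New("container gone"), "RemoveStaleState: failed to remove container",
    		"podUID", "uid-1234", "containerName", "app")

    	// V(4).InfoS gates a structured line behind verbosity level 4.
    	klog.V(4).InfoS("ReconcileState: updating container", "containerID", "abc123", "cpuSet", "0-3")
    }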
diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
index 427dc762d87..856d322fae7 100644
--- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
@@ -56,12 +56,12 @@ func (m *fakeManager) RemoveContainer(containerID string) error {
 }
 
 func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
-	klog.InfoS("Get Container Topology Hints")
+	klog.InfoS("Get container topology hints")
 	return map[string][]topologymanager.TopologyHint{}
 }
 
 func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
-	klog.InfoS("Get Pod Topology Hints")
+	klog.InfoS("Get pod topology hints")
 	return map[string][]topologymanager.TopologyHint{}
 }
 
@@ -70,12 +70,12 @@ func (m *fakeManager) State() state.Reader {
 }
 
 func (m *fakeManager) GetCPUs(podUID, containerName string) cpuset.CPUSet {
-	klog.InfoS("GetCPUs", "podUID", podUID, "containerName", container.Name)
+	klog.InfoS("GetCPUs", "podUID", podUID, "containerName", containerName)
 	return cpuset.CPUSet{}
 }
 
 func (m *fakeManager) GetAllocatableCPUs() cpuset.CPUSet {
-	klog.InfoS("Get Allocatable Cpus")
+	klog.InfoS("Get Allocatable CPUs")
 	return cpuset.CPUSet{}
 }
 
"k8s.io/kubernetes/pkg/kubelet/util/format" ) // PolicyStatic is the name of the static policy @@ -103,11 +102,11 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv } if reserved.Size() != numReservedCPUs { - err := fmt.ErrorS(nil, "[cpumanager] unable to reserve the required amount of CPUs (not equal)", "reserved", reserved, "numReservedCPUs", numReservedCPUs) + err := fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of %s did not equal %d)", reserved, numReservedCPUs) return nil, err } - klog.InfoS("[cpumanager] reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved) + klog.InfoS("Reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved) return &staticPolicy{ topology: topology, @@ -123,7 +122,7 @@ func (p *staticPolicy) Name() string { func (p *staticPolicy) Start(s state.State) error { if err := p.validateState(s); err != nil { - klog.ErrorS(err, "[cpumanager] static policy invalid state, please drain node and remove policy state file") + klog.ErrorS(err, "Static policy invalid state, please drain node and remove policy state file") return err } return nil @@ -218,23 +217,23 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error { if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 { - klog.InfoS("[cpumanager] static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name) + klog.InfoS("Static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name) // container belongs in an exclusively allocated pool if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { p.updateCPUsToReuse(pod, container, cpuset) - klog.InfoS("[cpumanager] static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) + klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name) return nil } // Call Topology Manager to get the aligned socket affinity across all hint providers. hint := p.affinity.GetAffinity(string(pod.UID), container.Name) - klog.InfoS("[cpumanager] Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) + klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint) // Allocate CPUs according to the NUMA affinity contained in the hint. 
 		cpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)])
 		if err != nil {
-			klog.ErrorS(err, "[cpumanager] unable to allocate CPUs", "numCPUs", numCPUs, "pod", klog.KObj(pod), "containerName", container.Name)
+			klog.ErrorS(err, "Unable to allocate CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs)
 			return err
 		}
 		s.SetCPUSet(string(pod.UID), container.Name, cpuset)
@@ -246,7 +245,7 @@
 }
 
 func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
-	klog.InfoS("[cpumanager] static policy: RemoveContainer ", "podUID", podUID, "containerName", containerName)
+	klog.InfoS("Static policy: RemoveContainer", "podUID", podUID, "containerName", containerName)
 	if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
 		s.Delete(podUID, containerName)
 		// Mutate the shared pool, adding released cpus.
@@ -256,7 +255,7 @@
 }
 
 func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (cpuset.CPUSet, error) {
-	klog.InfoS("[cpumanager] allocateCpus", "numCPUs", numCPUs, "socket", numaAffinity)
+	klog.InfoS("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity)
 
 	allocatableCPUs := p.GetAllocatableCPUs(s).Union(reusableCPUs)
 
@@ -291,7 +290,7 @@
 	// Remove allocated CPUs from the shared CPUSet.
 	s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result))
 
-	klog.InfoS("[cpumanager] allocateCPUs", "result", result)
+	klog.InfoS("AllocateCPUs", "result", result)
 	return result, nil
 }
 
@@ -353,7 +352,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
 	// kubelet restart, for example.
 	if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
 		if allocated.Size() != requested {
-			klog.ErrorS(nil, "[cpumanager] CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requested", requested, "allocated", allocated.Size())
+			klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size())
 			// An empty list of hints will be treated as a preference that cannot be satisfied.
 			// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
 			// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
@@ -361,7 +360,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
 				string(v1.ResourceCPU): {},
 			}
 		}
-		klog.InfoS("[cpumanager] Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name)
+		klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name)
 		return map[string][]topologymanager.TopologyHint{
 			string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested),
 		}
@@ -376,7 +375,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
 
 	// Generate hints.
 	cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
-	klog.InfoS("[cpumanager] TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints)
+	klog.InfoS("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints)
 
 	return map[string][]topologymanager.TopologyHint{
 		string(v1.ResourceCPU): cpuHints,
@@ -403,7 +402,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
 		// kubelet restart, for example.
 		if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
 			if allocated.Size() != requestedByContainer {
-				klog.ErrorS(nil, "[cpumanager] CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requested", requested, "requestedByContainer", requestedByContainer, "allocated", allocated.Size())
+				klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size())
 				// An empty list of hints will be treated as a preference that cannot be satisfied.
 				// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
 				// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
@@ -416,7 +415,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
 			}
 		}
 		if assignedCPUs.Size() == requested {
-			klog.InfoS("[cpumanager] Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod))
+			klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod))
 			return map[string][]topologymanager.TopologyHint{
 				string(v1.ResourceCPU): p.generateCPUTopologyHints(assignedCPUs, cpuset.CPUSet{}, requested),
 			}
@@ -434,7 +433,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
 
 	// Generate hints.
 	cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
-	klog.InfoS("[cpumanager] TopologyHints generated", "pod", klog.KObj(pod), "cpuHints", cpuHints)
+	klog.InfoS("TopologyHints generated", "pod", klog.KObj(pod), "cpuHints", cpuHints)
 
 	return map[string][]topologymanager.TopologyHint{
 		string(v1.ResourceCPU): cpuHints,
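Two gotchas the policy_static.go hunks correct are worth calling out: the fmt package has no ErrorS, so errors returned to callers must stay fmt.Errorf while structured key/value pairs belong only to klog calls; and keys within a single call must be unique (the GetPodTopologyHints line above had "allocatedSize" twice, fixed to "requestedSize"/"allocatedSize", since a repeated key leaves one of the two values ambiguous to downstream log parsers). A small sketch of the corrected division of labor, with illustrative names and values rather than kubelet code:

    package main

    import (
    	"fmt"

    	"k8s.io/klog/v2"
    )

    // reserveCPUs mirrors the NewStaticPolicy check: the returned error is
    // built with fmt.Errorf; no structured keys inside the error message.
    func reserveCPUs(reservedSize, numReservedCPUs int) error {
    	if reservedSize != numReservedCPUs {
    		return fmt.Errorf("unable to reserve the required amount of CPUs (size of %d did not equal %d)", reservedSize, numReservedCPUs)
    	}
    	return nil
    }

    func main() {
    	defer klog.Flush()
    	if err := reserveCPUs(2, 4); err != nil {
    		// Each key appears exactly once per call.
    		klog.ErrorS(err, "Static policy start error", "reservedSize", 2, "numReservedCPUs", 4)
    	}
    }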