update structured log for cpumanager/cpu_manager.go

This commit is contained in:
pacoxu 2021-03-12 16:40:48 +08:00
parent 9e024e839b
commit 8d24c8d0ab
3 changed files with 37 additions and 38 deletions

View File

@ -155,7 +155,7 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
if err != nil {
return nil, err
}
klog.Infof("[cpumanager] detected CPU topology: %v", topo)
klog.InfoS("Detected CPU topology", "topology", topo)
reservedCPUs, ok := nodeAllocatableReservation[v1.ResourceCPU]
if !ok {
@ -196,8 +196,8 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
}
func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
klog.Infof("[cpumanager] starting with %s policy", m.policy.Name())
klog.Infof("[cpumanager] reconciling every %v", m.reconcilePeriod)
klog.InfoS("Starting CPU manager", "policy", m.policy.Name())
klog.InfoS("Reconciling", "reconcilePeriod", m.reconcilePeriod)
m.sourcesReady = sourcesReady
m.activePods = activePods
m.podStatusProvider = podStatusProvider
@ -206,14 +206,14 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, cpuManagerStateFileName, m.policy.Name(), m.containerMap)
if err != nil {
klog.Errorf("[cpumanager] could not initialize checkpoint manager: %v, please drain node and remove policy state file", err)
klog.ErrorS(err, "Could not initialize checkpoint manager, please drain node and remove policy state file")
return err
}
m.state = stateImpl
err = m.policy.Start(m.state)
if err != nil {
klog.Errorf("[cpumanager] policy start error: %v", err)
klog.ErrorS(err, "Policy start error")
return err
}
@ -238,7 +238,7 @@ func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error {
// Call down into the policy to assign this container CPUs if required.
err := m.policy.Allocate(m.state, p, c)
if err != nil {
klog.Errorf("[cpumanager] Allocate error: %v", err)
klog.ErrorS(err, "Allocate error")
return err
}
@ -257,7 +257,7 @@ func (m *manager) RemoveContainer(containerID string) error {
err := m.policyRemoveContainerByID(containerID)
if err != nil {
klog.Errorf("[cpumanager] RemoveContainer error: %v", err)
klog.ErrorS(err, "RemoveContainer error")
return err
}
@ -348,10 +348,10 @@ func (m *manager) removeStaleState() {
for podUID := range assignments {
for containerName := range assignments[podUID] {
if _, ok := activeContainers[podUID][containerName]; !ok {
klog.Errorf("[cpumanager] removeStaleState: removing (pod %s, container: %s)", podUID, containerName)
klog.ErrorS(nil, "RemoveStaleState: removing container", "podUID", podUID, "containerName", containerName)
err := m.policyRemoveContainerByRef(podUID, containerName)
if err != nil {
klog.Errorf("[cpumanager] removeStaleState: failed to remove (pod %s, container %s), error: %v)", podUID, containerName, err)
klog.ErrorS(err, "RemoveStaleState: failed to remove container", "podUID", podUID, "containerName", containerName)
}
}
}
@ -366,7 +366,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
for _, pod := range m.activePods() {
pstatus, ok := m.podStatusProvider.GetPodStatus(pod.UID)
if !ok {
klog.Warningf("[cpumanager] reconcileState: skipping pod; status not found (pod: %s)", pod.Name)
klog.InfoS("ReconcileState: skipping pod; status not found", "pod", klog.KObj(pod))
failure = append(failure, reconciledContainer{pod.Name, "", ""})
continue
}
@ -376,21 +376,21 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
for _, container := range allContainers {
containerID, err := findContainerIDByName(&pstatus, container.Name)
if err != nil {
klog.Warningf("[cpumanager] reconcileState: skipping container; ID not found in pod status (pod: %s, container: %s, error: %v)", pod.Name, container.Name, err)
klog.InfoS("ReconcileState: skipping container; ID not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
continue
}
cstatus, err := findContainerStatusByName(&pstatus, container.Name)
if err != nil {
klog.Warningf("[cpumanager] reconcileState: skipping container; container status not found in pod status (pod: %s, container: %s, error: %v)", pod.Name, container.Name, err)
klog.InfoS("ReconcileState: skipping container; container status not found in pod status", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
continue
}
if cstatus.State.Waiting != nil ||
(cstatus.State.Waiting == nil && cstatus.State.Running == nil && cstatus.State.Terminated == nil) {
klog.Warningf("[cpumanager] reconcileState: skipping container; container still in the waiting state (pod: %s, container: %s, error: %v)", pod.Name, container.Name, err)
klog.InfoS("ReconcileState: skipping container; container still in the waiting state", "pod", klog.KObj(pod), "containerName", container.Name, "err", err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, ""})
continue
}
@ -404,7 +404,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
// was allocated.
_, _, err := m.containerMap.GetContainerRef(containerID)
if err == nil {
klog.Warningf("[cpumanager] reconcileState: ignoring terminated container (pod: %s, container id: %s)", pod.Name, containerID)
klog.InfoS("ReconcileState: ignoring terminated container", "pod", klog.KObj(pod), "containerID", containerID)
}
m.Unlock()
continue
@ -419,15 +419,15 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name)
if cset.IsEmpty() {
// NOTE: This should not happen outside of tests.
klog.Infof("[cpumanager] reconcileState: skipping container; assigned cpuset is empty (pod: %s, container: %s)", pod.Name, container.Name)
klog.InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
continue
}
klog.V(4).Infof("[cpumanager] reconcileState: updating container (pod: %s, container: %s, container id: %s, cpuset: \"%v\")", pod.Name, container.Name, containerID, cset)
klog.V(4).InfoS("ReconcileState: updating container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
err = m.updateContainerCPUSet(containerID, cset)
if err != nil {
klog.Errorf("[cpumanager] reconcileState: failed to update container (pod: %s, container: %s, container id: %s, cpuset: \"%v\", error: %v)", pod.Name, container.Name, containerID, cset, err)
klog.ErrorS(err, "ReconcileState: failed to update container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID, "cpuSet", cset)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
continue
}

View File

@ -56,12 +56,12 @@ func (m *fakeManager) RemoveContainer(containerID string) error {
}
func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
klog.InfoS("Get Container Topology Hints")
klog.InfoS("Get container topology hints")
return map[string][]topologymanager.TopologyHint{}
}
func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
klog.InfoS("Get Pod Topology Hints")
klog.InfoS("Get pod topology hints")
return map[string][]topologymanager.TopologyHint{}
}
@ -70,12 +70,12 @@ func (m *fakeManager) State() state.Reader {
}
func (m *fakeManager) GetCPUs(podUID, containerName string) cpuset.CPUSet {
klog.InfoS("GetCPUs", "podUID", podUID, "containerName", container.Name)
klog.InfoS("GetCPUs", "podUID", podUID, "containerName", containerName)
return cpuset.CPUSet{}
}
func (m *fakeManager) GetAllocatableCPUs() cpuset.CPUSet {
klog.InfoS("Get Allocatable Cpus")
klog.InfoS("Get Allocatable CPUs")
return cpuset.CPUSet{}
}

View File

@ -27,7 +27,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
"k8s.io/kubernetes/pkg/kubelet/util/format"
)
// PolicyStatic is the name of the static policy
@ -103,11 +102,11 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
}
if reserved.Size() != numReservedCPUs {
err := fmt.ErrorS(nil, "[cpumanager] unable to reserve the required amount of CPUs (not equal)", "reserved", reserved, "numReservedCPUs", numReservedCPUs)
err := fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of %s did not equal %d)", reserved, numReservedCPUs)
return nil, err
}
klog.InfoS("[cpumanager] reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved)
klog.InfoS("Reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved)
return &staticPolicy{
topology: topology,
@ -123,7 +122,7 @@ func (p *staticPolicy) Name() string {
func (p *staticPolicy) Start(s state.State) error {
if err := p.validateState(s); err != nil {
klog.ErrorS(err, "[cpumanager] static policy invalid state, please drain node and remove policy state file")
klog.ErrorS(err, "Static policy invalid state, please drain node and remove policy state file")
return err
}
return nil
@ -218,23 +217,23 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
klog.InfoS("[cpumanager] static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
klog.InfoS("Static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
// container belongs in an exclusively allocated pool
if cpuset, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
p.updateCPUsToReuse(pod, container, cpuset)
klog.InfoS("[cpumanager] static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name)
klog.InfoS("Static policy: container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name)
return nil
}
// Call Topology Manager to get the aligned socket affinity across all hint providers.
hint := p.affinity.GetAffinity(string(pod.UID), container.Name)
klog.InfoS("[cpumanager] Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint)
klog.InfoS("Topology Affinity", "pod", klog.KObj(pod), "containerName", container.Name, "affinity", hint)
// Allocate CPUs according to the NUMA affinity contained in the hint.
cpuset, err := p.allocateCPUs(s, numCPUs, hint.NUMANodeAffinity, p.cpusToReuse[string(pod.UID)])
if err != nil {
klog.ErrorS(err, "[cpumanager] unable to allocate CPUs", "numCPUs", numCPUs, "pod", klog.KObj(pod), "containerName", container.Name)
klog.ErrorS(err, "Unable to allocate CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "numCPUs", numCPUs)
return err
}
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
@ -246,7 +245,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
}
func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
klog.InfoS("[cpumanager] static policy: RemoveContainer ", "podUID", podUID, "containerName", containerName)
klog.InfoS("Static policy: RemoveContainer", "podUID", podUID, "containerName", containerName)
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
s.Delete(podUID, containerName)
// Mutate the shared pool, adding released cpus.
@ -256,7 +255,7 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
}
func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bitmask.BitMask, reusableCPUs cpuset.CPUSet) (cpuset.CPUSet, error) {
klog.InfoS("[cpumanager] allocateCpus", "numCPUs", numCPUs, "socket", numaAffinity)
klog.InfoS("AllocateCPUs", "numCPUs", numCPUs, "socket", numaAffinity)
allocatableCPUs := p.GetAllocatableCPUs(s).Union(reusableCPUs)
@ -291,7 +290,7 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit
// Remove allocated CPUs from the shared CPUSet.
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result))
klog.InfoS("[cpumanager] allocateCPUs", "result", result)
klog.InfoS("AllocateCPUs", "result", result)
return result, nil
}
@ -353,7 +352,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
// kubelet restart, for example.
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
if allocated.Size() != requested {
klog.ErrorS(nil, "[cpumanager] CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requested", requested, "allocated", allocated.Size())
klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requestedSize", requested, "allocatedSize", allocated.Size())
// An empty list of hints will be treated as a preference that cannot be satisfied.
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
@ -361,7 +360,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
string(v1.ResourceCPU): {},
}
}
klog.InfoS("[cpumanager] Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name)
klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod), "containerName", container.Name)
return map[string][]topologymanager.TopologyHint{
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested),
}
@ -376,7 +375,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
// Generate hints.
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
klog.InfoS("[cpumanager] TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints)
klog.InfoS("TopologyHints generated", "pod", klog.KObj(pod), "containerName", container.Name, "cpuHints", cpuHints)
return map[string][]topologymanager.TopologyHint{
string(v1.ResourceCPU): cpuHints,
@ -403,7 +402,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
// kubelet restart, for example.
if allocated, exists := s.GetCPUSet(string(pod.UID), container.Name); exists {
if allocated.Size() != requestedByContainer {
klog.ErrorS(nil, "[cpumanager] CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "requested", requested, "requestedByContainer", requestedByContainer, "allocated", allocated.Size())
klog.ErrorS(nil, "CPUs already allocated to container with different number than request", "pod", klog.KObj(pod), "containerName", container.Name, "allocatedSize", requested, "requestedByContainer", requestedByContainer, "allocatedSize", allocated.Size())
// An empty list of hints will be treated as a preference that cannot be satisfied.
// In definition of hints this is equal to: TopologyHint[NUMANodeAffinity: nil, Preferred: false].
// For all but the best-effort policy, the Topology Manager will throw a pod-admission error.
@ -416,7 +415,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
}
}
if assignedCPUs.Size() == requested {
klog.InfoS("[cpumanager] Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod))
klog.InfoS("Regenerating TopologyHints for CPUs already allocated", "pod", klog.KObj(pod))
return map[string][]topologymanager.TopologyHint{
string(v1.ResourceCPU): p.generateCPUTopologyHints(assignedCPUs, cpuset.CPUSet{}, requested),
}
@ -434,7 +433,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin
// Generate hints.
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
klog.InfoS("[cpumanager] TopologyHints generated", "pod", klog.KObj(pod), "cpuHints", cpuHints)
klog.InfoS("TopologyHints generated", "pod", klog.KObj(pod), "cpuHints", cpuHints)
return map[string][]topologymanager.TopologyHint{
string(v1.ResourceCPU): cpuHints,