diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index 9e72a7cc97a..8b59ba67121 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -239,6 +239,8 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe return err } + klog.V(4).InfoS("CPU manager started", "policy", m.policy.Name()) + m.allocatableCPUs = m.policy.GetAllocatableCPUs(m.state) if m.policy.Name() == string(PolicyNone) { @@ -465,7 +467,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec cset := m.state.GetCPUSetOrDefault(string(pod.UID), container.Name) if cset.IsEmpty() { // NOTE: This should not happen outside of tests. - klog.V(2).InfoS("ReconcileState: skipping container; assigned cpuset is empty", "pod", klog.KObj(pod), "containerName", container.Name) + klog.V(2).InfoS("ReconcileState: skipping container; empty cpuset assigned", "pod", klog.KObj(pod), "containerName", container.Name) failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) continue } diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 5d470e73db3..0dbe757b3c5 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -388,6 +388,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai p.updateCPUsToReuse(pod, container, cpuset) p.updateMetricsOnAllocate(cpuset) + klog.V(4).InfoS("Allocated exclusive CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", cpuset) return nil } @@ -455,7 +456,9 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit } func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int { - if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { + qos := v1qos.GetPodQOS(pod) + if qos != v1.PodQOSGuaranteed { + klog.V(5).InfoS("Exclusive CPU allocation skipped, pod QoS is not guaranteed", "pod", klog.KObj(pod), "containerName", container.Name, "qos", qos) return 0 } cpuQuantity := container.Resources.Requests[v1.ResourceCPU] @@ -474,7 +477,9 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int cpuQuantity = cs.AllocatedResources[v1.ResourceCPU] } } - if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { + cpuValue := cpuQuantity.Value() + if cpuValue*1000 != cpuQuantity.MilliValue() { + klog.V(5).InfoS("Exclusive CPU allocation skipped, pod requested non-integral CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpu", cpuValue) return 0 } // Safe downcast to do for all systems with < 2.1 billion CPUs. diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go index f6acc7c42ce..bda90ba1f4c 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint.go @@ -201,7 +201,7 @@ func (sc *stateCheckpoint) SetCPUSet(podUID string, containerName string, cset c sc.cache.SetCPUSet(podUID, containerName, cset) err := sc.storeState() if err != nil { - klog.InfoS("Store state to checkpoint error", "err", err) + klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName) } } @@ -212,7 +212,7 @@ func (sc *stateCheckpoint) SetDefaultCPUSet(cset cpuset.CPUSet) { sc.cache.SetDefaultCPUSet(cset) err := sc.storeState() if err != nil { - klog.InfoS("Store state to checkpoint error", "err", err) + klog.ErrorS(err, "Failed to store state to checkpoint") } } @@ -223,7 +223,7 @@ func (sc *stateCheckpoint) SetCPUAssignments(a ContainerCPUAssignments) { sc.cache.SetCPUAssignments(a) err := sc.storeState() if err != nil { - klog.InfoS("Store state to checkpoint error", "err", err) + klog.ErrorS(err, "Failed to store state to checkpoint") } } @@ -234,7 +234,7 @@ func (sc *stateCheckpoint) Delete(podUID string, containerName string) { sc.cache.Delete(podUID, containerName) err := sc.storeState() if err != nil { - klog.InfoS("Store state to checkpoint error", "err", err) + klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName) } } @@ -245,6 +245,6 @@ func (sc *stateCheckpoint) ClearState() { sc.cache.ClearState() err := sc.storeState() if err != nil { - klog.InfoS("Store state to checkpoint error", "err", err) + klog.ErrorS(err, "Failed to store state to checkpoint") } }