Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #99974 from knabben/sl-memorymanager
Migrate pkg/kubelet/cm/memorymanager to structured logging
Commit: e5309efbdf
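The diff below applies one pattern throughout the memory manager: printf-style klog calls (Infof, Errorf, Warningf) with a hand-written "[memorymanager]" prefix are replaced by structured calls (InfoS, ErrorS) that take a constant message plus key/value pairs, with klog.KObj used to reference pods. The following is a minimal, self-contained sketch of that before/after pattern, not code from this PR; it assumes k8s.io/klog/v2 and the Kubernetes API types are available, and the pod and error values are made up for illustration.

package main

import (
	"errors"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	// A stand-in for the pod objects the memory manager logs about.
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "nginx", Namespace: "default"}}
	containerName := "app"

	// Old style: format string with a hand-rolled component prefix.
	klog.Infof("[memorymanager] Allocate (pod: %s, container: %s)", pod.Name, containerName)

	// New style: constant message plus key/value pairs; klog.KObj renders the
	// object as "namespace/name".
	klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", containerName)

	// Errors move from Errorf to ErrorS, with the error as the first argument
	// (nil is allowed when there is no error value, as several hunks below show).
	err := errors.New("not enough free memory")
	klog.ErrorS(err, "Allocate error", "pod", klog.KObj(pod), "containerName", containerName)

	klog.Flush()
}

Note also that recoverable conditions previously logged with Warningf are downgraded in this PR to InfoS with an explicit "err" key, as in the checkpoint store hunks near the end of the diff.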
@@ -32,41 +32,41 @@ type fakeManager struct {
 }

 func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
-	klog.Info("[fake memorymanager] Start()")
+	klog.InfoS("Start()")
 	return nil
 }

 func (m *fakeManager) Policy() Policy {
-	klog.Info("[fake memorymanager] Policy()")
+	klog.InfoS("Policy()")
 	return NewPolicyNone()
 }

 func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error {
-	klog.Infof("[fake memorymanager] Allocate (pod: %s, container: %s", pod.Name, container.Name)
+	klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
 	return nil
 }

 func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
-	klog.Infof("[fake memorymanager] AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
+	klog.InfoS("Add container", "pod", klog.KObj(pod), "containerName", container.Name, "containerID", containerID)
 }

 func (m *fakeManager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Int {
-	klog.Infof("[fake memorymanager] GetMemoryNUMANodes (pod: %s, container: %s)", pod.Name, container.Name)
+	klog.InfoS("Get MemoryNUMANodes", "pod", klog.KObj(pod), "containerName", container.Name)
 	return nil
 }

 func (m *fakeManager) RemoveContainer(containerID string) error {
-	klog.Infof("[fake memorymanager] RemoveContainer (container id: %s)", containerID)
+	klog.InfoS("RemoveContainer", "containerID", containerID)
 	return nil
 }

 func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
-	klog.Infof("[fake memorymanager] Get Topology Hints")
+	klog.InfoS("Get Topology Hints")
 	return map[string][]topologymanager.TopologyHint{}
 }

 func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
-	klog.Infof("[fake memorymanager] Get Pod Topology Hints")
+	klog.InfoS("Get Pod Topology Hints")
 	return map[string][]topologymanager.TopologyHint{}
 }

@@ -153,7 +153,7 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll

 // Start starts the memory manager under the kubelet and calls policy start
 func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
-	klog.Infof("[memorymanager] starting with %s policy", m.policy.Name())
+	klog.InfoS("Starting memorymanager", "policy", m.policy.Name())
 	m.sourcesReady = sourcesReady
 	m.activePods = activePods
 	m.podStatusProvider = podStatusProvider
@@ -162,14 +162,14 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe

 	stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, memoryManagerStateFileName, m.policy.Name())
 	if err != nil {
-		klog.Errorf("[memorymanager] could not initialize checkpoint manager: %v, please drain node and remove policy state file", err)
+		klog.ErrorS(err, "Could not initialize checkpoint manager, please drain node and remove policy state file")
 		return err
 	}
 	m.state = stateImpl

 	err = m.policy.Start(m.state)
 	if err != nil {
-		klog.Errorf("[memorymanager] policy start error: %v", err)
+		klog.ErrorS(err, "Policy start error")
 		return err
 	}

@@ -196,11 +196,11 @@ func (m *manager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.
 	}

 	if numaNodes.Len() == 0 {
-		klog.V(5).Infof("No allocation is available for (Pod: %s, Container: %s)", pod.Name, container.Name)
+		klog.V(5).InfoS("No allocation is available", "pod", klog.KObj(pod), "containerName", container.Name)
 		return nil
 	}

-	klog.Infof("(Pod: %s, Container: %s) memory affinity is %v", pod.Name, container.Name, numaNodes)
+	klog.InfoS("Memory affinity", "pod", klog.KObj(pod), "containerName", container.Name, "numaNodes", numaNodes)
 	return numaNodes
 }

@@ -214,7 +214,7 @@ func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error {

 	// Call down into the policy to assign this container memory if required.
 	if err := m.policy.Allocate(m.state, pod, container); err != nil {
-		klog.Errorf("[memorymanager] Allocate error: %v", err)
+		klog.ErrorS(err, "Allocate error")
 		return err
 	}
 	return nil

@@ -228,13 +228,13 @@ func (m *manager) RemoveContainer(containerID string) error {
 	// if error appears it means container entry already does not exist under the container map
 	podUID, containerName, err := m.containerMap.GetContainerRef(containerID)
 	if err != nil {
-		klog.Warningf("[memorymanager] Failed to get container %s from container map error: %v", containerID, err)
+		klog.InfoS("Failed to get container from container map", "containerID", containerID, "err", err)
 		return nil
 	}

 	err = m.policyRemoveContainerByRef(podUID, containerName)
 	if err != nil {
-		klog.Errorf("[memorymanager] RemoveContainer error: %v", err)
+		klog.ErrorS(err, "RemoveContainer error")
 		return err
 	}

@@ -296,10 +296,10 @@ func (m *manager) removeStaleState() {
 	for podUID := range assignments {
 		for containerName := range assignments[podUID] {
 			if _, ok := activeContainers[podUID][containerName]; !ok {
-				klog.Infof("[memorymanager] removeStaleState: removing (pod %s, container: %s)", podUID, containerName)
+				klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
 				err := m.policyRemoveContainerByRef(podUID, containerName)
 				if err != nil {
-					klog.Errorf("[memorymanager] removeStaleState: failed to remove (pod %s, container %s), error: %v)", podUID, containerName, err)
+					klog.ErrorS(err, "RemoveStaleState: failed to remove state", "podUID", podUID, "containerName", containerName)
 				}
 			}
 		}

@@ -77,7 +77,7 @@ func (p *staticPolicy) Name() string {

 func (p *staticPolicy) Start(s state.State) error {
 	if err := p.validateState(s); err != nil {
-		klog.Errorf("[memorymanager] Invalid state: %v, please drain node and remove policy state file", err)
+		klog.ErrorS(err, "Invalid state, please drain node and remove policy state file")
 		return err
 	}
 	return nil
@@ -90,15 +90,15 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
 		return nil
 	}

-	klog.Infof("[memorymanager] Allocate (pod: %s, container: %s)", pod.Name, container.Name)
+	klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
 	if blocks := s.GetMemoryBlocks(string(pod.UID), container.Name); blocks != nil {
-		klog.Infof("[memorymanager] Container already present in state, skipping (pod: %s, container: %s)", pod.Name, container.Name)
+		klog.InfoS("Container already present in state, skipping", "pod", klog.KObj(pod), "containerName", container.Name)
 		return nil
 	}

 	// Call Topology Manager to get the aligned affinity across all hint providers.
 	hint := p.affinity.GetAffinity(string(pod.UID), container.Name)
-	klog.Infof("[memorymanager] Pod %v, Container %v Topology Affinity is: %v", pod.UID, container.Name, hint)
+	klog.InfoS("Got topology affinity", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "hint", hint)

 	requestedResources, err := getRequestedResources(container)
 	if err != nil {
@@ -185,7 +185,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai

 // RemoveContainer call is idempotent
 func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
-	klog.Infof("[memorymanager] RemoveContainer (pod: %s, container: %s)", podUID, containerName)
+	klog.InfoS("RemoveContainer", "podUID", podUID, "containerName", containerName)
 	blocks := s.GetMemoryBlocks(podUID, containerName)
 	if blocks == nil {
 		return nil
@@ -245,28 +245,28 @@ func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, re
 	}

 	if len(ctnBlocks) != len(reqRsrc) {
-		klog.Errorf("[memorymanager] The number of requested resources by the container %s differs from the number of memory blocks", ctn.Name)
+		klog.ErrorS(nil, "The number of requested resources by the container differs from the number of memory blocks", "containerName", ctn.Name)
 		return nil
 	}

 	for _, b := range ctnBlocks {
 		if _, ok := reqRsrc[b.Type]; !ok {
-			klog.Errorf("[memorymanager] Container %s requested resources do not have resource of type %s", ctn.Name, b.Type)
+			klog.ErrorS(nil, "Container requested resources do not have resource of this type", "containerName", ctn.Name, "type", b.Type)
 			return nil
 		}

 		if b.Size != reqRsrc[b.Type] {
-			klog.Errorf("[memorymanager] Memory %s already allocated to (pod %v, container %v) with different number than request: requested: %d, allocated: %d", b.Type, pod.UID, ctn.Name, reqRsrc[b.Type], b.Size)
+			klog.ErrorS(nil, "Memory already allocated with different numbers than requested", "podUID", pod.UID, "type", b.Type, "containerName", ctn.Name, "requestedResource", reqRsrc[b.Type], "allocatedSize", b.Size)
 			return nil
 		}

 		containerNUMAAffinity, err := bitmask.NewBitMask(b.NUMAAffinity...)
 		if err != nil {
-			klog.Errorf("[memorymanager] failed to generate NUMA bitmask: %v", err)
+			klog.ErrorS(err, "Failed to generate NUMA bitmask")
 			return nil
 		}

-		klog.Infof("[memorymanager] Regenerating TopologyHints, %s was already allocated to (pod %v, container %v)", b.Type, pod.UID, ctn.Name)
+		klog.InfoS("Regenerating TopologyHints, resource was already allocated to pod", "resourceName", b.Type, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", ctn.Name)
 		hints[string(b.Type)] = append(hints[string(b.Type)], topologymanager.TopologyHint{
 			NUMANodeAffinity: containerNUMAAffinity,
 			Preferred: true,
@@ -326,7 +326,7 @@ func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[strin

 	reqRsrcs, err := getPodRequestedResources(pod)
 	if err != nil {
-		klog.Error(err.Error())
+		klog.ErrorS(err, "Failed to get pod requested resources", "pod", klog.KObj(pod), "podUID", pod.UID)
 		return nil
 	}

@@ -352,7 +352,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v

 	requestedResources, err := getRequestedResources(container)
 	if err != nil {
-		klog.Error(err.Error())
+		klog.ErrorS(err, "Failed to get container requested resources", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
 		return nil
 	}

@@ -570,41 +570,41 @@ func (p *staticPolicy) validateState(s state.State) error {

 func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool {
 	if len(ms1) != len(ms2) {
-		klog.Errorf("[memorymanager] node states are different len(ms1) != len(ms2): %d != %d", len(ms1), len(ms2))
+		klog.ErrorS(nil, "Node states are different", "lengthNode1", len(ms1), "lengthNode2", len(ms2))
 		return false
 	}

 	for nodeID, nodeState1 := range ms1 {
 		nodeState2, ok := ms2[nodeID]
 		if !ok {
-			klog.Errorf("[memorymanager] node state does not have node ID %d", nodeID)
+			klog.ErrorS(nil, "Node state does not have node ID", "nodeID", nodeID)
 			return false
 		}

 		if nodeState1.NumberOfAssignments != nodeState2.NumberOfAssignments {
-			klog.Errorf("[memorymanager] node states number of assignments are different: %d != %d", nodeState1.NumberOfAssignments, nodeState2.NumberOfAssignments)
+			klog.ErrorS(nil, "Node states number of assignments are different", "assignment1", nodeState1.NumberOfAssignments, "assignment2", nodeState2.NumberOfAssignments)
 			return false
 		}

 		if !areGroupsEqual(nodeState1.Cells, nodeState2.Cells) {
-			klog.Errorf("[memorymanager] node states groups are different: %v != %v", nodeState1.Cells, nodeState2.Cells)
+			klog.ErrorS(nil, "Node states groups are different", "stateNode1", nodeState1.Cells, "stateNode2", nodeState2.Cells)
 			return false
 		}

 		if len(nodeState1.MemoryMap) != len(nodeState2.MemoryMap) {
-			klog.Errorf("[memorymanager] node states memory map have different length: %d != %d", len(nodeState1.MemoryMap), len(nodeState2.MemoryMap))
+			klog.ErrorS(nil, "Node states memory map have different lengths", "lengthNode1", len(nodeState1.MemoryMap), "lengthNode2", len(nodeState2.MemoryMap))
 			return false
 		}

 		for resourceName, memoryState1 := range nodeState1.MemoryMap {
 			memoryState2, ok := nodeState2.MemoryMap[resourceName]
 			if !ok {
-				klog.Errorf("[memorymanager] memory state does not have resource %s", resourceName)
+				klog.ErrorS(nil, "Memory state does not have resource", "resource", resourceName)
 				return false
 			}

 			if !reflect.DeepEqual(*memoryState1, *memoryState2) {
-				klog.Errorf("[memorymanager] memory states for the NUMA node %d and the resource %s are different: %+v != %+v", nodeID, resourceName, *memoryState1, *memoryState2)
+				klog.ErrorS(nil, "Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
 				return false
 			}
 		}

@@ -79,7 +79,7 @@ func (sc *stateCheckpoint) restoreState() error {
 	sc.cache.SetMachineState(checkpoint.MachineState)
 	sc.cache.SetMemoryAssignments(checkpoint.Entries)

-	klog.V(2).Info("[memorymanager] state checkpoint: restored state from checkpoint")
+	klog.V(2).InfoS("State checkpoint: restored state from checkpoint")

 	return nil
 }
@@ -93,7 +93,7 @@ func (sc *stateCheckpoint) storeState() error {

 	err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
 	if err != nil {
-		klog.Errorf("[memorymanager] could not save checkpoint: %v", err)
+		klog.ErrorS(err, "Could not save checkpoint")
 		return err
 	}
 	return nil
@@ -131,7 +131,7 @@ func (sc *stateCheckpoint) SetMachineState(memoryMap NUMANodeMap) {
 	sc.cache.SetMachineState(memoryMap)
 	err := sc.storeState()
 	if err != nil {
-		klog.Warningf("store state to checkpoint error: %v", err)
+		klog.InfoS("Store state to checkpoint error", "err", err)
 	}
 }

@@ -143,7 +143,7 @@ func (sc *stateCheckpoint) SetMemoryBlocks(podUID string, containerName string,
 	sc.cache.SetMemoryBlocks(podUID, containerName, blocks)
 	err := sc.storeState()
 	if err != nil {
-		klog.Warningf("store state to checkpoint error: %v", err)
+		klog.InfoS("Store state to checkpoint error", "err", err)
 	}
 }

@@ -155,7 +155,7 @@ func (sc *stateCheckpoint) SetMemoryAssignments(assignments ContainerMemoryAssig
 	sc.cache.SetMemoryAssignments(assignments)
 	err := sc.storeState()
 	if err != nil {
-		klog.Warningf("store state to checkpoint error: %v", err)
+		klog.InfoS("Store state to checkpoint error", "err", err)
 	}
 }

@@ -167,7 +167,7 @@ func (sc *stateCheckpoint) Delete(podUID string, containerName string) {
 	sc.cache.Delete(podUID, containerName)
 	err := sc.storeState()
 	if err != nil {
-		klog.Warningf("store state to checkpoint error: %v", err)
+		klog.InfoS("Store state to checkpoint error", "err", err)
 	}
 }

@@ -179,6 +179,6 @@ func (sc *stateCheckpoint) ClearState() {
 	sc.cache.ClearState()
 	err := sc.storeState()
 	if err != nil {
-		klog.Warningf("store state to checkpoint error: %v", err)
+		klog.InfoS("Store state to checkpoint error", "err", err)
 	}
 }

@@ -32,7 +32,7 @@ var _ State = &stateMemory{}

 // NewMemoryState creates new State for keeping track of cpu/pod assignment
 func NewMemoryState() State {
-	klog.Infof("[memorymanager] initializing new in-memory state store")
+	klog.InfoS("Initializing new in-memory state store")
 	return &stateMemory{
 		assignments: ContainerMemoryAssignments{},
 		machineState: NUMANodeMap{},
@@ -72,7 +72,7 @@ func (s *stateMemory) SetMachineState(nodeMap NUMANodeMap) {
 	defer s.Unlock()

 	s.machineState = nodeMap.Clone()
-	klog.Info("[memorymanager] updated machine memory state")
+	klog.InfoS("Updated machine memory state")
 }

 // SetMemoryBlocks stores memory assignments of container
@@ -85,7 +85,7 @@ func (s *stateMemory) SetMemoryBlocks(podUID string, containerName string, block
 	}

 	s.assignments[podUID][containerName] = append([]Block{}, blocks...)
-	klog.Infof("[memorymanager] updated memory state (pod: %s, container: %s)", podUID, containerName)
+	klog.InfoS("Updated memory state", "podUID", podUID, "containerName", containerName)
 }

 // SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter
@@ -109,7 +109,7 @@ func (s *stateMemory) Delete(podUID string, containerName string) {
 	if len(s.assignments[podUID]) == 0 {
 		delete(s.assignments, podUID)
 	}
-	klog.V(2).Infof("[memorymanager] deleted memory assignment (pod: %s, container: %s)", podUID, containerName)
+	klog.V(2).InfoS("Deleted memory assignment", "podUID", podUID, "containerName", containerName)
 }

 // ClearState clears machineState and ContainerMemoryAssignments
@@ -119,5 +119,5 @@ func (s *stateMemory) ClearState() {

 	s.machineState = NUMANodeMap{}
 	s.assignments = make(ContainerMemoryAssignments)
-	klog.V(2).Infof("[memorymanager] cleared state")
+	klog.V(2).InfoS("Cleared state")
 }