From 0411a3d5651ed17f9a7dda4bb5dc0f97d154dc81 Mon Sep 17 00:00:00 2001 From: Mark Rossetti Date: Wed, 10 Jan 2024 19:28:56 -0800 Subject: [PATCH] Add support for memory pressure evictiong on Windows Signed-off-by: Mark Rossetti --- pkg/kubelet/eviction/defaults_others.go | 4 +- pkg/kubelet/eviction/defaults_windows.go | 30 +++++ pkg/kubelet/eviction/eviction_manager.go | 4 +- pkg/kubelet/eviction/helpers.go | 10 +- pkg/kubelet/eviction/helpers_others.go | 37 ++++++ pkg/kubelet/eviction/helpers_test.go | 11 +- pkg/kubelet/eviction/helpers_windows.go | 48 +++++++ .../eviction/memory_threshold_notifier.go | 106 +-------------- .../memory_threshold_notifier_others.go | 123 ++++++++++++++++++ .../memory_threshold_notifier_test.go | 9 +- .../memory_threshold_notifier_windows.go | 101 ++++++++++++++ .../stats/summary_sys_containers_windows.go | 28 ++++ .../server/stats/summary_windows_test.go | 7 +- ...ts.go => perfcounter_nodestats_windows.go} | 33 +++++ .../kubelet/pkg/apis/stats/v1alpha1/types.go | 3 + 15 files changed, 437 insertions(+), 117 deletions(-) create mode 100644 pkg/kubelet/eviction/defaults_windows.go create mode 100644 pkg/kubelet/eviction/helpers_others.go create mode 100644 pkg/kubelet/eviction/helpers_windows.go create mode 100644 pkg/kubelet/eviction/memory_threshold_notifier_others.go create mode 100644 pkg/kubelet/eviction/memory_threshold_notifier_windows.go rename pkg/kubelet/winstats/{perfcounter_nodestats.go => perfcounter_nodestats_windows.go} (89%) diff --git a/pkg/kubelet/eviction/defaults_others.go b/pkg/kubelet/eviction/defaults_others.go index b226a210f45..912acf178db 100644 --- a/pkg/kubelet/eviction/defaults_others.go +++ b/pkg/kubelet/eviction/defaults_others.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !windows +// +build !linux,!windows /* Copyright 2018 The Kubernetes Authors. diff --git a/pkg/kubelet/eviction/defaults_windows.go b/pkg/kubelet/eviction/defaults_windows.go new file mode 100644 index 00000000000..de6517f6540 --- /dev/null +++ b/pkg/kubelet/eviction/defaults_windows.go @@ -0,0 +1,30 @@ +//go:build windows +// +build windows + +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +// DefaultEvictionHard includes default options for hard eviction for Windows. +// Note: On Linux, the default eviction hard threshold is 100Mi for memory.available +// but Windows generally requires more memory reserved for system processes so we'll +// set the default threshold to 500Mi. +var DefaultEvictionHard = map[string]string{ + "memory.available": "500Mi", + "nodefs.available": "10%", + "imagefs.available": "15%", +} diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index ed946a0e056..00df1a773c6 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -19,6 +19,7 @@ package eviction import ( "context" "fmt" + "runtime" "sort" "sync" "time" @@ -185,7 +186,8 @@ func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePod klog.InfoS(message) m.synchronize(diskInfoProvider, podFunc) } - if m.config.KernelMemcgNotification { + klog.InfoS("Eviction manager: starting control loop") + if m.config.KernelMemcgNotification || runtime.GOOS == "windows" { for _, threshold := range m.config.Thresholds { if threshold.Signal == evictionapi.SignalMemoryAvailable || threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable { notifier, err := NewMemoryThresholdNotifier(threshold, m.config.PodCgroupRoot, &CgroupNotifierFactory{}, thresholdHandler) diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index 19d09ec9da5..8050613e672 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -846,13 +846,11 @@ func makeSignalObservations(summary *statsapi.Summary) (signalObservations, stat // build an evaluation context for current eviction signals result := signalObservations{} - if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil { - result[evictionapi.SignalMemoryAvailable] = signalObservation{ - available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI), - capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI), - time: memory.Time, - } + memoryAvailableSignal := makeMemoryAvailableSignalObservation(summary) + if memoryAvailableSignal != nil { + result[evictionapi.SignalMemoryAvailable] = *memoryAvailableSignal } + if allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods); err != nil { klog.ErrorS(err, "Eviction manager: failed to construct signal", "signal", evictionapi.SignalAllocatableMemoryAvailable) } else { diff --git a/pkg/kubelet/eviction/helpers_others.go b/pkg/kubelet/eviction/helpers_others.go new file mode 100644 index 00000000000..6b424bac474 --- /dev/null +++ b/pkg/kubelet/eviction/helpers_others.go @@ -0,0 +1,37 @@ +//go:build !windows +// +build !windows + +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +import ( + "k8s.io/apimachinery/pkg/api/resource" + statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" +) + +func makeMemoryAvailableSignalObservation(summary *statsapi.Summary) *signalObservation { + if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil { + return &signalObservation{ + available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI), + capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI), + time: memory.Time, + } + } + + return nil +} diff --git a/pkg/kubelet/eviction/helpers_test.go b/pkg/kubelet/eviction/helpers_test.go index 5df209b2801..93c039c4ee3 100644 --- a/pkg/kubelet/eviction/helpers_test.go +++ b/pkg/kubelet/eviction/helpers_test.go @@ -1279,6 +1279,7 @@ func TestMakeSignalObservations(t *testing.T) { Inodes: &nodeFsInodes, }, SystemContainers: []statsapi.ContainerStats{ + // Used for memory signal observations on linux { Name: statsapi.SystemContainerPods, Memory: &statsapi.MemoryStats{ @@ -1286,6 +1287,14 @@ func TestMakeSignalObservations(t *testing.T) { WorkingSetBytes: &nodeWorkingSetBytes, }, }, + // Used for memory signal observations on windows + { + Name: statsapi.SystemContainerWindowsGlobalCommitMemory, + Memory: &statsapi.MemoryStats{ + AvailableBytes: &nodeAvailableBytes, + UsageBytes: &nodeWorkingSetBytes, + }, + }, }, }, Pods: []statsapi.PodStats{}, @@ -1307,7 +1316,7 @@ func TestMakeSignalObservations(t *testing.T) { actualObservations, statsFunc := makeSignalObservations(fakeStats) allocatableMemQuantity, found := actualObservations[evictionapi.SignalAllocatableMemoryAvailable] if !found { - t.Errorf("Expected allocatable memory observation, but didnt find one") + t.Errorf("Expected allocatable memory observation, but didn't find one") } if expectedBytes := int64(nodeAvailableBytes); allocatableMemQuantity.available.Value() != expectedBytes { t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.available.Value()) diff --git a/pkg/kubelet/eviction/helpers_windows.go b/pkg/kubelet/eviction/helpers_windows.go new file mode 100644 index 00000000000..ac3ea7f5302 --- /dev/null +++ b/pkg/kubelet/eviction/helpers_windows.go @@ -0,0 +1,48 @@ +//go:build windows +// +build windows + +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +import ( + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/klog/v2" + statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" +) + +func makeMemoryAvailableSignalObservation(summary *statsapi.Summary) *signalObservation { + klog.V(4).InfoS("Eviction manager: building memory signal observations for windows") + sysContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerWindowsGlobalCommitMemory) + if err != nil { + klog.ErrorS(err, "Eviction manager: failed to construct signal", "signal", evictionapi.SignalMemoryAvailable) + } + if memory := sysContainer.Memory; memory != nil && memory.AvailableBytes != nil && memory.UsageBytes != nil { + klog.V(4).InfoS( + "Eviction manager: memory signal observations for windows", + "Available", *memory.AvailableBytes, + "Usage", *memory.UsageBytes) + return &signalObservation{ + available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI), + capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.UsageBytes), resource.BinarySI), + time: memory.Time, + } + } else { + return nil + } +} diff --git a/pkg/kubelet/eviction/memory_threshold_notifier.go b/pkg/kubelet/eviction/memory_threshold_notifier.go index 3ce5fb811e1..6bc1bc6ab1d 100644 --- a/pkg/kubelet/eviction/memory_threshold_notifier.go +++ b/pkg/kubelet/eviction/memory_threshold_notifier.go @@ -16,119 +16,19 @@ limitations under the License. package eviction -import ( - "fmt" - "time" - - "k8s.io/klog/v2" - - "k8s.io/apimachinery/pkg/api/resource" - statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" - "k8s.io/kubernetes/pkg/kubelet/cm" - evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" -) +import "time" const ( - memoryUsageAttribute = "memory.usage_in_bytes" // this prevents constantly updating the memcg notifier if synchronize // is run frequently. notifierRefreshInterval = 10 * time.Second ) -type memoryThresholdNotifier struct { - threshold evictionapi.Threshold - cgroupPath string - events chan struct{} - factory NotifierFactory - handler func(string) - notifier CgroupNotifier -} - -var _ ThresholdNotifier = &memoryThresholdNotifier{} - -// NewMemoryThresholdNotifier creates a ThresholdNotifier which is designed to respond to the given threshold. -// UpdateThreshold must be called once before the threshold will be active. -func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) { - cgroups, err := cm.GetCgroupSubsystems() - if err != nil { - return nil, err - } - cgpath, found := cgroups.MountPoints["memory"] - if !found || len(cgpath) == 0 { - return nil, fmt.Errorf("memory cgroup mount point not found") - } - if isAllocatableEvictionThreshold(threshold) { - // for allocatable thresholds, point the cgroup notifier at the allocatable cgroup - cgpath += cgroupRoot - } - return &memoryThresholdNotifier{ - threshold: threshold, - cgroupPath: cgpath, - events: make(chan struct{}), - handler: handler, - factory: factory, - }, nil -} - -func (m *memoryThresholdNotifier) Start() { - klog.InfoS("Eviction manager: created memoryThresholdNotifier", "notifier", m.Description()) - for range m.events { - m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description())) - } -} - -func (m *memoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error { - memoryStats := summary.Node.Memory - if isAllocatableEvictionThreshold(m.threshold) { - allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods) - if err != nil { - return err - } - memoryStats = allocatableContainer.Memory - } - if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil { - return fmt.Errorf("summary was incomplete. Expected MemoryStats and all subfields to be non-nil, but got %+v", memoryStats) - } - // Set threshold on usage to capacity - eviction_hard + inactive_file, - // since we want to be notified when working_set = capacity - eviction_hard - inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI) - capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI) - evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity) - memcgThreshold := capacity.DeepCopy() - memcgThreshold.Sub(*evictionThresholdQuantity) - memcgThreshold.Add(*inactiveFile) - - klog.V(3).InfoS("Eviction manager: setting notifier to capacity", "notifier", m.Description(), "capacity", memcgThreshold.String()) - if m.notifier != nil { - m.notifier.Stop() - } - newNotifier, err := m.factory.NewCgroupNotifier(m.cgroupPath, memoryUsageAttribute, memcgThreshold.Value()) - if err != nil { - return err - } - m.notifier = newNotifier - go m.notifier.Start(m.events) - return nil -} - -func (m *memoryThresholdNotifier) Description() string { - var hard, allocatable string - if isHardEvictionThreshold(m.threshold) { - hard = "hard " - } else { - hard = "soft " - } - if isAllocatableEvictionThreshold(m.threshold) { - allocatable = "allocatable " - } - return fmt.Sprintf("%s%smemory eviction threshold", hard, allocatable) -} - -var _ NotifierFactory = &CgroupNotifierFactory{} - // CgroupNotifierFactory knows how to make CgroupNotifiers which integrate with the kernel type CgroupNotifierFactory struct{} +var _ NotifierFactory = &CgroupNotifierFactory{} + // NewCgroupNotifier implements the NotifierFactory interface func (n *CgroupNotifierFactory) NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) { return NewCgroupNotifier(path, attribute, threshold) diff --git a/pkg/kubelet/eviction/memory_threshold_notifier_others.go b/pkg/kubelet/eviction/memory_threshold_notifier_others.go new file mode 100644 index 00000000000..1b7bc21e05d --- /dev/null +++ b/pkg/kubelet/eviction/memory_threshold_notifier_others.go @@ -0,0 +1,123 @@ +//go:build !windows +// +build !windows + +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +import ( + "fmt" + + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/klog/v2" + statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + "k8s.io/kubernetes/pkg/kubelet/cm" + evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" +) + +const ( + memoryUsageAttribute = "memory.usage_in_bytes" +) + +type linuxMemoryThresholdNotifier struct { + threshold evictionapi.Threshold + cgroupPath string + events chan struct{} + factory NotifierFactory + handler func(string) + notifier CgroupNotifier +} + +var _ ThresholdNotifier = &linuxMemoryThresholdNotifier{} + +// NewMemoryThresholdNotifier creates a ThresholdNotifier which is designed to respond to the given threshold. +// UpdateThreshold must be called once before the threshold will be active. +func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) { + cgroups, err := cm.GetCgroupSubsystems() + if err != nil { + return nil, err + } + cgpath, found := cgroups.MountPoints["memory"] + if !found || len(cgpath) == 0 { + return nil, fmt.Errorf("memory cgroup mount point not found") + } + if isAllocatableEvictionThreshold(threshold) { + // for allocatable thresholds, point the cgroup notifier at the allocatable cgroup + cgpath += cgroupRoot + } + return &linuxMemoryThresholdNotifier{ + threshold: threshold, + cgroupPath: cgpath, + events: make(chan struct{}), + handler: handler, + factory: factory, + }, nil +} + +func (m *linuxMemoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error { + memoryStats := summary.Node.Memory + if isAllocatableEvictionThreshold(m.threshold) { + allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods) + if err != nil { + return err + } + memoryStats = allocatableContainer.Memory + } + if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil { + return fmt.Errorf("summary was incomplete. Expected MemoryStats and all subfields to be non-nil, but got %+v", memoryStats) + } + // Set threshold on usage to capacity - eviction_hard + inactive_file, + // since we want to be notified when working_set = capacity - eviction_hard + inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI) + capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI) + evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity) + memcgThreshold := capacity.DeepCopy() + memcgThreshold.Sub(*evictionThresholdQuantity) + memcgThreshold.Add(*inactiveFile) + + klog.V(3).InfoS("Eviction manager: setting notifier to capacity", "notifier", m.Description(), "capacity", memcgThreshold.String()) + if m.notifier != nil { + m.notifier.Stop() + } + newNotifier, err := m.factory.NewCgroupNotifier(m.cgroupPath, memoryUsageAttribute, memcgThreshold.Value()) + if err != nil { + return err + } + m.notifier = newNotifier + go m.notifier.Start(m.events) + return nil +} + +func (m *linuxMemoryThresholdNotifier) Start() { + klog.InfoS("Eviction manager: created memoryThresholdNotifier", "notifier", m.Description()) + for range m.events { + m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description())) + } +} + +func (m *linuxMemoryThresholdNotifier) Description() string { + var hard, allocatable string + if isHardEvictionThreshold(m.threshold) { + hard = "hard " + } else { + hard = "soft " + } + if isAllocatableEvictionThreshold(m.threshold) { + allocatable = "allocatable " + } + return fmt.Sprintf("%s%smemory eviction threshold", hard, allocatable) +} diff --git a/pkg/kubelet/eviction/memory_threshold_notifier_test.go b/pkg/kubelet/eviction/memory_threshold_notifier_test.go index 8e64988bf0c..4969db78385 100644 --- a/pkg/kubelet/eviction/memory_threshold_notifier_test.go +++ b/pkg/kubelet/eviction/memory_threshold_notifier_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + /* Copyright 2018 The Kubernetes Authors. @@ -58,8 +61,8 @@ func nodeSummary(available, workingSet, usage resource.Quantity, allocatable boo } } -func newTestMemoryThresholdNotifier(threshold evictionapi.Threshold, factory NotifierFactory, handler func(string)) *memoryThresholdNotifier { - return &memoryThresholdNotifier{ +func newTestMemoryThresholdNotifier(threshold evictionapi.Threshold, factory NotifierFactory, handler func(string)) *linuxMemoryThresholdNotifier { + return &linuxMemoryThresholdNotifier{ threshold: threshold, cgroupPath: testCgroupPath, events: make(chan struct{}), @@ -245,7 +248,7 @@ func TestThresholdDescription(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - m := &memoryThresholdNotifier{ + m := &linuxMemoryThresholdNotifier{ notifier: &MockCgroupNotifier{}, threshold: tc.evictionThreshold, cgroupPath: testCgroupPath, diff --git a/pkg/kubelet/eviction/memory_threshold_notifier_windows.go b/pkg/kubelet/eviction/memory_threshold_notifier_windows.go new file mode 100644 index 00000000000..8c014fa972a --- /dev/null +++ b/pkg/kubelet/eviction/memory_threshold_notifier_windows.go @@ -0,0 +1,101 @@ +//go:build windows +// +build windows + +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +import ( + "fmt" + "time" + + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/klog/v2" + statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" + "k8s.io/kubernetes/pkg/kubelet/winstats" +) + +type windowsMemoryThresholdNotifier struct { + threshold evictionapi.Threshold + events chan struct{} + handler func(string) +} + +var _ ThresholdNotifier = &windowsMemoryThresholdNotifier{} + +func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) { + klog.InfoS("Eviction manager: creating new WindowsMemoryThresholdNotifier") + return &windowsMemoryThresholdNotifier{ + threshold: threshold, + events: make(chan struct{}), + handler: handler, + }, nil +} + +func (m *windowsMemoryThresholdNotifier) Start() { + klog.InfoS("Eviction manager: starting windowsMemoryThresholdNotifier", "notifier", m.Description()) + go func() { + for true { + time.Sleep(notifierRefreshInterval) + m.checkMemoryUsage() + } + }() + + for range m.events { + m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description())) + } +} + +func (m *windowsMemoryThresholdNotifier) checkMemoryUsage() { + // Get global commit limit + perfInfo, err := winstats.GetPerformanceInfo() + if err != nil { + klog.ErrorS(err, "Eviction manager: error getting global memory status for node") + } + + commmiLimitBytes := perfInfo.CommitLimitPages * perfInfo.PageSize + capacity := resource.NewQuantity(int64(commmiLimitBytes), resource.BinarySI) + evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity) + + commitTotalBytes := perfInfo.CommitTotalPages * perfInfo.PageSize + commitAvailableBytes := commmiLimitBytes - commitTotalBytes + + if commitAvailableBytes <= uint64(evictionThresholdQuantity.Value()) { + m.events <- struct{}{} + } +} + +func (m *windowsMemoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error { + // Windows doesn't use cgroup notifiers to trigger eviction, so this function is a no-op. + // Instead the go-routine set up in Start() will poll the system for memory usage and + // trigger eviction when the threshold is crossed. + return nil +} + +func (m *windowsMemoryThresholdNotifier) Description() string { + var hard, allocatable string + if isHardEvictionThreshold(m.threshold) { + hard = "hard" + } else { + hard = "soft" + } + if isAllocatableEvictionThreshold(m.threshold) { + allocatable = "allocatable" + } + return fmt.Sprintf("%s %s memory eviction threshold", hard, allocatable) +} diff --git a/pkg/kubelet/server/stats/summary_sys_containers_windows.go b/pkg/kubelet/server/stats/summary_sys_containers_windows.go index 520e0018b59..b651c943a1c 100644 --- a/pkg/kubelet/server/stats/summary_sys_containers_windows.go +++ b/pkg/kubelet/server/stats/summary_sys_containers_windows.go @@ -23,17 +23,21 @@ import ( "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" "k8s.io/kubernetes/pkg/kubelet/cm" + "k8s.io/kubernetes/pkg/kubelet/winstats" ) func (sp *summaryProviderImpl) GetSystemContainersStats(nodeConfig cm.NodeConfig, podStats []statsapi.PodStats, updateStats bool) (stats []statsapi.ContainerStats) { stats = append(stats, sp.getSystemPodsCPUAndMemoryStats(nodeConfig, podStats, updateStats)) + stats = append(stats, sp.getSystemWindowsGlobalmemoryStats()) return stats } func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig cm.NodeConfig, podStats []statsapi.PodStats, updateStats bool) (stats []statsapi.ContainerStats) { stats = append(stats, sp.getSystemPodsCPUAndMemoryStats(nodeConfig, podStats, updateStats)) + stats = append(stats, sp.getSystemWindowsGlobalmemoryStats()) return stats } @@ -96,3 +100,27 @@ func (sp *summaryProviderImpl) getSystemPodsCPUAndMemoryStats(nodeConfig cm.Node return podsSummary } + +func (sp *summaryProviderImpl) getSystemWindowsGlobalmemoryStats() statsapi.ContainerStats { + now := metav1.NewTime(time.Now()) + globalMemorySummary := statsapi.ContainerStats{ + StartTime: now, + Memory: &statsapi.MemoryStats{}, + Name: statsapi.SystemContainerWindowsGlobalCommitMemory, + } + + perfInfo, err := winstats.GetPerformanceInfo() + if err != nil { + klog.Errorf("Failed to get Windows performance info: %v", err) + return globalMemorySummary + } + + commitLimitBytes := perfInfo.CommitLimitPages * perfInfo.PageSize + commitTotalBytes := perfInfo.CommitTotalPages * perfInfo.PageSize + commitAvailableBytes := commitLimitBytes - commitTotalBytes + globalMemorySummary.Memory.Time = now + globalMemorySummary.Memory.AvailableBytes = &commitAvailableBytes + globalMemorySummary.Memory.UsageBytes = &commitTotalBytes + + return globalMemorySummary +} diff --git a/pkg/kubelet/server/stats/summary_windows_test.go b/pkg/kubelet/server/stats/summary_windows_test.go index 27e74f285ec..21f46d2654f 100644 --- a/pkg/kubelet/server/stats/summary_windows_test.go +++ b/pkg/kubelet/server/stats/summary_windows_test.go @@ -79,13 +79,18 @@ func TestSummaryProvider(t *testing.T) { assert.Equal(summary.Node.Fs, rootFsStats) assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ContainerFs: imageFsStats, ImageFs: imageFsStats}) - assert.Equal(len(summary.Node.SystemContainers), 1) + assert.NoError(err) + assert.Equal(len(summary.Node.SystemContainers), 2) assert.Equal(summary.Node.SystemContainers[0].Name, "pods") assert.Equal(summary.Node.SystemContainers[0].CPU.UsageCoreNanoSeconds, podStats[0].CPU.UsageCoreNanoSeconds) assert.Equal(summary.Node.SystemContainers[0].CPU.UsageNanoCores, podStats[0].CPU.UsageNanoCores) assert.Equal(summary.Node.SystemContainers[0].Memory.WorkingSetBytes, podStats[0].Memory.WorkingSetBytes) assert.Equal(summary.Node.SystemContainers[0].Memory.UsageBytes, podStats[0].Memory.UsageBytes) assert.Equal(summary.Node.SystemContainers[0].Memory.AvailableBytes, podStats[0].Memory.AvailableBytes) + assert.Equal(summary.Node.SystemContainers[1].Name, statsapi.SystemContainerWindowsGlobalCommitMemory) + assert.NotEqual(nil, summary.Node.SystemContainers[1].Memory) + assert.NotEqual(nil, summary.Node.SystemContainers[1].Memory.AvailableBytes) + assert.NotEqual(nil, summary.Node.SystemContainers[1].Memory.UsageBytes) assert.Equal(summary.Pods, podStats) } diff --git a/pkg/kubelet/winstats/perfcounter_nodestats.go b/pkg/kubelet/winstats/perfcounter_nodestats_windows.go similarity index 89% rename from pkg/kubelet/winstats/perfcounter_nodestats.go rename to pkg/kubelet/winstats/perfcounter_nodestats_windows.go index c8412f1af2f..c673c973fc0 100644 --- a/pkg/kubelet/winstats/perfcounter_nodestats.go +++ b/pkg/kubelet/winstats/perfcounter_nodestats_windows.go @@ -56,10 +56,33 @@ type MemoryStatusEx struct { AvailExtendedVirtual uint64 } +// PerformanceInfo is the same as Windows structure PERFORMANCE_INFORMATION +// https://learn.microsoft.com/en-us/windows/win32/api/psapi/ns-psapi-performance_information +type PerformanceInformation struct { + cb uint32 + CommitTotalPages uint64 + CommitLimitPages uint64 + CommitPeakPages uint64 + PhysicalTotalPages uint64 + PhysicalAvailablePages uint64 + SystemCachePages uint64 + KernelTotalPages uint64 + KernelPagesPages uint64 + KernelNonpagedPages uint64 + PageSize uint64 + HandleCount uint32 + ProcessCount uint32 + ThreadCount uint32 +} + var ( + // kernel32.dll system calls modkernel32 = windows.NewLazySystemDLL("kernel32.dll") procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx") procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount") + // psapi.dll system calls + modpsapi = windows.NewLazySystemDLL("psapi.dll") + procGetPerformanceInfo = modpsapi.NewProc("GetPerformanceInfo") ) const allProcessorGroups = 0xFFFF @@ -294,6 +317,16 @@ func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) { return statex.TotalPhys, nil } +func GetPerformanceInfo() (*PerformanceInformation, error) { + var pi PerformanceInformation + pi.cb = uint32(unsafe.Sizeof(pi)) + ret, _, _ := procGetPerformanceInfo.Call(uintptr(unsafe.Pointer(&pi)), uintptr(pi.cb)) + if ret == 0 { + return nil, errors.New("unable to read Windows performance information") + } + return &pi, nil +} + func getBootID() (string, error) { regKey, err := registry.OpenKey(registry.LOCAL_MACHINE, bootIdRegistry, registry.READ) if err != nil { diff --git a/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go b/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go index 54355503f6a..b63a17ace5e 100644 --- a/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go +++ b/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go @@ -99,6 +99,9 @@ const ( SystemContainerMisc = "misc" // SystemContainerPods is the container name for the system container tracking user pods. SystemContainerPods = "pods" + // SystemContainerWindowsGlobalCommitMemory (only used on Windows) is the container name for the system container + // tracking global commit memory usage and is used for memory-pressure eviction. + SystemContainerWindowsGlobalCommitMemory = "windows-global-commit-memory" ) // ProcessStats are stats pertaining to processes.