mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-27 05:27:21 +00:00
Add support for memory pressure evictiong on Windows
Signed-off-by: Mark Rossetti <marosset@microsoft.com>
This commit is contained in:
parent
5d40866fae
commit
0411a3d565
@ -1,5 +1,5 @@
|
|||||||
//go:build !linux
|
//go:build !linux && !windows
|
||||||
// +build !linux
|
// +build !linux,!windows
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Copyright 2018 The Kubernetes Authors.
|
Copyright 2018 The Kubernetes Authors.
|
||||||
|
30
pkg/kubelet/eviction/defaults_windows.go
Normal file
30
pkg/kubelet/eviction/defaults_windows.go
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
//go:build windows
|
||||||
|
// +build windows
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright 2024 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eviction
|
||||||
|
|
||||||
|
// DefaultEvictionHard includes default options for hard eviction for Windows.
|
||||||
|
// Note: On Linux, the default eviction hard threshold is 100Mi for memory.available
|
||||||
|
// but Windows generally requires more memory reserved for system processes so we'll
|
||||||
|
// set the default threshold to 500Mi.
|
||||||
|
var DefaultEvictionHard = map[string]string{
|
||||||
|
"memory.available": "500Mi",
|
||||||
|
"nodefs.available": "10%",
|
||||||
|
"imagefs.available": "15%",
|
||||||
|
}
|
@ -19,6 +19,7 @@ package eviction
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"runtime"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -185,7 +186,8 @@ func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePod
|
|||||||
klog.InfoS(message)
|
klog.InfoS(message)
|
||||||
m.synchronize(diskInfoProvider, podFunc)
|
m.synchronize(diskInfoProvider, podFunc)
|
||||||
}
|
}
|
||||||
if m.config.KernelMemcgNotification {
|
klog.InfoS("Eviction manager: starting control loop")
|
||||||
|
if m.config.KernelMemcgNotification || runtime.GOOS == "windows" {
|
||||||
for _, threshold := range m.config.Thresholds {
|
for _, threshold := range m.config.Thresholds {
|
||||||
if threshold.Signal == evictionapi.SignalMemoryAvailable || threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable {
|
if threshold.Signal == evictionapi.SignalMemoryAvailable || threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable {
|
||||||
notifier, err := NewMemoryThresholdNotifier(threshold, m.config.PodCgroupRoot, &CgroupNotifierFactory{}, thresholdHandler)
|
notifier, err := NewMemoryThresholdNotifier(threshold, m.config.PodCgroupRoot, &CgroupNotifierFactory{}, thresholdHandler)
|
||||||
|
@ -846,13 +846,11 @@ func makeSignalObservations(summary *statsapi.Summary) (signalObservations, stat
|
|||||||
// build an evaluation context for current eviction signals
|
// build an evaluation context for current eviction signals
|
||||||
result := signalObservations{}
|
result := signalObservations{}
|
||||||
|
|
||||||
if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
|
memoryAvailableSignal := makeMemoryAvailableSignalObservation(summary)
|
||||||
result[evictionapi.SignalMemoryAvailable] = signalObservation{
|
if memoryAvailableSignal != nil {
|
||||||
available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
|
result[evictionapi.SignalMemoryAvailable] = *memoryAvailableSignal
|
||||||
capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
|
|
||||||
time: memory.Time,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods); err != nil {
|
if allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods); err != nil {
|
||||||
klog.ErrorS(err, "Eviction manager: failed to construct signal", "signal", evictionapi.SignalAllocatableMemoryAvailable)
|
klog.ErrorS(err, "Eviction manager: failed to construct signal", "signal", evictionapi.SignalAllocatableMemoryAvailable)
|
||||||
} else {
|
} else {
|
||||||
|
37
pkg/kubelet/eviction/helpers_others.go
Normal file
37
pkg/kubelet/eviction/helpers_others.go
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
//go:build !windows
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright 2024 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eviction
|
||||||
|
|
||||||
|
import (
|
||||||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func makeMemoryAvailableSignalObservation(summary *statsapi.Summary) *signalObservation {
|
||||||
|
if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
|
||||||
|
return &signalObservation{
|
||||||
|
available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
|
||||||
|
capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
|
||||||
|
time: memory.Time,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
@ -1279,6 +1279,7 @@ func TestMakeSignalObservations(t *testing.T) {
|
|||||||
Inodes: &nodeFsInodes,
|
Inodes: &nodeFsInodes,
|
||||||
},
|
},
|
||||||
SystemContainers: []statsapi.ContainerStats{
|
SystemContainers: []statsapi.ContainerStats{
|
||||||
|
// Used for memory signal observations on linux
|
||||||
{
|
{
|
||||||
Name: statsapi.SystemContainerPods,
|
Name: statsapi.SystemContainerPods,
|
||||||
Memory: &statsapi.MemoryStats{
|
Memory: &statsapi.MemoryStats{
|
||||||
@ -1286,6 +1287,14 @@ func TestMakeSignalObservations(t *testing.T) {
|
|||||||
WorkingSetBytes: &nodeWorkingSetBytes,
|
WorkingSetBytes: &nodeWorkingSetBytes,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
// Used for memory signal observations on windows
|
||||||
|
{
|
||||||
|
Name: statsapi.SystemContainerWindowsGlobalCommitMemory,
|
||||||
|
Memory: &statsapi.MemoryStats{
|
||||||
|
AvailableBytes: &nodeAvailableBytes,
|
||||||
|
UsageBytes: &nodeWorkingSetBytes,
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Pods: []statsapi.PodStats{},
|
Pods: []statsapi.PodStats{},
|
||||||
@ -1307,7 +1316,7 @@ func TestMakeSignalObservations(t *testing.T) {
|
|||||||
actualObservations, statsFunc := makeSignalObservations(fakeStats)
|
actualObservations, statsFunc := makeSignalObservations(fakeStats)
|
||||||
allocatableMemQuantity, found := actualObservations[evictionapi.SignalAllocatableMemoryAvailable]
|
allocatableMemQuantity, found := actualObservations[evictionapi.SignalAllocatableMemoryAvailable]
|
||||||
if !found {
|
if !found {
|
||||||
t.Errorf("Expected allocatable memory observation, but didnt find one")
|
t.Errorf("Expected allocatable memory observation, but didn't find one")
|
||||||
}
|
}
|
||||||
if expectedBytes := int64(nodeAvailableBytes); allocatableMemQuantity.available.Value() != expectedBytes {
|
if expectedBytes := int64(nodeAvailableBytes); allocatableMemQuantity.available.Value() != expectedBytes {
|
||||||
t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.available.Value())
|
t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.available.Value())
|
||||||
|
48
pkg/kubelet/eviction/helpers_windows.go
Normal file
48
pkg/kubelet/eviction/helpers_windows.go
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
//go:build windows
|
||||||
|
// +build windows
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright 2024 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eviction
|
||||||
|
|
||||||
|
import (
|
||||||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||||
|
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||||
|
)
|
||||||
|
|
||||||
|
func makeMemoryAvailableSignalObservation(summary *statsapi.Summary) *signalObservation {
|
||||||
|
klog.V(4).InfoS("Eviction manager: building memory signal observations for windows")
|
||||||
|
sysContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerWindowsGlobalCommitMemory)
|
||||||
|
if err != nil {
|
||||||
|
klog.ErrorS(err, "Eviction manager: failed to construct signal", "signal", evictionapi.SignalMemoryAvailable)
|
||||||
|
}
|
||||||
|
if memory := sysContainer.Memory; memory != nil && memory.AvailableBytes != nil && memory.UsageBytes != nil {
|
||||||
|
klog.V(4).InfoS(
|
||||||
|
"Eviction manager: memory signal observations for windows",
|
||||||
|
"Available", *memory.AvailableBytes,
|
||||||
|
"Usage", *memory.UsageBytes)
|
||||||
|
return &signalObservation{
|
||||||
|
available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
|
||||||
|
capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.UsageBytes), resource.BinarySI),
|
||||||
|
time: memory.Time,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
@ -16,119 +16,19 @@ limitations under the License.
|
|||||||
|
|
||||||
package eviction
|
package eviction
|
||||||
|
|
||||||
import (
|
import "time"
|
||||||
"fmt"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"k8s.io/klog/v2"
|
|
||||||
|
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
|
||||||
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
|
||||||
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
memoryUsageAttribute = "memory.usage_in_bytes"
|
|
||||||
// this prevents constantly updating the memcg notifier if synchronize
|
// this prevents constantly updating the memcg notifier if synchronize
|
||||||
// is run frequently.
|
// is run frequently.
|
||||||
notifierRefreshInterval = 10 * time.Second
|
notifierRefreshInterval = 10 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
type memoryThresholdNotifier struct {
|
|
||||||
threshold evictionapi.Threshold
|
|
||||||
cgroupPath string
|
|
||||||
events chan struct{}
|
|
||||||
factory NotifierFactory
|
|
||||||
handler func(string)
|
|
||||||
notifier CgroupNotifier
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ ThresholdNotifier = &memoryThresholdNotifier{}
|
|
||||||
|
|
||||||
// NewMemoryThresholdNotifier creates a ThresholdNotifier which is designed to respond to the given threshold.
|
|
||||||
// UpdateThreshold must be called once before the threshold will be active.
|
|
||||||
func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) {
|
|
||||||
cgroups, err := cm.GetCgroupSubsystems()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
cgpath, found := cgroups.MountPoints["memory"]
|
|
||||||
if !found || len(cgpath) == 0 {
|
|
||||||
return nil, fmt.Errorf("memory cgroup mount point not found")
|
|
||||||
}
|
|
||||||
if isAllocatableEvictionThreshold(threshold) {
|
|
||||||
// for allocatable thresholds, point the cgroup notifier at the allocatable cgroup
|
|
||||||
cgpath += cgroupRoot
|
|
||||||
}
|
|
||||||
return &memoryThresholdNotifier{
|
|
||||||
threshold: threshold,
|
|
||||||
cgroupPath: cgpath,
|
|
||||||
events: make(chan struct{}),
|
|
||||||
handler: handler,
|
|
||||||
factory: factory,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *memoryThresholdNotifier) Start() {
|
|
||||||
klog.InfoS("Eviction manager: created memoryThresholdNotifier", "notifier", m.Description())
|
|
||||||
for range m.events {
|
|
||||||
m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *memoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error {
|
|
||||||
memoryStats := summary.Node.Memory
|
|
||||||
if isAllocatableEvictionThreshold(m.threshold) {
|
|
||||||
allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
memoryStats = allocatableContainer.Memory
|
|
||||||
}
|
|
||||||
if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil {
|
|
||||||
return fmt.Errorf("summary was incomplete. Expected MemoryStats and all subfields to be non-nil, but got %+v", memoryStats)
|
|
||||||
}
|
|
||||||
// Set threshold on usage to capacity - eviction_hard + inactive_file,
|
|
||||||
// since we want to be notified when working_set = capacity - eviction_hard
|
|
||||||
inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI)
|
|
||||||
capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI)
|
|
||||||
evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity)
|
|
||||||
memcgThreshold := capacity.DeepCopy()
|
|
||||||
memcgThreshold.Sub(*evictionThresholdQuantity)
|
|
||||||
memcgThreshold.Add(*inactiveFile)
|
|
||||||
|
|
||||||
klog.V(3).InfoS("Eviction manager: setting notifier to capacity", "notifier", m.Description(), "capacity", memcgThreshold.String())
|
|
||||||
if m.notifier != nil {
|
|
||||||
m.notifier.Stop()
|
|
||||||
}
|
|
||||||
newNotifier, err := m.factory.NewCgroupNotifier(m.cgroupPath, memoryUsageAttribute, memcgThreshold.Value())
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.notifier = newNotifier
|
|
||||||
go m.notifier.Start(m.events)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *memoryThresholdNotifier) Description() string {
|
|
||||||
var hard, allocatable string
|
|
||||||
if isHardEvictionThreshold(m.threshold) {
|
|
||||||
hard = "hard "
|
|
||||||
} else {
|
|
||||||
hard = "soft "
|
|
||||||
}
|
|
||||||
if isAllocatableEvictionThreshold(m.threshold) {
|
|
||||||
allocatable = "allocatable "
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("%s%smemory eviction threshold", hard, allocatable)
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ NotifierFactory = &CgroupNotifierFactory{}
|
|
||||||
|
|
||||||
// CgroupNotifierFactory knows how to make CgroupNotifiers which integrate with the kernel
|
// CgroupNotifierFactory knows how to make CgroupNotifiers which integrate with the kernel
|
||||||
type CgroupNotifierFactory struct{}
|
type CgroupNotifierFactory struct{}
|
||||||
|
|
||||||
|
var _ NotifierFactory = &CgroupNotifierFactory{}
|
||||||
|
|
||||||
// NewCgroupNotifier implements the NotifierFactory interface
|
// NewCgroupNotifier implements the NotifierFactory interface
|
||||||
func (n *CgroupNotifierFactory) NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
|
func (n *CgroupNotifierFactory) NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
|
||||||
return NewCgroupNotifier(path, attribute, threshold)
|
return NewCgroupNotifier(path, attribute, threshold)
|
||||||
|
123
pkg/kubelet/eviction/memory_threshold_notifier_others.go
Normal file
123
pkg/kubelet/eviction/memory_threshold_notifier_others.go
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
//go:build !windows
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright 2024 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eviction
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||||
|
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
memoryUsageAttribute = "memory.usage_in_bytes"
|
||||||
|
)
|
||||||
|
|
||||||
|
type linuxMemoryThresholdNotifier struct {
|
||||||
|
threshold evictionapi.Threshold
|
||||||
|
cgroupPath string
|
||||||
|
events chan struct{}
|
||||||
|
factory NotifierFactory
|
||||||
|
handler func(string)
|
||||||
|
notifier CgroupNotifier
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ ThresholdNotifier = &linuxMemoryThresholdNotifier{}
|
||||||
|
|
||||||
|
// NewMemoryThresholdNotifier creates a ThresholdNotifier which is designed to respond to the given threshold.
|
||||||
|
// UpdateThreshold must be called once before the threshold will be active.
|
||||||
|
func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) {
|
||||||
|
cgroups, err := cm.GetCgroupSubsystems()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
cgpath, found := cgroups.MountPoints["memory"]
|
||||||
|
if !found || len(cgpath) == 0 {
|
||||||
|
return nil, fmt.Errorf("memory cgroup mount point not found")
|
||||||
|
}
|
||||||
|
if isAllocatableEvictionThreshold(threshold) {
|
||||||
|
// for allocatable thresholds, point the cgroup notifier at the allocatable cgroup
|
||||||
|
cgpath += cgroupRoot
|
||||||
|
}
|
||||||
|
return &linuxMemoryThresholdNotifier{
|
||||||
|
threshold: threshold,
|
||||||
|
cgroupPath: cgpath,
|
||||||
|
events: make(chan struct{}),
|
||||||
|
handler: handler,
|
||||||
|
factory: factory,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *linuxMemoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error {
|
||||||
|
memoryStats := summary.Node.Memory
|
||||||
|
if isAllocatableEvictionThreshold(m.threshold) {
|
||||||
|
allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
memoryStats = allocatableContainer.Memory
|
||||||
|
}
|
||||||
|
if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil {
|
||||||
|
return fmt.Errorf("summary was incomplete. Expected MemoryStats and all subfields to be non-nil, but got %+v", memoryStats)
|
||||||
|
}
|
||||||
|
// Set threshold on usage to capacity - eviction_hard + inactive_file,
|
||||||
|
// since we want to be notified when working_set = capacity - eviction_hard
|
||||||
|
inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI)
|
||||||
|
capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI)
|
||||||
|
evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity)
|
||||||
|
memcgThreshold := capacity.DeepCopy()
|
||||||
|
memcgThreshold.Sub(*evictionThresholdQuantity)
|
||||||
|
memcgThreshold.Add(*inactiveFile)
|
||||||
|
|
||||||
|
klog.V(3).InfoS("Eviction manager: setting notifier to capacity", "notifier", m.Description(), "capacity", memcgThreshold.String())
|
||||||
|
if m.notifier != nil {
|
||||||
|
m.notifier.Stop()
|
||||||
|
}
|
||||||
|
newNotifier, err := m.factory.NewCgroupNotifier(m.cgroupPath, memoryUsageAttribute, memcgThreshold.Value())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
m.notifier = newNotifier
|
||||||
|
go m.notifier.Start(m.events)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *linuxMemoryThresholdNotifier) Start() {
|
||||||
|
klog.InfoS("Eviction manager: created memoryThresholdNotifier", "notifier", m.Description())
|
||||||
|
for range m.events {
|
||||||
|
m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *linuxMemoryThresholdNotifier) Description() string {
|
||||||
|
var hard, allocatable string
|
||||||
|
if isHardEvictionThreshold(m.threshold) {
|
||||||
|
hard = "hard "
|
||||||
|
} else {
|
||||||
|
hard = "soft "
|
||||||
|
}
|
||||||
|
if isAllocatableEvictionThreshold(m.threshold) {
|
||||||
|
allocatable = "allocatable "
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s%smemory eviction threshold", hard, allocatable)
|
||||||
|
}
|
@ -1,3 +1,6 @@
|
|||||||
|
//go:build linux
|
||||||
|
// +build linux
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Copyright 2018 The Kubernetes Authors.
|
Copyright 2018 The Kubernetes Authors.
|
||||||
|
|
||||||
@ -58,8 +61,8 @@ func nodeSummary(available, workingSet, usage resource.Quantity, allocatable boo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTestMemoryThresholdNotifier(threshold evictionapi.Threshold, factory NotifierFactory, handler func(string)) *memoryThresholdNotifier {
|
func newTestMemoryThresholdNotifier(threshold evictionapi.Threshold, factory NotifierFactory, handler func(string)) *linuxMemoryThresholdNotifier {
|
||||||
return &memoryThresholdNotifier{
|
return &linuxMemoryThresholdNotifier{
|
||||||
threshold: threshold,
|
threshold: threshold,
|
||||||
cgroupPath: testCgroupPath,
|
cgroupPath: testCgroupPath,
|
||||||
events: make(chan struct{}),
|
events: make(chan struct{}),
|
||||||
@ -245,7 +248,7 @@ func TestThresholdDescription(t *testing.T) {
|
|||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
m := &memoryThresholdNotifier{
|
m := &linuxMemoryThresholdNotifier{
|
||||||
notifier: &MockCgroupNotifier{},
|
notifier: &MockCgroupNotifier{},
|
||||||
threshold: tc.evictionThreshold,
|
threshold: tc.evictionThreshold,
|
||||||
cgroupPath: testCgroupPath,
|
cgroupPath: testCgroupPath,
|
||||||
|
101
pkg/kubelet/eviction/memory_threshold_notifier_windows.go
Normal file
101
pkg/kubelet/eviction/memory_threshold_notifier_windows.go
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
//go:build windows
|
||||||
|
// +build windows
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright 2024 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eviction
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||||
|
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/winstats"
|
||||||
|
)
|
||||||
|
|
||||||
|
type windowsMemoryThresholdNotifier struct {
|
||||||
|
threshold evictionapi.Threshold
|
||||||
|
events chan struct{}
|
||||||
|
handler func(string)
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ ThresholdNotifier = &windowsMemoryThresholdNotifier{}
|
||||||
|
|
||||||
|
func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) {
|
||||||
|
klog.InfoS("Eviction manager: creating new WindowsMemoryThresholdNotifier")
|
||||||
|
return &windowsMemoryThresholdNotifier{
|
||||||
|
threshold: threshold,
|
||||||
|
events: make(chan struct{}),
|
||||||
|
handler: handler,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *windowsMemoryThresholdNotifier) Start() {
|
||||||
|
klog.InfoS("Eviction manager: starting windowsMemoryThresholdNotifier", "notifier", m.Description())
|
||||||
|
go func() {
|
||||||
|
for true {
|
||||||
|
time.Sleep(notifierRefreshInterval)
|
||||||
|
m.checkMemoryUsage()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
for range m.events {
|
||||||
|
m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *windowsMemoryThresholdNotifier) checkMemoryUsage() {
|
||||||
|
// Get global commit limit
|
||||||
|
perfInfo, err := winstats.GetPerformanceInfo()
|
||||||
|
if err != nil {
|
||||||
|
klog.ErrorS(err, "Eviction manager: error getting global memory status for node")
|
||||||
|
}
|
||||||
|
|
||||||
|
commmiLimitBytes := perfInfo.CommitLimitPages * perfInfo.PageSize
|
||||||
|
capacity := resource.NewQuantity(int64(commmiLimitBytes), resource.BinarySI)
|
||||||
|
evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity)
|
||||||
|
|
||||||
|
commitTotalBytes := perfInfo.CommitTotalPages * perfInfo.PageSize
|
||||||
|
commitAvailableBytes := commmiLimitBytes - commitTotalBytes
|
||||||
|
|
||||||
|
if commitAvailableBytes <= uint64(evictionThresholdQuantity.Value()) {
|
||||||
|
m.events <- struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *windowsMemoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error {
|
||||||
|
// Windows doesn't use cgroup notifiers to trigger eviction, so this function is a no-op.
|
||||||
|
// Instead the go-routine set up in Start() will poll the system for memory usage and
|
||||||
|
// trigger eviction when the threshold is crossed.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *windowsMemoryThresholdNotifier) Description() string {
|
||||||
|
var hard, allocatable string
|
||||||
|
if isHardEvictionThreshold(m.threshold) {
|
||||||
|
hard = "hard"
|
||||||
|
} else {
|
||||||
|
hard = "soft"
|
||||||
|
}
|
||||||
|
if isAllocatableEvictionThreshold(m.threshold) {
|
||||||
|
allocatable = "allocatable"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s %s memory eviction threshold", hard, allocatable)
|
||||||
|
}
|
@ -23,17 +23,21 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/winstats"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (sp *summaryProviderImpl) GetSystemContainersStats(nodeConfig cm.NodeConfig, podStats []statsapi.PodStats, updateStats bool) (stats []statsapi.ContainerStats) {
|
func (sp *summaryProviderImpl) GetSystemContainersStats(nodeConfig cm.NodeConfig, podStats []statsapi.PodStats, updateStats bool) (stats []statsapi.ContainerStats) {
|
||||||
stats = append(stats, sp.getSystemPodsCPUAndMemoryStats(nodeConfig, podStats, updateStats))
|
stats = append(stats, sp.getSystemPodsCPUAndMemoryStats(nodeConfig, podStats, updateStats))
|
||||||
|
stats = append(stats, sp.getSystemWindowsGlobalmemoryStats())
|
||||||
return stats
|
return stats
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig cm.NodeConfig, podStats []statsapi.PodStats, updateStats bool) (stats []statsapi.ContainerStats) {
|
func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig cm.NodeConfig, podStats []statsapi.PodStats, updateStats bool) (stats []statsapi.ContainerStats) {
|
||||||
stats = append(stats, sp.getSystemPodsCPUAndMemoryStats(nodeConfig, podStats, updateStats))
|
stats = append(stats, sp.getSystemPodsCPUAndMemoryStats(nodeConfig, podStats, updateStats))
|
||||||
|
stats = append(stats, sp.getSystemWindowsGlobalmemoryStats())
|
||||||
return stats
|
return stats
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,3 +100,27 @@ func (sp *summaryProviderImpl) getSystemPodsCPUAndMemoryStats(nodeConfig cm.Node
|
|||||||
|
|
||||||
return podsSummary
|
return podsSummary
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (sp *summaryProviderImpl) getSystemWindowsGlobalmemoryStats() statsapi.ContainerStats {
|
||||||
|
now := metav1.NewTime(time.Now())
|
||||||
|
globalMemorySummary := statsapi.ContainerStats{
|
||||||
|
StartTime: now,
|
||||||
|
Memory: &statsapi.MemoryStats{},
|
||||||
|
Name: statsapi.SystemContainerWindowsGlobalCommitMemory,
|
||||||
|
}
|
||||||
|
|
||||||
|
perfInfo, err := winstats.GetPerformanceInfo()
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("Failed to get Windows performance info: %v", err)
|
||||||
|
return globalMemorySummary
|
||||||
|
}
|
||||||
|
|
||||||
|
commitLimitBytes := perfInfo.CommitLimitPages * perfInfo.PageSize
|
||||||
|
commitTotalBytes := perfInfo.CommitTotalPages * perfInfo.PageSize
|
||||||
|
commitAvailableBytes := commitLimitBytes - commitTotalBytes
|
||||||
|
globalMemorySummary.Memory.Time = now
|
||||||
|
globalMemorySummary.Memory.AvailableBytes = &commitAvailableBytes
|
||||||
|
globalMemorySummary.Memory.UsageBytes = &commitTotalBytes
|
||||||
|
|
||||||
|
return globalMemorySummary
|
||||||
|
}
|
||||||
|
@ -79,13 +79,18 @@ func TestSummaryProvider(t *testing.T) {
|
|||||||
assert.Equal(summary.Node.Fs, rootFsStats)
|
assert.Equal(summary.Node.Fs, rootFsStats)
|
||||||
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ContainerFs: imageFsStats, ImageFs: imageFsStats})
|
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ContainerFs: imageFsStats, ImageFs: imageFsStats})
|
||||||
|
|
||||||
assert.Equal(len(summary.Node.SystemContainers), 1)
|
assert.NoError(err)
|
||||||
|
assert.Equal(len(summary.Node.SystemContainers), 2)
|
||||||
assert.Equal(summary.Node.SystemContainers[0].Name, "pods")
|
assert.Equal(summary.Node.SystemContainers[0].Name, "pods")
|
||||||
assert.Equal(summary.Node.SystemContainers[0].CPU.UsageCoreNanoSeconds, podStats[0].CPU.UsageCoreNanoSeconds)
|
assert.Equal(summary.Node.SystemContainers[0].CPU.UsageCoreNanoSeconds, podStats[0].CPU.UsageCoreNanoSeconds)
|
||||||
assert.Equal(summary.Node.SystemContainers[0].CPU.UsageNanoCores, podStats[0].CPU.UsageNanoCores)
|
assert.Equal(summary.Node.SystemContainers[0].CPU.UsageNanoCores, podStats[0].CPU.UsageNanoCores)
|
||||||
assert.Equal(summary.Node.SystemContainers[0].Memory.WorkingSetBytes, podStats[0].Memory.WorkingSetBytes)
|
assert.Equal(summary.Node.SystemContainers[0].Memory.WorkingSetBytes, podStats[0].Memory.WorkingSetBytes)
|
||||||
assert.Equal(summary.Node.SystemContainers[0].Memory.UsageBytes, podStats[0].Memory.UsageBytes)
|
assert.Equal(summary.Node.SystemContainers[0].Memory.UsageBytes, podStats[0].Memory.UsageBytes)
|
||||||
assert.Equal(summary.Node.SystemContainers[0].Memory.AvailableBytes, podStats[0].Memory.AvailableBytes)
|
assert.Equal(summary.Node.SystemContainers[0].Memory.AvailableBytes, podStats[0].Memory.AvailableBytes)
|
||||||
|
assert.Equal(summary.Node.SystemContainers[1].Name, statsapi.SystemContainerWindowsGlobalCommitMemory)
|
||||||
|
assert.NotEqual(nil, summary.Node.SystemContainers[1].Memory)
|
||||||
|
assert.NotEqual(nil, summary.Node.SystemContainers[1].Memory.AvailableBytes)
|
||||||
|
assert.NotEqual(nil, summary.Node.SystemContainers[1].Memory.UsageBytes)
|
||||||
assert.Equal(summary.Pods, podStats)
|
assert.Equal(summary.Pods, podStats)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,10 +56,33 @@ type MemoryStatusEx struct {
|
|||||||
AvailExtendedVirtual uint64
|
AvailExtendedVirtual uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PerformanceInfo is the same as Windows structure PERFORMANCE_INFORMATION
|
||||||
|
// https://learn.microsoft.com/en-us/windows/win32/api/psapi/ns-psapi-performance_information
|
||||||
|
type PerformanceInformation struct {
|
||||||
|
cb uint32
|
||||||
|
CommitTotalPages uint64
|
||||||
|
CommitLimitPages uint64
|
||||||
|
CommitPeakPages uint64
|
||||||
|
PhysicalTotalPages uint64
|
||||||
|
PhysicalAvailablePages uint64
|
||||||
|
SystemCachePages uint64
|
||||||
|
KernelTotalPages uint64
|
||||||
|
KernelPagesPages uint64
|
||||||
|
KernelNonpagedPages uint64
|
||||||
|
PageSize uint64
|
||||||
|
HandleCount uint32
|
||||||
|
ProcessCount uint32
|
||||||
|
ThreadCount uint32
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
// kernel32.dll system calls
|
||||||
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
|
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
|
||||||
procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx")
|
procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx")
|
||||||
procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount")
|
procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount")
|
||||||
|
// psapi.dll system calls
|
||||||
|
modpsapi = windows.NewLazySystemDLL("psapi.dll")
|
||||||
|
procGetPerformanceInfo = modpsapi.NewProc("GetPerformanceInfo")
|
||||||
)
|
)
|
||||||
|
|
||||||
const allProcessorGroups = 0xFFFF
|
const allProcessorGroups = 0xFFFF
|
||||||
@ -294,6 +317,16 @@ func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) {
|
|||||||
return statex.TotalPhys, nil
|
return statex.TotalPhys, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetPerformanceInfo() (*PerformanceInformation, error) {
|
||||||
|
var pi PerformanceInformation
|
||||||
|
pi.cb = uint32(unsafe.Sizeof(pi))
|
||||||
|
ret, _, _ := procGetPerformanceInfo.Call(uintptr(unsafe.Pointer(&pi)), uintptr(pi.cb))
|
||||||
|
if ret == 0 {
|
||||||
|
return nil, errors.New("unable to read Windows performance information")
|
||||||
|
}
|
||||||
|
return &pi, nil
|
||||||
|
}
|
||||||
|
|
||||||
func getBootID() (string, error) {
|
func getBootID() (string, error) {
|
||||||
regKey, err := registry.OpenKey(registry.LOCAL_MACHINE, bootIdRegistry, registry.READ)
|
regKey, err := registry.OpenKey(registry.LOCAL_MACHINE, bootIdRegistry, registry.READ)
|
||||||
if err != nil {
|
if err != nil {
|
@ -99,6 +99,9 @@ const (
|
|||||||
SystemContainerMisc = "misc"
|
SystemContainerMisc = "misc"
|
||||||
// SystemContainerPods is the container name for the system container tracking user pods.
|
// SystemContainerPods is the container name for the system container tracking user pods.
|
||||||
SystemContainerPods = "pods"
|
SystemContainerPods = "pods"
|
||||||
|
// SystemContainerWindowsGlobalCommitMemory (only used on Windows) is the container name for the system container
|
||||||
|
// tracking global commit memory usage and is used for memory-pressure eviction.
|
||||||
|
SystemContainerWindowsGlobalCommitMemory = "windows-global-commit-memory"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ProcessStats are stats pertaining to processes.
|
// ProcessStats are stats pertaining to processes.
|
||||||
|
Loading…
Reference in New Issue
Block a user