Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #118770 from marquiz/devel/cgroup-driver-autoconfig
kubelet: get cgroup driver config from CRI
Commit 1fef8fd51d
@@ -35,6 +35,8 @@ import (
     "github.com/coreos/go-systemd/v22/daemon"
     "github.com/spf13/cobra"
     "github.com/spf13/pflag"
+    "google.golang.org/grpc/codes"
+    "google.golang.org/grpc/status"
     "k8s.io/klog/v2"
     "k8s.io/mount-utils"
 
@@ -76,6 +78,7 @@ import (
     "k8s.io/component-base/version"
     "k8s.io/component-base/version/verflag"
     nodeutil "k8s.io/component-helpers/node/util"
+    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
     kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
     "k8s.io/kubernetes/cmd/kubelet/app/options"
     "k8s.io/kubernetes/pkg/api/legacyscheme"
@@ -625,6 +628,17 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
         runAuthenticatorCAReload(ctx.Done())
     }
 
+    if err := kubelet.PreInitRuntimeService(&s.KubeletConfiguration, kubeDeps); err != nil {
+        return err
+    }
+
+    // Get cgroup driver setting from CRI
+    if utilfeature.DefaultFeatureGate.Enabled(features.KubeletCgroupDriverFromCRI) {
+        if err := getCgroupDriverFromCRI(ctx, s, kubeDeps); err != nil {
+            return err
+        }
+    }
+
     var cgroupRoots []string
     nodeAllocatableRoot := cm.NodeAllocatableRoot(s.CgroupRoot, s.CgroupsPerQOS, s.CgroupDriver)
     cgroupRoots = append(cgroupRoots, nodeAllocatableRoot)
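
The ordering above is the point of this hunk: the cgroup driver has to be settled (possibly overridden from the CRI) before cm.NodeAllocatableRoot derives any cgroup paths from s.CgroupDriver, because the two drivers address the same logical cgroup by different names. The sketch below only illustrates that naming difference; it is not the kubelet's pkg/kubelet/cm logic, and the toCgroupfs/toSystemd helpers are made-up names.

package main

import (
    "fmt"
    "path"
    "strings"
)

// toCgroupfs renders an abstract cgroup name the way the cgroupfs driver
// addresses it: a plain path under the cgroup filesystem root.
func toCgroupfs(parts []string) string {
    return "/" + path.Join(parts...)
}

// toSystemd renders the leaf as a systemd slice unit name; systemd encodes
// the hierarchy into the unit name itself.
func toSystemd(parts []string) string {
    return strings.Join(parts, "-") + ".slice"
}

func main() {
    name := []string{"kubepods", "besteffort"}
    fmt.Println(toCgroupfs(name)) // /kubepods/besteffort
    fmt.Println(toSystemd(name))  // kubepods-besteffort.slice
}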
@@ -775,11 +789,6 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
         klog.InfoS("Failed to ApplyOOMScoreAdj", "err", err)
     }
 
-    err = kubelet.PreInitRuntimeService(&s.KubeletConfiguration, kubeDeps)
-    if err != nil {
-        return err
-    }
-
     if err := RunKubelet(s, kubeDeps, s.RunOnce); err != nil {
         return err
     }
@@ -1282,3 +1291,51 @@ func newTracerProvider(s *options.KubeletServer) (oteltrace.TracerProvider, erro
     }
     return tp, nil
 }
+
+func getCgroupDriverFromCRI(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Dependencies) error {
+    klog.V(4).InfoS("Getting CRI runtime configuration information")
+
+    var (
+        runtimeConfig *runtimeapi.RuntimeConfigResponse
+        err           error
+    )
+    // Retry a couple of times, hoping that any errors are transient.
+    // Fail quickly on known, non transient errors.
+    for i := 0; i < 3; i++ {
+        runtimeConfig, err = kubeDeps.RemoteRuntimeService.RuntimeConfig(ctx)
+        if err != nil {
+            s, ok := status.FromError(err)
+            if !ok || s.Code() != codes.Unimplemented {
+                // We could introduce a backoff delay or jitter, but this is largely catching cases
+                // where the runtime is still starting up and we request too early.
+                // Give it a little more time.
+                time.Sleep(time.Second * 2)
+                continue
+            }
+            // CRI implementation doesn't support RuntimeConfig, fallback
+            klog.InfoS("CRI implementation should be updated to support RuntimeConfig when KubeletCgroupDriverFromCRI feature gate has been enabled. Falling back to using cgroupDriver from kubelet config.")
+            return nil
+        }
+    }
+    if err != nil {
+        return err
+    }
+
+    // Calling GetLinux().GetCgroupDriver() won't segfault, but it will always default to systemd
+    // which is not intended by the fields not being populated
+    linuxConfig := runtimeConfig.GetLinux()
+    if linuxConfig == nil {
+        return nil
+    }
+
+    switch d := linuxConfig.GetCgroupDriver(); d {
+    case runtimeapi.CgroupDriver_SYSTEMD:
+        s.CgroupDriver = "systemd"
+    case runtimeapi.CgroupDriver_CGROUPFS:
+        s.CgroupDriver = "cgroupfs"
+    default:
+        return fmt.Errorf("runtime returned an unknown cgroup driver %d", d)
+    }
+    klog.InfoS("Using cgroup driver setting received from the CRI runtime", "cgroupDriver", s.CgroupDriver)
+    return nil
+}
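
The retry loop's fallback hinges on distinguishing "the runtime does not implement RuntimeConfig at all" (codes.Unimplemented, fall back to the kubelet's own cgroupDriver setting) from "the runtime is not ready yet" (anything else, retry). A minimal self-contained sketch of that check, using the same grpc status and codes packages; the isUnimplemented helper is illustrative, not part of this change.

package main

import (
    "errors"
    "fmt"

    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
)

// isUnimplemented reports whether err is a gRPC status error with code
// Unimplemented, i.e. the server does not know the RPC at all.
func isUnimplemented(err error) bool {
    s, ok := status.FromError(err)
    return ok && s.Code() == codes.Unimplemented
}

func main() {
    oldRuntime := status.Error(codes.Unimplemented, "unknown method RuntimeConfig")
    transient := errors.New("connection refused")

    fmt.Println(isUnimplemented(oldRuntime)) // true  -> fall back to kubelet config
    fmt.Println(isUnimplemented(transient))  // false -> retry, the runtime may still be starting
}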
@@ -416,6 +416,17 @@ const (
     // yet.
     JobTrackingWithFinalizers featuregate.Feature = "JobTrackingWithFinalizers"
 
+    // owner: @marquiz
+    // kep: http://kep.k8s.io/4033
+    // alpha: v1.28
+    //
+    // Enable detection of the kubelet cgroup driver configuration option from
+    // the CRI. The CRI runtime also needs to support this feature in which
+    // case the kubelet will ignore the cgroupDriver (--cgroup-driver)
+    // configuration option. If runtime doesn't support it, the kubelet will
+    // fallback to using it's cgroupDriver option.
+    KubeletCgroupDriverFromCRI featuregate.Feature = "KubeletCgroupDriverFromCRI"
+
     // owner: @AkihiroSuda
     // alpha: v1.22
     //
@@ -1014,6 +1025,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
 
     JobTrackingWithFinalizers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.28
 
+    KubeletCgroupDriverFromCRI: {Default: false, PreRelease: featuregate.Alpha},
+
     KubeletInUserNamespace: {Default: false, PreRelease: featuregate.Alpha},
 
     KubeletPodResources: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.28, remove in 1.30
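
Declared and defaulted as above, the gate is off unless explicitly enabled (for example with --feature-gates=KubeletCgroupDriverFromCRI=true). A rough sketch of that behaviour using a private featuregate.NewFeatureGate() rather than the kubelet's shared utilfeature.DefaultFeatureGate, so it is an approximation of the wiring, not kubelet code.

package main

import (
    "fmt"

    "k8s.io/component-base/featuregate"
)

const KubeletCgroupDriverFromCRI featuregate.Feature = "KubeletCgroupDriverFromCRI"

func main() {
    // Stand-alone gate; the kubelet registers against the shared DefaultFeatureGate instead.
    gate := featuregate.NewFeatureGate()
    if err := gate.Add(map[featuregate.Feature]featuregate.FeatureSpec{
        KubeletCgroupDriverFromCRI: {Default: false, PreRelease: featuregate.Alpha},
    }); err != nil {
        panic(err)
    }

    fmt.Println(gate.Enabled(KubeletCgroupDriverFromCRI)) // false (alpha default)

    // Equivalent of passing --feature-gates=KubeletCgroupDriverFromCRI=true.
    if err := gate.Set("KubeletCgroupDriverFromCRI=true"); err != nil {
        panic(err)
    }
    fmt.Println(gate.Enabled(KubeletCgroupDriverFromCRI)) // true
}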
@@ -356,3 +356,13 @@ func (f *RemoteRuntime) ListPodSandboxMetrics(ctx context.Context, req *kubeapi.
 
     return &kubeapi.ListPodSandboxMetricsResponse{PodMetrics: podMetrics}, nil
 }
+
+// RuntimeConfig returns the configuration information of the runtime.
+func (f *RemoteRuntime) RuntimeConfig(ctx context.Context, req *kubeapi.RuntimeConfigRequest) (*kubeapi.RuntimeConfigResponse, error) {
+    resp, err := f.RuntimeService.RuntimeConfig(ctx)
+    if err != nil {
+        return nil, err
+    }
+
+    return resp, nil
+}
@@ -865,3 +865,18 @@ func (r *remoteRuntimeService) ListPodSandboxMetrics(ctx context.Context) ([]*ru
 
     return resp.GetPodMetrics(), nil
 }
+
+// RuntimeConfig returns the configuration information of the runtime.
+func (r *remoteRuntimeService) RuntimeConfig(ctx context.Context) (*runtimeapi.RuntimeConfigResponse, error) {
+    ctx, cancel := context.WithTimeout(ctx, r.timeout)
+    defer cancel()
+
+    resp, err := r.runtimeClient.RuntimeConfig(ctx, &runtimeapi.RuntimeConfigRequest{})
+    if err != nil {
+        klog.ErrorS(err, "RuntimeConfig from runtime service failed")
+        return nil, err
+    }
+    klog.V(10).InfoS("[RemoteRuntimeService] RuntimeConfigResponse", "linuxConfig", resp.GetLinux())
+
+    return resp, nil
+}
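
The wrapper above is how the kubelet issues the call, but the RPC can also be exercised directly against a runtime socket to check whether a given runtime version implements it. A hedged sketch; the containerd socket path and the assumption that the runtime already serves CRI v1 with this RPC are ours.

package main

import (
    "context"
    "fmt"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

func main() {
    // Socket path is an assumption; adjust for your runtime (e.g. CRI-O).
    conn, err := grpc.Dial("unix:///run/containerd/containerd.sock",
        grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        panic(err)
    }
    defer conn.Close()

    client := runtimeapi.NewRuntimeServiceClient(conn)

    ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
    defer cancel()

    resp, err := client.RuntimeConfig(ctx, &runtimeapi.RuntimeConfigRequest{})
    if err != nil {
        panic(err) // an older runtime answers with codes.Unimplemented here
    }
    fmt.Println("cgroup driver:", resp.GetLinux().GetCgroupDriver())
}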
@@ -361,3 +361,12 @@ func (in instrumentedRuntimeService) ListPodSandboxMetrics(ctx context.Context)
     recordError(operation, err)
     return out, err
 }
+
+func (in instrumentedRuntimeService) RuntimeConfig(ctx context.Context) (*runtimeapi.RuntimeConfigResponse, error) {
+    const operation = "runtime_config"
+    defer recordOperation(operation, time.Now())
+
+    out, err := in.service.RuntimeConfig(ctx)
+    recordError(operation, err)
+    return out, err
+}
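
recordOperation and recordError are kubelet-internal metrics helpers, so the wrapper above does not lift out verbatim; the generic sketch below merely restates the decorator pattern it follows (time the call, record any error, forward the result unchanged) with stand-in instrumentation.

package main

import (
    "errors"
    "fmt"
    "time"
)

// instrument is a simplified stand-in for the kubelet's recordOperation and
// recordError helpers: it times one call and reports whether it failed.
func instrument[T any](operation string, call func() (T, error)) (T, error) {
    start := time.Now()
    out, err := call()
    fmt.Printf("operation=%s duration=%v err=%v\n", operation, time.Since(start), err)
    return out, err
}

func main() {
    // The instrumented CRI wrapper does the equivalent around
    // in.service.RuntimeConfig(ctx), with operation name "runtime_config".
    _, _ = instrument("runtime_config", func() (int, error) {
        return 0, errors.New("runtime not ready")
    })
}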
(File diff suppressed because it is too large.)
@@ -131,6 +131,15 @@ service RuntimeService {
 
     // ListPodSandboxMetrics gets pod sandbox metrics from CRI Runtime
     rpc ListPodSandboxMetrics(ListPodSandboxMetricsRequest) returns (ListPodSandboxMetricsResponse) {}
+
+    // RuntimeConfig returns configuration information of the runtime.
+    // A couple of notes:
+    // - The RuntimeConfigRequest object is not to be confused with the contents of UpdateRuntimeConfigRequest.
+    //   The former is for having runtime tell Kubelet what to do, the latter vice versa.
+    // - It is the expectation of the Kubelet that these fields are static for the lifecycle of the Kubelet.
+    //   The Kubelet will not re-request the RuntimeConfiguration after startup, and CRI implementations should
+    //   avoid updating them without a full node reboot.
+    rpc RuntimeConfig(RuntimeConfigRequest) returns (RuntimeConfigResponse) {}
 }
 
 // ImageService defines the public APIs for managing images.
@@ -1804,3 +1813,29 @@ enum MetricType {
     COUNTER = 0;
     GAUGE = 1;
 }
+
+message RuntimeConfigRequest {}
+
+message RuntimeConfigResponse {
+    // Configuration information for Linux-based runtimes. This field contains
+    // global runtime configuration options that are not specific to runtime
+    // handlers.
+    LinuxRuntimeConfiguration linux = 1;
+}
+
+message LinuxRuntimeConfiguration {
+    // Cgroup driver to use
+    // Note: this field should not change for the lifecycle of the Kubelet,
+    // or while there are running containers.
+    // The Kubelet will not re-request this after startup, and will construct the cgroup
+    // hierarchy assuming it is static.
+    // If the runtime wishes to change this value, it must be accompanied by removal of
+    // all pods, and a restart of the Kubelet. The easiest way to do this is with a full node reboot.
+    CgroupDriver cgroup_driver = 1;
+}
+
+enum CgroupDriver {
+    SYSTEMD = 0;
+    CGROUPFS = 1;
+}
+
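
On the runtime side these messages reduce to a handler that reports the driver the runtime was started with and never changes the answer, per the comments above. The sketch below is a hypothetical partial server (fakeRuntimeServer is not a real type in any runtime); a real CRI implementation would expose this as one method of its full RuntimeServiceServer.

package main

import (
    "context"
    "fmt"

    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

// fakeRuntimeServer is a made-up, partial CRI server used only to show the
// shape of the handler.
type fakeRuntimeServer struct {
    cgroupDriver runtimeapi.CgroupDriver
}

// RuntimeConfig reports the driver from static configuration; it is never
// recomputed, since the kubelet asks only once at startup.
func (s *fakeRuntimeServer) RuntimeConfig(ctx context.Context, _ *runtimeapi.RuntimeConfigRequest) (*runtimeapi.RuntimeConfigResponse, error) {
    return &runtimeapi.RuntimeConfigResponse{
        Linux: &runtimeapi.LinuxRuntimeConfiguration{CgroupDriver: s.cgroupDriver},
    }, nil
}

func main() {
    srv := &fakeRuntimeServer{cgroupDriver: runtimeapi.CgroupDriver_SYSTEMD}
    resp, _ := srv.RuntimeConfig(context.Background(), &runtimeapi.RuntimeConfigRequest{})
    fmt.Println(resp.GetLinux().GetCgroupDriver()) // SYSTEMD
}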
@@ -115,6 +115,8 @@ type RuntimeService interface {
     UpdateRuntimeConfig(ctx context.Context, runtimeConfig *runtimeapi.RuntimeConfig) error
     // Status returns the status of the runtime.
     Status(ctx context.Context, verbose bool) (*runtimeapi.StatusResponse, error)
+    // RuntimeConfig returns the configuration information of the runtime.
+    RuntimeConfig(ctx context.Context) (*runtimeapi.RuntimeConfigResponse, error)
 }
 
 // ImageManagerService interface should be implemented by a container image
@@ -64,14 +64,15 @@ type FakeRuntimeService struct {
     Called []string
     Errors map[string][]error
 
-    FakeStatus            *runtimeapi.RuntimeStatus
-    Containers            map[string]*FakeContainer
-    Sandboxes             map[string]*FakePodSandbox
-    FakeContainerStats    map[string]*runtimeapi.ContainerStats
-    FakePodSandboxStats   map[string]*runtimeapi.PodSandboxStats
-    FakePodSandboxMetrics map[string]*runtimeapi.PodSandboxMetrics
-    FakeMetricDescriptors map[string]*runtimeapi.MetricDescriptor
-    FakeContainerMetrics  map[string]*runtimeapi.ContainerMetrics
+    FakeStatus             *runtimeapi.RuntimeStatus
+    Containers             map[string]*FakeContainer
+    Sandboxes              map[string]*FakePodSandbox
+    FakeContainerStats     map[string]*runtimeapi.ContainerStats
+    FakePodSandboxStats    map[string]*runtimeapi.PodSandboxStats
+    FakePodSandboxMetrics  map[string]*runtimeapi.PodSandboxMetrics
+    FakeMetricDescriptors  map[string]*runtimeapi.MetricDescriptor
+    FakeContainerMetrics   map[string]*runtimeapi.ContainerMetrics
+    FakeLinuxConfiguration *runtimeapi.LinuxRuntimeConfiguration
 
     ErrorOnSandboxCreate bool
 }
@@ -780,3 +781,16 @@ func (r *FakeRuntimeService) ListPodSandboxMetrics(_ context.Context) ([]*runtim
 
     return result, nil
 }
+
+// RuntimeConfig returns runtime configuration of the FakeRuntimeService.
+func (r *FakeRuntimeService) RuntimeConfig(_ context.Context) (*runtimeapi.RuntimeConfigResponse, error) {
+    r.Lock()
+    defer r.Unlock()
+
+    r.Called = append(r.Called, "RuntimeConfig")
+    if err := r.popError("RuntimeConfig"); err != nil {
+        return nil, err
+    }
+
+    return &runtimeapi.RuntimeConfigResponse{Linux: r.FakeLinuxConfiguration}, nil
+}
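
With the new FakeLinuxConfiguration field, kubelet-side detection can be unit-tested without a real runtime. A small usage sketch, assuming the k8s.io/cri-api/pkg/apis/testing package and its NewFakeRuntimeService constructor at this version.

package main

import (
    "context"
    "fmt"

    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    apitest "k8s.io/cri-api/pkg/apis/testing"
)

func main() {
    fake := apitest.NewFakeRuntimeService()
    fake.FakeLinuxConfiguration = &runtimeapi.LinuxRuntimeConfiguration{
        CgroupDriver: runtimeapi.CgroupDriver_CGROUPFS,
    }

    resp, err := fake.RuntimeConfig(context.Background())
    if err != nil {
        panic(err)
    }
    // Prints CGROUPFS; fake.Called now records the "RuntimeConfig" call.
    fmt.Println(resp.GetLinux().GetCgroupDriver(), fake.Called)
}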