From bfa62e0009aea8be681f68a71e3b2d59cee057dc Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Tue, 11 Jul 2023 13:09:37 -0400 Subject: [PATCH] kubelet: retry RuntimeConfig call and prep for nil values from GetLinux Signed-off-by: Peter Hunt --- cmd/kubelet/app/server.go | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 55769ec3b4c..64dbb4cd969 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -1294,18 +1294,41 @@ func newTracerProvider(s *options.KubeletServer) (oteltrace.TracerProvider, erro func getCgroupDriverFromCRI(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Dependencies) error { klog.V(4).InfoS("Getting CRI runtime configuration information") - runtimeConfig, err := kubeDeps.RemoteRuntimeService.RuntimeConfig(ctx) - if err != nil { - s, ok := status.FromError(err) - if !ok || s.Code() != codes.Unimplemented { - return err + + var ( + runtimeConfig *runtimeapi.RuntimeConfigResponse + err error + ) + // Retry a couple of times, hoping that any errors are transient. + // Fail quickly on known, non transient errors. + for i := 0; i < 3; i++ { + runtimeConfig, err = kubeDeps.RemoteRuntimeService.RuntimeConfig(ctx) + if err != nil { + s, ok := status.FromError(err) + if !ok || s.Code() != codes.Unimplemented { + // We could introduce a backoff delay or jitter, but this is largely catching cases + // where the runtime is still starting up and we request too early. + // Give it a little more time. + time.Sleep(time.Second * 2) + continue + } + // CRI implementation doesn't support RuntimeConfig, fallback + klog.InfoS("CRI implementation should be updated to support RuntimeConfig when KubeletCgroupDriverFromCRI feature gate has been enabled. Falling back to using cgroupDriver from kubelet config.") + return nil } - // CRI implementation doesn't support RuntimeConfig, fallback - klog.InfoS("CRI implementation should be updated to support RuntimeConfig when KubeletCgroupDriverFromCRI feature gate has been enabled. Falling back to using cgroupDriver from kubelet config.") + } + if err != nil { + return err + } + + // Calling GetLinux().GetCgroupDriver() won't segfault, but it will always default to systemd + // which is not intended by the fields not being populated + linuxConfig := runtimeConfig.GetLinux() + if linuxConfig == nil { return nil } - switch d := runtimeConfig.GetLinux().GetCgroupDriver(); d { + switch d := linuxConfig.GetCgroupDriver(); d { case runtimeapi.CgroupDriver_SYSTEMD: s.CgroupDriver = "systemd" case runtimeapi.CgroupDriver_CGROUPFS: