diff --git a/pkg/kubelet/cri/.import-restrictions b/pkg/kubelet/cri/.import-restrictions index 10215ff9251..ae9260b9bd1 100644 --- a/pkg/kubelet/cri/.import-restrictions +++ b/pkg/kubelet/cri/.import-restrictions @@ -3,3 +3,4 @@ rules: - selectorRegexp: k8s[.]io/kubernetes allowedPrefixes: - k8s.io/kubernetes/pkg/kubelet/cri + - k8s.io/kubernetes/pkg/probe/exec diff --git a/pkg/kubelet/cri/remote/BUILD b/pkg/kubelet/cri/remote/BUILD index ea28193df68..d6cd6b7cf5b 100644 --- a/pkg/kubelet/cri/remote/BUILD +++ b/pkg/kubelet/cri/remote/BUILD @@ -17,6 +17,7 @@ go_library( importpath = "k8s.io/kubernetes/pkg/kubelet/cri/remote", deps = [ "//pkg/kubelet/cri/remote/util:go_default_library", + "//pkg/probe/exec:go_default_library", "//staging/src/k8s.io/component-base/logs/logreduction:go_default_library", "//staging/src/k8s.io/cri-api/pkg/apis:go_default_library", "//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library", diff --git a/pkg/kubelet/cri/remote/remote_runtime.go b/pkg/kubelet/cri/remote/remote_runtime.go index 80ad3aad193..87b41bbf16f 100644 --- a/pkg/kubelet/cri/remote/remote_runtime.go +++ b/pkg/kubelet/cri/remote/remote_runtime.go @@ -32,6 +32,7 @@ import ( internalapi "k8s.io/cri-api/pkg/apis" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" "k8s.io/kubernetes/pkg/kubelet/cri/remote/util" + "k8s.io/kubernetes/pkg/probe/exec" utilexec "k8s.io/utils/exec" ) @@ -390,14 +391,9 @@ func (r *remoteRuntimeService) ExecSync(containerID string, cmd []string, timeou if err != nil { klog.Errorf("ExecSync %s '%s' from runtime service failed: %v", containerID, strings.Join(cmd, " "), err) - // If exec timed out, return utilexec.CodeExitError with an exit status as expected - // from prober for failed probes. - // TODO: utilexec should have a TimedoutError type and we should return it here once available. + // interpret DeadlineExceeded gRPC errors as timedout probes if status.Code(err) == codes.DeadlineExceeded { - err = utilexec.CodeExitError{ - Err: fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), - Code: 1, // exit code here doesn't really matter, as long as it's not 0 - } + err = exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout) } return nil, nil, err diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index 06e3c04e599..4bd4958e11e 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -37,6 +37,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/credentialprovider:go_default_library", + "//pkg/features:go_default_library", "//pkg/kubelet/apis/config:go_default_library", "//pkg/kubelet/checkpointmanager:go_default_library", "//pkg/kubelet/checkpointmanager/checksum:go_default_library", @@ -55,11 +56,13 @@ go_library( "//pkg/kubelet/types:go_default_library", "//pkg/kubelet/util/cache:go_default_library", "//pkg/kubelet/util/ioutils:go_default_library", + "//pkg/probe/exec:go_default_library", "//pkg/util/parsers:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", "//staging/src/k8s.io/client-go/tools/remotecommand:go_default_library", "//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library", "//vendor/github.com/armon/circbuf:go_default_library", diff --git a/pkg/kubelet/dockershim/exec.go b/pkg/kubelet/dockershim/exec.go index 8e53597c8db..d79cff31d97 100644 --- a/pkg/kubelet/dockershim/exec.go +++ b/pkg/kubelet/dockershim/exec.go @@ -27,11 +27,13 @@ import ( dockertypes "github.com/docker/docker/api/types" "k8s.io/klog/v2" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/remotecommand" + "k8s.io/kubernetes/pkg/features" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + "k8s.io/kubernetes/pkg/probe/exec" "k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker" - utilexec "k8s.io/utils/exec" ) // ExecHandler knows how to execute a command in a running Docker container. @@ -112,8 +114,9 @@ func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container return err } + // if ExecProbeTimeout feature gate is disabled, preserve existing behavior to ignore exec timeouts var execTimeout <-chan time.Time - if timeout > 0 { + if timeout > 0 && utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) { execTimeout = time.After(timeout) } else { // skip exec timeout if provided timeout is 0 @@ -126,13 +129,7 @@ func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container for { select { case <-execTimeout: - // If exec timed out, return utilexec.CodeExitError with an exit status as expected - // from prober for failed probes. - // TODO: utilexec should have a TimedoutError type and we should return it here once available. - return utilexec.CodeExitError{ - Err: fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), - Code: 1, // exit code here doesn't really matter, as long as it's not 0 - } + return exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout) // need to use "default" here instead of <-ticker.C, otherwise we delay the initial InspectExec by 2 seconds. default: inspect, inspectErr := client.InspectExec(execObj.ID) diff --git a/pkg/probe/exec/BUILD b/pkg/probe/exec/BUILD index ce381191610..80186df8ca9 100644 --- a/pkg/probe/exec/BUILD +++ b/pkg/probe/exec/BUILD @@ -8,11 +8,16 @@ load( go_library( name = "go_default_library", - srcs = ["exec.go"], + srcs = [ + "errors.go", + "exec.go", + ], importpath = "k8s.io/kubernetes/pkg/probe/exec", deps = [ + "//pkg/features:go_default_library", "//pkg/kubelet/util/ioutils:go_default_library", "//pkg/probe:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", "//vendor/k8s.io/klog/v2:go_default_library", "//vendor/k8s.io/utils/exec:go_default_library", ], diff --git a/pkg/probe/exec/errors.go b/pkg/probe/exec/errors.go new file mode 100644 index 00000000000..6659a94ac7b --- /dev/null +++ b/pkg/probe/exec/errors.go @@ -0,0 +1,47 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exec + +import ( + "time" +) + +// NewTimeoutError returns a new TimeoutError. +func NewTimeoutError(err error, timeout time.Duration) *TimeoutError { + return &TimeoutError{ + err: err, + timeout: timeout, + } +} + +// TimeoutError is an error returned on exec probe timeouts. It should be returned by CRI implementations +// in order for the exec prober to interpret exec timeouts as failed probes. +// TODO: this error type can likely be removed when we support CRI errors. +type TimeoutError struct { + err error + timeout time.Duration +} + +// Error returns the error string. +func (t *TimeoutError) Error() string { + return t.err.Error() +} + +// Timeout returns the timeout duration of the exec probe. +func (t *TimeoutError) Timeout() time.Duration { + return t.timeout +} diff --git a/pkg/probe/exec/exec.go b/pkg/probe/exec/exec.go index 9aa894672ad..164d21bd112 100644 --- a/pkg/probe/exec/exec.go +++ b/pkg/probe/exec/exec.go @@ -19,6 +19,8 @@ package exec import ( "bytes" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/util/ioutils" "k8s.io/kubernetes/pkg/probe" @@ -66,6 +68,16 @@ func (pr execProber) Probe(e exec.Cmd) (probe.Result, string, error) { } return probe.Failure, string(data), nil } + + timeoutErr, ok := err.(*TimeoutError) + if ok { + if utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) { + return probe.Failure, string(data), nil + } + + klog.Warningf("Exec probe timed out after %s but ExecProbeTimeout feature gate was disabled", timeoutErr.Timeout()) + } + return probe.Unknown, "", err } return probe.Success, string(data), nil