mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 22:46:12 +00:00
Merge pull request #94115 from andrewsykim/fix-dockershim-exec
kubelet: respect exec probe timeouts
This commit is contained in:
commit
fe37798329
@ -668,6 +668,14 @@ const (
|
||||
// alpha: v1.21
|
||||
// LoadBalancerIPMode enables the IPMode field in the LoadBalancerIngress status of a Service
|
||||
LoadBalancerIPMode featuregate.Feature = "LoadBalancerIPMode"
|
||||
|
||||
// owner: @andrewsykim @SergeyKanzhelev
|
||||
// GA: v1.20
|
||||
//
|
||||
// Ensure kubelet respects exec probe timeouts. Feature gate exists in-case existing workloads
|
||||
// may depend on old behavior where exec probe timeouts were ignored.
|
||||
// Lock to default in v1.21 and remove in v1.22.
|
||||
ExecProbeTimeout featuregate.Feature = "ExecProbeTimeout"
|
||||
)
|
||||
|
||||
func init() {
|
||||
@ -769,6 +777,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
|
||||
RootCAConfigMap: {Default: true, PreRelease: featuregate.Beta},
|
||||
SizeMemoryBackedVolumes: {Default: false, PreRelease: featuregate.Alpha},
|
||||
LoadBalancerIPMode: {Default: false, PreRelease: featuregate.Alpha},
|
||||
ExecProbeTimeout: {Default: true, PreRelease: featuregate.GA}, // lock to default in v1.21 and remove in v1.22
|
||||
|
||||
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
|
||||
// unintentionally on either side:
|
||||
|
@ -17,10 +17,13 @@ go_library(
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/cri/remote",
|
||||
deps = [
|
||||
"//pkg/kubelet/cri/remote/util:go_default_library",
|
||||
"//pkg/probe/exec:go_default_library",
|
||||
"//staging/src/k8s.io/component-base/logs/logreduction:go_default_library",
|
||||
"//staging/src/k8s.io/cri-api/pkg/apis:go_default_library",
|
||||
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
|
||||
"//vendor/google.golang.org/grpc:go_default_library",
|
||||
"//vendor/google.golang.org/grpc/codes:go_default_library",
|
||||
"//vendor/google.golang.org/grpc/status:go_default_library",
|
||||
"//vendor/k8s.io/klog/v2:go_default_library",
|
||||
"//vendor/k8s.io/utils/exec:go_default_library",
|
||||
],
|
||||
|
@ -24,12 +24,15 @@ import (
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/component-base/logs/logreduction"
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cri/remote/util"
|
||||
"k8s.io/kubernetes/pkg/probe/exec"
|
||||
utilexec "k8s.io/utils/exec"
|
||||
)
|
||||
|
||||
@ -387,6 +390,12 @@ func (r *remoteRuntimeService) ExecSync(containerID string, cmd []string, timeou
|
||||
resp, err := r.runtimeClient.ExecSync(ctx, req)
|
||||
if err != nil {
|
||||
klog.Errorf("ExecSync %s '%s' from runtime service failed: %v", containerID, strings.Join(cmd, " "), err)
|
||||
|
||||
// interpret DeadlineExceeded gRPC errors as timedout probes
|
||||
if status.Code(err) == codes.DeadlineExceeded {
|
||||
err = exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout)
|
||||
}
|
||||
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
|
@ -37,6 +37,7 @@ go_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//pkg/credentialprovider:go_default_library",
|
||||
"//pkg/features:go_default_library",
|
||||
"//pkg/kubelet/apis/config:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
|
||||
@ -55,11 +56,13 @@ go_library(
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//pkg/kubelet/util/cache:go_default_library",
|
||||
"//pkg/kubelet/util/ioutils:go_default_library",
|
||||
"//pkg/probe/exec:go_default_library",
|
||||
"//pkg/util/parsers:go_default_library",
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/tools/remotecommand:go_default_library",
|
||||
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
|
||||
"//vendor/github.com/armon/circbuf:go_default_library",
|
||||
|
@ -21,13 +21,17 @@ package dockershim
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
dockertypes "github.com/docker/docker/api/types"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/tools/remotecommand"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/probe/exec"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
|
||||
)
|
||||
@ -110,29 +114,48 @@ func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container
|
||||
return err
|
||||
}
|
||||
|
||||
// if ExecProbeTimeout feature gate is disabled, preserve existing behavior to ignore exec timeouts
|
||||
var execTimeout <-chan time.Time
|
||||
if timeout > 0 && utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) {
|
||||
execTimeout = time.After(timeout)
|
||||
} else {
|
||||
// skip exec timeout if provided timeout is 0
|
||||
execTimeout = nil
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(2 * time.Second)
|
||||
defer ticker.Stop()
|
||||
count := 0
|
||||
for {
|
||||
inspect, err2 := client.InspectExec(execObj.ID)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
if !inspect.Running {
|
||||
if inspect.ExitCode != 0 {
|
||||
err = &dockerExitError{inspect}
|
||||
select {
|
||||
case <-execTimeout:
|
||||
return exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout)
|
||||
// need to use "default" here instead of <-ticker.C, otherwise we delay the initial InspectExec by 2 seconds.
|
||||
default:
|
||||
inspect, inspectErr := client.InspectExec(execObj.ID)
|
||||
if inspectErr != nil {
|
||||
return inspectErr
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
count++
|
||||
if count == 5 {
|
||||
klog.Errorf("Exec session %s in container %s terminated but process still running!", execObj.ID, container.ID)
|
||||
break
|
||||
}
|
||||
if !inspect.Running {
|
||||
if inspect.ExitCode != 0 {
|
||||
return &dockerExitError{inspect}
|
||||
}
|
||||
|
||||
<-ticker.C
|
||||
return nil
|
||||
}
|
||||
|
||||
// Only limit the amount of InspectExec calls if the exec timeout was not set.
|
||||
// When a timeout is not set, we stop polling the exec session after 5 attempts and allow the process to continue running.
|
||||
if execTimeout == nil {
|
||||
count++
|
||||
if count == 5 {
|
||||
klog.Errorf("Exec session %s in container %s terminated but process still running!", execObj.ID, container.ID)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
<-ticker.C
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
@ -8,11 +8,16 @@ load(
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["exec.go"],
|
||||
srcs = [
|
||||
"errors.go",
|
||||
"exec.go",
|
||||
],
|
||||
importpath = "k8s.io/kubernetes/pkg/probe/exec",
|
||||
deps = [
|
||||
"//pkg/features:go_default_library",
|
||||
"//pkg/kubelet/util/ioutils:go_default_library",
|
||||
"//pkg/probe:go_default_library",
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
|
||||
"//vendor/k8s.io/klog/v2:go_default_library",
|
||||
"//vendor/k8s.io/utils/exec:go_default_library",
|
||||
],
|
||||
|
47
pkg/probe/exec/errors.go
Normal file
47
pkg/probe/exec/errors.go
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package exec
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// NewTimeoutError returns a new TimeoutError.
|
||||
func NewTimeoutError(err error, timeout time.Duration) *TimeoutError {
|
||||
return &TimeoutError{
|
||||
err: err,
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
// TimeoutError is an error returned on exec probe timeouts. It should be returned by CRI implementations
|
||||
// in order for the exec prober to interpret exec timeouts as failed probes.
|
||||
// TODO: this error type can likely be removed when we support CRI errors.
|
||||
type TimeoutError struct {
|
||||
err error
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// Error returns the error string.
|
||||
func (t *TimeoutError) Error() string {
|
||||
return t.err.Error()
|
||||
}
|
||||
|
||||
// Timeout returns the timeout duration of the exec probe.
|
||||
func (t *TimeoutError) Timeout() time.Duration {
|
||||
return t.timeout
|
||||
}
|
@ -19,6 +19,8 @@ package exec
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/util/ioutils"
|
||||
"k8s.io/kubernetes/pkg/probe"
|
||||
|
||||
@ -66,6 +68,16 @@ func (pr execProber) Probe(e exec.Cmd) (probe.Result, string, error) {
|
||||
}
|
||||
return probe.Failure, string(data), nil
|
||||
}
|
||||
|
||||
timeoutErr, ok := err.(*TimeoutError)
|
||||
if ok {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) {
|
||||
return probe.Failure, string(data), nil
|
||||
}
|
||||
|
||||
klog.Warningf("Exec probe timed out after %s but ExecProbeTimeout feature gate was disabled", timeoutErr.Timeout())
|
||||
}
|
||||
|
||||
return probe.Unknown, "", err
|
||||
}
|
||||
return probe.Success, string(data), nil
|
||||
|
@ -32,7 +32,6 @@ import (
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
e2eevents "k8s.io/kubernetes/test/e2e/framework/events"
|
||||
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
||||
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
|
||||
testutils "k8s.io/kubernetes/test/utils"
|
||||
|
||||
"github.com/onsi/ginkgo"
|
||||
@ -213,8 +212,6 @@ var _ = framework.KubeDescribe("Probing container", func() {
|
||||
Description: A Pod is created with liveness probe with a Exec action on the Pod. If the liveness probe call does not return within the timeout specified, liveness probe MUST restart the Pod.
|
||||
*/
|
||||
ginkgo.It("should be restarted with a docker exec liveness probe with timeout ", func() {
|
||||
// TODO: enable this test once the default exec handler supports timeout.
|
||||
e2eskipper.Skipf("The default exec handler, dockertools.NativeExecHandler, does not support timeouts due to a limitation in the Docker Remote API")
|
||||
cmd := []string{"/bin/sh", "-c", "sleep 600"}
|
||||
livenessProbe := &v1.Probe{
|
||||
Handler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}),
|
||||
@ -226,6 +223,23 @@ var _ = framework.KubeDescribe("Probing container", func() {
|
||||
RunLivenessTest(f, pod, 1, defaultObservationTimeout)
|
||||
})
|
||||
|
||||
/*
|
||||
Release: v1.20
|
||||
Testname: Pod readiness probe, docker exec, not ready
|
||||
Description: A Pod is created with readiness probe with a Exec action on the Pod. If the readiness probe call does not return within the timeout specified, readiness probe MUST not be Ready.
|
||||
*/
|
||||
ginkgo.It("should not be ready with a docker exec readiness probe timeout ", func() {
|
||||
cmd := []string{"/bin/sh", "-c", "sleep 600"}
|
||||
readinessProbe := &v1.Probe{
|
||||
Handler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}),
|
||||
InitialDelaySeconds: 15,
|
||||
TimeoutSeconds: 1,
|
||||
FailureThreshold: 1,
|
||||
}
|
||||
pod := busyBoxPodSpec(readinessProbe, nil, cmd)
|
||||
runReadinessFailTest(f, pod, time.Minute)
|
||||
})
|
||||
|
||||
/*
|
||||
Release: v1.14
|
||||
Testname: Pod http liveness probe, redirected to a local address
|
||||
@ -625,3 +639,35 @@ func RunLivenessTest(f *framework.Framework, pod *v1.Pod, expectNumRestarts int,
|
||||
ns, pod.Name, expectNumRestarts, observedRestarts)
|
||||
}
|
||||
}
|
||||
|
||||
func runReadinessFailTest(f *framework.Framework, pod *v1.Pod, notReadyUntil time.Duration) {
|
||||
podClient := f.PodClient()
|
||||
ns := f.Namespace.Name
|
||||
gomega.Expect(pod.Spec.Containers).NotTo(gomega.BeEmpty())
|
||||
|
||||
// At the end of the test, clean up by removing the pod.
|
||||
defer func() {
|
||||
ginkgo.By("deleting the pod")
|
||||
podClient.Delete(context.TODO(), pod.Name, *metav1.NewDeleteOptions(0))
|
||||
}()
|
||||
ginkgo.By(fmt.Sprintf("Creating pod %s in namespace %s", pod.Name, ns))
|
||||
podClient.Create(pod)
|
||||
|
||||
// Wait until the pod is not pending. (Here we need to check for something other than
|
||||
// 'Pending', since when failures occur, we go to 'Terminated' which can cause indefinite blocking.)
|
||||
framework.ExpectNoError(e2epod.WaitForPodNotPending(f.ClientSet, ns, pod.Name),
|
||||
fmt.Sprintf("starting pod %s in namespace %s", pod.Name, ns))
|
||||
framework.Logf("Started pod %s in namespace %s", pod.Name, ns)
|
||||
|
||||
// Wait for the not ready state to be true for notReadyUntil duration
|
||||
deadline := time.Now().Add(notReadyUntil)
|
||||
for start := time.Now(); time.Now().Before(deadline); time.Sleep(2 * time.Second) {
|
||||
// poll for Not Ready
|
||||
if podutil.IsPodReady(pod) {
|
||||
framework.Failf("pod %s/%s - expected to be not ready", ns, pod.Name)
|
||||
}
|
||||
|
||||
framework.Logf("pod %s/%s is not ready (%v elapsed)",
|
||||
ns, pod.Name, time.Since(start))
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user