mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-06 18:54:06 +00:00
Merge pull request #123215 from adrianreber/2024-02-09-forensic-container-checkpointing-beta
Switch 'ContainerCheckpoint' from Alpha to Beta
This commit is contained in:
commit
cde4788a27
@ -174,6 +174,7 @@ const (
|
|||||||
// owner: @adrianreber
|
// owner: @adrianreber
|
||||||
// kep: https://kep.k8s.io/2008
|
// kep: https://kep.k8s.io/2008
|
||||||
// alpha: v1.25
|
// alpha: v1.25
|
||||||
|
// beta: v1.30
|
||||||
//
|
//
|
||||||
// Enables container Checkpoint support in the kubelet
|
// Enables container Checkpoint support in the kubelet
|
||||||
ContainerCheckpoint featuregate.Feature = "ContainerCheckpoint"
|
ContainerCheckpoint featuregate.Feature = "ContainerCheckpoint"
|
||||||
@ -975,7 +976,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
|
|||||||
|
|
||||||
CloudControllerManagerWebhook: {Default: false, PreRelease: featuregate.Alpha},
|
CloudControllerManagerWebhook: {Default: false, PreRelease: featuregate.Alpha},
|
||||||
|
|
||||||
ContainerCheckpoint: {Default: false, PreRelease: featuregate.Alpha},
|
ContainerCheckpoint: {Default: true, PreRelease: featuregate.Beta},
|
||||||
|
|
||||||
ConsistentHTTPGetHandlers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31
|
ConsistentHTTPGetHandlers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31
|
||||||
|
|
||||||
|
@ -105,6 +105,8 @@ func (n nodeAuthorizerAttributesGetter) GetRequestAttributes(u user.Info, r *htt
|
|||||||
case isSubpath(requestPath, logsPath):
|
case isSubpath(requestPath, logsPath):
|
||||||
// "log" to match other log subresources (pods/log, etc)
|
// "log" to match other log subresources (pods/log, etc)
|
||||||
attrs.Subresource = "log"
|
attrs.Subresource = "log"
|
||||||
|
case isSubpath(requestPath, checkpointPath):
|
||||||
|
attrs.Subresource = "checkpoint"
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(5).InfoS("Node request attributes", "user", attrs.GetUser().GetName(), "verb", attrs.GetVerb(), "resource", attrs.GetResource(), "subresource", attrs.GetSubresource())
|
klog.V(5).InfoS("Node request attributes", "user", attrs.GetUser().GetName(), "verb", attrs.GetVerb(), "resource", attrs.GetResource(), "subresource", attrs.GetSubresource())
|
||||||
|
@ -110,7 +110,7 @@ func AuthzTestCases() []AuthzTestCase {
|
|||||||
testPaths := map[string]string{
|
testPaths := map[string]string{
|
||||||
"/attach/{podNamespace}/{podID}/{containerName}": "proxy",
|
"/attach/{podNamespace}/{podID}/{containerName}": "proxy",
|
||||||
"/attach/{podNamespace}/{podID}/{uid}/{containerName}": "proxy",
|
"/attach/{podNamespace}/{podID}/{uid}/{containerName}": "proxy",
|
||||||
"/checkpoint/{podNamespace}/{podID}/{containerName}": "proxy",
|
"/checkpoint/{podNamespace}/{podID}/{containerName}": "checkpoint",
|
||||||
"/configz": "proxy",
|
"/configz": "proxy",
|
||||||
"/containerLogs/{podNamespace}/{podID}/{containerName}": "proxy",
|
"/containerLogs/{podNamespace}/{podID}/{containerName}": "proxy",
|
||||||
"/debug/flags/v": "proxy",
|
"/debug/flags/v": "proxy",
|
||||||
|
@ -98,6 +98,7 @@ const (
|
|||||||
proberMetricsPath = "/metrics/probes"
|
proberMetricsPath = "/metrics/probes"
|
||||||
statsPath = "/stats/"
|
statsPath = "/stats/"
|
||||||
logsPath = "/logs/"
|
logsPath = "/logs/"
|
||||||
|
checkpointPath = "/checkpoint/"
|
||||||
pprofBasePath = "/debug/pprof/"
|
pprofBasePath = "/debug/pprof/"
|
||||||
debugFlagPath = "/debug/flags/v"
|
debugFlagPath = "/debug/flags/v"
|
||||||
)
|
)
|
||||||
@ -441,7 +442,7 @@ func (s *Server) InstallDefaultHandlers() {
|
|||||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerCheckpoint) {
|
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerCheckpoint) {
|
||||||
s.addMetricsBucketMatcher("checkpoint")
|
s.addMetricsBucketMatcher("checkpoint")
|
||||||
ws = &restful.WebService{}
|
ws = &restful.WebService{}
|
||||||
ws.Path("/checkpoint").Produces(restful.MIME_JSON)
|
ws.Path(checkpointPath).Produces(restful.MIME_JSON)
|
||||||
ws.Route(ws.POST("/{podNamespace}/{podID}/{containerName}").
|
ws.Route(ws.POST("/{podNamespace}/{podID}/{containerName}").
|
||||||
To(s.checkpoint).
|
To(s.checkpoint).
|
||||||
Operation("checkpoint"))
|
Operation("checkpoint"))
|
||||||
|
@ -858,18 +858,24 @@ func TestContainerLogsWithInvalidTail(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestCheckpointContainer(t *testing.T) {
|
func TestCheckpointContainer(t *testing.T) {
|
||||||
// Enable features.ContainerCheckpoint during test
|
|
||||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, true)()
|
|
||||||
|
|
||||||
fw := newServerTest()
|
|
||||||
defer fw.testHTTPServer.Close()
|
|
||||||
podNamespace := "other"
|
podNamespace := "other"
|
||||||
podName := "foo"
|
podName := "foo"
|
||||||
expectedContainerName := "baz"
|
expectedContainerName := "baz"
|
||||||
// GetPodByName() should always fail
|
|
||||||
fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) {
|
setupTest := func(featureGate bool) *serverTestFramework {
|
||||||
return nil, false
|
// Enable or disable features.ContainerCheckpoint as requested by featureGate
|
||||||
|
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, featureGate)()
|
||||||
|
|
||||||
|
fw := newServerTest()
|
||||||
|
// GetPodByName() should always fail
|
||||||
|
fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
return fw
|
||||||
}
|
}
|
||||||
|
fw := setupTest(true)
|
||||||
|
defer fw.testHTTPServer.Close()
|
||||||
|
|
||||||
t.Run("wrong pod namespace", func(t *testing.T) {
|
t.Run("wrong pod namespace", func(t *testing.T) {
|
||||||
resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil)
|
resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -927,6 +933,19 @@ func TestCheckpointContainer(t *testing.T) {
|
|||||||
}
|
}
|
||||||
assert.Equal(t, resp.StatusCode, 200)
|
assert.Equal(t, resp.StatusCode, 200)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Now test for 404 if checkpointing support is explicitly disabled.
|
||||||
|
fw.testHTTPServer.Close()
|
||||||
|
fw = setupTest(false)
|
||||||
|
defer fw.testHTTPServer.Close()
|
||||||
|
setPodByNameFunc(fw, podNamespace, podName, expectedContainerName)
|
||||||
|
t.Run("checkpointing fails because disabled", func(t *testing.T) {
|
||||||
|
resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Got error POSTing: %v", err)
|
||||||
|
}
|
||||||
|
assert.Equal(t, 404, resp.StatusCode)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeReq(t *testing.T, method, url, clientProtocol string) *http.Request {
|
func makeReq(t *testing.T, method, url, clientProtocol string) *http.Request {
|
||||||
|
@ -34,11 +34,14 @@ import (
|
|||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
restclient "k8s.io/client-go/rest"
|
restclient "k8s.io/client-go/rest"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
|
||||||
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
||||||
"k8s.io/kubernetes/test/e2e/nodefeature"
|
"k8s.io/kubernetes/test/e2e/nodefeature"
|
||||||
testutils "k8s.io/kubernetes/test/utils"
|
testutils "k8s.io/kubernetes/test/utils"
|
||||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||||
admissionapi "k8s.io/pod-security-admission/api"
|
admissionapi "k8s.io/pod-security-admission/api"
|
||||||
|
|
||||||
|
"github.com/onsi/gomega"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -75,6 +78,58 @@ func proxyPostRequest(ctx context.Context, c clientset.Interface, node, endpoint
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getCheckpointContainerMetric(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) {
|
||||||
|
framework.Logf("Getting 'checkpoint_container' metrics from %q", pod.Spec.NodeName)
|
||||||
|
ms, err := e2emetrics.GetKubeletMetrics(
|
||||||
|
ctx,
|
||||||
|
f.ClientSet,
|
||||||
|
pod.Spec.NodeName,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
runtimeOperationsTotal, ok := ms["runtime_operations_total"]
|
||||||
|
if !ok {
|
||||||
|
// If the metric was not found, it has probably not been written to yet.
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range runtimeOperationsTotal {
|
||||||
|
if item.Metric["__name__"] == "kubelet_runtime_operations_total" && item.Metric["operation_type"] == "checkpoint_container" {
|
||||||
|
return int(item.Value), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If the metric was not found, it has probably not been written to yet.
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCheckpointContainerErrorMetric(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) {
|
||||||
|
framework.Logf("Getting 'checkpoint_container' error metrics from %q", pod.Spec.NodeName)
|
||||||
|
ms, err := e2emetrics.GetKubeletMetrics(
|
||||||
|
ctx,
|
||||||
|
f.ClientSet,
|
||||||
|
pod.Spec.NodeName,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
runtimeOperationsErrorsTotal, ok := ms["runtime_operations_errors_total"]
|
||||||
|
if !ok {
|
||||||
|
// If the metric was not found, it has probably not been written to yet.
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range runtimeOperationsErrorsTotal {
|
||||||
|
if item.Metric["__name__"] == "kubelet_runtime_operations_errors_total" && item.Metric["operation_type"] == "checkpoint_container" {
|
||||||
|
return int(item.Value), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If the metric was not found, it has probably not been written to yet.
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, func() {
|
var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, func() {
|
||||||
f := framework.NewDefaultFramework("checkpoint-container-test")
|
f := framework.NewDefaultFramework("checkpoint-container-test")
|
||||||
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
|
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
|
||||||
@ -82,7 +137,10 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
|||||||
ginkgo.By("creating a target pod")
|
ginkgo.By("creating a target pod")
|
||||||
podClient := e2epod.NewPodClient(f)
|
podClient := e2epod.NewPodClient(f)
|
||||||
pod := podClient.CreateSync(ctx, &v1.Pod{
|
pod := podClient.CreateSync(ctx, &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{Name: "checkpoint-container-pod"},
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "checkpoint-container-pod",
|
||||||
|
Namespace: f.Namespace.Name,
|
||||||
|
},
|
||||||
Spec: v1.PodSpec{
|
Spec: v1.PodSpec{
|
||||||
Containers: []v1.Container{
|
Containers: []v1.Container{
|
||||||
{
|
{
|
||||||
@ -108,6 +166,15 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
|||||||
framework.Failf("pod %q should be ready", p.Name)
|
framework.Failf("pod %q should be ready", p.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// No checkpoint operation should have been logged
|
||||||
|
checkpointContainerMetric, err := getCheckpointContainerMetric(ctx, f, pod)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
gomega.Expect(checkpointContainerMetric).To(gomega.Equal(0))
|
||||||
|
// No error should have been logged
|
||||||
|
checkpointContainerErrorMetric, err := getCheckpointContainerErrorMetric(ctx, f, pod)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0))
|
||||||
|
|
||||||
framework.Logf(
|
framework.Logf(
|
||||||
"About to checkpoint container %q on %q",
|
"About to checkpoint container %q on %q",
|
||||||
pod.Spec.Containers[0].Name,
|
pod.Spec.Containers[0].Name,
|
||||||
@ -144,6 +211,12 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
|||||||
// If the container engine has not implemented the Checkpoint CRI API
|
// If the container engine has not implemented the Checkpoint CRI API
|
||||||
// we will get 500 and a message with
|
// we will get 500 and a message with
|
||||||
// '(rpc error: code = Unimplemented desc = unknown method CheckpointContainer'
|
// '(rpc error: code = Unimplemented desc = unknown method CheckpointContainer'
|
||||||
|
// or
|
||||||
|
// '(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)'
|
||||||
|
// if the container engine reports that it has explicitly disabled support for it.
|
||||||
|
// or
|
||||||
|
// '(rpc error: code = Unknown desc = checkpoint/restore support not available)'
|
||||||
|
// if the container engine has explicitly disabled checkpoint/restore support.
|
||||||
if (int(statusError.ErrStatus.Code)) == http.StatusInternalServerError {
|
if (int(statusError.ErrStatus.Code)) == http.StatusInternalServerError {
|
||||||
if strings.Contains(
|
if strings.Contains(
|
||||||
statusError.ErrStatus.Message,
|
statusError.ErrStatus.Message,
|
||||||
@ -152,8 +225,26 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
|||||||
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
|
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if strings.Contains(
|
||||||
|
statusError.ErrStatus.Message,
|
||||||
|
"(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)",
|
||||||
|
) {
|
||||||
|
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.Contains(
|
||||||
|
statusError.ErrStatus.Message,
|
||||||
|
"(rpc error: code = Unknown desc = checkpoint/restore support not available)",
|
||||||
|
) {
|
||||||
|
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
framework.Failf("Unexpected status code (%d) during 'CheckpointContainer'", statusError.ErrStatus.Code)
|
framework.Failf(
|
||||||
|
"Unexpected status code (%d) during 'CheckpointContainer': %q",
|
||||||
|
statusError.ErrStatus.Code,
|
||||||
|
statusError.ErrStatus.Message,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
@ -205,5 +296,13 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
|||||||
// cleanup checkpoint archive
|
// cleanup checkpoint archive
|
||||||
os.RemoveAll(item)
|
os.RemoveAll(item)
|
||||||
}
|
}
|
||||||
|
// Exactly one checkpoint operation should have happened
|
||||||
|
checkpointContainerMetric, err = getCheckpointContainerMetric(ctx, f, pod)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
gomega.Expect(checkpointContainerMetric).To(gomega.Equal(1))
|
||||||
|
// No error should have been logged
|
||||||
|
checkpointContainerErrorMetric, err = getCheckpointContainerErrorMetric(ctx, f, pod)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
Loading…
Reference in New Issue
Block a user