mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 19:23:40 +00:00
Merge pull request #123215 from adrianreber/2024-02-09-forensic-container-checkpointing-beta
Switch 'ContainerCheckpoint' from Alpha to Beta
This commit is contained in:
commit
cde4788a27
@ -174,6 +174,7 @@ const (
|
||||
// owner: @adrianreber
|
||||
// kep: https://kep.k8s.io/2008
|
||||
// alpha: v1.25
|
||||
// beta: v1.30
|
||||
//
|
||||
// Enables container Checkpoint support in the kubelet
|
||||
ContainerCheckpoint featuregate.Feature = "ContainerCheckpoint"
|
||||
@ -975,7 +976,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
|
||||
|
||||
CloudControllerManagerWebhook: {Default: false, PreRelease: featuregate.Alpha},
|
||||
|
||||
ContainerCheckpoint: {Default: false, PreRelease: featuregate.Alpha},
|
||||
ContainerCheckpoint: {Default: true, PreRelease: featuregate.Beta},
|
||||
|
||||
ConsistentHTTPGetHandlers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31
|
||||
|
||||
|
@ -105,6 +105,8 @@ func (n nodeAuthorizerAttributesGetter) GetRequestAttributes(u user.Info, r *htt
|
||||
case isSubpath(requestPath, logsPath):
|
||||
// "log" to match other log subresources (pods/log, etc)
|
||||
attrs.Subresource = "log"
|
||||
case isSubpath(requestPath, checkpointPath):
|
||||
attrs.Subresource = "checkpoint"
|
||||
}
|
||||
|
||||
klog.V(5).InfoS("Node request attributes", "user", attrs.GetUser().GetName(), "verb", attrs.GetVerb(), "resource", attrs.GetResource(), "subresource", attrs.GetSubresource())
|
||||
|
@ -110,7 +110,7 @@ func AuthzTestCases() []AuthzTestCase {
|
||||
testPaths := map[string]string{
|
||||
"/attach/{podNamespace}/{podID}/{containerName}": "proxy",
|
||||
"/attach/{podNamespace}/{podID}/{uid}/{containerName}": "proxy",
|
||||
"/checkpoint/{podNamespace}/{podID}/{containerName}": "proxy",
|
||||
"/checkpoint/{podNamespace}/{podID}/{containerName}": "checkpoint",
|
||||
"/configz": "proxy",
|
||||
"/containerLogs/{podNamespace}/{podID}/{containerName}": "proxy",
|
||||
"/debug/flags/v": "proxy",
|
||||
|
@ -98,6 +98,7 @@ const (
|
||||
proberMetricsPath = "/metrics/probes"
|
||||
statsPath = "/stats/"
|
||||
logsPath = "/logs/"
|
||||
checkpointPath = "/checkpoint/"
|
||||
pprofBasePath = "/debug/pprof/"
|
||||
debugFlagPath = "/debug/flags/v"
|
||||
)
|
||||
@ -441,7 +442,7 @@ func (s *Server) InstallDefaultHandlers() {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerCheckpoint) {
|
||||
s.addMetricsBucketMatcher("checkpoint")
|
||||
ws = &restful.WebService{}
|
||||
ws.Path("/checkpoint").Produces(restful.MIME_JSON)
|
||||
ws.Path(checkpointPath).Produces(restful.MIME_JSON)
|
||||
ws.Route(ws.POST("/{podNamespace}/{podID}/{containerName}").
|
||||
To(s.checkpoint).
|
||||
Operation("checkpoint"))
|
||||
|
@ -858,18 +858,24 @@ func TestContainerLogsWithInvalidTail(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCheckpointContainer(t *testing.T) {
|
||||
// Enable features.ContainerCheckpoint during test
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, true)()
|
||||
|
||||
fw := newServerTest()
|
||||
defer fw.testHTTPServer.Close()
|
||||
podNamespace := "other"
|
||||
podName := "foo"
|
||||
expectedContainerName := "baz"
|
||||
// GetPodByName() should always fail
|
||||
fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) {
|
||||
return nil, false
|
||||
|
||||
setupTest := func(featureGate bool) *serverTestFramework {
|
||||
// Set features.ContainerCheckpoint to featureGate; the deferred reset below runs when setupTest returns
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, featureGate)()
|
||||
|
||||
fw := newServerTest()
|
||||
// GetPodByName() should always fail
|
||||
fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) {
|
||||
return nil, false
|
||||
}
|
||||
return fw
|
||||
}
|
||||
fw := setupTest(true)
|
||||
defer fw.testHTTPServer.Close()
|
||||
|
||||
t.Run("wrong pod namespace", func(t *testing.T) {
|
||||
resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil)
|
||||
if err != nil {
|
||||
@ -927,6 +933,19 @@ func TestCheckpointContainer(t *testing.T) {
|
||||
}
|
||||
assert.Equal(t, resp.StatusCode, 200)
|
||||
})
|
||||
|
||||
// Now test for 404 if checkpointing support is explicitly disabled.
|
||||
fw.testHTTPServer.Close()
|
||||
fw = setupTest(false)
|
||||
defer fw.testHTTPServer.Close()
|
||||
setPodByNameFunc(fw, podNamespace, podName, expectedContainerName)
|
||||
t.Run("checkpointing fails because disabled", func(t *testing.T) {
|
||||
resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil)
|
||||
if err != nil {
|
||||
t.Errorf("Got error POSTing: %v", err)
|
||||
}
|
||||
assert.Equal(t, 404, resp.StatusCode)
|
||||
})
|
||||
}
|
||||
|
||||
func makeReq(t *testing.T, method, url, clientProtocol string) *http.Request {
|
||||
|
@ -34,11 +34,14 @@ import (
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
restclient "k8s.io/client-go/rest"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
|
||||
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
||||
"k8s.io/kubernetes/test/e2e/nodefeature"
|
||||
testutils "k8s.io/kubernetes/test/utils"
|
||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||
admissionapi "k8s.io/pod-security-admission/api"
|
||||
|
||||
"github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -75,6 +78,58 @@ func proxyPostRequest(ctx context.Context, c clientset.Interface, node, endpoint
|
||||
}
|
||||
}
|
||||
|
||||
func getCheckpointContainerMetric(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) {
|
||||
framework.Logf("Getting 'checkpoint_container' metrics from %q", pod.Spec.NodeName)
|
||||
ms, err := e2emetrics.GetKubeletMetrics(
|
||||
ctx,
|
||||
f.ClientSet,
|
||||
pod.Spec.NodeName,
|
||||
)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
runtimeOperationsTotal, ok := ms["runtime_operations_total"]
|
||||
if !ok {
|
||||
// If the metric was not found it was probably not written to, yet.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
for _, item := range runtimeOperationsTotal {
|
||||
if item.Metric["__name__"] == "kubelet_runtime_operations_total" && item.Metric["operation_type"] == "checkpoint_container" {
|
||||
return int(item.Value), nil
|
||||
}
|
||||
}
|
||||
// If the metric was not found it was probably not written to, yet.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func getCheckpointContainerErrorMetric(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) {
|
||||
framework.Logf("Getting 'checkpoint_container' error metrics from %q", pod.Spec.NodeName)
|
||||
ms, err := e2emetrics.GetKubeletMetrics(
|
||||
ctx,
|
||||
f.ClientSet,
|
||||
pod.Spec.NodeName,
|
||||
)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
runtimeOperationsErrorsTotal, ok := ms["runtime_operations_errors_total"]
|
||||
if !ok {
|
||||
// If the metric was not found it was probably not written to, yet.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
for _, item := range runtimeOperationsErrorsTotal {
|
||||
if item.Metric["__name__"] == "kubelet_runtime_operations_errors_total" && item.Metric["operation_type"] == "checkpoint_container" {
|
||||
return int(item.Value), nil
|
||||
}
|
||||
}
|
||||
// If the metric was not found it was probably not written to, yet.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, func() {
|
||||
f := framework.NewDefaultFramework("checkpoint-container-test")
|
||||
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
|
||||
@ -82,7 +137,10 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
||||
ginkgo.By("creating a target pod")
|
||||
podClient := e2epod.NewPodClient(f)
|
||||
pod := podClient.CreateSync(ctx, &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "checkpoint-container-pod"},
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "checkpoint-container-pod",
|
||||
Namespace: f.Namespace.Name,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
@ -108,6 +166,15 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
||||
framework.Failf("pod %q should be ready", p.Name)
|
||||
}
|
||||
|
||||
// No checkpoint operation should have been logged
|
||||
checkpointContainerMetric, err := getCheckpointContainerMetric(ctx, f, pod)
|
||||
framework.ExpectNoError(err)
|
||||
gomega.Expect(checkpointContainerMetric).To(gomega.Equal(0))
|
||||
// No error should have been logged
|
||||
checkpointContainerErrorMetric, err := getCheckpointContainerErrorMetric(ctx, f, pod)
|
||||
framework.ExpectNoError(err)
|
||||
gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0))
|
||||
|
||||
framework.Logf(
|
||||
"About to checkpoint container %q on %q",
|
||||
pod.Spec.Containers[0].Name,
|
||||
@ -144,6 +211,12 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
||||
// If the container engine has not implemented the Checkpoint CRI API
|
||||
// we will get 500 and a message with
|
||||
// '(rpc error: code = Unimplemented desc = unknown method CheckpointContainer'
|
||||
// or
|
||||
// '(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)'
|
||||
// if the container engine reports that it has explicitly disabled support for it,
|
||||
// or
|
||||
// '(rpc error: code = Unknown desc = checkpoint/restore support not available)'
|
||||
// if the container engine explicitly disabled the checkpoint/restore support
|
||||
if (int(statusError.ErrStatus.Code)) == http.StatusInternalServerError {
|
||||
if strings.Contains(
|
||||
statusError.ErrStatus.Message,
|
||||
@ -152,8 +225,26 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
||||
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
|
||||
return
|
||||
}
|
||||
if strings.Contains(
|
||||
statusError.ErrStatus.Message,
|
||||
"(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)",
|
||||
) {
|
||||
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
|
||||
return
|
||||
}
|
||||
if strings.Contains(
|
||||
statusError.ErrStatus.Message,
|
||||
"(rpc error: code = Unknown desc = checkpoint/restore support not available)",
|
||||
) {
|
||||
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
|
||||
return
|
||||
}
|
||||
}
|
||||
framework.Failf("Unexpected status code (%d) during 'CheckpointContainer'", statusError.ErrStatus.Code)
|
||||
framework.Failf(
|
||||
"Unexpected status code (%d) during 'CheckpointContainer': %q",
|
||||
statusError.ErrStatus.Code,
|
||||
statusError.ErrStatus.Message,
|
||||
)
|
||||
}
|
||||
|
||||
framework.ExpectNoError(err)
|
||||
@ -205,5 +296,13 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
|
||||
// cleanup checkpoint archive
|
||||
os.RemoveAll(item)
|
||||
}
|
||||
// Exactly one checkpoint operation should have happened
|
||||
checkpointContainerMetric, err = getCheckpointContainerMetric(ctx, f, pod)
|
||||
framework.ExpectNoError(err)
|
||||
gomega.Expect(checkpointContainerMetric).To(gomega.Equal(1))
|
||||
// No error should have been logged
|
||||
checkpointContainerErrorMetric, err = getCheckpointContainerErrorMetric(ctx, f, pod)
|
||||
framework.ExpectNoError(err)
|
||||
gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0))
|
||||
})
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user