From 39bd462431692b4dad0306c0b2ed81ef8d6e5851 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Fri, 22 Mar 2024 09:55:58 +0800 Subject: [PATCH 1/5] runtime: support to set timeout for CreateContainerRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the situation to pull images in the guest #8484, it’s important to account for pulling large images. Presently, the image pull process in the guest hinges on `CreateContainerRequest`, which defaults to a 60-second timeout. However, this duration may prove insufficient for pulling larger images, such as those containing AI models. Consequently, we must devise a method to extend the timeout period for large image pull. Fixes: #8141 Signed-off-by: ChengyuZhu6 --- src/runtime/virtcontainers/kata_agent.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 2de9297da9..83bde40df1 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -86,6 +86,7 @@ type customRequestTimeoutKeyType struct{} var ( checkRequestTimeout = 30 * time.Second + createContainerRequestTimeout = 60 * time.Second defaultRequestTimeout = 60 * time.Second remoteRequestTimeout = 300 * time.Second customRequestTimeoutKey = customRequestTimeoutKeyType(struct{}{}) @@ -2207,6 +2208,8 @@ func (k *kataAgent) getReqContext(ctx context.Context, reqName string) (newCtx c // Wait and GetOOMEvent have no timeout case grpcCheckRequest: newCtx, cancel = context.WithTimeout(ctx, checkRequestTimeout) + case grpcCreateContainerRequest: + newCtx, cancel = context.WithTimeout(ctx, createContainerRequestTimeout) default: var requestTimeout = defaultRequestTimeout From 2224f6d63f5a203d84df3ac733549f48fa46d60f Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Fri, 22 Mar 2024 10:13:23 +0800 Subject: [PATCH 2/5] runtime: support to configure CreateContainer timeout in annotation Support to 
configure CreateContainerRequestTimeout in the annotations. e.g.: annotations: "io.katacontainers.config.runtime.create_container_timeout": "300" Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. In essence, the timeout used for guest pull = min(runtime-request-timeout, create_container_timeout) --- src/runtime/pkg/katautils/config.go | 2 ++ src/runtime/pkg/oci/utils.go | 12 ++++++++++++ src/runtime/pkg/oci/utils_test.go | 2 ++ src/runtime/virtcontainers/kata_agent.go | 5 +++++ .../virtcontainers/pkg/annotations/annotations.go | 3 +++ src/runtime/virtcontainers/sandbox.go | 4 ++++ 6 files changed, 28 insertions(+) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index f7782ed1f0..997b83ed2d 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -186,6 +186,7 @@ type runtime struct { StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` EnablePprof bool `toml:"enable_pprof"` DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` + CreateContainerTimeout uint64 `toml:"create_container_timeout"` } type agent struct { @@ -1569,6 +1570,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat config.JaegerEndpoint = tomlConf.Runtime.JaegerEndpoint config.JaegerUser = tomlConf.Runtime.JaegerUser config.JaegerPassword = tomlConf.Runtime.JaegerPassword + config.CreateContainerTimeout = tomlConf.Runtime.CreateContainerTimeout for _, f := range tomlConf.Runtime.Experimental { feature := exp.Get(f) if feature == nil { diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 08759c2066..b07951fdfe 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -156,6 +156,10 @@ type RuntimeConfig struct { // Determines if Kata creates emptyDir on 
the guest DisableGuestEmptyDir bool + + // CreateContainer timeout which, if provided, indicates the createcontainer request timeout + // needed for the workload ( Mostly used for pulling images in the guest ) + CreateContainerTimeout uint64 } // AddKernelParam allows the addition of new kernel parameters to an existing @@ -864,6 +868,12 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r return err } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.CreateContainerTimeout).setUint(func(createContainerTimeout uint64) { + sbConfig.CreateContainerTimeout = createContainerTimeout + }); err != nil { + return err + } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) { sbConfig.EnableVCPUsPinning = enableVCPUsPinning }); err != nil { @@ -1007,6 +1017,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st GuestSeLinuxLabel: runtime.GuestSeLinuxLabel, Experimental: runtime.Experimental, + + CreateContainerTimeout: runtime.CreateContainerTimeout, } if err := addAnnotations(ocispec, &sandboxConfig, runtime); err != nil { diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index 778db87b55..1f53b831ad 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -810,12 +810,14 @@ func TestAddRuntimeAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.SandboxCgroupOnly] = "true" ocispec.Annotations[vcAnnotations.DisableNewNetNs] = "true" ocispec.Annotations[vcAnnotations.InterNetworkModel] = "macvtap" + ocispec.Annotations[vcAnnotations.CreateContainerTimeout] = "100" addAnnotations(ocispec, &config, runtimeConfig) assert.Equal(config.DisableGuestSeccomp, true) assert.Equal(config.SandboxCgroupOnly, true) assert.Equal(config.NetworkConfig.DisableNewNetwork, true) assert.Equal(config.NetworkConfig.InterworkingModel, vc.NetXConnectMacVtapModel) + 
assert.Equal(config.CreateContainerTimeout, uint64(100)) } func TestRegexpContains(t *testing.T) { diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 83bde40df1..34e4b39700 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -377,6 +377,11 @@ func (k *kataAgent) init(ctx context.Context, sandbox *Sandbox, config KataAgent k.kmodules = config.KernelModules k.dialTimout = config.DialTimeout + createContainerRequestTimeout = time.Duration(sandbox.config.CreateContainerTimeout) * time.Second + k.Logger().WithFields(logrus.Fields{ + "createContainerRequestTimeout": fmt.Sprintf("%+v", createContainerRequestTimeout), + }).Info("The createContainerRequestTimeout has been set ") + return disableVMShutdown, nil } diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 03498fef75..2b9cfbdd56 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -271,6 +271,9 @@ const ( // VfioMode is a sandbox annotation to specify how attached VFIO devices should be treated // Overrides the runtime.vfio_mode parameter in the global configuration.toml VfioMode = kataAnnotRuntimePrefix + "vfio_mode" + + // CreateContainerTimeout is a sandbox annotation that sets the create container timeout. 
+ CreateContainerTimeout = kataAnnotRuntimePrefix + "create_container_timeout" ) // Agent related annotations diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index bff8b6b9b0..4a9d9e698d 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -182,6 +182,10 @@ type SandboxConfig struct { // EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU EnableVCPUsPinning bool + + // Create container timeout which, if provided, indicates the create container timeout + // needed for the workload(s) + CreateContainerTimeout uint64 } // valid checks that the sandbox configuration is valid. From c2dc13ebaa797cf35ad34db0784be8c0097fb011 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Fri, 22 Mar 2024 10:18:17 +0800 Subject: [PATCH 3/5] runtime: support to configure CreateContainer Timeout in configurations support to configure CreateContainerRequestTimeout in the configurations. e.g.: [runtime] ... create_container_timeout = 300 Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. 
In essence, the timeout used for guest pull = min(runtime-request-timeout, create_container_timeout) --- src/runtime/Makefile | 4 ++++ src/runtime/config/configuration-acrn.toml.in | 8 ++++++++ src/runtime/config/configuration-clh.toml.in | 8 ++++++++ src/runtime/config/configuration-fc.toml.in | 8 ++++++++ src/runtime/config/configuration-qemu-nvidia-gpu.toml.in | 8 ++++++++ src/runtime/config/configuration-qemu-se.toml.in | 8 ++++++++ src/runtime/config/configuration-qemu-sev.toml.in | 8 ++++++++ src/runtime/config/configuration-qemu-snp.toml.in | 8 ++++++++ src/runtime/config/configuration-qemu-tdx.toml.in | 8 ++++++++ src/runtime/config/configuration-qemu.toml.in | 8 ++++++++ src/runtime/config/configuration-remote.toml.in | 8 ++++++++ src/runtime/config/configuration-stratovirt.toml.in | 8 ++++++++ 12 files changed, 92 insertions(+) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index ea209f8613..72345ac9fe 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -265,6 +265,9 @@ DEFBINDMOUNTS := [] # Image Service Offload DEFSERVICEOFFLOAD ?= false +# Create Container Timeout in seconds +DEFCREATECONTAINERTIMEOUT ?= 60 + SED = sed CLI_DIR = cmd @@ -679,6 +682,7 @@ USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFBINDMOUNTS USER_VARS += DEFSERVICEOFFLOAD +USER_VARS += DEFCREATECONTAINERTIMEOUT USER_VARS += DEFVFIOMODE USER_VARS += BUILDFLAGS diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index ef02075897..7297059cad 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ b/src/runtime/config/configuration-acrn.toml.in @@ -240,3 +240,11 @@ experimental=@DEFAULTEXPFEATURES@ # If enabled, user can run pprof tools with shim v2 process through kata-monitor. 
# (default: false) # enable_pprof = true + +# Indicates the CreateContainer request timeout needed for the workload(s) +# If using guest_pull this includes the time to pull the image inside the guest +# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s) +# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config +# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. +# In essence, the timeout used for guest pull = min(runtime-request-timeout, create_container_timeout) Date: Fri, 22 Mar 2024 10:41:35 +0800 Subject: [PATCH 4/5] how-to: add createcontainer timeout to sandbox config documentation add createcontainer timeout annotation to sandbox config documentation. Signed-off-by: ChengyuZhu6 --- docs/how-to/how-to-set-sandbox-config-kata.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index 6540febbf5..4cdacae85b 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -27,6 +27,7 @@ There are several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. 
Valid values are `macvtap`, `tcfilter` and `none` | | `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup | | `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process | +| `io.katacontainers.config.runtime.create_container_timeout` | `uint64` | the timeout for creating a container in `seconds`, default is `60` | ## Agent Options | Key | Value Type | Comments | From c50d3ebacc6420970370cbc0df2ea014ac9441e6 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Fri, 22 Mar 2024 12:00:01 +0800 Subject: [PATCH 5/5] tests:k8s: Add a test to pull large images in the guest Add a test to pull large images in the guest. Signed-off-by: ChengyuZhu6 --- .../kubernetes/k8s-guest-pull-image.bats | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/integration/kubernetes/k8s-guest-pull-image.bats b/tests/integration/kubernetes/k8s-guest-pull-image.bats index e6b9a85383..7a7c5d7eba 100644 --- a/tests/integration/kubernetes/k8s-guest-pull-image.bats +++ b/tests/integration/kubernetes/k8s-guest-pull-image.bats @@ -13,6 +13,7 @@ setup() { setup_common unencrypted_image_1="quay.io/sjenning/nginx:1.15-alpine" unencrypted_image_2="quay.io/prometheus/busybox:latest" + large_image="quay.io/confidential-containers/test-images:largeimage" } @test "Test we can pull an unencrypted image outside the guest with runc and then inside the guest successfully" { @@ -58,6 +59,43 @@ setup() { assert_rootfs_count "$node" "$sandbox_id" "1" } +@test "Test we can pull a large image inside the guest" { + [[ " ${SUPPORTED_NON_TEE_HYPERVISORS} " =~ " ${KATA_HYPERVISOR} " ]] && skip "Test not supported for ${KATA_HYPERVISOR}." + skip "This test requires large memory, which the encrypted memory is typically small and valuable in TEE. \ + The test will be skiped until https://github.com/kata-containers/kata-containers/issues/8142 is addressed." 
+ kata_pod_with_nydus_config="$(new_pod_config "$large_image" "kata-${KATA_HYPERVISOR}")" + set_node "$kata_pod_with_nydus_config" "$node" + set_container_command "$kata_pod_with_nydus_config" "0" "sleep" "30" + + # Set annotation to pull large image in guest + set_metadata_annotation "$kata_pod_with_nydus_config" \ + "io.containerd.cri.runtime-handler" \ + "kata-${KATA_HYPERVISOR}" + + # For debug sake + echo "Pod $kata_pod_with_nydus_config file:" + cat $kata_pod_with_nydus_config + + # The pod should fail because the default timeout of CreateContainerRequest is 60s + assert_pod_fail "$kata_pod_with_nydus_config" + assert_logs_contain "$node" kata "$node_start_time" \ + 'context deadline exceeded' + + kubectl delete -f $kata_pod_with_nydus_config + + # Set CreateContainerRequest timeout in the annotation to pull large image in guest + create_container_timeout=300 + set_metadata_annotation "$kata_pod_with_nydus_config" \ + "io.katacontainers.config.runtime.create_container_timeout" \ + "${create_container_timeout}" + + # For debug sake + echo "Pod $kata_pod_with_nydus_config file:" + cat $kata_pod_with_nydus_config + + k8s_create_pod "$kata_pod_with_nydus_config" +} + @test "Test we can pull an unencrypted image inside the guest twice in a row and then outside the guest successfully" { [[ " ${SUPPORTED_NON_TEE_HYPERVISORS} " =~ " ${KATA_HYPERVISOR} " ]] && skip "Test not supported for ${KATA_HYPERVISOR}." skip "Skip this test until we use containerd 2.0 with 'image pull per runtime class' feature: https://github.com/containerd/containerd/issues/9377"