Merge pull request #7390 from ChengyuZhu6/add_to_configure_request_timeout

runtime: Configure the image request timeout to handle large workloads
This commit is contained in:
Fabiano Fidêncio 2023-07-28 18:54:57 +02:00 committed by GitHub
commit 431c3630f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 91 additions and 3 deletions

View File

@ -27,6 +27,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` | | `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
| `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup | | `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
| `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process | | `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process |
| `io.katacontainers.config.runtime.image_request_timeout` | `uint64` | the timeout for pulling an image within the guest in `seconds`, default is `60` |
## Agent Options ## Agent Options
| Key | Value Type | Comments | | Key | Value Type | Comments |

View File

@ -275,6 +275,9 @@ DEFBINDMOUNTS := []
# Image Service Offload # Image Service Offload
DEFSERVICEOFFLOAD ?= false DEFSERVICEOFFLOAD ?= false
# Image Request Timeout in seconds
DEFIMAGEREQUESTTIMEOUT ?= 60
# SEV & SEV-ES Guest Pre-Attestation # SEV & SEV-ES Guest Pre-Attestation
DEFGUESTPREATTESTATION ?= false DEFGUESTPREATTESTATION ?= false
DEFGUESTPREATTESTATIONPROXY ?= localhost:44444 DEFGUESTPREATTESTATIONPROXY ?= localhost:44444
@ -705,6 +708,7 @@ USER_VARS += DEFSTATICRESOURCEMGMT_FC
USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFSTATICRESOURCEMGMT_TEE
USER_VARS += DEFBINDMOUNTS USER_VARS += DEFBINDMOUNTS
USER_VARS += DEFSERVICEOFFLOAD USER_VARS += DEFSERVICEOFFLOAD
USER_VARS += DEFIMAGEREQUESTTIMEOUT
USER_VARS += DEFVFIOMODE USER_VARS += DEFVFIOMODE
USER_VARS += BUILDFLAGS USER_VARS += BUILDFLAGS
USER_VARS += DEFSERVICEOFFLOAD USER_VARS += DEFSERVICEOFFLOAD

View File

@ -424,6 +424,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
service_offload = @DEFSERVICEOFFLOAD@ service_offload = @DEFSERVICEOFFLOAD@
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -443,6 +443,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
service_offload = @DEFSERVICEOFFLOAD@ service_offload = @DEFSERVICEOFFLOAD@
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -678,6 +678,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
#service_offload = true #service_offload = true
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -654,6 +654,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
service_offload = @DEFSERVICEOFFLOAD@ service_offload = @DEFSERVICEOFFLOAD@
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -658,6 +658,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
service_offload = @DEFSERVICEOFFLOAD@ service_offload = @DEFSERVICEOFFLOAD@
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -683,6 +683,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
service_offload = @DEFSERVICEOFFLOAD@ service_offload = @DEFSERVICEOFFLOAD@
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -671,6 +671,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
service_offload = true service_offload = true
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -718,6 +718,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false) # (default: false)
service_offload = @DEFSERVICEOFFLOAD@ service_offload = @DEFSERVICEOFFLOAD@
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -299,6 +299,12 @@ experimental=@DEFAULTEXPFEATURES@
# Note: The remote hypervisor offloads the pulling on images on the peer pod VM, so requires this to be true # Note: The remote hypervisor offloads the pulling on images on the peer pod VM, so requires this to be true
service_offload = true service_offload = true
# Image request timeout in seconds.
# If specified, indicates the image request timeout in the guest needed for the workload(s).
# If unspecified, it will be set to @DEFIMAGEREQUESTTIMEOUT@ second(s)
# to reduce image pull failures caused by network problems while still surfacing request failure information quickly.
image_request_timeout = @DEFIMAGEREQUESTTIMEOUT@
# Container image decryption keys provisioning. # Container image decryption keys provisioning.
# Applies only if service_offload is true. # Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or # Keys can be provisioned locally (e.g. through a special command or

View File

@ -66,8 +66,9 @@ type tomlConfig struct {
} }
type image struct { type image struct {
Provision string `toml:"provision"` Provision string `toml:"provision"`
ServiceOffload bool `toml:"service_offload"` ServiceOffload bool `toml:"service_offload"`
ImageRequestTimeout uint64 `toml:"image_request_timeout"`
} }
type factory struct { type factory struct {
@ -1456,6 +1457,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
config.JaegerUser = tomlConf.Runtime.JaegerUser config.JaegerUser = tomlConf.Runtime.JaegerUser
config.JaegerPassword = tomlConf.Runtime.JaegerPassword config.JaegerPassword = tomlConf.Runtime.JaegerPassword
config.ServiceOffload = tomlConf.Image.ServiceOffload config.ServiceOffload = tomlConf.Image.ServiceOffload
config.ImageRequestTimeout = tomlConf.Image.ImageRequestTimeout
for _, f := range tomlConf.Runtime.Experimental { for _, f := range tomlConf.Runtime.Experimental {
feature := exp.Get(f) feature := exp.Get(f)
if feature == nil { if feature == nil {

View File

@ -157,6 +157,10 @@ type RuntimeConfig struct {
// Offload the CRI image management service to the Kata agent. // Offload the CRI image management service to the Kata agent.
ServiceOffload bool ServiceOffload bool
// ImageRequestTimeout is the timeout, in seconds, for image requests
// made in the guest on behalf of the workload(s).
ImageRequestTimeout uint64
} }
// AddKernelParam allows the addition of new kernel parameters to an existing // AddKernelParam allows the addition of new kernel parameters to an existing
@ -915,7 +919,11 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r
value, vcAnnotations.VfioMode) value, vcAnnotations.VfioMode)
} }
} }
if err := newAnnotationConfiguration(ocispec, vcAnnotations.ImageRequestTimeout).setUint(func(imageRequestTimeout uint64) {
sbConfig.ImageRequestTimeout = imageRequestTimeout
}); err != nil {
return err
}
return nil return nil
} }
@ -1029,6 +1037,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st
Experimental: runtime.Experimental, Experimental: runtime.Experimental,
ServiceOffload: runtime.ServiceOffload, ServiceOffload: runtime.ServiceOffload,
ImageRequestTimeout: runtime.ImageRequestTimeout,
} }
if err := addAnnotations(ocispec, &sandboxConfig, runtime); err != nil { if err := addAnnotations(ocispec, &sandboxConfig, runtime); err != nil {

View File

@ -812,12 +812,15 @@ func TestAddRuntimeAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.SandboxCgroupOnly] = "true" ocispec.Annotations[vcAnnotations.SandboxCgroupOnly] = "true"
ocispec.Annotations[vcAnnotations.DisableNewNetNs] = "true" ocispec.Annotations[vcAnnotations.DisableNewNetNs] = "true"
ocispec.Annotations[vcAnnotations.InterNetworkModel] = "macvtap" ocispec.Annotations[vcAnnotations.InterNetworkModel] = "macvtap"
ocispec.Annotations[vcAnnotations.ImageRequestTimeout] = "100"
addAnnotations(ocispec, &config, runtimeConfig) addAnnotations(ocispec, &config, runtimeConfig)
assert.Equal(config.DisableGuestSeccomp, true) assert.Equal(config.DisableGuestSeccomp, true)
assert.Equal(config.SandboxCgroupOnly, true) assert.Equal(config.SandboxCgroupOnly, true)
assert.Equal(config.NetworkConfig.DisableNewNetwork, true) assert.Equal(config.NetworkConfig.DisableNewNetwork, true)
assert.Equal(config.NetworkConfig.InterworkingModel, vc.NetXConnectMacVtapModel) assert.Equal(config.NetworkConfig.InterworkingModel, vc.NetXConnectMacVtapModel)
assert.Equal(config.ImageRequestTimeout, uint64(100))
} }
func TestRegexpContains(t *testing.T) { func TestRegexpContains(t *testing.T) {

View File

@ -83,6 +83,7 @@ type customRequestTimeoutKeyType struct{}
var ( var (
checkRequestTimeout = 30 * time.Second checkRequestTimeout = 30 * time.Second
defaultRequestTimeout = 60 * time.Second defaultRequestTimeout = 60 * time.Second
imageRequestTimeout = 60 * time.Second
remoteRequestTimeout = 300 * time.Second remoteRequestTimeout = 300 * time.Second
customRequestTimeoutKey = customRequestTimeoutKeyType(struct{}{}) customRequestTimeoutKey = customRequestTimeoutKeyType(struct{}{})
errorMissingOCISpec = errors.New("Missing OCI specification") errorMissingOCISpec = errors.New("Missing OCI specification")
@ -364,6 +365,11 @@ func (k *kataAgent) init(ctx context.Context, sandbox *Sandbox, config KataAgent
k.kmodules = config.KernelModules k.kmodules = config.KernelModules
k.dialTimout = config.DialTimeout k.dialTimout = config.DialTimeout
imageRequestTimeout = time.Duration(sandbox.config.ImageRequestTimeout) * time.Second
k.Logger().WithFields(logrus.Fields{
"imageRequestTimeout": fmt.Sprintf("%+v", imageRequestTimeout),
}).Info("The imageRequestTimeout has been set ")
return disableVMShutdown, nil return disableVMShutdown, nil
} }
@ -2089,6 +2095,8 @@ func (k *kataAgent) getReqContext(ctx context.Context, reqName string) (newCtx c
// Wait and GetOOMEvent have no timeout // Wait and GetOOMEvent have no timeout
case grpcCheckRequest: case grpcCheckRequest:
newCtx, cancel = context.WithTimeout(ctx, checkRequestTimeout) newCtx, cancel = context.WithTimeout(ctx, checkRequestTimeout)
case grpcPullImageRequest:
newCtx, cancel = context.WithTimeout(ctx, imageRequestTimeout)
default: default:
var requestTimeout = defaultRequestTimeout var requestTimeout = defaultRequestTimeout

View File

@ -292,6 +292,9 @@ const (
// VfioMode is a sandbox annotation to specify how attached VFIO devices should be treated // VfioMode is a sandbox annotation to specify how attached VFIO devices should be treated
// Overrides the runtime.vfio_mode parameter in the global configuration.toml // Overrides the runtime.vfio_mode parameter in the global configuration.toml
VfioMode = kataAnnotRuntimePrefix + "vfio_mode" VfioMode = kataAnnotRuntimePrefix + "vfio_mode"
// ImageRequestTimeout is a sandbox annotation that sets the image pull timeout in the guest.
ImageRequestTimeout = kataAnnotRuntimePrefix + "image_request_timeout"
) )
// Agent related annotations // Agent related annotations

View File

@ -161,6 +161,9 @@ type SandboxConfig struct {
StaticResourceMgmt bool StaticResourceMgmt bool
// Offload the CRI image management service to the Kata agent. // Offload the CRI image management service to the Kata agent.
ServiceOffload bool ServiceOffload bool
// ImageRequestTimeout is the timeout, in seconds, for image requests
// made in the guest on behalf of the workload(s).
ImageRequestTimeout uint64
// SharePidNs sets all containers to share the same sandbox level pid namespace. // SharePidNs sets all containers to share the same sandbox level pid namespace.
SharePidNs bool SharePidNs bool
// SystemdCgroup enables systemd cgroup support // SystemdCgroup enables systemd cgroup support