mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-17 14:58:16 +00:00
Merge pull request #2941 from egernst/sandbox-sizing-feature
Sandbox sizing feature
This commit is contained in:
@@ -157,6 +157,32 @@ docker run --cpus 4 -ti debian bash -c "nproc; cat /sys/fs/cgroup/cpu,cpuacct/cp
|
|||||||
400000 # cfs quota
|
400000 # cfs quota
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Virtual CPU handling without hotplug
|
||||||
|
|
||||||
|
In some cases, the hardware and/or software architecture being utilized does not support
|
||||||
|
hotplug. For example, Firecracker VMM does not support CPU or memory hotplug. Similarly,
|
||||||
|
the current Linux Kernel for aarch64 does not support CPU or memory hotplug. To appropriately
|
||||||
|
size the virtual machine for the workload within the container or pod, we provide a `static_sandbox_resource_mgmt`
|
||||||
|
flag within the Kata Containers configuration. When this is set, the runtime will:
|
||||||
|
- Size the VM based on the workload requirements as well as the `default_vcpus` option specified in the configuration.
|
||||||
|
- Not resize the virtual machine after it has been launched.
|
||||||
|
|
||||||
|
VM size determination varies depending on the type of container being run, and may not always
|
||||||
|
be available. If workload sizing information is not available, the virtual machine will be started with the
|
||||||
|
`default_vcpus`.
|
||||||
|
|
||||||
|
In the case of a pod, the initial sandbox container (pause container) typically doesn't contain any resource
|
||||||
|
information in its runtime `spec`. It is possible that the upper layer runtime
|
||||||
|
(i.e. containerd or CRI-O) may pass sandbox sizing annotations within the pause container's
|
||||||
|
`spec`. If these are provided, we will use them to appropriately size the VM. In particular,
|
||||||
|
we'll calculate the number of CPUs required for the workload and augment this by the `default_vcpus`
|
||||||
|
configuration option, and use this for the virtual machine size.
|
||||||
|
|
||||||
|
In the case of a single container (i.e., not a pod), if the container specifies resource requirements,
|
||||||
|
the container's `spec` will provide the sizing information directly. If these are set, we will
|
||||||
|
calculate the number of CPUs required for the workload and augment this by the `default_vcpus`
|
||||||
|
configuration option, and use this for the virtual machine size.
|
||||||
|
|
||||||
|
|
||||||
[1]: https://docs.docker.com/config/containers/resource_constraints/#cpu
|
[1]: https://docs.docker.com/config/containers/resource_constraints/#cpu
|
||||||
[2]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource
|
[2]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource
|
||||||
|
@@ -187,6 +187,8 @@ DEFVFIOMODE := guest-kernel
|
|||||||
# Default cgroup model
|
# Default cgroup model
|
||||||
DEFSANDBOXCGROUPONLY ?= false
|
DEFSANDBOXCGROUPONLY ?= false
|
||||||
|
|
||||||
|
DEFSTATICRESOURCEMGMT ?= false
|
||||||
|
|
||||||
DEFBINDMOUNTS := []
|
DEFBINDMOUNTS := []
|
||||||
|
|
||||||
# Features
|
# Features
|
||||||
@@ -279,6 +281,7 @@ ifneq (,$(FCCMD))
|
|||||||
# firecracker-specific options (all should be suffixed by "_FC")
|
# firecracker-specific options (all should be suffixed by "_FC")
|
||||||
DEFBLOCKSTORAGEDRIVER_FC := virtio-mmio
|
DEFBLOCKSTORAGEDRIVER_FC := virtio-mmio
|
||||||
DEFNETWORKMODEL_FC := tcfilter
|
DEFNETWORKMODEL_FC := tcfilter
|
||||||
|
DEFSTATICRESOURCEMGMT_FC = true
|
||||||
KERNELTYPE_FC = uncompressed
|
KERNELTYPE_FC = uncompressed
|
||||||
KERNEL_NAME_FC = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_FC))
|
KERNEL_NAME_FC = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_FC))
|
||||||
KERNELPATH_FC = $(KERNELDIR)/$(KERNEL_NAME_FC)
|
KERNELPATH_FC = $(KERNELDIR)/$(KERNEL_NAME_FC)
|
||||||
@@ -449,6 +452,8 @@ USER_VARS += DEFMSIZE9P
|
|||||||
USER_VARS += DEFENTROPYSOURCE
|
USER_VARS += DEFENTROPYSOURCE
|
||||||
USER_VARS += DEFVALIDENTROPYSOURCES
|
USER_VARS += DEFVALIDENTROPYSOURCES
|
||||||
USER_VARS += DEFSANDBOXCGROUPONLY
|
USER_VARS += DEFSANDBOXCGROUPONLY
|
||||||
|
USER_VARS += DEFSTATICRESOURCEMGMT
|
||||||
|
USER_VARS += DEFSTATICRESOURCEMGMT_FC
|
||||||
USER_VARS += DEFBINDMOUNTS
|
USER_VARS += DEFBINDMOUNTS
|
||||||
USER_VARS += DEFVFIOMODE
|
USER_VARS += DEFVFIOMODE
|
||||||
USER_VARS += FEATURE_SELINUX
|
USER_VARS += FEATURE_SELINUX
|
||||||
|
@@ -180,13 +180,6 @@ block_device_driver = "virtio-blk"
|
|||||||
# the container network interface
|
# the container network interface
|
||||||
# Options:
|
# Options:
|
||||||
#
|
#
|
||||||
# - bridged (Deprecated)
|
|
||||||
# Uses a linux bridge to interconnect the container interface to
|
|
||||||
# the VM. Works for most cases except macvlan and ipvlan.
|
|
||||||
# ***NOTE: This feature has been deprecated with plans to remove this
|
|
||||||
# feature in the future. Please use other network models listed below.
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# - macvtap
|
# - macvtap
|
||||||
# Used when the Container network interface can be bridged using
|
# Used when the Container network interface can be bridged using
|
||||||
# macvtap.
|
# macvtap.
|
||||||
@@ -224,7 +217,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
|||||||
|
|
||||||
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
|
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
|
||||||
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
|
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
|
||||||
# `disable_new_netns` conflicts with `internetworking_model=bridged` and `internetworking_model=macvtap`. It works only
|
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
|
||||||
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
|
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
|
||||||
# (like OVS) directly.
|
# (like OVS) directly.
|
||||||
# (default: false)
|
# (default: false)
|
||||||
@@ -238,6 +231,15 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
|||||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||||
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
||||||
|
|
||||||
|
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
|
||||||
|
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
|
||||||
|
# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
|
||||||
|
# Compatibility for determining appropriate sandbox (VM) size:
|
||||||
|
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
|
||||||
|
# does not yet support sandbox sizing annotations.
|
||||||
|
# - When running single containers using a tool like ctr, container sizing information will be available.
|
||||||
|
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@
|
||||||
|
|
||||||
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandboxes shared path.
|
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandboxes shared path.
|
||||||
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
|
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
|
||||||
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
|
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
|
||||||
|
@@ -332,7 +332,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
|||||||
# (default: false)
|
# (default: false)
|
||||||
#disable_new_netns = true
|
#disable_new_netns = true
|
||||||
|
|
||||||
# if enable, the runtime will add all the kata processes inside one dedicated cgroup.
|
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
|
||||||
# The container cgroups in the host are not created, just one single cgroup per sandbox.
|
# The container cgroups in the host are not created, just one single cgroup per sandbox.
|
||||||
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
|
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
|
||||||
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
|
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
|
||||||
@@ -340,6 +340,15 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
|||||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||||
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
||||||
|
|
||||||
|
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
|
||||||
|
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
|
||||||
|
# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
|
||||||
|
# Compatibility for determining appropriate sandbox (VM) size:
|
||||||
|
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
|
||||||
|
# does not yet support sandbox sizing annotations.
|
||||||
|
# - When running single containers using a tool like ctr, container sizing information will be available.
|
||||||
|
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_FC@
|
||||||
|
|
||||||
# Enabled experimental feature list, format: ["a", "b"].
|
# Enabled experimental feature list, format: ["a", "b"].
|
||||||
# Experimental features are features not stable enough for production,
|
# Experimental features are features not stable enough for production,
|
||||||
# they may break compatibility, and are prepared for a big version bump.
|
# they may break compatibility, and are prepared for a big version bump.
|
||||||
|
@@ -516,6 +516,15 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
|||||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||||
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
||||||
|
|
||||||
|
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
|
||||||
|
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
|
||||||
|
# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
|
||||||
|
# Compatibility for determining appropriate sandbox (VM) size:
|
||||||
|
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
|
||||||
|
# does not yet support sandbox sizing annotations.
|
||||||
|
# - When running single containers using a tool like ctr, container sizing information will be available.
|
||||||
|
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@
|
||||||
|
|
||||||
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandboxes shared path.
|
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandboxes shared path.
|
||||||
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
|
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
|
||||||
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
|
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
|
||||||
|
@@ -103,6 +103,20 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
|
|||||||
s.ctx = newCtx
|
s.ctx = newCtx
|
||||||
defer span.End()
|
defer span.End()
|
||||||
|
|
||||||
|
// Sandbox sizing information *may* be provided in two scenarios:
|
||||||
|
// 1. The upper layer runtime (ie, containerd or crio) provides sandbox sizing information as an annotation
|
||||||
|
// in the 'sandbox container's' spec. This would typically be a scenario where as part of a create sandbox
|
||||||
|
// request the upper layer runtime receives this information as part of a pod, and makes it available to us
|
||||||
|
// for sizing purposes.
|
||||||
|
// 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
|
||||||
|
// then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
|
||||||
|
// a single container).
|
||||||
|
if containerType == vc.PodSandbox {
|
||||||
|
s.config.SandboxCPUs, s.config.SandboxMemMB = oci.CalculateSandboxSizing(ociSpec)
|
||||||
|
} else {
|
||||||
|
s.config.SandboxCPUs, s.config.SandboxMemMB = oci.CalculateContainerSizing(ociSpec)
|
||||||
|
}
|
||||||
|
|
||||||
if rootFs.Mounted, err = checkAndMount(s, r); err != nil {
|
if rootFs.Mounted, err = checkAndMount(s, r); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@@ -137,19 +137,20 @@ type hypervisor struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type runtime struct {
|
type runtime struct {
|
||||||
InterNetworkModel string `toml:"internetworking_model"`
|
InterNetworkModel string `toml:"internetworking_model"`
|
||||||
JaegerEndpoint string `toml:"jaeger_endpoint"`
|
JaegerEndpoint string `toml:"jaeger_endpoint"`
|
||||||
JaegerUser string `toml:"jaeger_user"`
|
JaegerUser string `toml:"jaeger_user"`
|
||||||
JaegerPassword string `toml:"jaeger_password"`
|
JaegerPassword string `toml:"jaeger_password"`
|
||||||
VfioMode string `toml:"vfio_mode"`
|
VfioMode string `toml:"vfio_mode"`
|
||||||
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
|
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
|
||||||
Experimental []string `toml:"experimental"`
|
Experimental []string `toml:"experimental"`
|
||||||
Debug bool `toml:"enable_debug"`
|
Debug bool `toml:"enable_debug"`
|
||||||
Tracing bool `toml:"enable_tracing"`
|
Tracing bool `toml:"enable_tracing"`
|
||||||
DisableNewNetNs bool `toml:"disable_new_netns"`
|
DisableNewNetNs bool `toml:"disable_new_netns"`
|
||||||
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
|
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
|
||||||
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
|
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
|
||||||
EnablePprof bool `toml:"enable_pprof"`
|
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
|
||||||
|
EnablePprof bool `toml:"enable_pprof"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type agent struct {
|
type agent struct {
|
||||||
@@ -1125,6 +1126,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
|
|||||||
|
|
||||||
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
|
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
|
||||||
|
|
||||||
|
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
|
||||||
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
|
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
|
||||||
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
|
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
|
||||||
config.EnablePprof = tomlConf.Runtime.EnablePprof
|
config.EnablePprof = tomlConf.Runtime.EnablePprof
|
||||||
|
@@ -24,11 +24,13 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
|
||||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||||
|
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
|
||||||
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
|
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
|
||||||
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
||||||
dockershimAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations/dockershim"
|
dockershimAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations/dockershim"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
||||||
|
vcutils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
type annotationContainerType struct {
|
type annotationContainerType struct {
|
||||||
@@ -125,7 +127,16 @@ type RuntimeConfig struct {
|
|||||||
//Determines if seccomp should be applied inside guest
|
//Determines if seccomp should be applied inside guest
|
||||||
DisableGuestSeccomp bool
|
DisableGuestSeccomp bool
|
||||||
|
|
||||||
//Determines if create a netns for hypervisor process
|
// Sandbox sizing information which, if provided, indicates the size of
|
||||||
|
// the sandbox needed for the workload(s)
|
||||||
|
SandboxCPUs uint32
|
||||||
|
SandboxMemMB uint32
|
||||||
|
|
||||||
|
// Determines if we should attempt to size the VM at boot time and skip
|
||||||
|
// any later resource updates.
|
||||||
|
StaticSandboxResourceMgmt bool
|
||||||
|
|
||||||
|
// Determines if create a netns for hypervisor process
|
||||||
DisableNewNetNs bool
|
DisableNewNetNs bool
|
||||||
|
|
||||||
//Determines kata processes are managed only in sandbox cgroup
|
//Determines kata processes are managed only in sandbox cgroup
|
||||||
@@ -873,6 +884,13 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c
|
|||||||
vcAnnotations.BundlePathKey: bundlePath,
|
vcAnnotations.BundlePathKey: bundlePath,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
SandboxResources: vc.SandboxResourceSizing{
|
||||||
|
WorkloadCPUs: runtime.SandboxCPUs,
|
||||||
|
WorkloadMemMB: runtime.SandboxMemMB,
|
||||||
|
},
|
||||||
|
|
||||||
|
StaticResourceMgmt: runtime.StaticSandboxResourceMgmt,
|
||||||
|
|
||||||
ShmSize: shmSize,
|
ShmSize: shmSize,
|
||||||
|
|
||||||
VfioMode: runtime.VfioMode,
|
VfioMode: runtime.VfioMode,
|
||||||
@@ -894,6 +912,25 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c
|
|||||||
return vc.SandboxConfig{}, err
|
return vc.SandboxConfig{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are utilizing static resource management for the sandbox, ensure that the hypervisor is started
|
||||||
|
// with the base number of CPU/memory (which is equal to the default CPU/memory specified for the runtime
|
||||||
|
// configuration or annotations) as well as any specified workload resources.
|
||||||
|
if sandboxConfig.StaticResourceMgmt {
|
||||||
|
sandboxConfig.SandboxResources.BaseCPUs = sandboxConfig.HypervisorConfig.NumVCPUs
|
||||||
|
sandboxConfig.SandboxResources.BaseMemMB = sandboxConfig.HypervisorConfig.MemorySize
|
||||||
|
|
||||||
|
sandboxConfig.HypervisorConfig.NumVCPUs += sandboxConfig.SandboxResources.WorkloadCPUs
|
||||||
|
sandboxConfig.HypervisorConfig.MemorySize += sandboxConfig.SandboxResources.WorkloadMemMB
|
||||||
|
|
||||||
|
ociLog.WithFields(logrus.Fields{
|
||||||
|
"workload cpu": sandboxConfig.SandboxResources.WorkloadCPUs,
|
||||||
|
"default cpu": sandboxConfig.SandboxResources.BaseCPUs,
|
||||||
|
"workload mem in MB": sandboxConfig.SandboxResources.WorkloadMemMB,
|
||||||
|
"default mem": sandboxConfig.SandboxResources.BaseMemMB,
|
||||||
|
}).Debugf("static resources set")
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return sandboxConfig, nil
|
return sandboxConfig, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1046,3 +1083,89 @@ func (a *annotationConfiguration) setUintWithCheck(f func(uint64) error) error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CalculateSandboxSizing will calculate the number of CPUs and amount of Memory that should
|
||||||
|
// be added to the VM if sandbox annotations are provided with this sizing details
|
||||||
|
func CalculateSandboxSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) {
|
||||||
|
var memory, quota int64
|
||||||
|
var period uint64
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if spec == nil || spec.Annotations == nil {
|
||||||
|
return 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// For each annotation, if it isn't defined, or if there's an error in parsing, we'll log
|
||||||
|
// a warning and continue the calculation with 0 value. We expect values like,
|
||||||
|
// Annotations[SandboxMem] = "1048576"
|
||||||
|
// Annotations[SandboxCPUPeriod] = "100000"
|
||||||
|
// Annotations[SandboxCPUQuota] = "220000"
|
||||||
|
// ... to result in VM resources of 1 (MB) for memory, and 3 for CPU (2200 mCPU rounded up to 3).
|
||||||
|
annotation, ok := spec.Annotations[ctrAnnotations.SandboxCPUPeriod]
|
||||||
|
if ok {
|
||||||
|
period, err = strconv.ParseUint(annotation, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
ociLog.Warningf("sandbox-sizing: failure to parse SandboxCPUPeriod: %s", annotation)
|
||||||
|
period = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
annotation, ok = spec.Annotations[ctrAnnotations.SandboxCPUQuota]
|
||||||
|
if ok {
|
||||||
|
quota, err = strconv.ParseInt(annotation, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
ociLog.Warningf("sandbox-sizing: failure to parse SandboxCPUQuota: %s", annotation)
|
||||||
|
quota = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
annotation, ok = spec.Annotations[ctrAnnotations.SandboxMem]
|
||||||
|
if ok {
|
||||||
|
memory, err = strconv.ParseInt(annotation, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
ociLog.Warningf("sandbox-sizing: failure to parse SandboxMem: %s", annotation)
|
||||||
|
memory = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return calculateVMResources(period, quota, memory)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CalculateContainerSizing will calculate the number of CPUs and amount of memory that is needed
|
||||||
|
// based on the provided LinuxResources
|
||||||
|
func CalculateContainerSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) {
|
||||||
|
var memory, quota int64
|
||||||
|
var period uint64
|
||||||
|
|
||||||
|
if spec == nil || spec.Linux == nil || spec.Linux.Resources == nil {
|
||||||
|
return 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
resources := spec.Linux.Resources
|
||||||
|
|
||||||
|
if resources.CPU != nil && resources.CPU.Quota != nil && resources.CPU.Period != nil {
|
||||||
|
quota = *resources.CPU.Quota
|
||||||
|
period = *resources.CPU.Period
|
||||||
|
}
|
||||||
|
|
||||||
|
if resources.Memory != nil && resources.Memory.Limit != nil {
|
||||||
|
memory = *resources.Memory.Limit
|
||||||
|
}
|
||||||
|
|
||||||
|
return calculateVMResources(period, quota, memory)
|
||||||
|
}
|
||||||
|
|
||||||
|
func calculateVMResources(period uint64, quota int64, memory int64) (numCPU, memSizeMB uint32) {
|
||||||
|
numCPU = vcutils.CalculateVCpusFromMilliCpus(vcutils.CalculateMilliCPUs(quota, period))
|
||||||
|
|
||||||
|
if memory < 0 {
|
||||||
|
// While spec allows for a negative value to indicate unconstrained, we don't
|
||||||
|
// see this in practice. Since we rely only on default memory if the workload
|
||||||
|
// is unconstrained, we will treat as 0 for VM resource accounting.
|
||||||
|
ociLog.Infof("memory limit provided < 0, treating as 0 MB for VM sizing: %d", memory)
|
||||||
|
memSizeMB = 0
|
||||||
|
} else {
|
||||||
|
memSizeMB = uint32(memory / 1024 / 1024)
|
||||||
|
}
|
||||||
|
return numCPU, memSizeMB
|
||||||
|
}
|
||||||
|
@@ -1061,3 +1061,149 @@ func TestParseAnnotationBoolConfiguration(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getCtrResourceSpec(memory, quota int64, period uint64) *specs.Spec {
|
||||||
|
return &specs.Spec{
|
||||||
|
Linux: &specs.Linux{
|
||||||
|
Resources: &specs.LinuxResources{
|
||||||
|
CPU: &specs.LinuxCPU{
|
||||||
|
Quota: "a,
|
||||||
|
Period: &period,
|
||||||
|
},
|
||||||
|
Memory: &specs.LinuxMemory{
|
||||||
|
Limit: &memory,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeSizingAnnotations(memory, quota, period string) *specs.Spec {
|
||||||
|
spec := specs.Spec{
|
||||||
|
Annotations: make(map[string]string),
|
||||||
|
}
|
||||||
|
spec.Annotations[ctrAnnotations.SandboxCPUPeriod] = period
|
||||||
|
spec.Annotations[ctrAnnotations.SandboxCPUQuota] = quota
|
||||||
|
spec.Annotations[ctrAnnotations.SandboxMem] = memory
|
||||||
|
|
||||||
|
return &spec
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateContainerSizing(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
spec *specs.Spec
|
||||||
|
expectedCPU uint32
|
||||||
|
expectedMem uint32
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
spec: nil,
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: &specs.Spec{},
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: &specs.Spec{
|
||||||
|
Linux: &specs.Linux{
|
||||||
|
Resources: &specs.LinuxResources{
|
||||||
|
CPU: &specs.LinuxCPU{},
|
||||||
|
Memory: &specs.LinuxMemory{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: getCtrResourceSpec(1024*1024, 200, 100),
|
||||||
|
expectedCPU: 2,
|
||||||
|
expectedMem: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: getCtrResourceSpec(1024*1024*1024, 200, 1),
|
||||||
|
expectedCPU: 200,
|
||||||
|
expectedMem: 1024,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: getCtrResourceSpec(-1*1024*1024*1024, 200, 1),
|
||||||
|
expectedCPU: 200,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: getCtrResourceSpec(0, 10, 0),
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: getCtrResourceSpec(-1, 10, 1),
|
||||||
|
expectedCPU: 10,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range testCases {
|
||||||
|
|
||||||
|
cpu, mem := CalculateContainerSizing(tt.spec)
|
||||||
|
assert.Equal(tt.expectedCPU, cpu, "unexpected CPU")
|
||||||
|
assert.Equal(tt.expectedMem, mem, "unexpected memory")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateSandboxSizing(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
spec *specs.Spec
|
||||||
|
expectedCPU uint32
|
||||||
|
expectedMem uint32
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
spec: nil,
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: &specs.Spec{},
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: makeSizingAnnotations("1048576", "200", "100"),
|
||||||
|
expectedCPU: 2,
|
||||||
|
expectedMem: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: makeSizingAnnotations("1024", "200", "1"),
|
||||||
|
expectedCPU: 200,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: makeSizingAnnotations("foobar", "200", "spaghetti"),
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: makeSizingAnnotations("-1048576", "-100", "1"),
|
||||||
|
expectedCPU: 0,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
spec: makeSizingAnnotations("-1", "100", "1"),
|
||||||
|
expectedCPU: 100,
|
||||||
|
expectedMem: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range testCases {
|
||||||
|
|
||||||
|
cpu, mem := CalculateSandboxSizing(tt.spec)
|
||||||
|
assert.Equal(tt.expectedCPU, cpu, "unexpected CPU")
|
||||||
|
assert.Equal(tt.expectedMem, mem, "unexpected memory")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -99,6 +99,17 @@ type SandboxStats struct {
|
|||||||
Cpus int
|
Cpus int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SandboxResourceSizing describes the CPU and memory sizing of a sandbox (VM), split
// into what the workload(s) require and what is assigned to the VM as base overhead.
type SandboxResourceSizing struct {
	// WorkloadCPUs is the number of CPUs required for the sandbox workload(s).
	WorkloadCPUs uint32
	// BaseCPUs is the base number of CPUs for the VM, assigned as overhead.
	BaseCPUs uint32
	// WorkloadMemMB is the amount of memory required for the sandbox workload(s).
	WorkloadMemMB uint32
	// BaseMemMB is the base amount of memory for the VM, assigned as overhead.
	BaseMemMB uint32
}
|
||||||
|
|
||||||
// SandboxConfig is a Sandbox configuration.
|
// SandboxConfig is a Sandbox configuration.
|
||||||
type SandboxConfig struct {
|
type SandboxConfig struct {
|
||||||
// Volumes is a list of shared volumes between the host and the Sandbox.
|
// Volumes is a list of shared volumes between the host and the Sandbox.
|
||||||
@@ -132,6 +143,11 @@ type SandboxConfig struct {
|
|||||||
|
|
||||||
HypervisorConfig HypervisorConfig
|
HypervisorConfig HypervisorConfig
|
||||||
|
|
||||||
|
SandboxResources SandboxResourceSizing
|
||||||
|
|
||||||
|
// StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM)
|
||||||
|
StaticResourceMgmt bool
|
||||||
|
|
||||||
ShmSize uint64
|
ShmSize uint64
|
||||||
|
|
||||||
VfioMode config.VFIOModeType
|
VfioMode config.VFIOModeType
|
||||||
@@ -1573,7 +1589,7 @@ func (s *Sandbox) createContainers(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Update resources after having added containers to the sandbox, since
|
// Update resources after having added containers to the sandbox, since
|
||||||
// container status is requiered to know if more resources should be added.
|
// container status is required to know if more resources should be added.
|
||||||
if err := s.updateResources(ctx); err != nil {
|
if err := s.updateResources(ctx); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -1909,6 +1925,10 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
|
|||||||
return fmt.Errorf("sandbox config is nil")
|
return fmt.Errorf("sandbox config is nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if s.config.StaticResourceMgmt {
|
||||||
|
s.Logger().Debug("no resources updated: static resource management is set")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
sandboxVCPUs, err := s.calculateSandboxCPUs()
|
sandboxVCPUs, err := s.calculateSandboxCPUs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
Reference in New Issue
Block a user