diff --git a/pkg/katautils/config.go b/pkg/katautils/config.go index 5a500eb5e8..51d2ceba4b 100644 --- a/pkg/katautils/config.go +++ b/pkg/katautils/config.go @@ -304,7 +304,7 @@ func (h hypervisor) defaultMaxVCPUs() uint32 { } func (h hypervisor) defaultMemSz() uint32 { - if h.MemorySize < 8 { + if h.MemorySize < vc.MinHypervisorMemory { return defaultMemSize // MiB } diff --git a/virtcontainers/api_test.go b/virtcontainers/api_test.go index b037b85489..a190c5409f 100644 --- a/virtcontainers/api_test.go +++ b/virtcontainers/api_test.go @@ -16,6 +16,7 @@ import ( "testing" ktu "github.com/kata-containers/runtime/pkg/katatestutils" + "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" "github.com/kata-containers/runtime/virtcontainers/pkg/mock" vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types" "github.com/kata-containers/runtime/virtcontainers/store" @@ -69,7 +70,7 @@ func newBasicTestCmd() types.Cmd { func newTestSandboxConfigNoop() SandboxConfig { bundlePath := filepath.Join(testDir, testBundle) - containerAnnotations["com.github.containers.virtcontainers.pkg.oci.bundle_path"] = bundlePath + containerAnnotations[annotations.BundlePathKey] = bundlePath // containerAnnotations["com.github.containers.virtcontainers.pkg.oci.container_type"] = "pod_sandbox" emptySpec := newEmptySpec() diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index 8165e3426e..33a5055249 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -68,6 +68,9 @@ const ( // port numbers below 1024 are called privileged ports. Only a process with // CAP_NET_BIND_SERVICE capability may bind to these port numbers. vSockPort = 1024 + + // MinHypervisorMemory is the minimum memory required for a VM. + MinHypervisorMemory = 256 ) // In some architectures the maximum number of vCPUs depends on the number of physical cores. 
diff --git a/virtcontainers/pkg/annotations/annotations.go b/virtcontainers/pkg/annotations/annotations.go index 42e43c9f1b..08ce422f07 100644 --- a/virtcontainers/pkg/annotations/annotations.go +++ b/virtcontainers/pkg/annotations/annotations.go @@ -6,52 +6,203 @@ package annotations const ( - vcAnnotationsPrefix = "com.github.containers.virtcontainers." + kataAnnotationsPrefix = "io.kata-containers." + kataConfAnnotationsPrefix = kataAnnotationsPrefix + "config." + kataAnnotHypervisorPrefix = kataConfAnnotationsPrefix + "hypervisor." - // KernelPath is a sandbox annotation for passing a per container path pointing at the kernel needed to boot the container VM. - KernelPath = vcAnnotationsPrefix + "KernelPath" - - // ImagePath is a sandbox annotation for passing a per container path pointing at the guest image that will run in the container VM. - ImagePath = vcAnnotationsPrefix + "ImagePath" - - // InitrdPath is a sandbox annotation for passing a per container path pointing at the guest initrd image that will run in the container VM. - InitrdPath = vcAnnotationsPrefix + "InitrdPath" - - // HypervisorPath is a sandbox annotation for passing a per container path pointing at the hypervisor that will run the container VM. - HypervisorPath = vcAnnotationsPrefix + "HypervisorPath" - - // JailerPath is a sandbox annotation for passing a per container path pointing at the jailer that will constrain the container VM. - JailerPath = vcAnnotationsPrefix + "JailerPath" - - // FirmwarePath is a sandbox annotation for passing a per container path pointing at the guest firmware that will run the container VM. - FirmwarePath = vcAnnotationsPrefix + "FirmwarePath" - - // KernelHash is a sandbox annotation for passing a container kernel image SHA-512 hash value. - KernelHash = vcAnnotationsPrefix + "KernelHash" - - // ImageHash is an sandbox annotation for passing a container guest image SHA-512 hash value. 
- ImageHash = vcAnnotationsPrefix + "ImageHash" - - // InitrdHash is an sandbox annotation for passing a container guest initrd SHA-512 hash value. - InitrdHash = vcAnnotationsPrefix + "InitrdHash" - - // HypervisorHash is an sandbox annotation for passing a container hypervisor binary SHA-512 hash value. - HypervisorHash = vcAnnotationsPrefix + "HypervisorHash" - - // JailerHash is an sandbox annotation for passing a jailer binary SHA-512 hash value. - JailerHash = vcAnnotationsPrefix + "JailerHash" - - // FirmwareHash is an sandbox annotation for passing a container guest firmware SHA-512 hash value. - FirmwareHash = vcAnnotationsPrefix + "FirmwareHash" - - // AssetHashType is the hash type used for assets verification - AssetHashType = vcAnnotationsPrefix + "AssetHashType" + // + // OCI + // // BundlePathKey is the annotation key to fetch the OCI configuration file path. - BundlePathKey = vcAnnotationsPrefix + "pkg.oci.bundle_path" + BundlePathKey = kataAnnotationsPrefix + "pkg.oci.bundle_path" // ContainerTypeKey is the annotation key to fetch container type. - ContainerTypeKey = vcAnnotationsPrefix + "pkg.oci.container_type" + ContainerTypeKey = kataAnnotationsPrefix + "pkg.oci.container_type" +) + +// Annotations related to Hypervisor configuration +const ( + // + // Assets + // + + // KernelPath is a sandbox annotation for passing a per container path pointing at the kernel needed to boot the container VM. + KernelPath = kataAnnotHypervisorPrefix + "kernel" + + // ImagePath is a sandbox annotation for passing a per container path pointing at the guest image that will run in the container VM. + ImagePath = kataAnnotHypervisorPrefix + "image" + + // InitrdPath is a sandbox annotation for passing a per container path pointing at the guest initrd image that will run in the container VM. 
+ InitrdPath = kataAnnotHypervisorPrefix + "initrd" + + // HypervisorPath is a sandbox annotation for passing a per container path pointing at the hypervisor that will run the container VM. + HypervisorPath = kataAnnotHypervisorPrefix + "path" + + // JailerPath is a sandbox annotation for passing a per container path pointing at the jailer that will constrain the container VM. + JailerPath = kataAnnotHypervisorPrefix + "jailer_path" + + // FirmwarePath is a sandbox annotation for passing a per container path pointing at the guest firmware that will run the container VM. + FirmwarePath = kataAnnotHypervisorPrefix + "firmware" + + // KernelHash is a sandbox annotation for passing a container kernel image SHA-512 hash value. + KernelHash = kataAnnotHypervisorPrefix + "kernel_hash" + + // ImageHash is an sandbox annotation for passing a container guest image SHA-512 hash value. + ImageHash = kataAnnotHypervisorPrefix + "image_hash" + + // InitrdHash is an sandbox annotation for passing a container guest initrd SHA-512 hash value. + InitrdHash = kataAnnotHypervisorPrefix + "initrd_hash" + + // HypervisorHash is an sandbox annotation for passing a container hypervisor binary SHA-512 hash value. + HypervisorHash = kataAnnotHypervisorPrefix + "hypervisor_hash" + + // JailerHash is an sandbox annotation for passing a jailer binary SHA-512 hash value. + JailerHash = kataAnnotHypervisorPrefix + "jailer_hash" + + // FirmwareHash is an sandbox annotation for passing a container guest firmware SHA-512 hash value. + FirmwareHash = kataAnnotHypervisorPrefix + "firmware_hash" + + // AssetHashType is the hash type used for assets verification + AssetHashType = kataAnnotationsPrefix + "asset_hash_type" + + // + // Generic annotations + // + + // KernelParams is a sandbox annotation for passing additional guest kernel parameters. 
+ KernelParams = kataAnnotHypervisorPrefix + "kernel_params" + + // MachineType is a sandbox annotation to specify the type of machine being emulated by the hypervisor. + MachineType = kataAnnotHypervisorPrefix + "machine_type" + + // MachineAccelerators is a sandbox annotation to specify machine specific accelerators for the hypervisor. + MachineAccelerators = kataAnnotHypervisorPrefix + "machine_accelerators" + + // DisableVhostNet is a sandbox annotation to specify if vhost-net is not available on the host. + DisableVhostNet = kataAnnotHypervisorPrefix + "disable_vhost_net" + + // GuestHookPath is a sandbox annotation to specify the path within the VM that will be used for 'drop-in' hooks. + GuestHookPath = kataAnnotHypervisorPrefix + "guest_hook_path" + + // UseVSock is a sandbox annotation to specify use of vsock for agent communication. + UseVSock = kataAnnotHypervisorPrefix + "use_vsock" + + // HotplugVFIOOnRootBus is a sandbox annotation used to indicate if devices need to be hotplugged on the + // root bus instead of a bridge. + HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus" + + // EntropySource is a sandbox annotation to specify the path to a host source of + // entropy (/dev/random, /dev/urandom or real hardware RNG device) + EntropySource = kataAnnotHypervisorPrefix + "entropy_source" + + // + // CPU Annotations + // + + // DefaultVCPUs is a sandbox annotation for passing the default vcpus assigned for a VM by the hypervisor. + DefaultVCPUs = kataAnnotHypervisorPrefix + "default_vcpus" + + // DefaultMaxVCPUs is a sandbox annotation that specifies the maximum number of vCPUs allocated for the VM by the hypervisor. + DefaultMaxVCPUs = kataAnnotHypervisorPrefix + "default_max_vcpus" + + // + // Memory related annotations + // + + // DefaultMemory is a sandbox annotation for the memory assigned for a VM by the hypervisor. 
+ DefaultMemory = kataAnnotHypervisorPrefix + "default_memory" + + // MemSlots is a sandbox annotation to specify the memory slots assigned to the VM by the hypervisor. + MemSlots = kataAnnotHypervisorPrefix + "memory_slots" + + // MemOffset is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor. + MemOffset = kataAnnotHypervisorPrefix + "memory_offset" + + // MemPrealloc is a sandbox annotation that specifies whether memory pre-allocation is enabled for the VM. + MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc" + + // EnableSwap is a sandbox annotation to enable swap of vm memory. + // The behaviour is undefined if mem_prealloc is also set to true + EnableSwap = kataAnnotHypervisorPrefix + "enable_swap" + + // HugePages is a sandbox annotation to specify if the memory should be pre-allocated from huge pages + HugePages = kataAnnotHypervisorPrefix + "enable_hugepages" + + // FileBackedMemRootDir is a sandbox annotation to specify file based memory backend root directory + FileBackedMemRootDir = kataAnnotHypervisorPrefix + "file_mem_backend" + + // + // Shared File System related annotations + // + + // Msize9p is a sandbox annotation to specify the msize for 9p shares + Msize9p = kataAnnotHypervisorPrefix + "msize_9p" + + // SharedFS is a sandbox annotation to specify the shared file system type, either virtio-9p or virtio-fs. 
+ SharedFS = kataAnnotHypervisorPrefix + "shared_fs" + + // VirtioFSDaemon is a sandbox annotations to specify virtio-fs vhost-user daemon path + VirtioFSDaemon = kataAnnotHypervisorPrefix + "virtio_fs_daemon" + + // VirtioFSCache is a sandbox annotation to specify the cache mode for fs version cache or "none" + VirtioFSCache = kataAnnotHypervisorPrefix + "virtio_fs_cache" + + // VirtioFSCacheSize is a sandbox annotation to specify the DAX cache size in MiB + VirtioFSCacheSize = kataAnnotHypervisorPrefix + "virtio_fs_cache_size" + + // VirtioFSExtraArgs is a sandbox annotation to pass options to virtiofsd daemon + VirtioFSExtraArgs = kataAnnotHypervisorPrefix + "virtio_fs_extra_args" + + // + // Block Device related annotations + // + + // BlockDeviceDriver specifies the driver to be used for block device either VirtioSCSI or VirtioBlock + BlockDeviceDriver = kataAnnotHypervisorPrefix + "block_device_driver" + + // DisableBlockDeviceUse is a sandbox annotation that disallows a block device from being used. + DisableBlockDeviceUse = kataAnnotHypervisorPrefix + "disable_block_device_use" + + // EnableIOThreads is a sandbox annotation to enable IO to be processed in a separate thread. + // Supported currently for virtio-scsi driver. + EnableIOThreads = kataAnnotHypervisorPrefix + "enable_iothreads" + + // BlockDeviceCacheSet is a sandbox annotation that specifies cache-related options will be set to block devices or not. + BlockDeviceCacheSet = kataAnnotHypervisorPrefix + "block_device_cache_set" + + // BlockDeviceCacheDirect is a sandbox annotation that specifies cache-related options for block devices. + // Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. + BlockDeviceCacheDirect = kataAnnotHypervisorPrefix + "block_device_cache_direct" + + // BlockDeviceCacheNoflush is a sandbox annotation that specifies cache-related options for block devices. + // Denotes whether flush requests for the device are ignored. 
+ BlockDeviceCacheNoflush = kataAnnotHypervisorPrefix + "block_device_cache_noflush" +) + +// Runtime related annotations +const ( + kataAnnotRuntimePrefix = kataConfAnnotationsPrefix + "runtime." + + // DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest. + DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp" + + // SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup. + SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only" + + // Experimental is a sandbox annotation that lists the experimental features to enable. + Experimental = kataAnnotRuntimePrefix + "experimental" + + // InterNetworkModel is a sandbox annotation that determines how the VM should be connected to the + // container network interface. + InterNetworkModel = kataAnnotRuntimePrefix + "internetworking_model" + + // DisableNewNetNs is a sandbox annotation that determines if create a netns for hypervisor process. + DisableNewNetNs = kataAnnotRuntimePrefix + "disable_new_netns" +) + +const ( + kataAnnotAgentPrefix = kataConfAnnotationsPrefix + "agent." // KernelModules is the annotation key for passing the list of kernel // modules and their parameters that will be loaded in the guest kernel. @@ -60,11 +211,20 @@ const ( // The following example can be used to load two kernel modules with parameters /// // annotations: - // com.github.containers.virtcontainers.KernelModules: "e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0" + // io.kata-containers.config.agent.kernel_modules: "e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0" // // The first word is considered as the module name and the rest as its parameters. // - KernelModules = vcAnnotationsPrefix + "KernelModules" + KernelModules = kataAnnotAgentPrefix + "kernel_modules" + + // AgentTrace is a sandbox annotation to enable tracing for the agent. 
+ AgentTrace = kataAnnotAgentPrefix + "enable_tracing" + + // AgentTraceMode is a sandbox annotation to specify the trace mode for the agent. + AgentTraceMode = kataAnnotAgentPrefix + "trace_mode" + + // AgentTraceType is a sandbox annotation to specify the trace type for the agent. + AgentTraceType = kataAnnotAgentPrefix + "trace_type" ) const ( diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go index 7419239459..6619753e44 100644 --- a/virtcontainers/pkg/oci/utils.go +++ b/virtcontainers/pkg/oci/utils.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "path/filepath" + goruntime "runtime" "strconv" "strings" "syscall" @@ -321,13 +322,32 @@ func SandboxID(spec specs.Spec) (string, error) { return "", fmt.Errorf("Could not find sandbox ID") } +func addAnnotations(ocispec specs.Spec, config *vc.SandboxConfig) error { + addAssetAnnotations(ocispec, config) + if err := addHypervisorConfigOverrides(ocispec, config); err != nil { + return err + } + + if err := addRuntimeConfigOverrides(ocispec, config); err != nil { + return err + } + + if err := addAgentConfigOverrides(ocispec, config); err != nil { + return err + } + return nil +} + func addAssetAnnotations(ocispec specs.Spec, config *vc.SandboxConfig) { assetAnnotations := []string{ vcAnnotations.KernelPath, vcAnnotations.ImagePath, vcAnnotations.InitrdPath, + vcAnnotations.FirmwarePath, vcAnnotations.KernelHash, vcAnnotations.ImageHash, + vcAnnotations.InitrdHash, + vcAnnotations.FirmwareHash, vcAnnotations.AssetHashType, } @@ -339,14 +359,395 @@ func addAssetAnnotations(ocispec specs.Spec, config *vc.SandboxConfig) { config.Annotations[a] = value } +} - if value, ok := ocispec.Annotations[vcAnnotations.KernelModules]; ok { - if c, ok := config.AgentConfig.(vc.KataAgentConfig); ok { - modules := strings.Split(value, KernelModulesSeparator) - c.KernelModules = modules - config.AgentConfig = c +func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig) error { + if err := 
addHypervisorCPUOverrides(ocispec, config); err != nil { + return err + } + + if err := addHypervisorMemoryOverrides(ocispec, config); err != nil { + return err + } + + if err := addHypervisorBlockOverrides(ocispec, config); err != nil { + return err + } + + if err := addHypervisporVirtioFsOverrides(ocispec, config); err != nil { + return err + } + + if value, ok := ocispec.Annotations[vcAnnotations.KernelParams]; ok { + if value != "" { + params := vc.DeserializeParams(strings.Fields(value)) + for _, param := range params { + if err := config.HypervisorConfig.AddKernelParam(param); err != nil { + return fmt.Errorf("Error adding kernel parameters in annotation kernel_params : %v", err) + } + } } } + + if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok { + if value != "" { + config.HypervisorConfig.HypervisorMachineType = value + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.MachineAccelerators]; ok { + if value != "" { + config.HypervisorConfig.MachineAccelerators = value + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.DisableVhostNet]; ok { + disableVhostNet, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for disable_vhost_net: Please specify boolean value 'true|false'") + } + + config.HypervisorConfig.DisableVhostNet = disableVhostNet + } + + if value, ok := ocispec.Annotations[vcAnnotations.GuestHookPath]; ok { + if value != "" { + config.HypervisorConfig.GuestHookPath = value + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.UseVSock]; ok { + useVsock, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for use_vsock: Please specify boolean value 'true|false'") + } + + config.HypervisorConfig.UseVSock = useVsock + } + + if value, ok := ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus]; ok { + hotplugVFIOOnRootBus, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation 
for hotplug_vfio_on_root_bus: Please specify boolean value 'true|false'") + } + + config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus + } + + if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok { + if value != "" { + config.HypervisorConfig.EntropySource = value + } + } + + return nil +} + +func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if value, ok := ocispec.Annotations[vcAnnotations.DefaultMemory]; ok { + memorySz, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Error encountered parsing annotation for default_memory: %v, please specify positive numeric value of at least %d", err, vc.MinHypervisorMemory) + } + + if memorySz < vc.MinHypervisorMemory { + return fmt.Errorf("Memory specified in annotation %s is less than minimum required %d, please specify a larger value", vcAnnotations.DefaultMemory, vc.MinHypervisorMemory) + } + + sbConfig.HypervisorConfig.MemorySize = uint32(memorySz) + } + + if value, ok := ocispec.Annotations[vcAnnotations.MemSlots]; ok { + mslots, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Error parsing annotation for memory_slots: %v, please specify positive numeric value", err) + } + + if mslots > 0 { + sbConfig.HypervisorConfig.MemSlots = uint32(mslots) + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.MemOffset]; ok { + moffset, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Error parsing annotation for memory_offset: %v, please specify positive numeric value", err) + } + + if moffset > 0 { + sbConfig.HypervisorConfig.MemOffset = uint32(moffset) + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.MemPrealloc]; ok { + memPrealloc, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for enable_mem_prealloc: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.MemPrealloc = memPrealloc + } + + if value, 
ok := ocispec.Annotations[vcAnnotations.EnableSwap]; ok { + enableSwap, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for enable_swap: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.Mlock = !enableSwap + } + + if value, ok := ocispec.Annotations[vcAnnotations.FileBackedMemRootDir]; ok { + sbConfig.HypervisorConfig.FileBackedMemRootDir = value + } + + if value, ok := ocispec.Annotations[vcAnnotations.HugePages]; ok { + hugePages, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for enable_hugepages: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.HugePages = hugePages + } + return nil +} + +func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if value, ok := ocispec.Annotations[vcAnnotations.DefaultVCPUs]; ok { + vcpus, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Error encountered parsing annotation default_vcpus: %v, please specify numeric value", err) + } + + numCPUs := goruntime.NumCPU() + + if uint32(vcpus) > uint32(numCPUs) { + return fmt.Errorf("Number of cpus %d specified in annotation default_vcpus is greater than the number of CPUs %d on the system", vcpus, numCPUs) + } + + sbConfig.HypervisorConfig.NumVCPUs = uint32(vcpus) + } + + if value, ok := ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs]; ok { + maxVCPUs, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Error encountered parsing annotation for default_maxvcpus: %v, please specify positive numeric value", err) + } + + numCPUs := goruntime.NumCPU() + max := uint32(maxVCPUs) + + if max > uint32(numCPUs) { + return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than the number of CPUs %d on the system", max, numCPUs) + } + + if sbConfig.HypervisorType == vc.QemuHypervisor && max > vc.MaxQemuVCPUs() { + return 
fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, vc.MaxQemuVCPUs()) + } + + sbConfig.HypervisorConfig.DefaultMaxVCPUs = max + } + + return nil +} + +func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceDriver]; ok { + supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm, config.VirtioBlockCCW} + + valid := false + for _, b := range supportedBlockDrivers { + if b == value { + sbConfig.HypervisorConfig.BlockDeviceDriver = value + valid = true + } + } + + if !valid { + return fmt.Errorf("Invalid hypervisor block storage driver %v specified in annotation (supported drivers: %v)", value, supportedBlockDrivers) + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.DisableBlockDeviceUse]; ok { + disableBlockDeviceUse, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for disable_block_device_use: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.DisableBlockDeviceUse = disableBlockDeviceUse + } + + if value, ok := ocispec.Annotations[vcAnnotations.EnableIOThreads]; ok { + enableIOThreads, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for enable_iothreads: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.EnableIOThreads = enableIOThreads + } + + if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceCacheSet]; ok { + blockDeviceCacheSet, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for block_device_cache_set: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet + } + + if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceCacheDirect]; ok { + blockDeviceCacheDirect, 
err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for block_device_cache_direct: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.BlockDeviceCacheDirect = blockDeviceCacheDirect + } + + if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceCacheNoflush]; ok { + blockDeviceCacheNoflush, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for block_device_cache_noflush: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.BlockDeviceCacheNoflush = blockDeviceCacheNoflush + } + + return nil +} + +func addHypervisporVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if value, ok := ocispec.Annotations[vcAnnotations.SharedFS]; ok { + supportedSharedFS := []string{config.Virtio9P, config.VirtioFS} + valid := false + for _, fs := range supportedSharedFS { + if fs == value { + sbConfig.HypervisorConfig.SharedFS = value + valid = true + } + } + + if !valid { + return fmt.Errorf("Invalid hypervisor shared file system %v specified for annotation shared_fs, (supported file systems: %v)", value, supportedSharedFS) + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.VirtioFSDaemon]; ok { + sbConfig.HypervisorConfig.VirtioFSDaemon = value + } + + if sbConfig.HypervisorConfig.SharedFS == config.VirtioFS && sbConfig.HypervisorConfig.VirtioFSDaemon == "" { + return fmt.Errorf("cannot enable virtio-fs without daemon path") + } + + if value, ok := ocispec.Annotations[vcAnnotations.VirtioFSCache]; ok { + sbConfig.HypervisorConfig.VirtioFSCache = value + } + + if value, ok := ocispec.Annotations[vcAnnotations.VirtioFSCacheSize]; ok { + cacheSize, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Error parsing annotation for virtio_fs_cache_size: %v, please specify positive numeric value", err) + } + + sbConfig.HypervisorConfig.VirtioFSCacheSize = uint32(cacheSize) + } + + if 
value, ok := ocispec.Annotations[vcAnnotations.Msize9p]; ok { + msize9p, err := strconv.ParseUint(value, 10, 32) + if err != nil || msize9p == 0 { + return fmt.Errorf("Error parsing annotation for msize_9p, please specify positive numeric value") + } + + sbConfig.HypervisorConfig.Msize9p = uint32(msize9p) + } + + return nil +} + +func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if value, ok := ocispec.Annotations[vcAnnotations.DisableGuestSeccomp]; ok { + disableGuestSeccomp, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for disable_guest_seccomp: Please specify boolean value 'true|false'") + } + + sbConfig.DisableGuestSeccomp = disableGuestSeccomp + } + + if value, ok := ocispec.Annotations[vcAnnotations.SandboxCgroupOnly]; ok { + sandboxCgroupOnly, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for sandbox_cgroup_only: Please specify boolean value 'true|false'") + } + + sbConfig.SandboxCgroupOnly = sandboxCgroupOnly + } + + if value, ok := ocispec.Annotations[vcAnnotations.Experimental]; ok { + features := strings.Split(value, " ") + sbConfig.Experimental = []exp.Feature{} + + for _, f := range features { + feature := exp.Get(f) + if feature == nil { + return fmt.Errorf("Unsupported experimental feature %s specified in annotation %v", f, vcAnnotations.Experimental) + } + sbConfig.Experimental = append(sbConfig.Experimental, *feature) + } + } + + if value, ok := ocispec.Annotations[vcAnnotations.DisableNewNetNs]; ok { + disableNewNetNs, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for disable_new_netns: Please specify boolean value 'true|false'") + } + sbConfig.NetworkConfig.DisableNewNetNs = disableNewNetNs + } + + if value, ok := ocispec.Annotations[vcAnnotations.InterNetworkModel]; ok { + runtimeConfig := RuntimeConfig{} + if err := 
runtimeConfig.InterNetworkModel.SetModel(value); err != nil {
+			return fmt.Errorf("Unknown network model specified in annotation %s", vcAnnotations.InterNetworkModel)
+		}
+
+		sbConfig.NetworkConfig.InterworkingModel = runtimeConfig.InterNetworkModel
+	}
+
+	return nil
+}
+
+func addAgentConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig) error {
+	c, ok := config.AgentConfig.(vc.KataAgentConfig)
+	if !ok {
+		return nil
+	}
+
+	if value, ok := ocispec.Annotations[vcAnnotations.KernelModules]; ok {
+		modules := strings.Split(value, KernelModulesSeparator)
+		c.KernelModules = modules
+		config.AgentConfig = c
+	}
+
+	if value, ok := ocispec.Annotations[vcAnnotations.AgentTrace]; ok {
+		trace, err := strconv.ParseBool(value)
+		if err != nil {
+			return fmt.Errorf("Error parsing annotation for agent.trace: Please specify boolean value 'true|false'")
+		}
+		c.Trace = trace
+	}
+
+	if value, ok := ocispec.Annotations[vcAnnotations.AgentTraceMode]; ok {
+		c.TraceMode = value
+	}
+
+	if value, ok := ocispec.Annotations[vcAnnotations.AgentTraceType]; ok {
+		c.TraceType = value
+	}
+
+	config.AgentConfig = c
+
+	return nil
 }
 
 // SandboxConfig converts an OCI compatible runtime configuration file
@@ -406,7 +807,9 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c
 		Experimental: runtime.Experimental,
 	}
 
-	addAssetAnnotations(ocispec, &sandboxConfig)
+	if err := addAnnotations(ocispec, &sandboxConfig); err != nil {
+		return vc.SandboxConfig{}, err
+	}
 
 	return sandboxConfig, nil
 }
diff --git a/virtcontainers/pkg/oci/utils_test.go b/virtcontainers/pkg/oci/utils_test.go
index 8f526be1ef..dc64051cbb 100644
--- a/virtcontainers/pkg/oci/utils_test.go
+++ b/virtcontainers/pkg/oci/utils_test.go
@@ -659,15 +659,27 @@ func TestAddAssetAnnotations(t *testing.T) {
 
 	config := vc.SandboxConfig{
 		Annotations: make(map[string]string),
-		AgentConfig: vc.KataAgentConfig{},
 	}
 
 	ocispec := specs.Spec{
 		Annotations: expectedAnnotations,
 	}
 
-	addAssetAnnotations(ocispec, &config)
+	addAnnotations(ocispec, &config)
 	assert.Exactly(expectedAnnotations, config.Annotations)
+}
+
+func TestAddAgentAnnotations(t *testing.T) {
+	assert := assert.New(t)
+
+	config := vc.SandboxConfig{
+		Annotations: make(map[string]string),
+		AgentConfig: vc.KataAgentConfig{},
+	}
+
+	ocispec := specs.Spec{
+		Annotations: make(map[string]string),
+	}
 
 	expectedAgentConfig := vc.KataAgentConfig{
 		KernelModules: []string{
@@ -677,7 +689,136 @@ func TestAddAssetAnnotations(t *testing.T) {
 	}
 
 	ocispec.Annotations[vcAnnotations.KernelModules] = strings.Join(expectedAgentConfig.KernelModules, KernelModulesSeparator)
-	addAssetAnnotations(ocispec, &config)
+	addAnnotations(ocispec, &config)
 	assert.Exactly(expectedAgentConfig, config.AgentConfig)
-
+}
+
+// TestAddHypervisorAnnotations checks that hypervisor annotations in the OCI spec are applied to the sandbox HypervisorConfig and that invalid vCPU/memory values yield an error.
+func TestAddHypervisorAnnotations(t *testing.T) {
+	assert := assert.New(t)
+
+	config := vc.SandboxConfig{
+		Annotations: make(map[string]string),
+	}
+
+	ocispec := specs.Spec{
+		Annotations: make(map[string]string),
+	}
+
+	expectedHyperConfig := vc.HypervisorConfig{
+		KernelParams: []vc.Param{
+			{
+				Key:   "vsyscall",
+				Value: "emulate",
+			},
+			{
+				Key:   "iommu",
+				Value: "on",
+			},
+		},
+	}
+
+	ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on"
+	addHypervisorConfigOverrides(ocispec, &config)
+	assert.Exactly(expectedHyperConfig, config.HypervisorConfig)
+
+	ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
+	ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
+	ocispec.Annotations[vcAnnotations.DefaultMemory] = "1024"
+	ocispec.Annotations[vcAnnotations.MemSlots] = "20"
+	ocispec.Annotations[vcAnnotations.MemOffset] = "512"
+	ocispec.Annotations[vcAnnotations.MemPrealloc] = "true"
+	ocispec.Annotations[vcAnnotations.EnableSwap] = "true"
+	ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm"
+	ocispec.Annotations[vcAnnotations.HugePages] = "true"
+	ocispec.Annotations[vcAnnotations.BlockDeviceDriver] = "virtio-scsi"
+	ocispec.Annotations[vcAnnotations.DisableBlockDeviceUse] = "true"
+	ocispec.Annotations[vcAnnotations.EnableIOThreads] = "true"
+	ocispec.Annotations[vcAnnotations.BlockDeviceCacheSet] = "true"
+	ocispec.Annotations[vcAnnotations.BlockDeviceCacheDirect] = "true"
+	ocispec.Annotations[vcAnnotations.BlockDeviceCacheNoflush] = "true"
+	ocispec.Annotations[vcAnnotations.SharedFS] = "virtio-fs"
+	ocispec.Annotations[vcAnnotations.VirtioFSDaemon] = "/home/virtiofsd"
+	ocispec.Annotations[vcAnnotations.VirtioFSCache] = "/home/cache"
+	ocispec.Annotations[vcAnnotations.Msize9p] = "512"
+	ocispec.Annotations[vcAnnotations.MachineType] = "q35"
+	ocispec.Annotations[vcAnnotations.MachineAccelerators] = "nofw"
+	ocispec.Annotations[vcAnnotations.DisableVhostNet] = "true"
+	ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
+	ocispec.Annotations[vcAnnotations.UseVSock] = "true"
+	ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
+	ocispec.Annotations[vcAnnotations.EntropySource] = "/dev/urandom"
+
+	addAnnotations(ocispec, &config)
+	assert.Equal(config.HypervisorConfig.NumVCPUs, uint32(1))
+	assert.Equal(config.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
+	assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
+	assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
+	assert.Equal(config.HypervisorConfig.MemOffset, uint32(512))
+	assert.Equal(config.HypervisorConfig.MemPrealloc, true)
+	assert.Equal(config.HypervisorConfig.Mlock, false)
+	assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
+	assert.Equal(config.HypervisorConfig.HugePages, true)
+	assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
+	assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true)
+	assert.Equal(config.HypervisorConfig.EnableIOThreads, true)
+	assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true)
+	assert.Equal(config.HypervisorConfig.BlockDeviceCacheDirect, true)
+	assert.Equal(config.HypervisorConfig.BlockDeviceCacheNoflush, true)
+	assert.Equal(config.HypervisorConfig.SharedFS, "virtio-fs")
+	assert.Equal(config.HypervisorConfig.VirtioFSDaemon, "/home/virtiofsd")
+	assert.Equal(config.HypervisorConfig.VirtioFSCache, "/home/cache")
+	assert.Equal(config.HypervisorConfig.Msize9p, uint32(512))
+	assert.Equal(config.HypervisorConfig.HypervisorMachineType, "q35")
+	assert.Equal(config.HypervisorConfig.MachineAccelerators, "nofw")
+	assert.Equal(config.HypervisorConfig.DisableVhostNet, true)
+	assert.Equal(config.HypervisorConfig.GuestHookPath, "/usr/bin/")
+	assert.Equal(config.HypervisorConfig.UseVSock, true)
+	assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
+	assert.Equal(config.HypervisorConfig.EntropySource, "/dev/urandom")
+
+	// If an absurdly large value is provided, the config value is not overridden and an error is returned.
+	ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536"
+	err := addAnnotations(ocispec, &config)
+	assert.Error(err)
+
+	ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1"
+	err = addAnnotations(ocispec, &config)
+	assert.Error(err)
+
+	ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
+	ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1"
+	err = addAnnotations(ocispec, &config)
+	assert.Error(err)
+
+	ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
+	// NOTE(review): assumes addAnnotations rejects a DefaultMemory below
+	// vc.MinHypervisorMemory; confirm against the memory override parsing.
+	ocispec.Annotations[vcAnnotations.DefaultMemory] = fmt.Sprintf("%d", vc.MinHypervisorMemory-1)
+	err = addAnnotations(ocispec, &config)
+	assert.Error(err)
+}
+
+// TestAddRuntimeAnnotations checks that the guest seccomp, sandbox cgroup, network namespace and inter-network model annotations are applied to the sandbox configuration.
+func TestAddRuntimeAnnotations(t *testing.T) {
+	assert := assert.New(t)
+
+	config := vc.SandboxConfig{
+		Annotations: make(map[string]string),
+	}
+
+	ocispec := specs.Spec{
+		Annotations: make(map[string]string),
+	}
+
+	ocispec.Annotations[vcAnnotations.DisableGuestSeccomp] = "true"
+	ocispec.Annotations[vcAnnotations.SandboxCgroupOnly] = "true"
+	ocispec.Annotations[vcAnnotations.DisableNewNetNs] = "true"
+	ocispec.Annotations[vcAnnotations.InterNetworkModel] = "macvtap"
+
+	addAnnotations(ocispec, &config)
+	assert.Equal(config.DisableGuestSeccomp, true)
+	assert.Equal(config.SandboxCgroupOnly, true)
+	assert.Equal(config.NetworkConfig.DisableNewNetNs, true)
+	assert.Equal(config.NetworkConfig.InterworkingModel, vc.NetXConnectMacVtapModel)
 }