mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-26 15:32:30 +00:00
runtime-rs: firecracker hypervisor backend
Add a basic runtime-rs `Hypervisor` trait implementation for AWS Firecracker - Add basic hypervisor operations (setup / start / stop / add_device) - Implement AWS Firecracker API on a separate file `fc_api.rs` - Add support for running jailed (include all sandbox-related content) - Add initial device support (limited as hotplug is not supported) - Add separate config for runtime-rs (FC) Notes: - devmapper is the only snapshotter supported - to account for no sharefs support, we copy files in the sandbox (as in the GO runtime) - nerdctl spawn is broken (TODO: #7703) Fixes: #5268 Signed-off-by: George Pyrros <gpyrros@nubificus.co.uk> Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk> Signed-off-by: Charalampos Mainas <cmainas@nubificus.co.uk> Signed-off-by: George Ntoutsos <gntouts@nubificus.co.uk>
This commit is contained in:
parent
dd12089e0d
commit
2d19f3fbd7
@ -88,3 +88,13 @@ pub const DEFAULT_CH_PCI_BRIDGES: u32 = 2;
|
||||
pub const MAX_CH_PCI_BRIDGES: u32 = 5;
|
||||
pub const MAX_CH_VCPUS: u32 = 256;
|
||||
pub const MIN_CH_MEMORY_SIZE_MB: u32 = 64;
|
||||
|
||||
// Default configuration for firecracker
|
||||
/// Entropy source path that the firecracker config plugin fills in when
/// `machine_info.entropy_source` is left empty.
pub const DEFAULT_FIRECRACKER_ENTROPY_SOURCE: &str = "/dev/urandom";
|
||||
/// Memory size (MiB, per the `_MB` suffix) that the firecracker config plugin
/// fills in when `memory_info.default_memory` is 0.
pub const DEFAULT_FIRECRACKER_MEMORY_SIZE_MB: u32 = 128;
|
||||
/// NOTE(review): presumably the default number of memory slots; not referenced
/// by the firecracker plugin code visible in this change -- confirm usage.
pub const DEFAULT_FIRECRACKER_MEMORY_SLOTS: u32 = 128;
|
||||
/// NOTE(review): presumably the default vCPU count; not referenced by the
/// firecracker plugin code visible in this change -- confirm usage.
pub const DEFAULT_FIRECRACKER_VCPUS: u32 = 1;
|
||||
/// Guest kernel image name that the firecracker config plugin fills in when
/// `boot_info.kernel` is left empty.
pub const DEFAULT_FIRECRACKER_GUEST_KERNEL_IMAGE: &str = "vmlinux";
|
||||
/// Guest kernel parameters that the firecracker config plugin fills in when
/// `boot_info.kernel_params` is left empty (the default itself is empty).
pub const DEFAULT_FIRECRACKER_GUEST_KERNEL_PARAMS: &str = "";
|
||||
/// Upper bound on vCPUs: reported by the plugin's `get_max_cpus` and enforced
/// against `default_vcpus` / `default_maxvcpus` during validation.
pub const MAX_FIRECRACKER_VCPUS: u32 = 32;
|
||||
/// Lower bound on guest memory: reported by the plugin's `get_min_memory` and
/// enforced against `default_memory` during validation.
pub const MIN_FIRECRACKER_MEMORY_SIZE_MB: u32 = 128;
|
||||
|
116
src/libs/kata-types/src/config/hypervisor/firecracker.rs
Normal file
116
src/libs/kata-types/src/config/hypervisor/firecracker.rs
Normal file
@ -0,0 +1,116 @@
|
||||
// Copyright (c) 2019-2021 Alibaba Cloud
|
||||
// Copyright (c) 2022-2023 Nubificus LTD
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::io::Result;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::{default, register_hypervisor_plugin};
|
||||
|
||||
use crate::config::default::MAX_FIRECRACKER_VCPUS;
|
||||
use crate::config::default::MIN_FIRECRACKER_MEMORY_SIZE_MB;
|
||||
|
||||
use crate::config::{ConfigPlugin, TomlConfig};
|
||||
use crate::{eother, validate_path};
|
||||
|
||||
/// Hypervisor name for firecracker, used to index `TomlConfig::hypervisor`.
|
||||
pub const HYPERVISOR_NAME_FIRECRACKER: &str = "firecracker";
|
||||
|
||||
/// Config plugin type for the firecracker hypervisor.
///
/// The struct has no fields: it only serves as the receiver for the
/// plugin methods implemented on it.
#[derive(Debug, Default)]
pub struct FirecrackerConfig {}
|
||||
|
||||
impl FirecrackerConfig {
|
||||
/// Create a new instance of `FirecrackerConfig`.
|
||||
pub fn new() -> Self {
|
||||
FirecrackerConfig {}
|
||||
}
|
||||
|
||||
/// Register the firecracker plugin.
|
||||
pub fn register(self) {
|
||||
let plugin = Arc::new(self);
|
||||
register_hypervisor_plugin(HYPERVISOR_NAME_FIRECRACKER, plugin);
|
||||
}
|
||||
}
|
||||
|
||||
impl ConfigPlugin for FirecrackerConfig {
|
||||
fn get_max_cpus(&self) -> u32 {
|
||||
MAX_FIRECRACKER_VCPUS
|
||||
}
|
||||
|
||||
fn get_min_memory(&self) -> u32 {
|
||||
MIN_FIRECRACKER_MEMORY_SIZE_MB
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
HYPERVISOR_NAME_FIRECRACKER
|
||||
}
|
||||
|
||||
/// Adjust the configuration information after loading from configuration file.
|
||||
fn adjust_config(&self, conf: &mut TomlConfig) -> Result<()> {
|
||||
if let Some(firecracker) = conf.hypervisor.get_mut(HYPERVISOR_NAME_FIRECRACKER) {
|
||||
if firecracker.boot_info.kernel.is_empty() {
|
||||
firecracker.boot_info.kernel =
|
||||
default::DEFAULT_FIRECRACKER_GUEST_KERNEL_IMAGE.to_string();
|
||||
}
|
||||
if firecracker.boot_info.kernel_params.is_empty() {
|
||||
firecracker.boot_info.kernel_params =
|
||||
default::DEFAULT_FIRECRACKER_GUEST_KERNEL_PARAMS.to_string();
|
||||
}
|
||||
if firecracker.machine_info.entropy_source.is_empty() {
|
||||
firecracker.machine_info.entropy_source =
|
||||
default::DEFAULT_FIRECRACKER_ENTROPY_SOURCE.to_string();
|
||||
}
|
||||
|
||||
if firecracker.memory_info.default_memory == 0 {
|
||||
firecracker.memory_info.default_memory =
|
||||
default::DEFAULT_FIRECRACKER_MEMORY_SIZE_MB;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate the configuration information.
|
||||
fn validate(&self, conf: &TomlConfig) -> Result<()> {
|
||||
if let Some(firecracker) = conf.hypervisor.get(HYPERVISOR_NAME_FIRECRACKER) {
|
||||
if firecracker.path.is_empty() {
|
||||
return Err(eother!("Firecracker path is empty"));
|
||||
}
|
||||
validate_path!(
|
||||
firecracker.path,
|
||||
"FIRECRACKER binary path `{}` is invalid: {}"
|
||||
)?;
|
||||
if firecracker.boot_info.kernel.is_empty() {
|
||||
return Err(eother!("Guest kernel image for firecracker is empty"));
|
||||
}
|
||||
if firecracker.boot_info.image.is_empty() {
|
||||
return Err(eother!(
|
||||
"Both guest boot image and initrd for firecracker are empty"
|
||||
));
|
||||
}
|
||||
|
||||
if (firecracker.cpu_info.default_vcpus > 0
|
||||
&& firecracker.cpu_info.default_vcpus as u32 > default::MAX_FIRECRACKER_VCPUS)
|
||||
|| firecracker.cpu_info.default_maxvcpus > default::MAX_FIRECRACKER_VCPUS
|
||||
{
|
||||
return Err(eother!(
|
||||
"Firecracker hypervisor can not support {} vCPUs",
|
||||
firecracker.cpu_info.default_maxvcpus
|
||||
));
|
||||
}
|
||||
|
||||
if firecracker.memory_info.default_memory < MIN_FIRECRACKER_MEMORY_SIZE_MB {
|
||||
return Err(eother!(
|
||||
"Firecracker hypervisor has minimal memory limitation {}",
|
||||
MIN_FIRECRACKER_MEMORY_SIZE_MB
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -59,6 +59,9 @@ pub const VIRTIO_SCSI: &str = "virtio-scsi";
|
||||
/// Virtual PMEM device driver.
|
||||
pub const VIRTIO_PMEM: &str = "virtio-pmem";
|
||||
|
||||
mod firecracker;
|
||||
pub use self::firecracker::{FirecrackerConfig, HYPERVISOR_NAME_FIRECRACKER};
|
||||
|
||||
const VIRTIO_9P: &str = "virtio-9p";
|
||||
const VIRTIO_FS: &str = "virtio-fs";
|
||||
const VIRTIO_FS_INLINE: &str = "inline-virtio-fs";
|
||||
@ -530,6 +533,7 @@ impl TopologyConfigInfo {
|
||||
HYPERVISOR_NAME_QEMU,
|
||||
HYPERVISOR_NAME_CH,
|
||||
HYPERVISOR_NAME_DRAGONBALL,
|
||||
HYPERVISOR_NAME_FIRECRACKER,
|
||||
];
|
||||
let hypervisor_name = toml_config.runtime.hypervisor_name.as_str();
|
||||
if !hypervisor_names.contains(&hypervisor_name) {
|
||||
|
@ -25,8 +25,8 @@ pub mod hypervisor;
|
||||
pub use self::agent::Agent;
|
||||
use self::default::DEFAULT_AGENT_DBG_CONSOLE_PORT;
|
||||
pub use self::hypervisor::{
|
||||
BootInfo, CloudHypervisorConfig, DragonballConfig, Hypervisor, QemuConfig,
|
||||
HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_QEMU,
|
||||
BootInfo, CloudHypervisorConfig, DragonballConfig, FirecrackerConfig, Hypervisor, QemuConfig,
|
||||
HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_FIRECRACKER, HYPERVISOR_NAME_QEMU,
|
||||
};
|
||||
|
||||
mod runtime;
|
||||
|
@ -37,6 +37,9 @@ fn get_uds_with_sid(short_id: &str, path: &str) -> Result<String> {
|
||||
return Ok(format!("unix://{}", p.display()));
|
||||
}
|
||||
|
||||
let _ = fs::create_dir_all(kata_run_path.join(short_id))
|
||||
.context(format!("failed to create directory {:?}", kata_run_path.join(short_id)));
|
||||
|
||||
let target_ids: Vec<String> = fs::read_dir(&kata_run_path)?
|
||||
.filter_map(|e| {
|
||||
let x = e.ok()?.file_name().to_string_lossy().into_owned();
|
||||
|
2
src/runtime-rs/Cargo.lock
generated
2
src/runtime-rs/Cargo.lock
generated
@ -1635,6 +1635,8 @@ dependencies = [
|
||||
"dragonball",
|
||||
"futures 0.3.28",
|
||||
"go-flag",
|
||||
"hyper",
|
||||
"hyperlocal",
|
||||
"hypervisor",
|
||||
"kata-sys-util",
|
||||
"kata-types",
|
||||
|
@ -109,6 +109,12 @@ ROOTFSTYPE_XFS := \"xfs\"
|
||||
ROOTFSTYPE_EROFS := \"erofs\"
|
||||
DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4)
|
||||
|
||||
# Firecracker VMM and jailer binary locations, built from PREFIXDEPS and the
# FCCMD / FCJAILERCMD binary names. The *VALID*PATHS variables wrap the
# corresponding path in a quoted, bracketed list.
# NOTE(review): FCVALIDHYPERVISORPATHS uses ":=" (expanded immediately) while
# FCVALIDJAILERPATHS uses "=" (expanded on use) -- confirm the difference is intended.
FCBINDIR := $(PREFIXDEPS)/bin
|
||||
FCPATH = $(FCBINDIR)/$(FCCMD)
|
||||
FCVALIDHYPERVISORPATHS := [\"$(FCPATH)\"]
|
||||
FCJAILERPATH = $(FCBINDIR)/$(FCJAILERCMD)
|
||||
FCVALIDJAILERPATHS = [\"$(FCJAILERPATH)\"]
|
||||
|
||||
PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)
|
||||
FIRMWAREPATH :=
|
||||
FIRMWAREVOLUMEPATH :=
|
||||
@ -164,8 +170,11 @@ DEFMSIZE9P := 8192
|
||||
DEFVFIOMODE := guest-kernel
|
||||
##VAR DEFSANDBOXCGROUPONLY=<bool> Default cgroup model
|
||||
DEFSANDBOXCGROUPONLY ?= false
|
||||
DEFSANDBOXCGROUPONLY_DB ?= true
|
||||
DEFSANDBOXCGROUPONLY_FC ?= true
|
||||
DEFSTATICRESOURCEMGMT ?= false
|
||||
DEFSTATICRESOURCEMGMT_DB ?= false
|
||||
DEFSTATICRESOURCEMGMT_FC ?= true
|
||||
DEFBINDMOUNTS := []
|
||||
DEFDANCONF := /run/kata-containers/dans
|
||||
SED = sed
|
||||
@ -216,7 +225,7 @@ ifneq (,$(DBCMD))
|
||||
KERNELTYPE_DB = uncompressed
|
||||
KERNEL_NAME_DB = $(call MAKE_KERNEL_NAME_DB,$(KERNELTYPE_DB))
|
||||
KERNELPATH_DB = $(KERNELDIR)/$(KERNEL_NAME_DB)
|
||||
DEFSANDBOXCGROUPONLY = true
|
||||
DEFSANDBOXCGROUPONLY_DB = true
|
||||
RUNTIMENAME := virt_container
|
||||
PIPESIZE := 1
|
||||
DBSHAREDFS := inline-virtio-fs
|
||||
@ -244,6 +253,9 @@ ifneq (,$(CLHCMD))
|
||||
KERNEL_NAME_CLH = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_CLH))
|
||||
KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH)
|
||||
VMROOTFSDRIVER_CLH := virtio-pmem
|
||||
|
||||
DEFSTATICRESOURCEMGMT = true
|
||||
DEFSANDBOXCGROUPONLY = true
|
||||
endif
|
||||
|
||||
ifneq (,$(QEMUCMD))
|
||||
@ -288,6 +300,28 @@ endif
|
||||
DEFSECCOMPSANDBOXPARAM := on,obsolete=deny,spawn=deny,resourcecontrol=deny
|
||||
DEFGUESTSELINUXLABEL := system_u:system_r:container_t
|
||||
endif
|
||||
# Firecracker settings: everything up to the matching endif is only evaluated
# when FCCMD is non-empty.
ifneq (,$(FCCMD))
|
||||
KNOWN_HYPERVISORS += $(HYPERVISOR_FC)
|
||||
# Generated config file name, its location under config/, and its ".in" template.
CONFIG_FILE_FC = configuration-rs-fc.toml
|
||||
CONFIG_FC = config/$(CONFIG_FILE_FC)
|
||||
CONFIG_FC_IN = $(CONFIG_FC).in
|
||||
# Absolute paths of the config file under CONFDIR and SYSCONFDIR.
CONFIG_PATH_FC = $(abspath $(CONFDIR)/$(CONFIG_FILE_FC))
|
||||
CONFIG_PATHS += $(CONFIG_PATH_FC)
|
||||
SYSCONFIG_FC = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_FC))
|
||||
SYSCONFIG_PATHS += $(SYSCONFIG_FC)
|
||||
CONFIGS += $(CONFIG_FC)
|
||||
# firecracker-specific options (all should be suffixed by "_FC")
|
||||
DEFBLOCKSTORAGEDRIVER_FC := virtio-blk-mmio
|
||||
DEFMAXMEMSZ_FC := 2048
|
||||
DEFNETWORKMODEL_FC := tcfilter
|
||||
# NOTE(review): KERNELPARAMS has no "_FC" suffix despite the comment above, so
# it presumably also changes the value seen by other configs -- confirm.
KERNELPARAMS = console=ttyS0 agent.log_vport=1025
|
||||
KERNELTYPE_FC = uncompressed
|
||||
KERNEL_NAME_FC = $(call MAKE_KERNEL_NAME_FC,$(KERNELTYPE_FC))
|
||||
KERNELPATH_FC = $(KERNELDIR)/$(KERNEL_NAME_FC)
|
||||
DEFSANDBOXCGROUPONLY_FC = true
|
||||
RUNTIMENAME := virt_container
|
||||
# NOTE(review): the same "?= true" assignment appears earlier in this file, so
# this line looks redundant -- confirm.
DEFSTATICRESOURCEMGMT_FC ?= true
|
||||
endif
|
||||
|
||||
ifeq ($(DEFAULT_HYPERVISOR),$(HYPERVISOR_DB))
|
||||
DEFAULT_HYPERVISOR_CONFIG = $(CONFIG_FILE_DB)
|
||||
@ -296,16 +330,21 @@ endif
|
||||
ifeq ($(DEFAULT_HYPERVISOR),$(HYPERVISOR_QEMU))
|
||||
DEFAULT_HYPERVISOR_CONFIG = $(CONFIG_FILE_QEMU)
|
||||
endif
|
||||
ifeq ($(DEFAULT_HYPERVISOR),$(HYPERVISOR_FC))
|
||||
DEFAULT_HYPERVISOR_CONFIG = $(CONFIG_FILE_FC)
|
||||
endif
|
||||
# list of variables the user may wish to override
|
||||
USER_VARS += ARCH
|
||||
USER_VARS += BINDIR
|
||||
USER_VARS += CONFIG_DB_IN
|
||||
USER_VARS += CONFIG_FC_IN
|
||||
USER_VARS += CONFIG_PATH
|
||||
USER_VARS += CONFIG_QEMU_IN
|
||||
USER_VARS += DESTDIR
|
||||
USER_VARS += DEFAULT_HYPERVISOR
|
||||
USER_VARS += DBCMD
|
||||
USER_VARS += DBCTLCMD
|
||||
USER_VARS += FCCTLCMD
|
||||
USER_VARS += DBPATH
|
||||
USER_VARS += DBVALIDHYPERVISORPATHS
|
||||
USER_VARS += DBCTLPATH
|
||||
@ -316,6 +355,13 @@ USER_VARS += QEMUPATH
|
||||
USER_VARS += QEMUVALIDHYPERVISORPATHS
|
||||
USER_VARS += FIRMWAREPATH_CLH
|
||||
USER_VARS += KERNELPATH_CLH
|
||||
# Firecracker variables the user may override (each name listed once).
USER_VARS += FCCMD
USER_VARS += FCPATH
USER_VARS += FCVALIDHYPERVISORPATHS
USER_VARS += FCJAILERPATH
USER_VARS += FCVALIDJAILERPATHS
USER_VARS += DEFMAXMEMSZ_FC
|
||||
USER_VARS += SYSCONFIG
|
||||
USER_VARS += IMAGENAME
|
||||
USER_VARS += IMAGEPATH
|
||||
@ -329,6 +375,8 @@ USER_VARS += KERNELDIR
|
||||
USER_VARS += KERNELTYPE
|
||||
USER_VARS += KERNELPATH_DB
|
||||
USER_VARS += KERNELPATH_QEMU
|
||||
USER_VARS += KERNELPATH_FC
|
||||
USER_VARS += KERNELPATH
|
||||
USER_VARS += KERNELVIRTIOFSPATH
|
||||
USER_VARS += FIRMWAREPATH
|
||||
USER_VARS += FIRMWAREVOLUMEPATH
|
||||
@ -365,6 +413,7 @@ USER_VARS += DEFBRIDGES
|
||||
USER_VARS += DEFNETWORKMODEL_DB
|
||||
USER_VARS += DEFNETWORKMODEL_CLH
|
||||
USER_VARS += DEFNETWORKMODEL_QEMU
|
||||
USER_VARS += DEFNETWORKMODEL_FC
|
||||
USER_VARS += DEFDISABLEGUESTEMPTYDIR
|
||||
USER_VARS += DEFDISABLEGUESTSECCOMP
|
||||
USER_VARS += DEFDISABLESELINUX
|
||||
@ -374,6 +423,7 @@ USER_VARS += DEFDISABLEBLOCK
|
||||
USER_VARS += DEFBLOCKSTORAGEDRIVER_DB
|
||||
USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
|
||||
USER_VARS += DEFBLOCKDEVICEAIO_QEMU
|
||||
USER_VARS += DEFBLOCKSTORAGEDRIVER_FC
|
||||
USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS
|
||||
USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
|
||||
USER_VARS += DEFVIRTIOFSDAEMON
|
||||
@ -396,8 +446,11 @@ USER_VARS += DEFENTROPYSOURCE
|
||||
USER_VARS += DEFVALIDENTROPYSOURCES
|
||||
USER_VARS += DEFSANDBOXCGROUPONLY
|
||||
USER_VARS += DEFSANDBOXCGROUPONLY_QEMU
|
||||
USER_VARS += DEFSANDBOXCGROUPONLY_DB
|
||||
USER_VARS += DEFSANDBOXCGROUPONLY_FC
|
||||
USER_VARS += DEFSTATICRESOURCEMGMT
|
||||
USER_VARS += DEFSTATICRESOURCEMGMT_DB
|
||||
USER_VARS += DEFSTATICRESOURCEMGMT_FC
|
||||
USER_VARS += DEFBINDMOUNTS
|
||||
USER_VARS += DEFVFIOMODE
|
||||
USER_VARS += BUILDFLAGS
|
||||
@ -405,6 +458,7 @@ USER_VARS += RUNTIMENAME
|
||||
USER_VARS += HYPERVISOR_DB
|
||||
USER_VARS += HYPERVISOR_CLH
|
||||
USER_VARS += HYPERVISOR_QEMU
|
||||
USER_VARS += HYPERVISOR_FC
|
||||
USER_VARS += PIPESIZE
|
||||
USER_VARS += DBSHAREDFS
|
||||
USER_VARS += KATA_INSTALL_GROUP
|
||||
@ -442,6 +496,7 @@ RUNTIME_VERSION=$(VERSION)
|
||||
GENERATED_VARS = \
|
||||
VERSION \
|
||||
CONFIG_DB_IN \
|
||||
CONFIG_FC_IN \
|
||||
$(USER_VARS)
|
||||
|
||||
|
||||
@ -483,6 +538,9 @@ endef
|
||||
define MAKE_KERNEL_NAME_DB
|
||||
$(if $(findstring uncompressed,$1),vmlinux-dragonball-experimental.container,vmlinuz-dragonball-experimental.container)
|
||||
endef
|
||||
# Returns the firecracker guest kernel file name for a kernel type.
# $1 : kernel type; when it contains the string "uncompressed" the result is
#      vmlinux.container, otherwise vmlinuz.container.
define MAKE_KERNEL_NAME_FC
|
||||
$(if $(findstring uncompressed,$1),vmlinux.container,vmlinuz.container)
|
||||
endef
|
||||
|
||||
# Returns the name of the kernel file to use based on the provided KERNELTYPE.
|
||||
# # $1 : KERNELTYPE (compressed or uncompressed)
|
||||
|
@ -13,3 +13,5 @@ QEMUCMD := qemu-system-aarch64
|
||||
|
||||
# dragonball binary name
|
||||
DBCMD := dragonball
|
||||
FCCMD := firecracker
|
||||
FCJAILERCMD := jailer
|
||||
|
@ -16,3 +16,7 @@ DBCMD := dragonball
|
||||
|
||||
# cloud-hypervisor binary name
|
||||
CLHCMD := cloud-hypervisor
|
||||
|
||||
# firecracker binary (vmm and jailer)
|
||||
FCCMD := firecracker
|
||||
FCJAILERCMD := jailer
|
||||
|
@ -341,7 +341,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
||||
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
|
||||
# The sandbox cgroup is constrained if there is no container type annotation.
|
||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
||||
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_DB@
|
||||
|
||||
# Enabled experimental feature list, format: ["a", "b"].
|
||||
# Experimental features are features not stable enough for production,
|
||||
|
373
src/runtime-rs/config/configuration-rs-fc.toml.in
Normal file
373
src/runtime-rs/config/configuration-rs-fc.toml.in
Normal file
@ -0,0 +1,373 @@
|
||||
# Copyright (c) 2017-2023 Intel Corporation
|
||||
# Copyright (c) Adobe Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# XXX: WARNING: this file is auto-generated.
|
||||
# XXX:
|
||||
# XXX: Source file: "@CONFIG_FC_IN@"
|
||||
# XXX: Project:
|
||||
# XXX: Name: @PROJECT_NAME@
|
||||
# XXX: Type: @PROJECT_TYPE@
|
||||
|
||||
[hypervisor.firecracker]
|
||||
path = "@FCPATH@"
|
||||
kernel = "@KERNELPATH_FC@"
|
||||
image = "@IMAGEPATH@"
|
||||
|
||||
rootfs_type=@DEFROOTFSTYPE@
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONS@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @FCVALIDHYPERVISORPATHS@
|
||||
valid_hypervisor_paths = @FCVALIDHYPERVISORPATHS@
|
||||
|
||||
# Path for the jailer specific to firecracker
|
||||
# If the jailer path is not set kata will launch firecracker
|
||||
# without a jail. If the jailer is set firecracker will be
|
||||
# launched in a jailed environment created by the jailer
|
||||
#jailer_path = "@FCJAILERPATH@"
|
||||
|
||||
# List of valid jailer path values for the hypervisor
|
||||
# Each member of the list can be a regular expression
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @FCVALIDJAILERPATHS@
|
||||
valid_jailer_paths = @FCVALIDJAILERPATHS@
|
||||
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||
# trouble running pre-2.15 glibc.
|
||||
#
|
||||
# WARNING: - any parameter specified here will take priority over the default
|
||||
# parameter value of the same name used to start the virtual machine.
|
||||
# Do not set values here unless you understand the impact of doing so as you
|
||||
# may stop the virtual machine from booting.
|
||||
# To see the list of default parameters, enable hypervisor debug, create a
|
||||
# container and look for 'default-kernel-parameters' log entries.
|
||||
kernel_params = "@KERNELPARAMS@"
|
||||
|
||||
# Default number of vCPUs per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFVCPUS@
|
||||
# < 0 --> will be set to the actual number of physical cores
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores
|
||||
default_vcpus = 1
|
||||
|
||||
# Default maximum number of vCPUs per SB/VM:
|
||||
# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
|
||||
# the actual number of physical cores is greater than it.
|
||||
# WARNING: Be aware that this value impacts the virtual machine's memory footprint and the CPU
|
||||
# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
|
||||
# can be added to a SB/VM, but the memory footprint will be big. Another example, with
|
||||
# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
|
||||
# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
|
||||
# unless you know what you are doing.
|
||||
# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
|
||||
default_maxvcpus = @DEFMAXVCPUS@
|
||||
|
||||
# Bridges can be used to hot plug devices.
|
||||
# Limitations:
|
||||
# * Currently only pci bridges are supported
|
||||
# * Up to 30 devices per bridge can be hot plugged.
|
||||
# * Up to 5 PCI bridges can be cold plugged per VM.
|
||||
# This limitation could be a bug in the kernel
|
||||
# Default number of bridges per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFBRIDGES@
|
||||
# > 1 <= 5 --> will be set to the specified number
|
||||
# > 5 --> will be set to 5
|
||||
default_bridges = @DEFBRIDGES@
|
||||
|
||||
# Default memory size in MiB for SB/VM.
|
||||
# If unspecified then it will be set to @DEFMEMSZ@ MiB.
|
||||
default_memory = @DEFMEMSZ@
|
||||
|
||||
#
|
||||
# Default memory slots per SB/VM.
|
||||
# If unspecified then it will be set to @DEFMEMSLOTS@.
|
||||
# This determines how many times memory can be hot-added to the sandbox/VM.
|
||||
memory_slots = @DEFMEMSLOTS@
|
||||
|
||||
# The size in MiB that will be added to the maximum memory of the hypervisor.
|
||||
# It is the memory address space for the NVDIMM device.
|
||||
# If the block storage driver (block_device_driver) is set to "nvdimm",
|
||||
# memory_offset should be set to the size of the block device.
|
||||
# Default 0
|
||||
#memory_offset = 0
|
||||
|
||||
# Default maximum memory in MiB per SB / VM
|
||||
# unspecified or == 0 --> will be set to the actual amount of physical RAM
|
||||
# > 0 <= amount of physical RAM --> will be set to the specified number
|
||||
# > amount of physical RAM --> will be set to the actual amount of physical RAM
|
||||
default_maxmemory = @DEFMAXMEMSZ_FC@
|
||||
|
||||
# Block storage driver to be used for the hypervisor in case the container
|
||||
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
|
||||
# or nvdimm.
|
||||
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
|
||||
|
||||
# Specifies cache-related options will be set to block devices or not.
|
||||
# Default false
|
||||
#block_device_cache_set = true
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
|
||||
# Default false
|
||||
#block_device_cache_direct = true
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether flush requests for the device are ignored.
|
||||
# Default false
|
||||
#block_device_cache_noflush = true
|
||||
|
||||
# Enable pre allocation of VM RAM, default false
|
||||
# Enabling this will result in lower container density
|
||||
# as all of the memory will be allocated and locked
|
||||
# This is useful when you want to reserve all the memory
|
||||
# upfront or in the cases where you want memory latencies
|
||||
# to be very predictable
|
||||
# Default false
|
||||
#enable_mem_prealloc = true
|
||||
|
||||
# Enable huge pages for VM RAM, default false
|
||||
# Enabling this will result in the VM memory
|
||||
# being allocated using huge pages.
|
||||
# This is useful when you want to use vhost-user network
|
||||
# stacks within the container. This will automatically
|
||||
# result in memory pre allocation
|
||||
#enable_hugepages = true
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
# command line: intel_iommu=on,iommu=pt
|
||||
#enable_iommu = true
|
||||
|
||||
# This option changes the default hypervisor and kernel parameters
|
||||
# to enable debug output where available.
|
||||
#
|
||||
# Default false
|
||||
#enable_debug = true
|
||||
|
||||
# Disable the customizations done in the runtime when it detects
|
||||
# that it is running on top of a VMM. This will result in the runtime
|
||||
# behaving as it would when running on bare metal.
|
||||
#
|
||||
#disable_nesting_checks = true
|
||||
|
||||
# This is the msize used for 9p shares. It is the number of bytes
|
||||
# used for 9p packet payload.
|
||||
#msize_9p = @DEFMSIZE9P@
|
||||
|
||||
# VFIO devices are hotplugged on a bridge by default.
|
||||
# Enable hotplugging on root bus. This may be required for devices with
|
||||
# a large PCI bar, as this is a current limitation with hotplugging on
|
||||
# a bridge.
|
||||
# Default false
|
||||
#hotplug_vfio_on_root_bus = true
|
||||
|
||||
#
|
||||
# Default entropy source.
|
||||
# The path to a host source of entropy (including a real hardware RNG)
|
||||
# /dev/urandom and /dev/random are two main options.
|
||||
# Be aware that /dev/random is a blocking source of entropy. If the host
|
||||
# runs out of entropy, the VM's boot time will increase, leading to startup
|
||||
# timeouts.
|
||||
# The source of entropy /dev/urandom is non-blocking and provides a
|
||||
# generally acceptable source of entropy. It should work well for pretty much
|
||||
# all practical purposes.
|
||||
#entropy_source= "@DEFENTROPYSOURCE@"
|
||||
|
||||
# List of valid annotations values for entropy_source
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
|
||||
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
|
||||
|
||||
# Path to OCI hook binaries in the *guest rootfs*.
|
||||
# This does not affect host-side hooks which must instead be added to
|
||||
# the OCI spec passed to the runtime.
|
||||
#
|
||||
# You can create a rootfs with hooks by customizing the osbuilder scripts:
|
||||
# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
|
||||
#
|
||||
# Hooks must be stored in a subdirectory of guest_hook_path according to their
|
||||
# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
|
||||
# The agent will scan these directories for executable files and add them, in
|
||||
# lexicographical order, to the lifecycle of the guest container.
|
||||
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
|
||||
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
|
||||
# Warnings will be logged if any error is encountered while scanning for hooks,
|
||||
# but it will not abort container execution.
|
||||
#guest_hook_path = "/usr/share/oci/hooks"
|
||||
#
|
||||
# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Firecracker, it provides a built-in rate limiter, which is based on TBF(Token Bucket Filter)
|
||||
# queueing discipline.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
#rx_rate_limiter_max_rate = 0
|
||||
# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Firecracker, it provides a built-in rate limiter, which is based on TBF(Token Bucket Filter)
|
||||
# queueing discipline.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
#tx_rate_limiter_max_rate = 0
|
||||
|
||||
# disable applying SELinux on the VMM process (default false)
|
||||
disable_selinux=@DEFDISABLESELINUX@
|
||||
|
||||
[factory]
|
||||
# VM templating support. Once enabled, new VMs are created from template
|
||||
# using vm cloning. They will share the same initial kernel, initramfs and
|
||||
# agent memory by mapping it readonly. It helps speeding up new container
|
||||
# creation and saves a lot of memory if there are many kata containers running
|
||||
# on the same host.
|
||||
#
|
||||
# When disabled, new VMs are created from scratch.
|
||||
#
|
||||
# Note: Requires "initrd=" to be set ("image=" is not supported).
|
||||
#
|
||||
# Default false
|
||||
#enable_template = true
|
||||
|
||||
[agent.@PROJECT_TYPE@]
|
||||
# If enabled, make the agent display debug-level messages.
|
||||
# (default: disabled)
|
||||
#enable_debug = true
|
||||
|
||||
# Enable agent tracing.
|
||||
#
|
||||
# If enabled, the agent will generate OpenTelemetry trace spans.
|
||||
#
|
||||
# Notes:
|
||||
#
|
||||
# - If the runtime also has tracing enabled, the agent spans will be
|
||||
# associated with the appropriate runtime parent span.
|
||||
# - If enabled, the runtime will wait for the container to shutdown,
|
||||
# increasing the container shutdown time slightly.
|
||||
#
|
||||
# (default: disabled)
|
||||
#enable_tracing = true
|
||||
|
||||
# Comma separated list of kernel modules and their parameters.
|
||||
# These modules will be loaded in the guest kernel using modprobe(8).
|
||||
# The following example can be used to load two kernel modules with parameters
|
||||
# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
|
||||
# The first word is considered as the module name and the rest as its parameters.
|
||||
# Container will not be started when:
|
||||
# * A kernel module is specified and the modprobe command is not installed in the guest
|
||||
# or it fails loading the module.
|
||||
# * The module is not available in the guest or it doesn't meet the guest kernel
|
||||
# requirements, like architecture and version.
|
||||
#
|
||||
kernel_modules=[]
|
||||
|
||||
# Enable debug console.
|
||||
|
||||
# If enabled, user can connect guest OS running inside hypervisor
|
||||
# through "kata-runtime exec <sandbox-id>" command
|
||||
|
||||
#debug_console_enabled = true
|
||||
|
||||
# Agent connection dialing timeout value in seconds
|
||||
# (default: 45)
|
||||
dial_timeout = 45
|
||||
|
||||
[runtime]
|
||||
# If enabled, the runtime will log additional debug messages to the
|
||||
# system log
|
||||
# (default: disabled)
|
||||
#enable_debug = true
|
||||
#
|
||||
# Internetworking model
|
||||
# Determines how the VM should be connected to the
|
||||
# container network interface
|
||||
# Options:
|
||||
#
|
||||
# - macvtap
|
||||
# Used when the Container network interface can be bridged using
|
||||
# macvtap.
|
||||
#
|
||||
# - none
|
||||
# Used when customizing the network. Only creates a tap device. No veth pair.
|
||||
#
|
||||
# - tcfilter
|
||||
# Uses tc filter rules to redirect traffic from the network interface
|
||||
# provided by plugin to a tap interface connected to the VM.
|
||||
#
|
||||
internetworking_model="@DEFNETWORKMODEL_FC@"
|
||||
|
||||
name="@RUNTIMENAME@"
|
||||
hypervisor_name="@HYPERVISOR_FC@"
|
||||
agent_name="@PROJECT_TYPE@"
|
||||
|
||||
# disable guest seccomp
|
||||
# Determines whether container seccomp profiles are passed to the virtual
|
||||
# machine and applied by the kata agent. If set to true, seccomp is not applied
|
||||
# within the guest
|
||||
# (default: true)
|
||||
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
||||
|
||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||
# (default: disabled)
|
||||
#enable_tracing = true
|
||||
|
||||
# Set the full url to the Jaeger HTTP Thrift collector.
|
||||
# The default if not set will be "http://localhost:14268/api/traces"
|
||||
#jaeger_endpoint = ""
|
||||
|
||||
# Sets the username to be used if basic auth is required for Jaeger.
|
||||
#jaeger_user = ""
|
||||
|
||||
# Sets the password to be used if basic auth is required for Jaeger.
|
||||
#jaeger_password = ""
|
||||
|
||||
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
|
||||
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
|
||||
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
|
||||
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
|
||||
# (like OVS) directly.
|
||||
# (default: false)
|
||||
#disable_new_netns = true
|
||||
|
||||
# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
|
||||
# The container cgroups in the host are not created, just one single cgroup per sandbox.
|
||||
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
|
||||
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
|
||||
# The sandbox cgroup is constrained if there is no container type annotation.
|
||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_FC@
|
||||
|
||||
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
|
||||
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
|
||||
# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
|
||||
# Compatibility for determining appropriate sandbox (VM) size:
|
||||
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
|
||||
# does not yet support sandbox sizing annotations.
|
||||
# - When running single containers using a tool like ctr, container sizing information will be available.
|
||||
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_FC@
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
# Enabled experimental feature list, format: ["a", "b"].
|
||||
# Experimental features are features not stable enough for production,
|
||||
# they may break compatibility, and are prepared for a big version bump.
|
||||
# Supported experimental features:
|
||||
# (default: [])
|
||||
experimental=@DEFAULTEXPFEATURES@
|
||||
|
||||
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
|
||||
# (default: false)
|
||||
# enable_pprof = true
|
@ -49,6 +49,9 @@ qapi-qmp = "0.14.0"
|
||||
|
||||
[target.'cfg(not(target_arch = "s390x"))'.dependencies]
|
||||
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall", "virtio-mem", "virtio-balloon", "vhost-user-net", "host-device"] }
|
||||
dbs-utils = { path = "../../../dragonball/src/dbs_utils" }
|
||||
hyperlocal = "0.8.0"
|
||||
hyper = {version = "0.14.18", features = ["client"]}
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
324
src/runtime-rs/crates/hypervisor/src/firecracker/fc_api.rs
Normal file
324
src/runtime-rs/crates/hypervisor/src/firecracker/fc_api.rs
Normal file
@ -0,0 +1,324 @@
|
||||
//Copyright (c) 2019-2022 Alibaba Cloud
|
||||
//Copyright (c) 2023 Nubificus Ltd
|
||||
//
|
||||
//SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use crate::{
|
||||
firecracker::{
|
||||
inner_hypervisor::{FC_AGENT_SOCKET_NAME, ROOT},
|
||||
sl, FcInner,
|
||||
},
|
||||
kernel_param::KernelParams,
|
||||
NetworkConfig, Param,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use dbs_utils::net::MacAddr;
|
||||
use hyper::{Body, Method, Request, Response};
|
||||
use hyperlocal::Uri;
|
||||
use kata_sys_util::mount;
|
||||
use nix::mount::MsFlags;
|
||||
use serde_json::json;
|
||||
use tokio::{fs, fs::File};
|
||||
|
||||
// Maximum number of attempts `send_request_with_retry` makes before giving up.
const REQUEST_RETRY: u32 = 500;
|
||||
// Destination name used when the guest kernel is handed to `get_resource`.
const FC_KERNEL: &str = "vmlinux";
|
||||
// Destination name used when the guest rootfs image is handed to `get_resource`.
const FC_ROOT_FS: &str = "rootfs";
|
||||
// Prefix of the placeholder drive ids (`drive1`, `drive2`, ...).
const DRIVE_PREFIX: &str = "drive";
|
||||
// Exclusive upper bound of the placeholder drive index range `1..DISK_POOL_SIZE`.
const DISK_POOL_SIZE: u32 = 6;
|
||||
|
||||
impl FcInner {
|
||||
/// Resolves the path that should be handed to the VMM for `src`.
///
/// When running jailed, `src` is bind-mounted into the jail under the name
/// `dst` (see `jail_resource`) and the resulting path is returned. Otherwise
/// `src` is returned unchanged and `dst` is ignored.
pub(crate) fn get_resource(&self, src: &str, dst: &str) -> Result<String> {
    if !self.jailed {
        return Ok(src.to_string());
    }

    self.jail_resource(src, dst)
}
|
||||
|
||||
fn jail_resource(&self, src: &str, dst: &str) -> Result<String> {
|
||||
if src.is_empty() || dst.is_empty() {
|
||||
return Err(anyhow!("invalid param src {} dst {}", src, dst));
|
||||
}
|
||||
|
||||
let jailed_location = [self.vm_path.as_str(), ROOT, dst].join("/");
|
||||
mount::bind_mount_unchecked(src, jailed_location.as_str(), false, MsFlags::MS_SLAVE)
|
||||
.context("bind_mount ERROR")?;
|
||||
|
||||
let mut abs_path = String::from("/");
|
||||
abs_path.push_str(dst);
|
||||
Ok(abs_path)
|
||||
}
|
||||
|
||||
// Remounting jailer root to ensure it has exec permissions, since firecracker binary will
|
||||
// execute from there
|
||||
pub(crate) async fn remount_jailer_with_exec(&self) -> Result<()> {
|
||||
let localpath = [self.vm_path.clone(), ROOT.to_string()].join("/");
|
||||
let _ = fs::create_dir_all(&localpath)
|
||||
.await
|
||||
.context(format!("failed to create directory {:?}", &localpath));
|
||||
mount::bind_mount_unchecked(&localpath, &localpath, false, MsFlags::MS_SHARED)
|
||||
.context("bind mount jailer root")?;
|
||||
|
||||
mount::bind_remount(&localpath, false).context("rebind mount jailer root")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn prepare_hvsock(&mut self) -> Result<()> {
|
||||
let rel_uds_path = match self.jailed {
|
||||
false => [self.vm_path.as_str(), FC_AGENT_SOCKET_NAME].join("/"),
|
||||
true => FC_AGENT_SOCKET_NAME.to_string(),
|
||||
};
|
||||
|
||||
let body_vsock: String = json!({
|
||||
"guest_cid": 3,
|
||||
"uds_path": rel_uds_path,
|
||||
"vsock_id": ROOT,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
self.request_with_retry(Method::PUT, "/vsock", body_vsock)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn prepare_vmm_resources(&mut self) -> Result<()> {
|
||||
let mut kernel_params = KernelParams::new(self.config.debug_info.enable_debug);
|
||||
kernel_params.push(Param::new("pci", "off"));
|
||||
kernel_params.push(Param::new("iommu", "off"));
|
||||
let rootfs_driver = self.config.blockdev_info.block_device_driver.clone();
|
||||
|
||||
kernel_params.append(&mut KernelParams::new_rootfs_kernel_params(
|
||||
&rootfs_driver,
|
||||
&self.config.boot_info.rootfs_type,
|
||||
)?);
|
||||
kernel_params.append(&mut KernelParams::from_string(
|
||||
&self.config.boot_info.kernel_params,
|
||||
));
|
||||
let mut parameters = String::new().to_owned();
|
||||
|
||||
for param in &kernel_params.to_string() {
|
||||
parameters.push_str(¶m.to_string());
|
||||
}
|
||||
|
||||
let kernel = self
|
||||
.get_resource(&self.config.boot_info.kernel, FC_KERNEL)
|
||||
.context("get resource KERNEL")?;
|
||||
let rootfs = self
|
||||
.get_resource(&self.config.boot_info.image, FC_ROOT_FS)
|
||||
.context("get resource ROOTFS")?;
|
||||
|
||||
let body_kernel: String = json!({
|
||||
"kernel_image_path": kernel,
|
||||
"boot_args": parameters,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let body_rootfs: String = json!({
|
||||
"drive_id": "rootfs",
|
||||
"path_on_host": rootfs,
|
||||
"is_root_device": false,
|
||||
"is_read_only": true
|
||||
})
|
||||
.to_string();
|
||||
|
||||
info!(sl(), "Before first request");
|
||||
self.request_with_retry(Method::PUT, "/boot-source", body_kernel)
|
||||
.await?;
|
||||
self.request_with_retry(Method::PUT, "/drives/rootfs", body_rootfs)
|
||||
.await?;
|
||||
|
||||
let abs_path = [&self.vm_path, ROOT].join("/");
|
||||
|
||||
let rel_path = "/".to_string();
|
||||
let _ = fs::create_dir_all(&abs_path)
|
||||
.await
|
||||
.context(format!("failed to create directory {:?}", &abs_path));
|
||||
|
||||
// We create some placeholder drives to be used for patching block devices while the vmm is
|
||||
// running, as firecracker does not support device hotplug.
|
||||
for i in 1..DISK_POOL_SIZE {
|
||||
let full_path_name = format!("{}/drive{}", abs_path, i);
|
||||
|
||||
let _ = File::create(&full_path_name)
|
||||
.await
|
||||
.context(format!("failed to create file {:?}", &full_path_name));
|
||||
|
||||
let path_on_host = match self.jailed {
|
||||
false => abs_path.clone(),
|
||||
true => rel_path.clone(),
|
||||
};
|
||||
let body: String = json!({
|
||||
"drive_id": format!("drive{}",i),
|
||||
"path_on_host": format!("{}/drive{}", path_on_host, i),
|
||||
"is_root_device": false,
|
||||
"is_read_only": false
|
||||
})
|
||||
.to_string();
|
||||
|
||||
self.request_with_retry(Method::PUT, &format!("/drives/drive{}", i), body)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
pub(crate) async fn patch_container_rootfs(
|
||||
&mut self,
|
||||
drive_id: &str,
|
||||
drive_path: &str,
|
||||
) -> Result<()> {
|
||||
let new_drive_id = &[DRIVE_PREFIX, drive_id].concat();
|
||||
let new_drive_path = self
|
||||
.get_resource(drive_path, new_drive_id)
|
||||
.context("get resource CONTAINER ROOTFS")?;
|
||||
let body: String = json!({
|
||||
"drive_id": format!("drive{drive_id}"),
|
||||
"path_on_host": new_drive_path
|
||||
})
|
||||
.to_string();
|
||||
self.request_with_retry(
|
||||
Method::PATCH,
|
||||
&["/drives/", &format!("drive{drive_id}")].concat(),
|
||||
body,
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn add_net_device(
|
||||
&mut self,
|
||||
config: &NetworkConfig,
|
||||
device_id: String,
|
||||
) -> Result<()> {
|
||||
let g_mac = match &config.guest_mac {
|
||||
Some(mac) => MacAddr::from_bytes(&mac.0).ok(),
|
||||
None => None,
|
||||
};
|
||||
let body: String = json!({
|
||||
"iface_id": &device_id,
|
||||
"guest_mac": g_mac,
|
||||
"host_dev_name": &config.host_dev_name
|
||||
|
||||
})
|
||||
.to_string();
|
||||
self.request_with_retry(
|
||||
Method::PUT,
|
||||
&["/network-interfaces/", &device_id].concat(),
|
||||
body,
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn request_with_retry(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: &str,
|
||||
data: String,
|
||||
) -> Result<()> {
|
||||
let url: hyper::Uri = Uri::new(&self.asock_path, uri).into();
|
||||
self.send_request_with_retry(method, url, data).await
|
||||
}
|
||||
|
||||
pub(crate) async fn send_request_with_retry(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: hyper::Uri,
|
||||
data: String,
|
||||
) -> Result<()> {
|
||||
debug!(sl(), "METHOD: {:?}", method.clone());
|
||||
debug!(sl(), "URI: {:?}", uri.clone());
|
||||
debug!(sl(), "DATA: {:?}", data.clone());
|
||||
for _count in 0..REQUEST_RETRY {
|
||||
let req = Request::builder()
|
||||
.method(method.clone())
|
||||
.uri(uri.clone())
|
||||
.header("Accept", "application/json")
|
||||
.header("Content-Type", "application/json")
|
||||
.body(Body::from(data.clone()))?;
|
||||
|
||||
match self.send_request(req).await {
|
||||
Ok(resp) => {
|
||||
debug!(sl(), "Request sent, resp: {:?}", resp);
|
||||
return Ok(());
|
||||
}
|
||||
Err(resp) => {
|
||||
debug!(sl(), "Request sent with error, resp: {:?}", resp);
|
||||
std::thread::sleep(std::time::Duration::from_millis(10));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(anyhow::anyhow!(
|
||||
"After {} attempts, it still doesn't work.",
|
||||
REQUEST_RETRY
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) async fn send_request(&self, req: Request<Body>) -> Result<Response<Body>> {
|
||||
let resp = self.client.request(req).await?;
|
||||
|
||||
let status = resp.status();
|
||||
debug!(sl(), "Request RESPONSE {:?} {:?}", &status, resp);
|
||||
if status.is_success() {
|
||||
return Ok(resp);
|
||||
} else {
|
||||
let body = hyper::body::to_bytes(resp.into_body()).await?;
|
||||
if body.is_empty() {
|
||||
debug!(sl(), "Request FAILED WITH STATUS: {:?}", status);
|
||||
None
|
||||
} else {
|
||||
let body = String::from_utf8_lossy(&body).into_owned();
|
||||
debug!(
|
||||
sl(),
|
||||
"Request FAILED WITH STATUS: {:?} and BODY: {:?}", status, body
|
||||
);
|
||||
Some(body)
|
||||
};
|
||||
}
|
||||
|
||||
Err(anyhow::anyhow!(
|
||||
"After {} attempts, it
|
||||
still doesn't work.",
|
||||
REQUEST_RETRY
|
||||
))
|
||||
}
|
||||
pub(crate) fn cleanup_resource(&self) {
|
||||
if self.jailed {
|
||||
self.umount_jail_resource(FC_KERNEL).ok();
|
||||
self.umount_jail_resource(FC_ROOT_FS).ok();
|
||||
|
||||
for i in 1..DISK_POOL_SIZE {
|
||||
self.umount_jail_resource(&[DRIVE_PREFIX, &i.to_string()].concat())
|
||||
.ok();
|
||||
}
|
||||
|
||||
self.umount_jail_resource("").ok();
|
||||
}
|
||||
std::fs::remove_dir_all(self.vm_path.as_str())
|
||||
.map_err(|err| {
|
||||
error!(
|
||||
sl(),
|
||||
"failed to remove dir all for {} with error: {:?}", &self.vm_path, &err
|
||||
);
|
||||
err
|
||||
})
|
||||
.ok();
|
||||
}
|
||||
|
||||
pub(crate) fn umount_jail_resource(&self, jailed_path: &str) -> Result<()> {
|
||||
let path = match jailed_path {
|
||||
// Handle final case to umount the bind-mounted `/run/kata/firecracker/{id}/root` dir
|
||||
"" => [self.vm_path.clone(), ROOT.to_string()].join("/"),
|
||||
// Handle generic case to umount the bind-mounted
|
||||
// `/run/kata/firecracker/{id}/root/asset` file/dir
|
||||
_ => [
|
||||
self.vm_path.clone(),
|
||||
ROOT.to_string(),
|
||||
jailed_path.to_string(),
|
||||
]
|
||||
.join("/"),
|
||||
};
|
||||
nix::mount::umount2(path.as_str(), nix::mount::MntFlags::MNT_DETACH)
|
||||
.with_context(|| format!("umount path {}", &path))
|
||||
}
|
||||
}
|
208
src/runtime-rs/crates/hypervisor/src/firecracker/inner.rs
Normal file
208
src/runtime-rs/crates/hypervisor/src/firecracker/inner.rs
Normal file
@ -0,0 +1,208 @@
|
||||
//Copyright (c) 2019-2022 Alibaba Cloud
|
||||
//Copyright (c) 2023 Nubificus Ltd
|
||||
//
|
||||
//SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use crate::firecracker::{inner_hypervisor::FC_API_SOCKET_NAME, sl};
|
||||
use crate::HypervisorState;
|
||||
use crate::MemoryConfig;
|
||||
use crate::HYPERVISOR_FIRECRACKER;
|
||||
use crate::{device::DeviceType, VmmState};
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hyper::Client;
|
||||
use hyperlocal::{UnixClientExt, UnixConnector};
|
||||
use kata_types::{
|
||||
capabilities::{Capabilities, CapabilityBits},
|
||||
config::hypervisor::Hypervisor as HypervisorConfig,
|
||||
};
|
||||
use nix::sched::{setns, CloneFlags};
|
||||
use persist::sandbox_persist::Persist;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use tokio::process::Command;
|
||||
|
||||
// NOTE(review): these manual impls assert that `FcInner` may be moved to and
// shared between threads. The invariant that makes this sound is not visible
// in this file -- confirm that every field is itself `Send + Sync` (in which
// case the impls are redundant) or document the invariant here.
unsafe impl Send for FcInner {}
|
||||
unsafe impl Sync for FcInner {}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FcInner {
|
||||
pub(crate) id: String,
|
||||
pub(crate) asock_path: String,
|
||||
pub(crate) state: VmmState,
|
||||
pub(crate) config: HypervisorConfig,
|
||||
pub(crate) pid: Option<u32>,
|
||||
pub(crate) vm_path: String,
|
||||
pub(crate) netns: Option<String>,
|
||||
pub(crate) client: Client<UnixConnector>,
|
||||
pub(crate) jailer_root: String,
|
||||
pub(crate) jailed: bool,
|
||||
pub(crate) run_dir: String,
|
||||
pub(crate) pending_devices: Vec<DeviceType>,
|
||||
pub(crate) capabilities: Capabilities,
|
||||
}
|
||||
|
||||
impl FcInner {
|
||||
pub fn new() -> FcInner {
|
||||
let mut capabilities = Capabilities::new();
|
||||
capabilities.set(CapabilityBits::BlockDeviceSupport);
|
||||
FcInner {
|
||||
id: String::default(),
|
||||
asock_path: String::default(),
|
||||
state: VmmState::NotReady,
|
||||
config: Default::default(),
|
||||
pid: None,
|
||||
netns: None,
|
||||
vm_path: String::default(),
|
||||
client: Client::unix(),
|
||||
jailer_root: String::default(),
|
||||
jailed: false,
|
||||
run_dir: String::default(),
|
||||
pending_devices: vec![],
|
||||
capabilities,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn prepare_vmm(&mut self, netns: Option<String>) -> Result<()> {
|
||||
let mut cmd: Command;
|
||||
self.netns = netns.clone();
|
||||
match self.jailed {
|
||||
true => {
|
||||
debug!(sl(), "Running Jailed");
|
||||
cmd = Command::new(&self.config.jailer_path);
|
||||
let api_socket = ["/run/", FC_API_SOCKET_NAME].join("/");
|
||||
let args = [
|
||||
"--id",
|
||||
&self.id,
|
||||
"--gid",
|
||||
"0",
|
||||
"--uid",
|
||||
"0",
|
||||
"--exec-file",
|
||||
&self.config.path,
|
||||
"--chroot-base-dir",
|
||||
&self.jailer_root,
|
||||
"--",
|
||||
"--api-sock",
|
||||
&api_socket,
|
||||
];
|
||||
cmd.args(args);
|
||||
}
|
||||
false => {
|
||||
debug!(sl(), "Running non-Jailed");
|
||||
cmd = Command::new(&self.config.path);
|
||||
cmd.args(["--api-sock", &self.asock_path]);
|
||||
}
|
||||
}
|
||||
debug!(sl(), "Exec: {:?}", cmd);
|
||||
|
||||
// Make sure we're in the correct Network Namespace
|
||||
unsafe {
|
||||
let _pre = cmd.pre_exec(move || {
|
||||
if let Some(netns_path) = &netns {
|
||||
debug!(sl(), "set netns for vmm master {:?}", &netns_path);
|
||||
let netns_fd = std::fs::File::open(netns_path);
|
||||
let _ = setns(netns_fd?.as_raw_fd(), CloneFlags::CLONE_NEWNET)
|
||||
.context("set netns failed");
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
|
||||
let mut child = cmd.spawn()?;
|
||||
|
||||
match child.id() {
|
||||
Some(id) => {
|
||||
let cur_tid = nix::unistd::gettid().as_raw() as u32;
|
||||
info!(
|
||||
sl(),
|
||||
"VMM spawned successfully: PID: {:?}, current TID: {:?}", id, cur_tid
|
||||
);
|
||||
self.pid = Some(id);
|
||||
}
|
||||
None => {
|
||||
let exit_status = child.wait().await?;
|
||||
error!(sl(), "Process exited, status: {:?}", exit_status);
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn hypervisor_config(&self) -> HypervisorConfig {
|
||||
debug!(sl(), "[Firecracker]: Hypervisor config");
|
||||
self.config.clone()
|
||||
}
|
||||
|
||||
/// Replaces the hypervisor configuration with `config`.
pub(crate) fn set_hypervisor_config(&mut self, config: HypervisorConfig) {
    debug!(sl(), "[Firecracker]: Set Hypervisor config");
    let _previous = std::mem::replace(&mut self.config, config);
}
|
||||
|
||||
pub(crate) fn resize_memory(&mut self, new_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
warn!(
|
||||
sl(),
|
||||
"memory size unchanged, requested: {:?} Not implemented", new_mem_mb
|
||||
);
|
||||
Ok((
|
||||
0,
|
||||
MemoryConfig {
|
||||
..Default::default()
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
/// Adds `flag` to the capabilities advertised by this hypervisor.
pub(crate) fn set_capabilities(&mut self, flag: CapabilityBits) {
    let caps = &mut self.capabilities;
    caps.add(flag);
}
|
||||
|
||||
pub(crate) fn set_guest_memory_block_size(&mut self, size: u32) {
|
||||
warn!(
|
||||
sl(),
|
||||
"guest memory block size unchanged, requested: {:?}, Not implemented", size
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) fn guest_memory_block_size_mb(&self) -> u32 {
|
||||
warn!(sl(), "guest memory block size Not implemented");
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Persist for FcInner {
|
||||
type State = HypervisorState;
|
||||
type ConstructorArgs = ();
|
||||
|
||||
async fn save(&self) -> Result<Self::State> {
|
||||
Ok(HypervisorState {
|
||||
hypervisor_type: HYPERVISOR_FIRECRACKER.to_string(),
|
||||
id: self.id.clone(),
|
||||
vm_path: self.vm_path.clone(),
|
||||
config: self.hypervisor_config(),
|
||||
jailed: self.jailed,
|
||||
jailer_root: self.jailer_root.clone(),
|
||||
run_dir: self.run_dir.clone(),
|
||||
netns: self.netns.clone(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
async fn restore(
|
||||
_hypervisor_args: Self::ConstructorArgs,
|
||||
hypervisor_state: Self::State,
|
||||
) -> Result<Self> {
|
||||
Ok(FcInner {
|
||||
id: hypervisor_state.id,
|
||||
asock_path: String::default(),
|
||||
state: VmmState::NotReady,
|
||||
vm_path: hypervisor_state.vm_path,
|
||||
config: hypervisor_state.config,
|
||||
netns: hypervisor_state.netns,
|
||||
pid: None,
|
||||
jailed: hypervisor_state.jailed,
|
||||
jailer_root: hypervisor_state.jailer_root,
|
||||
client: Client::unix(),
|
||||
pending_devices: vec![],
|
||||
run_dir: hypervisor_state.run_dir,
|
||||
capabilities: Capabilities::new(),
|
||||
})
|
||||
}
|
||||
}
|
102
src/runtime-rs/crates/hypervisor/src/firecracker/inner_device.rs
Normal file
102
src/runtime-rs/crates/hypervisor/src/firecracker/inner_device.rs
Normal file
@ -0,0 +1,102 @@
|
||||
//Copyright (c) 2019-2022 Alibaba Cloud
|
||||
//Copyright (c) 2019-2022 Ant Group
|
||||
//Copyright (c) 2023 Nubificus Ltd
|
||||
//
|
||||
//SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use super::FcInner;
|
||||
use crate::firecracker::{
|
||||
inner_hypervisor::{FC_AGENT_SOCKET_NAME, ROOT},
|
||||
sl,
|
||||
};
|
||||
use crate::VmmState;
|
||||
use crate::{device::DeviceType, HybridVsockConfig, VsockConfig};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use serde_json::json;
|
||||
|
||||
impl FcInner {
|
||||
pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result<()> {
|
||||
if self.state == VmmState::NotReady {
|
||||
info!(sl(), "VMM not ready, queueing device {}", device);
|
||||
|
||||
self.pending_devices.insert(0, device);
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
debug!(sl(), "Add Device {} ", &device);
|
||||
|
||||
match device {
|
||||
DeviceType::Block(block) => self
|
||||
.hotplug_block_device(block.config.path_on_host.as_str(), block.config.index)
|
||||
.await
|
||||
.context("add block device"),
|
||||
DeviceType::Network(network) => self
|
||||
.add_net_device(&network.config, network.device_id)
|
||||
.await
|
||||
.context("add net device"),
|
||||
DeviceType::HybridVsock(hvsock) => {
|
||||
self.add_hvsock(&hvsock.config).await.context("add vsock")
|
||||
}
|
||||
DeviceType::Vsock(vsock) => self.add_vsock(&vsock.config).await.context("add vsock"),
|
||||
_ => Err(anyhow!("unhandled device: {:?}", device)),
|
||||
}
|
||||
}
|
||||
|
||||
// Since Firecracker doesn't support sharefs, we patch block devices on pre-start inserted
|
||||
// dummy drives
|
||||
pub(crate) async fn hotplug_block_device(&mut self, path: &str, id: u64) -> Result<()> {
|
||||
if id > 0 {
|
||||
self.patch_container_rootfs(&id.to_string(), path).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn remove_device(&mut self, device: DeviceType) -> Result<()> {
|
||||
info!(sl(), "Remove Device {} ", device);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn update_device(&mut self, device: DeviceType) -> Result<()> {
|
||||
info!(sl(), "update device {:?}", &device);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn add_hvsock(&mut self, config: &HybridVsockConfig) -> Result<()> {
|
||||
let rel_uds_path = match self.jailed {
|
||||
false => [self.vm_path.as_str(), FC_AGENT_SOCKET_NAME].join("/"),
|
||||
true => FC_AGENT_SOCKET_NAME.to_string(),
|
||||
};
|
||||
let body_vsock: String = json!({
|
||||
"vsock_id": String::from(ROOT),
|
||||
"guest_cid": config.guest_cid,
|
||||
"uds_path": rel_uds_path,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
info!(sl(), "HybridVsock configure: {:?}", &body_vsock);
|
||||
|
||||
self.request_with_retry(hyper::Method::PUT, "/vsock", body_vsock)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn add_vsock(&mut self, config: &VsockConfig) -> Result<()> {
|
||||
let rel_uds_path = match self.jailed {
|
||||
false => [self.vm_path.as_str(), FC_AGENT_SOCKET_NAME].join("/"),
|
||||
true => FC_AGENT_SOCKET_NAME.to_string(),
|
||||
};
|
||||
let body_vsock: String = json!({
|
||||
"vsock_id": String::from(ROOT),
|
||||
"guest_cid": config.guest_cid,
|
||||
"uds_path": rel_uds_path,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
info!(sl(), "HybridVsock configure: {:?}", &body_vsock);
|
||||
|
||||
self.request_with_retry(hyper::Method::PUT, "/vsock", body_vsock)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -0,0 +1,192 @@
|
||||
//Copyright (c) 2019-2022 Alibaba Cloud
|
||||
//Copyright (c) 2023 Nubificus Ltd
|
||||
//
|
||||
//SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use crate::firecracker::{sl, FcInner};
|
||||
use crate::{VcpuThreadIds, VmmState, HYPERVISOR_FIRECRACKER};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use kata_types::capabilities::Capabilities;
|
||||
use kata_types::config::KATA_PATH;
|
||||
use std::collections::HashSet;
|
||||
use std::iter::FromIterator;
|
||||
use tokio::fs;
|
||||
|
||||
// File name of the Firecracker API unix socket.
pub const FC_API_SOCKET_NAME: &str = "fc.sock";
|
||||
// File name of the unix socket backing the agent vsock.
pub const FC_AGENT_SOCKET_NAME: &str = "kata.hvsock";
|
||||
// Name of the jail root directory below `vm_path`; also used as the vsock id.
pub const ROOT: &str = "root";
|
||||
|
||||
// URI scheme prepended to the agent socket path by `get_agent_socket`.
const HYBRID_VSOCK_SCHEME: &str = "hvsock";
|
||||
|
||||
impl FcInner {
|
||||
pub(crate) async fn prepare_vm(&mut self, id: &str, _netns: Option<String>) -> Result<()> {
|
||||
debug!(sl(), "Preparing Firecracker");
|
||||
|
||||
self.id = id.to_string();
|
||||
|
||||
if !self.config.jailer_path.is_empty() {
|
||||
debug!(sl(), "Running jailed");
|
||||
self.jailed = true;
|
||||
self.jailer_root = KATA_PATH.to_string();
|
||||
debug!(sl(), "jailer_root: {:?}", self.jailer_root);
|
||||
self.vm_path = [
|
||||
self.jailer_root.clone(),
|
||||
HYPERVISOR_FIRECRACKER.to_string(),
|
||||
id.to_string(),
|
||||
]
|
||||
.join("/");
|
||||
debug!(sl(), "VM Path: {:?}", self.vm_path);
|
||||
self.run_dir = [self.vm_path.clone(), "root".to_string(), "run".to_string()].join("/");
|
||||
debug!(sl(), "Rundir: {:?}", self.run_dir);
|
||||
let _ = self.remount_jailer_with_exec().await;
|
||||
} else {
|
||||
self.vm_path = [KATA_PATH.to_string(), id.to_string()].join("/");
|
||||
debug!(sl(), "VM Path: {:?}", self.vm_path);
|
||||
self.run_dir = [self.vm_path.clone(), "run".to_string()].join("/");
|
||||
debug!(sl(), "Rundir: {:?}", self.run_dir);
|
||||
}
|
||||
// We construct the FC API socket path based on the run_dir variable (jailed or
|
||||
// non-jailed).
|
||||
self.asock_path = [self.run_dir.as_str(), "fc.sock"].join("/");
|
||||
debug!(sl(), "Socket Path: {:?}", self.asock_path);
|
||||
|
||||
let _ = fs::create_dir_all(self.run_dir.as_str())
|
||||
.await
|
||||
.context(format!("failed to create directory {:?}", self.vm_path));
|
||||
|
||||
self.netns = _netns.clone();
|
||||
self.prepare_vmm(self.netns.clone()).await?;
|
||||
self.state = VmmState::VmmServerReady;
|
||||
self.prepare_vmm_resources().await?;
|
||||
self.prepare_hvsock().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
|
||||
debug!(sl(), "Starting sandbox");
|
||||
let body: String = serde_json::json!({
|
||||
"action_type": "InstanceStart"
|
||||
})
|
||||
.to_string();
|
||||
self.request_with_retry(hyper::Method::PUT, "/actions", body)
|
||||
.await?;
|
||||
self.state = VmmState::VmRunning;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn stop_vm(&mut self) -> Result<()> {
|
||||
debug!(sl(), "Stopping sandbox");
|
||||
if self.state != VmmState::VmRunning {
|
||||
debug!(sl(), "VM not running!");
|
||||
} else if let Some(pid_to_kill) = &self.pid {
|
||||
let pid = ::nix::unistd::Pid::from_raw(*pid_to_kill as i32);
|
||||
if let Err(err) = ::nix::sys::signal::kill(pid, nix::sys::signal::SIGKILL) {
|
||||
if err != ::nix::Error::ESRCH {
|
||||
debug!(sl(), "Failed to kill VMM with pid {} {:?}", pid, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn pause_vm(&self) -> Result<()> {
|
||||
warn!(sl(), "Pause VM: Not implemented");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn save_vm(&self) -> Result<()> {
|
||||
warn!(sl(), "Save VM: Not implemented");
|
||||
Ok(())
|
||||
}
|
||||
pub(crate) fn resume_vm(&self) -> Result<()> {
|
||||
warn!(sl(), "Resume VM: Not implemented");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn get_agent_socket(&self) -> Result<String> {
|
||||
debug!(sl(), "Get kata-agent socket");
|
||||
let vsock_path = match self.jailed {
|
||||
false => [self.vm_path.as_str(), FC_AGENT_SOCKET_NAME].join("/"),
|
||||
true => [self.vm_path.as_str(), ROOT, FC_AGENT_SOCKET_NAME].join("/"),
|
||||
};
|
||||
Ok(format!("{}://{}", HYBRID_VSOCK_SCHEME, vsock_path))
|
||||
}
|
||||
|
||||
pub(crate) async fn disconnect(&mut self) {
|
||||
warn!(sl(), "Disconnect: Not implemented");
|
||||
}
|
||||
pub(crate) async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||
debug!(sl(), "Get Thread IDs");
|
||||
Ok(VcpuThreadIds::default())
|
||||
}
|
||||
|
||||
pub(crate) async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||
debug!(sl(), "Get PIDs");
|
||||
let mut pids = HashSet::new();
|
||||
// get shim thread ids
|
||||
pids.insert(self.pid.unwrap());
|
||||
|
||||
debug!(sl(), "PIDs: {:?}", pids);
|
||||
Ok(Vec::from_iter(pids.into_iter()))
|
||||
}
|
||||
|
||||
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
|
||||
debug!(sl(), "Get VMM master TID");
|
||||
if let Some(pid) = self.pid {
|
||||
Ok(pid)
|
||||
} else {
|
||||
Err(anyhow!("could not get vmm master tid"))
|
||||
}
|
||||
}
|
||||
pub(crate) async fn get_ns_path(&self) -> Result<String> {
|
||||
debug!(sl(), "Get NS path");
|
||||
if let Some(pid) = self.pid {
|
||||
let ns_path = format!("/proc/{}/ns", pid);
|
||||
Ok(ns_path)
|
||||
} else {
|
||||
Err(anyhow!("could not get ns path"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Releases the host resources of the sandbox.
///
/// All work is delegated to `cleanup_resource`, which detaches the jail
/// mounts and removes `vm_path`. Removing `vm_path` a second time here
/// would only fail, because the directory is already gone, and log a
/// spurious error.
pub(crate) async fn cleanup(&self) -> Result<()> {
    debug!(sl(), "Cleanup");
    self.cleanup_resource();
    Ok(())
}
|
||||
|
||||
pub(crate) async fn resize_vcpu(&self, old_vcpu: u32, new_vcpu: u32) -> Result<(u32, u32)> {
|
||||
warn!(sl(), "Resize vCPU: Not implemented");
|
||||
Ok((old_vcpu, new_vcpu))
|
||||
}
|
||||
|
||||
pub(crate) async fn check(&self) -> Result<()> {
|
||||
warn!(sl(), "Check: Not implemented");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn get_jailer_root(&self) -> Result<String> {
|
||||
debug!(sl(), "Get Jailer Root");
|
||||
Ok(self.jailer_root.clone())
|
||||
}
|
||||
|
||||
pub(crate) async fn capabilities(&self) -> Result<Capabilities> {
|
||||
debug!(sl(), "Capabilities");
|
||||
Ok(self.capabilities.clone())
|
||||
}
|
||||
|
||||
pub(crate) async fn get_hypervisor_metrics(&self) -> Result<String> {
|
||||
warn!(sl(), "Get Hypervisor Metrics: Not implemented");
|
||||
todo!()
|
||||
}
|
||||
}
|
215
src/runtime-rs/crates/hypervisor/src/firecracker/mod.rs
Normal file
215
src/runtime-rs/crates/hypervisor/src/firecracker/mod.rs
Normal file
@ -0,0 +1,215 @@
|
||||
//Copyright (c) 2019-2022 Alibaba Cloud
|
||||
//Copyright (c) 2023 Nubificus Ltd
|
||||
//
|
||||
//SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
mod fc_api;
|
||||
mod inner;
|
||||
mod inner_device;
|
||||
mod inner_hypervisor;
|
||||
|
||||
use super::HypervisorState;
|
||||
use crate::MemoryConfig;
|
||||
use crate::{device::DeviceType, Hypervisor, HypervisorConfig, VcpuThreadIds};
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use inner::FcInner;
|
||||
use kata_types::capabilities::Capabilities;
|
||||
use kata_types::capabilities::CapabilityBits;
|
||||
use persist::sandbox_persist::Persist;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Firecracker {
|
||||
inner: Arc<RwLock<FcInner>>,
|
||||
}
|
||||
|
||||
// Convenience function to set the scope.
|
||||
pub fn sl() -> slog::Logger {
|
||||
slog_scope::logger().new(o!("subsystem" => "firecracker"))
|
||||
}
|
||||
|
||||
impl Default for Firecracker {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Firecracker {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
inner: Arc::new(RwLock::new(FcInner::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.set_hypervisor_config(config)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Hypervisor for Firecracker {
|
||||
async fn prepare_vm(&self, id: &str, netns: Option<String>) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.prepare_vm(id, netns).await
|
||||
}
|
||||
|
||||
async fn start_vm(&self, timeout: i32) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.start_vm(timeout).await
|
||||
}
|
||||
|
||||
async fn stop_vm(&self) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.stop_vm().await
|
||||
}
|
||||
|
||||
async fn pause_vm(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.pause_vm()
|
||||
}
|
||||
|
||||
async fn resume_vm(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.resume_vm()
|
||||
}
|
||||
|
||||
async fn save_vm(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.save_vm().await
|
||||
}
|
||||
|
||||
async fn add_device(&self, device: DeviceType) -> Result<DeviceType> {
|
||||
let mut inner = self.inner.write().await;
|
||||
match inner.add_device(device.clone()).await {
|
||||
Ok(_) => Ok(device),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
async fn remove_device(&self, device: DeviceType) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.remove_device(device).await
|
||||
}
|
||||
|
||||
async fn update_device(&self, device: DeviceType) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.update_device(device).await
|
||||
}
|
||||
|
||||
async fn get_agent_socket(&self) -> Result<String> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_agent_socket().await
|
||||
}
|
||||
|
||||
async fn disconnect(&self) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.disconnect().await
|
||||
}
|
||||
|
||||
async fn hypervisor_config(&self) -> HypervisorConfig {
|
||||
let inner = self.inner.read().await;
|
||||
inner.hypervisor_config()
|
||||
}
|
||||
|
||||
async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_thread_ids().await
|
||||
}
|
||||
|
||||
async fn cleanup(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.cleanup().await
|
||||
}
|
||||
|
||||
async fn resize_vcpu(&self, old_vcpu: u32, new_vcpu: u32) -> Result<(u32, u32)> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.resize_vcpu(old_vcpu, new_vcpu).await
|
||||
}
|
||||
|
||||
async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_pids().await
|
||||
}
|
||||
|
||||
async fn get_vmm_master_tid(&self) -> Result<u32> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_vmm_master_tid().await
|
||||
}
|
||||
|
||||
async fn get_ns_path(&self) -> Result<String> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_ns_path().await
|
||||
}
|
||||
|
||||
async fn check(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.check().await
|
||||
}
|
||||
|
||||
async fn get_jailer_root(&self) -> Result<String> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_jailer_root().await
|
||||
}
|
||||
|
||||
async fn save_state(&self) -> Result<HypervisorState> {
|
||||
self.save().await
|
||||
}
|
||||
|
||||
async fn capabilities(&self) -> Result<Capabilities> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.capabilities().await
|
||||
}
|
||||
|
||||
async fn get_hypervisor_metrics(&self) -> Result<String> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_hypervisor_metrics().await
|
||||
}
|
||||
|
||||
async fn set_capabilities(&self, flag: CapabilityBits) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.set_capabilities(flag)
|
||||
}
|
||||
|
||||
async fn set_guest_memory_block_size(&self, size: u32) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.set_guest_memory_block_size(size);
|
||||
}
|
||||
|
||||
async fn guest_memory_block_size(&self) -> u32 {
|
||||
let inner = self.inner.read().await;
|
||||
inner.guest_memory_block_size_mb()
|
||||
}
|
||||
|
||||
async fn resize_memory(&self, new_mem_mb: u32) -> Result<(u32, MemoryConfig)> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.resize_memory(new_mem_mb)
|
||||
}
|
||||
|
||||
async fn get_passfd_listener_addr(&self) -> Result<(String, u32)> {
|
||||
Err(anyhow::anyhow!("Not yet supported"))
|
||||
}
|
||||
}
|
||||
#[async_trait]
|
||||
impl Persist for Firecracker {
|
||||
type State = HypervisorState;
|
||||
type ConstructorArgs = ();
|
||||
/// Save a state of the component.
|
||||
async fn save(&self) -> Result<Self::State> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.save().await.context("save hypervisor state")
|
||||
}
|
||||
/// Restore a component from a specified state.
|
||||
async fn restore(
|
||||
hypervisor_args: Self::ConstructorArgs,
|
||||
hypervisor_state: Self::State,
|
||||
) -> Result<Self> {
|
||||
let inner = FcInner::restore(hypervisor_args, hypervisor_state).await?;
|
||||
Ok(Self {
|
||||
inner: Arc::new(RwLock::new(inner)),
|
||||
})
|
||||
}
|
||||
}
|
@ -118,6 +118,11 @@ impl KernelParams {
|
||||
self.params.append(&mut params.params);
|
||||
}
|
||||
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
pub(crate) fn push(&mut self, new_param: Param) {
|
||||
self.params.push(new_param);
|
||||
}
|
||||
|
||||
pub(crate) fn from_string(params_string: &str) -> Self {
|
||||
let mut params = vec![];
|
||||
|
||||
|
@ -15,6 +15,8 @@ pub use device::driver::*;
|
||||
use device::DeviceType;
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
pub mod dragonball;
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
pub mod firecracker;
|
||||
mod kernel_param;
|
||||
pub mod qemu;
|
||||
pub use kernel_param::Param;
|
||||
@ -61,6 +63,7 @@ const HUGE_SHMEM: &str = "hugeshmem";
|
||||
|
||||
pub const HYPERVISOR_DRAGONBALL: &str = "dragonball";
|
||||
pub const HYPERVISOR_QEMU: &str = "qemu";
|
||||
pub const HYPERVISOR_FIRECRACKER: &str = "firecracker";
|
||||
|
||||
pub const DEFAULT_HYBRID_VSOCK_NAME: &str = "kata.hvsock";
|
||||
pub const JAILER_ROOT: &str = "root";
|
||||
|
@ -14,9 +14,14 @@ pub const PERSIST_FILE: &str = "state.json";
|
||||
use kata_sys_util::validate::verify_id;
|
||||
use safe_path::scoped_join;
|
||||
|
||||
pub fn to_disk<T: serde::Serialize>(value: &T, sid: &str) -> Result<()> {
|
||||
pub fn to_disk<T: serde::Serialize>(value: &T, sid: &str, jailer_path: &str) -> Result<()> {
|
||||
verify_id(sid).context("failed to verify sid")?;
|
||||
let mut path = scoped_join(KATA_PATH, sid)?;
|
||||
// FIXME: handle jailed case
|
||||
let mut path = match jailer_path {
|
||||
"" => scoped_join(KATA_PATH, sid)?,
|
||||
_ => scoped_join(jailer_path, "root")?,
|
||||
};
|
||||
//let mut path = scoped_join(KATA_PATH, sid)?;
|
||||
if path.exists() {
|
||||
path.push(PERSIST_FILE);
|
||||
let f = File::create(path)
|
||||
@ -62,10 +67,10 @@ mod tests {
|
||||
key: 1,
|
||||
};
|
||||
// invalid sid
|
||||
assert!(to_disk(&data, "..3").is_err());
|
||||
assert!(to_disk(&data, "../../../3").is_err());
|
||||
assert!(to_disk(&data, "a/b/c").is_err());
|
||||
assert!(to_disk(&data, ".#cdscd.").is_err());
|
||||
assert!(to_disk(&data, "..3", "").is_err());
|
||||
assert!(to_disk(&data, "../../../3", "").is_err());
|
||||
assert!(to_disk(&data, "a/b/c", "").is_err());
|
||||
assert!(to_disk(&data, ".#cdscd.", "").is_err());
|
||||
|
||||
let sid = "aadede";
|
||||
let sandbox_dir = [KATA_PATH, sid].join("/");
|
||||
@ -74,7 +79,7 @@ mod tests {
|
||||
.create(&sandbox_dir)
|
||||
.is_ok()
|
||||
{
|
||||
assert!(to_disk(&data, sid).is_ok());
|
||||
assert!(to_disk(&data, sid, "").is_ok());
|
||||
if let Ok(result) = from_disk::<Kata>(sid) {
|
||||
assert_eq!(result.name, data.name);
|
||||
assert_eq!(result.key, data.key);
|
||||
|
@ -23,9 +23,13 @@ use common::{message::Message, RuntimeHandler, RuntimeInstance};
|
||||
use hypervisor::Hypervisor;
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
use hypervisor::{dragonball::Dragonball, HYPERVISOR_DRAGONBALL};
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
use hypervisor::{firecracker::Firecracker, HYPERVISOR_FIRECRACKER};
|
||||
use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU};
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
use kata_types::config::DragonballConfig;
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
use kata_types::config::FirecrackerConfig;
|
||||
use kata_types::config::{hypervisor::register_hypervisor_plugin, QemuConfig, TomlConfig};
|
||||
|
||||
#[cfg(all(feature = "cloud-hypervisor", not(target_arch = "s390x")))]
|
||||
@ -55,6 +59,9 @@ impl RuntimeHandler for VirtContainer {
|
||||
{
|
||||
let dragonball_config = Arc::new(DragonballConfig::new());
|
||||
register_hypervisor_plugin("dragonball", dragonball_config);
|
||||
|
||||
let firecracker_config = Arc::new(FirecrackerConfig::new());
|
||||
register_hypervisor_plugin("firecracker", firecracker_config);
|
||||
}
|
||||
|
||||
let qemu_config = Arc::new(QemuConfig::new());
|
||||
@ -160,6 +167,14 @@ async fn new_hypervisor(toml_config: &TomlConfig) -> Result<Arc<dyn Hypervisor>>
|
||||
.await;
|
||||
Ok(Arc::new(hypervisor))
|
||||
}
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
HYPERVISOR_FIRECRACKER => {
|
||||
let mut hypervisor = Firecracker::new();
|
||||
hypervisor
|
||||
.set_hypervisor_config(hypervisor_config.clone())
|
||||
.await;
|
||||
Ok(Arc::new(hypervisor))
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "cloud-hypervisor", not(target_arch = "s390x")))]
|
||||
HYPERVISOR_NAME_CH => {
|
||||
|
@ -18,12 +18,14 @@ use common::{Sandbox, SandboxNetworkEnv};
|
||||
use containerd_shim_protos::events::task::TaskOOM;
|
||||
use hypervisor::VsockConfig;
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
use hypervisor::{dragonball::Dragonball, HYPERVISOR_DRAGONBALL};
|
||||
use hypervisor::{dragonball::Dragonball, HYPERVISOR_DRAGONBALL, HYPERVISOR_FIRECRACKER};
|
||||
use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU};
|
||||
use hypervisor::{utils::get_hvsock_path, HybridVsockConfig, DEFAULT_GUEST_VSOCK_CID};
|
||||
use hypervisor::{BlockConfig, Hypervisor};
|
||||
use kata_sys_util::hooks::HookStates;
|
||||
use kata_types::capabilities::CapabilityBits;
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
use kata_types::config::hypervisor::HYPERVISOR_NAME_CH;
|
||||
use kata_types::config::TomlConfig;
|
||||
use persist::{self, sandbox_persist::Persist};
|
||||
use resource::manager::ManagerArgs;
|
||||
@ -570,12 +572,39 @@ impl Persist for VirtSandbox {
|
||||
|
||||
/// Save a state of Sandbox
|
||||
async fn save(&self) -> Result<Self::State> {
|
||||
let hypervisor_state = self.hypervisor.save_state().await?;
|
||||
let sandbox_state = crate::sandbox_persist::SandboxState {
|
||||
sandbox_type: VIRTCONTAINER.to_string(),
|
||||
resource: Some(self.resource_manager.save().await?),
|
||||
hypervisor: Some(self.hypervisor.save_state().await?),
|
||||
hypervisor: match hypervisor_state.hypervisor_type.as_str() {
|
||||
// TODO support other hypervisors
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
HYPERVISOR_DRAGONBALL => Ok(Some(hypervisor_state)),
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
HYPERVISOR_NAME_CH => Ok(Some(hypervisor_state)),
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
HYPERVISOR_FIRECRACKER => Ok(Some(hypervisor_state)),
|
||||
HYPERVISOR_QEMU => Ok(Some(hypervisor_state)),
|
||||
_ => Err(anyhow!(
|
||||
"Unsupported hypervisor {}",
|
||||
hypervisor_state.hypervisor_type
|
||||
)),
|
||||
}?,
|
||||
};
|
||||
persist::to_disk(&sandbox_state, &self.sid)?;
|
||||
// FIXME: properly handle jailed case
|
||||
// eg: Determine if we are running jailed:
|
||||
// let h = sandbox_state.hypervisor.clone().unwrap_or_default();
|
||||
// Figure out the jailed path:
|
||||
// jailed_path = h.<>
|
||||
// and somehow store the sandbox state into the jail:
|
||||
// persist::to_disk(&sandbox_state, &self.sid, jailed_path)?;
|
||||
// Issue is, how to handle restore.
|
||||
let h = sandbox_state.hypervisor.as_ref().unwrap();
|
||||
let vmpath = match h.jailed {
|
||||
true => h.vm_path.clone(),
|
||||
false => "".to_string(),
|
||||
};
|
||||
persist::to_disk(&sandbox_state, &self.sid, vmpath.as_str())?;
|
||||
Ok(sandbox_state)
|
||||
}
|
||||
/// Restore Sandbox
|
||||
|
Loading…
Reference in New Issue
Block a user