From f1167645f3ec4912e7dcefa2a0552460a2787d21 Mon Sep 17 00:00:00 2001 From: Chasing1020 <643601464@qq.com> Date: Wed, 28 Aug 2024 18:35:46 +0800 Subject: [PATCH] runtime-rs: support for remote hypervisors type This patch adds the support of the remote hypervisor type for runtime-rs. The cloud-api-adaptor needs the annotations and network namespace path to create the VMs. The remote hypervisor opens a UNIX domain socket specified in the config file, and sends ttrpc requests to an external process to control sandbox VMs. Fixes: #10350 Signed-off-by: Chasing1020 <643601464@qq.com> --- .../src/annotations/cri_containerd.rs | 5 + src/libs/kata-types/src/config/default.rs | 8 + .../kata-types/src/config/hypervisor/mod.rs | 24 ++ .../src/config/hypervisor/remote.rs | 116 ++++++ src/libs/kata-types/src/config/mod.rs | 2 +- src/libs/protocols/build.rs | 3 + src/libs/protocols/protos/remote.proto | 47 +++ src/libs/protocols/src/lib.rs | 4 + src/runtime-rs/Cargo.lock | 5 + src/runtime-rs/Makefile | 22 +- src/runtime-rs/arch/x86_64-options.mk | 2 + .../config/configuration-remote.toml.in | 297 +++++++++++++++ src/runtime-rs/crates/agent/src/sock/mod.rs | 12 + .../crates/agent/src/sock/remote.rs | 61 ++++ src/runtime-rs/crates/hypervisor/Cargo.toml | 8 + src/runtime-rs/crates/hypervisor/src/lib.rs | 2 + .../crates/hypervisor/src/remote/inner.rs | 341 ++++++++++++++++++ .../crates/hypervisor/src/remote/mod.rs | 209 +++++++++++ .../crates/runtimes/virt_container/src/lib.rs | 13 +- 19 files changed, 1178 insertions(+), 3 deletions(-) create mode 100644 src/libs/kata-types/src/config/hypervisor/remote.rs create mode 100644 src/libs/protocols/protos/remote.proto create mode 100644 src/runtime-rs/config/configuration-remote.toml.in create mode 100644 src/runtime-rs/crates/agent/src/sock/remote.rs create mode 100644 src/runtime-rs/crates/hypervisor/src/remote/inner.rs create mode 100644 src/runtime-rs/crates/hypervisor/src/remote/mod.rs diff --git 
a/src/libs/kata-types/src/annotations/cri_containerd.rs b/src/libs/kata-types/src/annotations/cri_containerd.rs index a6e54f284..181098559 100644 --- a/src/libs/kata-types/src/annotations/cri_containerd.rs +++ b/src/libs/kata-types/src/annotations/cri_containerd.rs @@ -11,7 +11,12 @@ pub const CONTAINER_NAME_LABEL_KEY: &str = "io.kubernetes.cri.container-name"; pub const SANDBOX: &str = "sandbox"; pub const CONTAINER: &str = "container"; +// SandboxID is the sandbox ID annotation pub const SANDBOX_ID_LABEL_KEY: &str = "io.kubernetes.cri.sandbox-id"; +// SandboxName is the name of the sandbox (pod) +pub const SANDBOX_NAME_LABEL_KEY: &str = "io.kubernetes.cri.sandbox-name"; +// SandboxNamespace is the name of the namespace of the sandbox (pod) +pub const SANDBOX_NAMESPACE_LABEL_KEY: &str = "io.kubernetes.cri.sandbox-namespace"; // Ref: https://pkg.go.dev/github.com/containerd/containerd@v1.6.7/pkg/cri/annotations // SandboxCPU annotations are based on the initial CPU configuration for the sandbox. 
This is calculated as the diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs index 5dc47dd3c..596cbf1af 100644 --- a/src/libs/kata-types/src/config/default.rs +++ b/src/libs/kata-types/src/config/default.rs @@ -98,3 +98,11 @@ pub const DEFAULT_FIRECRACKER_GUEST_KERNEL_IMAGE: &str = "vmlinux"; pub const DEFAULT_FIRECRACKER_GUEST_KERNEL_PARAMS: &str = ""; pub const MAX_FIRECRACKER_VCPUS: u32 = 32; pub const MIN_FIRECRACKER_MEMORY_SIZE_MB: u32 = 128; + +// Default configuration for remote +pub const DEFAULT_REMOTE_HYPERVISOR_SOCKET: &str = "/run/peerpod/hypervisor.sock"; +pub const DEFAULT_REMOTE_HYPERVISOR_TIMEOUT: i32 = 600; // 600 Seconds +pub const MAX_REMOTE_VCPUS: u32 = 32; +pub const MIN_REMOTE_MEMORY_SIZE_MB: u32 = 64; +pub const DEFAULT_REMOTE_MEMORY_SIZE_MB: u32 = 128; +pub const DEFAULT_REMOTE_MEMORY_SLOTS: u32 = 128; diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index d9bf095f5..ab0a38aaa 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -44,6 +44,9 @@ pub use self::qemu::{QemuConfig, HYPERVISOR_NAME_QEMU}; mod ch; pub use self::ch::{CloudHypervisorConfig, HYPERVISOR_NAME_CH}; +mod remote; +pub use self::remote::{RemoteConfig, HYPERVISOR_NAME_REMOTE}; + /// Virtual PCI block device driver. pub const VIRTIO_BLK_PCI: &str = "virtio-blk-pci"; @@ -540,6 +543,7 @@ impl TopologyConfigInfo { HYPERVISOR_NAME_CH, HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_FIRECRACKER, + HYPERVISOR_NAME_REMOTE, ]; let hypervisor_name = toml_config.runtime.hypervisor_name.as_str(); if !hypervisor_names.contains(&hypervisor_name) { @@ -1040,6 +1044,18 @@ impl SharedFsInfo { } } +/// Configuration information for remote hypervisor type. 
+#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct RemoteInfo { + /// Remote hypervisor socket path (TOML key `remote_hypervisor_socket` is also accepted) + #[serde(default, alias = "remote_hypervisor_socket")] + pub hypervisor_socket: String, + + /// Remote hypervisor timeout for VM creation, in seconds (TOML key `remote_hypervisor_timeout` is also accepted) + #[serde(default, alias = "remote_hypervisor_timeout")] + pub hypervisor_timeout: i32, +} + /// Common configuration information for hypervisors. #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct Hypervisor { @@ -1123,6 +1139,10 @@ pub struct Hypervisor { #[serde(default, flatten)] pub shared_fs: SharedFsInfo, + /// Remote hypervisor configuration information. + #[serde(default, flatten)] + pub remote_info: RemoteInfo, + /// A sandbox annotation used to specify prefetch_files.list host path container image /// being used, and runtime will pass it to Hypervisor to search for corresponding /// prefetch list file: @@ -1164,6 +1184,10 @@ impl ConfigOps for Hypervisor { fn adjust_config(conf: &mut TomlConfig) -> Result<()> { HypervisorVendor::adjust_config(conf)?; let hypervisors: Vec = conf.hypervisor.keys().cloned().collect(); + info!( + sl!(), + "Adjusting hypervisor configuration {:?}", hypervisors + ); for hypervisor in hypervisors.iter() { if let Some(plugin) = get_hypervisor_plugin(hypervisor) { plugin.adjust_config(conf)?; diff --git a/src/libs/kata-types/src/config/hypervisor/remote.rs b/src/libs/kata-types/src/config/hypervisor/remote.rs new file mode 100644 index 000000000..0ad595312 --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/remote.rs @@ -0,0 +1,116 @@ +// Copyright 2024 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use byte_unit::{Byte, Unit}; +use std::io::Result; +use std::path::Path; +use std::sync::Arc; +use sysinfo::System; + +use crate::{ + config::{ + default::{self, MAX_REMOTE_VCPUS, MIN_REMOTE_MEMORY_SIZE_MB}, + ConfigPlugin, + }, device::DRIVER_NVDIMM_TYPE, eother, resolve_path +}; + +use super::register_hypervisor_plugin; + +/// Hypervisor name for remote, used to index `TomlConfig::hypervisor`. 
+pub const HYPERVISOR_NAME_REMOTE: &str = "remote"; + +/// Configuration information for remote. +#[derive(Default, Debug)] +pub struct RemoteConfig {} + +impl RemoteConfig { + /// Create a new instance of `RemoteConfig` + pub fn new() -> Self { + RemoteConfig {} + } + + /// Register the remote plugin. + pub fn register(self) { + let plugin = Arc::new(self); + register_hypervisor_plugin(HYPERVISOR_NAME_REMOTE, plugin); + } +} + +impl ConfigPlugin for RemoteConfig { + fn name(&self) -> &str { + HYPERVISOR_NAME_REMOTE + } + + /// Adjust the configuration information after loading from configuration file. + fn adjust_config(&self, conf: &mut crate::config::TomlConfig) -> Result<()> { + if let Some(remote) = conf.hypervisor.get_mut(HYPERVISOR_NAME_REMOTE) { + if remote.remote_info.hypervisor_socket.is_empty() { + remote.remote_info.hypervisor_socket = + default::DEFAULT_REMOTE_HYPERVISOR_SOCKET.to_string(); + } + resolve_path!( + remote.remote_info.hypervisor_socket, + "Remote hypervisor socket `{}` is invalid: {}" + )?; + if remote.remote_info.hypervisor_timeout == 0 { + remote.remote_info.hypervisor_timeout = default::DEFAULT_REMOTE_HYPERVISOR_TIMEOUT; + } + if remote.memory_info.default_memory == 0 { + remote.memory_info.default_memory = default::DEFAULT_REMOTE_MEMORY_SIZE_MB; + } + if remote.memory_info.memory_slots == 0 { + remote.memory_info.memory_slots = default::DEFAULT_REMOTE_MEMORY_SLOTS; + } + } + + Ok(()) + } + + /// Validate the configuration information. 
+ fn validate(&self, conf: &crate::config::TomlConfig) -> Result<()> { + if let Some(remote) = conf.hypervisor.get(HYPERVISOR_NAME_REMOTE) { + let s = System::new_all(); + let total_memory = Byte::from_u64(s.total_memory()) + .get_adjusted_unit(Unit::MiB) + .get_value() as u32; + if remote.memory_info.default_maxmemory != total_memory { + return Err(eother!( + "Remote hypervisor does not support memory hotplug, default_maxmemory must be equal to the total system memory", + )); + } + let cpus = num_cpus::get() as u32; + if remote.cpu_info.default_maxvcpus != cpus { + return Err(eother!( + "Remote hypervisor does not support CPU hotplug, default_maxvcpus must be equal to the total system CPUs", + )); + } + if !remote.boot_info.initrd.is_empty() { + return Err(eother!("Remote hypervisor does not support initrd")); + } + if !remote.boot_info.rootfs_type.is_empty() { + return Err(eother!("Remote hypervisor does not support rootfs_type")); + } + if remote.blockdev_info.block_device_driver.as_str() == DRIVER_NVDIMM_TYPE { + return Err(eother!("Remote hypervisor does not support nvdimm")); + } + if remote.memory_info.default_memory < MIN_REMOTE_MEMORY_SIZE_MB { + return Err(eother!( + "Remote hypervisor has minimal memory limitation {}", + MIN_REMOTE_MEMORY_SIZE_MB + )); + } + } + + Ok(()) + } + + fn get_min_memory(&self) -> u32 { + MIN_REMOTE_MEMORY_SIZE_MB + } + + fn get_max_cpus(&self) -> u32 { + MAX_REMOTE_VCPUS + } +} diff --git a/src/libs/kata-types/src/config/mod.rs b/src/libs/kata-types/src/config/mod.rs index a5c32fefe..a5928cd10 100644 --- a/src/libs/kata-types/src/config/mod.rs +++ b/src/libs/kata-types/src/config/mod.rs @@ -26,7 +26,7 @@ pub use self::agent::Agent; use self::default::DEFAULT_AGENT_DBG_CONSOLE_PORT; pub use self::hypervisor::{ BootInfo, CloudHypervisorConfig, DragonballConfig, FirecrackerConfig, Hypervisor, QemuConfig, - HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_FIRECRACKER, HYPERVISOR_NAME_QEMU, + RemoteConfig, HYPERVISOR_NAME_DRAGONBALL, 
HYPERVISOR_NAME_FIRECRACKER, HYPERVISOR_NAME_QEMU, }; mod runtime; diff --git a/src/libs/protocols/build.rs b/src/libs/protocols/build.rs index a76606b07..8d5c2b947 100644 --- a/src/libs/protocols/build.rs +++ b/src/libs/protocols/build.rs @@ -204,6 +204,7 @@ fn real_main() -> Result<(), std::io::Error> { "protos/agent.proto", "protos/health.proto", "protos/confidential_data_hub.proto", + "protos/remote.proto", ], true, )?; @@ -214,6 +215,7 @@ fn real_main() -> Result<(), std::io::Error> { "src/confidential_data_hub_ttrpc.rs", "src/confidential_data_hub_ttrpc_async.rs", )?; + fs::rename("src/remote_ttrpc.rs", "src/remote_ttrpc_async.rs")?; } codegen( @@ -222,6 +224,7 @@ fn real_main() -> Result<(), std::io::Error> { "protos/agent.proto", "protos/health.proto", "protos/confidential_data_hub.proto", + "protos/remote.proto", ], false, )?; diff --git a/src/libs/protocols/protos/remote.proto b/src/libs/protocols/protos/remote.proto new file mode 100644 index 000000000..93dbd5696 --- /dev/null +++ b/src/libs/protocols/protos/remote.proto @@ -0,0 +1,47 @@ +// Copyright 2024 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +syntax = "proto3"; + +package remote; + +service Hypervisor { + rpc CreateVM(CreateVMRequest) returns (CreateVMResponse) {} + rpc StartVM(StartVMRequest) returns (StartVMResponse) {} + rpc StopVM(StopVMRequest) returns (StopVMResponse) {} + rpc Version(VersionRequest) returns (VersionResponse) {} +} + +message VersionRequest { + string version = 1; +} + +message VersionResponse { + string version = 1; +} + +message CreateVMRequest { + string id = 1; + map annotations = 2; + string networkNamespacePath = 3; +} + +message CreateVMResponse { + string agentSocketPath = 1; +} + +message StartVMRequest { + string id = 1; +} + +message StartVMResponse { +} + +message StopVMRequest { + string id = 1; +} + +message StopVMResponse { +} diff --git a/src/libs/protocols/src/lib.rs b/src/libs/protocols/src/lib.rs index 97bbef6f0..5894151e4 100644 
--- a/src/libs/protocols/src/lib.rs +++ b/src/libs/protocols/src/lib.rs @@ -21,6 +21,10 @@ pub mod oci; mod serde_config; pub mod trans; pub mod types; +pub mod remote; +pub mod remote_ttrpc; +#[cfg(feature = "async")] +pub mod remote_ttrpc_async; #[cfg(feature = "with-serde")] pub use serde_config::{ diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 45e80c388..10457637a 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -1685,8 +1685,11 @@ dependencies = [ "libc", "logging", "nix 0.24.3", + "oci-spec", "path-clean", "persist", + "protobuf 3.2.0", + "protocols", "qapi", "qapi-qmp", "qapi-spec", @@ -1706,6 +1709,8 @@ dependencies = [ "thiserror", "tokio", "tracing", + "ttrpc", + "ttrpc-codegen", "vmm-sys-util 0.11.1", ] diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 39b9db469..34fa7ab59 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -86,6 +86,7 @@ HYPERVISOR_DB = dragonball HYPERVISOR_FC = firecracker HYPERVISOR_QEMU = qemu HYPERVISOR_CLH = cloud-hypervisor +HYPERVISOR_REMOTE = remote # When set to true, builds the built-in Dragonball hypervisor USE_BUILDIN_DB := true @@ -93,7 +94,7 @@ USE_BUILDIN_DB := true HYPERVISOR ?= $(HYPERVISOR_DB) ##VAR HYPERVISOR= List of hypervisors this build system can generate configuration for. 
-HYPERVISORS := $(HYPERVISOR_DB) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) +HYPERVISORS := $(HYPERVISOR_DB) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) $(HYPERVISOR_REMOTE) CLHPATH := $(CLHBINDIR)/$(CLHCMD) CLHVALIDHYPERVISORPATHS := [\"$(CLHPATH)\"] @@ -317,6 +318,20 @@ ifneq (,$(FCCMD)) DEFSTATICRESOURCEMGMT_FC := true endif +ifneq (,$(REMOTE)) + KNOWN_HYPERVISORS += $(HYPERVISOR_REMOTE) + CONFIG_FILE_REMOTE = configuration-remote.toml + CONFIG_REMOTE = config/$(CONFIG_FILE_REMOTE) + CONFIG_REMOTE_IN = $(CONFIG_REMOTE).in + CONFIG_PATH_REMOTE = $(abspath $(CONFDIR)/$(CONFIG_FILE_REMOTE)) + CONFIG_PATHS += $(CONFIG_PATH_REMOTE) + SYSCONFDIR_REMOTE = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_REMOTE)) + SYSCONFIG_PATHS += $(SYSCONFDIR_REMOTE) + CONFIGS += $(CONFIG_REMOTE) + # remote-specific options (all should be suffixed by "_REMOTE") + DEFSANDBOXCGROUPONLY_REMOTE := false +endif + ifeq ($(HYPERVISOR),$(HYPERVISOR_DB)) DEFAULT_HYPERVISOR_CONFIG = $(CONFIG_FILE_DB) endif @@ -327,6 +342,9 @@ endif ifeq ($(HYPERVISOR),$(HYPERVISOR_FC)) DEFAULT_HYPERVISOR_CONFIG = $(CONFIG_FILE_FC) endif +ifeq ($(HYPERVISOR),$(HYPERVISOR_REMOTE)) + DEFAULT_HYPERVISOR_CONFIG = $(CONFIG_FILE_REMOTE) +endif # list of variables the user may wish to override USER_VARS += ARCH USER_VARS += BINDIR @@ -334,6 +352,7 @@ USER_VARS += CONFIG_DB_IN USER_VARS += CONFIG_FC_IN USER_VARS += CONFIG_PATH USER_VARS += CONFIG_QEMU_IN +USER_VARS += CONFIG_REMOTE_IN USER_VARS += DESTDIR USER_VARS += HYPERVISOR USER_VARS += USE_BUILDIN_DB @@ -443,6 +462,7 @@ USER_VARS += DEFSANDBOXCGROUPONLY_QEMU USER_VARS += DEFSANDBOXCGROUPONLY_DB USER_VARS += DEFSANDBOXCGROUPONLY_FC USER_VARS += DEFSANDBOXCGROUPONLY_CLH +USER_VARS += DEFSANDBOXCGROUPONLY_REMOTE USER_VARS += DEFSTATICRESOURCEMGMT_DB USER_VARS += DEFSTATICRESOURCEMGMT_FC USER_VARS += DEFSTATICRESOURCEMGMT_CLH diff --git a/src/runtime-rs/arch/x86_64-options.mk b/src/runtime-rs/arch/x86_64-options.mk index 3f905d829..7ed1192ad 100644 
--- a/src/runtime-rs/arch/x86_64-options.mk +++ b/src/runtime-rs/arch/x86_64-options.mk @@ -20,3 +20,5 @@ CLHCMD := cloud-hypervisor # firecracker binary (vmm and jailer) FCCMD := firecracker FCJAILERCMD := jailer + +REMOTE := remote diff --git a/src/runtime-rs/config/configuration-remote.toml.in b/src/runtime-rs/config/configuration-remote.toml.in new file mode 100644 index 000000000..b750a9de8 --- /dev/null +++ b/src/runtime-rs/config/configuration-remote.toml.in @@ -0,0 +1,297 @@ +# Copyright 2024 Kata Contributors +# +# SPDX-License-Identifier: Apache-2.0 +# + + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_REMOTE_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + + +[hypervisor.remote] +# Default VM information query service unix domain socket, created by cloud-api-adaptor +# Ref: https://github.com/confidential-containers/cloud-api-adaptor/blob/main/src/cloud-api-adaptor/docs/vminfo.md +remote_hypervisor_socket = "/run/peerpod/hypervisor.sock" +# Timeout in seconds for creating a remote hypervisor, 600s(10min) by default +remote_hypervisor_timeout = 600 + + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +# confidential_guest = true + + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. 
"path" for io.katacontainers.config.hypervisor.path" +# Note: Remote hypervisor is only handling the following annotations +enable_annotations = ["machine_type", "default_memory", "default_vcpus"] + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +# NOTE: kernel_params are not currently passed over in remote hypervisor +# kernel_params = "" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWAREPATH@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +# default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. 
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and +# the CPU hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what you are doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +# default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Up to 30 devices per bridge can be hot plugged. +# * Up to 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set to @DEFMEMSZ@ MiB. +# Note: the remote hypervisor uses the peer pod config to determine the memory of the VM +# default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set to @DEFMEMSLOTS@. +# This determines the number of times that memory can be hot-added to the sandbox/VM. +# Note: the remote hypervisor uses the peer pod config to determine the memory of the VM +#memory_slots = @DEFMEMSLOTS@ + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. Debug also enables the hmp socket. +# +# Default false +# enable_debug = true + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. 
+# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +# Note: The remote hypervisor has a different guest, so currently requires this to be disabled +disable_guest_selinux = true + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +# enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shutdown, +# increasing the container shutdown time slightly. +# +# (default: disabled) +# enable_tracing = true + +# Enable debug console. 
+ +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec <sandbox-id>" command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 30) +#dial_timeout = 30 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +# enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used with a customized network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +# Note: The remote hypervisor uses its own network, so "none" is required +internetworking_model="none" + +name="virt_container" +hypervisor_name="remote" +agent_name="kata" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +# Note: The remote hypervisor has a different guest, so currently requires this to be set to true +disable_guest_seccomp=true + + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). 
+# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# (default: false) +# Note: The remote hypervisor has a different networking model, which requires true +disable_new_netns = false + +# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. +# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. +# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. +# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_REMOTE@ + +# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug. 
+# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +# Note: the remote hypervisor uses the peer pod config to determine the sandbox size, so requires this to be set to true +static_sandbox_resource_mgmt=true + +# VFIO Mode +# Determines how VFIO devices should be presented to the container. +# Options: +# +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# +# - guest-kernel +# This is a Kata-specific behaviour that's useful in certain cases. +# The VFIO device is managed by whatever driver in the VM kernel +# claims it. This means it will appear as one or more device nodes +# or network interfaces depending on the nature of the device. +# Using this mode requires specially built workloads that know how +# to locate the relevant device interfaces within the VM. +# +vfio_mode="@DEFVFIOMODE@" + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. +# Note: remote hypervisor has no sharing of emptydir mounts from host to guest +disable_guest_empty_dir=false + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. 
+# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true diff --git a/src/runtime-rs/crates/agent/src/sock/mod.rs b/src/runtime-rs/crates/agent/src/sock/mod.rs index 4e4d851a1..21809253b 100644 --- a/src/runtime-rs/crates/agent/src/sock/mod.rs +++ b/src/runtime-rs/crates/agent/src/sock/mod.rs @@ -8,6 +8,8 @@ mod hybrid_vsock; pub use hybrid_vsock::HybridVsock; mod vsock; pub use vsock::Vsock; +mod remote; +pub use remote::Remote; use std::{ pin::Pin, @@ -28,6 +30,7 @@ use url::Url; const VSOCK_SCHEME: &str = "vsock"; const HYBRID_VSOCK_SCHEME: &str = "hvsock"; +const REMOTE_SCHEME: &str = "remote"; /// Socket stream pub enum Stream { @@ -98,6 +101,7 @@ impl ConnectConfig { enum SockType { Vsock(Vsock), HybridVsock(HybridVsock), + Remote(Remote), } #[async_trait] @@ -114,6 +118,7 @@ pub fn new(address: &str, port: u32) -> Result> { match parse(address, port).context("parse url")? 
{ SockType::Vsock(sock) => Ok(Arc::new(sock)), SockType::HybridVsock(sock) => Ok(Arc::new(sock)), + SockType::Remote(sock) => Ok(Arc::new(sock)), } } @@ -136,6 +141,13 @@ fn parse(address: &str, port: u32) -> Result { let uds = path[0]; Ok(SockType::HybridVsock(HybridVsock::new(uds, port))) } + REMOTE_SCHEME => { + let path: Vec<&str> = url.path().split(':').collect(); + if path.len() != 1 { + return Err(anyhow!("invalid path {:?}", path)); + } + Ok(SockType::Remote(Remote::new(path[0].to_string()))) + } _ => Err(anyhow!("Unsupported scheme")), } } diff --git a/src/runtime-rs/crates/agent/src/sock/remote.rs b/src/runtime-rs/crates/agent/src/sock/remote.rs new file mode 100644 index 000000000..c22d9ccbc --- /dev/null +++ b/src/runtime-rs/crates/agent/src/sock/remote.rs @@ -0,0 +1,61 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{os::unix::prelude::AsRawFd, path::Path}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use tokio::{io::Interest, net::UnixStream}; + +use super::{ConnectConfig, Sock, Stream}; + +#[derive(Debug, PartialEq)] +pub struct Remote { + path: String, +} + +impl Remote { + pub fn new(path: String) -> Self { + Self { path } + } +} + +#[async_trait] +impl Sock for Remote { + async fn connect(&self, config: &ConnectConfig) -> Result { + let retry_times = config.reconnect_timeout_ms / config.dial_timeout_ms; + for i in 0..retry_times { + match connect_helper(&self.path).await { + Ok(stream) => { + info!( + sl!(), + "remote connect success on {} current client fd {}", + i, + stream.as_raw_fd() + ); + return Ok(Stream::Unix(stream)); + } + Err(err) => { + debug!(sl!(), "remote connect on {} err : {:?}", i, err); + tokio::time::sleep(std::time::Duration::from_millis(config.dial_timeout_ms)) + .await; + continue; + } + } + } + Err(anyhow!("cannot connect to agent ttrpc server {:?}", config)) + } +} + +async fn connect_helper(address: 
&str) -> Result { + let stream = UnixStream::connect(Path::new(&address)) + .await + .context("failed to create UnixAddr")?; + stream + .ready(Interest::READABLE | Interest::WRITABLE) + .await?; + Ok(stream) +} diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index 71787cd19..62a92544f 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -28,11 +28,15 @@ rand = "0.8.4" path-clean = "1.0.1" lazy_static = "1.4" tracing = "0.1.36" +ttrpc = {version = "0.8.1", features = ["async"] } +protobuf = "3.1.0" kata-sys-util = { path = "../../../libs/kata-sys-util" } kata-types = { path = "../../../libs/kata-types" } logging = { path = "../../../libs/logging" } +protocols = { path = "../../../libs/protocols", features = ["async"] } shim-interface = { path = "../../../libs/shim-interface" } +oci-spec = { version = "0.6.8", features = ["runtime"] } ch-config = { path = "ch-config", optional = true } tests_utils = { path = "../../tests/utils" } @@ -68,3 +72,7 @@ hypervisor = { path = ".", features = ["cloud-hypervisor"] } test-utils = { path = "../../../libs/test-utils" } serial_test = "2.0.0" + + +[build-dependencies] +ttrpc-codegen = "0.4.2" diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index 6cf5a5f19..a56f2c9d5 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -19,6 +19,7 @@ pub mod dragonball; pub mod firecracker; mod kernel_param; pub mod qemu; +pub mod remote; pub use kernel_param::Param; pub mod utils; use std::collections::HashMap; @@ -66,6 +67,7 @@ const HUGE_SHMEM: &str = "hugeshmem"; pub const HYPERVISOR_DRAGONBALL: &str = "dragonball"; pub const HYPERVISOR_QEMU: &str = "qemu"; pub const HYPERVISOR_FIRECRACKER: &str = "firecracker"; +pub const HYPERVISOR_REMOTE: &str = "remote"; pub const DEFAULT_HYBRID_VSOCK_NAME: &str = "kata.hvsock"; pub 
const JAILER_ROOT: &str = "root"; diff --git a/src/runtime-rs/crates/hypervisor/src/remote/inner.rs b/src/runtime-rs/crates/hypervisor/src/remote/inner.rs new file mode 100644 index 000000000..81e193794 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/remote/inner.rs @@ -0,0 +1,341 @@ +// Copyright 2024 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::{ + device::DeviceType, hypervisor_persist::HypervisorState, HypervisorConfig, HYPERVISOR_REMOTE, +}; +use crate::{MemoryConfig, VcpuThreadIds}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use kata_types::capabilities::{Capabilities, CapabilityBits}; +use persist::sandbox_persist::Persist; +use protocols::{ + remote::{CreateVMRequest, StartVMRequest, StopVMRequest}, + remote_ttrpc_async::HypervisorClient, +}; +use std::{collections::HashMap, time}; +use tokio::sync::{mpsc, Mutex}; +use ttrpc::context::{self}; +use ttrpc::r#async::Client; + +const REMOTE_SCHEME: &str = "remote"; +const DEFAULT_MIN_TIMEOUT: i32 = time::Duration::from_secs(60).as_millis() as i32; + +pub struct RemoteInner { + /// sandbox id + pub(crate) id: String, + /// hypervisor config + pub(crate) config: HypervisorConfig, + /// agent socket path + pub(crate) agent_socket_path: String, + /// sandbox annotations + pub(crate) annotations: HashMap, + /// netns path + pub(crate) netns: Option, + /// hypervisor unix client + pub(crate) client: Option, + + exit_notify: Option>, + exit_waiter: Mutex<(mpsc::Receiver, i32)>, +} + +impl std::fmt::Debug for RemoteInner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RemoteInner") + .field("id", &self.id) + .field("config", &self.config) + .field("agent_socket_path", &self.agent_socket_path) + .field("annotations", &self.annotations) + .field("netns", &self.netns) + .finish() + } +} + +impl RemoteInner { + pub fn new() -> Self { + let (exit_notify, exit_waiter) = mpsc::channel(1); + + Self { + id: 
"".to_string(), + config: HypervisorConfig::default(), + agent_socket_path: "".to_string(), + annotations: HashMap::new(), + netns: None, + client: None, + + exit_notify: Some(exit_notify), + exit_waiter: Mutex::new((exit_waiter, 0)), + } + } + + fn get_ttrpc_client(&mut self) -> Result { + match self.client { + Some(ref c) => Ok(HypervisorClient::new(c.clone())), + None => { + let c = Client::connect(&format!( + "unix://{}", + &self.config.remote_info.hypervisor_socket + )) + .context("connect to ")?; + self.client = Some(c.clone()); + Ok(HypervisorClient::new(c)) + } + } + } + + pub(crate) async fn prepare_vm(&mut self, id: &str, netns: Option) -> Result<()> { + info!(sl!(), "Preparing REMOTE VM"); + self.id = id.to_string(); + + if let Some(netns_path) = &netns { + debug!(sl!(), "set netns for vmm master {:?}", &netns_path); + std::fs::metadata(netns_path).context("check netns path")?; + } + + let client = self.get_ttrpc_client()?; + + let ctx = context::Context::default(); + let req = CreateVMRequest { + id: id.to_string(), + annotations: self.annotations.clone(), + networkNamespacePath: netns.clone().unwrap_or_default(), + ..Default::default() + }; + info!(sl!(), "Preparing REMOTE VM req: {:?}", req.clone()); + let resp = client.create_vm(ctx, &req).await?; + info!(sl!(), "Preparing REMOTE VM resp: {:?}", resp.clone()); + self.agent_socket_path = resp.agentSocketPath; + self.netns = netns; + Ok(()) + } + + pub(crate) async fn start_vm(&mut self, timeout: i32) -> Result<()> { + info!(sl!(), "Starting REMOTE VM"); + + let mut min_timeout = DEFAULT_MIN_TIMEOUT; + if self.config.remote_info.hypervisor_timeout > 0 { + min_timeout = self.config.remote_info.hypervisor_timeout.min(timeout); + } + let timeout = min_timeout; + + let client = self.get_ttrpc_client()?; + + let req = StartVMRequest { + id: self.id.clone(), + ..Default::default() + }; + let ctx = + context::with_timeout(time::Duration::from_secs(timeout as u64).as_nanos() as i64); + let _resp = 
client.start_vm(ctx, &req).await?; + + Ok(()) + } + + pub(crate) async fn stop_vm(&mut self) -> Result<()> { + info!(sl!(), "Stopping REMOTE VM"); + + let client = self.get_ttrpc_client()?; + + let ctx = context::with_timeout(time::Duration::from_secs(1).as_nanos() as i64); + let req = StopVMRequest { + id: self.id.clone(), + ..Default::default() + }; + let _resp = client.stop_vm(ctx, &req).await?; + + self.exit_notify.take().unwrap().send(1).await?; + Ok(()) + } + + pub(crate) async fn pause_vm(&self) -> Result<()> { + warn!(sl!(), "RemoteInner::pause_vm(): NOT YET IMPLEMENTED"); + todo!() + } + + pub(crate) async fn wait_vm(&self) -> Result { + info!(sl!(), "Wait Remote VM"); + let mut waiter = self.exit_waiter.lock().await; + if let Some(exitcode) = waiter.0.recv().await { + waiter.1 = exitcode; + } + + Ok(waiter.1) + } + + pub(crate) async fn resume_vm(&self) -> Result<()> { + warn!(sl!(), "RemoteInner::resume_vm(): NOT YET IMPLEMENTED"); + todo!() + } + + pub(crate) async fn save_vm(&self) -> Result<()> { + warn!(sl!(), "RemoteInner::save_vm(): NOT YET IMPLEMENTED"); + todo!() + } + + pub(crate) async fn add_device(&self, device: DeviceType) -> Result { + warn!(sl!(), "RemoteInner::add_device(): NOT YET IMPLEMENTED"); + Ok(device) + } + + pub(crate) async fn remove_device(&self, _device: DeviceType) -> Result<()> { + warn!(sl!(), "RemoteInner::remove_device(): NOT YET IMPLEMENTED"); + todo!() + } + + pub(crate) async fn update_device(&self, _device: DeviceType) -> Result<()> { + warn!(sl!(), "RemoteInner::update_device(): NOT YET IMPLEMENTED"); + todo!() + } + + pub(crate) async fn get_agent_socket(&self) -> Result { + Ok(format!("{}://{}", REMOTE_SCHEME, &self.agent_socket_path)) + } + + pub(crate) async fn disconnect(&mut self) { + warn!(sl!(), "RemoteInner::disconnect(): NOT YET IMPLEMENTED"); + todo!() + } + + pub fn hypervisor_config(&self) -> HypervisorConfig { + info!( + sl!(), + "RemoteInner::hypervisor_config(): {:?}", + self.config.clone() + ); + 
self.config.clone() + } + + pub(crate) async fn get_thread_ids(&self) -> Result { + warn!(sl!(), "RemoteInner::get_thread_ids(): NOT YET IMPLEMENTED"); + let vcpu_thread_ids: VcpuThreadIds = VcpuThreadIds { + vcpus: HashMap::new(), + }; + Ok(vcpu_thread_ids) + } + + pub(crate) async fn get_vmm_master_tid(&self) -> Result { + warn!(sl!(), "RemoteInner::get_vmm_master_tid()"); + let tid = nix::unistd::gettid().as_raw(); + Ok(tid as u32) + } + + pub(crate) async fn get_ns_path(&self) -> Result { + info!(sl!(), "RemoteInner::get_ns_path()"); + Ok(self.netns.clone().unwrap_or_default()) + } + + pub(crate) async fn cleanup(&self) -> Result<()> { + info!(sl!(), "RemoteInner::cleanup(): NOT YET IMPLEMENTED"); + Ok(()) + } + + pub(crate) async fn resize_vcpu( + &mut self, + _old_vcpus: u32, + _new_vcpus: u32, + ) -> Result<(u32, u32)> { + info!(sl!(), "RemoteInner::resize_vcpu(): NOT YET IMPLEMENTED"); + Ok((_old_vcpus, _new_vcpus)) + } + + pub(crate) async fn get_pids(&self) -> Result> { + warn!(sl!(), "RemoteInner::get_pids(): NOT YET IMPLEMENTED"); + todo!() + } + + pub(crate) async fn check(&self) -> Result<()> { + warn!(sl!(), "RemoteInner::check(): NOT YET IMPLEMENTED"); + todo!() + } + + pub(crate) async fn get_jailer_root(&self) -> Result { + warn!(sl!(), "RemoteInner::get_jailer_root(): NOT YET IMPLEMENTED"); + Ok("".into()) + } + + pub(crate) async fn capabilities(&self) -> Result { + Ok(Capabilities::default()) + } + + pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + self.config = config; + } + + pub(crate) async fn get_hypervisor_metrics(&self) -> Result { + warn!( + sl!(), + "RemoteInner::get_hypervisor_metrics(): NOT YET IMPLEMENTED" + ); + todo!() + } + + pub(crate) fn set_capabilities(&mut self, _flag: CapabilityBits) { + warn!( + sl!(), + "RemoteInner::set_capabilities(): NOT YET IMPLEMENTED" + ); + todo!() + } + + pub(crate) fn set_guest_memory_block_size(&mut self, _size: u32) { + info!( + sl!(), + 
"RemoteInner::set_guest_memory_block_size(): NOT YET IMPLEMENTED" + ) + } + + pub(crate) fn guest_memory_block_size_mb(&self) -> u32 { + warn!( + sl!(), + "RemoteInner::guest_memory_block_size_mb(): NOT YET IMPLEMENTED" + ); + 0 + } + + pub(crate) fn resize_memory(&self, _new_mem_mb: u32) -> Result<(u32, MemoryConfig)> { + Ok(( + _new_mem_mb, + MemoryConfig { + ..Default::default() + }, + )) + } +} + +#[async_trait] +impl Persist for RemoteInner { + type State = HypervisorState; + type ConstructorArgs = (); + + /// Save a state of hypervisor + async fn save(&self) -> Result { + Ok(HypervisorState { + hypervisor_type: HYPERVISOR_REMOTE.to_string(), + id: self.id.clone(), + config: self.config.clone(), + netns: self.netns.clone(), + ..Default::default() + }) + } + + /// Restore hypervisor + async fn restore( + _hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let (exit_notify, exit_waiter) = mpsc::channel(1); + + Ok(RemoteInner { + id: hypervisor_state.id, + config: hypervisor_state.config, + agent_socket_path: "".to_string(), + annotations: HashMap::new(), + netns: hypervisor_state.netns, + client: None, + exit_notify: Some(exit_notify), + exit_waiter: Mutex::new((exit_waiter, 0)), + }) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/remote/mod.rs b/src/runtime-rs/crates/hypervisor/src/remote/mod.rs new file mode 100644 index 000000000..41932a2e6 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/remote/mod.rs @@ -0,0 +1,209 @@ +// Copyright 2024 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::HypervisorState; +use crate::{device::DeviceType, Hypervisor, HypervisorConfig, MemoryConfig, VcpuThreadIds}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use inner::RemoteInner; +use kata_types::capabilities::{Capabilities, CapabilityBits}; +use persist::sandbox_persist::Persist; +use std::sync::Arc; +use tokio::sync::RwLock; + +mod inner; + +#[derive(Debug)] +pub 
struct Remote { + inner: Arc>, +} + +impl Default for Remote { + fn default() -> Self { + Self::new() + } +} + +impl Remote { + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(RemoteInner::new())), + } + } + + pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + let mut inner = self.inner.write().await; + inner.set_hypervisor_config(config) + } +} + +#[async_trait] +impl Hypervisor for Remote { + async fn prepare_vm(&self, id: &str, netns: Option) -> Result<()> { + let mut inner = self.inner.write().await; + inner.prepare_vm(id, netns).await + } + + async fn start_vm(&self, timeout: i32) -> Result<()> { + let mut inner = self.inner.write().await; + inner.start_vm(timeout).await + } + + async fn stop_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.stop_vm().await + } + + async fn wait_vm(&self) -> Result { + let inner = self.inner.read().await; + inner.wait_vm().await + } + + async fn pause_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.pause_vm().await + } + + async fn resume_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.resume_vm().await + } + + async fn save_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.save_vm().await + } + + async fn add_device(&self, device: DeviceType) -> Result { + let inner = self.inner.write().await; + inner.add_device(device).await + } + + async fn remove_device(&self, device: DeviceType) -> Result<()> { + let inner = self.inner.write().await; + inner.remove_device(device).await + } + + async fn update_device(&self, device: DeviceType) -> Result<()> { + let inner = self.inner.write().await; + inner.update_device(device).await + } + + async fn get_agent_socket(&self) -> Result { + let inner = self.inner.read().await; + inner.get_agent_socket().await + } + + async fn disconnect(&self) { + let mut inner = self.inner.write().await; + inner.disconnect().await + } + + async fn 
hypervisor_config(&self) -> HypervisorConfig { + let inner = self.inner.read().await; + inner.hypervisor_config() + } + + async fn get_thread_ids(&self) -> Result { + let inner = self.inner.read().await; + inner.get_thread_ids().await + } + + async fn get_vmm_master_tid(&self) -> Result { + let inner = self.inner.read().await; + inner.get_vmm_master_tid().await + } + + async fn get_ns_path(&self) -> Result { + let inner = self.inner.read().await; + inner.get_ns_path().await + } + + async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.cleanup().await + } + + async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> { + let mut inner = self.inner.write().await; + inner.resize_vcpu(old_vcpus, new_vcpus).await + } + + async fn get_pids(&self) -> Result> { + let inner = self.inner.read().await; + inner.get_pids().await + } + + async fn check(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.check().await + } + + async fn get_jailer_root(&self) -> Result { + let inner = self.inner.read().await; + inner.get_jailer_root().await + } + + async fn save_state(&self) -> Result { + self.save().await + } + + async fn capabilities(&self) -> Result { + let inner = self.inner.read().await; + inner.capabilities().await + } + + async fn get_hypervisor_metrics(&self) -> Result { + let inner = self.inner.read().await; + inner.get_hypervisor_metrics().await + } + + async fn set_capabilities(&self, flag: CapabilityBits) { + let mut inner = self.inner.write().await; + inner.set_capabilities(flag) + } + + async fn set_guest_memory_block_size(&self, size: u32) { + let mut inner = self.inner.write().await; + inner.set_guest_memory_block_size(size); + } + + async fn guest_memory_block_size(&self) -> u32 { + let inner = self.inner.read().await; + inner.guest_memory_block_size_mb() + } + + async fn resize_memory(&self, new_mem_mb: u32) -> Result<(u32, MemoryConfig)> { + let inner = self.inner.read().await; + 
inner.resize_memory(new_mem_mb) + } + + async fn get_passfd_listener_addr(&self) -> Result<(String, u32)> { + Err(anyhow::anyhow!("Not yet supported")) + } +} + +#[async_trait] +impl Persist for Remote { + type State = HypervisorState; + type ConstructorArgs = (); + + /// Save a state of the component. + async fn save(&self) -> Result { + let inner = self.inner.read().await; + inner.save().await.context("save remote hypervisor state") + } + + /// Restore a component from a specified state. + async fn restore( + hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let inner = RemoteInner::restore(hypervisor_args, hypervisor_state).await?; + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs index 3d8a737e2..2f77c08a0 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs @@ -26,10 +26,12 @@ use hypervisor::{dragonball::Dragonball, HYPERVISOR_DRAGONBALL}; #[cfg(not(target_arch = "s390x"))] use hypervisor::{firecracker::Firecracker, HYPERVISOR_FIRECRACKER}; use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU}; +use hypervisor::{remote::Remote, HYPERVISOR_REMOTE}; #[cfg(all(feature = "dragonball", not(target_arch = "s390x")))] use kata_types::config::DragonballConfig; #[cfg(not(target_arch = "s390x"))] use kata_types::config::FirecrackerConfig; +use kata_types::config::RemoteConfig; use kata_types::config::{hypervisor::register_hypervisor_plugin, QemuConfig, TomlConfig}; #[cfg(all(feature = "cloud-hypervisor", not(target_arch = "s390x")))] @@ -75,6 +77,9 @@ impl RuntimeHandler for VirtContainer { register_hypervisor_plugin(HYPERVISOR_NAME_CH, ch_config); } + let remote_config = Arc::new(RemoteConfig::new()); + register_hypervisor_plugin("remote", remote_config); + Ok(()) } @@ -179,7 +184,6 @@ async fn 
new_hypervisor(toml_config: &TomlConfig) -> Result> .await; Ok(Arc::new(hypervisor)) } - #[cfg(all(feature = "cloud-hypervisor", not(target_arch = "s390x")))] HYPERVISOR_NAME_CH => { let mut hypervisor = CloudHypervisor::new(); @@ -190,6 +194,13 @@ async fn new_hypervisor(toml_config: &TomlConfig) -> Result> Ok(Arc::new(hypervisor)) } + HYPERVISOR_REMOTE => { + let mut hypervisor = Remote::new(); + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + Ok(Arc::new(hypervisor)) + } _ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)), } }