diff --git a/.github/workflows/commit-message-check.yaml b/.github/workflows/commit-message-check.yaml index a4852c0798..191e94b0da 100644 --- a/.github/workflows/commit-message-check.yaml +++ b/.github/workflows/commit-message-check.yaml @@ -63,7 +63,8 @@ jobs: # the entire commit message. # # - Body lines *can* be longer than the maximum if they start - # with a non-alphabetic character. + # with a non-alphabetic character or if there is no whitespace in + # the line. # # This allows stack traces, log files snippets, emails, long URLs, # etc to be specified. Some of these naturally "work" as they start @@ -74,8 +75,8 @@ jobs: # # - A SoB comment can be any length (as it is unreasonable to penalise # people with long names/email addresses :) - pattern: '^.+(\n([a-zA-Z].{0,149}|[^a-zA-Z\n].*|Signed-off-by:.*|))+$' - error: 'Body line too long (max 72)' + pattern: '^.+(\n([a-zA-Z].{0,150}|[^a-zA-Z\n].*|[^\s\n]*|Signed-off-by:.*|))+$' + error: 'Body line too long (max 150)' post_error: ${{ env.error_msg }} - name: Check Fixes diff --git a/VERSION b/VERSION index 23f468b7ea..3fef9b5648 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5.0-alpha2 +2.5.0-rc0 diff --git a/ci/lib.sh b/ci/lib.sh index f7f1eeeb75..3cb2c158f6 100644 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -18,6 +18,13 @@ clone_tests_repo() { if [ -d "$tests_repo_dir" ]; then [ -n "${CI:-}" ] && return + # git config --global --add safe.directory will always append + # the target to .gitconfig without checking the existence of + # the target, so it's better to check it before adding the target repo. + local sd="$(git config --global --get safe.directory ${tests_repo_dir} || true)" + if [ -z "${sd}" ]; then + git config --global --add safe.directory ${tests_repo_dir} + fi pushd "${tests_repo_dir}" git checkout "${branch}" git pull diff --git a/docs/Developer-Guide.md b/docs/Developer-Guide.md index 05664b7248..c1c2d62ab1 100644 --- a/docs/Developer-Guide.md +++ b/docs/Developer-Guide.md @@ -425,7 +425,7 @@ To build utilizing the same options as Kata, you should make use of the `configu $ cd $your_qemu_directory $ $packaging_dir/scripts/configure-hypervisor.sh kata-qemu > kata.cfg $ eval ./configure "$(cat kata.cfg)" -$ make -j $(nproc) +$ make -j $(nproc --ignore=1) $ sudo -E make install ``` diff --git a/docs/how-to/README.md b/docs/how-to/README.md index 6dd163ce73..aa09b49c73 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -5,7 +5,7 @@ - [Run Kata containers with `crictl`](run-kata-with-crictl.md) - [Run Kata Containers with Kubernetes](run-kata-with-k8s.md) - [How to use Kata Containers and Containerd](containerd-kata.md) -- [How to use Kata Containers and CRI (containerd) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md) +- [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md) - [Kata Containers and service mesh for Kubernetes](service-mesh.md) - [How to import Kata Containers logs into Fluentd](how-to-import-kata-logs-with-fluentd.md) diff --git a/docs/how-to/containerd-kata.md b/docs/how-to/containerd-kata.md index ffb7614a09..2ac0613c6d 100644 --- a/docs/how-to/containerd-kata.md +++ b/docs/how-to/containerd-kata.md @@ -132,9 +132,9 @@ The `RuntimeClass` is suggested. The following configuration includes two runtime classes: - `plugins.cri.containerd.runtimes.runc`: the runc, and it is the default runtime. -- `plugins.cri.containerd.runtimes.kata`: The function in containerd (reference [the document here](https://github.com/containerd/containerd/tree/master/runtime/v2#binary-naming)) +- `plugins.cri.containerd.runtimes.kata`: The function in containerd (reference [the document here](https://github.com/containerd/containerd/tree/main/runtime/v2#binary-naming)) where the dot-connected string `io.containerd.kata.v2` is translated to `containerd-shim-kata-v2` (i.e. the - binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)). + binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/main/runtime/v2)). ```toml [plugins.cri.containerd] diff --git a/docs/how-to/how-to-set-prometheus-in-k8s.md b/docs/how-to/how-to-set-prometheus-in-k8s.md index 2090c3bd51..e61db2d1ca 100644 --- a/docs/how-to/how-to-set-prometheus-in-k8s.md +++ b/docs/how-to/how-to-set-prometheus-in-k8s.md @@ -19,7 +19,7 @@ Also you should ensure that `kubectl` working correctly. > **Note**: More information about Kubernetes integrations: > - [Run Kata Containers with Kubernetes](run-kata-with-k8s.md) > - [How to use Kata Containers and Containerd](containerd-kata.md) -> - [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md) +> - [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md) ## Configure Prometheus diff --git a/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md b/docs/how-to/how-to-use-k8s-with-containerd-and-kata.md similarity index 93% rename from docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md rename to docs/how-to/how-to-use-k8s-with-containerd-and-kata.md index bacfdccc86..de7a34ef61 100644 --- a/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md +++ b/docs/how-to/how-to-use-k8s-with-containerd-and-kata.md @@ -1,15 +1,15 @@ -# How to use Kata Containers and CRI (containerd plugin) with Kubernetes +# How to use Kata Containers and containerd with Kubernetes This document describes how to set up a single-machine Kubernetes (k8s) cluster. The Kubernetes cluster will use the -[CRI containerd](https://github.com/containerd/containerd/) and -[Kata Containers](https://katacontainers.io) to launch untrusted workloads. +[containerd](https://github.com/containerd/containerd/) and +[Kata Containers](https://katacontainers.io) to launch workloads. ## Requirements - Kubernetes, Kubelet, `kubeadm` -- containerd with `cri` plug-in +- containerd - Kata Containers > **Note:** For information about the supported versions of these components, @@ -149,7 +149,7 @@ $ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master- ## Create runtime class for Kata Containers -By default, all pods are created with the default runtime configured in CRI containerd plugin. +By default, all pods are created with the default runtime configured in containerd. From Kubernetes v1.12, users can use [`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/#runtime-class) to specify a different runtime for Pods. ```bash @@ -166,7 +166,7 @@ $ sudo -E kubectl apply -f runtime.yaml ## Run pod in Kata Containers -If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod with the +If a pod has the `runtimeClassName` set to `kata`, the CRI runs the pod with the [Kata Containers runtime](../../src/runtime/README.md). - Create an pod configuration that using Kata Containers runtime diff --git a/docs/how-to/privileged.md b/docs/how-to/privileged.md index 10868f9a31..048509ff17 100644 --- a/docs/how-to/privileged.md +++ b/docs/how-to/privileged.md @@ -40,7 +40,7 @@ See below example config: ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration.toml" ``` - - [Kata Containers with Containerd and CRI documentation](how-to-use-k8s-with-cri-containerd-and-kata.md) + - [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md) - [Containerd CRI config documentation](https://github.com/containerd/containerd/blob/main/docs/cri/config.md) #### CRI-O diff --git a/docs/how-to/run-kata-with-k8s.md b/docs/how-to/run-kata-with-k8s.md index fd53838b88..4e5c58d5a2 100644 --- a/docs/how-to/run-kata-with-k8s.md +++ b/docs/how-to/run-kata-with-k8s.md @@ -15,7 +15,7 @@ After choosing one CRI implementation, you must make the appropriate configurati to ensure it integrates with Kata Containers. Kata Containers 1.5 introduced the `shimv2` for containerd 1.2.0, reducing the components -required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)). +required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](../how-to/how-to-use-k8s-with-containerd-and-kata.md#configure-containerd-to-use-kata-containers)). An equivalent shim implementation for CRI-O is planned. @@ -57,7 +57,7 @@ content shown below: To customize containerd to select Kata Containers runtime, follow our "Configure containerd to use Kata Containers" internal documentation -[here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers). +[here](../how-to/how-to-use-k8s-with-containerd-and-kata.md#configure-containerd-to-use-kata-containers). ## Install Kubernetes @@ -85,7 +85,7 @@ Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-tim Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock" ``` For more information about containerd see the "Configure Kubelet to use containerd" -documentation [here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-kubelet-to-use-containerd). +documentation [here](../how-to/how-to-use-k8s-with-containerd-and-kata.md#configure-kubelet-to-use-containerd). ## Run a Kubernetes pod with Kata Containers @@ -99,7 +99,18 @@ $ sudo systemctl restart kubelet $ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16 # If using containerd -$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16 +$ cat < Result<()>; async fn run(&mut self, p: Process) -> Result<()>; async fn destroy(&mut self) -> Result<()>; - fn exec(&mut self) -> Result<()>; + async fn exec(&mut self) -> Result<()>; } // LinuxContainer protected by Mutex @@ -587,14 +587,20 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { // only change stdio devices owner when user // isn't root. - if guser.uid != 0 { - set_stdio_permissions(guser.uid)?; + if !uid.is_root() { + set_stdio_permissions(uid)?; } setid(uid, gid)?; if !guser.additional_gids.is_empty() { - setgroups(guser.additional_gids.as_slice()).map_err(|e| { + let gids: Vec = guser + .additional_gids + .iter() + .map(|gid| Gid::from_raw(*gid)) + .collect(); + + unistd::setgroups(&gids).map_err(|e| { let _ = write_sync( cwfd, SYNC_FAILED, @@ -634,11 +640,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { capabilities::drop_privileges(cfd_log, c)?; } - if init { - // notify parent to run poststart hooks - write_sync(cwfd, SYNC_SUCCESS, "")?; - } - let args = oci_process.args.to_vec(); let env = oci_process.env.to_vec(); @@ -730,7 +731,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { // within the container to the specified user. // The ownership needs to match because it is created outside of // the container and needs to be localized. -fn set_stdio_permissions(uid: libc::uid_t) -> Result<()> { +fn set_stdio_permissions(uid: Uid) -> Result<()> { let meta = fs::metadata("/dev/null")?; let fds = [ std::io::stdin().as_raw_fd(), @@ -745,19 +746,13 @@ fn set_stdio_permissions(uid: libc::uid_t) -> Result<()> { continue; } - // According to the POSIX specification, -1 is used to indicate that owner and group - // are not to be changed. Since uid_t and gid_t are unsigned types, we have to wrap - // around to get -1. - let gid = 0u32.wrapping_sub(1); - // We only change the uid owner (as it is possible for the mount to // prefer a different gid, and there's no reason for us to change it). // The reason why we don't just leave the default uid=X mount setup is // that users expect to be able to actually use their console. Without // this code, you couldn't effectively run as a non-root user inside a // container and also have a console set up. - let res = unsafe { libc::fchown(*fd, uid, gid) }; - Errno::result(res).map_err(|e| anyhow!(e).context("set stdio permissions failed"))?; + unistd::fchown(*fd, Some(uid), None).with_context(|| "set stdio permissions failed")?; } Ok(()) @@ -1054,7 +1049,7 @@ impl BaseContainer for LinuxContainer { self.start(p).await?; if init { - self.exec()?; + self.exec().await?; self.status.transition(ContainerState::Running); } @@ -1102,7 +1097,7 @@ impl BaseContainer for LinuxContainer { Ok(()) } - fn exec(&mut self) -> Result<()> { + async fn exec(&mut self) -> Result<()> { let fifo = format!("{}/{}", &self.root, EXEC_FIFO_FILENAME); let fd = fcntl::open(fifo.as_str(), OFlag::O_WRONLY, Mode::from_bits_truncate(0))?; let data: &[u8] = &[0]; @@ -1114,6 +1109,26 @@ impl BaseContainer for LinuxContainer { .as_secs(); self.status.transition(ContainerState::Running); + + let spec = self + .config + .spec + .as_ref() + .ok_or_else(|| anyhow!("OCI spec was not found"))?; + let st = self.oci_state()?; + + // run poststart hook + if spec.hooks.is_some() { + info!(self.logger, "poststart hook"); + let hooks = spec + .hooks + .as_ref() + .ok_or_else(|| anyhow!("OCI hooks were not found"))?; + for h in hooks.poststart.iter() { + execute_hook(&self.logger, h, &st).await?; + } + } + unistd::close(fd)?; Ok(()) @@ -1335,20 +1350,6 @@ async fn join_namespaces( // notify child run prestart hooks completed info!(logger, "notify child run prestart hook completed!"); write_async(pipe_w, SYNC_SUCCESS, "").await?; - - info!(logger, "notify child parent ready to run poststart hook!"); - // wait to run poststart hook - read_async(pipe_r).await?; - info!(logger, "get ready to run poststart hook!"); - - // run poststart hook - if spec.hooks.is_some() { - info!(logger, "poststart hook"); - let hooks = spec.hooks.as_ref().unwrap(); - for h in hooks.poststart.iter() { - execute_hook(&logger, h, st).await?; - } - } } info!(logger, "wait for child process ready to run exec"); @@ -1477,12 +1478,6 @@ impl LinuxContainer { } } -fn setgroups(grps: &[libc::gid_t]) -> Result<()> { - let ret = unsafe { libc::setgroups(grps.len(), grps.as_ptr() as *const libc::gid_t) }; - Errno::result(ret).map(drop)?; - Ok(()) -} - use std::fs::OpenOptions; use std::io::Write; @@ -1652,6 +1647,7 @@ mod tests { use super::*; use crate::process::Process; use crate::skip_if_not_root; + use nix::unistd::Uid; use std::fs; use std::os::unix::fs::MetadataExt; use std::os::unix::io::AsRawFd; @@ -1797,7 +1793,7 @@ mod tests { let old_uid = meta.uid(); let uid = 1000; - set_stdio_permissions(uid).unwrap(); + set_stdio_permissions(Uid::from_raw(uid)).unwrap(); let meta = fs::metadata("/dev/stdin").unwrap(); assert_eq!(meta.uid(), uid); @@ -1809,7 +1805,7 @@ mod tests { assert_eq!(meta.uid(), uid); // restore the uid - set_stdio_permissions(old_uid).unwrap(); + set_stdio_permissions(Uid::from_raw(old_uid)).unwrap(); } #[test] @@ -2090,9 +2086,10 @@ mod tests { assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret); } - #[test] - fn test_linuxcontainer_exec() { - let ret = new_linux_container_and_then(|mut c: LinuxContainer| c.exec()); + #[tokio::test] + async fn test_linuxcontainer_exec() { + let (c, _dir) = new_linux_container(); + let ret = c.unwrap().exec().await; assert!(ret.is_err(), "Expecting Err, Got {:?}", ret); } diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index 4386dde5cf..6a023e093e 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -27,7 +27,6 @@ use nix::unistd::{self, dup, Pid}; use std::env; use std::ffi::OsStr; use std::fs::{self, File}; -use std::os::unix::ffi::OsStrExt; use std::os::unix::fs as unixfs; use std::os::unix::io::AsRawFd; use std::path::Path; @@ -382,27 +381,13 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result let contents_array: Vec<&str> = contents.split(' ').collect(); let hostname = contents_array[0].trim(); - if sethostname(OsStr::new(hostname)).is_err() { + if unistd::sethostname(OsStr::new(hostname)).is_err() { warn!(logger, "failed to set hostname"); } Ok(()) } -#[instrument] -fn sethostname(hostname: &OsStr) -> Result<()> { - let size = hostname.len() as usize; - - let result = - unsafe { libc::sethostname(hostname.as_bytes().as_ptr() as *const libc::c_char, size) }; - - if result != 0 { - Err(anyhow!("failed to set hostname")) - } else { - Ok(()) - } -} - // The Rust standard library had suppressed the default SIGPIPE behavior, // see https://github.com/rust-lang/rust/pull/13158. // Since the parent's signal handler would be inherited by it's child process, diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index e4c57c7093..c02fd7e030 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -246,7 +246,7 @@ impl AgentService { .get_container(&cid) .ok_or_else(|| anyhow!("Invalid container id"))?; - ctr.exec()?; + ctr.exec().await?; if sid == cid { return Ok(()); @@ -1769,34 +1769,25 @@ fn is_signal_handled(proc_status_file: &str, signum: u32) -> bool { let sig_mask: u64 = 1 << shift_count; let reader = BufReader::new(file); - // Read the file line by line using the lines() iterator from std::io::BufRead. - for (_index, line) in reader.lines().enumerate() { - let line = match line { - Ok(l) => l, - Err(_) => { - warn!(sl!(), "failed to read file {}", proc_status_file); - return false; - } - }; - if line.starts_with("SigCgt:") { + // read lines start with SigBlk/SigIgn/SigCgt and check any match the signal mask + reader + .lines() + .flatten() + .filter(|line| { + line.starts_with("SigBlk:") + || line.starts_with("SigIgn:") + || line.starts_with("SigCgt:") + }) + .any(|line| { let mask_vec: Vec<&str> = line.split(':').collect(); - if mask_vec.len() != 2 { - warn!(sl!(), "parse the SigCgt field failed"); - return false; - } - let sig_cgt_str = mask_vec[1].trim(); - let sig_cgt_mask = match u64::from_str_radix(sig_cgt_str, 16) { - Ok(h) => h, - Err(_) => { - warn!(sl!(), "failed to parse the str {} to hex", sig_cgt_str); - return false; + if mask_vec.len() == 2 { + let sig_str = mask_vec[1].trim(); + if let Ok(sig) = u64::from_str_radix(sig_str, 16) { + return sig & sig_mask == sig_mask; } - }; - - return (sig_cgt_mask & sig_mask) == sig_mask; - } - } - false + } + false + }) } fn do_mem_hotplug_by_probe(addrs: &[u64]) -> Result<()> { @@ -2618,7 +2609,12 @@ OtherField:other TestData { status_file_data: Some("SigBlk:0000000000000001"), signum: 1, - result: false, + result: true, + }, + TestData { + status_file_data: Some("SigIgn:0000000000000001"), + signum: 1, + result: true, }, TestData { status_file_data: None, diff --git a/src/libs/oci/src/lib.rs b/src/libs/oci/src/lib.rs index ace2238ef3..3bcaefa162 100644 --- a/src/libs/oci/src/lib.rs +++ b/src/libs/oci/src/lib.rs @@ -45,7 +45,7 @@ pub struct Spec { pub process: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub root: Option, - #[serde(default, skip_serializing_if = "String:: is_empty")] + #[serde(default, skip_serializing_if = "String::is_empty")] pub hostname: String, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub mounts: Vec, diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 757d0a48a9..fa07b87deb 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -158,6 +158,8 @@ DEFMEMSZ := 2048 # - vm template memory # - hugepage memory DEFMEMSLOTS := 10 +# Default maximum memory in MiB +DEFMAXMEMSZ := 0 #Default number of bridges DEFBRIDGES := 1 DEFENABLEANNOTATIONS := [\"enable_iommu\"] @@ -442,6 +444,7 @@ USER_VARS += DEFMAXVCPUS USER_VARS += DEFMAXVCPUS_ACRN USER_VARS += DEFMEMSZ USER_VARS += DEFMEMSLOTS +USER_VARS += DEFMAXMEMSZ USER_VARS += DEFBRIDGES USER_VARS += DEFNETWORKMODEL_ACRN USER_VARS += DEFNETWORKMODEL_CLH diff --git a/src/runtime/README.md b/src/runtime/README.md index d6738f66fb..c8aeec0ce7 100644 --- a/src/runtime/README.md +++ b/src/runtime/README.md @@ -87,6 +87,27 @@ following locations (in order): > **Note:** For both binaries, the first path that exists will be used. +#### Drop-in configuration file fragments + +To enable changing configuration without changing the configuration file +itself, drop-in configuration file fragments are supported. Once a +configuration file is parsed, if there is a subdirectory called `config.d` in +the same directory as the configuration file its contents will be loaded +in alphabetical order and each item will be parsed as a config file. Settings +loaded from these configuration file fragments override settings loaded from +the main configuration file and earlier fragments. Users are encouraged to use +familiar naming conventions to order the fragments (e.g. `config.d/10-this`, +`config.d/20-that` etc.). + +Non-existent or empty `config.d` directory is not an error (in other words, not +using configuration file fragments is fine). On the other hand, if fragments +are used, they must be valid - any errors while parsing fragments (unreadable +fragment files, contents not valid TOML) are treated the same as errors +while parsing the main configuration file. A `config.d` subdirectory affects +only the `configuration.toml` _in the same directory_. For fragments in +`config.d` to be parsed, there has to be a valid main configuration file _in +that location_ (it can be empty though). + ### Hypervisor specific configuration Kata Containers supports multiple hypervisors so your `configuration.toml` diff --git a/src/runtime/cmd/kata-runtime/factory_test.go b/src/runtime/cmd/kata-runtime/factory_test.go index f980bc5a56..3ab861420b 100644 --- a/src/runtime/cmd/kata-runtime/factory_test.go +++ b/src/runtime/cmd/kata-runtime/factory_test.go @@ -44,7 +44,7 @@ func TestFactoryCLIFunctionInit(t *testing.T) { tmpdir := t.TempDir() - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) set := flag.NewFlagSet("", 0) @@ -91,7 +91,7 @@ func TestFactoryCLIFunctionDestroy(t *testing.T) { tmpdir := t.TempDir() - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) set := flag.NewFlagSet("", 0) @@ -123,7 +123,7 @@ func TestFactoryCLIFunctionStatus(t *testing.T) { tmpdir := t.TempDir() - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) set := flag.NewFlagSet("", 0) diff --git a/src/runtime/cmd/kata-runtime/main_test.go b/src/runtime/cmd/kata-runtime/main_test.go index 95c7dc59d5..f8f98c3c47 100644 --- a/src/runtime/cmd/kata-runtime/main_test.go +++ b/src/runtime/cmd/kata-runtime/main_test.go @@ -33,8 +33,6 @@ const ( testDirMode = os.FileMode(0750) testFileMode = os.FileMode(0640) testExeFileMode = os.FileMode(0750) - - testConsole = "/dev/pts/999" ) var ( @@ -151,7 +149,7 @@ func newTestHypervisorConfig(dir string, create bool) (vc.HypervisorConfig, erro } // newTestRuntimeConfig creates a new RuntimeConfig -func newTestRuntimeConfig(dir, consolePath string, create bool) (oci.RuntimeConfig, error) { +func newTestRuntimeConfig(dir string, create bool) (oci.RuntimeConfig, error) { if dir == "" { return oci.RuntimeConfig{}, errors.New("BUG: need directory") } @@ -164,7 +162,6 @@ func newTestRuntimeConfig(dir, consolePath string, create bool) (oci.RuntimeConf return oci.RuntimeConfig{ HypervisorType: vc.QemuHypervisor, HypervisorConfig: hypervisorConfig, - Console: consolePath, }, nil } diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index f0be92ad08..5f1368ce82 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ b/src/runtime/config/configuration-acrn.toml.in @@ -118,6 +118,9 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_ACRN@" # but it will not abort container execution. #guest_hook_path = "/usr/share/oci/hooks" +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + [agent.@PROJECT_TYPE@] # If enabled, make the agent display debug-level messages. # (default: disabled) @@ -186,9 +189,6 @@ internetworking_model="@DEFNETWORKMODEL_ACRN@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ -# disable applying SELinux on the VMM process (default false) -disable_selinux=@DEFDISABLESELINUX@ - # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 41d4ae4930..f09c095f0e 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -39,6 +39,9 @@ image = "@IMAGEPATH@" # Default false # confidential_guest = true +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # Path to the firmware. # If you want Cloud Hypervisor to use a specific firmware, set its path below. # This is option is only used when confidential_guest is enabled. @@ -105,6 +108,12 @@ default_memory = @DEFMEMSZ@ # This is will determine the times that memory will be hotadded to sandbox/VM. #memory_slots = @DEFMEMSLOTS@ +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + # Shared file system type: # - virtio-fs (default) # - virtio-fs-nydus @@ -313,9 +322,6 @@ internetworking_model="@DEFNETWORKMODEL_CLH@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ -# disable applying SELinux on the VMM process (default false) -disable_selinux=@DEFDISABLESELINUX@ - # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in index b6892bd17c..b7f349c0dd 100644 --- a/src/runtime/config/configuration-fc.toml.in +++ b/src/runtime/config/configuration-fc.toml.in @@ -91,6 +91,7 @@ default_bridges = @DEFBRIDGES@ # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ + # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. @@ -104,6 +105,12 @@ default_memory = @DEFMEMSZ@ # Default 0 #memory_offset = 0 +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + # Block storage driver to be used for the hypervisor in case the container # rootfs is backed by a block device. This is virtio-scsi, virtio-blk # or nvdimm. @@ -214,6 +221,9 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ # Default 0-sized value means unlimited rate. #tx_rate_limiter_max_rate = 0 +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + [factory] # VM templating support. Once enabled, new VMs are created from template # using vm cloning. They will share the same initial kernel, initramfs and @@ -302,9 +312,6 @@ internetworking_model="@DEFNETWORKMODEL_FC@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ -# disable applying SELinux on the VMM process (default false) -disable_selinux=@DEFDISABLESELINUX@ - # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 702b71aadd..3ec44c8b6e 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -134,6 +134,12 @@ default_memory = @DEFMEMSZ@ # This is will determine the times that memory will be hotadded to sandbox/VM. #memory_slots = @DEFMEMSLOTS@ +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + # The size in MiB will be plused to max memory of hypervisor. # It is the memory address space for the NVDIMM devie. # If set block storage driver (block_device_driver) to "nvdimm", @@ -400,6 +406,9 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ # use legacy serial for guest console if available and implemented for architecture. Default false #use_legacy_serial = true +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + [factory] # VM templating support. Once enabled, new VMs are created from template # using vm cloning. They will share the same initial kernel, initramfs and @@ -517,9 +526,6 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ -# disable applying SELinux on the VMM process (default false) -disable_selinux=@DEFDISABLESELINUX@ - # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/go.mod b/src/runtime/go.mod index 14f0f3ecf3..36a2f618b9 100644 --- a/src/runtime/go.mod +++ b/src/runtime/go.mod @@ -33,6 +33,7 @@ require ( github.com/opencontainers/runc v1.1.2 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 github.com/opencontainers/selinux v1.10.1 + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.11.1 github.com/prometheus/client_model v0.2.0 diff --git a/src/runtime/go.sum b/src/runtime/go.sum index 86868038e9..63870b2325 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -767,6 +767,8 @@ github.com/opencontainers/selinux v1.10.1 h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGq github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= +github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= diff --git a/src/runtime/pkg/containerd-shim-v2/create.go b/src/runtime/pkg/containerd-shim-v2/create.go index eba829e2dd..6b14a94c7a 100644 --- a/src/runtime/pkg/containerd-shim-v2/create.go +++ b/src/runtime/pkg/containerd-shim-v2/create.go @@ -144,7 +144,7 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con // ctx will be canceled after this rpc service call, but the sandbox will live // across multiple rpc service calls. // - sandbox, _, err := katautils.CreateSandbox(s.ctx, vci, *ociSpec, *s.config, rootFs, r.ID, bundlePath, "", disableOutput, false) + sandbox, _, err := katautils.CreateSandbox(s.ctx, vci, *ociSpec, *s.config, rootFs, r.ID, bundlePath, disableOutput, false) if err != nil { return nil, err } @@ -179,7 +179,7 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con } }() - _, err = katautils.CreateContainer(ctx, s.sandbox, *ociSpec, rootFs, r.ID, bundlePath, "", disableOutput, runtimeConfig.DisableGuestEmptyDir) + _, err = katautils.CreateContainer(ctx, s.sandbox, *ociSpec, rootFs, r.ID, bundlePath, disableOutput, runtimeConfig.DisableGuestEmptyDir) if err != nil { return nil, err } diff --git a/src/runtime/pkg/containerd-shim-v2/create_test.go b/src/runtime/pkg/containerd-shim-v2/create_test.go index 994de7c436..121d5ea4db 100644 --- a/src/runtime/pkg/containerd-shim-v2/create_test.go +++ b/src/runtime/pkg/containerd-shim-v2/create_test.go @@ -51,7 +51,7 @@ func TestCreateSandboxSuccess(t *testing.T) { tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -99,7 +99,7 @@ func TestCreateSandboxFail(t *testing.T) { tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -136,7 +136,7 @@ func TestCreateSandboxConfigFail(t *testing.T) { tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -185,7 +185,7 @@ func TestCreateContainerSuccess(t *testing.T) { tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -224,7 +224,7 @@ func TestCreateContainerFail(t *testing.T) { tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -274,7 +274,7 @@ func TestCreateContainerConfigFail(t *testing.T) { tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) diff --git a/src/runtime/pkg/containerd-shim-v2/utils_test.go b/src/runtime/pkg/containerd-shim-v2/utils_test.go index 35b489920b..32195bcce0 100644 --- a/src/runtime/pkg/containerd-shim-v2/utils_test.go +++ b/src/runtime/pkg/containerd-shim-v2/utils_test.go @@ -28,7 +28,6 @@ const ( testSandboxID = "777-77-77777777" testContainerID = "42" - testConsole = "/dev/pts/888" testContainerTypeAnnotation = "io.kubernetes.cri.container-type" testSandboxIDAnnotation = "io.kubernetes.cri.sandbox-id" @@ -91,7 +90,7 @@ func newTestHypervisorConfig(dir string, create bool) (vc.HypervisorConfig, erro } // newTestRuntimeConfig creates a new RuntimeConfig -func newTestRuntimeConfig(dir, consolePath string, create bool) (oci.RuntimeConfig, error) { +func newTestRuntimeConfig(dir string, create bool) (oci.RuntimeConfig, error) { if dir == "" { return oci.RuntimeConfig{}, errors.New("BUG: need directory") } @@ -104,7 +103,6 @@ func newTestRuntimeConfig(dir, consolePath string, create bool) (oci.RuntimeConf return oci.RuntimeConfig{ HypervisorType: vc.QemuHypervisor, HypervisorConfig: hypervisorConfig, - Console: consolePath, }, nil } diff --git a/src/runtime/pkg/containerd-shim-v2/wait.go b/src/runtime/pkg/containerd-shim-v2/wait.go index 8a24c29bfa..ebb742790d 100644 --- a/src/runtime/pkg/containerd-shim-v2/wait.go +++ b/src/runtime/pkg/containerd-shim-v2/wait.go @@ -53,6 +53,11 @@ func wait(ctx context.Context, s *service, c *container, execID string) (int32, "container": c.id, "pid": processID, }).Error("Wait for process failed") + + // set return code if wait failed + if ret == 0 { + ret = exitCode255 + } } timeStamp := time.Now() diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index 748c5b4e51..61f9236b9c 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -444,7 +444,7 @@ func getVhostUserDevName(dirname string, majorNum, minorNum uint32) (string, err // DeviceState is a structure which represents host devices // plugged to a hypervisor, one Device can be shared among containers in POD -// Refs: virtcontainers/device/drivers/generic.go:GenericDevice +// Refs: pkg/device/drivers/generic.go:GenericDevice type DeviceState struct { // DriverOptions is specific options for each device driver // for example, for BlockDevice, we can set DriverOptions["block-driver"]="virtio-blk" @@ -459,7 +459,7 @@ type DeviceState struct { ID string // Type is used to specify driver type - // Refs: virtcontainers/device/config/config.go:DeviceType + // Refs: pkg/device/config/config.go:DeviceType Type string // Type of device: c, b, u or p diff --git a/src/runtime/pkg/device/config/pmem.go b/src/runtime/pkg/device/config/pmem.go index 44fd321873..cdd4db9988 100644 --- a/src/runtime/pkg/device/config/pmem.go +++ b/src/runtime/pkg/device/config/pmem.go @@ -26,7 +26,7 @@ const ( ) var ( - pmemLog = logrus.WithField("source", "virtcontainers/device/config") + pmemLog = logrus.WithField("source", "pkg/device/config") ) // SetLogger sets up a logger for this pkg diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index 527c9cfbc7..5676f4451c 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -226,6 +226,7 @@ type RuntimeConfigOptions struct { DefaultVCPUCount uint32 DefaultMaxVCPUCount uint32 DefaultMemSize uint32 + DefaultMaxMemorySize uint64 DefaultMsize9p uint32 DisableBlock bool EnableIOThreads bool diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index dbdfdac303..0903c8ea9e 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -10,8 +10,10 @@ package katautils import ( "errors" "fmt" + "io/ioutil" "os" "path/filepath" + "reflect" goruntime "runtime" "strings" @@ -24,6 +26,7 @@ import ( vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" + "github.com/pbnjay/memory" "github.com/sirupsen/logrus" ) @@ -121,6 +124,7 @@ type hypervisor struct { DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"` MemorySize uint32 `toml:"default_memory"` MemSlots uint32 `toml:"memory_slots"` + DefaultMaxMemorySize uint64 `toml:"default_maxmemory"` DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` PCIeRootPort uint32 `toml:"pcie_root_port"` @@ -176,6 +180,20 @@ type agent struct { DialTimeout uint32 `toml:"dial_timeout"` } +func (orig *tomlConfig) Clone() tomlConfig { + clone := *orig + clone.Hypervisor = make(map[string]hypervisor) + clone.Agent = make(map[string]agent) + + for key, value := range orig.Hypervisor { + clone.Hypervisor[key] = value + } + for key, value := range orig.Agent { + clone.Agent[key] = value + } + return clone +} + func (h hypervisor) path() (string, error) { p := h.Path @@ -220,7 +238,7 @@ func (h hypervisor) initrd() (string, error) { p := h.Initrd if p == "" { - return "", errors.New("initrd is not set") + return "", nil } return ResolvePath(p) @@ -230,7 +248,7 @@ func (h hypervisor) image() (string, error) { p := h.Image if p == "" { - return "", errors.New("image is not set") + return "", nil } return ResolvePath(p) @@ -400,6 +418,20 @@ func (h hypervisor) defaultMemOffset() uint64 { return offset } +func (h hypervisor) defaultMaxMemSz() uint64 { + hostMemory := memory.TotalMemory() / 1024 / 1024 //MiB + + if h.DefaultMaxMemorySize == 0 { + return hostMemory + } + + if h.DefaultMaxMemorySize > hostMemory { + return hostMemory + } + + return h.DefaultMaxMemorySize +} + func (h hypervisor) defaultBridges() uint32 { if h.DefaultBridges == 0 { return defaultBridgesCount @@ -474,24 +506,6 @@ func (h hypervisor) vhostUserStorePath() string { return h.VhostUserStorePath } -func (h hypervisor) getInitrdAndImage() (initrd string, image string, err error) { - initrd, errInitrd := h.initrd() - - image, errImage := h.image() - - if h.ConfidentialGuest && h.MachineType == vc.QemuCCWVirtio { - if image != "" || initrd != "" { - return "", "", errors.New("Neither the image nor initrd path may be set for Secure Execution") - } - } else if image != "" && initrd != "" { - return "", "", errors.New("having both an image and an initrd defined in the configuration file is not supported") - } else if errInitrd != nil && errImage != nil { - return "", "", fmt.Errorf("Either initrd or image must be set to a valid path (initrd: %v) (image: %v)", errInitrd, errImage) - } - - return -} - func (h hypervisor) getDiskRateLimiterBwMaxRate() int64 { return h.DiskRateLimiterBwMaxRate } @@ -601,7 +615,12 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } - initrd, image, err := h.getInitrdAndImage() + initrd, err := h.initrd() + if err != nil { + return vc.HypervisorConfig{}, err + } + + image, err := h.image() if err != nil { return vc.HypervisorConfig{}, err } @@ -635,6 +654,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), EntropySource: h.GetEntropySource(), EntropySourceList: h.EntropySourceList, DefaultBridges: h.defaultBridges(), @@ -649,6 +669,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { RxRateLimiterMaxRate: rxRateLimiterMaxRate, TxRateLimiterMaxRate: txRateLimiterMaxRate, EnableAnnotations: h.EnableAnnotations, + DisableSeLinux: h.DisableSeLinux, }, nil } @@ -663,7 +684,12 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } - initrd, image, err := h.getInitrdAndImage() + initrd, err := h.initrd() + if err != nil { + return vc.HypervisorConfig{}, err + } + + image, err := h.image() if err != nil { return vc.HypervisorConfig{}, err } @@ -736,6 +762,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), MemOffset: h.defaultMemOffset(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), VirtioMem: h.VirtioMem, EntropySource: h.GetEntropySource(), EntropySourceList: h.EntropySourceList, @@ -779,6 +806,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestSwap: h.GuestSwap, Rootless: h.Rootless, LegacySerial: h.LegacySerial, + DisableSeLinux: h.DisableSeLinux, }, nil } @@ -833,6 +861,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), EntropySource: h.GetEntropySource(), EntropySourceList: h.EntropySourceList, DefaultBridges: h.defaultBridges(), @@ -842,6 +871,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { BlockDeviceDriver: blockDriver, DisableVhostNet: h.DisableVhostNet, GuestHookPath: h.guestHookPath(), + DisableSeLinux: h.DisableSeLinux, EnableAnnotations: h.EnableAnnotations, }, nil } @@ -857,7 +887,12 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } - initrd, image, err := h.getInitrdAndImage() + initrd, err := h.initrd() + if err != nil { + return vc.HypervisorConfig{}, err + } + + image, err := h.image() if err != nil { return vc.HypervisorConfig{}, err } @@ -910,6 +945,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), MemOffset: h.defaultMemOffset(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), VirtioMem: h.VirtioMem, EntropySource: h.GetEntropySource(), EntropySourceList: h.EntropySourceList, @@ -1305,9 +1341,179 @@ func decodeConfig(configPath string) (tomlConfig, string, error) { return tomlConf, resolved, err } + err = decodeDropIns(resolved, &tomlConf) + if err != nil { + return tomlConf, resolved, err + } + return tomlConf, resolved, nil } +func decodeDropIns(mainConfigPath string, tomlConf *tomlConfig) error { + configDir := filepath.Dir(mainConfigPath) + dropInDir := filepath.Join(configDir, "config.d") + + files, err := ioutil.ReadDir(dropInDir) + if err != nil { + if !os.IsNotExist(err) { + return fmt.Errorf("error reading %q directory: %s", dropInDir, err) + } else { + return nil + } + } + + for _, file := range files { + dropInFpath := filepath.Join(dropInDir, file.Name()) + + err = updateFromDropIn(dropInFpath, tomlConf) + if err != nil { + return err + } + } + + return nil +} + +func updateFromDropIn(dropInFpath string, tomlConf *tomlConfig) error { + configData, err := os.ReadFile(dropInFpath) + if err != nil { + return fmt.Errorf("error reading file %q: %s", dropInFpath, err) + } + + // Ordinarily, BurntSushi only updates fields of tomlConfig that are + // changed by the file and leaves the rest alone. This doesn't apply + // though to tomlConfig substructures that are stored in maps. Their + // previous contents are erased by toml.Decode() and only fields changed by + // the file are set. To work around this, a bit of juggling is needed to + // preserve the previous contents and merge them manually with the incoming + // changes afterwards, using reflection. + tomlConfOrig := tomlConf.Clone() + + var md toml.MetaData + md, err = toml.Decode(string(configData), &tomlConf) + + if err != nil { + return fmt.Errorf("error decoding file %q: %s", dropInFpath, err) + } + + if len(md.Undecoded()) > 0 { + msg := fmt.Sprintf("warning: undecoded keys in %q: %+v", dropInFpath, md.Undecoded()) + kataUtilsLogger.Warn(msg) + } + + for _, key := range md.Keys() { + err = applyKey(*tomlConf, key, &tomlConfOrig) + if err != nil { + return fmt.Errorf("error applying key '%+v' from drop-in file %q: %s", key, dropInFpath, err) + } + } + + tomlConf.Hypervisor = tomlConfOrig.Hypervisor + tomlConf.Agent = tomlConfOrig.Agent + + return nil +} + +func applyKey(sourceConf tomlConfig, key []string, targetConf *tomlConfig) error { + // Any key that might need treatment provided by this function has to have + // (at least) three components: [ map_name map_key_name field_toml_tag ], + // e.g. [agent kata enable_tracing] or [hypervisor qemu confidential_guest]. + if len(key) < 3 { + return nil + } + switch key[0] { + case "agent": + return applyAgentKey(sourceConf, key[1:], targetConf) + case "hypervisor": + return applyHypervisorKey(sourceConf, key[1:], targetConf) + // The table the 'key' is in is not stored in a map so no special handling + // is needed. + } + return nil +} + +// Both of the following functions copy the value of a 'sourceConf' field +// identified by the TOML tag in 'key' into the corresponding field in +// 'targetConf'. +func applyAgentKey(sourceConf tomlConfig, key []string, targetConf *tomlConfig) error { + agentName := key[0] + tomlKeyName := key[1] + + sourceAgentConf := sourceConf.Agent[agentName] + targetAgentConf := targetConf.Agent[agentName] + + err := copyFieldValue(reflect.ValueOf(&sourceAgentConf).Elem(), tomlKeyName, reflect.ValueOf(&targetAgentConf).Elem()) + if err != nil { + return err + } + + targetConf.Agent[agentName] = targetAgentConf + return nil +} + +func applyHypervisorKey(sourceConf tomlConfig, key []string, targetConf *tomlConfig) error { + hypervisorName := key[0] + tomlKeyName := key[1] + + sourceHypervisorConf := sourceConf.Hypervisor[hypervisorName] + targetHypervisorConf := targetConf.Hypervisor[hypervisorName] + + err := copyFieldValue(reflect.ValueOf(&sourceHypervisorConf).Elem(), tomlKeyName, reflect.ValueOf(&targetHypervisorConf).Elem()) + if err != nil { + return err + } + + targetConf.Hypervisor[hypervisorName] = targetHypervisorConf + return nil +} + +// Copies a TOML value of the source field identified by its TOML key to the +// corresponding field of the target. Basically +// 'target[tomlKeyName] = source[tomlKeyNmae]'. +func copyFieldValue(source reflect.Value, tomlKeyName string, target reflect.Value) error { + val, err := getValue(source, tomlKeyName) + if err != nil { + return fmt.Errorf("error getting key %q from a decoded drop-in conf file: %s", tomlKeyName, err) + } + err = setValue(target, tomlKeyName, val) + if err != nil { + return fmt.Errorf("error setting key %q to a new value '%v': %s", tomlKeyName, val.Interface(), err) + } + return nil +} + +// The first argument is expected to be a reflect.Value of a tomlConfig +// substructure (hypervisor, agent), the second argument is a TOML key +// corresponding to the substructure field whose TOML value is queried. +// Return value corresponds to 'tomlConfStruct[tomlKey]'. +func getValue(tomlConfStruct reflect.Value, tomlKey string) (reflect.Value, error) { + tomlConfStructType := tomlConfStruct.Type() + for j := 0; j < tomlConfStruct.NumField(); j++ { + fieldTomlTag := tomlConfStructType.Field(j).Tag.Get("toml") + if fieldTomlTag == tomlKey { + return tomlConfStruct.Field(j), nil + } + } + return reflect.Value{}, fmt.Errorf("key %q not found", tomlKey) +} + +// The first argument is expected to be a reflect.Value of a tomlConfig +// substructure (hypervisor, agent), the second argument is a TOML key +// corresponding to the substructure field whose TOML value is to be changed, +// the third argument is a reflect.Value representing the new TOML value. +// An equivalent of 'tomlConfStruct[tomlKey] = newVal'. +func setValue(tomlConfStruct reflect.Value, tomlKey string, newVal reflect.Value) error { + tomlConfStructType := tomlConfStruct.Type() + for j := 0; j < tomlConfStruct.NumField(); j++ { + fieldTomlTag := tomlConfStructType.Field(j).Tag.Get("toml") + if fieldTomlTag == tomlKey { + tomlConfStruct.Field(j).Set(newVal) + return nil + } + } + return fmt.Errorf("key %q not found", tomlKey) +} + // checkConfig checks the validity of the specified config. func checkConfig(config oci.RuntimeConfig) error { if err := checkNetNsConfig(config); err != nil { diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 0bcac2137f..86619ab13d 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -22,6 +22,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" + "github.com/pbnjay/memory" "github.com/stretchr/testify/assert" ) @@ -85,6 +86,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf sharedFS := "virtio-9p" virtioFSdaemon := path.Join(dir, "virtiofsd") epcSize := int64(0) + maxMemory := uint64(memory.TotalMemory() / 1024 / 1024) configFileOptions := ktu.RuntimeConfigOptions{ Hypervisor: "qemu", @@ -104,6 +106,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf DefaultVCPUCount: defaultVCPUCount, DefaultMaxVCPUCount: defaultMaxVCPUCount, DefaultMemSize: defaultMemSize, + DefaultMaxMemorySize: maxMemory, DefaultMsize9p: defaultMsize9p, HypervisorDebug: hypervisorDebug, RuntimeDebug: runtimeDebug, @@ -153,6 +156,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf NumVCPUs: defaultVCPUCount, DefaultMaxVCPUs: getCurrentCpuNum(), MemorySize: defaultMemSize, + DefaultMaxMemorySize: maxMemory, DisableBlockDeviceUse: disableBlockDevice, BlockDeviceDriver: defaultBlockDeviceDriver, DefaultBridges: defaultBridgesCount, @@ -1017,7 +1021,7 @@ func TestHypervisorDefaultsKernel(t *testing.T) { assert.Equal(h.kernelParams(), kernelParams, "custom hypervisor kernel parameterms wrong") } -// The default initrd path is not returned by h.initrd() +// The default initrd path is not returned by h.initrd(), it isn't an error if path isn't provided func TestHypervisorDefaultsInitrd(t *testing.T) { assert := assert.New(t) @@ -1041,18 +1045,18 @@ func TestHypervisorDefaultsInitrd(t *testing.T) { defaultInitrdPath = testInitrdPath h := hypervisor{} p, err := h.initrd() - assert.Error(err) + assert.NoError(err) assert.Equal(p, "", "default Image path wrong") // test path resolution defaultInitrdPath = testInitrdLinkPath h = hypervisor{} p, err = h.initrd() - assert.Error(err) + assert.NoError(err) assert.Equal(p, "") } -// The default image path is not returned by h.image() +// The default image path is not returned by h.image(), it isn't an error if path isn't provided func TestHypervisorDefaultsImage(t *testing.T) { assert := assert.New(t) @@ -1076,14 +1080,14 @@ func TestHypervisorDefaultsImage(t *testing.T) { defaultImagePath = testImagePath h := hypervisor{} p, err := h.image() - assert.Error(err) + assert.NoError(err) assert.Equal(p, "", "default Image path wrong") // test path resolution defaultImagePath = testImageLinkPath h = hypervisor{} p, err = h.image() - assert.Error(err) + assert.NoError(err) assert.Equal(p, "") } @@ -1654,3 +1658,69 @@ func TestValidateBindMounts(t *testing.T) { } } } + +func TestLoadDropInConfiguration(t *testing.T) { + tmpdir := t.TempDir() + + // Test Runtime and Hypervisor to represent structures stored directly and + // in maps, respectively. For each of them, test + // - a key that's only set in the base config file + // - a key that's only set in a drop-in + // - a key that's set in the base config file and then changed by a drop-in + // - a key that's set in a drop-in and then overridden by another drop-in + // Avoid default values to reduce the risk of mistaking a result of + // something having gone wrong with the expected value. + + runtimeConfigFileData := ` +[hypervisor.qemu] +path = "/usr/bin/qemu-kvm" +default_bridges = 3 +[runtime] +enable_debug = true +internetworking_model="tcfilter" +` + dropInData := ` +[hypervisor.qemu] +default_vcpus = 2 +default_bridges = 4 +shared_fs = "virtio-fs" +[runtime] +sandbox_cgroup_only=true +internetworking_model="macvtap" +vfio_mode="guest-kernel" +` + dropInOverrideData := ` +[hypervisor.qemu] +shared_fs = "virtio-9p" +[runtime] +vfio_mode="vfio" +` + + configPath := path.Join(tmpdir, "runtime.toml") + err := createConfig(configPath, runtimeConfigFileData) + assert.NoError(t, err) + + dropInDir := path.Join(tmpdir, "config.d") + err = os.Mkdir(dropInDir, os.FileMode(0777)) + assert.NoError(t, err) + + dropInPath := path.Join(dropInDir, "10-base") + err = createConfig(dropInPath, dropInData) + assert.NoError(t, err) + + dropInOverridePath := path.Join(dropInDir, "10-override") + err = createConfig(dropInOverridePath, dropInOverrideData) + assert.NoError(t, err) + + config, _, err := decodeConfig(configPath) + assert.NoError(t, err) + + assert.Equal(t, config.Hypervisor["qemu"].Path, "/usr/bin/qemu-kvm") + assert.Equal(t, config.Hypervisor["qemu"].NumVCPUs, int32(2)) + assert.Equal(t, config.Hypervisor["qemu"].DefaultBridges, uint32(4)) + assert.Equal(t, config.Hypervisor["qemu"].SharedFS, "virtio-9p") + assert.Equal(t, config.Runtime.Debug, true) + assert.Equal(t, config.Runtime.SandboxCgroupOnly, true) + assert.Equal(t, config.Runtime.InterNetworkModel, "macvtap") + assert.Equal(t, config.Runtime.VfioMode, "vfio") +} diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index e456d37276..ffcaa07154 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -111,12 +111,12 @@ func SetEphemeralStorageType(ociSpec specs.Spec, disableGuestEmptyDir bool) spec // CreateSandbox create a sandbox container func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeConfig oci.RuntimeConfig, rootFs vc.RootFs, - containerID, bundlePath, console string, disableOutput, systemdCgroup bool) (_ vc.VCSandbox, _ vc.Process, err error) { + containerID, bundlePath string, disableOutput, systemdCgroup bool) (_ vc.VCSandbox, _ vc.Process, err error) { span, ctx := katatrace.Trace(ctx, nil, "CreateSandbox", createTracingTags) katatrace.AddTags(span, "container_id", containerID) defer span.End() - sandboxConfig, err := oci.SandboxConfig(ociSpec, runtimeConfig, bundlePath, containerID, console, disableOutput, systemdCgroup) + sandboxConfig, err := oci.SandboxConfig(ociSpec, runtimeConfig, bundlePath, containerID, disableOutput, systemdCgroup) if err != nil { return nil, vc.Process{}, err } @@ -219,7 +219,7 @@ func checkForFIPS(sandboxConfig *vc.SandboxConfig) error { } // CreateContainer create a container -func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Spec, rootFs vc.RootFs, containerID, bundlePath, console string, disableOutput bool, disableGuestEmptyDir bool) (vc.Process, error) { +func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Spec, rootFs vc.RootFs, containerID, bundlePath string, disableOutput bool, disableGuestEmptyDir bool) (vc.Process, error) { var c vc.VCContainer span, ctx := katatrace.Trace(ctx, nil, "CreateContainer", createTracingTags) @@ -228,7 +228,7 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp ociSpec = SetEphemeralStorageType(ociSpec, disableGuestEmptyDir) - contConfig, err := oci.ContainerConfig(ociSpec, bundlePath, containerID, console, disableOutput) + contConfig, err := oci.ContainerConfig(ociSpec, bundlePath, containerID, disableOutput) if err != nil { return vc.Process{}, err } diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go index 15b4561137..b1e4cf2a90 100644 --- a/src/runtime/pkg/katautils/create_test.go +++ b/src/runtime/pkg/katautils/create_test.go @@ -28,7 +28,6 @@ import ( ) const ( - testConsole = "/dev/pts/999" testContainerTypeAnnotation = "io.kubernetes.cri-o.ContainerType" testSandboxIDAnnotation = "io.kubernetes.cri-o.SandboxID" testContainerTypeContainer = "container" @@ -50,7 +49,7 @@ func init() { } // newTestRuntimeConfig creates a new RuntimeConfig -func newTestRuntimeConfig(dir, consolePath string, create bool) (oci.RuntimeConfig, error) { +func newTestRuntimeConfig(dir string, create bool) (oci.RuntimeConfig, error) { if dir == "" { return oci.RuntimeConfig{}, errors.New("BUG: need directory") } @@ -63,7 +62,6 @@ func newTestRuntimeConfig(dir, consolePath string, create bool) (oci.RuntimeConf return oci.RuntimeConfig{ HypervisorType: vc.QemuHypervisor, HypervisorConfig: hypervisorConfig, - Console: consolePath, }, nil } @@ -213,7 +211,7 @@ func TestCreateSandboxConfigFail(t *testing.T) { tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -233,7 +231,7 @@ func TestCreateSandboxConfigFail(t *testing.T) { rootFs := vc.RootFs{Mounted: true} - _, _, err = CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, testConsole, true, true) + _, _, err = CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, true, true) assert.Error(err) } @@ -246,7 +244,7 @@ func TestCreateSandboxFail(t *testing.T) { tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -254,7 +252,7 @@ func TestCreateSandboxFail(t *testing.T) { rootFs := vc.RootFs{Mounted: true} - _, _, err = CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, testConsole, true, true) + _, _, err = CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, true, true) assert.Error(err) assert.True(vcmock.IsMockError(err)) } @@ -268,7 +266,7 @@ func TestCreateSandboxAnnotations(t *testing.T) { tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t) - runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + runtimeConfig, err := newTestRuntimeConfig(tmpdir, true) assert.NoError(err) spec, err := compatoci.ParseConfigJSON(bundlePath) @@ -290,7 +288,7 @@ func TestCreateSandboxAnnotations(t *testing.T) { testingImpl.CreateSandboxFunc = nil }() - sandbox, _, err := CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, testConsole, true, true) + sandbox, _, err := CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, true, true) assert.NoError(err) netNsPath, err := sandbox.Annotations("nerdctl/network-namespace") @@ -356,7 +354,7 @@ func TestCreateContainerContainerConfigFail(t *testing.T) { rootFs := vc.RootFs{Mounted: true} for _, disableOutput := range []bool{true, false} { - _, err = CreateContainer(context.Background(), mockSandbox, spec, rootFs, testContainerID, bundlePath, testConsole, disableOutput, false) + _, err = CreateContainer(context.Background(), mockSandbox, spec, rootFs, testContainerID, bundlePath, disableOutput, false) assert.Error(err) assert.False(vcmock.IsMockError(err)) assert.True(strings.Contains(err.Error(), containerType)) @@ -383,7 +381,7 @@ func TestCreateContainerFail(t *testing.T) { rootFs := vc.RootFs{Mounted: true} for _, disableOutput := range []bool{true, false} { - _, err = CreateContainer(context.Background(), mockSandbox, spec, rootFs, testContainerID, bundlePath, testConsole, disableOutput, false) + _, err = CreateContainer(context.Background(), mockSandbox, spec, rootFs, testContainerID, bundlePath, disableOutput, false) assert.Error(err) assert.True(vcmock.IsMockError(err)) } @@ -417,7 +415,7 @@ func TestCreateContainer(t *testing.T) { rootFs := vc.RootFs{Mounted: true} for _, disableOutput := range []bool{true, false} { - _, err = CreateContainer(context.Background(), mockSandbox, spec, rootFs, testContainerID, bundlePath, testConsole, disableOutput, false) + _, err = CreateContainer(context.Background(), mockSandbox, spec, rootFs, testContainerID, bundlePath, disableOutput, false) assert.NoError(err) } } diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 71423cf0cc..055acb9b09 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -105,7 +105,6 @@ type RuntimeConfig struct { //Experimental features enabled Experimental []exp.Feature - Console string JaegerEndpoint string JaegerUser string JaegerPassword string @@ -187,16 +186,27 @@ func cmdEnvs(spec specs.Spec, envs []types.EnvVar) []types.EnvVar { func newMount(m specs.Mount) vc.Mount { readonly := false + bind := false for _, flag := range m.Options { - if flag == "ro" { + switch flag { + case "rbind", "bind": + bind = true + case "ro": readonly = true - break } } + + // normal bind mounts, set type to bind. + // https://github.com/opencontainers/runc/blob/v1.1.3/libcontainer/specconv/spec_linux.go#L512-L520 + mountType := m.Type + if mountType != vc.KataEphemeralDevType && mountType != vc.KataLocalDevType && bind { + mountType = "bind" + } + return vc.Mount{ Source: m.Source, Destination: m.Destination, - Type: m.Type, + Type: mountType, Options: m.Options, ReadOnly: readonly, } @@ -861,8 +871,8 @@ func addAgentConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig) error // SandboxConfig converts an OCI compatible runtime configuration file // to a virtcontainers sandbox configuration structure. -func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, console string, detach, systemdCgroup bool) (vc.SandboxConfig, error) { - containerConfig, err := ContainerConfig(ocispec, bundlePath, cid, console, detach) +func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid string, detach, systemdCgroup bool) (vc.SandboxConfig, error) { + containerConfig, err := ContainerConfig(ocispec, bundlePath, cid, detach) if err != nil { return vc.SandboxConfig{}, err } @@ -913,9 +923,6 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c DisableGuestSeccomp: runtime.DisableGuestSeccomp, - // Q: Is this really necessary? @weizhang555 - // Spec: &ocispec, - Experimental: runtime.Experimental, } @@ -947,7 +954,7 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c // ContainerConfig converts an OCI compatible runtime configuration // file to a virtcontainers container configuration structure. -func ContainerConfig(ocispec specs.Spec, bundlePath, cid, console string, detach bool) (vc.ContainerConfig, error) { +func ContainerConfig(ocispec specs.Spec, bundlePath, cid string, detach bool) (vc.ContainerConfig, error) { rootfs := vc.RootFs{Target: ocispec.Root.Path, Mounted: true} if !filepath.IsAbs(rootfs.Target) { rootfs.Target = filepath.Join(bundlePath, ocispec.Root.Path) @@ -962,7 +969,6 @@ func ContainerConfig(ocispec specs.Spec, bundlePath, cid, console string, detach User: strconv.FormatUint(uint64(ocispec.Process.User.UID), 10), PrimaryGroup: strconv.FormatUint(uint64(ocispec.Process.User.GID), 10), Interactive: ocispec.Process.Terminal, - Console: console, Detach: detach, NoNewPrivileges: ocispec.Process.NoNewPrivileges, } diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index 2ddd42d111..b5e7440b0c 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -38,7 +38,6 @@ const ( var ( tempRoot = "" tempBundlePath = "" - consolePath = "" ) func createConfig(fileName string, fileData string) (string, error) { @@ -72,7 +71,6 @@ func TestMinimalSandboxConfig(t *testing.T) { runtimeConfig := RuntimeConfig{ HypervisorType: vc.QemuHypervisor, - Console: consolePath, } capList := []string{"CAP_AUDIT_WRITE", "CAP_KILL", "CAP_NET_BIND_SERVICE"} @@ -94,7 +92,6 @@ func TestMinimalSandboxConfig(t *testing.T) { PrimaryGroup: "0", SupplementaryGroups: []string{"10", "29"}, Interactive: true, - Console: consolePath, NoNewPrivileges: true, Capabilities: &specs.LinuxCapabilities{ Bounding: capList, @@ -181,7 +178,7 @@ func TestMinimalSandboxConfig(t *testing.T) { SystemdCgroup: true, } - sandboxConfig, err := SandboxConfig(spec, runtimeConfig, tempBundlePath, containerID, consolePath, false, true) + sandboxConfig, err := SandboxConfig(spec, runtimeConfig, tempBundlePath, containerID, false, true) assert.NoError(err) assert.Exactly(sandboxConfig, expectedSandboxConfig) @@ -452,7 +449,6 @@ func TestMain(m *testing.M) { } tempBundlePath = filepath.Join(tempRoot, "ocibundle") - consolePath = filepath.Join(tempRoot, "console") /* Create temp bundle directory if necessary */ err = os.MkdirAll(tempBundlePath, dirMode) @@ -513,7 +509,6 @@ func TestAddAssetAnnotations(t *testing.T) { runtimeConfig := RuntimeConfig{ HypervisorType: vc.QemuHypervisor, - Console: consolePath, } // Try annotations without enabling them first @@ -567,7 +562,6 @@ func TestAddAgentAnnotations(t *testing.T) { runtimeConfig := RuntimeConfig{ HypervisorType: vc.QemuHypervisor, - Console: consolePath, } ocispec.Annotations[vcAnnotations.KernelModules] = strings.Join(expectedAgentConfig.KernelModules, KernelModulesSeparator) @@ -594,7 +588,6 @@ func TestContainerPipeSizeAnnotation(t *testing.T) { runtimeConfig := RuntimeConfig{ HypervisorType: vc.QemuHypervisor, - Console: consolePath, } ocispec.Annotations[vcAnnotations.AgentContainerPipeSize] = "foo" @@ -629,7 +622,6 @@ func TestAddHypervisorAnnotations(t *testing.T) { runtimeConfig := RuntimeConfig{ HypervisorType: vc.QemuHypervisor, - Console: consolePath, } runtimeConfig.HypervisorConfig.EnableAnnotations = []string{".*"} runtimeConfig.HypervisorConfig.FileBackedMemRootList = []string{"/dev/shm*"} @@ -743,7 +735,6 @@ func TestAddProtectedHypervisorAnnotations(t *testing.T) { runtimeConfig := RuntimeConfig{ HypervisorType: vc.QemuHypervisor, - Console: consolePath, } ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on" err := addAnnotations(ocispec, &config, runtimeConfig) @@ -809,7 +800,6 @@ func TestAddRuntimeAnnotations(t *testing.T) { runtimeConfig := RuntimeConfig{ HypervisorType: vc.QemuHypervisor, - Console: consolePath, } ocispec.Annotations[vcAnnotations.DisableGuestSeccomp] = "true" @@ -1210,3 +1200,79 @@ func TestCalculateSandboxSizing(t *testing.T) { assert.Equal(tt.expectedMem, mem, "unexpected memory") } } + +func TestNewMount(t *testing.T) { + assert := assert.New(t) + + testCases := []struct { + out vc.Mount + in specs.Mount + }{ + { + in: specs.Mount{ + Source: "proc", + Destination: "/proc", + Type: "proc", + Options: nil, + }, + out: vc.Mount{ + Source: "proc", + Destination: "/proc", + Type: "proc", + Options: nil, + }, + }, + { + in: specs.Mount{ + Source: "proc", + Destination: "/proc", + Type: "proc", + Options: []string{"ro"}, + }, + out: vc.Mount{ + Source: "proc", + Destination: "/proc", + Type: "proc", + Options: []string{"ro"}, + ReadOnly: true, + }, + }, + { + in: specs.Mount{ + Source: "/abc", + Destination: "/def", + Type: "none", + Options: []string{"bind"}, + }, + out: vc.Mount{ + Source: "/abc", + Destination: "/def", + Type: "bind", + Options: []string{"bind"}, + }, + }, { + in: specs.Mount{ + Source: "/abc", + Destination: "/def", + Type: "none", + Options: []string{"rbind"}, + }, + out: vc.Mount{ + Source: "/abc", + Destination: "/def", + Type: "bind", + Options: []string{"rbind"}, + }, + }, + } + + for _, tt := range testCases { + actualMount := newMount(tt.in) + + assert.Equal(tt.out.Source, actualMount.Source, "unexpected mount source") + assert.Equal(tt.out.Destination, actualMount.Destination, "unexpected mount destination") + assert.Equal(tt.out.Type, actualMount.Type, "unexpected mount type") + assert.Equal(tt.out.Options, actualMount.Options, "unexpected mount options") + assert.Equal(tt.out.ReadOnly, actualMount.ReadOnly, "unexpected mount ReadOnly") + } +} diff --git a/src/runtime/vendor/github.com/pbnjay/memory/LICENSE b/src/runtime/vendor/github.com/pbnjay/memory/LICENSE new file mode 100644 index 0000000000..63ca4a6d24 --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2017, Jeremy Jay +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/runtime/vendor/github.com/pbnjay/memory/README.md b/src/runtime/vendor/github.com/pbnjay/memory/README.md new file mode 100644 index 0000000000..e98f261a0f --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/README.md @@ -0,0 +1,41 @@ +# memory + +Package `memory` provides two methods reporting total physical system memory +accessible to the kernel, and free memory available to the running application. + +This package has no external dependency besides the standard library and default operating system tools. + +Documentation: +[![GoDoc](https://godoc.org/github.com/pbnjay/memory?status.svg)](https://godoc.org/github.com/pbnjay/memory) + +This is useful for dynamic code to minimize thrashing and other contention, similar to the stdlib `runtime.NumCPU` +See some history of the proposal at https://github.com/golang/go/issues/21816 + + +## Example + +```go +fmt.Printf("Total system memory: %d\n", memory.TotalMemory()) +fmt.Printf("Free memory: %d\n", memory.FreeMemory()) +``` + + +## Testing + +Tested/working on: + - macOS 10.12.6 (16G29), 10.15.7 (19H2) + - Windows 10 1511 (10586.1045) + - Linux RHEL (3.10.0-327.3.1.el7.x86_64) + - Raspberry Pi 3 (ARMv8) on Raspbian, ODROID-C1+ (ARMv7) on Ubuntu, C.H.I.P + (ARMv7). + - Amazon Linux 2 aarch64 (m6a.large, 4.14.203-156.332.amzn2.aarch64) + +Tested on virtual machines: + - Windows 7 SP1 386 + - Debian stretch 386 + - NetBSD 7.1 amd64 + 386 + - OpenBSD 6.1 amd64 + 386 + - FreeBSD 11.1 amd64 + 386 + - DragonFly BSD 4.8.1 amd64 + +If you have access to untested systems please test and report success/bugs. diff --git a/src/runtime/vendor/github.com/pbnjay/memory/doc.go b/src/runtime/vendor/github.com/pbnjay/memory/doc.go new file mode 100644 index 0000000000..4e4f984c0f --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/doc.go @@ -0,0 +1,24 @@ +// Package memory provides a single method reporting total system memory +// accessible to the kernel. +package memory + +// TotalMemory returns the total accessible system memory in bytes. +// +// The total accessible memory is installed physical memory size minus reserved +// areas for the kernel and hardware, if such reservations are reported by +// the operating system. +// +// If accessible memory size could not be determined, then 0 is returned. +func TotalMemory() uint64 { + return sysTotalMemory() +} + +// FreeMemory returns the total free system memory in bytes. +// +// The total free memory is installed physical memory size minus reserved +// areas for other applications running on the same system. +// +// If free memory size could not be determined, then 0 is returned. +func FreeMemory() uint64 { + return sysFreeMemory() +} diff --git a/src/runtime/vendor/github.com/pbnjay/memory/go.mod b/src/runtime/vendor/github.com/pbnjay/memory/go.mod new file mode 100644 index 0000000000..5965765912 --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/go.mod @@ -0,0 +1,3 @@ +module github.com/pbnjay/memory + +go 1.16 diff --git a/src/runtime/vendor/github.com/pbnjay/memory/memory_bsd.go b/src/runtime/vendor/github.com/pbnjay/memory/memory_bsd.go new file mode 100644 index 0000000000..49d808a9e7 --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/memory_bsd.go @@ -0,0 +1,19 @@ +// +build freebsd openbsd dragonfly netbsd + +package memory + +func sysTotalMemory() uint64 { + s, err := sysctlUint64("hw.physmem") + if err != nil { + return 0 + } + return s +} + +func sysFreeMemory() uint64 { + s, err := sysctlUint64("hw.usermem") + if err != nil { + return 0 + } + return s +} diff --git a/src/runtime/vendor/github.com/pbnjay/memory/memory_darwin.go b/src/runtime/vendor/github.com/pbnjay/memory/memory_darwin.go new file mode 100644 index 0000000000..a3f4576990 --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/memory_darwin.go @@ -0,0 +1,49 @@ +// +build darwin + +package memory + +import ( + "os/exec" + "regexp" + "strconv" +) + +func sysTotalMemory() uint64 { + s, err := sysctlUint64("hw.memsize") + if err != nil { + return 0 + } + return s +} + +func sysFreeMemory() uint64 { + cmd := exec.Command("vm_stat") + outBytes, err := cmd.Output() + if err != nil { + return 0 + } + + rePageSize := regexp.MustCompile("page size of ([0-9]*) bytes") + reFreePages := regexp.MustCompile("Pages free: *([0-9]*)\\.") + + // default: page size of 4096 bytes + matches := rePageSize.FindSubmatchIndex(outBytes) + pageSize := uint64(4096) + if len(matches) == 4 { + pageSize, err = strconv.ParseUint(string(outBytes[matches[2]:matches[3]]), 10, 64) + if err != nil { + return 0 + } + } + + // ex: Pages free: 1126961. + matches = reFreePages.FindSubmatchIndex(outBytes) + freePages := uint64(0) + if len(matches) == 4 { + freePages, err = strconv.ParseUint(string(outBytes[matches[2]:matches[3]]), 10, 64) + if err != nil { + return 0 + } + } + return freePages * pageSize +} diff --git a/src/runtime/vendor/github.com/pbnjay/memory/memory_linux.go b/src/runtime/vendor/github.com/pbnjay/memory/memory_linux.go new file mode 100644 index 0000000000..3d07711ce5 --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/memory_linux.go @@ -0,0 +1,29 @@ +// +build linux + +package memory + +import "syscall" + +func sysTotalMemory() uint64 { + in := &syscall.Sysinfo_t{} + err := syscall.Sysinfo(in) + if err != nil { + return 0 + } + // If this is a 32-bit system, then these fields are + // uint32 instead of uint64. + // So we always convert to uint64 to match signature. + return uint64(in.Totalram) * uint64(in.Unit) +} + +func sysFreeMemory() uint64 { + in := &syscall.Sysinfo_t{} + err := syscall.Sysinfo(in) + if err != nil { + return 0 + } + // If this is a 32-bit system, then these fields are + // uint32 instead of uint64. + // So we always convert to uint64 to match signature. + return uint64(in.Freeram) * uint64(in.Unit) +} diff --git a/src/runtime/vendor/github.com/pbnjay/memory/memory_windows.go b/src/runtime/vendor/github.com/pbnjay/memory/memory_windows.go new file mode 100644 index 0000000000..c8500cc6f3 --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/memory_windows.go @@ -0,0 +1,60 @@ +// +build windows + +package memory + +import ( + "syscall" + "unsafe" +) + +// omitting a few fields for brevity... +// https://msdn.microsoft.com/en-us/library/windows/desktop/aa366589(v=vs.85).aspx +type memStatusEx struct { + dwLength uint32 + dwMemoryLoad uint32 + ullTotalPhys uint64 + ullAvailPhys uint64 + unused [5]uint64 +} + +func sysTotalMemory() uint64 { + kernel32, err := syscall.LoadDLL("kernel32.dll") + if err != nil { + return 0 + } + // GetPhysicallyInstalledSystemMemory is simpler, but broken on + // older versions of windows (and uses this under the hood anyway). + globalMemoryStatusEx, err := kernel32.FindProc("GlobalMemoryStatusEx") + if err != nil { + return 0 + } + msx := &memStatusEx{ + dwLength: 64, + } + r, _, _ := globalMemoryStatusEx.Call(uintptr(unsafe.Pointer(msx))) + if r == 0 { + return 0 + } + return msx.ullTotalPhys +} + +func sysFreeMemory() uint64 { + kernel32, err := syscall.LoadDLL("kernel32.dll") + if err != nil { + return 0 + } + // GetPhysicallyInstalledSystemMemory is simpler, but broken on + // older versions of windows (and uses this under the hood anyway). + globalMemoryStatusEx, err := kernel32.FindProc("GlobalMemoryStatusEx") + if err != nil { + return 0 + } + msx := &memStatusEx{ + dwLength: 64, + } + r, _, _ := globalMemoryStatusEx.Call(uintptr(unsafe.Pointer(msx))) + if r == 0 { + return 0 + } + return msx.ullAvailPhys +} diff --git a/src/runtime/vendor/github.com/pbnjay/memory/memsysctl.go b/src/runtime/vendor/github.com/pbnjay/memory/memsysctl.go new file mode 100644 index 0000000000..438d9eff8e --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/memsysctl.go @@ -0,0 +1,21 @@ +// +build darwin freebsd openbsd dragonfly netbsd + +package memory + +import ( + "syscall" + "unsafe" +) + +func sysctlUint64(name string) (uint64, error) { + s, err := syscall.Sysctl(name) + if err != nil { + return 0, err + } + // hack because the string conversion above drops a \0 + b := []byte(s) + if len(b) < 8 { + b = append(b, 0) + } + return *(*uint64)(unsafe.Pointer(&b[0])), nil +} diff --git a/src/runtime/vendor/github.com/pbnjay/memory/stub.go b/src/runtime/vendor/github.com/pbnjay/memory/stub.go new file mode 100644 index 0000000000..f29473ba08 --- /dev/null +++ b/src/runtime/vendor/github.com/pbnjay/memory/stub.go @@ -0,0 +1,10 @@ +// +build !linux,!darwin,!windows,!freebsd,!dragonfly,!netbsd,!openbsd + +package memory + +func sysTotalMemory() uint64 { + return 0 +} +func sysFreeMemory() uint64 { + return 0 +} diff --git a/src/runtime/vendor/modules.txt b/src/runtime/vendor/modules.txt index adf9f6ce53..7a43665e37 100644 --- a/src/runtime/vendor/modules.txt +++ b/src/runtime/vendor/modules.txt @@ -260,6 +260,9 @@ github.com/opencontainers/selinux/go-selinux github.com/opencontainers/selinux/go-selinux/label github.com/opencontainers/selinux/pkg/pwalk github.com/opencontainers/selinux/pkg/pwalkdir +# github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 +## explicit +github.com/pbnjay/memory # github.com/pkg/errors v0.9.1 ## explicit github.com/pkg/errors diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index f6da05ec58..1c3ebc1476 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -348,10 +348,6 @@ func (a *Acrn) createDummyVirtioBlkDev(ctx context.Context, devices []Device) ([ } func (a *Acrn) setConfig(config *HypervisorConfig) error { - if err := config.Valid(); err != nil { - return err - } - a.config = *config return nil diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 6984b73be5..73e09cfe69 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -268,11 +268,6 @@ var clhDebugKernelParams = []Param{ //########################################################### func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error { - err := config.Valid() - if err != nil { - return err - } - clh.config = *config return nil @@ -480,12 +475,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Enable hugepages if needed clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages) if !clh.config.ConfidentialGuest { - hostMemKb, err := GetHostMemorySizeKb(procMemInfo) - if err != nil { - return nil - } + hotplugSize := clh.config.DefaultMaxMemorySize // OpenAPI only supports int64 values - clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hostMemKb) * utils.KiB).ToBytes())) + clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) } // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) @@ -882,6 +874,11 @@ func (clh *cloudHypervisor) ResizeMemory(ctx context.Context, reqMemMB uint32, m return 0, MemoryDevice{}, err } + maxHotplugSize := utils.MemUnit(*info.Config.Memory.HotplugSize) * utils.Byte + if reqMemMB > uint32(maxHotplugSize.ToMiB()) { + reqMemMB = uint32(maxHotplugSize.ToMiB()) + } + currentMem := utils.MemUnit(info.Config.Memory.Size) * utils.Byte newMem := utils.MemUnit(reqMemMB) * utils.MiB diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 25012d3f3d..434e320417 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -189,11 +189,6 @@ func (fc *firecracker) truncateID(id string) string { } func (fc *firecracker) setConfig(config *HypervisorConfig) error { - err := config.Valid() - if err != nil { - return err - } - fc.config = *config return nil diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 119c4667c3..48eda09778 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -11,7 +11,6 @@ import ( "fmt" "os" "runtime" - "strconv" "strings" "github.com/pkg/errors" @@ -50,7 +49,6 @@ const ( // MockHypervisor is a mock hypervisor for testing purposes MockHypervisor HypervisorType = "mock" - procMemInfo = "/proc/meminfo" procCPUInfo = "/proc/cpuinfo" defaultVCPUs = 1 @@ -441,6 +439,9 @@ type HypervisorConfig struct { // DefaultMem specifies default memory size in MiB for the VM. MemorySize uint32 + // DefaultMaxMemorySize specifies the maximum amount of RAM in MiB for the VM. + DefaultMaxMemorySize uint64 + // DefaultBridges specifies default number of bridges for the VM. // Bridges can be used to hot plug devices DefaultBridges uint32 @@ -568,61 +569,6 @@ func (conf *HypervisorConfig) CheckTemplateConfig() error { return nil } -func (conf *HypervisorConfig) Valid() error { - // Kata specific checks. Should be done outside the hypervisor - if conf.KernelPath == "" { - return fmt.Errorf("Missing kernel path") - } - - if conf.ConfidentialGuest && conf.HypervisorMachineType == QemuCCWVirtio { - if conf.ImagePath != "" || conf.InitrdPath != "" { - fmt.Println("yes, failing") - return fmt.Errorf("Neither the image or initrd path may be set for Secure Execution") - } - } else if conf.ImagePath == "" && conf.InitrdPath == "" { - return fmt.Errorf("Missing image and initrd path") - } else if conf.ImagePath != "" && conf.InitrdPath != "" { - return fmt.Errorf("Image and initrd path cannot be both set") - } - - if err := conf.CheckTemplateConfig(); err != nil { - return err - } - - if conf.NumVCPUs == 0 { - conf.NumVCPUs = defaultVCPUs - } - - if conf.MemorySize == 0 { - conf.MemorySize = defaultMemSzMiB - } - - if conf.DefaultBridges == 0 { - conf.DefaultBridges = defaultBridges - } - - if conf.BlockDeviceDriver == "" { - conf.BlockDeviceDriver = defaultBlockDriver - } else if conf.BlockDeviceDriver == config.VirtioBlock && conf.HypervisorMachineType == QemuCCWVirtio { - conf.BlockDeviceDriver = config.VirtioBlockCCW - } - - if conf.DefaultMaxVCPUs == 0 || conf.DefaultMaxVCPUs > defaultMaxVCPUs { - conf.DefaultMaxVCPUs = defaultMaxVCPUs - } - - if conf.ConfidentialGuest && conf.NumVCPUs != conf.DefaultMaxVCPUs { - hvLogger.Warnf("Confidential guests do not support hotplugging of vCPUs. Setting DefaultMaxVCPUs to NumVCPUs (%d)", conf.NumVCPUs) - conf.DefaultMaxVCPUs = conf.NumVCPUs - } - - if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS { - conf.Msize9p = defaultMsize9p - } - - return nil -} - // AddKernelParam allows the addition of new kernel parameters to an existing // hypervisor configuration. func (conf *HypervisorConfig) AddKernelParam(p Param) error { @@ -796,39 +742,6 @@ func DeserializeParams(parameters []string) []Param { return params } -func GetHostMemorySizeKb(memInfoPath string) (uint64, error) { - f, err := os.Open(memInfoPath) - if err != nil { - return 0, err - } - defer f.Close() - - scanner := bufio.NewScanner(f) - for scanner.Scan() { - // Expected format: ["MemTotal:", "1234", "kB"] - parts := strings.Fields(scanner.Text()) - - // Sanity checks: Skip malformed entries. - if len(parts) < 3 || parts[0] != "MemTotal:" || parts[2] != "kB" { - continue - } - - sizeKb, err := strconv.ParseUint(parts[1], 0, 64) - if err != nil { - continue - } - - return sizeKb, nil - } - - // Handle errors that may have occurred during the reading of the file. - if err := scanner.Err(); err != nil { - return 0, err - } - - return 0, fmt.Errorf("unable get MemTotal from %s", memInfoPath) -} - // CheckCmdline checks whether an option or parameter is present in the kernel command line. // Search is case-insensitive. // Takes path to file that contains the kernel command line, desired option, and permitted values diff --git a/src/runtime/virtcontainers/hypervisor_config_darwin.go b/src/runtime/virtcontainers/hypervisor_config_darwin.go new file mode 100644 index 0000000000..e074d59ea6 --- /dev/null +++ b/src/runtime/virtcontainers/hypervisor_config_darwin.go @@ -0,0 +1,33 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "fmt" +) + +func validateHypervisorConfig(conf *HypervisorConfig) error { + + if conf.KernelPath == "" { + return fmt.Errorf("Missing kernel path") + } + + if conf.ImagePath == "" && conf.InitrdPath == "" { + return fmt.Errorf("Missing image and initrd path") + } else if conf.ImagePath != "" && conf.InitrdPath != "" { + return fmt.Errorf("Image and initrd path cannot be both set") + } + + if conf.NumVCPUs == 0 { + conf.NumVCPUs = defaultVCPUs + } + + if conf.MemorySize == 0 { + conf.MemorySize = defaultMemSzMiB + } + + return nil +} diff --git a/src/runtime/virtcontainers/hypervisor_config_linux.go b/src/runtime/virtcontainers/hypervisor_config_linux.go new file mode 100644 index 0000000000..f1f20d3151 --- /dev/null +++ b/src/runtime/virtcontainers/hypervisor_config_linux.go @@ -0,0 +1,67 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "fmt" + + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" +) + +func validateHypervisorConfig(conf *HypervisorConfig) error { + + if conf.KernelPath == "" { + return fmt.Errorf("Missing kernel path") + } + + if conf.ConfidentialGuest && conf.HypervisorMachineType == QemuCCWVirtio { + if conf.ImagePath != "" || conf.InitrdPath != "" { + fmt.Println("yes, failing") + return fmt.Errorf("Neither the image or initrd path may be set for Secure Execution") + } + } else if conf.ImagePath == "" && conf.InitrdPath == "" { + return fmt.Errorf("Missing image and initrd path") + } else if conf.ImagePath != "" && conf.InitrdPath != "" { + return fmt.Errorf("Image and initrd path cannot be both set") + } + + if err := conf.CheckTemplateConfig(); err != nil { + return err + } + + if conf.NumVCPUs == 0 { + conf.NumVCPUs = defaultVCPUs + } + + if conf.MemorySize == 0 { + conf.MemorySize = defaultMemSzMiB + } + + if conf.DefaultBridges == 0 { + conf.DefaultBridges = defaultBridges + } + + if conf.BlockDeviceDriver == "" { + conf.BlockDeviceDriver = defaultBlockDriver + } else if conf.BlockDeviceDriver == config.VirtioBlock && conf.HypervisorMachineType == QemuCCWVirtio { + conf.BlockDeviceDriver = config.VirtioBlockCCW + } + + if conf.DefaultMaxVCPUs == 0 || conf.DefaultMaxVCPUs > defaultMaxVCPUs { + conf.DefaultMaxVCPUs = defaultMaxVCPUs + } + + if conf.ConfidentialGuest && conf.NumVCPUs != conf.DefaultMaxVCPUs { + hvLogger.Warnf("Confidential guests do not support hotplugging of vCPUs. Setting DefaultMaxVCPUs to NumVCPUs (%d)", conf.NumVCPUs) + conf.DefaultMaxVCPUs = conf.NumVCPUs + } + + if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS { + conf.Msize9p = defaultMsize9p + } + + return nil +} diff --git a/src/runtime/virtcontainers/hypervisor_config_linux_test.go b/src/runtime/virtcontainers/hypervisor_config_linux_test.go new file mode 100644 index 0000000000..609e52fd73 --- /dev/null +++ b/src/runtime/virtcontainers/hypervisor_config_linux_test.go @@ -0,0 +1,114 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHypervisorConfigNoImagePath(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: "", + HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), + } + + testHypervisorConfigValid(t, hypervisorConfig, false) +} + +func TestHypervisorConfigNoHypervisorPath(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + } + + testHypervisorConfigValid(t, hypervisorConfig, true) +} + +func TestHypervisorConfigIsValid(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), + } + + testHypervisorConfigValid(t, hypervisorConfig, true) +} + +func TestHypervisorConfigBothInitrdAndImage(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + InitrdPath: fmt.Sprintf("%s/%s", testDir, testInitrd), + HypervisorPath: "", + } + + testHypervisorConfigValid(t, hypervisorConfig, false) +} + +func TestHypervisorConfigSecureExecution(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + InitrdPath: fmt.Sprintf("%s/%s", testDir, testInitrd), + ConfidentialGuest: true, + HypervisorMachineType: QemuCCWVirtio, + } + + // Secure Execution should only specify a kernel (encrypted image contains all components) + testHypervisorConfigValid(t, hypervisorConfig, false) +} + +func TestHypervisorConfigValidTemplateConfig(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), + BootToBeTemplate: true, + BootFromTemplate: true, + } + testHypervisorConfigValid(t, hypervisorConfig, false) + + hypervisorConfig.BootToBeTemplate = false + testHypervisorConfigValid(t, hypervisorConfig, false) + hypervisorConfig.MemoryPath = "foobar" + testHypervisorConfigValid(t, hypervisorConfig, false) + hypervisorConfig.DevicesStatePath = "foobar" + testHypervisorConfigValid(t, hypervisorConfig, true) + + hypervisorConfig.BootFromTemplate = false + hypervisorConfig.BootToBeTemplate = true + testHypervisorConfigValid(t, hypervisorConfig, true) + hypervisorConfig.MemoryPath = "" + testHypervisorConfigValid(t, hypervisorConfig, false) +} + +func TestHypervisorConfigDefaults(t *testing.T) { + assert := assert.New(t) + hypervisorConfig := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + } + testHypervisorConfigValid(t, hypervisorConfig, true) + + hypervisorConfigDefaultsExpected := &HypervisorConfig{ + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: "", + NumVCPUs: defaultVCPUs, + MemorySize: defaultMemSzMiB, + DefaultBridges: defaultBridges, + BlockDeviceDriver: defaultBlockDriver, + DefaultMaxVCPUs: defaultMaxVCPUs, + Msize9p: defaultMsize9p, + } + + assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected) +} diff --git a/src/runtime/virtcontainers/hypervisor_config_test.go b/src/runtime/virtcontainers/hypervisor_config_test.go new file mode 100644 index 0000000000..49558f6a97 --- /dev/null +++ b/src/runtime/virtcontainers/hypervisor_config_test.go @@ -0,0 +1,30 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func testHypervisorConfigValid(t *testing.T, hypervisorConfig *HypervisorConfig, success bool) { + err := validateHypervisorConfig(hypervisorConfig) + assert := assert.New(t) + assert.False(success && err != nil) + assert.False(!success && err == nil) +} + +func TestHypervisorConfigNoKernelPath(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + KernelPath: "", + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), + } + + testHypervisorConfigValid(t, hypervisorConfig, false) +} diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index ec475e86eb..6e2fb540ba 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -8,7 +8,6 @@ package virtcontainers import ( "fmt" "os" - "path/filepath" "testing" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" @@ -86,124 +85,6 @@ func TestNewHypervisorFromUnknownHypervisorType(t *testing.T) { assert.Nil(hy) } -func testHypervisorConfigValid(t *testing.T, hypervisorConfig *HypervisorConfig, success bool) { - err := hypervisorConfig.Valid() - assert := assert.New(t) - assert.False(success && err != nil) - assert.False(!success && err == nil) -} - -func TestHypervisorConfigNoKernelPath(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: "", - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), - } - - testHypervisorConfigValid(t, hypervisorConfig, false) -} - -func TestHypervisorConfigNoImagePath(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: "", - HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), - } - - testHypervisorConfigValid(t, hypervisorConfig, false) -} - -func TestHypervisorConfigNoHypervisorPath(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", - } - - testHypervisorConfigValid(t, hypervisorConfig, true) -} - -func TestHypervisorConfigIsValid(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), - } - - testHypervisorConfigValid(t, hypervisorConfig, true) -} - -func TestHypervisorConfigBothInitrdAndImage(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - InitrdPath: fmt.Sprintf("%s/%s", testDir, testInitrd), - HypervisorPath: "", - } - - testHypervisorConfigValid(t, hypervisorConfig, false) -} - -func TestHypervisorConfigSecureExecution(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - InitrdPath: fmt.Sprintf("%s/%s", testDir, testInitrd), - ConfidentialGuest: true, - HypervisorMachineType: QemuCCWVirtio, - } - - // Secure Execution should only specify a kernel (encrypted image contains all components) - testHypervisorConfigValid(t, hypervisorConfig, false) -} - -func TestHypervisorConfigValidTemplateConfig(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), - BootToBeTemplate: true, - BootFromTemplate: true, - } - testHypervisorConfigValid(t, hypervisorConfig, false) - - hypervisorConfig.BootToBeTemplate = false - testHypervisorConfigValid(t, hypervisorConfig, false) - hypervisorConfig.MemoryPath = "foobar" - testHypervisorConfigValid(t, hypervisorConfig, false) - hypervisorConfig.DevicesStatePath = "foobar" - testHypervisorConfigValid(t, hypervisorConfig, true) - - hypervisorConfig.BootFromTemplate = false - hypervisorConfig.BootToBeTemplate = true - testHypervisorConfigValid(t, hypervisorConfig, true) - hypervisorConfig.MemoryPath = "" - testHypervisorConfigValid(t, hypervisorConfig, false) -} - -func TestHypervisorConfigDefaults(t *testing.T) { - assert := assert.New(t) - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", - } - testHypervisorConfigValid(t, hypervisorConfig, true) - - hypervisorConfigDefaultsExpected := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: "", - NumVCPUs: defaultVCPUs, - MemorySize: defaultMemSzMiB, - DefaultBridges: defaultBridges, - BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxVCPUs, - Msize9p: defaultMsize9p, - } - - assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected) -} - func TestAppendParams(t *testing.T) { assert := assert.New(t) paramList := []Param{ @@ -372,55 +253,6 @@ func TestAddKernelParamInvalid(t *testing.T) { assert.Error(err) } -func TestGetHostMemorySizeKb(t *testing.T) { - assert := assert.New(t) - type testData struct { - contents string - expectedResult int - expectError bool - } - - data := []testData{ - { - ` - MemTotal: 1 kB - MemFree: 2 kB - SwapTotal: 3 kB - SwapFree: 4 kB - `, - 1024, - false, - }, - { - ` - MemFree: 2 kB - SwapTotal: 3 kB - SwapFree: 4 kB - `, - 0, - true, - }, - } - - dir := t.TempDir() - - file := filepath.Join(dir, "meminfo") - _, err := GetHostMemorySizeKb(file) - assert.Error(err) - - for _, d := range data { - err = os.WriteFile(file, []byte(d.contents), os.FileMode(0640)) - assert.NoError(err) - defer os.Remove(file) - - hostMemKb, err := GetHostMemorySizeKb(file) - - assert.False((d.expectError && err == nil)) - assert.False((!d.expectError && err != nil)) - assert.NotEqual(hostMemKb, d.expectedResult) - } -} - func TestCheckCmdline(t *testing.T) { assert := assert.New(t) diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index 83e776d289..f4a0b934ec 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ b/src/runtime/virtcontainers/mock_hypervisor.go @@ -31,10 +31,6 @@ func (m *mockHypervisor) HypervisorConfig() HypervisorConfig { } func (m *mockHypervisor) setConfig(config *HypervisorConfig) error { - if err := config.Valid(); err != nil { - return err - } - return nil } diff --git a/src/runtime/virtcontainers/mock_hypervisor_test.go b/src/runtime/virtcontainers/mock_hypervisor_test.go index 5e89ae8a69..0159a993dd 100644 --- a/src/runtime/virtcontainers/mock_hypervisor_test.go +++ b/src/runtime/virtcontainers/mock_hypervisor_test.go @@ -21,9 +21,9 @@ func TestMockHypervisorCreateVM(t *testing.T) { config: &SandboxConfig{ ID: "mock_sandbox", HypervisorConfig: HypervisorConfig{ - KernelPath: "", - ImagePath: "", - HypervisorPath: "", + KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), + ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), + HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), }, }, } @@ -33,16 +33,6 @@ func TestMockHypervisorCreateVM(t *testing.T) { ctx := context.Background() - // wrong config - err = m.CreateVM(ctx, sandbox.config.ID, network, &sandbox.config.HypervisorConfig) - assert.Error(err) - - sandbox.config.HypervisorConfig = HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), - } - err = m.CreateVM(ctx, sandbox.config.ID, network, &sandbox.config.HypervisorConfig) assert.NoError(err) } diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index c8e48953f6..115ff5e7d4 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -302,24 +302,8 @@ func (q *qemu) cpuTopology() govmmQemu.SMP { return q.arch.cpuTopology(q.config.NumVCPUs, q.config.DefaultMaxVCPUs) } -func (q *qemu) hostMemMB() (uint64, error) { - hostMemKb, err := GetHostMemorySizeKb(procMemInfo) - if err != nil { - return 0, fmt.Errorf("Unable to read memory info: %s", err) - } - if hostMemKb == 0 { - return 0, fmt.Errorf("Error host memory size 0") - } - - return hostMemKb / 1024, nil -} - func (q *qemu) memoryTopology() (govmmQemu.Memory, error) { - hostMemMb, err := q.hostMemMB() - if err != nil { - return govmmQemu.Memory{}, err - } - + hostMemMb := q.config.DefaultMaxMemorySize memMb := uint64(q.config.MemorySize) return q.arch.memoryTopology(memMb, hostMemMb, uint8(q.config.MemSlots)), nil @@ -461,11 +445,6 @@ func (q *qemu) setupFileBackedMem(knobs *govmmQemu.Knobs, memory *govmmQemu.Memo } func (q *qemu) setConfig(config *HypervisorConfig) error { - err := config.Valid() - if err != nil { - return err - } - q.config = *config return nil @@ -513,7 +492,6 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi span, ctx := katatrace.Trace(ctx, q.Logger(), "CreateVM", qemuTracingTags, map[string]string{"VM_ID": q.id}) defer span.End() - // Breaks hypervisor abstraction Has Kata Specific logic: See within if err := q.setup(ctx, id, hypervisorConfig); err != nil { return err } @@ -779,12 +757,8 @@ func (q *qemu) getMemArgs() (bool, string, string, error) { } func (q *qemu) setupVirtioMem(ctx context.Context) error { - maxMem, err := q.hostMemMB() - if err != nil { - return err - } // backend memory size must be multiple of 4Mib - sizeMB := (int(maxMem) - int(q.config.MemorySize)) >> 2 << 2 + sizeMB := (int(q.config.DefaultMaxMemorySize) - int(q.config.MemorySize)) >> 2 << 2 share, target, memoryBack, err := q.getMemArgs() if err != nil { @@ -1970,8 +1944,6 @@ func (q *qemu) hotplugMemory(memDev *MemoryDevice, op Operation) (int, error) { return 0, err } - currentMemory := int(q.config.MemorySize) + q.state.HotpluggedMemory - if memDev.SizeMB == 0 { memLog.Debug("hotplug is not required") return 0, nil @@ -1985,17 +1957,7 @@ func (q *qemu) hotplugMemory(memDev *MemoryDevice, op Operation) (int, error) { return 0, nil case AddDevice: memLog.WithField("operation", "add").Debugf("Requested to add memory: %d MB", memDev.SizeMB) - maxMem, err := q.hostMemMB() - if err != nil { - return 0, err - } - // Don't exceed the maximum amount of memory - if currentMemory+memDev.SizeMB > int(maxMem) { - // Fixme: return a typed error - return 0, fmt.Errorf("Unable to hotplug %d MiB memory, the SB has %d MiB and the maximum amount is %d MiB", - memDev.SizeMB, currentMemory, maxMem) - } memoryAdded, err := q.hotplugAddMemory(memDev) if err != nil { return memoryAdded, err @@ -2231,6 +2193,11 @@ func (q *qemu) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSiz case currentMemory < reqMemMB: //hotplug addMemMB := reqMemMB - currentMemory + + if currentMemory+addMemMB > uint32(q.config.DefaultMaxMemorySize) { + addMemMB = uint32(q.config.DefaultMaxMemorySize) - currentMemory + } + memHotplugMB, err := calcHotplugMemMiBSize(addMemMB, memoryBlockSizeMB) if err != nil { return currentMemory, MemoryDevice{}, err diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index b50d73a917..b932189e8d 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -21,6 +21,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" + "github.com/pbnjay/memory" "github.com/pkg/errors" "github.com/stretchr/testify/assert" ) @@ -172,20 +173,20 @@ func TestQemuCPUTopology(t *testing.T) { func TestQemuMemoryTopology(t *testing.T) { mem := uint32(1000) + maxMem := memory.TotalMemory() / 1024 / 1024 //MiB slots := uint32(8) assert := assert.New(t) q := &qemu{ arch: &qemuArchBase{}, config: HypervisorConfig{ - MemorySize: mem, - MemSlots: slots, + MemorySize: mem, + MemSlots: slots, + DefaultMaxMemorySize: maxMem, }, } - hostMemKb, err := GetHostMemorySizeKb(procMemInfo) - assert.NoError(err) - memMax := fmt.Sprintf("%dM", int(float64(hostMemKb)/1024)) + memMax := fmt.Sprintf("%dM", int(maxMem)) expectedOut := govmmQemu.Memory{ Size: fmt.Sprintf("%dM", mem), diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index a995f1f77b..6c9b7d06fb 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -593,6 +593,10 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor s.Logger().WithError(err).Debug("restore sandbox failed") } + if err := validateHypervisorConfig(&sandboxConfig.HypervisorConfig); err != nil { + return nil, err + } + // store doesn't require hypervisor to be stored immediately if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil { return nil, err @@ -1590,7 +1594,7 @@ func (s *Sandbox) ResumeContainer(ctx context.Context, containerID string) error } // createContainers registers all containers, create the -// containers in the guest and starts one shim per container. +// containers in the guest. func (s *Sandbox) createContainers(ctx context.Context) error { span, ctx := katatrace.Trace(ctx, s.Logger(), "createContainers", sandboxTracingTags, map[string]string{"sandbox_id": s.id}) defer span.End() diff --git a/src/runtime/virtcontainers/types/sandbox.go b/src/runtime/virtcontainers/types/sandbox.go index f4fc3e503f..5149b04232 100644 --- a/src/runtime/virtcontainers/types/sandbox.go +++ b/src/runtime/virtcontainers/types/sandbox.go @@ -314,7 +314,6 @@ type Cmd struct { User string PrimaryGroup string WorkDir string - Console string Args []string Envs []EnvVar diff --git a/src/runtime/virtcontainers/vm.go b/src/runtime/virtcontainers/vm.go index a9db8efc06..a96661d434 100644 --- a/src/runtime/virtcontainers/vm.go +++ b/src/runtime/virtcontainers/vm.go @@ -42,9 +42,8 @@ type VMConfig struct { HypervisorConfig HypervisorConfig } -// Valid Check VMConfig validity. func (c *VMConfig) Valid() error { - return c.HypervisorConfig.Valid() + return validateHypervisorConfig(&c.HypervisorConfig) } // ToGrpc convert VMConfig struct to grpc format pb.GrpcVMConfig. diff --git a/tools/osbuilder/dockerfiles/QAT/run.sh b/tools/osbuilder/dockerfiles/QAT/run.sh index b30c34c8b0..59194a2bcb 100755 --- a/tools/osbuilder/dockerfiles/QAT/run.sh +++ b/tools/osbuilder/dockerfiles/QAT/run.sh @@ -90,14 +90,14 @@ build_qat_drivers() KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION} cd $QAT_SRC KERNEL_SOURCE_ROOT=${linux_kernel_path} ./configure ${QAT_CONFIGURE_OPTIONS} - make all -j$(nproc) + make all -j $($(nproc ${CI:+--ignore 1})) } add_qat_to_rootfs() { /bin/echo -e "\n\e[1;42mCopy driver modules to rootfs\e[0m" cd $QAT_SRC - sudo -E make INSTALL_MOD_PATH=${ROOTFS_DIR} qat-driver-install -j$(nproc) + sudo -E make INSTALL_MOD_PATH=${ROOTFS_DIR} qat-driver-install -j$(nproc --ignore=1) sudo cp $QAT_SRC/build/usdm_drv.ko ${ROOTFS_DIR}/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR} cd ${kata_repo_path}/tools/osbuilder/image-builder diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index 9d09db049c..9f9bdcd6c5 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -19,7 +19,7 @@ $(MK_DIR)/dockerbuild/install_yq.sh: $(MK_DIR)/kata-deploy-copy-yq-installer.sh all-parallel: $(MK_DIR)/dockerbuild/install_yq.sh - ${MAKE} -f $(MK_PATH) all -j$$(( $$(nproc) - 1 )) V= + ${MAKE} -f $(MK_PATH) all -j $(shell nproc ${CI:+--ignore 1}) V= all: cloud-hypervisor-tarball \ firecracker-tarball \ diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index 5514d9a640..06a4a93ac9 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -23,8 +23,13 @@ RUN apt-get update && \ ARG IMG_USER=kata-builder ARG UID=1000 ARG GID=1000 +# gid of the docker group on the host, required for running docker in docker builds. +ARG HOST_DOCKER_GID + RUN if [ ${IMG_USER} != "root" ]; then groupadd --gid=${GID} ${IMG_USER};fi RUN if [ ${IMG_USER} != "root" ]; then adduser ${IMG_USER} --uid=${UID} --gid=${GID};fi +RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then groupadd --gid=${HOST_DOCKER_GID} docker_on_host;fi +RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then usermod -a -G docker_on_host ${IMG_USER};fi RUN sh -c "echo '${IMG_USER} ALL=NOPASSWD: ALL' >> /etc/sudoers" #FIXME: gcc is required as agent is build out of a container build. @@ -40,4 +45,4 @@ RUN apt-get update && \ apt-get clean && rm -rf /var/lib/apt/lists ENV USER ${IMG_USER} -USER ${UID}:${GID} +USER ${IMG_USER} diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 4035ff9cbd..24d19c936f 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -20,17 +20,27 @@ if [ "${script_dir}" != "${PWD}" ]; then ln -sf "${script_dir}/build" "${PWD}/build" fi +# This is the gid of the "docker" group on host. In case of docker in docker builds +# for some of the targets (clh builds from source), the nested container user needs to +# be part of this group. +docker_gid=$(getent group docker | cut -d: -f3 || { echo >&2 "Missing docker group, docker needs to be installed" && false; }) + +# If docker gid is the effective group id of the user, do not pass it as +# an additional group. +if [ ${docker_gid} == ${gid} ]; then + docker_gid="" +fi + docker build -q -t build-kata-deploy \ --build-arg IMG_USER="${USER}" \ --build-arg UID=${uid} \ --build-arg GID=${gid} \ + --build-arg HOST_DOCKER_GID=${docker_gid} \ "${script_dir}/dockerbuild/" docker run \ -v /var/run/docker.sock:/var/run/docker.sock \ - --user ${uid}:${gid} \ --env USER=${USER} -v "${kata_dir}:${kata_dir}" \ --rm \ -w ${script_dir} \ build-kata-deploy "${kata_deploy_create}" $@ - diff --git a/tools/packaging/kernel/build-kernel.sh b/tools/packaging/kernel/build-kernel.sh index 4e58a45ef1..9b9a008d8f 100755 --- a/tools/packaging/kernel/build-kernel.sh +++ b/tools/packaging/kernel/build-kernel.sh @@ -418,9 +418,9 @@ build_kernel() { [ -n "${arch_target}" ] || arch_target="$(uname -m)" arch_target=$(arch_to_kernel "${arch_target}") pushd "${kernel_path}" >>/dev/null - make -j $(nproc) ARCH="${arch_target}" + make -j $(nproc ${CI:+--ignore 1}) ARCH="${arch_target}" if [ "${conf_guest}" == "sev" ]; then - make -j $(nproc --ignore=1) INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=${kernel_path} modules_install + make -j $(nproc ${CI:+--ignore 1}) INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=${kernel_path} modules_install fi [ "$arch_target" != "powerpc" ] && ([ -e "arch/${arch_target}/boot/bzImage" ] || [ -e "arch/${arch_target}/boot/Image.gz" ]) [ -e "vmlinux" ] diff --git a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh index 82ad3b42b9..8cb1a6e79f 100755 --- a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh +++ b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh @@ -56,6 +56,7 @@ build_clh_from_source() { repo_dir="${repo_dir//.git}" rm -rf "${repo_dir}" git clone "${cloud_hypervisor_repo}" + git config --global --add safe.directory "$PWD/repo_dir" pushd "${repo_dir}" if [ -n "${cloud_hypervisor_pr}" ]; then diff --git a/tools/packaging/static-build/qemu/Dockerfile b/tools/packaging/static-build/qemu/Dockerfile index 61cc6ce951..27d088303e 100644 --- a/tools/packaging/static-build/qemu/Dockerfile +++ b/tools/packaging/static-build/qemu/Dockerfile @@ -74,6 +74,6 @@ RUN git clone --depth=1 "${QEMU_REPO}" qemu && \ /root/patch_qemu.sh "${QEMU_VERSION}" "/root/kata_qemu/patches" && \ (PREFIX="${PREFIX}" /root/configure-hypervisor.sh -s "kata-qemu${BUILD_SUFFIX}" | xargs ./configure \ --with-pkgversion="kata-static${BUILD_SUFFIX}") && \ - make -j"$(nproc)" && \ + make -j"$(nproc ${CI:+--ignore 1})" && \ make install DESTDIR="${QEMU_DESTDIR}" && \ /root/static-build/scripts/qemu-build-post.sh diff --git a/tools/packaging/static-build/scripts/kata-configure-docker.sh b/tools/packaging/static-build/scripts/kata-configure-docker.sh deleted file mode 100644 index 72fbc28e01..0000000000 --- a/tools/packaging/static-build/scripts/kata-configure-docker.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -# Description: Script to configure Docker for the static -# version of Kata Containers. - -[ -z "${DEBUG}" ] || set -x -set -o errexit -set -o nounset -set -o pipefail - -docker_config_dir="/etc/docker" -docker_config_file="${docker_config_file:-${docker_config_dir}/daemon.json}" - -# The static version of Kata Containers is entirely contained within -# this directory. -readonly static_base_dir="/opt/kata" - -# Path to runtime in static archive file -readonly runtime_path="${static_base_dir}/bin/kata-runtime" - -die() -{ - local msg="$*" - echo >&2 "ERROR: $msg" - exit 1 -} - -info() -{ - local msg="$*" - echo >&2 "INFO: $msg" -} - -configure_docker() -{ - local file="$1" - [ -z "$file" ] && die "need file" - - mkdir -p "${docker_config_dir}" - - if [ -e "$docker_config_file" ] - then - local today=$(date '+%Y-%m-%d') - local backup="${docker_config_file}.${today}" - - info "Backing up original Docker config file '$docker_config_file' to '$backup'" - - sudo cp "${docker_config_file}" "${docker_config_file}.${today}" - else - # Create a minimal valid JSON document - echo "{}" > "${docker_config_file}" - fi - - local config_files=$(tar tvf "$file" |\ - grep "/configuration-.*\.toml" |\ - grep -v -- '->' |\ - awk '{print $NF}' |\ - sed 's/^\.//g' || true) - - [ -z "$config_files" ] && die "cannot find any configuration files in '$file'" - - local config - local -a runtimes - - for config in $(echo "$config_files" | tr '\n' ' ') - do - local runtime - runtime=$(echo "$config" |\ - awk -F \/ '{print $NF}' |\ - sed -e 's/configuration/kata/g' -e 's/\.toml//g') - - runtimes+=("$runtime") - - local result - result=$(cat "$docker_config_file" |\ - jq \ - --arg config "$config" \ - --arg runtime "$runtime" \ - --arg runtime_path "$runtime_path" \ - '.runtimes[$runtime] = {path: $runtime_path, "runtimeArgs": ["--config", $config]}') - - echo "$result" > "$docker_config_file" - done - - info "Validating $docker_config_file" - - jq -S . "$docker_config_file" &>/dev/null - - info "Restarting Docker to apply new configuration" - - $chronic sudo systemctl restart docker - - info "Docker configured for the following additional runtimes: ${runtimes[@]}" -} - -setup() -{ - source "/etc/os-release" || source "/usr/lib/os-release" - - # Used to manipulate $docker_config_file - local pkg="jq" - - case "$ID" in - opensuse*) distro="opensuse" ;; - *) distro="$ID" ;; - esac - - # Use chronic(1) if available - chronic= - command -v chronic && chronic=chronic - - if command -v "$pkg" &>/dev/null - then - return 0 - fi - - info "Cannot find $pkg command so installing package" - - case "$distro" in - centos|rhel) $chronic sudo -E yum -y install "$pkg" ;; - debian|ubuntu) $chronic sudo -E apt-get --no-install-recommends install -y "$pkg" ;; - fedora) $chronic sudo -E dnf -y install "$pkg" ;; - opensuse|sles) $chronic sudo -E zypper -y install "$pkg" ;; - *) die "do not know how to install command $pkg' for distro '$distro'" ;; - esac -} - -main() -{ - local file="$1" - [ -z "$file" ] && die "need full path to Kata Containers static archive file" - - echo "$file" | grep -q "^kata-static-.*\.tar.xz" || die "invalid file: '$file'" - - [ $(id -u) -eq 0 ] || die "must be run as root" - - setup - - configure_docker "$file" -} - -main "$@" diff --git a/tools/packaging/static-build/shim-v2/build.sh b/tools/packaging/static-build/shim-v2/build.sh index 8cc34dba04..5b073ff07c 100755 --- a/tools/packaging/static-build/shim-v2/build.sh +++ b/tools/packaging/static-build/shim-v2/build.sh @@ -29,12 +29,12 @@ fi sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \ -w "${repo_root_dir}/src/runtime" \ "${container_image}" \ - bash -c "make PREFIX=${PREFIX} QEMUCMD=qemu-system-${arch}" + bash -c "git config --global --add safe.directory ${repo_root_dir} && make PREFIX=${PREFIX} QEMUCMD=qemu-system-${arch}" sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \ -w "${repo_root_dir}/src/runtime" \ "${container_image}" \ - bash -c "make PREFIX="${PREFIX}" DESTDIR="${DESTDIR}" install" + bash -c "git config --global --add safe.directory ${repo_root_dir} && make PREFIX="${PREFIX}" DESTDIR="${DESTDIR}" install" sudo sed -i -e '/^initrd =/d' "${DESTDIR}/${PREFIX}/share/defaults/kata-containers/configuration-qemu.toml" sudo sed -i -e '/^initrd =/d' "${DESTDIR}/${PREFIX}/share/defaults/kata-containers/configuration-fc.toml"