From c1b6838e254896213c1a3495ac1ee0245b56ee2b Mon Sep 17 00:00:00 2001 From: "fupan.lfp" Date: Fri, 20 Mar 2020 16:08:37 +0800 Subject: [PATCH] rustjail: refactoring the way of creating container process In the previous implementation, create a container process by forking the parent process as the container process, and then at the forked child process do much more setting, such as rootfs mounting, drop capabilities and so on, at last exec the container entry cmd to switch into container process. But since the parent is a muti thread process, which would cause a dead lock in the forked child. For example, if one of the parent process's thread do some malloc operation, which would take a mutex lock, and at the same time, the parent forked a child process, since the mutex lock status would be inherited by the child process but there's no chance to release the lock in the child since the child process only has a single thread which would meet a dead lock if it would do some malloc operation. Thus, the new implementation would do exec directly after forked and then do the setting in the exec process. Of course, this requred a data communication between parent and child since the child cannot depends on the shared memory by fork way. Fixes: #166 Fixes: #133 Signed-off-by: fupan.lfp --- src/agent/rustjail/src/capabilities.rs | 24 +- src/agent/rustjail/src/cgroups/fs/mod.rs | 23 +- src/agent/rustjail/src/container.rs | 1401 +++++++++++----------- src/agent/rustjail/src/errors.rs | 4 +- src/agent/rustjail/src/lib.rs | 8 +- src/agent/rustjail/src/mount.rs | 69 +- src/agent/rustjail/src/process.rs | 58 +- src/agent/rustjail/src/sync.rs | 177 +++ src/agent/rustjail/src/validator.rs | 6 + src/agent/src/device.rs | 2 +- src/agent/src/grpc.rs | 4 +- src/agent/src/main.rs | 8 +- 12 files changed, 1007 insertions(+), 777 deletions(-) create mode 100644 src/agent/rustjail/src/sync.rs diff --git a/src/agent/rustjail/src/capabilities.rs b/src/agent/rustjail/src/capabilities.rs index 5a156ff5e5..6a2f1201ab 100644 --- a/src/agent/rustjail/src/capabilities.rs +++ b/src/agent/rustjail/src/capabilities.rs @@ -9,10 +9,12 @@ use lazy_static; use crate::errors::*; +use crate::log_child; +use crate::sync::write_count; use caps::{self, CapSet, Capability, CapsHashSet}; use oci::LinuxCapabilities; -use slog::Logger; use std::collections::HashMap; +use std::os::unix::io::RawFd; lazy_static! { pub static ref CAPSMAP: HashMap = { @@ -76,14 +78,14 @@ lazy_static! { }; } -fn to_capshashset(logger: &Logger, caps: &[String]) -> CapsHashSet { +fn to_capshashset(cfd_log: RawFd, caps: &[String]) -> CapsHashSet { let mut r = CapsHashSet::new(); for cap in caps.iter() { let c = CAPSMAP.get(cap); if c.is_none() { - warn!(logger, "{} is not a cap", cap); + log_child!(cfd_log, "{} is not a cap", cap); continue; } @@ -98,37 +100,35 @@ pub fn reset_effective() -> Result<()> { Ok(()) } -pub fn drop_priviledges(logger: &Logger, caps: &LinuxCapabilities) -> Result<()> { - let logger = logger.new(o!("subsystem" => "capabilities")); - +pub fn drop_priviledges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> { let all = caps::all(); - for c in all.difference(&to_capshashset(&logger, caps.bounding.as_ref())) { + for c in all.difference(&to_capshashset(cfd_log, caps.bounding.as_ref())) { caps::drop(None, CapSet::Bounding, *c)?; } caps::set( None, CapSet::Effective, - to_capshashset(&logger, caps.effective.as_ref()), + to_capshashset(cfd_log, caps.effective.as_ref()), )?; caps::set( None, CapSet::Permitted, - to_capshashset(&logger, caps.permitted.as_ref()), + to_capshashset(cfd_log, caps.permitted.as_ref()), )?; caps::set( None, CapSet::Inheritable, - to_capshashset(&logger, caps.inheritable.as_ref()), + to_capshashset(cfd_log, caps.inheritable.as_ref()), )?; if let Err(_) = caps::set( None, CapSet::Ambient, - to_capshashset(&logger, caps.ambient.as_ref()), + to_capshashset(cfd_log, caps.ambient.as_ref()), ) { - warn!(logger, "failed to set ambient capability"); + log_child!(cfd_log, "failed to set ambient capability"); } Ok(()) diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 3363742921..aba62822fc 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -2,7 +2,6 @@ // // SPDX-License-Identifier: Apache-2.0 // - use crate::cgroups::FreezerState; use crate::cgroups::Manager as CgroupManager; use crate::container::DEFAULT_DEVICES; @@ -19,6 +18,7 @@ use protocols::agent::{ use regex::Regex; use std::collections::HashMap; use std::fs; +use std::path::Path; // Convenience macro to obtain the scope logger macro_rules! sl { @@ -207,7 +207,7 @@ fn parse_size(s: &str, m: &HashMap) -> Result { fn custom_size(mut size: f64, base: f64, m: &Vec) -> String { let mut i = 0; - while size > base { + while size >= base && i < m.len() - 1 { size /= base; i += 1; } @@ -307,7 +307,6 @@ where T: ToString, { let p = format!("{}/{}", dir, file); - info!(sl!(), "{}", p.as_str()); fs::write(p.as_str(), v.to_string().as_bytes())?; Ok(()) } @@ -936,6 +935,11 @@ fn get_blkio_stat(dir: &str, file: &str) -> Result = l.split(' ').collect(); @@ -1224,7 +1228,7 @@ fn get_all_procs(dir: &str) -> Result> { Ok(m) } -#[derive(Debug, Clone)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct Manager { pub paths: HashMap, pub mounts: HashMap, @@ -1238,7 +1242,6 @@ pub const FROZEN: &'static str = "FROZEN"; impl CgroupManager for Manager { fn apply(&self, pid: pid_t) -> Result<()> { for (key, value) in &self.paths { - info!(sl!(), "apply cgroup {}", key); apply(value, pid)?; } @@ -1249,7 +1252,6 @@ impl CgroupManager for Manager { for (key, value) in &self.paths { let _ = fs::create_dir_all(value); let sub = get_subsystem(key)?; - info!(sl!(), "setting cgroup {}", key); sub.set(value, spec, update)?; } @@ -1301,9 +1303,16 @@ impl CgroupManager for Manager { }; // BlkioStats + // note that virtiofs has no blkio stats info!(sl!(), "blkio_stats"); let blkio_stats = if self.paths.get("blkio").is_some() { - SingularPtrField::some(Blkio().get_stats(self.paths.get("blkio").unwrap())?) + match Blkio().get_stats(self.paths.get("blkio").unwrap()) { + Ok(stat) => SingularPtrField::some(stat), + Err(e) => { + warn!(sl!(), "failed to get blkio stats"); + SingularPtrField::none() + } + } } else { SingularPtrField::none() }; diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index 4629529c40..17b9fefca9 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -8,7 +8,6 @@ use oci::{Hook, Linux, LinuxNamespace, LinuxResources, POSIXRlimit, Spec}; use serde_json; use std::ffi::{CStr, CString}; use std::fs; -use std::mem; use std::os::unix::io::RawFd; use std::path::{Path, PathBuf}; use std::time::SystemTime; @@ -17,14 +16,16 @@ use libc::pid_t; use oci::{LinuxDevice, LinuxIDMapping}; use std::clone::Clone; use std::fmt::Display; -use std::process::Command; +use std::process::{Child, Command}; // use crate::configs::namespaces::{NamespaceType}; use crate::cgroups::Manager as CgroupManager; use crate::process::Process; // use crate::intelrdt::Manager as RdtManager; use crate::errors::*; +use crate::log_child; use crate::specconv::CreateOpts; +use crate::sync::*; // use crate::stats::Stats; use crate::capabilities::{self, CAPSMAP}; use crate::cgroups::fs::{self as fscgroup, Manager as FsManager}; @@ -34,12 +35,11 @@ use protocols::agent::StatsContainerResponse; use nix::errno::Errno; use nix::fcntl::{self, OFlag}; +use nix::fcntl::{FcntlArg, FdFlag}; use nix::pty; use nix::sched::{self, CloneFlags}; use nix::sys::signal::{self, Signal}; -use nix::sys::socket::{self, ControlMessage, ControlMessageOwned, MsgFlags}; use nix::sys::stat::{self, Mode}; -use nix::sys::uio::IoVec; use nix::unistd::{self, ForkResult, Gid, Pid, Uid}; use nix::Error; @@ -48,12 +48,23 @@ use protobuf::SingularPtrField; use oci::State as OCIState; use std::collections::HashMap; +use std::io::BufRead; +use std::io::BufReader; +use std::os::unix::io::FromRawFd; use slog::{debug, info, o, Logger}; const STATE_FILENAME: &'static str = "state.json"; const EXEC_FIFO_FILENAME: &'static str = "exec.fifo"; const VER_MARKER: &'static str = "1.2.5"; +const PID_NS_PATH: &str = "/proc/self/ns/pid"; + +const INIT: &str = "INIT"; +const NO_PIVOT: &str = "NO_PIVOT"; +const CRFD_FD: &str = "CRFD_FD"; +const CWFD_FD: &str = "CWFD_FD"; +const CLOG_FD: &str = "CLOG_FD"; +const FIFO_FD: &str = "FIFO_FD"; type Status = Option; pub type Config = CreateOpts; @@ -240,6 +251,321 @@ pub trait Container: BaseContainer { // fn notify_memory_pressure(&self, lvl: PressureLevel) -> Result<(Sender, Receiver)>; } +pub fn init_child() { + let cwfd = std::env::var(CWFD_FD).unwrap().parse::().unwrap(); + let cfd_log = std::env::var(CLOG_FD).unwrap().parse::().unwrap(); + match do_init_child(cwfd) { + Ok(_) => (), + Err(e) => { + log_child!(cfd_log, "child exit: {:?}", e); + write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str()); + return; + } + } + + std::process::exit(-1); +} + +fn do_init_child(cwfd: RawFd) -> Result<()> { + lazy_static::initialize(&NAMESPACES); + lazy_static::initialize(&DEFAULT_DEVICES); + lazy_static::initialize(&RLIMITMAPS); + lazy_static::initialize(&CAPSMAP); + + let init = std::env::var(INIT)?.eq(format!("{}", true).as_str()); + let no_pivot = std::env::var(NO_PIVOT)?.eq(format!("{}", true).as_str()); + let crfd = std::env::var(CRFD_FD)?.parse::().unwrap(); + let cfd_log = std::env::var(CLOG_FD)?.parse::().unwrap(); + + log_child!(cfd_log, "child process start run"); + let buf = read_sync(crfd)?; + let spec_str = std::str::from_utf8(&buf)?; + let spec: oci::Spec = serde_json::from_str(spec_str)?; + + log_child!(cfd_log, "notify parent to send oci process"); + write_sync(cwfd, SYNC_SUCCESS, "")?; + + let buf = read_sync(crfd)?; + let process_str = std::str::from_utf8(&buf)?; + let mut oci_process: oci::Process = serde_json::from_str(process_str)?; + log_child!(cfd_log, "notify parent to send cgroup manager"); + write_sync(cwfd, SYNC_SUCCESS, "")?; + + let buf = read_sync(crfd)?; + let cm_str = std::str::from_utf8(&buf)?; + let cm: FsManager = serde_json::from_str(cm_str)?; + + let p = if spec.process.is_some() { + spec.process.as_ref().unwrap() + } else { + return Err(ErrorKind::ErrorCode("didn't find process in Spec".to_string()).into()); + }; + + if spec.linux.is_none() { + return Err(ErrorKind::ErrorCode("no linux config".to_string()).into()); + } + let linux = spec.linux.as_ref().unwrap(); + + // get namespace vector to join/new + let nses = get_namespaces(&linux)?; + + let mut userns = false; + let mut to_new = CloneFlags::empty(); + let mut to_join = Vec::new(); + + for ns in &nses { + let s = NAMESPACES.get(&ns.r#type.as_str()); + if s.is_none() { + return Err(ErrorKind::ErrorCode("invalid ns type".to_string()).into()); + } + let s = s.unwrap(); + + if ns.path.is_empty() { + // skip the pidns since it has been done in parent process. + if *s != CloneFlags::CLONE_NEWPID { + to_new.set(*s, true); + } + } else { + let fd = match fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()) { + Ok(v) => v, + Err(e) => { + log_child!( + cfd_log, + "cannot open type: {} path: {}", + ns.r#type.clone(), + ns.path.clone() + ); + log_child!(cfd_log, "error is : {}", e.as_errno().unwrap().desc()); + return Err(e.into()); + } + }; + + if *s != CloneFlags::CLONE_NEWPID { + to_join.push((*s, fd)); + } + } + } + + if to_new.contains(CloneFlags::CLONE_NEWUSER) { + userns = true; + } + + if p.oom_score_adj.is_some() { + log_child!(cfd_log, "write oom score {}", p.oom_score_adj.unwrap()); + fs::write( + "/proc/self/oom_score_adj", + p.oom_score_adj.unwrap().to_string().as_bytes(), + )?; + } + + // set rlimit + for rl in p.rlimits.iter() { + log_child!(cfd_log, "set resource limit: {:?}", rl); + setrlimit(rl)?; + } + + if userns { + log_child!(cfd_log, "enter new user namespace"); + sched::unshare(CloneFlags::CLONE_NEWUSER)?; + } + + log_child!(cfd_log, "notify parent unshare user ns completed"); + // notify parent unshare user ns completed. + write_sync(cwfd, SYNC_SUCCESS, "")?; + // wait parent to setup user id mapping. + log_child!(cfd_log, "wait parent to setup user id mapping"); + read_sync(crfd)?; + + if userns { + log_child!(cfd_log, "setup user id"); + setid(Uid::from_raw(0), Gid::from_raw(0))?; + } + + let mut mount_fd = -1; + let mut bind_device = false; + for (s, fd) in to_join { + if s == CloneFlags::CLONE_NEWNS { + mount_fd = fd; + continue; + } + + log_child!(cfd_log, "join namespace {:?}", s); + if let Err(e) = sched::setns(fd, s) { + if s == CloneFlags::CLONE_NEWUSER { + if e.as_errno().unwrap() != Errno::EINVAL { + write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str()); + return Err(e.into()); + } + } else { + write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str()); + return Err(e.into()); + } + } + unistd::close(fd)?; + + if s == CloneFlags::CLONE_NEWUSER { + setid(Uid::from_raw(0), Gid::from_raw(0))?; + bind_device = true; + } + } + + sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?; + + if userns { + bind_device = true; + } + + if to_new.contains(CloneFlags::CLONE_NEWUTS) { + unistd::sethostname(&spec.hostname)?; + } + + let rootfs = spec.root.as_ref().unwrap().path.as_str(); + log_child!(cfd_log, "setup rootfs {}", rootfs); + let root = fs::canonicalize(rootfs)?; + let rootfs = root.to_str().unwrap(); + + if to_new.contains(CloneFlags::CLONE_NEWNS) { + // setup rootfs + mount::init_rootfs(cfd_log, &spec, &cm.paths, &cm.mounts, bind_device)?; + } + + if init { + // notify parent to run prestart hooks + write_sync(cwfd, SYNC_SUCCESS, "")?; + // wait parent run prestart hooks + let _ = read_sync(crfd)?; + } + + if mount_fd != -1 { + sched::setns(mount_fd, CloneFlags::CLONE_NEWNS)?; + unistd::close(mount_fd)?; + } + + if to_new.contains(CloneFlags::CLONE_NEWNS) { + // unistd::chroot(rootfs)?; + if no_pivot { + mount::ms_move_root(rootfs)?; + } else { + // pivot root + mount::pivot_rootfs(rootfs)?; + } + + // setup sysctl + set_sysctls(&linux.sysctl)?; + unistd::chdir("/")?; + } + + if to_new.contains(CloneFlags::CLONE_NEWNS) { + mount::finish_rootfs(cfd_log, &spec)?; + } + + if !oci_process.cwd.is_empty() { + unistd::chdir(oci_process.cwd.as_str())?; + } + + let guser = &oci_process.user; + + let uid = Uid::from_raw(guser.uid); + let gid = Gid::from_raw(guser.gid); + + setid(uid, gid)?; + + if guser.additional_gids.len() > 0 { + setgroups(guser.additional_gids.as_slice()).map_err(|e| { + write_sync( + cwfd, + SYNC_FAILED, + format!("setgroups failed: {:?}", e).as_str(), + ); + e + })?; + } + + // NoNewPeiviledges, Drop capabilities + if oci_process.no_new_privileges { + if let Err(_) = prctl::set_no_new_privileges(true) { + return Err(ErrorKind::ErrorCode("cannot set no new privileges".to_string()).into()); + } + } + + if oci_process.capabilities.is_some() { + let c = oci_process.capabilities.as_ref().unwrap(); + capabilities::drop_priviledges(cfd_log, c)?; + } + + if init { + // notify parent to run poststart hooks + // cfd is closed when return from join_namespaces + // should retunr cfile instead of cfd? + write_sync(cwfd, SYNC_SUCCESS, "")?; + } + + let args = oci_process.args.to_vec(); + let env = oci_process.env.to_vec(); + + let mut fifofd = -1; + if init { + fifofd = std::env::var(FIFO_FD)?.parse::().unwrap(); + } + + //cleanup the env inherited from parent + for (key, _) in env::vars() { + env::remove_var(key); + } + + // setup the envs + for e in env.iter() { + let v: Vec<&str> = e.splitn(2, "=").collect(); + if v.len() != 2 { + //info!(logger, "incorrect env config!"); + continue; + } + env::set_var(v[0], v[1]); + } + + let exec_file = Path::new(&args[0]); + log_child!(cfd_log, "process command: {:?}", &args); + if !exec_file.exists() { + match find_file(exec_file) { + Some(_) => (), + None => { + return Err( + ErrorKind::ErrorCode(format!("the file {} is not exist", &args[0])).into(), + ); + } + } + } + + // notify parent that the child's ready to start + write_sync(cwfd, SYNC_SUCCESS, "")?; + log_child!(cfd_log, "ready to run exec"); + unistd::close(cfd_log); + unistd::close(crfd); + unistd::close(cwfd); + + if oci_process.terminal { + unistd::setsid()?; + unsafe { + libc::ioctl(0, libc::TIOCSCTTY); + } + } + + if init { + let fd = fcntl::open( + format!("/proc/self/fd/{}", fifofd).as_str(), + OFlag::O_RDONLY | OFlag::O_CLOEXEC, + Mode::from_bits_truncate(0), + )?; + unistd::close(fifofd)?; + let mut buf: &mut [u8] = &mut [0]; + unistd::read(fd, &mut buf)?; + } + + do_exec(&args); + + Err(ErrorKind::ErrorCode("fail to create container".to_string()).into()) +} + impl BaseContainer for LinuxContainer { fn id(&self) -> String { self.id.clone() @@ -322,6 +648,7 @@ impl BaseContainer for LinuxContainer { } fn start(&mut self, mut p: Process) -> Result<()> { + let tty = p.tty; let fifo_file = format!("{}/{}", &self.root, EXEC_FIFO_FILENAME); info!(self.logger, "enter container.start!"); let mut fifofd: RawFd = -1; @@ -334,16 +661,12 @@ impl BaseContainer for LinuxContainer { fifofd = fcntl::open( fifo_file.as_str(), - OFlag::O_PATH | OFlag::O_CLOEXEC, + OFlag::O_PATH, Mode::from_bits(0).unwrap(), )?; } info!(self.logger, "exec fifo opened!"); - lazy_static::initialize(&NAMESPACES); - lazy_static::initialize(&DEFAULT_DEVICES); - lazy_static::initialize(&RLIMITMAPS); - lazy_static::initialize(&CAPSMAP); fscgroup::init_static(); if self.config.spec.is_none() { @@ -354,240 +677,197 @@ impl BaseContainer for LinuxContainer { if spec.linux.is_none() { return Err(ErrorKind::ErrorCode("no linux config".to_string()).into()); } - let linux = spec.linux.as_ref().unwrap(); - // get namespace vector to join/new - let nses = get_namespaces(&linux, p.init, self.init_process_pid)?; - info!(self.logger, "got namespaces {:?}!\n", nses); - let mut to_new = CloneFlags::empty(); - let mut to_join = Vec::new(); - let mut pidns = false; - let mut userns = false; - for ns in &nses { - let s = NAMESPACES.get(&ns.r#type.as_str()); - if s.is_none() { - return Err(ErrorKind::ErrorCode("invalid ns type".to_string()).into()); - } - let s = s.unwrap(); - if ns.path.is_empty() { - to_new.set(*s, true); - } else { - let fd = match fcntl::open(ns.path.as_str(), OFlag::empty(), Mode::empty()) { - Ok(v) => v, - Err(e) => { - info!( - self.logger, - "cannot open type: {} path: {}", - ns.r#type.clone(), - ns.path.clone() - ); - info!(self.logger, "error is : {}", e.as_errno().unwrap().desc()); - return Err(e.into()); - } - }; - // .chain_err(|| format!("fail to open ns {}", &ns.Type))?; - to_join.push((*s, fd)); - } - - if *s == CloneFlags::CLONE_NEWPID { - pidns = true; - } - } - - if to_new.contains(CloneFlags::CLONE_NEWUSER) { - userns = true; - } - - let mut parent: u32 = 0; let st = self.oci_state()?; - let (child, cfd) = match join_namespaces( - &self.logger, - &spec, - to_new, - &to_join, - pidns, - userns, - p.init, - self.config.no_pivot_root, - self.cgroup_manager.as_ref().unwrap(), - &st, - &mut parent, - ) { - Ok((u, v)) => (u, v), - Err(e) => { - if parent == 0 { - info!(self.logger, "parent process error out!"); - return Err(e); - } else if parent == 1 { - info!(self.logger, "child process 1 error out!"); - std::process::exit(-1); - } else { - info!(self.logger, "child process 2 error out!"); - std::process::exit(-2); + let execid = p.exec_id.clone(); + let (pfd_log, cfd_log) = unistd::pipe().chain_err(|| "failed to create pipe")?; + fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + + let logger = self.logger.new(o!("action" => "child process log")); + let log_handler = thread::spawn(move || { + let log_file = unsafe { std::fs::File::from_raw_fd(pfd_log) }; + let mut reader = BufReader::new(log_file); + let execid = execid; + + loop { + let mut line = String::new(); + match reader.read_line(&mut line) { + Err(e) => { + info!(logger, "read child process log error: {:?}", e); + break; + } + Ok(count) => { + if count == 0 { + info!( + logger, + "execid:{}, read child process log end", + execid.as_str() + ); + break; + } + + info!(logger, "execid:{},{}", execid.as_str(), line); + } } } + }); + + info!(self.logger, "exec fifo opened!"); + let (prfd, cwfd) = unistd::pipe().chain_err(|| "failed to create pipe")?; + let (crfd, pwfd) = unistd::pipe().chain_err(|| "failed to create pipe")?; + fcntl::fcntl(prfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + fcntl::fcntl(pwfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + + defer!({ + unistd::close(prfd); + unistd::close(pwfd); + }); + + let mut child_stdin = std::process::Stdio::null(); + let mut child_stdout = std::process::Stdio::null(); + let mut child_stderr = std::process::Stdio::null(); + let mut stdin = -1; + let mut stdout = -1; + let mut stderr = -1; + + if tty { + let pseduo = pty::openpty(None, None)?; + p.term_master = Some(pseduo.master); + fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + + child_stdin = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) }; + child_stdout = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) }; + child_stderr = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) }; + } else { + stdin = p.stdin.unwrap(); + stdout = p.stdout.unwrap(); + stderr = p.stderr.unwrap(); + child_stdin = unsafe { std::process::Stdio::from_raw_fd(stdin) }; + child_stdout = unsafe { std::process::Stdio::from_raw_fd(stdout) }; + child_stderr = unsafe { std::process::Stdio::from_raw_fd(stderr) }; + } + + let old_pid_ns = match fcntl::open(PID_NS_PATH, OFlag::O_CLOEXEC, Mode::empty()) { + Ok(v) => v, + Err(e) => { + error!( + self.logger, + "cannot open pid ns path: {} with error: {:?}", PID_NS_PATH, e + ); + return Err(e.into()); + } }; - info!(self.logger, "entered namespaces!"); - if child != Pid::from_raw(-1) { - // parent - p.pid = child.as_raw(); - self.status = Some("created".to_string()); - if p.init { - self.init_process_pid = p.pid; - unistd::close(fifofd)?; - } - self.created = SystemTime::now(); - // defer!({ self.processes.insert(p.pid, p); () }); - // parent process need to receive ptmx masterfd - // and set it up in process struct - unistd::close(p.stdin.unwrap())?; - unistd::close(p.stderr.unwrap())?; - unistd::close(p.stdout.unwrap())?; + //restore the parent's process's pid namespace. + defer!({ + sched::setns(old_pid_ns, CloneFlags::CLONE_NEWPID); + unistd::close(old_pid_ns); + }); - for &(_, fd) in &to_join { - let _ = unistd::close(fd); - } - - // create the pipes for notify process exited - let (exit_pipe_r, exit_pipe_w) = - unistd::pipe2(OFlag::O_CLOEXEC).chain_err(|| "failed to create pipe")?; - p.exit_pipe_w = Some(exit_pipe_w); - p.exit_pipe_r = Some(exit_pipe_r); - - let console_fd = if p.parent_console_socket.is_some() { - p.parent_console_socket.unwrap() - } else { - self.processes.insert(p.pid, p); - return Ok(()); - }; - - let mut v: Vec = vec![0; 40]; - let iov = IoVec::from_mut_slice(v.as_mut_slice()); - let mut c: Vec = vec![0; 40]; - - match socket::recvmsg(console_fd, &[iov], Some(&mut c), MsgFlags::empty()) { - Ok(rmsg) => { - let cmsg: Vec = rmsg.cmsgs().collect(); - // expect the vector lenght 1 - if cmsg.len() != 1 { - return Err( - ErrorKind::ErrorCode("error in semd/recvmsg!".to_string()).into() - ); - } - - match &cmsg[0] { - ControlMessageOwned::ScmRights(v) => { - if v.len() != 1 { - return Err(ErrorKind::ErrorCode( - "error in send/recvmsg!".to_string(), - ) - .into()); - } - - p.term_master = Some(v[0]); - } - // all other cases are error - _ => { - return Err( - ErrorKind::ErrorCode("error in send/recvmsg!".to_string()).into() - ); - } - } - } - Err(e) => return Err(ErrorKind::Nix(e).into()), - } - - unistd::close(p.parent_console_socket.unwrap())?; - unistd::close(p.console_socket.unwrap())?; - - // turn off echo - // let mut term = termios::tcgetattr(p.term_master.unwrap())?; - // term.local_flags &= !(LocalFlags::ECHO | LocalFlags::ICANON); - // termios::tcsetattr(p.term_master.unwrap(), SetArg::TCSANOW, &term)?; - - self.processes.insert(p.pid, p); - - return Ok(()); - } // end parent - - // setup stdio in child process - // need fd to send master fd to parent... store the fd in - // process struct? - setup_stdio(&p)?; - - if to_new.contains(CloneFlags::CLONE_NEWNS) { - info!(self.logger, "finish rootfs!"); - mount::finish_rootfs(spec)?; + let mut pidns = None; + if !p.init { + pidns = Some(get_pid_namespace(&self.logger, linux)?); } - if !p.oci.cwd.is_empty() { - debug!(self.logger, "cwd: {}", p.oci.cwd.as_str()); - unistd::chdir(p.oci.cwd.as_str())?; + if pidns.is_some() { + sched::setns(pidns.unwrap(), CloneFlags::CLONE_NEWPID) + .chain_err(|| "failed to join pidns")?; + unistd::close(pidns.unwrap())?; + } else { + sched::unshare(CloneFlags::CLONE_NEWPID)?; } - // setup uid/gid - info!(self.logger, "{:?}", &p.oci); + let exec_path = std::env::current_exe()?; + let mut child = std::process::Command::new(exec_path); + let mut child = child + .arg("init") + .stdin(child_stdin) + .stdout(child_stdout) + .stderr(child_stderr) + .env(INIT, format!("{}", p.init)) + .env(NO_PIVOT, format!("{}", self.config.no_pivot_root)) + .env(CRFD_FD, format!("{}", crfd)) + .env(CWFD_FD, format!("{}", cwfd)) + .env(CLOG_FD, format!("{}", cfd_log)); - let guser = &p.oci.user; - - let uid = Uid::from_raw(guser.uid); - let gid = Gid::from_raw(guser.gid); - - setid(uid, gid)?; - - if guser.additional_gids.len() > 0 { - setgroups(guser.additional_gids.as_slice())?; + if p.init { + child = child.env(FIFO_FD, format!("{}", fifofd)); } - // NoNewPeiviledges, Drop capabilities - if p.oci.no_new_privileges { - if let Err(_) = prctl::set_no_new_privileges(true) { - return Err( - ErrorKind::ErrorCode("cannot set no new privileges".to_string()).into(), + let mut child = child.spawn()?; + + unistd::close(crfd)?; + unistd::close(cwfd)?; + unistd::close(cfd_log)?; + + p.pid = child.id() as i32; + if p.init { + self.init_process_pid = p.pid; + } + + if p.init { + unistd::close(fifofd); + } + + info!(self.logger, "execid: {}, child pid: {}", p.exec_id, p.pid); + + match join_namespaces( + &self.logger, + &spec, + &p, + self.cgroup_manager.as_ref().unwrap(), + &st, + &mut child, + pwfd, + prfd, + ) { + Ok(_) => (), + Err(e) => { + error!( + self.logger, + "execid:{}, create container process error {:?}", + p.exec_id.as_str(), + e ); + // kill the child process. + signal::kill(Pid::from_raw(p.pid), Some(Signal::SIGKILL)); + return Err(e); } - } + }; - if p.oci.capabilities.is_some() { - let c = p.oci.capabilities.as_ref().unwrap(); - info!(self.logger, "drop capabilities!"); - capabilities::drop_priviledges(&self.logger, c)?; - } + info!( + self.logger, + "execid: {}, entered namespaces!", + p.exec_id.as_str() + ); + + self.status = Some("created".to_string()); + self.created = SystemTime::now(); + + // create the pipes for notify process exited + let (exit_pipe_r, exit_pipe_w) = unistd::pipe2(OFlag::O_CLOEXEC) + .chain_err(|| "failed to create pipe") + .map_err(|e| { + signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL)); + e + })?; + + p.exit_pipe_w = Some(exit_pipe_w); + p.exit_pipe_r = Some(exit_pipe_r); if p.init { - // notify parent to run poststart hooks - // cfd is closed when return from join_namespaces - // should retunr cfile instead of cfd? - write_sync(cfd, 0)?; + let spec = self.config.spec.as_mut().unwrap(); + update_namespaces(&self.logger, spec, p.pid)?; } + self.processes.insert(p.pid, p); - // new and the stat parent process - // For init process, we need to setup a lot of things - // For exec process, only need to join existing namespaces, - // the namespaces are got from init process or from - // saved spec. - debug!(self.logger, "before setup execfifo!"); - info!(self.logger, "{}", VER_MARKER); - if p.init { - let fd = fcntl::open( - format!("/proc/self/fd/{}", fifofd).as_str(), - OFlag::O_RDONLY | OFlag::O_CLOEXEC, - Mode::from_bits_truncate(0), - )?; - unistd::close(fifofd)?; - let mut buf: &mut [u8] = &mut [0]; - unistd::read(fd, &mut buf)?; - } - - // exec process - let args = p.oci.args.to_vec(); - let env = p.oci.env.to_vec(); - do_exec(&self.logger, &args[0], &args, &env)?; - - Err(ErrorKind::ErrorCode("fail to create container".to_string()).into()) + info!(self.logger, "wait on child log handler"); + log_handler.join(); + info!(self.logger, "create process completed"); + return Ok(()); } fn run(&mut self, p: Process) -> Result<()> { @@ -619,6 +899,7 @@ impl BaseContainer for LinuxContainer { } self.status = Some("stopped".to_string()); + fs::remove_dir_all(&self.root)?; Ok(()) } @@ -654,9 +935,26 @@ impl BaseContainer for LinuxContainer { use std::env; -fn do_exec(logger: &Logger, path: &str, args: &[String], env: &[String]) -> Result<()> { - let logger = logger.new(o!("command" => "exec")); +fn find_file

(exe_name: P) -> Option +where + P: AsRef, +{ + env::var_os("PATH").and_then(|paths| { + env::split_paths(&paths) + .filter_map(|dir| { + let full_path = dir.join(&exe_name); + if full_path.is_file() { + Some(full_path) + } else { + None + } + }) + .next() + }) +} +fn do_exec(args: &[String]) -> Result<()> { + let path = &args[0]; let p = CString::new(path.to_string()).unwrap(); let sa: Vec = args .iter() @@ -664,126 +962,41 @@ fn do_exec(logger: &Logger, path: &str, args: &[String], env: &[String]) -> Resu .collect(); let a: Vec<&CStr> = sa.iter().map(|s| s.as_c_str()).collect(); - for (key, _) in env::vars() { - env::remove_var(key); - } - - for e in env.iter() { - let v: Vec<&str> = e.splitn(2, "=").collect(); - if v.len() != 2 { - info!(logger, "incorrect env config!"); - continue; - } - env::set_var(v[0], v[1]); - } - /* - let env: Vec = env - .iter() - .map(|s| CString::new(s.to_string()).unwrap_or_default()) - .collect(); - */ - // execvp doesn't use env for the search path, so we set env manually - debug!(logger, "exec process right now!"); if let Err(e) = unistd::execvp(p.as_c_str(), a.as_slice()) { - info!(logger, "execve failed!!!"); - info!(logger, "binary: {:?}, args: {:?}, envs: {:?}", p, a, env); + // info!(logger, "execve failed!!!"); + // info!(logger, "binary: {:?}, args: {:?}, envs: {:?}", p, a, env); match e { nix::Error::Sys(errno) => { - info!(logger, "{}", errno.desc()); - } - Error::InvalidPath => { - info!(logger, "invalid path"); - } - Error::InvalidUtf8 => { - info!(logger, "invalid utf8"); - } - Error::UnsupportedOperation => { - info!(logger, "unsupported operation"); + std::process::exit(errno as i32); } + _ => std::process::exit(-2), } - std::process::exit(-2); } // should never reach here Ok(()) } -fn get_namespaces(linux: &Linux, init: bool, init_pid: pid_t) -> Result> { - let mut ns: Vec = Vec::new(); - if init { - for i in &linux.namespaces { - ns.push(LinuxNamespace { - r#type: i.r#type.clone(), - path: i.path.clone(), - }); +fn update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> Result<()> { + let linux = match spec.linux.as_mut() { + None => { + return Err( + ErrorKind::ErrorCode("Spec didn't container linux field".to_string()).into(), + ) } - } else { - for i in NAMESPACES.keys() { - let ns_path = format!("/proc/{}/ns/{}", init_pid, TYPETONAME.get(i).unwrap()); - let ns_path_buf = Path::new(&ns_path).read_link()?; + Some(l) => l, + }; - let init_ns_path = format!( + let namespaces = linux.namespaces.as_mut_slice(); + for namespace in namespaces.iter_mut() { + if TYPETONAME.contains_key(namespace.r#type.as_str()) { + let ns_path = format!( "/proc/{}/ns/{}", - unistd::getpid(), - TYPETONAME.get(i).unwrap() + init_pid, + TYPETONAME.get(namespace.r#type.as_str()).unwrap() ); - let init_ns_path_buf = Path::new(&init_ns_path).read_link()?; - // ignore the namespace which is the same with system init namespace, - // since it shouldn't be join. - if !ns_path_buf.eq(&init_ns_path_buf) { - ns.push(LinuxNamespace { - r#type: i.to_string(), - path: ns_path, - }); - } - } - } - Ok(ns) -} - -pub const PIDSIZE: usize = mem::size_of::(); - -fn read_sync(fd: RawFd) -> Result { - let mut v: [u8; PIDSIZE] = [0; PIDSIZE]; - let mut len = 0; - - loop { - match unistd::read(fd, &mut v[len..]) { - Ok(l) => { - len += l; - if len == PIDSIZE { - break; - } - } - - Err(e) => { - if e != Error::from_errno(Errno::EINTR) { - return Err(e.into()); - } - } - } - } - - Ok(pid_t::from_be_bytes(v)) -} - -fn write_sync(fd: RawFd, pid: pid_t) -> Result<()> { - let buf = pid.to_be_bytes(); - let mut len = 0; - - loop { - match unistd::write(fd, &buf[len..]) { - Ok(l) => { - len += l; - if len == PIDSIZE { - break; - } - } - - Err(e) => { - if e != Error::from_errno(Errno::EINTR) { - return Err(e.into()); - } + if namespace.path == "" { + namespace.path = ns_path; } } } @@ -791,383 +1004,215 @@ fn write_sync(fd: RawFd, pid: pid_t) -> Result<()> { Ok(()) } +fn get_pid_namespace(logger: &Logger, linux: &Linux) -> Result { + for ns in &linux.namespaces { + if ns.r#type == "pid" { + if ns.path == "" { + error!(logger, "pid ns path is empty"); + return Err(ErrorKind::ErrorCode("pid ns path is empty".to_string()).into()); + } + + let fd = match fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()) { + Ok(v) => v, + Err(e) => { + error!( + logger, + "cannot open type: {} path: {}", + ns.r#type.clone(), + ns.path.clone() + ); + error!(logger, "error is : {}", e.as_errno().unwrap().desc()); + return Err(e.into()); + } + }; + + return Ok(fd); + } + } + + Err(ErrorKind::ErrorCode("cannot find the pid ns".to_string()).into()) +} + +fn is_userns_enabled(linux: &Linux) -> bool { + for ns in &linux.namespaces { + if ns.r#type == "user" && ns.path == "" { + return true; + } + } + + false +} + +fn get_namespaces(linux: &Linux) -> Result> { + let mut ns: Vec = Vec::new(); + for i in &linux.namespaces { + ns.push(LinuxNamespace { + r#type: i.r#type.clone(), + path: i.path.clone(), + }); + } + Ok(ns) +} + fn join_namespaces( logger: &Logger, spec: &Spec, - to_new: CloneFlags, - to_join: &Vec<(CloneFlags, RawFd)>, - pidns: bool, - userns: bool, - init: bool, - no_pivot: bool, + p: &Process, cm: &FsManager, st: &OCIState, - parent: &mut u32, -) -> Result<(Pid, RawFd)> { + child: &mut Child, + pwfd: RawFd, + prfd: RawFd, +) -> Result<()> { let logger = logger.new(o!("action" => "join-namespaces")); - // let ccond = Cond::new().chain_err(|| "create cond failed")?; - // let pcond = Cond::new().chain_err(|| "create cond failed")?; - let (pfd, cfd) = unistd::pipe2(OFlag::O_CLOEXEC).chain_err(|| "failed to create pipe")?; - let (crfd, pwfd) = unistd::pipe2(OFlag::O_CLOEXEC)?; - let linux = spec.linux.as_ref().unwrap(); let res = linux.resources.as_ref(); - match unistd::fork()? { - ForkResult::Parent { child } => { - // let mut pfile = unsafe { File::from_raw_fd(pfd) }; - unistd::close(cfd)?; - unistd::close(crfd)?; + let userns = is_userns_enabled(linux); - //wait child setup user namespace - let _ = read_sync(pfd)?; + info!( + logger, + "execid: {}, try to send spec from parent to child", + p.exec_id.as_str() + ); + let spec_str = serde_json::to_string(spec)?; + write_sync(pwfd, SYNC_DATA, spec_str.as_str())?; - if userns { - // setup uid/gid mappings - write_mappings( - &logger, - &format!("/proc/{}/uid_map", child.as_raw()), - &linux.uid_mappings, - )?; - write_mappings( - &logger, - &format!("/proc/{}/gid_map", child.as_raw()), - &linux.gid_mappings, - )?; - } + info!( + logger, + "execid: {}, wait child received oci spec", + p.exec_id.as_str() + ); - // apply cgroups - if init { - if res.is_some() { - info!(logger, "apply cgroups!"); - cm.set(res.unwrap(), false)?; - } - } + // child.try_wait()?; + read_sync(prfd)?; - if res.is_some() { - cm.apply(child.as_raw())?; - } + info!( + logger, + "execid: {}, send oci process from parent to child", + p.exec_id.as_str() + ); + let process_str = serde_json::to_string(&p.oci)?; + write_sync(pwfd, SYNC_DATA, process_str.as_str())?; - write_sync(pwfd, 0)?; + info!( + logger, + "execid: {}, wait child received oci process", + p.exec_id.as_str() + ); + read_sync(prfd)?; - let mut pid = child.as_raw(); - info!(logger, "first child! {}", pid); - info!(logger, "wait for final child!"); - if pidns { - pid = read_sync(pfd)?; - // pfile.read_to_string(&mut json)?; - /* - let msg: SyncPC = match serde_json::from_reader(&mut pfile) { - Ok(u) => u, - Err(e) => { - match e.classify() { - Category::Io => info!("Io error!"), - Category::Syntax => info!("syntax error!"), - Category::Data => info!("data error!"), - Category::Eof => info!("end of file!"), - } + let cm_str = serde_json::to_string(cm)?; + write_sync(pwfd, SYNC_DATA, cm_str.as_str())?; - return Err(ErrorKind::Serde(e).into()); - } - }; - */ - // notify child continue - info!(logger, "got final child pid! {}", pid); - write_sync(pwfd, 0)?; - info!(logger, "resume child!"); - // wait for child to exit - // Since the child would be reaped by our reaper, so - // there is no need reap the child here. - // wait::waitpid(Some(child), None); - } - // read out child pid here. we don't use - // cgroup to get it - // and the wait for child exit to get grandchild - - if init { - info!(logger, "wait for hook!"); - let _ = read_sync(pfd)?; - - // run prestart hook - if spec.hooks.is_some() { - info!(logger, "prestart"); - let hooks = spec.hooks.as_ref().unwrap(); - for h in hooks.prestart.iter() { - execute_hook(&logger, h, st)?; - } - } - - // notify child run prestart hooks completed - write_sync(pwfd, 0)?; - - // wait to run poststart hook - let _ = read_sync(pfd)?; - //run poststart hook - if spec.hooks.is_some() { - info!(logger, "poststart"); - let hooks = spec.hooks.as_ref().unwrap(); - for h in hooks.poststart.iter() { - execute_hook(&logger, h, st)?; - } - } - } - unistd::close(pfd)?; - unistd::close(pwfd)?; - - return Ok((Pid::from_raw(pid), cfd)); - } - ForkResult::Child => { - *parent = 1; - unistd::close(pfd)?; - unistd::close(pwfd)?; - // set oom_score_adj - - let p = if spec.process.is_some() { - spec.process.as_ref().unwrap() - } else { - return Err(nix::Error::Sys(Errno::EINVAL).into()); - }; - - if p.oom_score_adj.is_some() { - fs::write( - "/proc/self/oom_score_adj", - p.oom_score_adj.unwrap().to_string().as_bytes(), - )? - } - - // set rlimit - for rl in p.rlimits.iter() { - setrlimit(rl)?; - } - - if userns { - sched::unshare(CloneFlags::CLONE_NEWUSER)?; - } - - write_sync(cfd, 0)?; - let _ = read_sync(crfd)?; - - if userns { - setid(Uid::from_raw(0), Gid::from_raw(0))?; - } - } - } - - // child process continues - // let mut cfile = unsafe { File::from_raw_fd(cfd) }; - let mut mount_fd = -1; - let mut bind_device = false; - for &(s, fd) in to_join { - if s == CloneFlags::CLONE_NEWNS { - mount_fd = fd; - continue; - } - - // just skip user namespace for now - // we cannot join user namespace in multithreaded - // program, which is us(kata-agent using grpc) - // To fix this - // 1. write kata-agent as singlethread program - // 2. use a binary to exec OR self exec to enter - // namespaces before multithreaded, the way - // rustjail works - /* - if s == CloneFlags::CLONE_NEWUSER { - unistd::close(fd)?; - continue; - } - */ - if let Err(e) = sched::setns(fd, s) { - info!(logger, "setns error: {}", e.as_errno().unwrap().desc()); - info!(logger, "setns: ns type: {:?}", s); - if s == CloneFlags::CLONE_NEWUSER { - if e.as_errno().unwrap() != Errno::EINVAL { - return Err(e.into()); - } - } else { - return Err(e.into()); - } - } - unistd::close(fd)?; - - if s == CloneFlags::CLONE_NEWUSER { - setid(Uid::from_raw(0), Gid::from_raw(0))?; - bind_device = true; - } - } - - info!(logger, "to_new: {:?}", to_new); - sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?; + //wait child setup user namespace + info!( + logger, + "execid: {}, wait child setup user namespace", + p.exec_id.as_str() + ); + read_sync(prfd)?; if userns { - bind_device = true; + info!( + logger, + "execid: {}, setup uid/gid mappings", + p.exec_id.as_str() + ); + // setup uid/gid mappings + write_mappings( + &logger, + &format!("/proc/{}/uid_map", p.pid), + &linux.uid_mappings, + )?; + write_mappings( + &logger, + &format!("/proc/{}/gid_map", p.pid), + &linux.gid_mappings, + )?; } - // create a pipe for sync between parent and child. - // here we should make sure the parent return pid before - // the child notify grand parent to run hooks, otherwise - // both of the parent and his child would write cfd at the same - // time which would mesh the grand parent to read. - let (chfd, phfd) = unistd::pipe2(OFlag::O_CLOEXEC) - .chain_err(|| "failed to create pipe for syncing run hooks")?; + // apply cgroups + if p.init { + if res.is_some() { + info!(logger, "apply cgroups!"); + cm.set(res.unwrap(), false)?; + } + } - if pidns { - match unistd::fork()? { - ForkResult::Parent { child } => { - unistd::close(chfd)?; - // set child pid to topmost parent and the exit - write_sync(cfd, child.as_raw())?; + if res.is_some() { + cm.apply(p.pid)?; + } - info!( - logger, - "json: {}", - serde_json::to_string(&SyncPC { - pid: child.as_raw() - }) - .unwrap() - ); - // wait for parent read it and the continue - info!(logger, "after send out child pid!"); - let _ = read_sync(crfd)?; + info!( + logger, + "execid: {}, notify child to continue", + p.exec_id.as_str() + ); + // notify child to continue + write_sync(pwfd, SYNC_SUCCESS, "")?; - // notify child to continue. - write_sync(phfd, 0)?; - std::process::exit(0); + if p.init { + info!( + logger, + "execid:{}, notify child parent ready to run prestart hook!", + p.exec_id.as_str() + ); + let _ = read_sync(prfd)?; + + info!( + logger, + "execid:{}, get ready to run prestart hook!", + p.exec_id.as_str() + ); + + // run prestart hook + if spec.hooks.is_some() { + info!(logger, "execid:{}, prestart", p.exec_id.as_str()); + let hooks = spec.hooks.as_ref().unwrap(); + for h in hooks.prestart.iter() { + execute_hook(&logger, h, st)?; } - ForkResult::Child => { - *parent = 2; - unistd::close(phfd)?; + } + + // notify child run prestart hooks completed + info!( + logger, + "execid:{}, notify child run prestart hook completed!", + p.exec_id.as_str() + ); + write_sync(pwfd, SYNC_SUCCESS, "")?; + + info!( + logger, + "execid:{}, notify child parent ready to run poststart hook!", + p.exec_id.as_str() + ); + // wait to run poststart hook + read_sync(prfd)?; + info!( + logger, + "execid:{}, get ready to run poststart hook!", + p.exec_id.as_str() + ); + + //run poststart hook + if spec.hooks.is_some() { + info!(logger, "execid:{}, poststart", p.exec_id.as_str()); + let hooks = spec.hooks.as_ref().unwrap(); + for h in hooks.poststart.iter() { + execute_hook(&logger, h, st)?; } } } - if to_new.contains(CloneFlags::CLONE_NEWUTS) { - unistd::sethostname(&spec.hostname)?; - } - - let rootfs = spec.root.as_ref().unwrap().path.as_str(); - let root = fs::canonicalize(rootfs)?; - let rootfs = root.to_str().unwrap(); - - if to_new.contains(CloneFlags::CLONE_NEWNS) { - // setup rootfs - info!(logger, "setup rootfs!"); - mount::init_rootfs(&logger, &spec, &cm.paths, &cm.mounts, bind_device)?; - } - - // wait until parent notified - if pidns { - let _ = read_sync(chfd)?; - } - unistd::close(chfd)?; - - if init { - // notify parent to run prestart hooks - write_sync(cfd, 0)?; - // wait parent run prestart hooks - let _ = read_sync(crfd)?; - } - - unistd::close(crfd)?; - - if mount_fd != -1 { - sched::setns(mount_fd, CloneFlags::CLONE_NEWNS)?; - unistd::close(mount_fd)?; - } - - if to_new.contains(CloneFlags::CLONE_NEWNS) { - // unistd::chroot(rootfs)?; - if no_pivot { - mount::ms_move_root(rootfs)?; - } else { - // pivot root - mount::pivot_rootfs(rootfs)?; - } - - // setup sysctl - set_sysctls(&linux.sysctl)?; - unistd::chdir("/")?; - if let Err(_) = stat::stat("marker") { - info!(logger, "not in expect root!!"); - } - info!(logger, "in expect rootfs!"); - - if let Err(_) = stat::stat("/bin/sh") { - info!(logger, "no '/bin/sh'???"); - } - } - - // notify parent to continue before block on exec fifo - - info!(logger, "rootfs: {}", &rootfs); - - // block on exec fifo - - Ok((Pid::from_raw(-1), cfd)) -} - -fn setup_stdio(p: &Process) -> Result<()> { - if p.console_socket.is_some() { - // we can setup ptmx master for process - // turn off echo - // let mut term = termios::tcgetattr(0)?; - // termios::cfmakeraw(&mut term); - // term.local_flags &= !(LocalFlags::ECHO | LocalFlags::ICANON); - // term.control_chars[VMIN] = 1; - // term.control_chars[VTIME] = 0; - - let pseduo = pty::openpty(None, None)?; - defer!(unistd::close(pseduo.master).unwrap()); - let data: &[u8] = b"/dev/ptmx"; - let iov = [IoVec::from_slice(&data)]; - let fds = [pseduo.master]; - let cmsg = ControlMessage::ScmRights(&fds); - let mut console_fd = p.console_socket.unwrap(); - - socket::sendmsg(console_fd, &iov, &[cmsg], MsgFlags::empty(), None)?; - - unistd::close(console_fd)?; - unistd::close(p.parent_console_socket.unwrap())?; - console_fd = pseduo.slave; - - unistd::setsid()?; - unsafe { - libc::ioctl(console_fd, libc::TIOCSCTTY); - } - unistd::dup2(console_fd, 0)?; - unistd::dup2(console_fd, 1)?; - unistd::dup2(console_fd, 2)?; - - // turn off echo - // let mut term = termios::tcgetattr(0)?; - // term.local_flags &= !(LocalFlags::ECHO | LocalFlags::ICANON); - // termios::tcsetattr(0, SetArg::TCSANOW, &term)?; - - if console_fd > 2 { - unistd::close(console_fd)?; - } - } else { - // dup stdin/stderr/stdout - unistd::dup2(p.stdin.unwrap(), 0)?; - unistd::dup2(p.stdout.unwrap(), 1)?; - unistd::dup2(p.stderr.unwrap(), 2)?; - - if p.stdin.unwrap() > 2 { - unistd::close(p.stdin.unwrap())?; - } - - if p.stdout.unwrap() > 2 { - unistd::close(p.stdout.unwrap())?; - } - if p.stderr.unwrap() > 2 { - unistd::close(p.stderr.unwrap())?; - } - } - - unistd::close(p.parent_stdin.unwrap())?; - unistd::close(p.parent_stdout.unwrap())?; - unistd::close(p.parent_stderr.unwrap())?; + info!( + logger, + "execid:{}, wait for child process ready to run exec", + p.exec_id.as_str() + ); + read_sync(prfd)?; Ok(()) } @@ -1441,7 +1486,9 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> { let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?; match unistd::fork()? { ForkResult::Parent { child: _ch } => { - let status = read_sync(rfd)?; + let buf = read_sync(rfd)?; + let buf_array: [u8; 4] = [buf[0], buf[1], buf[2], buf[3]]; + let status: i32 = i32::from_be_bytes(buf_array); info!(logger, "hook child: {}", _ch); @@ -1572,7 +1619,7 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> { }; handle.join().unwrap(); - let _ = write_sync(wfd, status); + let _ = write_sync(wfd, status, ""); // let _ = wait::waitpid(Pid::from_raw(pid), // Some(WaitPidFlag::WEXITED | WaitPidFlag::__WALL)); std::process::exit(0); diff --git a/src/agent/rustjail/src/errors.rs b/src/agent/rustjail/src/errors.rs index 130f610e16..8478a50b6d 100644 --- a/src/agent/rustjail/src/errors.rs +++ b/src/agent/rustjail/src/errors.rs @@ -16,11 +16,13 @@ error_chain! { Ffi(std::ffi::NulError); Caps(caps::errors::Error); Serde(serde_json::Error); - UTF8(std::string::FromUtf8Error); + FromUTF8(std::string::FromUtf8Error); Parse(std::num::ParseIntError); Scanfmt(scan_fmt::parse::ScanError); Ip(std::net::AddrParseError); Regex(regex::Error); + EnvVar(std::env::VarError); + UTF8(std::str::Utf8Error); } // define new errors errors { diff --git a/src/agent/rustjail/src/lib.rs b/src/agent/rustjail/src/lib.rs index f429b371ed..b3866d988d 100644 --- a/src/agent/rustjail/src/lib.rs +++ b/src/agent/rustjail/src/lib.rs @@ -42,14 +42,14 @@ macro_rules! sl { }; } +pub mod capabilities; pub mod cgroups; pub mod container; pub mod errors; pub mod mount; pub mod process; pub mod specconv; -// pub mod sync; -pub mod capabilities; +pub mod sync; pub mod validator; // pub mod factory; @@ -66,8 +66,6 @@ pub mod validator; // construtc ociSpec from grpcSpec, which is needed for hook // execution. since hooks read config.json -use std::collections::HashMap; -use std::mem::MaybeUninit; use oci::{ Box as ociBox, Hooks as ociHooks, Linux as ociLinux, LinuxCapabilities as ociLinuxCapabilities, Mount as ociMount, POSIXRlimit as ociPOSIXRlimit, Process as ociProcess, Root as ociRoot, @@ -77,6 +75,8 @@ use protocols::oci::{ Hooks as grpcHooks, Linux as grpcLinux, Mount as grpcMount, Process as grpcProcess, Root as grpcRoot, Spec as grpcSpec, }; +use std::collections::HashMap; +use std::mem::MaybeUninit; pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess { let console_size = if p.ConsoleSize.is_some() { diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index aff77b19af..52a1d0dde3 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -14,6 +14,7 @@ use oci::{LinuxDevice, Mount, Spec}; use std::collections::{HashMap, HashSet}; use std::fs::{self, OpenOptions}; use std::os::unix; +use std::os::unix::io::RawFd; use std::path::{Path, PathBuf}; use path_absolutize::*; @@ -23,10 +24,11 @@ use std::io::{BufRead, BufReader}; use crate::container::DEFAULT_DEVICES; use crate::errors::*; +use crate::sync::write_count; use lazy_static; use std::string::ToString; -use slog::Logger; +use crate::log_child; // Info reveals information about a particular mounted filesystem. This // struct is populated from the content in the /proc//mountinfo file. @@ -97,7 +99,7 @@ lazy_static! { } pub fn init_rootfs( - logger: &Logger, + cfd_log: RawFd, spec: &Spec, cpath: &HashMap, mounts: &HashMap, @@ -133,13 +135,13 @@ pub fn init_rootfs( return Err(ErrorKind::Nix(nix::Error::Sys(Errno::EINVAL)).into()); } if m.r#type == "cgroup" { - mount_cgroups(logger, &m, rootfs, flags, &data, cpath, mounts)?; + mount_cgroups(cfd_log, &m, rootfs, flags, &data, cpath, mounts)?; } else { if m.destination == "/dev" { flags &= !MsFlags::MS_RDONLY; } - mount_from(&m, &rootfs, flags, &data, "")?; + mount_from(cfd_log, &m, &rootfs, flags, &data, "")?; } } @@ -156,7 +158,7 @@ pub fn init_rootfs( } fn mount_cgroups( - logger: &Logger, + cfd_log: RawFd, m: &Mount, rootfs: &str, flags: MsFlags, @@ -173,8 +175,8 @@ fn mount_cgroups( }; let cflags = MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV; - info!(logger, "tmpfs"); - mount_from(&ctm, rootfs, cflags, "", "")?; + // info!(logger, "tmpfs"); + mount_from(cfd_log, &ctm, rootfs, cflags, "", "")?; let olddir = unistd::getcwd()?; unistd::chdir(rootfs)?; @@ -183,7 +185,7 @@ fn mount_cgroups( // bind mount cgroups for (key, mount) in mounts.iter() { - info!(logger, "{}", key); + log_child!(cfd_log, "mount cgroup subsystem {}", key); let source = if cpath.get(key).is_some() { cpath.get(key).unwrap() } else { @@ -210,7 +212,7 @@ fn mount_cgroups( srcs.insert(source.to_string()); - info!(logger, "{}", destination.as_str()); + log_child!(cfd_log, "mount destination: {}", destination.as_str()); let bm = Mount { source: source.to_string(), @@ -219,25 +221,24 @@ fn mount_cgroups( options: Vec::new(), }; - mount_from( - &bm, - rootfs, - flags | MsFlags::MS_REC | MsFlags::MS_BIND, - "", - "", - )?; + let mut mount_flags: MsFlags = flags | MsFlags::MS_REC | MsFlags::MS_BIND; + if key.contains("systemd") { + mount_flags &= !MsFlags::MS_RDONLY; + } + mount_from(cfd_log, &bm, rootfs, mount_flags, "", "")?; if key != base { let src = format!("{}/{}", m.destination.as_str(), key); match unix::fs::symlink(destination.as_str(), &src[1..]) { Err(e) => { - info!( - logger, + log_child!( + cfd_log, "symlink: {} {} err: {}", key, destination.as_str(), e.to_string() ); + return Err(e.into()); } Ok(_) => {} @@ -421,7 +422,14 @@ fn parse_mount(m: &Mount) -> (MsFlags, String) { (flags, data.join(",")) } -fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) -> Result<()> { +fn mount_from( + cfd_log: RawFd, + m: &Mount, + rootfs: &str, + flags: MsFlags, + data: &str, + _label: &str, +) -> Result<()> { let d = String::from(data); let dest = format!("{}{}", rootfs, &m.destination); @@ -437,8 +445,8 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) match fs::create_dir_all(&dir) { Ok(_) => {} Err(e) => { - info!( - sl!(), + log_child!( + cfd_log, "creat dir {}: {}", dir.to_str().unwrap(), e.to_string() @@ -456,8 +464,6 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) PathBuf::from(&m.source) }; - info!(sl!(), "{}, {}", src.to_str().unwrap(), dest.as_str()); - // ignore this check since some mount's src didn't been a directory // such as tmpfs. /* @@ -472,7 +478,12 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) match stat::stat(dest.as_str()) { Ok(_) => {} Err(e) => { - info!(sl!(), "{}: {}", dest.as_str(), e.as_errno().unwrap().desc()); + log_child!( + cfd_log, + "{}: {}", + dest.as_str(), + e.as_errno().unwrap().desc() + ); } } @@ -485,7 +496,7 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) ) { Ok(_) => {} Err(e) => { - info!(sl!(), "mount error: {}", e.as_errno().unwrap().desc()); + log_child!(cfd_log, "mount error: {}", e.as_errno().unwrap().desc()); return Err(e.into()); } } @@ -508,8 +519,8 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) None::<&str>, ) { Err(e) => { - info!( - sl!(), + log_child!( + cfd_log, "remout {}: {}", dest.as_str(), e.as_errno().unwrap().desc() @@ -616,9 +627,9 @@ fn bind_dev(dev: &LinuxDevice) -> Result<()> { Ok(()) } -pub fn finish_rootfs(spec: &Spec) -> Result<()> { +pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> { let olddir = unistd::getcwd()?; - info!(sl!(), "{}", olddir.to_str().unwrap()); + log_child!(cfd_log, "old cwd: {}", olddir.to_str().unwrap()); unistd::chdir("/")?; if spec.linux.is_some() { let linux = spec.linux.as_ref().unwrap(); diff --git a/src/agent/rustjail/src/process.rs b/src/agent/rustjail/src/process.rs index 2a525b48f9..d4deb0b845 100644 --- a/src/agent/rustjail/src/process.rs +++ b/src/agent/rustjail/src/process.rs @@ -34,10 +34,8 @@ pub struct Process { pub extra_files: Vec, // pub caps: Capabilities, // pub rlimits: Vec, - pub console_socket: Option, pub term_master: Option, - // parent end of fds - pub parent_console_socket: Option, + pub tty: bool, pub parent_stdin: Option, pub parent_stdout: Option, pub parent_stderr: Option, @@ -89,9 +87,8 @@ impl Process { exit_pipe_w: None, exit_pipe_r: None, extra_files: Vec::new(), - console_socket: None, + tty: ocip.terminal, term_master: None, - parent_console_socket: None, parent_stdin: None, parent_stdout: None, parent_stderr: None, @@ -104,44 +101,21 @@ impl Process { info!(logger, "before create console socket!"); - if ocip.terminal { - let (psocket, csocket) = match socket::socketpair( - AddressFamily::Unix, - SockType::Stream, - None, - SockFlag::SOCK_CLOEXEC, - ) { - Ok((u, v)) => (u, v), - Err(e) => { - match e { - Error::Sys(errno) => { - info!(logger, "socketpair: {}", errno.desc()); - } - _ => { - info!(logger, "socketpair: other error!"); - } - } - return Err(e); - } - }; - p.parent_console_socket = Some(psocket); - p.console_socket = Some(csocket); + if !p.tty { + info!(logger, "created console socket!"); + + let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?; + p.parent_stdin = Some(pstdin); + p.stdin = Some(stdin); + + let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; + p.parent_stdout = Some(pstdout); + p.stdout = Some(stdout); + + let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; + p.parent_stderr = Some(pstderr); + p.stderr = Some(stderr); } - - info!(logger, "created console socket!"); - - let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?; - p.parent_stdin = Some(pstdin); - p.stdin = Some(stdin); - - let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; - p.parent_stdout = Some(pstdout); - p.stdout = Some(stdout); - - let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; - p.parent_stderr = Some(pstderr); - p.stderr = Some(stderr); - Ok(p) } } diff --git a/src/agent/rustjail/src/sync.rs b/src/agent/rustjail/src/sync.rs new file mode 100644 index 0000000000..b17277f87b --- /dev/null +++ b/src/agent/rustjail/src/sync.rs @@ -0,0 +1,177 @@ +// Copyright (c) 2019 Ant Financial +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::errors::*; +use nix::errno::Errno; +use nix::unistd; +use nix::Error; +use std::mem; +use std::os::unix::io::RawFd; + +pub const SYNC_SUCCESS: i32 = 1; +pub const SYNC_FAILED: i32 = 2; +pub const SYNC_DATA: i32 = 3; + +const DATA_SIZE: usize = 100; +const MSG_SIZE: usize = mem::size_of::(); + +#[macro_export] +macro_rules! log_child { + ($fd:expr, $($arg:tt)+) => ({ + let lfd = $fd; + let mut log_str = format_args!($($arg)+).to_string(); + log_str.push('\n'); + write_count(lfd, log_str.as_bytes(), log_str.len()); + }) +} + +pub fn write_count(fd: RawFd, buf: &[u8], count: usize) -> Result { + let mut len = 0; + + loop { + match unistd::write(fd, &buf[len..]) { + Ok(l) => { + len += l; + if len == count { + break; + } + } + + Err(e) => { + if e != Error::from_errno(Errno::EINTR) { + return Err(e.into()); + } + } + } + } + + Ok(len) +} + +fn read_count(fd: RawFd, count: usize) -> Result> { + let mut v: Vec = vec![0; count]; + let mut len = 0; + + loop { + match unistd::read(fd, &mut v[len..]) { + Ok(l) => { + len += l; + if len == count || l == 0 { + break; + } + } + + Err(e) => { + if e != Error::from_errno(Errno::EINTR) { + return Err(e.into()); + } + } + } + } + + Ok(v[0..len].to_vec()) +} + +pub fn read_sync(fd: RawFd) -> Result> { + let buf = read_count(fd, MSG_SIZE)?; + if buf.len() != MSG_SIZE { + return Err(ErrorKind::ErrorCode(format!( + "process: {} failed to receive sync message from peer: got msg length: {}, expected: {}", + std::process::id(), + buf.len(), + MSG_SIZE + )) + .into()); + } + let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]]; + let msg: i32 = i32::from_be_bytes(buf_array); + match msg { + SYNC_SUCCESS => return Ok(Vec::new()), + SYNC_DATA => { + let buf = read_count(fd, MSG_SIZE)?; + let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]]; + let msg_length: i32 = i32::from_be_bytes(buf_array); + let data_buf = read_count(fd, msg_length as usize)?; + + return Ok(data_buf); + } + SYNC_FAILED => { + let mut error_buf = vec![]; + loop { + let buf = read_count(fd, DATA_SIZE)?; + + error_buf.extend(&buf); + if DATA_SIZE == buf.len() { + continue; + } else { + break; + } + } + + let error_str = match std::str::from_utf8(&error_buf) { + Ok(v) => v, + Err(e) => { + return Err(ErrorKind::ErrorCode(format!( + "receive error message from child process failed: {:?}", + e + )) + .into()) + } + }; + + return Err(ErrorKind::ErrorCode(String::from(error_str)).into()); + } + _ => return Err(ErrorKind::ErrorCode("error in receive sync message".to_string()).into()), + } +} + +pub fn write_sync(fd: RawFd, msg_type: i32, data_str: &str) -> Result<()> { + let buf = msg_type.to_be_bytes(); + + let count = write_count(fd, &buf, MSG_SIZE)?; + if count != MSG_SIZE { + return Err(ErrorKind::ErrorCode("error in send sync message".to_string()).into()); + } + + match msg_type { + SYNC_FAILED => match write_count(fd, data_str.as_bytes(), data_str.len()) { + Ok(_count) => unistd::close(fd)?, + Err(e) => { + unistd::close(fd)?; + return Err( + ErrorKind::ErrorCode("error in send message to process".to_string()).into(), + ); + } + }, + SYNC_DATA => { + let length: i32 = data_str.len() as i32; + match write_count(fd, &length.to_be_bytes(), MSG_SIZE) { + Ok(_count) => (), + Err(e) => { + unistd::close(fd)?; + return Err(ErrorKind::ErrorCode( + "error in send message to process".to_string(), + ) + .into()); + } + } + + match write_count(fd, data_str.as_bytes(), data_str.len()) { + Ok(_count) => (), + Err(e) => { + unistd::close(fd)?; + return Err(ErrorKind::ErrorCode( + "error in send message to process".to_string(), + ) + .into()); + } + } + } + + _ => (), + }; + + Ok(()) +} diff --git a/src/agent/rustjail/src/validator.rs b/src/agent/rustjail/src/validator.rs index 76777eb349..3e06398d89 100644 --- a/src/agent/rustjail/src/validator.rs +++ b/src/agent/rustjail/src/validator.rs @@ -1,9 +1,15 @@ +// Copyright (c) 2019 Ant Financial +// +// SPDX-License-Identifier: Apache-2.0 +// + use crate::container::Config; use crate::errors::*; use lazy_static; use nix::errno::Errno; use nix::Error; use oci::{LinuxIDMapping, LinuxNamespace, Spec}; +use protobuf::RepeatedField; use std::collections::HashMap; use std::path::{Component, PathBuf}; diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 72823a5c46..65c6d02016 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -14,8 +14,8 @@ use crate::linux_abi::*; use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCSITYPE}; use crate::sandbox::Sandbox; use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER}; -use protocols::agent::Device; use oci::Spec; +use protocols::agent::Device; use rustjail::errors::*; // Convenience macro to obtain the scope logger diff --git a/src/agent/src/grpc.rs b/src/agent/src/grpc.rs index b010b449cb..1a8fc58863 100644 --- a/src/agent/src/grpc.rs +++ b/src/agent/src/grpc.rs @@ -578,11 +578,11 @@ impl protocols::agent_grpc::AgentService for agentService { req: protocols::agent::ExecProcessRequest, sink: ::grpcio::UnarySink, ) { - if let Err(_) = self.do_exec_process(req) { + if let Err(e) = self.do_exec_process(req) { let f = sink .fail(RpcStatus::new( RpcStatusCode::Internal, - Some(String::from("fail to exec process!")), + Some(format!("{}", e)), )) .map_err(|_e| error!(sl!(), "fail to exec process!")); ctx.spawn(f); diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index dab89daa05..41a29fc1a2 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -10,14 +10,14 @@ #![allow(non_snake_case)] #[macro_use] extern crate lazy_static; +extern crate oci; extern crate prctl; extern crate protocols; extern crate regex; extern crate rustjail; +extern crate scan_fmt; extern crate serde_json; extern crate signal_hook; -extern crate scan_fmt; -extern crate oci; #[macro_use] extern crate scopeguard; @@ -100,6 +100,10 @@ fn announce(logger: &Logger) { fn main() -> Result<()> { let args: Vec = env::args().collect(); + if args.len() == 2 && args[1] == "init" { + rustjail::container::init_child(); + exit(0); + } env::set_var("RUST_BACKTRACE", "full");