runtime-rs: add StartContainer hook

StartContainer hooks will be executed in the guest container namespace in Kata.
The hook path of this kind of hook is also in the guest container namespace.

StartContainer hooks are executed after the start operation is called, and they
should be executed before the user-specified command is executed.

Fixes: #5787

Signed-off-by: Yushuo <y-shuo@linux.alibaba.com>
This commit is contained in:
Yushuo 2022-12-05 17:20:18 +08:00
parent 977f281c5c
commit e80c9f7b74
7 changed files with 53 additions and 9 deletions

View File

@ -374,13 +374,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let buf = read_sync(crfd)?;
let spec_str = std::str::from_utf8(&buf)?;
let spec: oci::Spec = serde_json::from_str(spec_str)?;
log_child!(cfd_log, "notify parent to send oci process");
write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?;
let process_str = std::str::from_utf8(&buf)?;
let oci_process: oci::Process = serde_json::from_str(process_str)?;
log_child!(cfd_log, "notify parent to send oci state");
write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?;
let state_str = std::str::from_utf8(&buf)?;
let mut state: oci::State = serde_json::from_str(state_str)?;
log_child!(cfd_log, "notify parent to send cgroup manager");
write_sync(cwfd, SYNC_SUCCESS, "")?;
@ -743,6 +748,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
unistd::read(fd, buf)?;
}
if init {
// StartContainer Hooks:
// * should be run in container namespace
// * should be run after container is created and before container is started (before the user-specified command is executed)
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#startcontainer-hooks
state.pid = std::process::id() as i32;
state.status = oci::ContainerState::Created;
if let Some(hooks) = spec.hooks.as_ref() {
let mut start_container_states = HookStates::new();
start_container_states.execute_hooks(&hooks.start_container, Some(state))?;
}
}
// With NoNewPrivileges, we should set seccomp as close to
// do_exec as possible in order to reduce the amount of
// system calls in the seccomp profiles.
@ -1323,7 +1341,6 @@ async fn join_namespaces(
write_async(pipe_w, SYNC_DATA, spec_str.as_str()).await?;
info!(logger, "wait child received oci spec");
read_async(pipe_r).await?;
info!(logger, "send oci process from parent to child");
@ -1333,6 +1350,13 @@ async fn join_namespaces(
info!(logger, "wait child received oci process");
read_async(pipe_r).await?;
info!(logger, "try to send state from parent to child");
let state_str = serde_json::to_string(st)?;
write_async(pipe_w, SYNC_DATA, state_str.as_str()).await?;
info!(logger, "wait child received oci state");
read_async(pipe_r).await?;
let cm_str = if use_systemd_cgroup {
serde_json::to_string(cm.as_any()?.downcast_ref::<SystemdManager>().unwrap())
} else {

View File

@ -153,19 +153,17 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
let prestart = hook_grpc_to_oci(h.Prestart.as_ref());
let create_runtime = hook_grpc_to_oci(h.CreateRuntime.as_ref());
let create_container = hook_grpc_to_oci(h.CreateContainer.as_ref());
let start_container = hook_grpc_to_oci(h.StartContainer.as_ref());
let poststart = hook_grpc_to_oci(h.Poststart.as_ref());
let poststop = hook_grpc_to_oci(h.Poststop.as_ref());
oci::Hooks {
prestart,
create_runtime,
create_container,
start_container,
poststart,
poststop,
}

View File

@ -197,6 +197,8 @@ pub struct Hooks {
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub create_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub start_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststart: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststop: Vec<Hook>,

View File

@ -172,6 +172,9 @@ message Hooks {
// CreateContainer is a list of hooks to be run after VM is started, and before container is created.
repeated Hook CreateContainer = 5 [(gogoproto.nullable) = false];
// StartContainer is a list of hooks to be run after container is created, but before it is started.
repeated Hook StartContainer = 6 [(gogoproto.nullable) = false];
}
message Hook {

View File

@ -296,6 +296,7 @@ impl From<oci::Hooks> for crate::oci::Hooks {
Prestart: from_vec(from.prestart),
CreateRuntime: from_vec(from.create_runtime),
CreateContainer: from_vec(from.create_container),
StartContainer: from_vec(from.start_container),
Poststart: from_vec(from.poststart),
Poststop: from_vec(from.poststop),
unknown_fields: Default::default(),
@ -984,6 +985,10 @@ impl From<crate::oci::Hooks> for oci::Hooks {
for hook in from.take_CreateContainer().to_vec() {
create_container.push(hook.into())
}
let mut start_container = Vec::new();
for hook in from.take_StartContainer().to_vec() {
start_container.push(hook.into())
}
let mut poststart = Vec::new();
for hook in from.take_Poststart().to_vec() {
poststart.push(hook.into());
@ -996,6 +1001,7 @@ impl From<crate::oci::Hooks> for oci::Hooks {
prestart,
create_runtime,
create_container,
start_container,
poststart,
poststop,
}

View File

@ -124,7 +124,6 @@ pub struct CreateContainerRequest {
pub devices: Vec<Device>,
pub storages: Vec<Storage>,
pub oci: Option<oci::Spec>,
pub guest_hooks: Option<oci::Hooks>,
pub sandbox_pidns: bool,
pub rootfs_mounts: Vec<oci::Mount>,
}

View File

@ -396,8 +396,20 @@ impl Container {
}
fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
// hook should be done on host
spec.hooks = None;
// Only the StartContainer hook needs to be reserved for execution in the guest
let start_container_hooks = match spec.hooks.as_ref() {
Some(hooks) => hooks.start_container.clone(),
None => Vec::new(),
};
spec.hooks = if start_container_hooks.is_empty() {
None
} else {
Some(oci::Hooks {
start_container: start_container_hooks,
..Default::default()
})
};
// special process K8s ephemeral volumes.
update_ephemeral_storage_type(spec);