diff --git a/src/runtime-rs/crates/agent/src/kata/agent.rs b/src/runtime-rs/crates/agent/src/kata/agent.rs index be8d006405..6d41e2be73 100644 --- a/src/runtime-rs/crates/agent/src/kata/agent.rs +++ b/src/runtime-rs/crates/agent/src/kata/agent.rs @@ -130,5 +130,6 @@ impl_agent!( get_guest_details | crate::GetGuestDetailsRequest | crate::GuestDetailsResponse | None, add_swap | crate::AddSwapRequest | crate::Empty | None, add_swap_path | crate::AddSwapPathRequest | crate::Empty | None, - set_policy | crate::SetPolicyRequest | crate::Empty | None + set_policy | crate::SetPolicyRequest | crate::Empty | None, + get_diagnostic_data | crate::GetDiagnosticDataRequest | crate::GetDiagnosticDataResponse | None ); diff --git a/src/runtime-rs/crates/agent/src/kata/trans.rs b/src/runtime-rs/crates/agent/src/kata/trans.rs index 138bbafbda..eec571a558 100644 --- a/src/runtime-rs/crates/agent/src/kata/trans.rs +++ b/src/runtime-rs/crates/agent/src/kata/trans.rs @@ -28,8 +28,8 @@ use crate::{ VersionCheckResponse, VolumeStatsRequest, VolumeStatsResponse, WaitProcessRequest, WriteStreamRequest, }, - GetGuestDetailsRequest, OomEventResponse, SetPolicyRequest, WaitProcessResponse, - WriteStreamResponse, + GetDiagnosticDataRequest, GetDiagnosticDataResponse, GetGuestDetailsRequest, OomEventResponse, + SetPolicyRequest, WaitProcessResponse, WriteStreamResponse, }; fn trans_vec>(from: Vec) -> Vec { @@ -754,6 +754,22 @@ impl From for agent::SetPolicyRequest { } } +impl From for agent::GetDiagnosticDataRequest { + fn from(from: GetDiagnosticDataRequest) -> Self { + Self { + log_type: from.log_type, + container_id: from.container_id, + ..Default::default() + } + } +} + +impl From for GetDiagnosticDataResponse { + fn from(from: agent::GetDiagnosticDataResponse) -> Self { + Self { data: from.data } + } +} + impl From for AgentDetails { fn from(src: agent::AgentDetails) -> Self { Self { diff --git a/src/runtime-rs/crates/agent/src/lib.rs b/src/runtime-rs/crates/agent/src/lib.rs index b4fdbae15a..a339e32131 100644 --- a/src/runtime-rs/crates/agent/src/lib.rs +++ b/src/runtime-rs/crates/agent/src/lib.rs @@ -17,15 +17,16 @@ pub use types::{ ARPNeighbor, ARPNeighbors, AddArpNeighborRequest, AddSwapPathRequest, AddSwapRequest, BlkioStatsEntry, CheckRequest, CloseStdinRequest, ContainerID, ContainerProcessID, CopyFileRequest, CreateContainerRequest, CreateSandboxRequest, Empty, ExecProcessRequest, - GetGuestDetailsRequest, GetIPTablesRequest, GetIPTablesResponse, GuestDetailsResponse, - HealthCheckResponse, IPAddress, IPFamily, Interface, Interfaces, ListProcessesRequest, - MemHotplugByProbeRequest, MetricsResponse, OnlineCPUMemRequest, OomEventResponse, - ReadStreamRequest, ReadStreamResponse, RemoveContainerRequest, ReseedRandomDevRequest, - ResizeVolumeRequest, Route, Routes, SetGuestDateTimeRequest, SetIPTablesRequest, - SetIPTablesResponse, SignalProcessRequest, StatsContainerResponse, Storage, - TtyWinResizeRequest, UpdateContainerRequest, UpdateInterfaceRequest, UpdateRoutesRequest, - VersionCheckResponse, VolumeStatsRequest, VolumeStatsResponse, WaitProcessRequest, - WaitProcessResponse, WriteStreamRequest, WriteStreamResponse, + GetDiagnosticDataRequest, GetDiagnosticDataResponse, GetGuestDetailsRequest, + GetIPTablesRequest, GetIPTablesResponse, GuestDetailsResponse, HealthCheckResponse, IPAddress, + IPFamily, Interface, Interfaces, ListProcessesRequest, MemHotplugByProbeRequest, + MetricsResponse, OnlineCPUMemRequest, OomEventResponse, ReadStreamRequest, ReadStreamResponse, + RemoveContainerRequest, ReseedRandomDevRequest, ResizeVolumeRequest, Route, Routes, + SetGuestDateTimeRequest, SetIPTablesRequest, SetIPTablesResponse, SignalProcessRequest, + StatsContainerResponse, Storage, TtyWinResizeRequest, UpdateContainerRequest, + UpdateInterfaceRequest, UpdateRoutesRequest, VersionCheckResponse, VolumeStatsRequest, + VolumeStatsResponse, WaitProcessRequest, WaitProcessResponse, WriteStreamRequest, + WriteStreamResponse, }; use anyhow::Result; @@ -100,4 +101,10 @@ pub trait Agent: AgentManager + HealthService + Send + Sync { async fn add_swap(&self, req: AddSwapRequest) -> Result; async fn add_swap_path(&self, req: AddSwapPathRequest) -> Result; async fn set_policy(&self, req: SetPolicyRequest) -> Result; + + // diagnostics + async fn get_diagnostic_data( + &self, + req: GetDiagnosticDataRequest, + ) -> Result; } diff --git a/src/runtime-rs/crates/agent/src/types.rs b/src/runtime-rs/crates/agent/src/types.rs index 8491da32ea..8e0917b559 100644 --- a/src/runtime-rs/crates/agent/src/types.rs +++ b/src/runtime-rs/crates/agent/src/types.rs @@ -620,6 +620,17 @@ pub struct SetPolicyRequest { pub policy: String, } +#[derive(PartialEq, Clone, Default, Debug)] +pub struct GetDiagnosticDataRequest { + pub log_type: String, + pub container_id: String, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct GetDiagnosticDataResponse { + pub data: String, +} + #[cfg(test)] mod test { use std::convert::TryFrom; diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs index 8787b22365..7ed51d6075 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs @@ -491,6 +491,10 @@ impl Container { } pub async fn stop_process(&self, container_process: &ContainerProcess) -> Result<()> { + if container_process.process_type == ProcessType::Container { + self.copy_termination_log().await; + } + let mut inner = self.inner.write().await; let device_manager = self.resource_manager.get_device_manager().await; inner @@ -512,6 +516,77 @@ impl Container { Ok(()) } + async fn copy_termination_log(&self) { + let toml_config = self.resource_manager.config().await; + let shared_fs = toml_config + .hypervisor + .get(&toml_config.runtime.hypervisor_name) + .and_then(|h| h.shared_fs.shared_fs.as_deref()); + + // When a shared filesystem is configured the host can read the + // termination log directly. shared_fs == None means no shared + // filesystem (the "none" config value is normalised to None by + // SharedFsInfo::adjust_config). + if shared_fs.is_some() { + return; + } + + let annotations = self.spec.annotations().clone().unwrap_or_default(); + let policy = annotations.get("io.kubernetes.container.terminationMessagePolicy"); + if policy.map(|p| p.as_str()) != Some("File") { + return; + } + + let termination_path = + match annotations.get("io.kubernetes.container.terminationMessagePath") { + Some(p) if !p.is_empty() => p.clone(), + _ => return, + }; + + let req = agent::GetDiagnosticDataRequest { + log_type: "termination_log".to_string(), + container_id: self.container_id.container_id.clone(), + }; + + // The kubelet bind-mounts a host file into the container at + // terminationMessagePath, then reads back from that host file. + // With shared_fs=none the guest cannot write through that mount, + // so we locate the host-side source path from the OCI mounts and + // write the data there directly. + let host_path = self.spec.mounts().as_ref().and_then(|mounts| { + mounts + .iter() + .find(|m| m.destination() == std::path::Path::new(&termination_path)) + .and_then(|m| m.source().clone()) + }); + + let host_path = match host_path { + Some(p) => p, + None => { + warn!( + self.logger, + "No host mount found for termination message path" + ); + return; + } + }; + + match self.agent.get_diagnostic_data(req).await { + Ok(resp) if !resp.data.is_empty() => { + if let Err(e) = tokio::fs::write(&host_path, resp.data.as_bytes()).await { + warn!(self.logger, "Failed to write termination message: {}", e); + } + } + Ok(_) => {} + Err(e) => { + warn!( + self.logger, + "Failed to get termination message from guest: {}", e + ); + } + } + } + pub async fn pause(&self) -> Result<()> { let mut inner = self.inner.write().await; let status = inner.init_process.get_status().await;