runtime-rs: wire GetDiagnosticData for termination logs

Add runtime-rs support for the GetDiagnosticData RPC. This extends
the Agent trait, types, and protocol translation layer with the new
request/response types.

During container stop, when shared_fs is "none" and the
terminationMessagePolicy annotation is "File", the runtime copies
the termination log from the guest via GetDiagnosticData. The call
is best-effort to avoid blocking container teardown.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
Fabiano Fidêncio
2026-04-15 19:57:54 +02:00
parent 411f8cf583
commit eda3bc6190
5 changed files with 122 additions and 12 deletions

View File

@@ -130,5 +130,6 @@ impl_agent!(
get_guest_details | crate::GetGuestDetailsRequest | crate::GuestDetailsResponse | None,
add_swap | crate::AddSwapRequest | crate::Empty | None,
add_swap_path | crate::AddSwapPathRequest | crate::Empty | None,
set_policy | crate::SetPolicyRequest | crate::Empty | None
set_policy | crate::SetPolicyRequest | crate::Empty | None,
get_diagnostic_data | crate::GetDiagnosticDataRequest | crate::GetDiagnosticDataResponse | None
);

View File

@@ -28,8 +28,8 @@ use crate::{
VersionCheckResponse, VolumeStatsRequest, VolumeStatsResponse, WaitProcessRequest,
WriteStreamRequest,
},
GetGuestDetailsRequest, OomEventResponse, SetPolicyRequest, WaitProcessResponse,
WriteStreamResponse,
GetDiagnosticDataRequest, GetDiagnosticDataResponse, GetGuestDetailsRequest, OomEventResponse,
SetPolicyRequest, WaitProcessResponse, WriteStreamResponse,
};
fn trans_vec<F: Sized + Clone, T: From<F>>(from: Vec<F>) -> Vec<T> {
@@ -754,6 +754,22 @@ impl From<SetPolicyRequest> for agent::SetPolicyRequest {
}
}
impl From<GetDiagnosticDataRequest> for agent::GetDiagnosticDataRequest {
fn from(from: GetDiagnosticDataRequest) -> Self {
Self {
log_type: from.log_type,
container_id: from.container_id,
..Default::default()
}
}
}
impl From<agent::GetDiagnosticDataResponse> for GetDiagnosticDataResponse {
fn from(from: agent::GetDiagnosticDataResponse) -> Self {
Self { data: from.data }
}
}
impl From<agent::AgentDetails> for AgentDetails {
fn from(src: agent::AgentDetails) -> Self {
Self {

View File

@@ -17,15 +17,16 @@ pub use types::{
ARPNeighbor, ARPNeighbors, AddArpNeighborRequest, AddSwapPathRequest, AddSwapRequest,
BlkioStatsEntry, CheckRequest, CloseStdinRequest, ContainerID, ContainerProcessID,
CopyFileRequest, CreateContainerRequest, CreateSandboxRequest, Empty, ExecProcessRequest,
GetGuestDetailsRequest, GetIPTablesRequest, GetIPTablesResponse, GuestDetailsResponse,
HealthCheckResponse, IPAddress, IPFamily, Interface, Interfaces, ListProcessesRequest,
MemHotplugByProbeRequest, MetricsResponse, OnlineCPUMemRequest, OomEventResponse,
ReadStreamRequest, ReadStreamResponse, RemoveContainerRequest, ReseedRandomDevRequest,
ResizeVolumeRequest, Route, Routes, SetGuestDateTimeRequest, SetIPTablesRequest,
SetIPTablesResponse, SignalProcessRequest, StatsContainerResponse, Storage,
TtyWinResizeRequest, UpdateContainerRequest, UpdateInterfaceRequest, UpdateRoutesRequest,
VersionCheckResponse, VolumeStatsRequest, VolumeStatsResponse, WaitProcessRequest,
WaitProcessResponse, WriteStreamRequest, WriteStreamResponse,
GetDiagnosticDataRequest, GetDiagnosticDataResponse, GetGuestDetailsRequest,
GetIPTablesRequest, GetIPTablesResponse, GuestDetailsResponse, HealthCheckResponse, IPAddress,
IPFamily, Interface, Interfaces, ListProcessesRequest, MemHotplugByProbeRequest,
MetricsResponse, OnlineCPUMemRequest, OomEventResponse, ReadStreamRequest, ReadStreamResponse,
RemoveContainerRequest, ReseedRandomDevRequest, ResizeVolumeRequest, Route, Routes,
SetGuestDateTimeRequest, SetIPTablesRequest, SetIPTablesResponse, SignalProcessRequest,
StatsContainerResponse, Storage, TtyWinResizeRequest, UpdateContainerRequest,
UpdateInterfaceRequest, UpdateRoutesRequest, VersionCheckResponse, VolumeStatsRequest,
VolumeStatsResponse, WaitProcessRequest, WaitProcessResponse, WriteStreamRequest,
WriteStreamResponse,
};
use anyhow::Result;
@@ -100,4 +101,10 @@ pub trait Agent: AgentManager + HealthService + Send + Sync {
async fn add_swap(&self, req: AddSwapRequest) -> Result<Empty>;
async fn add_swap_path(&self, req: AddSwapPathRequest) -> Result<Empty>;
async fn set_policy(&self, req: SetPolicyRequest) -> Result<Empty>;
// diagnostics
async fn get_diagnostic_data(
&self,
req: GetDiagnosticDataRequest,
) -> Result<GetDiagnosticDataResponse>;
}

View File

@@ -620,6 +620,17 @@ pub struct SetPolicyRequest {
pub policy: String,
}
#[derive(PartialEq, Clone, Default, Debug)]
pub struct GetDiagnosticDataRequest {
pub log_type: String,
pub container_id: String,
}
#[derive(PartialEq, Clone, Default, Debug)]
pub struct GetDiagnosticDataResponse {
pub data: String,
}
#[cfg(test)]
mod test {
use std::convert::TryFrom;

View File

@@ -491,6 +491,10 @@ impl Container {
}
pub async fn stop_process(&self, container_process: &ContainerProcess) -> Result<()> {
if container_process.process_type == ProcessType::Container {
self.copy_termination_log().await;
}
let mut inner = self.inner.write().await;
let device_manager = self.resource_manager.get_device_manager().await;
inner
@@ -512,6 +516,77 @@ impl Container {
Ok(())
}
async fn copy_termination_log(&self) {
let toml_config = self.resource_manager.config().await;
let shared_fs = toml_config
.hypervisor
.get(&toml_config.runtime.hypervisor_name)
.and_then(|h| h.shared_fs.shared_fs.as_deref());
// When a shared filesystem is configured the host can read the
// termination log directly. shared_fs == None means no shared
// filesystem (the "none" config value is normalised to None by
// SharedFsInfo::adjust_config).
if shared_fs.is_some() {
return;
}
let annotations = self.spec.annotations().clone().unwrap_or_default();
let policy = annotations.get("io.kubernetes.container.terminationMessagePolicy");
if policy.map(|p| p.as_str()) != Some("File") {
return;
}
let termination_path =
match annotations.get("io.kubernetes.container.terminationMessagePath") {
Some(p) if !p.is_empty() => p.clone(),
_ => return,
};
let req = agent::GetDiagnosticDataRequest {
log_type: "termination_log".to_string(),
container_id: self.container_id.container_id.clone(),
};
// The kubelet bind-mounts a host file into the container at
// terminationMessagePath, then reads back from that host file.
// With shared_fs=none the guest cannot write through that mount,
// so we locate the host-side source path from the OCI mounts and
// write the data there directly.
let host_path = self.spec.mounts().as_ref().and_then(|mounts| {
mounts
.iter()
.find(|m| m.destination() == std::path::Path::new(&termination_path))
.and_then(|m| m.source().clone())
});
let host_path = match host_path {
Some(p) => p,
None => {
warn!(
self.logger,
"No host mount found for termination message path"
);
return;
}
};
match self.agent.get_diagnostic_data(req).await {
Ok(resp) if !resp.data.is_empty() => {
if let Err(e) = tokio::fs::write(&host_path, resp.data.as_bytes()).await {
warn!(self.logger, "Failed to write termination message: {}", e);
}
}
Ok(_) => {}
Err(e) => {
warn!(
self.logger,
"Failed to get termination message from guest: {}", e
);
}
}
}
pub async fn pause(&self) -> Result<()> {
let mut inner = self.inner.write().await;
let status = inner.init_process.get_status().await;