mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-04-26 18:43:06 +00:00
runtime-rs: wire GetDiagnosticData for termination logs
Add runtime-rs support for the GetDiagnosticData RPC. This extends the Agent trait, types, and protocol translation layer with the new request/response types. During container stop, when shared_fs is "none" and the terminationMessagePolicy annotation is "File", the runtime copies the termination log from the guest via GetDiagnosticData. The call is best-effort to avoid blocking container teardown. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
@@ -130,5 +130,6 @@ impl_agent!(
|
||||
get_guest_details | crate::GetGuestDetailsRequest | crate::GuestDetailsResponse | None,
|
||||
add_swap | crate::AddSwapRequest | crate::Empty | None,
|
||||
add_swap_path | crate::AddSwapPathRequest | crate::Empty | None,
|
||||
set_policy | crate::SetPolicyRequest | crate::Empty | None
|
||||
set_policy | crate::SetPolicyRequest | crate::Empty | None,
|
||||
get_diagnostic_data | crate::GetDiagnosticDataRequest | crate::GetDiagnosticDataResponse | None
|
||||
);
|
||||
|
||||
@@ -28,8 +28,8 @@ use crate::{
|
||||
VersionCheckResponse, VolumeStatsRequest, VolumeStatsResponse, WaitProcessRequest,
|
||||
WriteStreamRequest,
|
||||
},
|
||||
GetGuestDetailsRequest, OomEventResponse, SetPolicyRequest, WaitProcessResponse,
|
||||
WriteStreamResponse,
|
||||
GetDiagnosticDataRequest, GetDiagnosticDataResponse, GetGuestDetailsRequest, OomEventResponse,
|
||||
SetPolicyRequest, WaitProcessResponse, WriteStreamResponse,
|
||||
};
|
||||
|
||||
fn trans_vec<F: Sized + Clone, T: From<F>>(from: Vec<F>) -> Vec<T> {
|
||||
@@ -754,6 +754,22 @@ impl From<SetPolicyRequest> for agent::SetPolicyRequest {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GetDiagnosticDataRequest> for agent::GetDiagnosticDataRequest {
|
||||
fn from(from: GetDiagnosticDataRequest) -> Self {
|
||||
Self {
|
||||
log_type: from.log_type,
|
||||
container_id: from.container_id,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<agent::GetDiagnosticDataResponse> for GetDiagnosticDataResponse {
|
||||
fn from(from: agent::GetDiagnosticDataResponse) -> Self {
|
||||
Self { data: from.data }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<agent::AgentDetails> for AgentDetails {
|
||||
fn from(src: agent::AgentDetails) -> Self {
|
||||
Self {
|
||||
|
||||
@@ -17,15 +17,16 @@ pub use types::{
|
||||
ARPNeighbor, ARPNeighbors, AddArpNeighborRequest, AddSwapPathRequest, AddSwapRequest,
|
||||
BlkioStatsEntry, CheckRequest, CloseStdinRequest, ContainerID, ContainerProcessID,
|
||||
CopyFileRequest, CreateContainerRequest, CreateSandboxRequest, Empty, ExecProcessRequest,
|
||||
GetGuestDetailsRequest, GetIPTablesRequest, GetIPTablesResponse, GuestDetailsResponse,
|
||||
HealthCheckResponse, IPAddress, IPFamily, Interface, Interfaces, ListProcessesRequest,
|
||||
MemHotplugByProbeRequest, MetricsResponse, OnlineCPUMemRequest, OomEventResponse,
|
||||
ReadStreamRequest, ReadStreamResponse, RemoveContainerRequest, ReseedRandomDevRequest,
|
||||
ResizeVolumeRequest, Route, Routes, SetGuestDateTimeRequest, SetIPTablesRequest,
|
||||
SetIPTablesResponse, SignalProcessRequest, StatsContainerResponse, Storage,
|
||||
TtyWinResizeRequest, UpdateContainerRequest, UpdateInterfaceRequest, UpdateRoutesRequest,
|
||||
VersionCheckResponse, VolumeStatsRequest, VolumeStatsResponse, WaitProcessRequest,
|
||||
WaitProcessResponse, WriteStreamRequest, WriteStreamResponse,
|
||||
GetDiagnosticDataRequest, GetDiagnosticDataResponse, GetGuestDetailsRequest,
|
||||
GetIPTablesRequest, GetIPTablesResponse, GuestDetailsResponse, HealthCheckResponse, IPAddress,
|
||||
IPFamily, Interface, Interfaces, ListProcessesRequest, MemHotplugByProbeRequest,
|
||||
MetricsResponse, OnlineCPUMemRequest, OomEventResponse, ReadStreamRequest, ReadStreamResponse,
|
||||
RemoveContainerRequest, ReseedRandomDevRequest, ResizeVolumeRequest, Route, Routes,
|
||||
SetGuestDateTimeRequest, SetIPTablesRequest, SetIPTablesResponse, SignalProcessRequest,
|
||||
StatsContainerResponse, Storage, TtyWinResizeRequest, UpdateContainerRequest,
|
||||
UpdateInterfaceRequest, UpdateRoutesRequest, VersionCheckResponse, VolumeStatsRequest,
|
||||
VolumeStatsResponse, WaitProcessRequest, WaitProcessResponse, WriteStreamRequest,
|
||||
WriteStreamResponse,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
@@ -100,4 +101,10 @@ pub trait Agent: AgentManager + HealthService + Send + Sync {
|
||||
async fn add_swap(&self, req: AddSwapRequest) -> Result<Empty>;
|
||||
async fn add_swap_path(&self, req: AddSwapPathRequest) -> Result<Empty>;
|
||||
async fn set_policy(&self, req: SetPolicyRequest) -> Result<Empty>;
|
||||
|
||||
// diagnostics
|
||||
async fn get_diagnostic_data(
|
||||
&self,
|
||||
req: GetDiagnosticDataRequest,
|
||||
) -> Result<GetDiagnosticDataResponse>;
|
||||
}
|
||||
|
||||
@@ -620,6 +620,17 @@ pub struct SetPolicyRequest {
|
||||
pub policy: String,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Clone, Default, Debug)]
|
||||
pub struct GetDiagnosticDataRequest {
|
||||
pub log_type: String,
|
||||
pub container_id: String,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Clone, Default, Debug)]
|
||||
pub struct GetDiagnosticDataResponse {
|
||||
pub data: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::convert::TryFrom;
|
||||
|
||||
@@ -491,6 +491,10 @@ impl Container {
|
||||
}
|
||||
|
||||
pub async fn stop_process(&self, container_process: &ContainerProcess) -> Result<()> {
|
||||
if container_process.process_type == ProcessType::Container {
|
||||
self.copy_termination_log().await;
|
||||
}
|
||||
|
||||
let mut inner = self.inner.write().await;
|
||||
let device_manager = self.resource_manager.get_device_manager().await;
|
||||
inner
|
||||
@@ -512,6 +516,77 @@ impl Container {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn copy_termination_log(&self) {
|
||||
let toml_config = self.resource_manager.config().await;
|
||||
let shared_fs = toml_config
|
||||
.hypervisor
|
||||
.get(&toml_config.runtime.hypervisor_name)
|
||||
.and_then(|h| h.shared_fs.shared_fs.as_deref());
|
||||
|
||||
// When a shared filesystem is configured the host can read the
|
||||
// termination log directly. shared_fs == None means no shared
|
||||
// filesystem (the "none" config value is normalised to None by
|
||||
// SharedFsInfo::adjust_config).
|
||||
if shared_fs.is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
let annotations = self.spec.annotations().clone().unwrap_or_default();
|
||||
let policy = annotations.get("io.kubernetes.container.terminationMessagePolicy");
|
||||
if policy.map(|p| p.as_str()) != Some("File") {
|
||||
return;
|
||||
}
|
||||
|
||||
let termination_path =
|
||||
match annotations.get("io.kubernetes.container.terminationMessagePath") {
|
||||
Some(p) if !p.is_empty() => p.clone(),
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let req = agent::GetDiagnosticDataRequest {
|
||||
log_type: "termination_log".to_string(),
|
||||
container_id: self.container_id.container_id.clone(),
|
||||
};
|
||||
|
||||
// The kubelet bind-mounts a host file into the container at
|
||||
// terminationMessagePath, then reads back from that host file.
|
||||
// With shared_fs=none the guest cannot write through that mount,
|
||||
// so we locate the host-side source path from the OCI mounts and
|
||||
// write the data there directly.
|
||||
let host_path = self.spec.mounts().as_ref().and_then(|mounts| {
|
||||
mounts
|
||||
.iter()
|
||||
.find(|m| m.destination() == std::path::Path::new(&termination_path))
|
||||
.and_then(|m| m.source().clone())
|
||||
});
|
||||
|
||||
let host_path = match host_path {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
warn!(
|
||||
self.logger,
|
||||
"No host mount found for termination message path"
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
match self.agent.get_diagnostic_data(req).await {
|
||||
Ok(resp) if !resp.data.is_empty() => {
|
||||
if let Err(e) = tokio::fs::write(&host_path, resp.data.as_bytes()).await {
|
||||
warn!(self.logger, "Failed to write termination message: {}", e);
|
||||
}
|
||||
}
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
self.logger,
|
||||
"Failed to get termination message from guest: {}", e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn pause(&self) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
let status = inner.init_process.get_status().await;
|
||||
|
||||
Reference in New Issue
Block a user