mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-14 06:06:12 +00:00
Merge pull request #6718 from openanolis/mengze/keep_abnormal
runtime-rs: support keep_abnormal in toml config
This commit is contained in:
commit
509bc8b6c8
@ -130,6 +130,12 @@ pub struct Runtime {
|
|||||||
/// Vendor customized runtime configuration.
|
/// Vendor customized runtime configuration.
|
||||||
#[serde(default, flatten)]
|
#[serde(default, flatten)]
|
||||||
pub vendor: RuntimeVendor,
|
pub vendor: RuntimeVendor,
|
||||||
|
|
||||||
|
/// If keep_abnormal is enabled, it means that 1) if the runtime exits abnormally, the cleanup process
|
||||||
|
/// will be skipped, and 2) the runtime will not exit even if the health check fails.
|
||||||
|
/// This option is typically used to retain abnormal information for debugging.
|
||||||
|
#[serde(default)]
|
||||||
|
pub keep_abnormal: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ConfigOps for Runtime {
|
impl ConfigOps for Runtime {
|
||||||
|
@ -214,7 +214,14 @@ dial_timeout = 45
|
|||||||
# system log
|
# system log
|
||||||
# (default: disabled)
|
# (default: disabled)
|
||||||
#enable_debug = true
|
#enable_debug = true
|
||||||
#
|
|
||||||
|
# If enabled, it means that 1) if the runtime exits abnormally,
|
||||||
|
# the cleanup process will be skipped, and 2) the runtime will not exit
|
||||||
|
# even if the health check fails.
|
||||||
|
# This option is typically used to retain abnormal information for debugging.
|
||||||
|
# (default: false)
|
||||||
|
#keep_abnormal = true
|
||||||
|
|
||||||
# Internetworking model
|
# Internetworking model
|
||||||
# Determines how the VM should be connected to the
|
# Determines how the VM should be connected to the
|
||||||
# container network interface
|
# container network interface
|
||||||
|
@ -14,6 +14,7 @@ use common::{
|
|||||||
RuntimeHandler, RuntimeInstance, Sandbox, SandboxNetworkEnv,
|
RuntimeHandler, RuntimeInstance, Sandbox, SandboxNetworkEnv,
|
||||||
};
|
};
|
||||||
use hypervisor::Param;
|
use hypervisor::Param;
|
||||||
|
use kata_sys_util::spec::load_oci_spec;
|
||||||
use kata_types::{
|
use kata_types::{
|
||||||
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
|
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
|
||||||
};
|
};
|
||||||
@ -190,9 +191,16 @@ impl RuntimeHandlerManager {
|
|||||||
let sender = inner.msg_sender.clone();
|
let sender = inner.msg_sender.clone();
|
||||||
let sandbox_state = persist::from_disk::<SandboxState>(&inner.id)
|
let sandbox_state = persist::from_disk::<SandboxState>(&inner.id)
|
||||||
.context("failed to load the sandbox state")?;
|
.context("failed to load the sandbox state")?;
|
||||||
|
|
||||||
|
let config = if let Ok(spec) = load_oci_spec() {
|
||||||
|
load_config(&spec, &None).context("load config")?
|
||||||
|
} else {
|
||||||
|
TomlConfig::default()
|
||||||
|
};
|
||||||
|
|
||||||
let sandbox_args = SandboxRestoreArgs {
|
let sandbox_args = SandboxRestoreArgs {
|
||||||
sid: inner.id.clone(),
|
sid: inner.id.clone(),
|
||||||
toml_config: TomlConfig::default(),
|
toml_config: config,
|
||||||
sender,
|
sender,
|
||||||
};
|
};
|
||||||
match sandbox_state.sandbox_type.clone() {
|
match sandbox_state.sandbox_type.clone() {
|
||||||
@ -208,6 +216,10 @@ impl RuntimeHandlerManager {
|
|||||||
}
|
}
|
||||||
#[cfg(feature = "virt")]
|
#[cfg(feature = "virt")]
|
||||||
name if name == VirtContainer::name() => {
|
name if name == VirtContainer::name() => {
|
||||||
|
if sandbox_args.toml_config.runtime.keep_abnormal {
|
||||||
|
info!(sl!(), "skip cleanup for keep_abnormal");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
let sandbox = VirtSandbox::restore(sandbox_args, sandbox_state)
|
let sandbox = VirtSandbox::restore(sandbox_args, sandbox_state)
|
||||||
.await
|
.await
|
||||||
.context("failed to restore the sandbox")?;
|
.context("failed to restore the sandbox")?;
|
||||||
|
@ -21,17 +21,17 @@ const HEALTH_CHECK_STOP_CHANNEL_BUFFER_SIZE: usize = 1;
|
|||||||
|
|
||||||
pub struct HealthCheck {
|
pub struct HealthCheck {
|
||||||
pub keep_alive: bool,
|
pub keep_alive: bool,
|
||||||
keep_vm: bool,
|
keep_abnormal: bool,
|
||||||
stop_tx: mpsc::Sender<()>,
|
stop_tx: mpsc::Sender<()>,
|
||||||
stop_rx: Arc<Mutex<mpsc::Receiver<()>>>,
|
stop_rx: Arc<Mutex<mpsc::Receiver<()>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HealthCheck {
|
impl HealthCheck {
|
||||||
pub fn new(keep_alive: bool, keep_vm: bool) -> HealthCheck {
|
pub fn new(keep_alive: bool, keep_abnormal: bool) -> HealthCheck {
|
||||||
let (tx, rx) = mpsc::channel(HEALTH_CHECK_STOP_CHANNEL_BUFFER_SIZE);
|
let (tx, rx) = mpsc::channel(HEALTH_CHECK_STOP_CHANNEL_BUFFER_SIZE);
|
||||||
HealthCheck {
|
HealthCheck {
|
||||||
keep_alive,
|
keep_alive,
|
||||||
keep_vm,
|
keep_abnormal,
|
||||||
stop_tx: tx,
|
stop_tx: tx,
|
||||||
stop_rx: Arc::new(Mutex::new(rx)),
|
stop_rx: Arc::new(Mutex::new(rx)),
|
||||||
}
|
}
|
||||||
@ -46,7 +46,7 @@ impl HealthCheck {
|
|||||||
info!(sl!(), "start runtime keep alive");
|
info!(sl!(), "start runtime keep alive");
|
||||||
|
|
||||||
let stop_rx = self.stop_rx.clone();
|
let stop_rx = self.stop_rx.clone();
|
||||||
let keep_vm = self.keep_vm;
|
let keep_abnormal = self.keep_abnormal;
|
||||||
let _ = tokio::spawn(async move {
|
let _ = tokio::spawn(async move {
|
||||||
let mut version_check_threshold_count = 0;
|
let mut version_check_threshold_count = 0;
|
||||||
|
|
||||||
@ -87,7 +87,7 @@ impl HealthCheck {
|
|||||||
error!(sl!(), "failed to do {} agent health check: {}", id, e);
|
error!(sl!(), "failed to do {} agent health check: {}", id, e);
|
||||||
if let Err(mpsc::error::TryRecvError::Empty) = stop_rx.try_recv() {
|
if let Err(mpsc::error::TryRecvError::Empty) = stop_rx.try_recv() {
|
||||||
error!(sl!(), "failed to receive stop monitor signal");
|
error!(sl!(), "failed to receive stop monitor signal");
|
||||||
if !keep_vm {
|
if !keep_abnormal {
|
||||||
::std::process::exit(1);
|
::std::process::exit(1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -75,6 +75,8 @@ impl VirtSandbox {
|
|||||||
hypervisor: Arc<dyn Hypervisor>,
|
hypervisor: Arc<dyn Hypervisor>,
|
||||||
resource_manager: Arc<ResourceManager>,
|
resource_manager: Arc<ResourceManager>,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
|
let config = resource_manager.config().await;
|
||||||
|
let keep_abnormal = config.runtime.keep_abnormal;
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
sid: sid.to_string(),
|
sid: sid.to_string(),
|
||||||
msg_sender: Arc::new(Mutex::new(msg_sender)),
|
msg_sender: Arc::new(Mutex::new(msg_sender)),
|
||||||
@ -82,7 +84,7 @@ impl VirtSandbox {
|
|||||||
agent,
|
agent,
|
||||||
hypervisor,
|
hypervisor,
|
||||||
resource_manager,
|
resource_manager,
|
||||||
monitor: Arc::new(HealthCheck::new(true, false)),
|
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -440,6 +442,7 @@ impl Persist for VirtSandbox {
|
|||||||
}?;
|
}?;
|
||||||
let agent = Arc::new(KataAgent::new(kata_types::config::Agent::default()));
|
let agent = Arc::new(KataAgent::new(kata_types::config::Agent::default()));
|
||||||
let sid = sandbox_args.sid;
|
let sid = sandbox_args.sid;
|
||||||
|
let keep_abnormal = config.runtime.keep_abnormal;
|
||||||
let args = ManagerArgs {
|
let args = ManagerArgs {
|
||||||
sid: sid.clone(),
|
sid: sid.clone(),
|
||||||
agent: agent.clone(),
|
agent: agent.clone(),
|
||||||
@ -454,7 +457,7 @@ impl Persist for VirtSandbox {
|
|||||||
agent,
|
agent,
|
||||||
hypervisor,
|
hypervisor,
|
||||||
resource_manager,
|
resource_manager,
|
||||||
monitor: Arc::new(HealthCheck::new(true, false)),
|
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user