Merge pull request #6718 from openanolis/mengze/keep_abnormal

runtime-rs: support keep_abnormal in toml config
This commit is contained in:
Bin Liu 2023-04-26 12:36:52 +08:00 committed by GitHub
commit 509bc8b6c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 37 additions and 9 deletions

View File

@ -130,6 +130,12 @@ pub struct Runtime {
/// Vendor customized runtime configuration.
#[serde(default, flatten)]
pub vendor: RuntimeVendor,
/// If keep_abnormal is enabled, it means that 1) if the runtime exits abnormally, the cleanup process
/// will be skipped, and 2) the runtime will not exit even if the health check fails.
/// This option is typically used to retain abnormal information for debugging.
#[serde(default)]
pub keep_abnormal: bool,
}
impl ConfigOps for Runtime {

View File

@ -214,7 +214,14 @@ dial_timeout = 45
# system log
# (default: disabled)
#enable_debug = true
#
# If keep_abnormal is enabled, it means that 1) if the runtime exits abnormally,
# the cleanup process will be skipped, and 2) the runtime will not exit
# even if the health check fails.
# This option is typically used to retain abnormal information for debugging.
# (default: false)
#keep_abnormal = true
# Internetworking model
# Determines how the VM should be connected to
# the container network interface

View File

@ -14,6 +14,7 @@ use common::{
RuntimeHandler, RuntimeInstance, Sandbox, SandboxNetworkEnv,
};
use hypervisor::Param;
use kata_sys_util::spec::load_oci_spec;
use kata_types::{
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
};
@ -190,9 +191,16 @@ impl RuntimeHandlerManager {
let sender = inner.msg_sender.clone();
let sandbox_state = persist::from_disk::<SandboxState>(&inner.id)
.context("failed to load the sandbox state")?;
let config = if let Ok(spec) = load_oci_spec() {
load_config(&spec, &None).context("load config")?
} else {
TomlConfig::default()
};
let sandbox_args = SandboxRestoreArgs {
sid: inner.id.clone(),
toml_config: TomlConfig::default(),
toml_config: config,
sender,
};
match sandbox_state.sandbox_type.clone() {
@ -208,6 +216,10 @@ impl RuntimeHandlerManager {
}
#[cfg(feature = "virt")]
name if name == VirtContainer::name() => {
if sandbox_args.toml_config.runtime.keep_abnormal {
info!(sl!(), "skip cleanup for keep_abnormal");
return Ok(());
}
let sandbox = VirtSandbox::restore(sandbox_args, sandbox_state)
.await
.context("failed to restore the sandbox")?;

View File

@ -21,17 +21,17 @@ const HEALTH_CHECK_STOP_CHANNEL_BUFFER_SIZE: usize = 1;
pub struct HealthCheck {
pub keep_alive: bool,
keep_vm: bool,
keep_abnormal: bool,
stop_tx: mpsc::Sender<()>,
stop_rx: Arc<Mutex<mpsc::Receiver<()>>>,
}
impl HealthCheck {
pub fn new(keep_alive: bool, keep_vm: bool) -> HealthCheck {
pub fn new(keep_alive: bool, keep_abnormal: bool) -> HealthCheck {
let (tx, rx) = mpsc::channel(HEALTH_CHECK_STOP_CHANNEL_BUFFER_SIZE);
HealthCheck {
keep_alive,
keep_vm,
keep_abnormal,
stop_tx: tx,
stop_rx: Arc::new(Mutex::new(rx)),
}
@ -46,7 +46,7 @@ impl HealthCheck {
info!(sl!(), "start runtime keep alive");
let stop_rx = self.stop_rx.clone();
let keep_vm = self.keep_vm;
let keep_abnormal = self.keep_abnormal;
let _ = tokio::spawn(async move {
let mut version_check_threshold_count = 0;
@ -87,7 +87,7 @@ impl HealthCheck {
error!(sl!(), "failed to do {} agent health check: {}", id, e);
if let Err(mpsc::error::TryRecvError::Empty) = stop_rx.try_recv() {
error!(sl!(), "failed to receive stop monitor signal");
if !keep_vm {
if !keep_abnormal {
::std::process::exit(1);
}
} else {

View File

@ -75,6 +75,8 @@ impl VirtSandbox {
hypervisor: Arc<dyn Hypervisor>,
resource_manager: Arc<ResourceManager>,
) -> Result<Self> {
let config = resource_manager.config().await;
let keep_abnormal = config.runtime.keep_abnormal;
Ok(Self {
sid: sid.to_string(),
msg_sender: Arc::new(Mutex::new(msg_sender)),
@ -82,7 +84,7 @@ impl VirtSandbox {
agent,
hypervisor,
resource_manager,
monitor: Arc::new(HealthCheck::new(true, false)),
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
})
}
@ -440,6 +442,7 @@ impl Persist for VirtSandbox {
}?;
let agent = Arc::new(KataAgent::new(kata_types::config::Agent::default()));
let sid = sandbox_args.sid;
let keep_abnormal = config.runtime.keep_abnormal;
let args = ManagerArgs {
sid: sid.clone(),
agent: agent.clone(),
@ -454,7 +457,7 @@ impl Persist for VirtSandbox {
agent,
hypervisor,
resource_manager,
monitor: Arc::new(HealthCheck::new(true, false)),
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
})
}
}