Merge pull request #8311 from TimePrinciple/log-system-enhancement

runtime-rs: Log system enhancement
This commit is contained in:
Fabiano Fidêncio 2023-10-31 10:14:41 +01:00 committed by GitHub
commit 53cda12a71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 312 additions and 32 deletions

2
src/libs/Cargo.lock generated
View File

@ -629,6 +629,8 @@ dependencies = [
name = "logging"
version = "0.1.0"
dependencies = [
"arc-swap",
"lazy_static",
"serde_json",
"slog",
"slog-async",

View File

@ -24,6 +24,17 @@ pub struct Agent {
#[serde(default, rename = "enable_debug")]
pub debug: bool,
/// The log log level will be applied to agent.
/// Possible values are:
/// - trace
/// - debug
/// - info
/// - warn
/// - error
/// - critical
#[serde(default = "default_agent_log_level")]
pub log_level: String,
/// Enable agent tracing.
///
/// If enabled, the agent will generate OpenTelemetry trace spans.
@ -88,6 +99,7 @@ impl std::default::Default for Agent {
fn default() -> Self {
Self {
debug: true,
log_level: "info".to_string(),
enable_tracing: false,
debug_console_enabled: false,
server_port: DEFAULT_AGENT_VSOCK_PORT,
@ -102,6 +114,10 @@ impl std::default::Default for Agent {
}
}
fn default_agent_log_level() -> String {
String::from("info")
}
fn default_server_port() -> u32 {
DEFAULT_AGENT_VSOCK_PORT
}

View File

@ -379,6 +379,17 @@ pub struct DebugInfo {
#[serde(default)]
pub enable_debug: bool,
/// The log log level will be applied to hypervisor.
/// Possible values are:
/// - trace
/// - debug
/// - info
/// - warn
/// - error
/// - critical
#[serde(default = "default_hypervisor_log_level")]
pub log_level: String,
/// Enable dumping information about guest page structures if true.
#[serde(default)]
pub guest_memory_dump_paging: bool,
@ -408,6 +419,10 @@ impl DebugInfo {
}
}
fn default_hypervisor_log_level() -> String {
String::from("info")
}
/// Virtual machine device configuration information.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct DeviceInfo {

View File

@ -37,6 +37,17 @@ pub struct Runtime {
#[serde(default, rename = "enable_debug")]
pub debug: bool,
/// The log level will be applied to runtime.
/// Possible values are:
/// - trace
/// - debug
/// - info
/// - warn
/// - error
/// - critical
#[serde(default = "default_runtime_log_level")]
pub log_level: String,
/// Enabled experimental feature list, format: ["a", "b"].
///
/// Experimental features are features not stable enough for production, they may break
@ -226,6 +237,10 @@ impl Runtime {
}
}
fn default_runtime_log_level() -> String {
String::from("info")
}
#[cfg(not(feature = "enable-vendor"))]
mod vendor {
use super::*;

View File

@ -18,6 +18,8 @@ slog-json = "2.4.0"
slog-term = "2.9.0"
slog-async = "2.7.0"
slog-scope = "4.4.0"
lazy_static = "1.3.0"
arc-swap = "1.5.0"
[dev-dependencies]
tempfile = "3.2.0"

View File

@ -3,13 +3,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#[macro_use]
extern crate lazy_static;
use arc_swap::ArcSwap;
use slog::{o, record_static, BorrowedKV, Drain, Key, OwnedKV, OwnedKVList, Record, KV};
use std::collections::HashMap;
use std::io;
use std::io::Write;
use std::process;
use std::result;
use std::sync::Mutex;
use std::sync::Arc;
mod file_rotate;
mod log_writer;
@ -17,12 +20,19 @@ mod log_writer;
pub use file_rotate::FileRotator;
pub use log_writer::LogWriter;
lazy_static! {
pub static ref FILTER_RULE: ArcSwap<HashMap<String, slog::Level>> =
ArcSwap::from(Arc::new(HashMap::new()));
pub static ref LOGGERS: ArcSwap<HashMap<String, slog::Logger>> =
ArcSwap::from(Arc::new(HashMap::new()));
}
#[macro_export]
macro_rules! logger_with_subsystem {
($name: ident, $subsystem: expr) => {
macro_rules! $name {
() => {
slog_scope::logger().new(slog::o!("subsystem" => $subsystem))
slog::Logger::clone(logging::LOGGERS.load().get($subsystem).unwrap_or(&slog_scope::logger().new(slog::o!("subsystem" => $subsystem))))
};
}
};
@ -46,8 +56,19 @@ pub fn create_term_logger(level: slog::Level) -> (slog::Logger, slog_async::Asyn
// Ensure only a unique set of key/value fields is logged
let unique_drain = UniqueDrain::new(term_drain).fuse();
// Adjust the level which will be applied to the log-system
// Info is the default level, but if Debug flag is set, the overall log level will be changed to Debug here
FILTER_RULE.rcu(|inner| {
let mut updated_inner = HashMap::new();
updated_inner.clone_from(inner);
for v in updated_inner.values_mut() {
*v = level;
}
updated_inner
});
// Allow runtime filtering of records by log level
let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse();
let filter_drain = RuntimeComponentLevelFilter::new(unique_drain).fuse();
// Ensure the logger is thread-safe
let (async_drain, guard) = slog_async::Async::new(filter_drain)
@ -79,8 +100,19 @@ where
// Ensure only a unique set of key/value fields is logged
let unique_drain = UniqueDrain::new(json_drain).fuse();
// Adjust the level which will be applied to the log-system
// Info is the default level, but if Debug flag is set, the overall log level will be changed to Debug here
FILTER_RULE.rcu(|inner| {
let mut updated_inner = HashMap::new();
updated_inner.clone_from(inner);
for v in updated_inner.values_mut() {
*v = level;
}
updated_inner
});
// Allow runtime filtering of records by log level
let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse();
let filter_drain = RuntimeComponentLevelFilter::new(unique_drain).fuse();
// Ensure the logger is thread-safe
let (async_drain, guard) = slog_async::Async::new(filter_drain)
@ -126,6 +158,37 @@ pub fn slog_level_to_level_name(level: slog::Level) -> Result<&'static str, &'st
Err("invalid slog level")
}
pub fn register_component_logger(component_name: &str) {
let component = String::from(component_name);
LOGGERS.rcu(|inner| {
let mut updated_inner = HashMap::new();
updated_inner.clone_from(inner);
updated_inner.insert(
component_name.to_string(),
slog_scope::logger()
.new(slog::o!("component" => component.clone(), "subsystem" => component.clone())),
);
updated_inner
});
}
pub fn register_subsystem_logger(component_name: &str, subsystem_name: &str) {
let subsystem = String::from(subsystem_name);
LOGGERS.rcu(|inner| {
let mut updated_inner = HashMap::new();
updated_inner.clone_from(inner);
updated_inner.insert(
subsystem_name.to_string(),
// This will update the original `subsystem` field.
inner
.get(component_name)
.unwrap_or(&slog_scope::logger())
.new(slog::o!("subsystem" => subsystem.clone())),
);
updated_inner
});
}
// Used to convert an slog::OwnedKVList into a hash map.
#[derive(Debug)]
struct HashSerializer {
@ -216,23 +279,19 @@ where
}
}
// A RuntimeLevelFilter will discard all log records whose log level is less than the level
// specified in the struct.
struct RuntimeLevelFilter<D> {
// A RuntimeComponentLevelFilter will discard all log records whose log level is less than the level
// specified in the struct according to the component it belongs to.
struct RuntimeComponentLevelFilter<D> {
drain: D,
level: Mutex<slog::Level>,
}
impl<D> RuntimeLevelFilter<D> {
fn new(drain: D, level: slog::Level) -> Self {
RuntimeLevelFilter {
drain,
level: Mutex::new(level),
}
impl<D> RuntimeComponentLevelFilter<D> {
fn new(drain: D) -> Self {
RuntimeComponentLevelFilter { drain }
}
}
impl<D> Drain for RuntimeLevelFilter<D>
impl<D> Drain for RuntimeComponentLevelFilter<D>
where
D: Drain,
{
@ -244,9 +303,34 @@ where
record: &slog::Record,
values: &slog::OwnedKVList,
) -> result::Result<Self::Ok, Self::Err> {
let log_level = self.level.lock().unwrap();
let component_level_config = FILTER_RULE.load();
if record.level().is_at_least(*log_level) {
let mut logger_serializer = HashSerializer::new();
values
.serialize(record, &mut logger_serializer)
.expect("log values serialization failed");
let mut record_serializer = HashSerializer::new();
record
.kv()
.serialize(record, &mut record_serializer)
.expect("log record serialization failed");
let mut component = None;
for (k, v) in record_serializer
.fields
.iter()
.chain(logger_serializer.fields.iter())
{
if k == "component" {
component = Some(v.to_string());
break;
}
}
let according_level = component_level_config
.get(&component.unwrap_or(DEFAULT_SUBSYSTEM.to_string()))
.unwrap_or(&slog::Level::Info);
if record.level().is_at_least(*according_level) {
self.drain.log(record, values)?;
}

View File

@ -1460,6 +1460,7 @@ dependencies = [
"path-clean",
"persist",
"rand 0.8.5",
"regex",
"rust-ini",
"safe-path 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"seccompiler",
@ -1773,6 +1774,8 @@ dependencies = [
name = "logging"
version = "0.1.0"
dependencies = [
"arc-swap",
"lazy_static",
"serde_json",
"slog",
"slog-async",
@ -1803,9 +1806,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.5.0"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "memoffset"
@ -2831,9 +2834,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.9.1"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
"aho-corasick",
"memchr",
@ -2843,9 +2846,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.3.3"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
@ -2854,9 +2857,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.7.4"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "reqwest"

View File

@ -111,6 +111,17 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@"
# Default false
#enable_debug = true
# The log level will be applied to hypervisor.
# Possible values are:
# - trace
# - debug
# - info
# - warn
# - error
# - critical
# Default: info
#log_level = info
# Disable the customizations done in the runtime when it detects
# that it is running on top a VMM. This will result in the runtime
# behaving as it would when running on bare metal.
@ -189,6 +200,17 @@ container_pipe_size=@PIPESIZE@
# (default: disabled)
#enable_debug = true
# The log level will be applied to agent.
# Possible values are:
# - trace
# - debug
# - info
# - warn
# - error
# - critical
# (default: info)
#log_level = info
# Enable agent tracing.
#
# If enabled, the agent will generate OpenTelemetry trace spans.
@ -220,6 +242,17 @@ dial_timeout = 45
# (default: disabled)
#enable_debug = true
# The log level will be applied to runtimes.
# Possible values are:
# - trace
# - debug
# - info
# - warn
# - error
# - critical
# (default: info)
#log_level = info
# If enabled, enabled, it means that 1) if the runtime exits abnormally,
# the cleanup process will be skipped, and 2) the runtime will not exit
# even if the health check fails.

View File

@ -16,6 +16,7 @@ go-flag = "0.1.0"
libc = ">=0.2.39"
nix = "0.24.2"
persist = { path = "../persist" }
regex = "1.10.2"
rust-ini = "0.18.0"
seccompiler = "0.2.0"
serde = { version = "1.0.138", features = ["derive"] }

View File

@ -23,6 +23,7 @@ use kata_sys_util::protection::{available_guest_protection, GuestProtection};
use kata_types::capabilities::{Capabilities, CapabilityBits};
use kata_types::config::default::DEFAULT_CH_ROOTFS_TYPE;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::convert::TryFrom;
@ -50,6 +51,12 @@ const CH_FEATURES_KEY: &str = "features";
// The name of the CH build-time feature for Intel TDX.
const CH_FEATURE_TDX: &str = "tdx";
const LOG_LEVEL_TRACE: &str = "TRCE";
const LOG_LEVEL_DEBUG: &str = "DEBG";
const LOG_LEVEL_INFO: &str = "INFO";
const LOG_LEVEL_WARNING: &str = "WARN";
const LOG_LEVEL_ERROR: &str = "ERRO";
#[derive(Clone, Deserialize, Serialize)]
pub struct VmmPingResponse {
pub build_version: String,
@ -723,15 +730,25 @@ async fn cloud_hypervisor_log_output(mut child: Child, mut shutdown: Receiver<bo
stderr_line = poll_fn(|cx| Pin::new(&mut stderr_lines).poll_next_line(cx)) => {
if let Ok(line) = stderr_line {
let line = line.ok_or("missing stderr line").map_err(|e| anyhow!(e))?;
info!(sl!(), "{:?}", line; "stream" => "stderr");
match parse_ch_log_level(&line) {
LOG_LEVEL_TRACE => trace!(sl!(), "{:?}", line; "stream" => "stderr"),
LOG_LEVEL_DEBUG => debug!(sl!(), "{:?}", line; "stream" => "stderr"),
LOG_LEVEL_WARNING => warn!(sl!(), "{:?}", line; "stream" => "stderr"),
LOG_LEVEL_ERROR => error!(sl!(), "{:?}", line; "stream" => "stderr"),
_ => info!(sl!(), "{:?}", line; "stream" => "stderr"),
}
}
},
stdout_line = poll_fn(|cx| Pin::new(&mut stdout_lines).poll_next_line(cx)) => {
if let Ok(line) = stdout_line {
let line = line.ok_or("missing stdout line").map_err(|e| anyhow!(e))?;
info!(sl!(), "{:?}", line; "stream" => "stdout");
match parse_ch_log_level(&line) {
LOG_LEVEL_TRACE => trace!(sl!(), "{:?}", line; "stream" => "stdout"),
LOG_LEVEL_DEBUG => debug!(sl!(), "{:?}", line; "stream" => "stdout"),
LOG_LEVEL_WARNING => warn!(sl!(), "{:?}", line; "stream" => "stdout"),
LOG_LEVEL_ERROR => error!(sl!(), "{:?}", line; "stream" => "stdout"),
_ => info!(sl!(), "{:?}", line; "stream" => "stdout"),
}
}
},
};
@ -743,6 +760,23 @@ async fn cloud_hypervisor_log_output(mut child: Child, mut shutdown: Receiver<bo
Ok(())
}
pub fn parse_ch_log_level(line: &str) -> &str {
let re = Regex::new(r"cloud-hypervisor: [0-9]*[.][0-9]+ms: <\w+> (?<level>\w+)").unwrap();
let level = re
.captures(line)
.expect("There should be a match for level")
.name("level")
.expect("Level should be found in record")
.as_str();
match level {
"TRACE" => LOG_LEVEL_TRACE,
"DEBUG" => LOG_LEVEL_DEBUG,
"WARN" => LOG_LEVEL_WARNING,
"ERROR" => LOG_LEVEL_ERROR,
_ => LOG_LEVEL_INFO, // info or other values will return info,
}
}
lazy_static! {
// Store the fake guest protection value used by
// get_fake_guest_protection() and set_fake_guest_protection().

View File

@ -48,6 +48,9 @@ impl ResourceManager {
hypervisor: Arc<dyn Hypervisor>,
toml_config: Arc<TomlConfig>,
) -> Result<Self> {
// Regist resource logger for later use.
logging::register_subsystem_logger("runtimes", "resource");
Ok(Self {
inner: Arc::new(RwLock::new(
ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,

View File

@ -12,6 +12,7 @@ use async_trait::async_trait;
use kata_types::mount::Mount;
mod block_rootfs;
use hypervisor::{device::device_manager::DeviceManager, Hypervisor};
use std::{sync::Arc, vec::Vec};
use tokio::sync::RwLock;

View File

@ -4,7 +4,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::{path::PathBuf, str::from_utf8, sync::Arc};
use std::{collections::HashMap, path::PathBuf, str::from_utf8, sync::Arc};
use anyhow::{anyhow, Context, Result};
use common::{
@ -19,6 +19,7 @@ use kata_types::{
};
#[cfg(feature = "linux")]
use linux_container::LinuxContainer;
use logging::FILTER_RULE;
use netns_rs::NetNs;
use persist::sandbox_persist::Persist;
use resource::{
@ -43,6 +44,18 @@ use crate::{
tracer::{KataTracer, ROOTSPAN},
};
fn convert_string_to_slog_level(string_level: &str) -> slog::Level {
match string_level {
"trace" => slog::Level::Trace,
"debug" => slog::Level::Debug,
"info" => slog::Level::Info,
"warn" => slog::Level::Warning,
"error" => slog::Level::Error,
"critical" => slog::Level::Critical,
_ => slog::Level::Info,
}
}
struct RuntimeHandlerManagerInner {
id: String,
msg_sender: Sender<Message>,
@ -149,6 +162,8 @@ impl RuntimeHandlerManagerInner {
let config = load_config(spec, options).context("load config")?;
update_component_log_level(&config);
let dan_path = dan_config_path(&config, &self.id);
let mut network_created = false;
// set netns to None if we want no network for the VM
@ -469,7 +484,11 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
} else {
String::from("")
};
info!(sl!(), "get config path {:?}", &config_path);
// Clone a logger from global logger to ensure the logs in this function get flushed when drop
let logger = slog::Logger::clone(&slog_scope::logger());
info!(logger, "get config path {:?}", &config_path);
let (mut toml_config, _) =
TomlConfig::load_from_file(&config_path).context("load toml config")?;
annotation.update_config_by_annotation(&mut toml_config)?;
@ -492,7 +511,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
.setup_config(&mut toml_config)
.context("failed to setup static resource mgmt config")?;
info!(sl!(), "get config content {:?}", &toml_config);
info!(logger, "get config content {:?}", &toml_config);
Ok(toml_config)
}
@ -512,3 +531,41 @@ fn update_agent_kernel_params(config: &mut TomlConfig) -> Result<()> {
}
Ok(())
}
// this update the log_level of three component: agent, hypervisor, runtime
// according to the settings read from configuration file
fn update_component_log_level(config: &TomlConfig) {
// Retrieve the log-levels set in configuration file, modify the FILTER_RULE accordingly
let default_level = String::from("info");
let agent_level = if let Some(agent_config) = config.agent.get(&config.runtime.agent_name) {
agent_config.log_level.clone()
} else {
default_level.clone()
};
let hypervisor_level =
if let Some(hypervisor_config) = config.hypervisor.get(&config.runtime.hypervisor_name) {
hypervisor_config.debug_info.log_level.clone()
} else {
default_level.clone()
};
let runtime_level = config.runtime.log_level.clone();
// Update FILTER_RULE to apply changes
FILTER_RULE.rcu(|inner| {
let mut updated_inner = HashMap::new();
updated_inner.clone_from(inner);
updated_inner.insert(
"runtimes".to_string(),
convert_string_to_slog_level(&runtime_level),
);
updated_inner.insert(
"agent".to_string(),
convert_string_to_slog_level(&agent_level),
);
updated_inner.insert(
"hypervisor".to_string(),
convert_string_to_slog_level(&hypervisor_level),
);
updated_inner
});
}

View File

@ -44,6 +44,9 @@ pub struct VirtContainer {}
#[async_trait]
impl RuntimeHandler for VirtContainer {
fn init() -> Result<()> {
// Before start logging with virt-container, regist it
logging::register_subsystem_logger("runtimes", "virt-container");
// register
let dragonball_config = Arc::new(DragonballConfig::new());
register_hypervisor_plugin("dragonball", dragonball_config);

View File

@ -62,6 +62,9 @@ impl ServiceManager {
namespace: &str,
task_server_fd: RawFd,
) -> Result<Self> {
// Regist service logger for later use.
logging::register_subsystem_logger("runtimes", "service");
let (sender, receiver) = channel::<Message>(MESSAGE_BUFFER_SIZE);
let rt_mgr = RuntimeHandlerManager::new(id, sender).context("new runtime handler")?;
let handler = Arc::new(rt_mgr);

View File

@ -37,5 +37,10 @@ pub(crate) fn set_logger(path: &str, sid: &str, is_debug: bool) -> Result<slog_a
};
slog_stdlog::init_with_level(level).context(format!("init with level {}", level))?;
// Regist component loggers for later use, there loggers are set directly by configuration
logging::register_component_logger("agent");
logging::register_component_logger("runtimes");
logging::register_component_logger("hypervisor");
Ok(async_guard)
}

View File

@ -23,6 +23,9 @@ impl ShimExecutor {
let path = bundle_path.join("log");
let _logger_guard =
logger::set_logger(path.to_str().unwrap(), &sid, self.args.debug).context("set logger");
// Regist shim logger for later use.
logging::register_subsystem_logger("runtimes", "shim");
if try_core_sched().is_err() {
warn!(
sl!(),