cleanup(userspace/falco): only push metrics for enabled sources.

Refactor `::to_text` a bit to make it clearer.
Also, push agent_info and machine_info only for the first
inspector that exposes them, to avoid duplicate entries in the Prometheus text.

Signed-off-by: Federico Di Pierro <nierro92@gmail.com>
This commit is contained in:
Federico Di Pierro 2025-05-27 11:53:34 +02:00 committed by poiana
parent 466373359c
commit 79a5f9f3d9
2 changed files with 358 additions and 342 deletions

View File

@ -72,118 +72,30 @@ namespace fs = std::filesystem;
*/
const std::string falco_metrics::content_type = "text/plain; version=0.0.4";
/*!
\brief this method takes an application \c state and returns a textual representation of
its configured metrics.
The current implementation returns a Prometheus exposition formatted string.
*/
std::string falco_metrics::to_text(const falco::app::state& state) {
static const char* all_driver_engines[] = {BPF_ENGINE,
KMOD_ENGINE,
MODERN_BPF_ENGINE,
SOURCE_PLUGIN_ENGINE,
NODRIVER_ENGINE,
GVISOR_ENGINE};
std::vector<std::shared_ptr<sinsp>> inspectors;
std::vector<libs::metrics::libs_metrics_collector> metrics_collectors;
// Note: Must rely on loaded_sources, which ensures that the syscall source (if applicable) is
// ordered first.
for(const auto& source : state.loaded_sources) {
auto source_info = state.source_infos.at(source);
auto source_inspector = source_info->inspector;
inspectors.emplace_back(source_inspector);
metrics_collectors.emplace_back(
libs::metrics::libs_metrics_collector(source_inspector.get(),
state.config->m_metrics_flags));
}
libs::metrics::prometheus_metrics_converter prometheus_metrics_converter;
std::string falco_metrics::falco_to_text(
const falco::app::state& state,
libs::metrics::prometheus_metrics_converter& prometheus_metrics_converter,
std::vector<metrics_v2>& additional_wrapper_metrics) {
std::string prometheus_text;
for(size_t i = 0; i < inspectors.size(); ++i) { // Start inspector loop
auto& inspector = inspectors[i];
// Falco wrapper metrics Part A: Repeated for each inspector, accounting for plugins w/
// event sources
/* Examples ...
# HELP falcosecurity_scap_engine_name_info https://falco.org/docs/metrics/
# TYPE falcosecurity_scap_engine_name_info gauge
falcosecurity_scap_engine_name_info{engine_name="source_plugin"} 1
# HELP falcosecurity_scap_engine_name_info https://falco.org/docs/metrics/
# TYPE falcosecurity_scap_engine_name_info gauge
falcosecurity_scap_engine_name_info{engine_name="bpf"} 1
*/
for(size_t i = 0; i < sizeof(all_driver_engines) / sizeof(const char*); i++) {
if(inspector->check_current_engine(all_driver_engines[i])) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"engine_name",
"falcosecurity",
"scap",
{{"engine_name", all_driver_engines[i]}});
break;
}
}
/* Examples ...
# HELP falcosecurity_falco_evt_source_info https://falco.org/docs/metrics/
# TYPE falcosecurity_falco_evt_source_info gauge
falcosecurity_falco_evt_source_info{evt_source="syscall"} 1
# HELP falcosecurity_falco_evt_source_info https://falco.org/docs/metrics/
# TYPE falcosecurity_falco_evt_source_info gauge
falcosecurity_falco_evt_source_info{evt_source="dummy_c"} 1
*/
// It seems that the current sources are appended. For instance, the dummy_c plugin event
// source contains syscalls again as well
const std::string& last_source = inspector->event_sources().back();
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"evt_source",
"falcosecurity",
"falco",
{{"evt_source", last_source}});
// Note: For this to hold true, we must rely on loaded_sources above, which ensures that the
// syscall source (if applicable) is ordered first.
if(i != 0) {
continue;
}
// Falco wrapper metrics Part B: Agnostic, performed only once.
const scap_agent_info* agent_info = inspector->get_agent_info();
const scap_machine_info* machine_info = inspector->get_machine_info();
// # HELP falcosecurity_falco_version_info https://falco.org/docs/metrics/
// # TYPE falcosecurity_falco_version_info gauge
// falcosecurity_falco_version_info{version="0.41.0-100+334ca42"} 1
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"version",
"falcosecurity",
"falco",
{{"version", FALCO_VERSION}});
// Not all scap engines report agent and machine infos.
// However, recent lib refactors enable a linux lite platform, allowing non-syscall
// inspectors to retrieve these metrics if the syscall inspector is unavailable.
if(agent_info) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"kernel_release",
"falcosecurity",
"falco",
{{"kernel_release", agent_info->uname_r}});
}
if(machine_info) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"hostname",
"falcosecurity",
"evt",
{{"hostname", machine_info->hostname}});
}
#if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__)
// Note that the rule counter metrics are retrieved from the state, not from any inspector
// Distinguish between config and rules files using labels, following Prometheus best
// practices: https://prometheus.io/docs/practices/naming/#labels
// # HELP falcosecurity_falco_sha256_rules_files_info https://falco.org/docs/metrics/
// # TYPE falcosecurity_falco_sha256_rules_files_info gauge
// falcosecurity_falco_sha256_rules_files_info{file_name="falco_rules.yaml",sha256="6f0078862a26528cb50a860f9ebebbfbe3162e5009187089c73cb0cdf91d0b06"}
// 1
for(const auto& item : state.config.get()->m_loaded_rules_filenames_sha256sum) {
fs::path fs_path = item.first;
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
@ -193,6 +105,10 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
{{"file_name", fs_path.filename()}, {"sha256", item.second}});
}
// # HELP falcosecurity_falco_sha256_config_files_info https://falco.org/docs/metrics/
// # TYPE falcosecurity_falco_sha256_config_files_info gauge
// falcosecurity_falco_sha256_config_files_info{file_name="falco.yaml",sha256="f97de5fa6f513b5e07cd9f29ee9904ee4267cb120ef6501f8555543d5a98dd1c"}
// 1
for(const auto& item : state.config.get()->m_loaded_configs_filenames_sha256sum) {
fs::path fs_path = item.first;
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
@ -203,41 +119,9 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
}
#endif
std::vector<metrics_v2> additional_wrapper_metrics;
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"reload_ts",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_S64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
state.config->m_falco_reload_ts));
if(agent_info) {
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"start_ts",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
agent_info->start_ts_epoch));
}
if(machine_info) {
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"host_boot_ts",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
machine_info->boot_ts_epoch));
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"host_num_cpus",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U32,
METRIC_VALUE_UNIT_COUNT,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
machine_info->num_cpus));
}
// # HELP falcosecurity_falco_outputs_queue_num_drops_total https://falco.org/docs/metrics/
// # TYPE falcosecurity_falco_outputs_queue_num_drops_total counter
// falcosecurity_falco_outputs_queue_num_drops_total 0
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"outputs_queue_num_drops",
METRICS_V2_MISC,
@ -246,32 +130,19 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
METRIC_VALUE_METRIC_TYPE_MONOTONIC,
state.outputs->get_outputs_queue_num_drops()));
if(agent_info) {
auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count();
// # HELP falcosecurity_falco_reload_timestamp_nanoseconds https://falco.org/docs/metrics/
// # TYPE falcosecurity_falco_reload_timestamp_nanoseconds gauge
// falcosecurity_falco_reload_timestamp_nanoseconds 1748338536592811359
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"duration_sec",
"reload_ts",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_S_COUNT,
METRIC_VALUE_METRIC_TYPE_MONOTONIC,
(uint64_t)((now - agent_info->start_ts_epoch) / ONE_SECOND_IN_NS)));
}
METRIC_VALUE_TYPE_S64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
state.config->m_falco_reload_ts));
for(auto metric : additional_wrapper_metrics) {
prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
prometheus_text +=
prometheus_metrics_converter.convert_metric_to_text_prometheus(metric,
"falcosecurity",
"falco");
}
// Falco metrics categories
//
// rules_counters_enabled
// jemalloc_stats_enabled
if(state.config->m_metrics_flags & METRICS_V2_RULE_COUNTERS) {
// rules_counters_enabled
const stats_manager& rule_stats_manager = state.engine->get_rule_stats_manager();
const indexed_vector<falco_rule>& rules = state.engine->get_rules();
const std::vector<std::unique_ptr<std::atomic<uint64_t>>>& rules_by_id =
@ -282,21 +153,16 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
auto rule = rules.at(i);
auto count = rules_by_id[i]->load();
if(count > 0) {
/* Examples ...
# HELP falcosecurity_falco_rules_matches_total
https://falco.org/docs/metrics/ # TYPE
falcosecurity_falco_rules_matches_total counter
falcosecurity_falco_rules_matches_total{priority="4",rule_name="Read
sensitive file
untrusted",source="syscall",tag_T1555="true",tag_container="true",tag_filesystem="true",tag_host="true",tag_maturity_stable="true",tag_mitre_credential_access="true"}
10 # HELP falcosecurity_falco_rules_matches_total
https://falco.org/docs/metrics/ # TYPE
falcosecurity_falco_rules_matches_total counter
falcosecurity_falco_rules_matches_total{priority="5",rule_name="Unexpected
UDP
Traffic",source="syscall",tag_TA0011="true",tag_container="true",tag_host="true",tag_maturity_incubating="true",tag_mitre_exfiltration="true",tag_network="true"}
1
*/
// # HELP falcosecurity_falco_rules_matches_total https://falco.org/docs/metrics/
// # TYPE falcosecurity_falco_rules_matches_total counter
// falcosecurity_falco_rules_matches_total{priority="4",rule_name="Read sensitive
// file
// untrusted",source="syscall",tag_T1555="true",tag_container="true",tag_filesystem="true",tag_host="true",tag_maturity_stable="true",tag_mitre_credential_access="true"}
// 32 # HELP falcosecurity_falco_rules_matches_total https://falco.org/docs/metrics/
// # TYPE falcosecurity_falco_rules_matches_total counter
// falcosecurity_falco_rules_matches_total{priority="5",rule_name="Terminal shell in
// container",source="syscall",tag_T1059="true",tag_container="true",tag_maturity_stable="true",tag_mitre_execution="true",tag_shell="true"}
// 1
auto metric = libs::metrics::libsinsp_metrics::new_metric(
"rules_matches",
METRICS_V2_RULE_COUNTERS,
@ -315,8 +181,7 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
[&const_labels](std::string const& tag) {
const_labels.emplace(std::string{"tag_"} + tag, "true");
});
prometheus_text +=
prometheus_metrics_converter.convert_metric_to_text_prometheus(
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
metric,
"falcosecurity",
"falco",
@ -326,6 +191,7 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
}
#ifdef HAS_JEMALLOC
if(state.config->m_metrics_flags & METRICS_V2_JEMALLOC_STATS) {
// jemalloc_stats_enabled
nlohmann::json j;
malloc_stats_print(
[](void* to, const char* from) {
@ -347,8 +213,7 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
METRIC_VALUE_METRIC_TYPE_MONOTONIC,
val);
prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
prometheus_text +=
prometheus_metrics_converter.convert_metric_to_text_prometheus(
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
metric,
"falcosecurity",
"falco");
@ -356,8 +221,55 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
}
}
#endif
} // End inspector loop
return prometheus_text;
}
std::string falco_metrics::sources_to_text(
const falco::app::state& state,
libs::metrics::prometheus_metrics_converter& prometheus_metrics_converter,
std::vector<metrics_v2>& additional_wrapper_metrics) {
static const char* all_driver_engines[] = {BPF_ENGINE,
KMOD_ENGINE,
MODERN_BPF_ENGINE,
SOURCE_PLUGIN_ENGINE,
NODRIVER_ENGINE,
GVISOR_ENGINE};
std::string prometheus_text;
bool agent_info_written = false;
bool machine_info_written = false;
// Then, source-bound metrics
for(const auto& source : state.enabled_sources) {
auto source_info = state.source_infos.at(source);
auto source_inspector = source_info->inspector;
// First thing: list of enabled engine names
// Falco wrapper metrics Part A: Repeated for each inspector, accounting for plugins w/
// event sources
/* Examples ...
# HELP falcosecurity_scap_engine_name_info https://falco.org/docs/metrics/
# TYPE falcosecurity_scap_engine_name_info gauge
falcosecurity_scap_engine_name_info{engine_name="source_plugin",evt_source="dummy"} 1
# HELP falcosecurity_scap_engine_name_info https://falco.org/docs/metrics/
# TYPE falcosecurity_scap_engine_name_info gauge
falcosecurity_scap_engine_name_info{engine_name="bpf",evt_source="syscall"} 1
*/
for(size_t j = 0; j < sizeof(all_driver_engines) / sizeof(const char*); j++) {
if(source_inspector->check_current_engine(all_driver_engines[j])) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"engine_name",
"falcosecurity",
"scap",
{{"engine_name", std::string(all_driver_engines[j])},
{"evt_source", source}});
break;
}
}
// Inspectors' metrics collectors
// Libs metrics categories
//
// resource_utilization_enabled
@ -365,17 +277,15 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
// kernel_event_counters_enabled
// kernel_event_counters_per_cpu_enabled
// libbpf_stats_enabled
for(size_t i = 0; i < metrics_collectors.size();
++i) { // Start inspector libs metrics collector loop
auto& metrics_collector = metrics_collectors[i];
auto metrics_collector =
libs::metrics::libs_metrics_collector(source_inspector.get(),
state.config->m_metrics_flags);
metrics_collector.snapshot();
auto metrics_snapshot = metrics_collector.get_metrics();
if(i != 0) {
// Source plugin
if(source != falco_common::syscall_source) {
// Performed repeatedly for each inspectors' libs metrics collector
for(auto& metric : metrics_snapshot) {
if(metric.flags & METRICS_V2_PLUGINS) {
prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
@ -386,12 +296,8 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
"plugins");
}
}
continue;
}
// Performed only once, the first inspector must be the syscalls event source, as ensured by
// the ordering above, if applicable, to maximize metrics retrieval.
} else {
// Source syscall
for(auto& metric : metrics_snapshot) {
prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
std::string prometheus_subsystem = "scap";
@ -421,7 +327,8 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
METRIC_VALUE_UNIT_COUNT,
METRIC_VALUE_METRIC_TYPE_MONOTONIC,
metric.value.u64);
const std::map<std::string, std::string>& const_labels = {{"cpu", cpu_number}};
const std::map<std::string, std::string>& const_labels = {
{"cpu", cpu_number}};
/* Examples ...
# HELP falcosecurity_scap_n_evts_cpu_total
https://falco.org/docs/metrics/ # TYPE
@ -477,13 +384,112 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
const_labels);
}
} else {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
prometheus_text +=
prometheus_metrics_converter.convert_metric_to_text_prometheus(
metric,
"falcosecurity",
prometheus_subsystem);
}
}
} // End inspector libs metrics collector loop
}
// Source wrapper metrics Part B: Agnostic, performed only once.
if(agent_info_written && machine_info_written) {
continue;
}
const scap_agent_info* agent_info = nullptr;
if(!agent_info_written) {
agent_info = source_inspector->get_agent_info();
}
const scap_machine_info* machine_info = nullptr;
if(!machine_info_written) {
machine_info = source_inspector->get_machine_info();
}
// Not all scap engines report agent and machine infos.
// However, recent lib refactors enable a linux lite platform, allowing non-syscall
// inspectors to retrieve these metrics if the syscall inspector is unavailable.
// We only push these info once.
if(agent_info) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"kernel_release",
"falcosecurity",
"falco",
{{"kernel_release", agent_info->uname_r}});
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"start_ts",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
agent_info->start_ts_epoch));
auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count();
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"duration_sec",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_S_COUNT,
METRIC_VALUE_METRIC_TYPE_MONOTONIC,
(uint64_t)((now - agent_info->start_ts_epoch) / ONE_SECOND_IN_NS)));
agent_info_written = true;
}
if(machine_info) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"hostname",
"falcosecurity",
"evt",
{{"hostname", machine_info->hostname}});
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"host_boot_ts",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
machine_info->boot_ts_epoch));
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
"host_num_cpus",
METRICS_V2_MISC,
METRIC_VALUE_TYPE_U32,
METRIC_VALUE_UNIT_COUNT,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
machine_info->num_cpus));
machine_info_written = true;
}
} // End inspector loop
return prometheus_text;
}
/*!
    \brief Builds the textual representation of the metrics configured for an
    application \c state.

    The output is a Prometheus exposition formatted string, assembled in three steps:
    the Falco-wide metrics, then the per-source metrics, and finally the additional
    wrapper metrics that the two previous steps queued up.

    \param state the application state the metrics are read from.
    \return the concatenated Prometheus text.
*/
std::string falco_metrics::to_text(const falco::app::state& state) {
    libs::metrics::prometheus_metrics_converter prometheus_metrics_converter;

    // Shared sink: both helpers below may queue metrics here; they are rendered
    // only after every source has been visited.
    std::vector<metrics_v2> additional_wrapper_metrics;

    // Falco global metrics, once
    std::string prometheus_text =
            falco_to_text(state, prometheus_metrics_converter, additional_wrapper_metrics);

    // Metrics for each source
    prometheus_text +=
            sources_to_text(state, prometheus_metrics_converter, additional_wrapper_metrics);

    // Iterate by reference: the unit conversion mutates the metric in place and the
    // vector is local to this function, so copying each element first is unnecessary.
    for(auto& metric : additional_wrapper_metrics) {
        prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
        prometheus_text +=
                prometheus_metrics_converter.convert_metric_to_text_prometheus(metric,
                                                                               "falcosecurity",
                                                                               "falco");
    }

    return prometheus_text;
}

View File

@ -28,4 +28,14 @@ class falco_metrics {
public:
static const std::string content_type;
static std::string to_text(const falco::app::state& state);
private:
/*!
    \brief Renders the Falco-wide metrics, i.e. the ones emitted once and not
    per event source, as Prometheus text.

    \param state application state the metrics are read from.
    \param prometheus_metrics_converter converter used to format each metric.
    \param additional_wrapper_metrics output list; metrics appended here are not part
    of the returned text and are expected to be rendered by the caller.
    \return the Prometheus text produced for the Falco-wide metrics.
*/
static std::string falco_to_text(
const falco::app::state& state,
libs::metrics::prometheus_metrics_converter& prometheus_metrics_converter,
std::vector<metrics_v2>& additional_wrapper_metrics);
/*!
    \brief Renders the source-bound metrics as Prometheus text, visiting each entry
    of \c state.enabled_sources and its inspector.

    \param state application state providing the enabled sources and their inspectors.
    \param prometheus_metrics_converter converter used to format each metric.
    \param additional_wrapper_metrics output list; agent/machine derived metrics are
    appended here and are expected to be rendered by the caller.
    \return the Prometheus text produced for the enabled sources.
*/
static std::string sources_to_text(
const falco::app::state& state,
libs::metrics::prometheus_metrics_converter& prometheus_metrics_converter,
std::vector<metrics_v2>& additional_wrapper_metrics);
};