fix(metrics/prometheus): gracefully handle multiple event sources, avoid erroneous duplicate metrics

Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
This commit is contained in:
Melissa Kilby 2025-05-19 03:42:26 +00:00 committed by poiana
parent 9c2cff370d
commit cf6857f13b

View File

@ -72,9 +72,22 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
libs::metrics::prometheus_metrics_converter prometheus_metrics_converter; libs::metrics::prometheus_metrics_converter prometheus_metrics_converter;
std::string prometheus_text; std::string prometheus_text;
for(auto inspector : inspectors) { for(size_t i = 0; i < inspectors.size(); ++i) { // Start inspector loop
// Falco wrapper metrics auto& inspector = inspectors[i];
// Falco wrapper metrics, repeated for each inspector, accounting for plugins w/ event
// sources
// //
/* Examples ...
# HELP falcosecurity_scap_engine_name_info https://falco.org/docs/metrics/
# TYPE falcosecurity_scap_engine_name_info gauge
falcosecurity_scap_engine_name_info{engine_name="source_plugin"} 1
# HELP falcosecurity_scap_engine_name_info https://falco.org/docs/metrics/
# TYPE falcosecurity_scap_engine_name_info gauge
falcosecurity_scap_engine_name_info{engine_name="bpf"} 1
*/
for(size_t i = 0; i < sizeof(all_driver_engines) / sizeof(const char*); i++) { for(size_t i = 0; i < sizeof(all_driver_engines) / sizeof(const char*); i++) {
if(inspector->check_current_engine(all_driver_engines[i])) { if(inspector->check_current_engine(all_driver_engines[i])) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus( prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
@ -86,6 +99,31 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
} }
} }
if(i != 0) {
continue;
}
// Falco wrapper metrics; Performed only once, the first inspector is typically the syscalls
// event source
//
// Each inspector includes all event sources
/* Examples ...
# HELP falcosecurity_falco_evt_source_info https://falco.org/docs/metrics/
# TYPE falcosecurity_falco_evt_source_info gauge
falcosecurity_falco_evt_source_info{evt_source="syscall"} 1
# HELP falcosecurity_falco_evt_source_info https://falco.org/docs/metrics/
# TYPE falcosecurity_falco_evt_source_info gauge
falcosecurity_falco_evt_source_info{evt_source="dummy_c"} 1
*/
for(const std::string& source : inspector->event_sources()) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"evt_source",
"falcosecurity",
"falco",
{{"evt_source", source}});
}
const scap_agent_info* agent_info = inspector->get_agent_info(); const scap_agent_info* agent_info = inspector->get_agent_info();
const scap_machine_info* machine_info = inspector->get_machine_info(); const scap_machine_info* machine_info = inspector->get_machine_info();
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus( prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
@ -132,14 +170,6 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
} }
#endif #endif
for(const std::string& source : inspector->event_sources()) {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(
"evt_source",
"falcosecurity",
"falco",
{{"evt_source", source}});
}
std::vector<metrics_v2> additional_wrapper_metrics; std::vector<metrics_v2> additional_wrapper_metrics;
additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric( additional_wrapper_metrics.emplace_back(libs::metrics::libsinsp_metrics::new_metric(
@ -207,31 +237,32 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
// Falco metrics categories // Falco metrics categories
// //
// rules_counters_enabled // rules_counters_enabled
// jemalloc_stats_enabled
if(state.config->m_metrics_flags & METRICS_V2_RULE_COUNTERS) { if(state.config->m_metrics_flags & METRICS_V2_RULE_COUNTERS) {
const stats_manager& rule_stats_manager = state.engine->get_rule_stats_manager(); const stats_manager& rule_stats_manager = state.engine->get_rule_stats_manager();
const indexed_vector<falco_rule>& rules = state.engine->get_rules(); const indexed_vector<falco_rule>& rules = state.engine->get_rules();
const std::vector<std::unique_ptr<std::atomic<uint64_t>>>& rules_by_id = const std::vector<std::unique_ptr<std::atomic<uint64_t>>>& rules_by_id =
rule_stats_manager.get_by_rule_id(); rule_stats_manager.get_by_rule_id();
// Distinguish between rules counters using labels, following Prometheus best practices: // Distinguish between rules counters using labels, following Prometheus best
// https://prometheus.io/docs/practices/naming/#labels // practices: https://prometheus.io/docs/practices/naming/#labels
for(size_t i = 0; i < rules_by_id.size(); i++) { for(size_t i = 0; i < rules_by_id.size(); i++) {
auto rule = rules.at(i); auto rule = rules.at(i);
auto count = rules_by_id[i]->load(); auto count = rules_by_id[i]->load();
if(count > 0) { if(count > 0) {
/* Examples ... /* Examples ...
# HELP falcosecurity_falco_rules_matches_total # HELP falcosecurity_falco_rules_matches_total
https://falco.org/docs/metrics/ # TYPE https://falco.org/docs/metrics/ # TYPE
falcosecurity_falco_rules_matches_total counter falcosecurity_falco_rules_matches_total counter
falcosecurity_falco_rules_matches_total{priority="4",rule_name="Read falcosecurity_falco_rules_matches_total{priority="4",rule_name="Read
sensitive file sensitive file
untrusted",source="syscall",tag_T1555="true",tag_container="true",tag_filesystem="true",tag_host="true",tag_maturity_stable="true",tag_mitre_credential_access="true"} untrusted",source="syscall",tag_T1555="true",tag_container="true",tag_filesystem="true",tag_host="true",tag_maturity_stable="true",tag_mitre_credential_access="true"}
10 # HELP falcosecurity_falco_rules_matches_total 10 # HELP falcosecurity_falco_rules_matches_total
https://falco.org/docs/metrics/ # TYPE https://falco.org/docs/metrics/ # TYPE
falcosecurity_falco_rules_matches_total counter falcosecurity_falco_rules_matches_total counter
falcosecurity_falco_rules_matches_total{priority="5",rule_name="Unexpected falcosecurity_falco_rules_matches_total{priority="5",rule_name="Unexpected
UDP UDP
Traffic",source="syscall",tag_TA0011="true",tag_container="true",tag_host="true",tag_maturity_incubating="true",tag_mitre_exfiltration="true",tag_network="true"} Traffic",source="syscall",tag_TA0011="true",tag_container="true",tag_host="true",tag_maturity_incubating="true",tag_mitre_exfiltration="true",tag_network="true"}
1 1
*/ */
auto metric = libs::metrics::libsinsp_metrics::new_metric( auto metric = libs::metrics::libsinsp_metrics::new_metric(
"rules_matches", "rules_matches",
@ -292,18 +323,42 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
} }
} }
#endif #endif
} } // End inspector loop
// Libs metrics categories // Libs metrics categories
// //
// resource_utilization_enabled // resource_utilization_enabled
// state_counters_enabled // state_counters_enabled
// kernel_event_counters_enabled // kernel_event_counters_enabled
// kernel_event_counters_per_cpu_enabled
// libbpf_stats_enabled // libbpf_stats_enabled
for(auto metrics_collector : metrics_collectors) {
for(size_t i = 0; i < metrics_collectors.size();
++i) { // Start inspector libs metrics collector loop
auto& metrics_collector = metrics_collectors[i];
metrics_collector.snapshot(); metrics_collector.snapshot();
auto metrics_snapshot = metrics_collector.get_metrics(); auto metrics_snapshot = metrics_collector.get_metrics();
if(i != 0) {
// Performed repeatedly for each inspector's libs metrics collector
for(auto& metric : metrics_snapshot) {
if(metric.flags & METRICS_V2_PLUGINS) {
prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
prometheus_text +=
prometheus_metrics_converter.convert_metric_to_text_prometheus(
metric,
"falcosecurity",
"plugins");
}
}
continue;
}
// Performed only once, the first inspector is typically the syscalls event source
//
for(auto& metric : metrics_snapshot) { for(auto& metric : metrics_snapshot) {
prometheus_metrics_converter.convert_metric_to_unit_convention(metric); prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
std::string prometheus_subsystem = "scap"; std::string prometheus_subsystem = "scap";
@ -335,11 +390,13 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
metric.value.u64); metric.value.u64);
const std::map<std::string, std::string>& const_labels = {{"cpu", cpu_number}}; const std::map<std::string, std::string>& const_labels = {{"cpu", cpu_number}};
/* Examples ... /* Examples ...
# HELP falcosecurity_scap_n_evts_cpu_total https://falco.org/docs/metrics/ # HELP falcosecurity_scap_n_evts_cpu_total
# TYPE falcosecurity_scap_n_evts_cpu_total counter https://falco.org/docs/metrics/ # TYPE
falcosecurity_scap_n_evts_cpu_total counter
falcosecurity_scap_n_evts_cpu_total{cpu="7"} 237 falcosecurity_scap_n_evts_cpu_total{cpu="7"} 237
# HELP falcosecurity_scap_n_drops_cpu_total https://falco.org/docs/metrics/ # HELP falcosecurity_scap_n_drops_cpu_total
# TYPE falcosecurity_scap_n_drops_cpu_total counter https://falco.org/docs/metrics/ # TYPE
falcosecurity_scap_n_drops_cpu_total counter
falcosecurity_scap_n_drops_cpu_total{cpu="7"} 0 falcosecurity_scap_n_drops_cpu_total{cpu="7"} 0
*/ */
prometheus_text += prometheus_text +=
@ -350,8 +407,8 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
const_labels); const_labels);
} }
} else if(strcmp(metric.name, "n_drops_buffer_total") == 0) { } else if(strcmp(metric.name, "n_drops_buffer_total") == 0) {
// Skip the libs aggregate metric since we distinguish between buffer drops using // Skip the libs aggregate metric since we distinguish between buffer drops
// labels similar to the rules_matches // using labels similar to the rules_matches
continue; continue;
} else if(strncmp(metric.name, "n_drops_buffer", 14) == 0) // prefix match } else if(strncmp(metric.name, "n_drops_buffer", 14) == 0) // prefix match
{ {
@ -393,6 +450,7 @@ std::string falco_metrics::to_text(const falco::app::state& state) {
prometheus_subsystem); prometheus_subsystem);
} }
} }
} } // End inspector libs metrics collector loop
return prometheus_text; return prometheus_text;
} }