From 9d543c70390f95d2b84dfd7fe20068bfaa2f0799 Mon Sep 17 00:00:00 2001 From: Federico Di Pierro Date: Thu, 29 May 2025 09:19:33 +0200 Subject: [PATCH] fix(userspace/falco): when collecting metrics for stats_writer, create a `libs_metrics_collector` for each source. In case multiple sources are enabled, each source has its own `libs_metrics_collector` with correct flags, so that it can retrieve all metrics. Signed-off-by: Federico Di Pierro --- userspace/falco/app/actions/process_events.cpp | 6 ------ userspace/falco/stats_writer.cpp | 18 +++++++++++------- userspace/falco/stats_writer.h | 7 +++++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/userspace/falco/app/actions/process_events.cpp b/userspace/falco/app/actions/process_events.cpp index 3a4b46a7..ca713fb8 100644 --- a/userspace/falco/app/actions/process_events.cpp +++ b/userspace/falco/app/actions/process_events.cpp @@ -235,12 +235,6 @@ static falco::app::run_result do_inspect( } // for capture mode, the source name can change at every event - // TODO: This may currently cause issues for multiple event sources. We are deferring - // the fix to Falco 0.42.0. - // For multiple event sources, it generates `n` metrics logs per source at a time, as - // expected, with the engine_name correctly reflected. However, the order may interfere, - // as the correct inspector for the syscalls event source seems to never get passed, - // resulting in most metrics being missing. 
stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx], num_evts); diff --git a/userspace/falco/stats_writer.cpp b/userspace/falco/stats_writer.cpp index ac601b25..2c8e910a 100644 --- a/userspace/falco/stats_writer.cpp +++ b/userspace/falco/stats_writer.cpp @@ -414,7 +414,8 @@ void stats_writer::collector::get_metrics_output_fields_wrapper( void stats_writer::collector::get_metrics_output_fields_additional( nlohmann::json& output_fields, - double stats_snapshot_time_delta_sec) { + double stats_snapshot_time_delta_sec, + const std::string& src) { // Falco metrics categories // // rules_counters_enabled @@ -478,7 +479,8 @@ void stats_writer::collector::get_metrics_output_fields_additional( #endif #if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__) - if(m_writer->m_libs_metrics_collector && m_writer->m_output_rule_metrics_converter) { + if(m_writer->m_libs_metrics_collectors.find(src) != m_writer->m_libs_metrics_collectors.end() && + m_writer->m_output_rule_metrics_converter) { // Libs metrics categories // // resource_utilization_enabled @@ -487,8 +489,9 @@ void stats_writer::collector::get_metrics_output_fields_additional( // libbpf_stats_enabled // Refresh / New snapshot - m_writer->m_libs_metrics_collector->snapshot(); - auto metrics_snapshot = m_writer->m_libs_metrics_collector->get_metrics(); + auto& libs_metrics_collector = m_writer->m_libs_metrics_collectors[src]; + libs_metrics_collector->snapshot(); + auto metrics_snapshot = libs_metrics_collector->get_metrics(); // Cache n_evts and n_drops to derive n_drops_perc. 
 uint64_t n_evts = 0; uint64_t n_drops = 0; @@ -611,7 +614,8 @@ void stats_writer::collector::collect(const std::shared_ptr<sinsp>& inspector, uint64_t num_evts) { if(m_writer->has_output()) { #if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__) - if(!m_writer->m_libs_metrics_collector) { + if(m_writer->m_libs_metrics_collectors.find(src) == + m_writer->m_libs_metrics_collectors.end()) { uint32_t flags = m_writer->m_config->m_metrics_flags; // Note: ENGINE_FLAG_BPF_STATS_ENABLED check has been moved to libs, that is, when // libbpf stats is not enabled in the kernel settings we won't collect them even if the @@ -625,7 +629,7 @@ void stats_writer::collector::collect(const std::shared_ptr<sinsp>& inspector, flags &= ~(METRICS_V2_KERNEL_COUNTERS | METRICS_V2_KERNEL_COUNTERS_PER_CPU | METRICS_V2_STATE_COUNTERS | METRICS_V2_LIBBPF_STATS); } - m_writer->m_libs_metrics_collector = + m_writer->m_libs_metrics_collectors[src] = std::make_unique<libs::metrics::libs_metrics_collector>(inspector.get(), flags); } @@ -658,7 +662,7 @@ void stats_writer::collector::collect(const std::shared_ptr<sinsp>& inspector, now, stats_snapshot_time_delta_sec); - get_metrics_output_fields_additional(output_fields, stats_snapshot_time_delta_sec); + get_metrics_output_fields_additional(output_fields, stats_snapshot_time_delta_sec, src); /* Send message in the queue */ stats_writer::msg msg; diff --git a/userspace/falco/stats_writer.h b/userspace/falco/stats_writer.h index 4145559d..3b75f4e9 100644 --- a/userspace/falco/stats_writer.h +++ b/userspace/falco/stats_writer.h @@ -79,7 +79,8 @@ public: fields.
 */ void get_metrics_output_fields_additional(nlohmann::json& output_fields, - double stats_snapshot_time_delta_sec); + double stats_snapshot_time_delta_sec, + const std::string& src); std::shared_ptr<stats_writer> m_writer; // Init m_last_tick w/ invalid value to enable metrics logging immediately after @@ -153,7 +154,9 @@ private: tbb::concurrent_bounded_queue<stats_writer::msg> m_queue; #endif #if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__) - std::unique_ptr<libs::metrics::libs_metrics_collector> m_libs_metrics_collector; + // Per source map of libs metrics collectors + std::unordered_map<std::string, std::unique_ptr<libs::metrics::libs_metrics_collector>> + m_libs_metrics_collectors; std::unique_ptr<libs::metrics::output_rule_metrics_converter> m_output_rule_metrics_converter; #endif std::shared_ptr<falco_outputs> m_outputs;