From 4d24bcdd2f4a4cd6ca4ca401739c7005a2a1c40d Mon Sep 17 00:00:00 2001
From: Melissa Kilby
Date: Thu, 27 Apr 2023 11:14:03 +0000
Subject: [PATCH] new(userspace/falco)!: introduce native support for resource_utilization metrics / stats v2

Intended to phase out previous stats writer settings and log schema.

Signed-off-by: Melissa Kilby
---
 .../falco/app/actions/process_events.cpp      |  67 +++++-
 userspace/falco/stats_writer.cpp              | 202 ++++++++++++++++--
 userspace/falco/stats_writer.h                |  35 ++-
 3 files changed, 261 insertions(+), 43 deletions(-)

diff --git a/userspace/falco/app/actions/process_events.cpp b/userspace/falco/app/actions/process_events.cpp
index d4e5879c..08eb7ee3 100644
--- a/userspace/falco/app/actions/process_events.cpp
+++ b/userspace/falco/app/actions/process_events.cpp
@@ -275,7 +275,7 @@ static falco::app::run_result do_inspect(
 			}
 
 			// for capture mode, the source name can change at every event
-			stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx]);
+			stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx], num_evts);
 		}
 		else
 		{
@@ -291,7 +291,7 @@ static falco::app::run_result do_inspect(
 			}
 
 			// for live mode, the source name is constant
-			stats_collector.collect(inspector, source);
+			stats_collector.collect(inspector, source, num_evts);
 		}
 
 		// Reset the timeouts counter, Falco successfully got an event to process
@@ -397,17 +397,56 @@ static void process_inspector_events(
 	}
 }
 
-static std::shared_ptr<stats_writer> init_stats_writer(const options& opts)
+static std::shared_ptr<stats_writer> init_stats_writer(const options& opts, std::shared_ptr<falco_outputs> outputs, std::shared_ptr<falco_configuration> config)
 {
-	auto statsw = std::make_shared<stats_writer>();
-	if (!opts.stats_filename.empty())
-	{
-		std::string err;
-		if (!stats_writer::init_ticker(opts.stats_interval, err))
-		{
-			throw falco_exception(err);
-		}
-		statsw.reset(new stats_writer(opts.stats_filename));
-	}
+	/* Index 0 reserved, milliseconds representation for 15min, 30min, 1hr, 4hrs, 6hrs, 12hrs.
+	 * Sized by MAX_STATS_PRESET_INDEX so the table and the bounds check below cannot drift apart. */
+	static const uint64_t preset_intervals_ms[MAX_STATS_PRESET_INDEX + 1] =
+		{0LLU, 900000LU, 1800000LU, 3600000LU, 14400000LU, 21600000LU, 43200000LU};
+
+	auto statsw = std::make_shared<stats_writer>(outputs, config);
+	uint64_t stats_interval_ms = 0;
+	if (config->m_stats_v2_enabled && config->m_stats_v2_stats_interval_preset > 0)
+	{
+		uint16_t index = config->m_stats_v2_stats_interval_preset;
+		if(index <= MAX_STATS_PRESET_INDEX)
+		{
+			stats_interval_ms = preset_intervals_ms[index];
+		}
+		// todo: warning message, an out-of-range preset currently leaves the interval unset
+	}
+
+	/* Continue cmd args support and old defaults for backward compatibility, scheduled for deprecation. */
+	if (stats_interval_ms == 0 && opts.stats_interval > 0)
+	{
+		stats_interval_ms = opts.stats_interval;
+	}
+	/* New config. Exact stats_interval_ms in falco.yaml overrides presets. */
+	if (config->m_stats_v2_enabled && config->m_stats_v2_stats_interval_ms > 0)
+	{
+		stats_interval_ms = config->m_stats_v2_stats_interval_ms;
+	}
+
+	/* Only start the ticker when something consumes it: stats v2, or the legacy stats file.
+	 * Without this guard the legacy default interval would start it on every run. */
+	const bool has_consumer = config->m_stats_v2_enabled || !opts.stats_filename.empty();
+	if (has_consumer && stats_interval_ms > 0)
+	{
+		std::string err;
+		if (!stats_writer::init_ticker(stats_interval_ms, err))
+		{
+			throw falco_exception(err);
+		}
+	}
+	/* Continue cmd args support for backward compatibility, scheduled for deprecation. */
+	if (!config->m_stats_v2_enabled && !opts.stats_filename.empty())
+	{
+		statsw = std::make_shared<stats_writer>(opts.stats_filename, outputs, config);
+	}
+	/* New config. */
+	else if (config->m_stats_v2_enabled && !config->m_stats_v2_stats_filename.empty())
+	{
+		statsw = std::make_shared<stats_writer>(config->m_stats_v2_stats_filename, outputs, config);
+	}
 	return statsw;
 }
@@ -421,7 +460,7 @@ falco::app::run_result falco::app::actions::process_events(falco::app::state& s)
 	s.engine->complete_rule_loading();
 
 	// Initialize stats writer
-	auto statsw = init_stats_writer(s.options);
+	auto statsw = init_stats_writer(s.options, s.outputs, s.config);
 
 	if (s.options.dry_run)
 	{
diff --git a/userspace/falco/stats_writer.cpp b/userspace/falco/stats_writer.cpp
index 578853c6..11c034e7 100644
--- a/userspace/falco/stats_writer.cpp
+++ b/userspace/falco/stats_writer.cpp
@@ -25,7 +25,7 @@ limitations under the License.
#include "stats_writer.h" #include "logger.h" #include "banned.h" // This raises a compilation error when certain functions are used -#include "logger.h" +#include "config_falco.h" // note: ticker_t is an uint16_t, which is enough because we don't care about // overflows here. Threads calling stats_writer::handle() will just @@ -67,18 +67,21 @@ stats_writer::ticker_t stats_writer::get_ticker() return s_timer.load(std::memory_order_relaxed); } -stats_writer::stats_writer() +stats_writer::stats_writer(std::shared_ptr outputs, std::shared_ptr config) : m_initialized(false), m_total_samples(0) { - + m_outputs = outputs; + m_config = config; } -stats_writer::stats_writer(const std::string &filename) +stats_writer::stats_writer(const std::string &filename, std::shared_ptr outputs, std::shared_ptr config) : m_initialized(true), m_total_samples(0) { m_output.exceptions(std::ofstream::failbit | std::ofstream::badbit); m_output.open(filename, std::ios_base::app); m_worker = std::thread(&stats_writer::worker, this); + m_outputs = outputs; + m_config = config; } stats_writer::~stats_writer() @@ -162,39 +165,196 @@ void stats_writer::worker() noexcept } stats_writer::collector::collector(std::shared_ptr writer) - : m_writer(writer), m_last_tick(0), m_samples(0) + : m_writer(writer), m_last_tick(0), m_samples(0), m_last_now(0), m_last_n_evts(0), m_last_n_drops(0), m_last_num_evts(0) { +} + +std::map stats_writer::collector::get_stats_v2_output_fields_wrapper(std::shared_ptr inspector, uint64_t now, std::string src, uint64_t num_evts, uint64_t stats_snapshot_time_delta_sec) +{ + std::map output_fields; + const scap_agent_info* agent_info = inspector->get_agent_info(); + const scap_machine_info* machine_info = inspector->get_machine_info(); + + /* Wrapper fields needed for statistical analyses and attributions. Always enabled. */ + output_fields["evt.time"] = std::to_string(now); /* Some ETLs may prefer a consistent timestamp within output. 
*/ + output_fields["falco_version"] = FALCO_VERSION; + output_fields["falco_start_ts"] = std::to_string(agent_info->start_ts_epoch); + output_fields["kernel_release"] = agent_info->uname_r; + output_fields["host_boot_ts"] = std::to_string(machine_info->boot_ts_epoch); + output_fields["hostname"] = machine_info->hostname; /* Explicitly add hostname to log msg in case hostname rule output field is disabled. */ + output_fields["host_num_cpus"] = std::to_string(machine_info->num_cpus); + if(inspector->check_current_engine(BPF_ENGINE)) + { + output_fields["driver"] = "bpf"; + } + else if (inspector->check_current_engine(MODERN_BPF_ENGINE)) + { + output_fields["driver"] = "modern_bpf"; + } + else if (inspector->check_current_engine(KMOD_ENGINE)) + { + output_fields["driver"] = "kmod"; + } + else + { + output_fields["driver"] = "no_driver"; + } + output_fields["src"] = src; + + /* Falco userspace events counters. Always enabled. */ + if (m_last_num_evts != 0 && stats_snapshot_time_delta_sec > 0) + { + /* Successfully processed userspace events. */ + output_fields["falco_evts_rate_sec"] = std::to_string((num_evts - m_last_num_evts) / stats_snapshot_time_delta_sec); + } + output_fields["falco_num_evts"] = std::to_string(num_evts); + output_fields["falco_num_evts_prev"] = std::to_string(m_last_num_evts); + m_last_num_evts = num_evts; + + return output_fields; } -void stats_writer::collector::collect(std::shared_ptr inspector, const std::string& src) +std::map stats_writer::collector::get_stats_v2_output_fields_syscalls(std::shared_ptr inspector, std::map output_fields, uint64_t stats_snapshot_time_delta_sec) +{ + const scap_agent_info* agent_info = inspector->get_agent_info(); + const scap_machine_info* machine_info = inspector->get_machine_info(); + +#ifndef MINIMAL_BUILD + /* Resource utilization, CPU and memory usage etc. 
*/ + uint32_t nstats = 0; + int32_t rc = 0; + if (m_writer->m_config->m_stats_v2_include_resource_utilization) + { + const scap_stats_v2* utilization; + auto buffer = inspector->get_sinsp_stats_v2_buffer(); + utilization = libsinsp::resource_utilization::get_resource_utilization(agent_info, buffer, &nstats, &rc); + if (utilization && rc == 0 && nstats > 0) + { + // todo: support unit conversions for memory metrics + for(uint32_t stat = 0; stat < nstats; stat++) + { + switch(utilization[stat].type) + { + case STATS_VALUE_TYPE_U64: + output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.u64); + break; + case STATS_VALUE_TYPE_U32: + output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.u32); + break; + case STATS_VALUE_TYPE_D: + output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.d); + break; + default: + break; + } + } + } + } + + /* Kernel side stats counters and libbpf stats if applicable. */ + nstats = 0; + rc = 0; + uint32_t flags = 0; + + if (m_writer->m_config->m_stats_v2_include_kernel_evts_counters) + { + flags |= PPM_SCAP_STATS_KERNEL_COUNTERS; + } + if (m_writer->m_config->m_stats_v2_include_libbpf_stats && !inspector->check_current_engine(KMOD_ENGINE) && (machine_info->flags & PPM_BPF_STATS_ENABLED)) + { + flags |= PPM_SCAP_STATS_LIBBPF_STATS; + } + const scap_stats_v2* stats_v2 = inspector->get_capture_stats_v2(flags, &nstats, &rc); + if (stats_v2 && nstats > 0 && rc == 0) + { + for(uint32_t stat = 0; stat < nstats; stat++) + { + switch(stats_v2[stat].type) + { + case STATS_VALUE_TYPE_U64: + if (strncmp(stats_v2[stat].name, "n_evts", 6) == 0) + { + output_fields["falco_evts_rate_kernel_sec"] = std::to_string(0); + if (m_last_n_evts != 0 && stats_snapshot_time_delta_sec > 0) + { + /* n_evts is total number of kernel side events. 
*/ + output_fields["falco_evts_rate_kernel_sec"] = std::to_string((stats_v2[stat].value.u64 - m_last_n_evts) / stats_snapshot_time_delta_sec); + } + output_fields["n_evts_prev"] = std::to_string(m_last_n_evts); + m_last_n_evts = stats_v2[stat].value.u64; + } + else if (strncmp(stats_v2[stat].name, "n_drops", 7) == 0) + { + output_fields["falco_evts_drop_rate_kernel_sec"] = std::to_string(0); + if (m_last_n_drops != 0 && stats_snapshot_time_delta_sec > 0) + { + /* n_drops is total number of kernel side event drops. */ + output_fields["falco_evts_drop_rate_kernel_sec"] = std::to_string((stats_v2[stat].value.u64 - m_last_n_evts) / stats_snapshot_time_delta_sec); + } + output_fields["n_drops_prev"] = std::to_string(m_last_n_drops); + m_last_n_drops = stats_v2[stat].value.u64; + } + output_fields[stats_v2[stat].name] = std::to_string(stats_v2[stat].value.u64); + break; + default: + break; + } + } + } +#endif + + return output_fields; +} + +void stats_writer::collector::collect(std::shared_ptr inspector, const std::string &src, uint64_t num_evts) { // just skip if no output is configured - if (m_writer->has_output()) + if (m_writer->m_config->m_stats_v2_enabled || m_writer->has_output()) { // collect stats once per each ticker period auto tick = stats_writer::get_ticker(); if (tick != m_last_tick) { - stats_writer::msg msg; - msg.stop = false; - msg.source = src; - inspector->get_capture_stats(&msg.stats); - m_samples++; - if(m_samples == 1) + auto now = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + uint64_t stats_snapshot_time_delta = 0; + if (m_last_now != 0) { - msg.delta = msg.stats; + stats_snapshot_time_delta = now - m_last_now; } - else + m_last_now = now; + std::map output_fields = stats_writer::collector::get_stats_v2_output_fields_wrapper(inspector, now, src, num_evts, (stats_snapshot_time_delta / ONE_SECOND_IN_NS)); + if (src == falco_common::syscall_source) { - msg.delta.n_evts = msg.stats.n_evts - 
m_last_stats.n_evts; - msg.delta.n_drops = msg.stats.n_drops - m_last_stats.n_drops; - msg.delta.n_preemptions = msg.stats.n_preemptions - m_last_stats.n_preemptions; + output_fields = stats_writer::collector::get_stats_v2_output_fields_syscalls(inspector, output_fields, (stats_snapshot_time_delta / ONE_SECOND_IN_NS)); + } + if (m_writer->m_config->m_stats_v2_enabled && m_writer->m_config->m_stats_v2_stats_internal_rule && m_writer->m_outputs) + { + std::string rule = "Falco internal: resource utilization stats metrics"; + std::string msg = ""; + m_writer->m_outputs->handle_msg(now, falco_common::PRIORITY_DEBUG, msg, rule, output_fields); + } + if (m_writer->has_output()) + { + stats_writer::msg msg; + msg.stop = false; + msg.source = src; + inspector->get_capture_stats(&msg.stats); + m_samples++; + if(m_samples == 1) + { + msg.delta = msg.stats; + } + else + { + msg.delta.n_evts = msg.stats.n_evts - m_last_stats.n_evts; + msg.delta.n_drops = msg.stats.n_drops - m_last_stats.n_drops; + msg.delta.n_preemptions = msg.stats.n_preemptions - m_last_stats.n_preemptions; + } + m_last_stats = msg.stats; + m_writer->push(msg); } - m_last_tick = tick; - m_last_stats = msg.stats; - m_writer->push(msg); } } } diff --git a/userspace/falco/stats_writer.h b/userspace/falco/stats_writer.h index 2144f0c5..8177fe74 100644 --- a/userspace/falco/stats_writer.h +++ b/userspace/falco/stats_writer.h @@ -23,6 +23,10 @@ limitations under the License. #include #include "tbb/concurrent_queue.h" +#include "falco_outputs.h" +#include "configuration.h" + +#define MAX_STATS_PRESET_INDEX 6 /*! \brief Writes stats samples collected from inspectors into a given output. @@ -56,13 +60,27 @@ public: \brief Collects one stats sample from an inspector and for the given event source name */ - void collect(std::shared_ptr inspector, const std::string& src); + void collect(std::shared_ptr inspector, const std::string& src, uint64_t num_evts); + + /*! 
+ \brief Collect snapshot stats v2 wrapper fields as internal rule formatted output fields. + */ + std::map get_stats_v2_output_fields_wrapper(std::shared_ptr inspector, uint64_t now, std::string src, uint64_t num_evts, uint64_t stats_snapshot_time_delta_sec); + + /*! + \brief Collect snapshot stats v2 syscalls related metrics as internal rule formatted output fields. + */ + std::map get_stats_v2_output_fields_syscalls(std::shared_ptr inspector, std::map output_fields, uint64_t stats_snapshot_time_delta_sec); private: std::shared_ptr m_writer; stats_writer::ticker_t m_last_tick; uint64_t m_samples; scap_stats m_last_stats; + uint64_t m_last_now; + uint64_t m_last_n_evts; + uint64_t m_last_n_drops; + uint64_t m_last_num_evts; }; stats_writer(const stats_writer&) = delete; @@ -76,19 +94,18 @@ public: ~stats_writer(); /*! - \brief Initializes a writer without any output. - With this constructor, has_output() always returns false + \brief Initializes a writer without file output. */ - stats_writer(); + stats_writer(std::shared_ptr outputs, std::shared_ptr config); /*! - \brief Initializes a writer that prints to a file at the given filename. + \brief Initializes a writer that can print to a file at the given filename. With this constructor, has_output() always returns true */ - explicit stats_writer(const std::string &filename); + explicit stats_writer(const std::string &filename, std::shared_ptr outputs, std::shared_ptr config); /*! - \brief Returns true if the writer is configured with a valid output + \brief Returns true if the writer is configured with a valid file output */ inline bool has_output() const; @@ -129,7 +146,9 @@ private: uint64_t m_total_samples; std::thread m_worker; std::ofstream m_output; - tbb::concurrent_bounded_queue m_queue; + tbb::concurrent_bounded_queue m_queue; + std::shared_ptr m_outputs; + std::shared_ptr m_config; // note: in this way, only collectors can push into the queue friend class stats_writer::collector;