diff --git a/falco.yaml b/falco.yaml index e9a8aca4..588fa31f 100644 --- a/falco.yaml +++ b/falco.yaml @@ -981,6 +981,8 @@ syscall_event_drops: # as Falco does not automatically rotate the file. It can be used in combination # with `output_rule`. # +# `rules_counters_enabled`: Emit counts of how many events matched each rule. +# # `resource_utilization_enabled`: Emit CPU and memory usage metrics. CPU usage # is reported as a percentage of one CPU and can be normalized to the total # number of CPUs to determine overall usage. Memory metrics are provided in raw @@ -1025,7 +1027,8 @@ syscall_event_drops: # values are included in the output. # # If metrics are enabled, the web server can be configured to activate the -# corresponding Prometheus endpoint using webserver.prometheus_metrics_enabled. +# corresponding Prometheus endpoint using `webserver.prometheus_metrics_enabled`. +# Prometheus output can be used in combination with the other output options. # # todo: syscall_counters_enabled option metrics: @@ -1033,8 +1036,10 @@ metrics: interval: 1h # Typically, in production, you only use `output_rule` or `output_file`, but not both. # However, if you have a very unique use case, you can use both together. + # Set `webserver.prometheus_metrics_enabled` for Prometheus output. 
output_rule: true # output_file: /tmp/falco_stats.jsonl + rules_counters_enabled: true resource_utilization_enabled: true state_counters_enabled: true kernel_event_counters_enabled: true diff --git a/userspace/engine/falco_engine.cpp b/userspace/engine/falco_engine.cpp index 921e4b98..69da2bd1 100644 --- a/userspace/engine/falco_engine.cpp +++ b/userspace/engine/falco_engine.cpp @@ -907,6 +907,11 @@ void falco_engine::print_stats() const fprintf(stdout, "%s", out.c_str()); } +const stats_manager& falco_engine::get_rule_stats_manager() const +{ + return m_rule_stats_manager; +} + bool falco_engine::is_source_valid(const std::string &source) const { return m_sources.at(source) != nullptr; diff --git a/userspace/engine/falco_engine.h b/userspace/engine/falco_engine.h index b01f7705..e7e49293 100644 --- a/userspace/engine/falco_engine.h +++ b/userspace/engine/falco_engine.h @@ -149,11 +149,21 @@ public: // nlohmann::json describe_rule(std::string *rule_name, const std::vector>& plugins) const; + // + // Return a const reference to the rules stored in the Falco engine. + // + inline const indexed_vector& get_rules() const { return m_rules; } + // // Print statistics on how many events matched each rule. // void print_stats() const; + // + // Return a const reference to the stats_manager to access the current rule stats (how many events matched each rule so far). + // + const stats_manager& get_rule_stats_manager() const; + // // Set the sampling ratio, which can affect which events are // matched against the set of rules. 
diff --git a/userspace/engine/stats_manager.h b/userspace/engine/stats_manager.h index 3812319b..18f8d8ae 100644 --- a/userspace/engine/stats_manager.h +++ b/userspace/engine/stats_manager.h @@ -63,7 +63,6 @@ public: const indexed_vector& rules, std::string& out) const; -private: std::atomic m_total; std::vector>> m_by_priority; std::vector>> m_by_rule_id; diff --git a/userspace/falco/app/actions/process_events.cpp b/userspace/falco/app/actions/process_events.cpp index ab825d4c..6f3dfa4f 100644 --- a/userspace/falco/app/actions/process_events.cpp +++ b/userspace/falco/app/actions/process_events.cpp @@ -436,7 +436,7 @@ falco::app::run_result falco::app::actions::process_events(falco::app::state& s) s.engine->complete_rule_loading(); // Initialize stats writer - auto statsw = std::make_shared(s.outputs, s.config); + auto statsw = std::make_shared(s.outputs, s.config, s.engine); auto res = init_stats_writer(statsw, s.config, s.options.dry_run); if (s.options.dry_run) diff --git a/userspace/falco/configuration.cpp b/userspace/falco/configuration.cpp index 9b6c5887..b03eeea5 100644 --- a/userspace/falco/configuration.cpp +++ b/userspace/falco/configuration.cpp @@ -72,7 +72,7 @@ falco_configuration::falco_configuration(): m_metrics_interval(5000), m_metrics_stats_rule_enabled(false), m_metrics_output_file(""), - m_metrics_flags((METRICS_V2_KERNEL_COUNTERS | METRICS_V2_LIBBPF_STATS | METRICS_V2_RESOURCE_UTILIZATION | METRICS_V2_STATE_COUNTERS)), + m_metrics_flags((METRICS_V2_KERNEL_COUNTERS | METRICS_V2_LIBBPF_STATS | METRICS_V2_RESOURCE_UTILIZATION | METRICS_V2_STATE_COUNTERS | METRICS_V2_RULE_COUNTERS)), m_metrics_convert_memory_to_mb(true), m_metrics_include_empty_values(false) { @@ -535,20 +535,21 @@ void falco_configuration::load_yaml(const std::string& config_name) m_metrics_output_file = config.get_scalar("metrics.output_file", ""); m_metrics_flags = 0; + if (config.get_scalar("metrics.rules_counters_enabled", true)) + { + m_metrics_flags |= 
METRICS_V2_RULE_COUNTERS; + } if (config.get_scalar("metrics.resource_utilization_enabled", true)) { m_metrics_flags |= METRICS_V2_RESOURCE_UTILIZATION; - } if (config.get_scalar("metrics.state_counters_enabled", true)) { m_metrics_flags |= METRICS_V2_STATE_COUNTERS; - } if (config.get_scalar("metrics.kernel_event_counters_enabled", true)) { m_metrics_flags |= METRICS_V2_KERNEL_COUNTERS; - } if (config.get_scalar("metrics.libbpf_stats_enabled", true)) { diff --git a/userspace/falco/falco_metrics.cpp b/userspace/falco/falco_metrics.cpp index 58f83b5c..11283ea9 100644 --- a/userspace/falco/falco_metrics.cpp +++ b/userspace/falco/falco_metrics.cpp @@ -57,8 +57,8 @@ std::string falco_metrics::to_text(const falco::app::state& state) for (const auto& source_info: state.source_infos) { sinsp *source_inspector = source_info.inspector.get(); - inspectors.push_back(source_inspector); - metrics_collectors.push_back(libs::metrics::libs_metrics_collector(source_inspector, state.config->m_metrics_flags)); + inspectors.emplace_back(source_inspector); + metrics_collectors.emplace_back(libs::metrics::libs_metrics_collector(source_inspector, state.config->m_metrics_flags)); } libs::metrics::prometheus_metrics_converter prometheus_metrics_converter; @@ -106,26 +106,59 @@ std::string falco_metrics::to_text(const falco::app::state& state) { prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus("evt_source", "falcosecurity", "falco", {{"evt_source", source}}); } - std::vector static_metrics; - static_metrics.push_back(libs_metrics_collector.new_metric("start_ts", + std::vector falco_metrics; + + if(state.config->m_metrics_flags & METRICS_V2_RULE_COUNTERS) + { + const stats_manager& rule_stats_manager = state.engine->get_rule_stats_manager(); + const indexed_vector& rules = state.engine->get_rules(); + falco_metrics.emplace_back(libs_metrics_collector.new_metric("rules.matches_total", + METRICS_V2_RULE_COUNTERS, + METRIC_VALUE_TYPE_U64, + 
METRIC_VALUE_UNIT_COUNT, + METRIC_VALUE_METRIC_TYPE_MONOTONIC, + rule_stats_manager.m_total.load())); + + for (size_t i = 0; i < rule_stats_manager.m_by_rule_id.size(); i++) + { + auto rule = rules.at(i); + std::string rules_metric_name = "rules." + rule->name; + // Separate processing of rules counter metrics given we add extra tags + auto metric = libs_metrics_collector.new_metric(rules_metric_name.c_str(), + METRICS_V2_RULE_COUNTERS, + METRIC_VALUE_TYPE_U64, + METRIC_VALUE_UNIT_COUNT, + METRIC_VALUE_METRIC_TYPE_MONOTONIC, + rule_stats_manager.m_by_rule_id[i]->load()); + prometheus_metrics_converter.convert_metric_to_unit_convention(metric); + const std::map& const_labels = { + {"priority", std::to_string(rule->priority)}, + {"source", rule->source}, + {"tags", concat_set_in_order(rule->tags)} + }; + prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metric, "falcosecurity", "falco", const_labels); + } + } + + falco_metrics.emplace_back(libs_metrics_collector.new_metric("start_ts", METRICS_V2_MISC, METRIC_VALUE_TYPE_U64, METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS, METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT, agent_info->start_ts_epoch)); - static_metrics.push_back(libs_metrics_collector.new_metric("host_boot_ts", + falco_metrics.emplace_back(libs_metrics_collector.new_metric("host_boot_ts", METRICS_V2_MISC, METRIC_VALUE_TYPE_U64, METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS, METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT, machine_info->boot_ts_epoch)); - static_metrics.push_back(libs_metrics_collector.new_metric("host_num_cpus", + falco_metrics.emplace_back(libs_metrics_collector.new_metric("host_num_cpus", METRICS_V2_MISC, METRIC_VALUE_TYPE_U32, METRIC_VALUE_UNIT_COUNT, METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT, machine_info->num_cpus)); - static_metrics.push_back(libs_metrics_collector.new_metric("outputs_queue_num_drops", + falco_metrics.emplace_back(libs_metrics_collector.new_metric("outputs_queue_num_drops", METRICS_V2_MISC, 
METRIC_VALUE_TYPE_U64, METRIC_VALUE_UNIT_COUNT, @@ -134,18 +167,20 @@ std::string falco_metrics::to_text(const falco::app::state& state) auto now = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); - static_metrics.push_back(libs_metrics_collector.new_metric("duration_sec", + falco_metrics.emplace_back(libs_metrics_collector.new_metric("duration_sec", METRICS_V2_MISC, METRIC_VALUE_TYPE_U64, METRIC_VALUE_UNIT_TIME_S_COUNT, METRIC_VALUE_METRIC_TYPE_MONOTONIC, (uint64_t)((now - agent_info->start_ts_epoch) / ONE_SECOND_IN_NS))); - for (auto metrics: static_metrics) + + for (auto metric: falco_metrics) { - prometheus_metrics_converter.convert_metric_to_unit_convention(metrics); - prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metrics, "falcosecurity", "falco"); + prometheus_metrics_converter.convert_metric_to_unit_convention(metric); + prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metric, "falcosecurity", "falco"); } + } for (auto metrics_collector: metrics_collectors) @@ -153,15 +188,15 @@ std::string falco_metrics::to_text(const falco::app::state& state) metrics_collector.snapshot(); auto metrics_snapshot = metrics_collector.get_metrics(); - for (auto& metrics: metrics_snapshot) + for (auto& metric: metrics_snapshot) { - prometheus_metrics_converter.convert_metric_to_unit_convention(metrics); + prometheus_metrics_converter.convert_metric_to_unit_convention(metric); std::string namespace_name = "scap"; - if (metrics.flags & METRICS_V2_RESOURCE_UTILIZATION || metrics.flags & METRICS_V2_KERNEL_COUNTERS) + if (metric.flags & METRICS_V2_RESOURCE_UTILIZATION || metric.flags & METRICS_V2_KERNEL_COUNTERS) { namespace_name = "falco"; } - prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metrics, "falcosecurity", namespace_name); + prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metric, "falcosecurity", 
namespace_name); } } diff --git a/userspace/falco/stats_writer.cpp b/userspace/falco/stats_writer.cpp index 22a33dbc..a221496f 100644 --- a/userspace/falco/stats_writer.cpp +++ b/userspace/falco/stats_writer.cpp @@ -170,8 +170,9 @@ stats_writer::ticker_t stats_writer::get_ticker() stats_writer::stats_writer( const std::shared_ptr& outputs, - const std::shared_ptr& config) - : m_config(config) + const std::shared_ptr& config, + const std::shared_ptr& engine) + : m_config(config), m_engine(engine) { if (config->m_metrics_enabled) { @@ -372,6 +373,23 @@ void stats_writer::collector::get_metrics_output_fields_additional( nlohmann::json& output_fields, double stats_snapshot_time_delta_sec) { + if(m_writer->m_config->m_metrics_flags & METRICS_V2_RULE_COUNTERS) + { + const stats_manager& rule_stats_manager = m_writer->m_engine->get_rule_stats_manager(); + const indexed_vector& rules = m_writer->m_engine->get_rules(); + output_fields["falco.rules.matches_total"] = rule_stats_manager.m_total.load(); + for (size_t i = 0; i < rule_stats_manager.m_by_rule_id.size(); i++) + { + auto rule_count = rule_stats_manager.m_by_rule_id[i]->load(); + if (rule_count == 0 && !m_writer->m_config->m_metrics_include_empty_values) + { + continue; + } + std::string rules_metric_name = "falco.rules." + rules.at(i)->name; + output_fields[rules_metric_name] = rule_count; + } + } + #if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__) if (m_writer->m_libs_metrics_collector && m_writer->m_output_rule_metrics_converter) { diff --git a/userspace/falco/stats_writer.h b/userspace/falco/stats_writer.h index 8e39c3c3..b25ed4dd 100644 --- a/userspace/falco/stats_writer.h +++ b/userspace/falco/stats_writer.h @@ -96,7 +96,8 @@ public: \brief Initializes a writer. */ stats_writer(const std::shared_ptr& outputs, - const std::shared_ptr& config); + const std::shared_ptr& config, + const std::shared_ptr& engine); /*! \brief Returns true if the writer is configured with a valid output. 
@@ -152,6 +153,7 @@ private: #endif std::shared_ptr m_outputs; std::shared_ptr m_config; + std::shared_ptr m_engine; // note: in this way, only collectors can push into the queue friend class stats_writer::collector; };