new(metrics): add rules_counters_enabled option

Intended to replace https://github.com/falcosecurity/falco-exporter
when used with Prometheus output

Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
This commit is contained in:
Melissa Kilby 2024-05-13 20:50:01 +00:00 committed by poiana
parent 64039196ad
commit b7adcd251d
9 changed files with 100 additions and 25 deletions

View File

@ -981,6 +981,8 @@ syscall_event_drops:
# as Falco does not automatically rotate the file. It can be used in combination # as Falco does not automatically rotate the file. It can be used in combination
# with `output_rule`. # with `output_rule`.
# #
# `rules_counters_enabled`: Emit the number of events that matched each rule, along with the total number of matches across all rules.
#
# `resource_utilization_enabled`: Emit CPU and memory usage metrics. CPU usage # `resource_utilization_enabled`: Emit CPU and memory usage metrics. CPU usage
# is reported as a percentage of one CPU and can be normalized to the total # is reported as a percentage of one CPU and can be normalized to the total
# number of CPUs to determine overall usage. Memory metrics are provided in raw # number of CPUs to determine overall usage. Memory metrics are provided in raw
@ -1025,7 +1027,8 @@ syscall_event_drops:
# values are included in the output. # values are included in the output.
# #
# If metrics are enabled, the web server can be configured to activate the # If metrics are enabled, the web server can be configured to activate the
# corresponding Prometheus endpoint using webserver.prometheus_metrics_enabled. # corresponding Prometheus endpoint using `webserver.prometheus_metrics_enabled`.
# Prometheus output can be used in combination with the other output options.
# #
# todo: syscall_counters_enabled option # todo: syscall_counters_enabled option
metrics: metrics:
@ -1033,8 +1036,10 @@ metrics:
interval: 1h interval: 1h
# Typically, in production, you only use `output_rule` or `output_file`, but not both. # Typically, in production, you only use `output_rule` or `output_file`, but not both.
# However, if you have a very unique use case, you can use both together. # However, if you have a very unique use case, you can use both together.
# Set `webserver.prometheus_metrics_enabled` for Prometheus output.
output_rule: true output_rule: true
# output_file: /tmp/falco_stats.jsonl # output_file: /tmp/falco_stats.jsonl
rules_counters_enabled: true
resource_utilization_enabled: true resource_utilization_enabled: true
state_counters_enabled: true state_counters_enabled: true
kernel_event_counters_enabled: true kernel_event_counters_enabled: true

View File

@ -907,6 +907,11 @@ void falco_engine::print_stats() const
fprintf(stdout, "%s", out.c_str()); fprintf(stdout, "%s", out.c_str());
} }
const stats_manager& falco_engine::get_rule_stats_manager() const
{
return m_rule_stats_manager;
}
bool falco_engine::is_source_valid(const std::string &source) const bool falco_engine::is_source_valid(const std::string &source) const
{ {
return m_sources.at(source) != nullptr; return m_sources.at(source) != nullptr;

View File

@ -149,11 +149,21 @@ public:
// //
nlohmann::json describe_rule(std::string *rule_name, const std::vector<std::shared_ptr<sinsp_plugin>>& plugins) const; nlohmann::json describe_rule(std::string *rule_name, const std::vector<std::shared_ptr<sinsp_plugin>>& plugins) const;
//
// Provide read-only (const reference) access to the rules stored in the Falco engine.
//
inline const indexed_vector<falco_rule>& get_rules() const
{
	return m_rules;
}
// //
// Print statistics on how many events matched each rule. // Print statistics on how many events matched each rule.
// //
void print_stats() const; void print_stats() const;
//
// Return a const reference to the stats_manager, giving access to the current rule stats (how many events matched each rule so far).
//
const stats_manager& get_rule_stats_manager() const;
// //
// Set the sampling ratio, which can affect which events are // Set the sampling ratio, which can affect which events are
// matched against the set of rules. // matched against the set of rules.

View File

@ -63,7 +63,6 @@ public:
const indexed_vector<falco_rule>& rules, const indexed_vector<falco_rule>& rules,
std::string& out) const; std::string& out) const;
private:
std::atomic<uint64_t> m_total; std::atomic<uint64_t> m_total;
std::vector<std::unique_ptr<std::atomic<uint64_t>>> m_by_priority; std::vector<std::unique_ptr<std::atomic<uint64_t>>> m_by_priority;
std::vector<std::unique_ptr<std::atomic<uint64_t>>> m_by_rule_id; std::vector<std::unique_ptr<std::atomic<uint64_t>>> m_by_rule_id;

View File

@ -436,7 +436,7 @@ falco::app::run_result falco::app::actions::process_events(falco::app::state& s)
s.engine->complete_rule_loading(); s.engine->complete_rule_loading();
// Initialize stats writer // Initialize stats writer
auto statsw = std::make_shared<stats_writer>(s.outputs, s.config); auto statsw = std::make_shared<stats_writer>(s.outputs, s.config, s.engine);
auto res = init_stats_writer(statsw, s.config, s.options.dry_run); auto res = init_stats_writer(statsw, s.config, s.options.dry_run);
if (s.options.dry_run) if (s.options.dry_run)

View File

@ -72,7 +72,7 @@ falco_configuration::falco_configuration():
m_metrics_interval(5000), m_metrics_interval(5000),
m_metrics_stats_rule_enabled(false), m_metrics_stats_rule_enabled(false),
m_metrics_output_file(""), m_metrics_output_file(""),
m_metrics_flags((METRICS_V2_KERNEL_COUNTERS | METRICS_V2_LIBBPF_STATS | METRICS_V2_RESOURCE_UTILIZATION | METRICS_V2_STATE_COUNTERS)), m_metrics_flags((METRICS_V2_KERNEL_COUNTERS | METRICS_V2_LIBBPF_STATS | METRICS_V2_RESOURCE_UTILIZATION | METRICS_V2_STATE_COUNTERS | METRICS_V2_RULE_COUNTERS)),
m_metrics_convert_memory_to_mb(true), m_metrics_convert_memory_to_mb(true),
m_metrics_include_empty_values(false) m_metrics_include_empty_values(false)
{ {
@ -535,20 +535,21 @@ void falco_configuration::load_yaml(const std::string& config_name)
m_metrics_output_file = config.get_scalar<std::string>("metrics.output_file", ""); m_metrics_output_file = config.get_scalar<std::string>("metrics.output_file", "");
m_metrics_flags = 0; m_metrics_flags = 0;
if (config.get_scalar<bool>("metrics.rules_counters_enabled", true))
{
m_metrics_flags |= METRICS_V2_RULE_COUNTERS;
}
if (config.get_scalar<bool>("metrics.resource_utilization_enabled", true)) if (config.get_scalar<bool>("metrics.resource_utilization_enabled", true))
{ {
m_metrics_flags |= METRICS_V2_RESOURCE_UTILIZATION; m_metrics_flags |= METRICS_V2_RESOURCE_UTILIZATION;
} }
if (config.get_scalar<bool>("metrics.state_counters_enabled", true)) if (config.get_scalar<bool>("metrics.state_counters_enabled", true))
{ {
m_metrics_flags |= METRICS_V2_STATE_COUNTERS; m_metrics_flags |= METRICS_V2_STATE_COUNTERS;
} }
if (config.get_scalar<bool>("metrics.kernel_event_counters_enabled", true)) if (config.get_scalar<bool>("metrics.kernel_event_counters_enabled", true))
{ {
m_metrics_flags |= METRICS_V2_KERNEL_COUNTERS; m_metrics_flags |= METRICS_V2_KERNEL_COUNTERS;
} }
if (config.get_scalar<bool>("metrics.libbpf_stats_enabled", true)) if (config.get_scalar<bool>("metrics.libbpf_stats_enabled", true))
{ {

View File

@ -57,8 +57,8 @@ std::string falco_metrics::to_text(const falco::app::state& state)
for (const auto& source_info: state.source_infos) for (const auto& source_info: state.source_infos)
{ {
sinsp *source_inspector = source_info.inspector.get(); sinsp *source_inspector = source_info.inspector.get();
inspectors.push_back(source_inspector); inspectors.emplace_back(source_inspector);
metrics_collectors.push_back(libs::metrics::libs_metrics_collector(source_inspector, state.config->m_metrics_flags)); metrics_collectors.emplace_back(libs::metrics::libs_metrics_collector(source_inspector, state.config->m_metrics_flags));
} }
libs::metrics::prometheus_metrics_converter prometheus_metrics_converter; libs::metrics::prometheus_metrics_converter prometheus_metrics_converter;
@ -106,26 +106,59 @@ std::string falco_metrics::to_text(const falco::app::state& state)
{ {
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus("evt_source", "falcosecurity", "falco", {{"evt_source", source}}); prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus("evt_source", "falcosecurity", "falco", {{"evt_source", source}});
} }
std::vector<metrics_v2> static_metrics; std::vector<metrics_v2> falco_metrics;
static_metrics.push_back(libs_metrics_collector.new_metric("start_ts",
if(state.config->m_metrics_flags & METRICS_V2_RULE_COUNTERS)
{
const stats_manager& rule_stats_manager = state.engine->get_rule_stats_manager();
const indexed_vector<falco_rule>& rules = state.engine->get_rules();
falco_metrics.emplace_back(libs_metrics_collector.new_metric("rules.matches_total",
METRICS_V2_RULE_COUNTERS,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_COUNT,
METRIC_VALUE_METRIC_TYPE_MONOTONIC,
rule_stats_manager.m_total.load()));
for (size_t i = 0; i < rule_stats_manager.m_by_rule_id.size(); i++)
{
auto rule = rules.at(i);
std::string rules_metric_name = "rules." + rule->name;
// Separate processing of rules counter metrics given we add extra tags
auto metric = libs_metrics_collector.new_metric(rules_metric_name.c_str(),
METRICS_V2_RULE_COUNTERS,
METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_COUNT,
METRIC_VALUE_METRIC_TYPE_MONOTONIC,
rule_stats_manager.m_by_rule_id[i]->load());
prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
const std::map<std::string, std::string>& const_labels = {
{"priority", std::to_string(rule->priority)},
{"source", rule->source},
{"tags", concat_set_in_order(rule->tags)}
};
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metric, "falcosecurity", "falco", const_labels);
}
}
falco_metrics.emplace_back(libs_metrics_collector.new_metric("start_ts",
METRICS_V2_MISC, METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64, METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS, METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT, METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
agent_info->start_ts_epoch)); agent_info->start_ts_epoch));
static_metrics.push_back(libs_metrics_collector.new_metric("host_boot_ts", falco_metrics.emplace_back(libs_metrics_collector.new_metric("host_boot_ts",
METRICS_V2_MISC, METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64, METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS, METRIC_VALUE_UNIT_TIME_TIMESTAMP_NS,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT, METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
machine_info->boot_ts_epoch)); machine_info->boot_ts_epoch));
static_metrics.push_back(libs_metrics_collector.new_metric("host_num_cpus", falco_metrics.emplace_back(libs_metrics_collector.new_metric("host_num_cpus",
METRICS_V2_MISC, METRICS_V2_MISC,
METRIC_VALUE_TYPE_U32, METRIC_VALUE_TYPE_U32,
METRIC_VALUE_UNIT_COUNT, METRIC_VALUE_UNIT_COUNT,
METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT, METRIC_VALUE_METRIC_TYPE_NON_MONOTONIC_CURRENT,
machine_info->num_cpus)); machine_info->num_cpus));
static_metrics.push_back(libs_metrics_collector.new_metric("outputs_queue_num_drops", falco_metrics.emplace_back(libs_metrics_collector.new_metric("outputs_queue_num_drops",
METRICS_V2_MISC, METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64, METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_COUNT, METRIC_VALUE_UNIT_COUNT,
@ -134,18 +167,20 @@ std::string falco_metrics::to_text(const falco::app::state& state)
auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()).count(); auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
static_metrics.push_back(libs_metrics_collector.new_metric("duration_sec", falco_metrics.emplace_back(libs_metrics_collector.new_metric("duration_sec",
METRICS_V2_MISC, METRICS_V2_MISC,
METRIC_VALUE_TYPE_U64, METRIC_VALUE_TYPE_U64,
METRIC_VALUE_UNIT_TIME_S_COUNT, METRIC_VALUE_UNIT_TIME_S_COUNT,
METRIC_VALUE_METRIC_TYPE_MONOTONIC, METRIC_VALUE_METRIC_TYPE_MONOTONIC,
(uint64_t)((now - agent_info->start_ts_epoch) / ONE_SECOND_IN_NS))); (uint64_t)((now - agent_info->start_ts_epoch) / ONE_SECOND_IN_NS)));
for (auto metrics: static_metrics)
for (auto metric: falco_metrics)
{ {
prometheus_metrics_converter.convert_metric_to_unit_convention(metrics); prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metrics, "falcosecurity", "falco"); prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metric, "falcosecurity", "falco");
} }
} }
for (auto metrics_collector: metrics_collectors) for (auto metrics_collector: metrics_collectors)
@ -153,15 +188,15 @@ std::string falco_metrics::to_text(const falco::app::state& state)
metrics_collector.snapshot(); metrics_collector.snapshot();
auto metrics_snapshot = metrics_collector.get_metrics(); auto metrics_snapshot = metrics_collector.get_metrics();
for (auto& metrics: metrics_snapshot) for (auto& metric: metrics_snapshot)
{ {
prometheus_metrics_converter.convert_metric_to_unit_convention(metrics); prometheus_metrics_converter.convert_metric_to_unit_convention(metric);
std::string namespace_name = "scap"; std::string namespace_name = "scap";
if (metrics.flags & METRICS_V2_RESOURCE_UTILIZATION || metrics.flags & METRICS_V2_KERNEL_COUNTERS) if (metric.flags & METRICS_V2_RESOURCE_UTILIZATION || metric.flags & METRICS_V2_KERNEL_COUNTERS)
{ {
namespace_name = "falco"; namespace_name = "falco";
} }
prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metrics, "falcosecurity", namespace_name); prometheus_text += prometheus_metrics_converter.convert_metric_to_text_prometheus(metric, "falcosecurity", namespace_name);
} }
} }

View File

@ -170,8 +170,9 @@ stats_writer::ticker_t stats_writer::get_ticker()
stats_writer::stats_writer( stats_writer::stats_writer(
const std::shared_ptr<falco_outputs>& outputs, const std::shared_ptr<falco_outputs>& outputs,
const std::shared_ptr<const falco_configuration>& config) const std::shared_ptr<const falco_configuration>& config,
: m_config(config) const std::shared_ptr<const falco_engine>& engine)
: m_config(config), m_engine(engine)
{ {
if (config->m_metrics_enabled) if (config->m_metrics_enabled)
{ {
@ -372,6 +373,23 @@ void stats_writer::collector::get_metrics_output_fields_additional(
nlohmann::json& output_fields, nlohmann::json& output_fields,
double stats_snapshot_time_delta_sec) double stats_snapshot_time_delta_sec)
{ {
if(m_writer->m_config->m_metrics_flags & METRICS_V2_RULE_COUNTERS)
{
const stats_manager& rule_stats_manager = m_writer->m_engine->get_rule_stats_manager();
const indexed_vector<falco_rule>& rules = m_writer->m_engine->get_rules();
output_fields["falco.rules.matches_total"] = rule_stats_manager.m_total.load();
for (size_t i = 0; i < rule_stats_manager.m_by_rule_id.size(); i++)
{
auto rule_count = rule_stats_manager.m_by_rule_id[i]->load();
if (rule_count == 0 && !m_writer->m_config->m_metrics_include_empty_values)
{
continue;
}
std::string rules_metric_name = "falco.rules." + rules.at(i)->name;
output_fields[rules_metric_name] = rule_count;
}
}
#if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__) #if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__)
if (m_writer->m_libs_metrics_collector && m_writer->m_output_rule_metrics_converter) if (m_writer->m_libs_metrics_collector && m_writer->m_output_rule_metrics_converter)
{ {

View File

@ -96,7 +96,8 @@ public:
\brief Initializes a writer. \brief Initializes a writer.
*/ */
stats_writer(const std::shared_ptr<falco_outputs>& outputs, stats_writer(const std::shared_ptr<falco_outputs>& outputs,
const std::shared_ptr<const falco_configuration>& config); const std::shared_ptr<const falco_configuration>& config,
const std::shared_ptr<const falco_engine>& engine);
/*! /*!
\brief Returns true if the writer is configured with a valid output. \brief Returns true if the writer is configured with a valid output.
@ -152,6 +153,7 @@ private:
#endif #endif
std::shared_ptr<falco_outputs> m_outputs; std::shared_ptr<falco_outputs> m_outputs;
std::shared_ptr<const falco_configuration> m_config; std::shared_ptr<const falco_configuration> m_config;
std::shared_ptr<const falco_engine> m_engine;
// note: in this way, only collectors can push into the queue // note: in this way, only collectors can push into the queue
friend class stats_writer::collector; friend class stats_writer::collector;
}; };