new(userspace/falco)!: introduce native support for resource_utilization metrics / stats v2

Intended to phase out previous stats writer settings and log schema.

Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
This commit is contained in:
Melissa Kilby 2023-04-27 11:14:03 +00:00 committed by poiana
parent 44d9f99c72
commit 4d24bcdd2f
3 changed files with 254 additions and 38 deletions

View File

@ -275,7 +275,7 @@ static falco::app::run_result do_inspect(
}
// for capture mode, the source name can change at every event
stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx]);
stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx], num_evts);
}
else
{
@ -291,7 +291,7 @@ static falco::app::run_result do_inspect(
}
// for live mode, the source name is constant
stats_collector.collect(inspector, source);
stats_collector.collect(inspector, source, num_evts);
}
// Reset the timeouts counter, Falco successfully got an event to process
@ -397,17 +397,54 @@ static void process_inspector_events(
}
}
static std::shared_ptr<stats_writer> init_stats_writer(const options& opts)
static std::shared_ptr<stats_writer> init_stats_writer(const options& opts, std::shared_ptr<falco_outputs> outputs, std::shared_ptr<falco_configuration> config)
{
auto statsw = std::make_shared<stats_writer>();
if (!opts.stats_filename.empty())
auto statsw = std::make_shared<stats_writer>(outputs, config);
std::string err;
uint64_t stats_interval_ms = 0;
if (config->m_stats_v2_enabled && config->m_stats_v2_stats_interval_preset > 0)
{
std::string err;
if (!stats_writer::init_ticker(opts.stats_interval, err))
uint16_t index = config->m_stats_v2_stats_interval_preset;
if(index <= MAX_STATS_PRESET_INDEX)
{
/* Index 0 reserved, milliseconds representation for 15min, 30min, 1hr, 4hrs, 6hrs, 12hrs. */
std::vector<uint64_t> vect{0LLU, 900000LU, 1800000LU, 3600000LU, 14400000LU, 21600000LU, 43200000LU};
stats_interval_ms = vect[index];
}
else
{
// todo: warning message
stats_interval_ms = 0;
}
}
/* Continue cmd args support and old defaults for backward compatibility, scheduled for deprecation. */
if (stats_interval_ms == 0 && opts.stats_interval > 0)
{
stats_interval_ms = opts.stats_interval;
}
/* New config. Exact stats_interval_ms in falco.yaml overrides presets. */
if (config->m_stats_v2_enabled && config->m_stats_v2_stats_interval_ms > 0)
{
stats_interval_ms = config->m_stats_v2_stats_interval_ms;
}
if (stats_interval_ms > 0)
{
if (!stats_writer::init_ticker(stats_interval_ms, err))
{
throw falco_exception(err);
}
statsw.reset(new stats_writer(opts.stats_filename));
}
/* Continue cmd args support for backward compatibility, scheduled for deprecation. */
if (!config->m_stats_v2_enabled && !opts.stats_filename.empty())
{
statsw.reset(new stats_writer(opts.stats_filename, outputs, config));
}
/* New config. */
else if (config->m_stats_v2_enabled && !config->m_stats_v2_stats_filename.empty())
{
statsw.reset(new stats_writer(config->m_stats_v2_stats_filename, outputs, config));
}
return statsw;
}
@ -421,7 +458,7 @@ falco::app::run_result falco::app::actions::process_events(falco::app::state& s)
s.engine->complete_rule_loading();
// Initialize stats writer
auto statsw = init_stats_writer(s.options);
auto statsw = init_stats_writer(s.options, s.outputs, s.config);
if (s.options.dry_run)
{

View File

@ -25,7 +25,7 @@ limitations under the License.
#include "stats_writer.h"
#include "logger.h"
#include "banned.h" // This raises a compilation error when certain functions are used
#include "logger.h"
#include "config_falco.h"
// note: ticker_t is an uint16_t, which is enough because we don't care about
// overflows here. Threads calling stats_writer::handle() will just
@ -67,18 +67,21 @@ stats_writer::ticker_t stats_writer::get_ticker()
return s_timer.load(std::memory_order_relaxed);
}
stats_writer::stats_writer()
stats_writer::stats_writer(std::shared_ptr<falco_outputs> outputs, std::shared_ptr<falco_configuration> config)
: m_initialized(false), m_total_samples(0)
{
m_outputs = outputs;
m_config = config;
}
stats_writer::stats_writer(const std::string &filename)
stats_writer::stats_writer(const std::string &filename, std::shared_ptr<falco_outputs> outputs, std::shared_ptr<falco_configuration> config)
: m_initialized(true), m_total_samples(0)
{
m_output.exceptions(std::ofstream::failbit | std::ofstream::badbit);
m_output.open(filename, std::ios_base::app);
m_worker = std::thread(&stats_writer::worker, this);
m_outputs = outputs;
m_config = config;
}
stats_writer::~stats_writer()
@ -162,39 +165,196 @@ void stats_writer::worker() noexcept
}
stats_writer::collector::collector(std::shared_ptr<stats_writer> writer)
: m_writer(writer), m_last_tick(0), m_samples(0)
: m_writer(writer), m_last_tick(0), m_samples(0), m_last_now(0), m_last_n_evts(0), m_last_n_drops(0), m_last_num_evts(0)
{
}
std::map<std::string, std::string> stats_writer::collector::get_stats_v2_output_fields_wrapper(std::shared_ptr<sinsp> inspector, uint64_t now, std::string src, uint64_t num_evts, uint64_t stats_snapshot_time_delta_sec)
{
std::map<std::string, std::string> output_fields;
const scap_agent_info* agent_info = inspector->get_agent_info();
const scap_machine_info* machine_info = inspector->get_machine_info();
/* Wrapper fields needed for statistical analyses and attributions. Always enabled. */
output_fields["evt.time"] = std::to_string(now); /* Some ETLs may prefer a consistent timestamp within output. */
output_fields["falco_version"] = FALCO_VERSION;
output_fields["falco_start_ts"] = std::to_string(agent_info->start_ts_epoch);
output_fields["kernel_release"] = agent_info->uname_r;
output_fields["host_boot_ts"] = std::to_string(machine_info->boot_ts_epoch);
output_fields["hostname"] = machine_info->hostname; /* Explicitly add hostname to log msg in case hostname rule output field is disabled. */
output_fields["host_num_cpus"] = std::to_string(machine_info->num_cpus);
if(inspector->check_current_engine(BPF_ENGINE))
{
output_fields["driver"] = "bpf";
}
else if (inspector->check_current_engine(MODERN_BPF_ENGINE))
{
output_fields["driver"] = "modern_bpf";
}
else if (inspector->check_current_engine(KMOD_ENGINE))
{
output_fields["driver"] = "kmod";
}
else
{
output_fields["driver"] = "no_driver";
}
output_fields["src"] = src;
/* Falco userspace events counters. Always enabled. */
if (m_last_num_evts != 0 && stats_snapshot_time_delta_sec > 0)
{
/* Successfully processed userspace events. */
output_fields["falco_evts_rate_sec"] = std::to_string((num_evts - m_last_num_evts) / stats_snapshot_time_delta_sec);
}
output_fields["falco_num_evts"] = std::to_string(num_evts);
output_fields["falco_num_evts_prev"] = std::to_string(m_last_num_evts);
m_last_num_evts = num_evts;
return output_fields;
}
void stats_writer::collector::collect(std::shared_ptr<sinsp> inspector, const std::string& src)
/*
 * Add the syscalls-source metrics of a stats v2 snapshot to output_fields and return the result.
 *
 * inspector                      inspector queried for agent/machine info, the stats v2 buffer,
 *                                the active engine and the capture stats.
 * output_fields                  fields collected so far (taken by value); returned with the
 *                                syscalls related metrics added.
 * stats_snapshot_time_delta_sec  seconds elapsed since the previous snapshot (0 when unknown).
 *
 * Depending on the m_stats_v2_include_* configuration flags this adds resource utilization
 * metrics, kernel side event counters and libbpf stats. For the total "n_evts" and "n_drops"
 * counters it additionally emits a per-second rate and the previous counter value.
 * Side effect: updates m_last_n_evts and m_last_n_drops.
 * With MINIMAL_BUILD defined, output_fields is returned unchanged.
 */
std::map<std::string, std::string> stats_writer::collector::get_stats_v2_output_fields_syscalls(std::shared_ptr<sinsp> inspector, std::map<std::string, std::string> output_fields, uint64_t stats_snapshot_time_delta_sec)
{
#ifndef MINIMAL_BUILD
	/* Only needed by the non-minimal code path, hence declared inside the guard. */
	const scap_agent_info* agent_info = inspector->get_agent_info();
	const scap_machine_info* machine_info = inspector->get_machine_info();

	/* Resource utilization, CPU and memory usage etc. */
	uint32_t nstats = 0;
	int32_t rc = 0;
	if (m_writer->m_config->m_stats_v2_include_resource_utilization)
	{
		auto buffer = inspector->get_sinsp_stats_v2_buffer();
		const scap_stats_v2* utilization = libsinsp::resource_utilization::get_resource_utilization(agent_info, buffer, &nstats, &rc);
		if (utilization && rc == 0 && nstats > 0)
		{
			// todo: support unit conversions for memory metrics
			for(uint32_t stat = 0; stat < nstats; stat++)
			{
				switch(utilization[stat].type)
				{
				case STATS_VALUE_TYPE_U64:
					output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.u64);
					break;
				case STATS_VALUE_TYPE_U32:
					output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.u32);
					break;
				case STATS_VALUE_TYPE_D:
					output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.d);
					break;
				default:
					/* Other value types are not exported. */
					break;
				}
			}
		}
	}

	/* Kernel side stats counters and libbpf stats if applicable. */
	nstats = 0;
	rc = 0;
	uint32_t flags = 0;
	if (m_writer->m_config->m_stats_v2_include_kernel_evts_counters)
	{
		flags |= PPM_SCAP_STATS_KERNEL_COUNTERS;
	}
	/* libbpf stats are only requested for non-kmod engines and when the machine info flags report them enabled. */
	if (m_writer->m_config->m_stats_v2_include_libbpf_stats && !inspector->check_current_engine(KMOD_ENGINE) && (machine_info->flags & PPM_BPF_STATS_ENABLED))
	{
		flags |= PPM_SCAP_STATS_LIBBPF_STATS;
	}
	const scap_stats_v2* stats_v2 = inspector->get_capture_stats_v2(flags, &nstats, &rc);
	if (stats_v2 && nstats > 0 && rc == 0)
	{
		for(uint32_t stat = 0; stat < nstats; stat++)
		{
			switch(stats_v2[stat].type)
			{
			case STATS_VALUE_TYPE_U64:
				/*
				 * Compare including the terminating NUL (sizeof of the literal) so that only the
				 * exact counter name matches. A plain prefix comparison would also match every
				 * other counter whose name merely starts with "n_evts" / "n_drops" and would
				 * corrupt the remembered totals and the derived rates.
				 */
				if (strncmp(stats_v2[stat].name, "n_evts", sizeof("n_evts")) == 0)
				{
					output_fields["falco_evts_rate_kernel_sec"] = std::to_string(0);
					/* A zero previous value is treated as "no previous snapshot". */
					if (m_last_n_evts != 0 && stats_snapshot_time_delta_sec > 0)
					{
						/* n_evts is total number of kernel side events. */
						output_fields["falco_evts_rate_kernel_sec"] = std::to_string((stats_v2[stat].value.u64 - m_last_n_evts) / stats_snapshot_time_delta_sec);
					}
					output_fields["n_evts_prev"] = std::to_string(m_last_n_evts);
					m_last_n_evts = stats_v2[stat].value.u64;
				}
				else if (strncmp(stats_v2[stat].name, "n_drops", sizeof("n_drops")) == 0)
				{
					output_fields["falco_evts_drop_rate_kernel_sec"] = std::to_string(0);
					if (m_last_n_drops != 0 && stats_snapshot_time_delta_sec > 0)
					{
						/* n_drops is total number of kernel side event drops; the delta is taken against the previous drops counter. */
						output_fields["falco_evts_drop_rate_kernel_sec"] = std::to_string((stats_v2[stat].value.u64 - m_last_n_drops) / stats_snapshot_time_delta_sec);
					}
					output_fields["n_drops_prev"] = std::to_string(m_last_n_drops);
					m_last_n_drops = stats_v2[stat].value.u64;
				}
				/* Every u64 counter is exported under its own name as well. */
				output_fields[stats_v2[stat].name] = std::to_string(stats_v2[stat].value.u64);
				break;
			default:
				/* Only u64 counters are exported from the capture stats. */
				break;
			}
		}
	}
#endif
	return output_fields;
}
void stats_writer::collector::collect(std::shared_ptr<sinsp> inspector, const std::string &src, uint64_t num_evts)
{
// just skip if no output is configured
if (m_writer->has_output())
if (m_writer->m_config->m_stats_v2_enabled || m_writer->has_output())
{
// collect stats once per each ticker period
auto tick = stats_writer::get_ticker();
if (tick != m_last_tick)
{
stats_writer::msg msg;
msg.stop = false;
msg.source = src;
inspector->get_capture_stats(&msg.stats);
m_samples++;
if(m_samples == 1)
auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
uint64_t stats_snapshot_time_delta = 0;
if (m_last_now != 0)
{
msg.delta = msg.stats;
stats_snapshot_time_delta = now - m_last_now;
}
else
m_last_now = now;
std::map<std::string, std::string> output_fields = stats_writer::collector::get_stats_v2_output_fields_wrapper(inspector, now, src, num_evts, (stats_snapshot_time_delta / ONE_SECOND_IN_NS));
if (src == falco_common::syscall_source)
{
msg.delta.n_evts = msg.stats.n_evts - m_last_stats.n_evts;
msg.delta.n_drops = msg.stats.n_drops - m_last_stats.n_drops;
msg.delta.n_preemptions = msg.stats.n_preemptions - m_last_stats.n_preemptions;
output_fields = stats_writer::collector::get_stats_v2_output_fields_syscalls(inspector, output_fields, (stats_snapshot_time_delta / ONE_SECOND_IN_NS));
}
if (m_writer->m_config->m_stats_v2_enabled && m_writer->m_config->m_stats_v2_stats_internal_rule && m_writer->m_outputs)
{
std::string rule = "Falco internal: resource utilization stats metrics";
std::string msg = "";
m_writer->m_outputs->handle_msg(now, falco_common::PRIORITY_DEBUG, msg, rule, output_fields);
}
if (m_writer->has_output())
{
stats_writer::msg msg;
msg.stop = false;
msg.source = src;
inspector->get_capture_stats(&msg.stats);
m_samples++;
if(m_samples == 1)
{
msg.delta = msg.stats;
}
else
{
msg.delta.n_evts = msg.stats.n_evts - m_last_stats.n_evts;
msg.delta.n_drops = msg.stats.n_drops - m_last_stats.n_drops;
msg.delta.n_preemptions = msg.stats.n_preemptions - m_last_stats.n_preemptions;
}
m_last_stats = msg.stats;
m_writer->push(msg);
}
m_last_tick = tick;
m_last_stats = msg.stats;
m_writer->push(msg);
}
}
}

View File

@ -23,6 +23,10 @@ limitations under the License.
#include <sinsp.h>
#include "tbb/concurrent_queue.h"
#include "falco_outputs.h"
#include "configuration.h"
#define MAX_STATS_PRESET_INDEX 6
/*!
\brief Writes stats samples collected from inspectors into a given output.
@ -56,13 +60,27 @@ public:
\brief Collects one stats sample from an inspector
and for the given event source name
*/
void collect(std::shared_ptr<sinsp> inspector, const std::string& src);
void collect(std::shared_ptr<sinsp> inspector, const std::string& src, uint64_t num_evts);
/*!
\brief Collect snapshot stats v2 wrapper fields as internal rule formatted output fields.
*/
std::map<std::string, std::string> get_stats_v2_output_fields_wrapper(std::shared_ptr<sinsp> inspector, uint64_t now, std::string src, uint64_t num_evts, uint64_t stats_snapshot_time_delta_sec);
/*!
\brief Collect snapshot stats v2 syscalls related metrics as internal rule formatted output fields.
*/
std::map<std::string, std::string> get_stats_v2_output_fields_syscalls(std::shared_ptr<sinsp> inspector, std::map<std::string, std::string> output_fields, uint64_t stats_snapshot_time_delta_sec);
private:
std::shared_ptr<stats_writer> m_writer;
stats_writer::ticker_t m_last_tick;
uint64_t m_samples;
scap_stats m_last_stats;
uint64_t m_last_now;
uint64_t m_last_n_evts;
uint64_t m_last_n_drops;
uint64_t m_last_num_evts;
};
stats_writer(const stats_writer&) = delete;
@ -76,19 +94,18 @@ public:
~stats_writer();
/*!
\brief Initializes a writer without any output.
With this constructor, has_output() always returns false
\brief Initializes a writer without file output.
*/
stats_writer();
stats_writer(std::shared_ptr<falco_outputs> outputs, std::shared_ptr<falco_configuration> config);
/*!
\brief Initializes a writer that prints to a file at the given filename.
\brief Initializes a writer that can print to a file at the given filename.
With this constructor, has_output() always returns true
*/
explicit stats_writer(const std::string &filename);
explicit stats_writer(const std::string &filename, std::shared_ptr<falco_outputs> outputs, std::shared_ptr<falco_configuration> config);
/*!
\brief Returns true if the writer is configured with a valid output
\brief Returns true if the writer is configured with a valid file output
*/
inline bool has_output() const;
@ -129,7 +146,9 @@ private:
uint64_t m_total_samples;
std::thread m_worker;
std::ofstream m_output;
tbb::concurrent_bounded_queue<stats_writer::msg> m_queue;
tbb::concurrent_bounded_queue<stats_writer::msg> m_queue;
std::shared_ptr<falco_outputs> m_outputs;
std::shared_ptr<falco_configuration> m_config;
// note: in this way, only collectors can push into the queue
friend class stats_writer::collector;