diff --git a/falco.yaml b/falco.yaml index d6e273c1..79c713e8 100644 --- a/falco.yaml +++ b/falco.yaml @@ -333,17 +333,18 @@ rule_matching: first # Lowering the number of items can prevent memory from steadily increasing until the OOM # killer stops the Falco process. We provide recovery actions to self-limit or self-kill # in order to handle this situation earlier, similar to how we expose the kernel buffer size -# as a parameter. -# However, it will not address the root cause of the event pipe not keeping up. +# as a parameter. However, it will not address the root cause of the event pipe not keeping up. # -# `capacity`: the maximum number of items allowed in the queue, defaulting to 0. This means that -# the queue remains unbounded aka this setting is disabled. -# You can experiment with values greater or smaller than the anchor value 1000000. +# `capacity`: the maximum number of items allowed in the queue is determined by this value. +# Setting the value to 0 (which is the default) is equivalent to keeping the queue unbounded. +# In other words, when this configuration is set to 0, the number of allowed items is effectively +# set to the largest possible long value, disabling this setting. # -# `recovery`: the strategy to follow when the queue becomes filled up. This also applies when -# the queue is unbounded, and all available memory on the system is consumed. -# `exit` is default, `continue` does nothing special and `empty` empties the queue and then -# continues. +# `recovery`: strategy to follow when the queue becomes filled up. It applies only when the +# queue is bounded and there is still available system memory. In the case of an unbounded +# queue, if the available memory on the system is consumed, the Falco process would be +# OOM killed. The value `exit` is the default, `continue` does nothing special and `empty` +# empties the queue and then continues. outputs_queue: capacity: 0 recovery: exit diff --git a/userspace/engine/falco_common.cpp b/userspace/engine/falco_common.cpp index d94306aa..9cf27d63 100644 --- a/userspace/engine/falco_common.cpp +++ b/userspace/engine/falco_common.cpp @@ -32,7 +32,7 @@ static std::vector rule_matching_names = { "all" }; -static std::vector outputs_recovery_names = { +static std::vector outputs_queue_recovery_names = { "continue", "exit", "empty", @@ -65,13 +65,13 @@ falco_common::priority_type falco_common::parse_priority(std::string v) return out; } -bool falco_common::parse_recovery(std::string v, outputs_recovery_type& out) +bool falco_common::parse_queue_recovery(std::string v, outputs_queue_recovery_type& out) { - for (size_t i = 0; i < outputs_recovery_names.size(); i++) + for (size_t i = 0; i < outputs_queue_recovery_names.size(); i++) { - if (!strcasecmp(v.c_str(), outputs_recovery_names[i].c_str())) + if (!strcasecmp(v.c_str(), outputs_queue_recovery_names[i].c_str())) { - out = (outputs_recovery_type) i; + out = (outputs_queue_recovery_type) i; return true; } } diff --git a/userspace/engine/falco_common.h b/userspace/engine/falco_common.h index 6ddefad2..63af03f0 100644 --- a/userspace/engine/falco_common.h +++ b/userspace/engine/falco_common.h @@ -59,7 +59,7 @@ struct falco_exception : std::exception namespace falco_common { - enum outputs_recovery_type { + enum outputs_queue_recovery_type { RECOVERY_CONTINUE = 0, /* queue_capacity_outputs recovery strategy of continuing on. */ RECOVERY_EXIT = 1, /* queue_capacity_outputs recovery strategy of exiting, self OOM kill. */ RECOVERY_EMPTY = 2, /* queue_capacity_outputs recovery strategy of emptying queue then continuing. */ @@ -82,7 +82,7 @@ namespace falco_common bool parse_priority(std::string v, priority_type& out); priority_type parse_priority(std::string v); - bool parse_recovery(std::string v, outputs_recovery_type& out); + bool parse_queue_recovery(std::string v, outputs_queue_recovery_type& out); bool format_priority(priority_type v, std::string& out, bool shortfmt=false); std::string format_priority(priority_type v, bool shortfmt=false); diff --git a/userspace/falco/app/actions/process_events.cpp b/userspace/falco/app/actions/process_events.cpp index 2d92ff89..50ac60d7 100644 --- a/userspace/falco/app/actions/process_events.cpp +++ b/userspace/falco/app/actions/process_events.cpp @@ -281,7 +281,7 @@ static falco::app::run_result do_inspect( } // for capture mode, the source name can change at every event - stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx], s.outputs, num_evts); + stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx], num_evts); } else { @@ -300,7 +300,7 @@ static falco::app::run_result do_inspect( } // for live mode, the source name is constant - stats_collector.collect(inspector, source, s.outputs, num_evts); + stats_collector.collect(inspector, source, num_evts); } // Reset the timeouts counter, Falco successfully got an event to process diff --git a/userspace/falco/configuration.cpp b/userspace/falco/configuration.cpp index 0f67d3f4..5dfd2b7d 100644 --- a/userspace/falco/configuration.cpp +++ b/userspace/falco/configuration.cpp @@ -290,7 +290,7 @@ void falco_configuration::load_yaml(const std::string& config_name, const yaml_h m_outputs_queue_capacity = DEFAULT_OUTPUTS_QUEUE_CAPACITY; } std::string recovery = config.get_scalar("outputs_queue.recovery", "exit"); - if (!falco_common::parse_recovery(recovery, m_outputs_queue_recovery)) + if (!falco_common::parse_queue_recovery(recovery, m_outputs_queue_recovery)) { throw std::logic_error("Unknown recovery \"" + recovery + "\"--must be one of exit, continue, empty"); } diff --git a/userspace/falco/configuration.h b/userspace/falco/configuration.h index 86bf813f..c1a595d0 100644 --- a/userspace/falco/configuration.h +++ b/userspace/falco/configuration.h @@ -73,7 +73,7 @@ public: bool m_watch_config_files; bool m_buffered_outputs; size_t m_outputs_queue_capacity; - falco_common::outputs_recovery_type m_outputs_queue_recovery; + falco_common::outputs_queue_recovery_type m_outputs_queue_recovery; bool m_time_format_iso_8601; uint32_t m_output_timeout; diff --git a/userspace/falco/falco_outputs.cpp b/userspace/falco/falco_outputs.cpp index 1cbc6cb3..a94fc87d 100644 --- a/userspace/falco/falco_outputs.cpp +++ b/userspace/falco/falco_outputs.cpp @@ -47,7 +47,7 @@ falco_outputs::falco_outputs( uint32_t timeout, bool buffered, size_t outputs_queue_capacity, - falco_common::outputs_recovery_type outputs_queue_recovery, + falco_common::outputs_queue_recovery_type outputs_queue_recovery, bool time_format_iso_8601, const std::string& hostname) { diff --git a/userspace/falco/falco_outputs.h b/userspace/falco/falco_outputs.h index cf6efc33..3fbf2169 100644 --- a/userspace/falco/falco_outputs.h +++ b/userspace/falco/falco_outputs.h @@ -49,7 +49,7 @@ public: uint32_t timeout, bool buffered, size_t outputs_queue_capacity, - falco_common::outputs_recovery_type outputs_queue_recovery, + falco_common::outputs_queue_recovery_type outputs_queue_recovery, bool time_format_iso_8601, const std::string& hostname); @@ -118,7 +118,7 @@ private: #ifndef __EMSCRIPTEN__ typedef tbb::concurrent_bounded_queue falco_outputs_cbq; falco_outputs_cbq m_queue; - uint32_t m_recovery; + falco_common::outputs_queue_recovery_type m_recovery; uint64_t m_outputs_queue_num_drops; #endif diff --git a/userspace/falco/stats_writer.cpp b/userspace/falco/stats_writer.cpp index 4a04cd06..700335f3 100644 --- a/userspace/falco/stats_writer.cpp +++ b/userspace/falco/stats_writer.cpp @@ -220,7 +220,7 @@ stats_writer::collector::collector(const std::shared_ptr& writer) void stats_writer::collector::get_metrics_output_fields_wrapper( nlohmann::json& output_fields, const std::shared_ptr& inspector, uint64_t now, - const std::string& src, uint64_t outputs_queue_num_drops, uint64_t num_evts, double stats_snapshot_time_delta_sec) + const std::string& src, uint64_t num_evts, double stats_snapshot_time_delta_sec) { static const char* all_driver_engines[] = { BPF_ENGINE, KMOD_ENGINE, MODERN_BPF_ENGINE, @@ -237,7 +237,7 @@ void stats_writer::collector::get_metrics_output_fields_wrapper( output_fields["falco.host_boot_ts"] = machine_info->boot_ts_epoch; output_fields["falco.hostname"] = machine_info->hostname; /* Explicitly add hostname to log msg in case hostname rule output field is disabled. */ output_fields["falco.host_num_cpus"] = machine_info->num_cpus; - output_fields["falco.outputs_queue_num_drops"] = outputs_queue_num_drops; + output_fields["falco.outputs_queue_num_drops"] = m_writer->m_outputs->get_outputs_queue_num_drops(); output_fields["evt.source"] = src; for (size_t i = 0; i < sizeof(all_driver_engines) / sizeof(const char*); i++) @@ -422,7 +422,7 @@ void stats_writer::collector::get_metrics_output_fields_additional( #endif } -void stats_writer::collector::collect(const std::shared_ptr& inspector, const std::string &src, const std::shared_ptr& outputs, uint64_t num_evts) +void stats_writer::collector::collect(const std::shared_ptr& inspector, const std::string &src, uint64_t num_evts) { if (m_writer->has_output()) { @@ -443,8 +443,7 @@ void stats_writer::collector::collect(const std::shared_ptr& inspector, c /* Get respective metrics output_fields. */ nlohmann::json output_fields; - uint64_t outputs_queue_num_drops = outputs->get_outputs_queue_num_drops(); - get_metrics_output_fields_wrapper(output_fields, inspector, now, src, outputs_queue_num_drops, num_evts, stats_snapshot_time_delta_sec); + get_metrics_output_fields_wrapper(output_fields, inspector, now, src, num_evts, stats_snapshot_time_delta_sec); get_metrics_output_fields_additional(output_fields, inspector, stats_snapshot_time_delta_sec, src); /* Send message in the queue */ diff --git a/userspace/falco/stats_writer.h b/userspace/falco/stats_writer.h index 4c68d243..42a183e7 100644 --- a/userspace/falco/stats_writer.h +++ b/userspace/falco/stats_writer.h @@ -60,13 +60,13 @@ public: \brief Collects one stats sample from an inspector and for the given event source name */ - void collect(const std::shared_ptr& inspector, const std::string& src, const std::shared_ptr& outputs, uint64_t num_evts); + void collect(const std::shared_ptr& inspector, const std::string& src, uint64_t num_evts); private: /*! \brief Collect snapshot metrics wrapper fields as internal rule formatted output fields. */ - void get_metrics_output_fields_wrapper(nlohmann::json& output_fields, const std::shared_ptr& inspector, uint64_t now, const std::string& src, uint64_t outputs_queue_num_drops, uint64_t num_evts, double stats_snapshot_time_delta_sec); + void get_metrics_output_fields_wrapper(nlohmann::json& output_fields, const std::shared_ptr& inspector, uint64_t now, const std::string& src, uint64_t num_evts, double stats_snapshot_time_delta_sec); /*! \brief Collect snapshot metrics syscalls related metrics as internal rule formatted output fields.