diff --git a/falco.yaml b/falco.yaml index 23a2cc13..86fae5ce 100644 --- a/falco.yaml +++ b/falco.yaml @@ -322,24 +322,31 @@ rule_matching: first # [Experimental] `queue_capacity_outputs` # -# Falco utilizes tbb::concurrent_bounded_queue for the outputs, and this parameter -# allows you to customize the capacity. Refer to the official documentation: +# Falco utilizes tbb::concurrent_bounded_queue for handling outputs, and this parameter +# allows you to customize the queue capacity. Please refer to the official documentation: # https://oneapi-src.github.io/oneTBB/main/tbb_userguide/Concurrent_Queue_Classes.html. -# On a healthy system with tuned Falco rules, the queue should not fill up. -# If it does, it most likely happens if the entire event flow is too slow. This -# could indicate that the server is under heavy load. -# -# Lowering the number of items can prevent steadily increasing memory until the OOM -# killer stops the Falco process. We expose recovery actions to self-limit or self -# OOM kill earlier similar to how we expose the kernel buffer size as parameter. -# However, it will not address the root cause of the event pipe not holding up. +# On a healthy system with optimized Falco rules, the queue should not fill up. +# If it does, it is most likely happening due to the entire event flow being too slow, +# indicating that the server is under heavy load. +# +# Lowering the number of items can prevent memory from steadily increasing until the OOM +# killer stops the Falco process. We provide recovery actions to self-limit or self-kill +# in order to handle this situation earlier, similar to how we expose the kernel buffer size +# as a parameter. +# However, it will not address the root cause of the event pipe not keeping up. +# +# `items`: the maximum number of items allowed in the queue, defaulting to 0. This means that +# the queue is unbounded. +# You can experiment with values greater or smaller than the anchor value 1000000. 
+# +# `recovery`: the strategy to follow when the queue becomes full. This also applies when +# the queue is unbounded and all available memory on the system is consumed. +# recovery: 0 means continue. +# recovery: 1 means simply exit (default behavior). +# recovery: 2 means empty the queue and then continue. queue_capacity_outputs: - # number of max items in queue - items: 1000000 - # continue: 0 (default) - # exit: 1 - # empty queue then continue: 2 - recovery: 0 + items: 0 + recovery: 1 ########################## diff --git a/userspace/falco/configuration.cpp b/userspace/falco/configuration.cpp index 5a43ee3c..f7dab1dc 100644 --- a/userspace/falco/configuration.cpp +++ b/userspace/falco/configuration.cpp @@ -42,7 +42,7 @@ falco_configuration::falco_configuration(): m_watch_config_files(true), m_buffered_outputs(false), m_queue_capacity_outputs_items(DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS), - m_queue_capacity_outputs_recovery(RECOVERY_DROP_CURRENT), + m_queue_capacity_outputs_recovery(RECOVERY_EXIT), m_time_format_iso_8601(false), m_output_timeout(2000), m_grpc_enabled(false), @@ -285,7 +285,7 @@ void falco_configuration::load_yaml(const std::string& config_name, const yaml_h m_buffered_outputs = config.get_scalar("buffered_outputs", false); m_queue_capacity_outputs_items = config.get_scalar("queue_capacity_outputs.items", DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS); - m_queue_capacity_outputs_recovery = config.get_scalar("queue_capacity_outputs.recovery", RECOVERY_DROP_CURRENT); + m_queue_capacity_outputs_recovery = config.get_scalar("queue_capacity_outputs.recovery", RECOVERY_EXIT); m_time_format_iso_8601 = config.get_scalar("time_format_iso_8601", false); m_webserver_enabled = config.get_scalar("webserver.enabled", false); diff --git a/userspace/falco/configuration_aux.h b/userspace/falco/configuration_aux.h index eb9344a6..31504e30 100644 --- a/userspace/falco/configuration_aux.h +++ b/userspace/falco/configuration_aux.h @@ -13,7 +13,7 @@ limitations under the 
License. #pragma once -#define DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS 1000000UL +#define DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS 0 enum outputs_recovery_code { RECOVERY_DROP_CURRENT = 0, /* queue_capacity_outputs recovery strategy of continuing on. */ diff --git a/userspace/falco/falco_outputs.cpp b/userspace/falco/falco_outputs.cpp index d520fc3d..655945d6 100644 --- a/userspace/falco/falco_outputs.cpp +++ b/userspace/falco/falco_outputs.cpp @@ -68,7 +68,11 @@ falco_outputs::falco_outputs( } #ifndef __EMSCRIPTEN__ m_worker_thread = std::thread(&falco_outputs::worker, this); - m_queue.set_capacity(queue_capacity_outputs_items); + if (queue_capacity_outputs_items > 0) + { + m_queue.set_capacity(queue_capacity_outputs_items); + } + m_recovery = queue_capacity_outputs_recovery; #endif } @@ -289,13 +293,13 @@ inline void falco_outputs::push(const ctrl_msg& cmsg) switch (m_recovery) { case RECOVERY_EXIT: - fprintf(stderr, "Fatal error: Output queue reached maximum capacity. Exiting ... \n"); + fprintf(stderr, "Fatal error: Output queue out of memory. Exiting ... \n"); exit(EXIT_FAILURE); case RECOVERY_EMPTY: - fprintf(stderr, "Output queue reached maximum capacity. Empty queue and continue ... \n"); + fprintf(stderr, "Output queue out of memory. Empty queue and continue ... \n"); m_queue.empty(); default: - fprintf(stderr, "Output queue reached maximum capacity. Continue on ... \n"); + fprintf(stderr, "Output queue out of memory. Continue on ... \n"); break; } } diff --git a/userspace/falco/stats_writer.cpp b/userspace/falco/stats_writer.cpp index 0f627d2e..4346e2eb 100644 --- a/userspace/falco/stats_writer.cpp +++ b/userspace/falco/stats_writer.cpp @@ -91,7 +91,10 @@ stats_writer::stats_writer( m_config = config; // capacity and controls should not be relevant for stats outputs, adopt capacity // for completeness, but do not implement config recovery strategies. 
- m_queue.set_capacity(config->m_queue_capacity_outputs_items); + if (config->m_queue_capacity_outputs_items > 0) + { + m_queue.set_capacity(config->m_queue_capacity_outputs_items); + } if (config->m_metrics_enabled) { if (!config->m_metrics_output_file.empty())