cleanup(outputs): ensure old defaults in queue_capacity_outputs in new config

Co-authored-by: Leonardo Grasso <me@leonardograsso.com>
Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
This commit is contained in:
Melissa Kilby 2023-08-02 03:22:26 +00:00 committed by poiana
parent b55b209edf
commit 03a557725b
5 changed files with 38 additions and 24 deletions

View File

@ -322,24 +322,31 @@ rule_matching: first
# [Experimental] `queue_capacity_outputs`
#
# Falco utilizes tbb::concurrent_bounded_queue for the outputs, and this parameter
# allows you to customize the capacity. Refer to the official documentation:
# Falco utilizes tbb::concurrent_bounded_queue for handling outputs, and this parameter
# allows you to customize the queue capacity. Please refer to the official documentation:
# https://oneapi-src.github.io/oneTBB/main/tbb_userguide/Concurrent_Queue_Classes.html.
# On a healthy system with tuned Falco rules, the queue should not fill up.
# If it does, it most likely happens if the entire event flow is too slow. This
# could indicate that the server is under heavy load.
#
# Lowering the number of items can prevent steadily increasing memory until the OOM
# killer stops the Falco process. We expose recovery actions to self-limit or self
# OOM kill earlier similar to how we expose the kernel buffer size as parameter.
# However, it will not address the root cause of the event pipe not holding up.
# On a healthy system with optimized Falco rules, the queue should not fill up.
# If it does, it is most likely happening due to the entire event flow being too slow,
# indicating that the server is under heavy load.
#
# Lowering the number of items can prevent memory from steadily increasing until the OOM
# killer stops the Falco process. We provide recovery actions to self-limit or self-kill
# in order to handle this situation earlier, similar to how we expose the kernel buffer size
# as a parameter.
# However, these actions will not address the root cause of the event pipe not keeping up.
#
# `items`: the maximum number of items allowed in the queue. The default value of 0 means
# that the queue is unbounded.
# You can experiment with values greater or smaller than the anchor value 1000000.
#
# `recovery`: the strategy to follow when the queue fills up. This also applies when
# the queue is unbounded and all available memory on the system is consumed.
# recovery: 0 means continue.
# recovery: 1 means simply exit (default behavior).
# recovery: 2 means empty the queue and then continue.
queue_capacity_outputs:
# number of max items in queue
items: 1000000
# continue: 0 (default)
# exit: 1
# empty queue then continue: 2
recovery: 0
items: 0
recovery: 1
##########################

View File

@ -42,7 +42,7 @@ falco_configuration::falco_configuration():
m_watch_config_files(true),
m_buffered_outputs(false),
m_queue_capacity_outputs_items(DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS),
m_queue_capacity_outputs_recovery(RECOVERY_DROP_CURRENT),
m_queue_capacity_outputs_recovery(RECOVERY_EXIT),
m_time_format_iso_8601(false),
m_output_timeout(2000),
m_grpc_enabled(false),
@ -285,7 +285,7 @@ void falco_configuration::load_yaml(const std::string& config_name, const yaml_h
m_buffered_outputs = config.get_scalar<bool>("buffered_outputs", false);
m_queue_capacity_outputs_items = config.get_scalar<size_t>("queue_capacity_outputs.items", DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS);
m_queue_capacity_outputs_recovery = config.get_scalar<uint32_t>("queue_capacity_outputs.recovery", RECOVERY_DROP_CURRENT);
m_queue_capacity_outputs_recovery = config.get_scalar<uint32_t>("queue_capacity_outputs.recovery", RECOVERY_EXIT);
m_time_format_iso_8601 = config.get_scalar<bool>("time_format_iso_8601", false);
m_webserver_enabled = config.get_scalar<bool>("webserver.enabled", false);

View File

@ -13,7 +13,7 @@ limitations under the License.
#pragma once
#define DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS 1000000UL
#define DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS 0
enum outputs_recovery_code {
RECOVERY_DROP_CURRENT = 0, /* queue_capacity_outputs recovery strategy of continuing on. */

View File

@ -68,7 +68,11 @@ falco_outputs::falco_outputs(
}
#ifndef __EMSCRIPTEN__
m_worker_thread = std::thread(&falco_outputs::worker, this);
m_queue.set_capacity(queue_capacity_outputs_items);
if (queue_capacity_outputs_items > 0)
{
m_queue.set_capacity(queue_capacity_outputs_items);
}
m_recovery = queue_capacity_outputs_recovery;
#endif
}
@ -289,13 +293,13 @@ inline void falco_outputs::push(const ctrl_msg& cmsg)
switch (m_recovery)
{
case RECOVERY_EXIT:
fprintf(stderr, "Fatal error: Output queue reached maximum capacity. Exiting ... \n");
fprintf(stderr, "Fatal error: Output queue out of memory. Exiting ... \n");
exit(EXIT_FAILURE);
case RECOVERY_EMPTY:
fprintf(stderr, "Output queue reached maximum capacity. Empty queue and continue ... \n");
fprintf(stderr, "Output queue out of memory. Empty queue and continue ... \n");
m_queue.empty();
default:
fprintf(stderr, "Output queue reached maximum capacity. Continue on ... \n");
fprintf(stderr, "Output queue out of memory. Continue on ... \n");
break;
}
}

View File

@ -91,7 +91,10 @@ stats_writer::stats_writer(
m_config = config;
// capacity and controls should not be relevant for stats outputs, adopt capacity
// for completeness, but do not implement config recovery strategies.
m_queue.set_capacity(config->m_queue_capacity_outputs_items);
if (config->m_queue_capacity_outputs_items > 0)
{
m_queue.set_capacity(config->m_queue_capacity_outputs_items);
}
if (config->m_metrics_enabled)
{
if (!config->m_metrics_output_file.empty())