Compare commits

...

7 Commits

Author SHA1 Message Date
Andrea Terzolo
9eb611609a chore: bump libs to the latest tag 0.13.2
Signed-off-by: Andrea Terzolo <andreaterzolo3@gmail.com>
2023-10-16 15:52:10 +02:00
Andrea Terzolo
f23b0c1a20 chore: bump libs to 0.13.2-rc1 tag
Signed-off-by: Andrea Terzolo <andreaterzolo3@gmail.com>
2023-10-13 18:53:50 +02:00
Melissa Kilby
7b28b7acec feat(userspace): remove experimental outputs queue recovery strategies
Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
2023-10-13 18:53:50 +02:00
Melissa Kilby
aa4899c1e5 cleanup(userspace/falco): reset s_timerid_exists at stats_writer teardown
Co-authored-by: Andrea Terzolo <andreaterzolo3@gmail.com>
Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
2023-10-13 18:53:50 +02:00
Melissa Kilby
d454c8a1f4 chore: apply codespell fixes
Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
2023-10-13 18:53:50 +02:00
Melissa Kilby
0b48da30ca cleanup(userspace/falco): add more comments around timer_delete workaround
Co-authored-by: Federico Di Pierro <nierro92@gmail.com>
Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
2023-10-13 18:53:50 +02:00
Melissa Kilby
0b761ff1da fix(userspace/falco): timer_delete() workaround due to bug in older GLIBC
Workaround for older GLIBC versions (< 2.35), where calling timer_delete()
with an invalid timer ID not returned by timer_create() causes a segfault because of
a bug in GLIBC (https://sourceware.org/bugzilla/show_bug.cgi?id=28257).

Signed-off-by: Melissa Kilby <melissa.kilby.oss@gmail.com>
2023-10-13 18:53:50 +02:00
13 changed files with 47 additions and 88 deletions

View File

@@ -22,7 +22,7 @@ Released on 2023-09-25
* new(falco-driver-loader): --source-only now prints the values as env vars [[#2353](https://github.com/falcosecurity/falco/pull/2353)] - [@steakunderscore](https://github.com/steakunderscore)
* new(docker): allow passing options to falco-driver-loader from the driver loader cointainer [[#2781](https://github.com/falcosecurity/falco/pull/2781)] - [@LucaGuerra](https://github.com/LucaGuerra)
* new(docker): allow passing options to falco-driver-loader from the driver loader container [[#2781](https://github.com/falcosecurity/falco/pull/2781)] - [@LucaGuerra](https://github.com/LucaGuerra)
* new(docker): add experimental falco-distroless image based on Wolfi [[#2768](https://github.com/falcosecurity/falco/pull/2768)] - [@LucaGuerra](https://github.com/LucaGuerra)
* new: the legacy falco image is available as driver-loader-legacy [[#2718](https://github.com/falcosecurity/falco/pull/2718)] - [@LucaGuerra](https://github.com/LucaGuerra)
* new: added option to enable/disable echoing of server answer to stdout (disabled by default) when using HTTP output [[#2602](https://github.com/falcosecurity/falco/pull/2602)] - [@FedeDP](https://github.com/FedeDP)
@@ -1086,7 +1086,7 @@ Released on 2021-01-18
### Minor Changes
* build: bump b64 to v2.0.0.1 [[#1441](https://github.com/falcosecurity/falco/pull/1441)] - [@fntlnz](https://github.com/fntlnz)
* rules(macro container_started): re-use `spawned_process` macro inside `container_started` macro [[#1449](https://github.com/falcosecurity/falco/pull/1449)] - [@leodido](https://github.com/leodido)
* rules(macro container_started): reuse `spawned_process` macro inside `container_started` macro [[#1449](https://github.com/falcosecurity/falco/pull/1449)] - [@leodido](https://github.com/leodido)
* docs: reach out documentation [[#1472](https://github.com/falcosecurity/falco/pull/1472)] - [@fntlnz](https://github.com/fntlnz)
* docs: Broken outputs.proto link [[#1493](https://github.com/falcosecurity/falco/pull/1493)] - [@deepskyblue86](https://github.com/deepskyblue86)
* docs(README.md): correct broken links [[#1506](https://github.com/falcosecurity/falco/pull/1506)] - [@leogr](https://github.com/leogr)

View File

@@ -35,8 +35,8 @@ else()
# To test against another falcosecurity/libs version (or branch, or commit), pass the variable,
# e.g., `cmake -DFALCOSECURITY_LIBS_VERSION=dev ..`
if(NOT FALCOSECURITY_LIBS_VERSION)
set(FALCOSECURITY_LIBS_VERSION "0.13.1")
set(FALCOSECURITY_LIBS_CHECKSUM "SHA256=2be42a27be3ffe6bd7e53eaa5d8358cab05a0dca821819c6e9059e51b9786219")
set(FALCOSECURITY_LIBS_VERSION "0.13.2")
set(FALCOSECURITY_LIBS_CHECKSUM "SHA256=442810cc09a63406053ec3506ca16b59b5f5514ab08d478632f65beac6f167b5")
endif()
# cd /path/to/build && cmake /path/to/source

View File

@@ -331,24 +331,17 @@ rule_matching: first
# If it does, it is most likely happening due to the entire event flow being too slow,
# indicating that the server is under heavy load.
#
# Lowering the number of items can prevent memory from steadily increasing until the OOM
# killer stops the Falco process. We provide recovery actions to self-limit or self-kill
# in order to handle this situation earlier, similar to how we expose the kernel buffer size
# as a parameter. However, it will not address the root cause of the event pipe not keeping up.
#
# `capacity`: the maximum number of items allowed in the queue is determined by this value.
# Setting the value to 0 (which is the default) is equivalent to keeping the queue unbounded.
# In other words, when this configuration is set to 0, the number of allowed items is effectively
# set to the largest possible long value, disabling this setting.
# In other words, when this configuration is set to 0, the number of allowed items is
# effectively set to the largest possible long value, disabling this setting.
#
# `recovery`: strategy to follow when the queue becomes filled up. It applies only when the
# queue is bounded and there is still available system memory. In the case of an unbounded
# queue, if the available memory on the system is consumed, the Falco process would be
# OOM killed. The value `exit` is the default, `continue` does nothing special and `empty`
# empties the queue and then continues.
# In the case of an unbounded queue, if the available memory on the system is consumed,
# the Falco process would be OOM killed. When a capacity is set and the queue is full,
# the current event is dropped and the event loop continues. This behavior mirrors
# kernel-side event drops when the buffer between kernel space and user space is full.
outputs_queue:
capacity: 0
recovery: exit
##########################

View File

@@ -69,7 +69,7 @@ The allowed publishing channels are:
Both channels are equivalent and may publish the same artifacts. However, for historical reasons and to avoid confusion, the **`docker.io` registry should only be used for container images** and not for other kinds of artifacts (e.g., plugins, rules, etc.).
Mirrors are allowed and encouraged if they facilitate artifacts consumption by our users. This proposal reccomends to enable mirrors on the major public OCI registry, such as [Amazon ECR](https://gallery.ecr.aws/) (which is already implentend in our infra at the time of writing).
Mirrors are allowed and encouraged if they facilitate artifacts consumption by our users. This proposal recommends enabling mirrors on the major public OCI registries, such as [Amazon ECR](https://gallery.ecr.aws/) (which is already implemented in our infra at the time of writing).
Official **channels and mirrors must be listed at [falco.org](https://falco.org/)**.

View File

@@ -33,12 +33,6 @@ static std::vector<std::string> rule_matching_names = {
"all"
};
static std::vector<std::string> outputs_queue_recovery_names = {
"continue",
"exit",
"empty",
};
bool falco_common::parse_priority(std::string v, priority_type& out)
{
for (size_t i = 0; i < priority_names.size(); i++)
@@ -66,19 +60,6 @@ falco_common::priority_type falco_common::parse_priority(std::string v)
return out;
}
bool falco_common::parse_queue_recovery(std::string v, outputs_queue_recovery_type& out)
{
for (size_t i = 0; i < outputs_queue_recovery_names.size(); i++)
{
if (!strcasecmp(v.c_str(), outputs_queue_recovery_names[i].c_str()))
{
out = (outputs_queue_recovery_type) i;
return true;
}
}
return false;
}
bool falco_common::format_priority(priority_type v, std::string& out, bool shortfmt)
{
if ((size_t) v < priority_names.size())

View File

@@ -60,12 +60,6 @@ struct falco_exception : std::exception
namespace falco_common
{
enum outputs_queue_recovery_type {
RECOVERY_CONTINUE = 0, /* outputs_queue_capacity recovery strategy of continuing on. */
RECOVERY_EXIT = 1, /* outputs_queue_capacity recovery strategy of exiting, self OOM kill. */
RECOVERY_EMPTY = 2, /* outputs_queue_capacity recovery strategy of emptying queue then continuing. */
};
const std::string syscall_source = sinsp_syscall_event_source_name;
// Same as numbers/indices into the above vector
@@ -83,7 +77,6 @@ namespace falco_common
bool parse_priority(std::string v, priority_type& out);
priority_type parse_priority(std::string v);
bool parse_queue_recovery(std::string v, outputs_queue_recovery_type& out);
bool format_priority(priority_type v, std::string& out, bool shortfmt=false);
std::string format_priority(priority_type v, bool shortfmt=false);

View File

@@ -65,7 +65,6 @@ falco::app::run_result falco::app::actions::init_outputs(falco::app::state& s)
s.config->m_output_timeout,
s.config->m_buffered_outputs,
s.config->m_outputs_queue_capacity,
s.config->m_outputs_queue_recovery,
s.config->m_time_format_iso_8601,
hostname));

View File

@@ -87,9 +87,9 @@ namespace falco
/**
* @brief If a signal is triggered, performs a handler action.
* The action function will be invoked exactly once among all the
* simultaneus calls. The action will not be performed if the
* simultaneous calls. The action will not be performed if the
* signal is not triggered, or if the trigger has already been
* handled. When an action is being performed, all the simultaneus
* handled. When an action is being performed, all the simultaneous
* callers will wait and be blocked up until its execution is finished.
* If the handler action throws an exception, it will be considered
* performed. After the first handler has been performed, every

View File

@@ -42,7 +42,6 @@ falco_configuration::falco_configuration():
m_watch_config_files(true),
m_buffered_outputs(false),
m_outputs_queue_capacity(DEFAULT_OUTPUTS_QUEUE_CAPACITY_UNBOUNDED_MAX_LONG_VALUE),
m_outputs_queue_recovery(falco_common::RECOVERY_EXIT),
m_time_format_iso_8601(false),
m_output_timeout(2000),
m_grpc_enabled(false),
@@ -290,11 +289,6 @@ void falco_configuration::load_yaml(const std::string& config_name, const yaml_h
{
m_outputs_queue_capacity = DEFAULT_OUTPUTS_QUEUE_CAPACITY_UNBOUNDED_MAX_LONG_VALUE;
}
std::string recovery = config.get_scalar<std::string>("outputs_queue.recovery", "exit");
if (!falco_common::parse_queue_recovery(recovery, m_outputs_queue_recovery))
{
throw std::logic_error("Unknown recovery \"" + recovery + "\"--must be one of exit, continue, empty");
}
m_time_format_iso_8601 = config.get_scalar<bool>("time_format_iso_8601", false);

View File

@@ -74,7 +74,6 @@ public:
bool m_watch_config_files;
bool m_buffered_outputs;
size_t m_outputs_queue_capacity;
falco_common::outputs_queue_recovery_type m_outputs_queue_recovery;
bool m_time_format_iso_8601;
uint32_t m_output_timeout;

View File

@@ -48,7 +48,6 @@ falco_outputs::falco_outputs(
uint32_t timeout,
bool buffered,
size_t outputs_queue_capacity,
falco_common::outputs_queue_recovery_type outputs_queue_recovery,
bool time_format_iso_8601,
const std::string& hostname)
{
@@ -67,7 +66,6 @@ falco_outputs::falco_outputs(
add_output(output);
}
m_outputs_queue_num_drops = {0};
m_outputs_queue_recovery = outputs_queue_recovery;
#ifndef __EMSCRIPTEN__
m_queue.set_capacity(outputs_queue_capacity);
m_worker_thread = std::thread(&falco_outputs::worker, this);
@@ -287,29 +285,11 @@ inline void falco_outputs::push(const ctrl_msg& cmsg)
#ifndef __EMSCRIPTEN__
if (!m_queue.try_push(cmsg))
{
switch (m_outputs_queue_recovery)
if(m_outputs_queue_num_drops.load() == 0)
{
case falco_common::RECOVERY_EXIT:
throw falco_exception("Fatal error: Output queue out of memory. Exiting ...");
case falco_common::RECOVERY_EMPTY:
/* Print a log just the first time */
if(m_outputs_queue_num_drops.load() == 0)
{
falco_logger::log(LOG_ERR, "Output queue out of memory. Drop event plus events in queue due to emptying the queue; continue on ...");
}
m_outputs_queue_num_drops += m_queue.size() + 1;
m_queue.clear();
break;
case falco_common::RECOVERY_CONTINUE:
if(m_outputs_queue_num_drops.load() == 0)
{
falco_logger::log(LOG_ERR, "Output queue out of memory. Drop event and continue on ...");
}
m_outputs_queue_num_drops++;
break;
default:
throw falco_exception("Fatal error: strategy unknown. Exiting ...");
falco_logger::log(LOG_ERR, "Outputs queue out of memory. Drop event and continue on ...");
}
m_outputs_queue_num_drops++;
}
#else
for (auto o : m_outputs)

View File

@@ -36,7 +36,7 @@ limitations under the License.
All methods in this class are thread-safe. The output framework supports
a multi-producer model where messages are stored in a queue and consumed
by each configured output asynchrounously.
by each configured output asynchronously.
*/
class falco_outputs
{
@@ -50,7 +50,6 @@ public:
uint32_t timeout,
bool buffered,
size_t outputs_queue_capacity,
falco_common::outputs_queue_recovery_type outputs_queue_recovery,
bool time_format_iso_8601,
const std::string& hostname);
@@ -87,8 +86,8 @@ public:
void reopen_outputs();
/*!
\brief Return the number of currently dropped events as a result of failed push attempts
into the outputs queue when using `continue` or `empty` recovery strategies.
\brief Return the number of events currently dropped due to failed push
attempts into the outputs queue
*/
uint64_t get_outputs_queue_num_drops();
@@ -121,7 +120,6 @@ private:
falco_outputs_cbq m_queue;
#endif
falco_common::outputs_queue_recovery_type m_outputs_queue_recovery;
std::atomic<uint64_t> m_outputs_queue_num_drops;
std::thread m_worker_thread;
inline void push(const ctrl_msg& cmsg);

View File

@@ -34,6 +34,12 @@ limitations under the License.
// check that this value changed since their last observation.
static std::atomic<stats_writer::ticker_t> s_timer((stats_writer::ticker_t) 0);
static timer_t s_timerid;
// note: Workaround for older GLIBC versions (< 2.35), where calling timer_delete()
// with an invalid timer ID not returned by timer_create() causes a segfault because of
// a bug in GLIBC (https://sourceware.org/bugzilla/show_bug.cgi?id=28257).
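// A nullptr check on s_timerid is not sufficient: timer_t is an opaque type, and even after
// timer_create() succeeds s_timerid has been observed to still compare equal to nullptr,
// so we explicitly track whether a timer was created.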
bool s_timerid_exists = false;
static void timer_handler(int signum)
{
@@ -60,18 +66,31 @@ bool stats_writer::init_ticker(uint32_t interval_msec, std::string &err)
sev.sigev_value.sival_ptr = &s_timerid;
#ifndef __EMSCRIPTEN__
// delete any previously set timer
timer_delete(s_timerid);
if (timer_create(CLOCK_MONOTONIC, &sev, &s_timerid) == -1) {
if (s_timerid_exists)
{
if (timer_delete(s_timerid) == -1)
{
err = std::string("Could not delete previous timer: ") + strerror(errno);
return false;
}
s_timerid_exists = false;
}
if (timer_create(CLOCK_MONOTONIC, &sev, &s_timerid) == -1)
{
err = std::string("Could not create periodic timer: ") + strerror(errno);
return false;
}
s_timerid_exists = true;
#endif
timer.it_value.tv_sec = interval_msec / 1000;
timer.it_value.tv_nsec = (interval_msec % 1000) * 1000 * 1000;
timer.it_interval = timer.it_value;
#ifndef __EMSCRIPTEN__
if (timer_settime(s_timerid, 0, &timer, NULL) == -1) {
if (timer_settime(s_timerid, 0, &timer, NULL) == -1)
{
err = std::string("Could not set up periodic timer: ") + strerror(errno);
return false;
}
@@ -113,8 +132,7 @@ stats_writer::stats_writer(
if (m_initialized)
{
#ifndef __EMSCRIPTEN__
// capacity and controls should not be relevant for stats outputs, adopt capacity
// for completeness, but do not implement config recovery strategies.
// Adopt capacity for completeness, even if it's likely not relevant
m_queue.set_capacity(config->m_outputs_queue_capacity);
m_worker = std::thread(&stats_writer::worker, this);
#endif
@@ -134,7 +152,11 @@ stats_writer::~stats_writer()
}
// delete timerID and reset timer
#ifndef __EMSCRIPTEN__
timer_delete(s_timerid);
if (s_timerid_exists)
{
timer_delete(s_timerid);
s_timerid_exists = false;
}
#endif
}
}