diff --git a/falco.yaml b/falco.yaml index b115b9f9..601f528d 100644 --- a/falco.yaml +++ b/falco.yaml @@ -65,10 +65,10 @@ # syscall_event_drops # metrics # Falco performance tuning (advanced) -# syscall_buf_size_preset -# syscall_drop_failed_exit +# syscall_buf_size_preset [DEPRECATED] +# syscall_drop_failed_exit [DEPRECATED] # base_syscalls -# modern_bpf.cpus_for_each_syscall_buffer +# modern_bpf.cpus_for_each_syscall_buffer [DEPRECATED] ################################ @@ -160,16 +160,157 @@ rules_file: # # Available engines: # - `kmod`: Kernel Module (Kernel Module) -# - `ebpf`: eBPF (Extended Berkeley Packet Filter) -# - `modern-ebpf`: Modern eBPF (Modern Extended Berkeley Packet Filter), available only for recent kernels +# - `ebpf`: eBPF (eBPF probe) +# - `modern-ebpf`: Modern eBPF (CO-RE eBPF probe) # - `gvisor`: gVisor (gVisor sandbox) # - `replay`: Replay a scap trace file -# - `none`: No engine loaded, useful to run `syscall` source plugin or just plugins without loading any other event producer. - -# Select the appropriate engine kind by uncommenting the corresponding line. -# Make sure to specify only one engine kind at a time. +# - `none`: No event producer loaded, useful to run with plugins. +# +# Only one engine can be specified in the `kind` key. # Moreover, for each engine multiple options might be available, -# grouped under engine specific configuration keys. +# grouped under engine-specific configuration keys. +# Some of them deserve an in-depth description: +# +################### `buf_size_preset` +# +# --- [Description] +# +# The syscall buffer index determines the size of the shared space between Falco +# and its drivers. This shared space serves as a temporary storage for syscall +# events, allowing them to be transferred from the kernel to the userspace +# efficiently. The buffer size for each online CPU is determined by the buffer +# index, and each CPU has its own dedicated buffer. 
Adjusting this index allows +# you to control the overall size of the syscall buffers. +# +# --- [Usage] +# +# The index 0 is reserved, and each subsequent index corresponds to an +# increasing size in bytes. For example, index 1 corresponds to a size of 1 MB, +# index 2 corresponds to 2 MB, and so on: +# +# [(*), 1 MB, 2 MB, 4 MB, 8 MB, 16 MB, 32 MB, 64 MB, 128 MB, 256 MB, 512 MB] +# ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ +# | | | | | | | | | | | +# 0 1 2 3 4 5 6 7 8 9 10 +# +# +# The buffer dimensions in bytes are determined by the following requirements: +# (1) a power of 2. +# (2) a multiple of your system_page_size. +# (3) greater than `2 * (system_page_size)`. +# +# The buffer size constraints may limit the usability of certain indexes. Let's +# consider an example to illustrate this: +# +# If your system has a page size of 1 MB, the first available buffer size would +# be 4 MB because 2 MB is exactly equal to 2 * (system_page_size), which is not +# sufficient as we require more than 2 * (system_page_size). In this example, it +# is evident that if the page size is 1 MB, the first index that can be used is 3. +# +# However, in most cases, these constraints do not pose a limitation, and all +# indexes from 1 to 10 can be used. You can check your system's page size using +# the Falco `--page-size` command-line option. +# +# --- [Suggestions] +# +# The buffer size was previously fixed at 8 MB (index 4). You now have the +# option to adjust the size based on your needs. Increasing the size, such as to +# 16 MB (index 5), can reduce syscall drops in heavy production systems, but may +# impact performance. Decreasing the size can speed up the system but may +# increase syscall drops. It's important to note that the buffer size is mapped +# twice in the process' virtual memory, so a buffer of 8 MB will result in a 16 +# MB area in virtual memory. Use this parameter with caution and only modify it +# if the default size is not suitable for your use case. 
+# +################### `drop_failed_exit` +# +# --- [Description] +# +# Enabling this option in Falco allows it to drop failed system call exit events +# in the kernel drivers before pushing them onto the ring buffer. This +# optimization can result in lower CPU usage and more efficient utilization of +# the ring buffer, potentially reducing the number of event losses. However, it +# is important to note that enabling this option also means sacrificing some +# visibility into the system. +# +################### `cpus_for_each_buffer` (modern-ebpf only) +# +# --- [Description] +# +# The modern_bpf driver in Falco utilizes the new BPF ring buffer, which has a +# different memory footprint compared to the current BPF driver that uses the +# perf buffer. The Falco core maintainers have discussed the differences and +# their implications, particularly in Kubernetes environments where limits need +# to be carefully set to avoid interference with the Falco daemonset deployment +# from the OOM killer. Based on guidance received from the kernel mailing list, +# it is recommended to assign multiple CPUs to one buffer instead of allocating +# a buffer for each CPU individually. This helps optimize resource allocation +# and prevent potential issues related to memory usage. +# +# This is an index that controls how many CPUs you want to assign to a single +# syscall buffer (ring buffer). By default, for modern_bpf every syscall buffer +# is associated with 2 CPUs, so the mapping is 1:2. The modern BPF probe allows +# you to choose different mappings; for example, changing the value to `1` +# results in a 1:1 mapping and would mean one syscall buffer for each CPU (this +# is the default for the `bpf` driver). +# +# --- [Usage] +# +# You can choose an index from 0 to MAX_NUMBER_ONLINE_CPUs to set how many CPUs +# share each syscall buffer. The value 0 represents a single buffer shared among +# all online CPUs. 
It serves as a flexible option when the exact number of +# online CPUs is unknown. Here's an example to illustrate this: +# +# Consider a system with 7 online CPUs: +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# +# - `1` means a syscall buffer for each CPU, so 7 buffers +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# | | | | | | | +# BUFFERs 0 1 2 3 4 5 6 +# +# - `2` (Default value) means a syscall buffer for each CPU pair, so 4 buffers +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# | | | | | | | +# BUFFERs 0 0 1 1 2 2 3 +# +# Please note that in this example, there are 4 buffers in total. Three of the +# buffers are associated with pairs of CPUs, while the last buffer is mapped to +# a single CPU. This arrangement is necessary because we have an odd number of +# CPUs. +# +# - `0` or `MAX_NUMBER_ONLINE_CPUs` means a syscall buffer shared between all +# CPUs, so 1 buffer +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# | | | | | | | +# BUFFERs 0 0 0 0 0 0 0 +# +# Moreover, you have the option to combine this parameter with the +# `buf_size_preset` index. For instance, you can create a large shared +# syscall buffer of 512 MB (using `buf_size_preset: 10`) that is +# shared among all the online CPUs. +# +# --- [Suggestions] +# +# The default choice of index 2 (one syscall buffer for each CPU pair) was made +# because the modern bpf probe utilizes a different memory allocation strategy +# compared to the other two drivers (bpf and kernel module). However, you have +# the flexibility to experiment and find the optimal configuration for your +# system. +# +# When considering a fixed `buf_size_preset` (and therefore a fixed buffer dimension): +# - Increasing this config's value results in a lower number of buffers, which can +# speed up your system and reduce memory usage +# - However, using too few buffers may increase contention in the kernel, +# leading to a slowdown. 
+# +# If you have low event throughputs and minimal drops, reducing the number of +# buffers (higher `cpus_for_each_buffer`) can lower the memory footprint. +# engine: kind: kmod kmod: @@ -181,7 +322,7 @@ engine: buf_size_preset: 4 drop_failed_exit: false modern-ebpf: - cpus_for_each_buffer: 2 ## todo! rename it without syscall + cpus_for_each_buffer: 2 buf_size_preset: 4 drop_failed_exit: false replay: @@ -826,7 +967,9 @@ metrics: # [DEPRECATED] `syscall_buf_size_preset` # -# Deprecated in favor of engine.{kmod,ebpf,modern-ebpf}.buf_size_preset +# Deprecated in favor of engine.{kmod,ebpf,modern-ebpf}.buf_size_preset. +# This config is evaluated only if the default `engine` config block is not changed, +# otherwise it is ignored. # # --- [Description] # @@ -879,7 +1022,10 @@ metrics: syscall_buf_size_preset: 4 # [DEPRECATED] `syscall_drop_failed_exit` -# Deprecated in favor of engine.{kmod,ebpf,modern-ebpf}.drop_failed_exit +# +# Deprecated in favor of engine.{kmod,ebpf,modern-ebpf}.drop_failed_exit. +# This config is evaluated only if the default `engine` config block is not changed, +# otherwise it is ignored. # # Enabling this option in Falco allows it to drop failed system call exit events # in the kernel drivers before pushing them onto the ring buffer. This @@ -1006,7 +1152,9 @@ base_syscalls: # [DEPRECATED] `modern_bpf.cpus_for_each_syscall_buffer`, modern_bpf only # -# Deprecated in favor of engine.modern-ebpf.cpus_for_each_syscall_buffer +# Deprecated in favor of engine.modern-ebpf.cpus_for_each_buffer. +# This config is evaluated only if the default `engine` config block is not changed, +# otherwise it is ignored. # # --- [Description] #