From b4237545751dcfe03a99292ded5436700f977ea4 Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Tue, 23 May 2023 22:40:39 -0700 Subject: [PATCH] cleanup(config): re-arrange falco.yaml configs in logical categories * add an index for logical categories * move configs around without changing description content, solely add a uniform header to each config * indicate "Stable" or "Experimental" for most configs to indicate current stability or maturity Signed-off-by: Melissa Kilby --- falco.yaml | 1009 +++++++++++++++++++++++++++++----------------------- 1 file changed, 557 insertions(+), 452 deletions(-) diff --git a/falco.yaml b/falco.yaml index 3aaba482..98c4e5f6 100644 --- a/falco.yaml +++ b/falco.yaml @@ -1,5 +1,5 @@ # -# Copyright (C) 2022 The Falco Authors. +# Copyright (C) 2023 The Falco Authors. # # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,72 @@ # limitations under the License. # +################ +# Config index # +################ + +# Here is an index of the configuration categories to help you navigate +# the Falco configuration file: +# +# Falco rules + config files +# rules_file +# watch_config_files +# Falco outputs (basic) +# time_format_iso_8601 +# priority +# json_output +# json_include_output_property +# json_include_tags_property +# stdout_output +# syslog_output +# file_output +# buffered_outputs +# outputs (throttling) +# Falco internal logging / alerting / metrics (basic) +# log_stderr +# log_syslog +# log_level +# libs_logger +# Falco outputs (advanced) +# grpc_output +# grpc +# http_output +# program_output +# webserver +# Falco internal logging / alerting / metrics (advanced) +# syscall_event_timeouts +# syscall_event_drops +# metrics +# output_timeout +# Falco cloud native relevant configs +# metadata_download +# load_plugins +# plugins +# Falco performance tuning +# syscall_buf_size_preset +# syscall_drop_failed_exit +# base_syscalls +# modern_bpf.cpus_for_each_syscall_buffer + 
+################################ +# Falco command-line arguments # +################################ + +# To explore the latest command-line arguments supported by Falco for additional +# configuration, you can run `falco --help` in your terminal. You can also pass +# configuration options from this config file as command-line arguments by using +# the `-o` flag followed by the option name and value. In the following example, +# three config options (`json_output`, `log_level`, and `log_stderr`) are passed as +# command-line arguments with their corresponding values : +# `falco -o "json_output=true" -o "log_level=debug" -o "log_stderr=true"`. Please +# note that command-line arguments take precedence over the options specified in +# this config file. + + +############################## +# Falco rules + config files # +############################## + # File(s) or Directories containing Falco rules, loaded at startup. # The name "rules_file" is only for backwards compatibility. # If the entry is a file, it will be read directly. If the entry is a directory, @@ -32,47 +98,27 @@ rules_file: - /etc/falco/falco_rules.local.yaml - /etc/falco/rules.d -# -# Plugins that are available for use. These plugins are not loaded by -# default, as they require explicit configuration to point to -# cloudtrail log files. -# - -# To learn more about the supported formats for -# init_config/open_params for the cloudtrail plugin, see the README at -# https://github.com/falcosecurity/plugins/blob/master/plugins/cloudtrail/README.md. 
-plugins: - - name: k8saudit - library_path: libk8saudit.so - init_config: - # maxEventSize: 262144 - # webhookMaxBatchSize: 12582912 - # sslCertificate: /etc/falco/falco.pem - open_params: "http://:9765/k8s-audit" - - name: cloudtrail - library_path: libcloudtrail.so - # see docs for init_config and open_params: - # https://github.com/falcosecurity/plugins/blob/master/plugins/cloudtrail/README.md - - name: json - library_path: libjson.so - -# Setting this list to empty ensures that the above plugins are *not* -# loaded and enabled by default. If you want to use the above plugins, -# set a meaningful init_config/open_params for the cloudtrail plugin -# and then change this to: -# load_plugins: [cloudtrail, json] -load_plugins: [] - # Watch config file and rules files for modification. # When a file is modified, Falco will propagate new config, # by reloading itself. watch_config_files: true + +######################### +# Falco outputs (basic) # +######################### + # If true, the times displayed in log messages and output messages # will be in ISO 8601. By default, times are displayed in the local # time zone, as governed by /etc/localtime. time_format_iso_8601: false +# Minimum rule priority level to load and run. All rules having a +# priority more severe than this level will be loaded/run. Can be one +# of "emergency", "alert", "critical", "error", "warning", "notice", +# "informational", "debug". +priority: debug + # If "true", print falco alert messages and rules file # loading/validation results as json, which allows for easier # consumption by downstream programs. Default is "false". @@ -89,6 +135,53 @@ json_include_output_property: true # false, the "tags" field will not be included in the json output at all. json_include_tags_property: true +# Where security notifications should go. +# Multiple outputs can be enabled. 
+stdout_output: + enabled: true + +syslog_output: + enabled: true + +# If keep_alive is set to true, the file will be opened once and +# continuously written to, with each output message on its own +# line. If keep_alive is set to false, the file will be re-opened +# for each output message. +# +# Also, the file will be closed and reopened if falco is signaled with +# SIGUSR1. +file_output: + enabled: false + keep_alive: false + filename: ./events.txt + +# Whether or not output to any of the output channels below is +# buffered. Defaults to false +buffered_outputs: false + +# A throttling mechanism implemented as a token bucket limits the +# rate of Falco notifications. One rate limiter is assigned to each event +# source, so that alerts coming from one can't influence the throttling +# mechanism of the others. This is controlled by the following options: +# - rate: the number of tokens (i.e. right to send a notification) +# gained per second. When 0, the throttling mechanism is disabled. +# Defaults to 0. +# - max_burst: the maximum number of tokens outstanding. Defaults to 1000. +# +# With these defaults, the throttling mechanism is disabled. +# For example, by setting rate to 1 Falco could send up to 1000 notifications +# after an initial quiet period, and then up to 1 notification per second +# afterward. It would gain the full burst back after 1000 seconds of +# no activity. +outputs: + rate: 0 + max_burst: 1000 + + +####################################################### +# Falco internal logging / alerting / metrics (basic) # +####################################################### + # Send information logs to stderr and/or syslog Note these are *not* security # notification logs! These are just Falco lifecycle (and possibly error) logs. log_stderr: true @@ -111,311 +204,19 @@ libs_logger: # "info", "debug", "trace". severity: debug -# Minimum rule priority level to load and run. All rules having a -# priority more severe than this level will be loaded/run. 
Can be one -# of "emergency", "alert", "critical", "error", "warning", "notice", -# "informational", "debug". -priority: debug -# Whether or not output to any of the output channels below is -# buffered. Defaults to false -buffered_outputs: false +############################ +# Falco outputs (advanced) # +############################ -# Falco uses a shared buffer between the kernel and userspace to pass -# system call information. When Falco detects that this buffer is -# full and system calls have been dropped, it can take one or more of -# the following actions: -# - ignore: do nothing (default when list of actions is empty) -# - log: log a DEBUG message noting that the buffer was full -# - alert: emit a Falco alert noting that the buffer was full -# - exit: exit Falco with a non-zero rc +# [Stable] `grpc_output` and `grpc` # -# Notice it is not possible to ignore and log/alert messages at the same time. -# -# The rate at which log/alert messages are emitted is governed by a -# token bucket. The rate corresponds to one message every 30 seconds -# with a burst of one message (by default). -# -# The messages are emitted when the percentage of dropped system calls -# with respect the number of events in the last second -# is greater than the given threshold (a double in the range [0, 1]). -# -# For debugging/testing it is possible to simulate the drops using -# the `simulate_drops: true`. In this case the threshold does not apply. - -syscall_event_drops: - threshold: .1 - actions: - - log - - alert - rate: .03333 - max_burst: 1 - simulate_drops: false - -# Falco uses a shared buffer between the kernel and userspace to receive -# the events (eg., system call information) in userspace. -# -# Anyways, the underlying libraries can also timeout for various reasons. -# For example, there could have been issues while reading an event. -# Or the particular event needs to be skipped. -# Normally, it's very unlikely that Falco does not receive events consecutively. 
-# -# Falco is able to detect such uncommon situation. -# -# Here you can configure the maximum number of consecutive timeouts without an event -# after which you want Falco to alert. -# By default this value is set to 1000 consecutive timeouts without an event at all. -# How this value maps to a time interval depends on the CPU frequency. - -syscall_event_timeouts: - max_consecutives: 1000 - -# Enabling this option allows Falco to drop failed syscalls exit events -# in the kernel driver before the event is pushed onto the ring buffer. -# This can enable some small optimization both in CPU usage and ring buffer usage, -# possibly leading to lower number of event losses. -# Be careful: enabling it also means losing a bit of visibility on the system. -syscall_drop_failed_exit: false - -# --- [Description] -# -# This is an index that controls the dimension of the syscall buffers. -# The syscall buffer is the shared space between Falco and its drivers where all the syscall events -# are stored. -# Falco uses a syscall buffer for every online CPU, and all these buffers share the same dimension. -# So this parameter allows you to control the size of all the buffers! -# -# --- [Usage] -# -# You can choose between different indexes: from `1` to `10` (`0` is reserved for future uses). -# Every index corresponds to a dimension in bytes: -# -# [(*), 1 MB, 2 MB, 4 MB, 8 MB, 16 MB, 32 MB, 64 MB, 128 MB, 256 MB, 512 MB] -# ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ -# | | | | | | | | | | | -# 0 1 2 3 4 5 6 7 8 9 10 -# -# As you can see the `0` index is reserved, while the index `1` corresponds to -# `1 MB` and so on. -# -# These dimensions in bytes derive from the fact that the buffer size must be: -# (1) a power of 2. -# (2) a multiple of your system_page_dimension. -# (3) greater than `2 * (system_page_dimension)`. 
-# -# According to these constraints is possible that sometimes you cannot use all the indexes, let's consider an -# example to better understand it: -# If you have a `page_size` of 1 MB the first available buffer size is 4 MB because 2 MB is exactly -# `2 * (system_page_size)` -> `2 * 1 MB`, but this is not enough we need more than `2 * (system_page_size)`! -# So from this example is clear that if you have a page size of 1 MB the first index that you can use is `3`. -# -# Please note: this is a very extreme case just to let you understand the mechanism, usually the page size is something -# like 4 KB so you have no problem at all and you can use all the indexes (from `1` to `10`). -# -# To check your system page size use the Falco `--page-size` command line option. The output on a system with a page -# size of 4096 Bytes (4 KB) should be the following: -# -# "Your system page size is: 4096 bytes." -# -# --- [Suggestions] -# -# Before the introduction of this param the buffer size was fixed to 8 MB (so index `4`, as you can see -# in the default value below). -# You can increase the buffer size when you face syscall drops. A size of 16 MB (so index `5`) can reduce -# syscall drops in production-heavy systems without noticeable impact. Very large buffers however could -# slow down the entire machine. -# On the other side you can try to reduce the buffer size to speed up the system, but this could -# increase the number of syscall drops! -# As a final remark consider that the buffer size is mapped twice in the process' virtual memory so a buffer of 8 MB -# will result in a 16 MB area in the process virtual memory. -# Please pay attention when you use this parameter and change it only if the default size doesn't fit your use case. - -syscall_buf_size_preset: 4 - -############## Modern BPF probe specific ############## -# Please note: these configs regard only the modern BPF probe. 
-# -# `cpus_for_each_syscall_buffer` -# -# --- [Description] -# -# This is an index that controls how many CPUs you want to assign to a single -# syscall buffer (ring buffer). By default, every syscall buffer is associated to -# 2 CPUs, so the mapping is 1:2. The modern BPF probe allows you to choose different -# mappings, for example, 1:1 would mean a syscall buffer for each CPU. -# -# --- [Usage] -# -# You can choose between different indexes: from `0` to `MAX_NUMBER_ONLINE_CPUs`. -# `0` is a special value and it means a single syscall buffer shared between all -# your online CPUs. `0` has the same effect as `MAX_NUMBER_ONLINE_CPUs`, the rationale -# is that `0` allows you to create a single buffer without knowing the number of online -# CPUs on your system. -# Let's consider an example to better understand it: -# -# Consider a system with 7 online CPUs: -# -# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) -# -# - `1` means a syscall buffer for each CPU so 7 buffers -# -# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) -# | | | | | | | -# BUFFERs 0 1 2 3 4 5 6 -# -# - `2` (Default value) means a syscall buffer for each CPU pair, so 4 buffers -# -# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) -# | | | | | | | -# BUFFERs 0 0 1 1 2 2 3 -# -# Please note that we need 4 buffers, 3 buffers are associated with CPU pairs, the last -# one is mapped with just 1 CPU since we have an odd number of CPUs. -# -# - `0` or `MAX_NUMBER_ONLINE_CPUs` mean a syscall buffer shared between all CPUs, so 1 buffer -# -# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) -# | | | | | | | -# BUFFERs 0 0 0 0 0 0 0 -# -# Moreover you can combine this param with `syscall_buf_size_preset` -# index, for example, you could create a huge single syscall buffer -# shared between all your online CPUs of 512 MB (so `syscall_buf_size_preset=10`). 
-# -# --- [Suggestions] -# -# We chose index `2` (so one syscall buffer for each CPU pair) as default because the modern bpf probe -# follows a different memory allocation strategy with respect to the other 2 drivers (bpf and kernel module). -# By the way, you are free to find the preferred configuration for your system. -# Considering a fixed `syscall_buf_size_preset` and so a fixed buffer dimension: -# - a lower number of buffers can speed up your system (lower memory footprint) -# - a too lower number of buffers could increase contention in the kernel causing an -# overall slowdown of the system. -# If you don't have huge events throughputs and you are not experimenting with tons of drops -# you can try to reduce the number of buffers to have a lower memory footprint - -modern_bpf: - cpus_for_each_syscall_buffer: 2 -############## Modern BPF probe specific ############## - -# Falco continuously monitors outputs performance. When an output channel does not allow -# to deliver an alert within a given deadline, an error is reported indicating -# which output is blocking notifications. -# The timeout error will be reported to the log according to the above log_* settings. -# Note that the notification will not be discarded from the output queue; thus, -# output channels may indefinitely remain blocked. -# An output timeout error indeed indicate a misconfiguration issue or I/O problems -# that cannot be recovered by Falco and should be fixed by the user. -# -# The "output_timeout" value specifies the duration in milliseconds to wait before -# considering the deadline exceed. -# -# With a 2000ms default, the notification consumer can block the Falco output -# for up to 2 seconds without reaching the timeout. - -output_timeout: 2000 - -# A throttling mechanism implemented as a token bucket limits the -# rate of Falco notifications. 
One rate limiter is assigned to each event -# source, so that alerts coming from one can't influence the throttling -# mechanism of the others. This is controlled by the following options: -# - rate: the number of tokens (i.e. right to send a notification) -# gained per second. When 0, the throttling mechanism is disabled. -# Defaults to 0. -# - max_burst: the maximum number of tokens outstanding. Defaults to 1000. -# -# With these defaults, the throttling mechanism is disabled. -# For example, by setting rate to 1 Falco could send up to 1000 notifications -# after an initial quiet period, and then up to 1 notification per second -# afterward. It would gain the full burst back after 1000 seconds of -# no activity. - -outputs: - rate: 0 - max_burst: 1000 - -# Where security notifications should go. -# Multiple outputs can be enabled. - -syslog_output: - enabled: true - -# If keep_alive is set to true, the file will be opened once and -# continuously written to, with each output message on its own -# line. If keep_alive is set to false, the file will be re-opened -# for each output message. -# -# Also, the file will be closed and reopened if falco is signaled with -# SIGUSR1. - -file_output: +# gRPC output service. +# By default it is off. +# By enabling this all the output events will be kept in memory until you read them with a gRPC client. +# Make sure to have a consumer for them or leave this disabled. +grpc_output: enabled: false - keep_alive: false - filename: ./events.txt - -stdout_output: - enabled: true - -# Falco supports an embedded webserver and exposes the following endpoints: -# - /healthz: health endpoint useful for checking if Falco is up and running -# (the endpoint name is configurable). -# - /versions: responds with a JSON object containing version numbers of the -# internal Falco components (similar output as `falco --version -o json_output=true`). 
-# -# # NOTE: the /versions endpoint is useful to other services (such as falcoctl) -# to retrieve info about a running Falco instance. Make sure the webserver is -# enabled if you're using falcoctl either locally or with Kubernetes. -# -# The following options control the behavior of that webserver (enabled by default). -# -# The ssl_certificate is a combination SSL Certificate and corresponding -# key contained in a single file. You can generate a key/cert as follows: -# -# $ openssl req -newkey rsa:2048 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem -# $ cat certificate.pem key.pem > falco.pem -# $ sudo cp falco.pem /etc/falco/falco.pem -webserver: - enabled: true - # when threadiness is 0, Falco automatically guesses it depending on the number of online cores - threadiness: 0 - listen_port: 8765 - k8s_healthz_endpoint: /healthz - ssl_enabled: false - ssl_certificate: /etc/falco/falco.pem - -# Possible additional things you might want to do with program output: -# - send to a slack webhook: -# program: "jq '{text: .output}' | curl -d @- -X POST https://hooks.slack.com/services/XXX" -# - logging (alternate method than syslog): -# program: logger -t falco-test -# - send over a network connection: -# program: nc host.example.com 80 - -# If keep_alive is set to true, the program will be started once and -# continuously written to, with each output message on its own -# line. If keep_alive is set to false, the program will be re-spawned -# for each output message. -# -# Also, the program will be closed and reopened if falco is signaled with -# SIGUSR1. -program_output: - enabled: false - keep_alive: false - program: "jq '{text: .output}' | curl -d @- -X POST https://hooks.slack.com/services/XXX" - -http_output: - enabled: false - url: http://some.url - user_agent: "falcosecurity/falco" - # Tell Falco to not verify the remote server. - insecure: false - # Path to the CA certificate that can verify the remote server. 
- ca_cert: "" - # Path to a specific file that will be used as the CA certificate store. - ca_bundle: "" - # Path to a folder that will be used as the CA certificate store. CA certificate need to be - # stored as indivitual PEM files in this directory. - ca_path: "/etc/ssl/certs" # Falco supports running a gRPC server with two main binding types # 1. Over the network with mandatory mutual TLS authentication (mTLS) @@ -445,129 +246,132 @@ grpc: # when threadiness is 0, Falco automatically guesses it depending on the number of online cores threadiness: 0 -# gRPC output service. -# By default it is off. -# By enabling this all the output events will be kept in memory until you read them with a gRPC client. -# Make sure to have a consumer for them or leave this disabled. -grpc_output: +# [Stable] `http_output` +# +http_output: enabled: false + url: http://some.url + user_agent: "falcosecurity/falco" + # Tell Falco to not verify the remote server. + insecure: false + # Path to the CA certificate that can verify the remote server. + ca_cert: "" + # Path to a specific file that will be used as the CA certificate store. + ca_bundle: "" + # Path to a folder that will be used as the CA certificate store. CA certificates need to be + # stored as individual PEM files in this directory. + ca_path: "/etc/ssl/certs" -# Container orchestrator metadata fetching params -metadata_download: - max_mb: 100 - chunk_wait_us: 1000 - watch_freq_sec: 1 +# [Stable] `program_output` +# +# Possible additional things you might want to do with program output: +# - send to a slack webhook: +# program: "jq '{text: .output}' | curl -d @- -X POST https://hooks.slack.com/services/XXX" +# - logging (alternative to syslog): +# program: logger -t falco-test +# - send over a network connection: +# program: nc host.example.com 80 +# If keep_alive is set to true, the program will be started once and +# continuously written to, with each output message on its own +# line.
If keep_alive is set to false, the program will be re-spawned +# for each output message. +# +# Also, the program will be closed and reopened if falco is signaled with +# SIGUSR1. +program_output: + enabled: false + keep_alive: false + program: "jq '{text: .output}' | curl -d @- -X POST https://hooks.slack.com/services/XXX" + +# [Stable] `webserver` +# +# Falco supports an embedded webserver and exposes the following endpoints: +# - /healthz: health endpoint useful for checking if Falco is up and running +# (the endpoint name is configurable). +# - /versions: responds with a JSON object containing version numbers of the +# internal Falco components (similar output as `falco --version -o json_output=true`). +# +# # NOTE: the /versions endpoint is useful to other services (such as falcoctl) +# to retrieve info about a running Falco instance. Make sure the webserver is +# enabled if you're using falcoctl either locally or with Kubernetes. +# +# The following options control the behavior of that webserver (enabled by default). +# +# The ssl_certificate is a combination SSL Certificate and corresponding +# key contained in a single file. You can generate a key/cert as follows: +# +# $ openssl req -newkey rsa:2048 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem +# $ cat certificate.pem key.pem > falco.pem +# $ sudo cp falco.pem /etc/falco/falco.pem +webserver: + enabled: true + # when threadiness is 0, Falco automatically guesses it depending on the number of online cores + threadiness: 0 + listen_port: 8765 + k8s_healthz_endpoint: /healthz + ssl_enabled: false + ssl_certificate: /etc/falco/falco.pem -# base_syscalls ! [EXPERIMENTAL] Use with caution, read carefully ! -# -# --- [Description] -# -# This option configures the set of syscalls that Falco traces. -# -# --- [Falco's State Engine] -# -# Falco requires a set of syscalls to build up state in userspace. -# For example, when spawning a new process or network connection, multiple syscalls are involved. 
-# Furthermore, properties of a process during its lifetime can be modified by -# syscalls. Falco accounts for this by enabling the collection of additional syscalls than the -# ones defined in the rules and by managing a smart process cache table in -# userspace. Processes are purged from this table when a process exits. -# -# By default, with -# ``` -# base_syscalls.custom_set = [] -# base_syscalls.repair = false -# ``` -# Falco enables tracing for a syscall set gathered: -# (1) from (enabled) Falco rules -# (2) from a static, more verbose set defined in `libsinsp::events::sinsp_state_sc_set` in libs/userspace/libsinsp/events/sinsp_events_ppm_sc.cpp -# This allows Falco to successfully build up it's state engine and life-cycle management. -# -# If the default behavior described above does not fit the user's use case for Falco, -# the `base_syscalls` option allows for finer end-user control of syscalls traced by Falco. -# -# --- [ base_syscalls.custom_set ] -# -# CAUTION: Misconfiguration of this setting may result in incomplete Falco event -# logs or Falco being unable to trace events entirely. -# -# `base_syscalls.custom_set` allows the user to explicitly define an additional -# set of syscalls to be traced in addition to the syscalls from each enabled Falco rule. -# -# This is useful in lowering CPU utilization and further tailoring Falco to -# specific environments according to your threat model and budget constraints. -# -# --- [ base_syscalls.repair ] -# -# `base_syscalls.repair` is an alternative to Falco's default state engine enforcement. -# When enabled, this option is designed to -# (1) ensure that Falco's state engine is correctly and successfully built-up -# (2) be the most system resource-friendly by activating the least number of -# additional syscalls (outside of those enabled for enabled rules) -# -# Setting `base_syscalls.repair` to `true` allows Falco to automatically configure -# what is described in the [Suggestions] section below. 
-# -# `base_syscalls.repair` can be enabled with an empty custom set, meaning with the following, -# ``` -# base_syscalls.custom_set = [] -# base_syscalls.repair = true -# ``` -# Falco enables tracing for a syscall set gathered: -# (1) from (enabled) Falco rules -# (2) from minimal set of additional syscalls needed to "repair" the -# state engine and properly log event conditions specified in enabled Falco rules -# -# --- [Usage] -# -# List of system calls names (<syscall-name>), negative ("!<syscall-name>") notation supported. -# -# Example: -# base_syscalls.custom_set: [<syscall-name>, <syscall-name>, "!<syscall-name>"] -# base_syscalls.repair: <bool> -# -# We recommend to only exclude syscalls, e.g. "!mprotect" if you need a fast deployment update -# (overriding rules), else remove unwanted syscalls from the Falco rules. -# -# Passing `-o "log_level=debug" -o "log_stderr=true" --dry-run` to Falco's -# cmd args will print the final set of syscalls to STDOUT. -# -# --- [Suggestions] -# -# NOTE: setting `base_syscalls.repair: true` automates the following suggestions for you. -# -# These suggestions are subject to change as Falco and its state engine evolve. -# -# For execve* events: -# Some Falco fields for an execve* syscall are retrieved from the associated -# `clone`, `clone3`, `fork`, `vfork` syscalls when spawning a new process. -# The `close` syscall is used to purge file descriptors from Falco's internal -# thread / process cache table and is necessary for rules relating to file -# descriptors (e.g. open, openat, openat2, socket, connect, accept, accept4 ... and many more) -# -# Consider enabling the following syscalls in `base_syscalls.custom_set` for process rules: -# [clone, clone3, fork, vfork, execve, execveat, close] -# -# For networking related events: -# While you can log `connect` or `accept*` syscalls without the socket syscall, -# the log will not contain the ip tuples. -# Additionally, for `listen` and `accept*` syscalls, the `bind` syscall is also necessary.
-# -# We recommend the following as the minimum set for networking-related rules: -# [clone, clone3, fork, vfork, execve, execveat, close, socket, bind, getsockopt] -# -# Lastly, for tracking the correct `uid`, `gid` or `sid`, `pgid` of a process when the -# running process opens a file or makes a network connection, consider adding the -# following to the above recommended syscall sets: -# ... setresuid, setsid, setuid, setgid, setpgid, setresgid, setsid, capset, chdir, chroot, fchdir ... -# -base_syscalls: - custom_set: [] - repair: false +########################################################## +# Falco internal logging / alerting / metrics (advanced) # +########################################################## -# metrics: [EXPERIMENTAL] periodic metric snapshots -# (including stats and resource utilization) captured at regular intervals +# [Stable] `syscall_event_timeouts` +# +# Falco uses a shared buffer between the kernel and userspace to receive +# the events (e.g., system call information) in userspace. +# +# However, the underlying libraries can also time out for various reasons. +# For example, there could have been issues while reading an event. +# Or the particular event needs to be skipped. +# Normally, it's very unlikely that Falco does not receive events consecutively. +# +# Falco is able to detect such an uncommon situation. +# +# Here you can configure the maximum number of consecutive timeouts without an event +# after which you want Falco to alert. +# By default this value is set to 1000 consecutive timeouts without an event at all. +# How this value maps to a time interval depends on the CPU frequency. +syscall_event_timeouts: + max_consecutives: 1000 + +# [Stable] `syscall_event_drops` +# +# Falco uses a shared buffer between the kernel and userspace to pass +# system call information.
When Falco detects that this buffer is +# full and system calls have been dropped, it can take one or more of +# the following actions: +# - ignore: do nothing (default when list of actions is empty) +# - log: log a DEBUG message noting that the buffer was full +# - alert: emit a Falco alert noting that the buffer was full +# - exit: exit Falco with a non-zero rc +# +# Notice it is not possible to ignore and log/alert messages at the same time. +# +# The rate at which log/alert messages are emitted is governed by a +# token bucket. The rate corresponds to one message every 30 seconds +# with a burst of one message (by default). +# +# The messages are emitted when the percentage of dropped system calls +# with respect to the number of events in the last second +# is greater than the given threshold (a double in the range [0, 1]). +# +# For debugging/testing it is possible to simulate the drops using +# `simulate_drops: true`. In this case the threshold does not apply. +syscall_event_drops: + threshold: .1 + actions: + - log + - alert + rate: .03333 + max_burst: 1 + simulate_drops: false + +# [Experimental] `metrics` +# +# Periodic metric snapshots (including stats and resource utilization) +# captured at regular intervals # # --- [Description] # @@ -664,7 +468,6 @@ base_syscalls: # # todo: prometheus export option # todo: syscall_counters_enabled option - metrics: enabled: false interval: 1h @@ -674,3 +477,305 @@ metrics: kernel_event_counters_enabled: true libbpf_stats_enabled: true convert_memory_to_mb: true + +# [Stable] `output_timeout` +# +# Falco continuously monitors output performance. When an output channel fails +# to deliver an alert within a given deadline, an error is reported indicating +# which output is blocking notifications. +# The timeout error will be reported to the log according to the above log_* settings.
+# Note that the notification will not be discarded from the output queue; thus, +# output channels may indefinitely remain blocked. +# An output timeout error indicates a misconfiguration issue or an I/O problem +# that Falco cannot recover from and that should be fixed by the user. +# +# The "output_timeout" value specifies the duration in milliseconds to wait before +# considering the deadline exceeded. +# +# With a 2000ms default, the notification consumer can block the Falco output +# for up to 2 seconds without reaching the timeout. +output_timeout: 2000 + + +####################################### +# Falco cloud native relevant configs # +####################################### + +# [Stable] `metadata_download` +# +# Container orchestrator metadata fetching params +metadata_download: + max_mb: 100 + chunk_wait_us: 1000 + watch_freq_sec: 1 + +# [Experimental] `load_plugins` and `plugins` +# +# Setting this list to empty ensures that the plugins listed below are *not* +# loaded and enabled by default. If you want to use those plugins, +# set a meaningful init_config/open_params for the cloudtrail plugin +# and then change this to: +# load_plugins: [cloudtrail, json] +load_plugins: [] + +# Plugins that are available for use. These plugins are not loaded by +# default, as they require explicit configuration to point to +# cloudtrail log files. +# To learn more about the supported formats for +# init_config/open_params for the cloudtrail plugin, see the README at +# https://github.com/falcosecurity/plugins/blob/master/plugins/cloudtrail/README.md.
+plugins: + - name: k8saudit + library_path: libk8saudit.so + init_config: + # maxEventSize: 262144 + # webhookMaxBatchSize: 12582912 + # sslCertificate: /etc/falco/falco.pem + open_params: "http://:9765/k8s-audit" + - name: cloudtrail + library_path: libcloudtrail.so + # see docs for init_config and open_params: + # https://github.com/falcosecurity/plugins/blob/master/plugins/cloudtrail/README.md + - name: json + library_path: libjson.so + + +############################ +# Falco performance tuning # +############################ + +# [Stable] `syscall_buf_size_preset` +# +# --- [Description] +# +# This is an index that controls the dimension of the syscall buffers. +# The syscall buffer is the shared space between Falco and its drivers where all the syscall events +# are stored. +# Falco uses a syscall buffer for every online CPU, and all these buffers share the same dimension. +# So this parameter allows you to control the size of all the buffers! +# +# --- [Usage] +# +# You can choose between different indexes: from `1` to `10` (`0` is reserved for future uses). +# Every index corresponds to a dimension in bytes: +# +# [(*), 1 MB, 2 MB, 4 MB, 8 MB, 16 MB, 32 MB, 64 MB, 128 MB, 256 MB, 512 MB] +# ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ +# | | | | | | | | | | | +# 0 1 2 3 4 5 6 7 8 9 10 +# +# As you can see the `0` index is reserved, while the index `1` corresponds to +# `1 MB` and so on. +# +# These dimensions in bytes derive from the fact that the buffer size must be: +# (1) a power of 2. +# (2) a multiple of your system_page_dimension. +# (3) greater than `2 * (system_page_dimension)`. +# +# Because of these constraints, it is possible that sometimes you cannot use all the indexes. Let's consider an +# example to better understand it: +# If you have a `page_size` of 1 MB the first available buffer size is 4 MB because 2 MB is exactly +# `2 * (system_page_size)` -> `2 * 1 MB`, but this is not enough: we need more than `2 * (system_page_size)`!
+# So from this example it is clear that if you have a page size of 1 MB the first index that you can use is `3`. +# +# Please note: this is a very extreme case just to let you understand the mechanism; usually the page size is something +# like 4 KB so you have no problem at all and you can use all the indexes (from `1` to `10`). +# +# To check your system page size use the Falco `--page-size` command line option. The output on a system with a page +# size of 4096 Bytes (4 KB) should be the following: +# +# "Your system page size is: 4096 bytes." +# +# --- [Suggestions] +# +# Before the introduction of this param the buffer size was fixed to 8 MB (so index `4`, as you can see +# in the default value below). +# You can increase the buffer size when you face syscall drops. A size of 16 MB (so index `5`) can reduce +# syscall drops in production-heavy systems without noticeable impact. Very large buffers however could +# slow down the entire machine. +# On the other hand, you can try to reduce the buffer size to speed up the system, but this could +# increase the number of syscall drops! +# As a final remark consider that the buffer size is mapped twice in the process' virtual memory so a buffer of 8 MB +# will result in a 16 MB area in the process virtual memory. +# Please pay attention when you use this parameter and change it only if the default size doesn't fit your use case. + +syscall_buf_size_preset: 4 + +# [Experimental] `syscall_drop_failed_exit` +# +# Enabling this option allows Falco to drop failed syscall exit events +# in the kernel driver before the event is pushed onto the ring buffer. +# This can enable some small optimization both in CPU usage and ring buffer usage, +# possibly leading to a lower number of event losses. +# Be careful: enabling it also means losing a bit of visibility on the system.
+ +syscall_drop_failed_exit: false + +# [Experimental] `base_syscalls`, use with caution, read carefully +# +# --- [Description] +# +# This option configures the set of syscalls that Falco traces. +# +# --- [Falco's State Engine] +# +# Falco requires a set of syscalls to build up state in userspace. +# For example, when spawning a new process or network connection, multiple syscalls are involved. +# Furthermore, properties of a process during its lifetime can be modified by +# syscalls. Falco accounts for this by enabling the collection of additional syscalls beyond the +# ones defined in the rules and by managing a smart process cache table in +# userspace. Processes are purged from this table when a process exits. +# +# By default, with +# ``` +# base_syscalls.custom_set = [] +# base_syscalls.repair = false +# ``` +# Falco enables tracing for a syscall set gathered: +# (1) from (enabled) Falco rules +# (2) from a static, more verbose set defined in `libsinsp::events::sinsp_state_sc_set` in libs/userspace/libsinsp/events/sinsp_events_ppm_sc.cpp +# This allows Falco to successfully build up its state engine and life-cycle management. +# +# If the default behavior described above does not fit the user's use case for Falco, +# the `base_syscalls` option allows for finer end-user control of syscalls traced by Falco. +# +# --- [base_syscalls.custom_set] +# +# CAUTION: Misconfiguration of this setting may result in incomplete Falco event +# logs or Falco being unable to trace events entirely. +# +# `base_syscalls.custom_set` allows the user to explicitly define an additional +# set of syscalls to be traced in addition to the syscalls from each enabled Falco rule. +# +# This is useful in lowering CPU utilization and further tailoring Falco to +# specific environments according to your threat model and budget constraints. +# +# --- [base_syscalls.repair] +# +# `base_syscalls.repair` is an alternative to Falco's default state engine enforcement.
+# When enabled, this option is designed to +# (1) ensure that Falco's state engine is correctly and successfully built up +# (2) be the most system resource-friendly by activating the least number of +# additional syscalls (outside of those enabled for enabled rules) +# +# Setting `base_syscalls.repair` to `true` allows Falco to automatically configure +# what is described in the [Suggestions] section below. +# +# `base_syscalls.repair` can be enabled with an empty custom set, meaning with the following, +# ``` +# base_syscalls.custom_set = [] +# base_syscalls.repair = true +# ``` +# Falco enables tracing for a syscall set gathered: +# (1) from (enabled) Falco rules +# (2) from a minimal set of additional syscalls needed to "repair" the +# state engine and properly log event conditions specified in enabled Falco rules +# +# --- [Usage] +# +# List of system call names (<syscall-name>), negative ("!<syscall-name>") notation supported. +# +# Example: +# base_syscalls.custom_set: [<syscall-name>, <syscall-name>, "!<syscall-name>"] +# base_syscalls.repair: <bool> +# +# We recommend only excluding syscalls, e.g. "!mprotect", if you need a fast deployment update +# (overriding rules), else remove unwanted syscalls from the Falco rules. +# +# Passing `-o "log_level=debug" -o "log_stderr=true" --dry-run` to Falco's +# cmd args will print the final set of syscalls to STDOUT. +# +# --- [Suggestions] +# +# NOTE: setting `base_syscalls.repair: true` automates the following suggestions for you. +# +# These suggestions are subject to change as Falco and its state engine evolve. +# +# For execve* events: +# Some Falco fields for an execve* syscall are retrieved from the associated +# `clone`, `clone3`, `fork`, `vfork` syscalls when spawning a new process. +# The `close` syscall is used to purge file descriptors from Falco's internal +# thread / process cache table and is necessary for rules relating to file +# descriptors (e.g. open, openat, openat2, socket, connect, accept, accept4 ...
and many more) +# +# Consider enabling the following syscalls in `base_syscalls.custom_set` for process rules: +# [clone, clone3, fork, vfork, execve, execveat, close] +# +# For networking-related events: +# While you can log `connect` or `accept*` syscalls without the `socket` syscall, +# the log will not contain the IP tuples. +# Additionally, for `listen` and `accept*` syscalls, the `bind` syscall is also necessary. +# +# We recommend the following as the minimum set for networking-related rules: +# [clone, clone3, fork, vfork, execve, execveat, close, socket, bind, getsockopt] +# +# Lastly, for tracking the correct `uid`, `gid`, `sid` or `pgid` of a process when the +# running process opens a file or makes a network connection, consider adding the +# following to the above recommended syscall sets: +# ... setresuid, setsid, setuid, setgid, setpgid, setresgid, capset, chdir, chroot, fchdir ... + +base_syscalls: + custom_set: [] + repair: false + +# [Experimental] `modern_bpf.cpus_for_each_syscall_buffer`, modern_bpf only +# +# --- [Description] +# +# This is an index that controls how many CPUs you want to assign to a single +# syscall buffer (ring buffer). By default, every syscall buffer is associated with +# 2 CPUs, so the mapping is 1:2. The modern BPF probe allows you to choose different +# mappings, for example, 1:1 would mean a syscall buffer for each CPU. +# +# --- [Usage] +# +# You can choose between different indexes: from `0` to `MAX_NUMBER_ONLINE_CPUs`. +# `0` is a special value and it means a single syscall buffer shared between all +# your online CPUs. `0` has the same effect as `MAX_NUMBER_ONLINE_CPUs`; the rationale +# is that `0` allows you to create a single buffer without knowing the number of online +# CPUs on your system.
+# Let's consider an example to better understand it: +# +# Consider a system with 7 online CPUs: +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# +# - `1` means a syscall buffer for each CPU so 7 buffers +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# | | | | | | | +# BUFFERs 0 1 2 3 4 5 6 +# +# - `2` (Default value) means a syscall buffer for each CPU pair, so 4 buffers +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# | | | | | | | +# BUFFERs 0 0 1 1 2 2 3 +# +# Please note that we need 4 buffers: 3 buffers are associated with CPU pairs, and the last +# one is mapped to just 1 CPU since we have an odd number of CPUs. +# +# - `0` or `MAX_NUMBER_ONLINE_CPUs` mean a syscall buffer shared between all CPUs, so 1 buffer +# +# CPUs 0 X 2 3 X X 6 7 8 9 (X means offline CPU) +# | | | | | | | +# BUFFERs 0 0 0 0 0 0 0 +# +# Moreover, you can combine this param with the `syscall_buf_size_preset` +# index; for example, you could create a huge single syscall buffer +# shared between all your online CPUs of 512 MB (so `syscall_buf_size_preset=10`). +# +# --- [Suggestions] +# +# We chose index `2` (so one syscall buffer for each CPU pair) as default because the modern bpf probe +# follows a different memory allocation strategy with respect to the other 2 drivers (bpf and kernel module). +# Anyway, you are free to find the preferred configuration for your system. +# Considering a fixed `syscall_buf_size_preset` and so a fixed buffer dimension: +# - a lower number of buffers can speed up your system (lower memory footprint) +# - too low a number of buffers could increase contention in the kernel causing an +# overall slowdown of the system. +# If you don't have a huge event throughput and you are not experiencing tons of drops +# you can try to reduce the number of buffers to have a lower memory footprint. + +modern_bpf: + cpus_for_each_syscall_buffer: 2