From d4c42814d676cdfab1ed25355616b5e8ca11bc3e Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Mon, 22 May 2023 13:29:12 +0000 Subject: [PATCH] cleanup(config): improve metrics config description for technical clarity Co-authored-by: Jason Dellaluce Signed-off-by: Melissa Kilby --- falco.yaml | 100 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 42 deletions(-) diff --git a/falco.yaml b/falco.yaml index bcf224ce..3aaba482 100644 --- a/falco.yaml +++ b/falco.yaml @@ -566,19 +566,38 @@ base_syscalls: custom_set: [] repair: false -# metrics: [EXPERIMENTAL] periodic metric snapshots (stats and resource utilization) +# metrics: [EXPERIMENTAL] periodic metric snapshots +# (including stats and resource utilization) captured at regular intervals # # --- [Description] # -# `metrics` reflects a stats/metrics re-design from the ground up. Falco natively supports -# resource utilization metrics and enhanced specialized metrics to monitor Falco's -# performance in production. Metrics are exposed as monotonic counters or snapshots -# emitted at a pre-defined interval. Each metric is captured in the same consolidated log message. -# In addition, relevant wrapper fields are added, allowing the end user to perform customized -# statistical analyses and correlations. Lastly, the metrics framework can be easily extended in the future. +# Consider these key points about the `metrics` feature in Falco: # -# Notice: Schema and output field names are not guaranteed to be stable -# and might change until `metrics` reaches a stable release. +# - It introduces a redesigned stats/metrics system. +# - Native support for resource utilization metrics and specialized performance metrics. +# - Metrics are emitted as monotonic counters at predefined intervals (snapshots). +# - All metrics are consolidated into a single log message, adhering to the established +# rules schema and naming conventions. 
+# - Additional info fields complement the metrics and facilitate customized +# statistical analyses and correlations. +# - The metrics framework is designed for easy future extension. +# +# The `metrics` feature follows a specific schema and field naming convention. All metrics +# are collected as subfields under the `output_fields` key, similar to regular Falco rules. +# Each metric field name adheres to the grammar used in Falco rules. +# There are two new field classes introduced: `falco.` and `scap.`. +# The `falco.` class represents userspace counters, statistics, resource utilization, +# or useful information fields. +# The `scap.` class represents counters and statistics mostly obtained from Falco's +# kernel instrumentation before events are sent to userspace, but can include scap +# userspace stats as well. +# +# It's important to note that the output fields and their names can be subject to change +# until the metrics feature reaches a stable release. +# +# To customize the hostname in Falco, you can set the environment variable `FALCO_HOSTNAME` +# to your desired hostname. This is particularly useful in Kubernetes deployments +# where the hostname can be set to the pod name. # # --- [Usage] # @@ -586,7 +605,7 @@ base_syscalls: # Disabled by default. # # `interval`: -# Define the stats interval following the Prometheus time duration definitions. +# The stats interval in Falco follows the time duration definitions used by Prometheus. # https://prometheus.io/docs/prometheus/latest/querying/basics/#time-durations # # Time durations are specified as a number, followed immediately by one of the following units: @@ -600,7 +619,8 @@ base_syscalls: # # Example of a valid time duration: 1h30m20s10ms # -# A minimum of 100ms is enforced, however we recommend choosing one of the following intervals for production: +# A minimum interval of 100ms is enforced for metric collection. 
However, for production environments, +# we recommend selecting one of the following intervals for optimal monitoring: # 15m # 30m # 1h @@ -608,46 +628,42 @@ base_syscalls: # 6h # # `output_rule`: -# Emit metrics as rule `Falco internal: metrics snapshot`. -# We recommend this option for seamless metrics and performance monitoring especially -# if Falco logs are preserved in a data lake. -# Note: This option at minimum requires setting `log_level` to `info`. +# To enable seamless metrics and performance monitoring, we recommend emitting metrics as the rule +# `Falco internal: metrics snapshot`. This option is particularly useful when Falco logs are preserved +# in a data lake. +# Please note that to use this option, the `log_level` must be set to `info` at a minimum. # # `output_file`: -# Append stats to a `jsonl` file. Use with caution in production, Falco does not rotate the file. +# Append stats to a `jsonl` file. Use with caution in production as Falco does not automatically rotate the file. # # `resource_utilization_enabled`: -# Emit CPU and memory usages. CPU usage is percentage of one CPU and can -# be normalized to total number of CPUs to determine the overall usage. -# Memory metrics are currently kept in raw units, `kb` for RSS, PSS and VSZ -# or `bytes` for container_memory_used. Use `convert_memory_to_mb` to -# uniformly convert each memory metric to MB. -# Creating and setting an environmnet variable `FALCO_CGROUP_MEM_PATH=customfile` -# let's you customize the container_memory_used file which defaults to Kubernetes -# `/sys/fs/cgroup/memory/memory.usage_in_bytes` holding the memory metric that is -# similar to Kubernetes `container_memory_working_set_bytes` of the Falco container. +# Emit CPU and memory usage metrics. CPU usage is reported as a percentage of one CPU and +# can be normalized to the total number of CPUs to determine overall usage. 
+# Memory metrics are provided in raw units (`kb` for `RSS`, `PSS` and `VSZ` or +# `bytes` for `container_memory_used`) and can be uniformly converted +# to megabytes (MB) using the `convert_memory_to_mb` functionality. +# In environments such as Kubernetes, it is crucial to track Falco's container memory usage. +# To customize the path of the memory metric file, you can create an environment variable +# named `FALCO_CGROUP_MEM_PATH` and set it to the desired file path. By default, Falco uses +# the file `/sys/fs/cgroup/memory/memory.usage_in_bytes` to monitor container memory usage, +# which aligns with Kubernetes' `container_memory_working_set_bytes` metric. # # `kernel_event_counters_enabled`: -# Emit kernel side event and drop counters, compare to `syscall_event_drops`, -# however this option reflects monotonic counters since Falco start, -# exported at a constant stats interval and therefore can be regarded as an alternative. -# kernel event counters are prefixed with `k.` vs userspace counters with `u.` ... +# Emit kernel side event and drop counters, as an alternative to `syscall_event_drops`, +# but with some differences. These counters reflect monotonic values since Falco's start +# and are exported at a constant stats interval. # # `libbpf_stats_enabled`: -# Exposes `bpftool prog show` like stats, e.g. number of invocations -# of each bpf program Falco attached as well as time spent in each program in nanoseconds. -# Requires kernels >= 5.1 plus setting kernel config `/proc/sys/kernel/bpf_stats_enabled`. -# This option or equivalent stats are not supported for non `*bpf*` drivers. -# Note that currently `libbpf` does not support stats granularity at the bpf tail call level. +# Exposes statistics similar to `bpftool prog show`, providing information such as the number +# of invocations of each BPF program attached by Falco and the time spent in each program +# measured in nanoseconds. 
+# To enable this feature, the kernel version must be >= 5.1, and the kernel setting `/proc/sys/kernel/bpf_stats_enabled` +# must be enabled. This option, or an equivalent statistics feature, is not available for non `*bpf*` drivers. +# Additionally, please be aware that the current implementation of `libbpf` does not +# support granularity of statistics at the BPF tail call level. # -# Customization with relevant environment variables: -# Creating an env variable `FALCO_HOSTNAME=myhostname` customizes the hostname, -# especially useful for Kubernetes deployments where the hostname can be equivalent to the pod name. -# Refer to section `resource_utilization_enabled` re customization via creating an -# env variable `FALCO_CGROUP_MEM_PATH=customfile` to point to a custom file holding the memory metric. -# -# todo: Prometheus export option -# todo: userspace_syscall_event_counters_enabled option +# todo: Prometheus export option +# todo: syscall_counters_enabled option metrics: enabled: false