From e5cd5eacf534590c357af4076b7c2d85e7075e6e Mon Sep 17 00:00:00 2001 From: Mark Stemm Date: Wed, 14 Sep 2022 13:41:58 -0700 Subject: [PATCH] Save syscall source separately and check explicitly in process_event When doing some testing of falco on very high event volumes (> 1.5M events/second), I found that the time taken to look up a falco_source struct had a non-negligible contribution to cpu usage. So instead of looking up the source from the source_idx every time, separately save the source for syscalls in the falco_engine object directly. The separately saved copy is only used once someone calls add_source with source="syscall". Signed-off-by: Mark Stemm --- userspace/engine/falco_engine.cpp | 28 +++++++++++++++++++++++++--- userspace/engine/falco_engine.h | 9 ++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/userspace/engine/falco_engine.cpp b/userspace/engine/falco_engine.cpp index 78286688..a63a19eb 100644 --- a/userspace/engine/falco_engine.cpp +++ b/userspace/engine/falco_engine.cpp @@ -41,7 +41,9 @@ using namespace std; using namespace falco; falco_engine::falco_engine(bool seed_rng) - : m_next_ruleset_id(0), + : m_syscall_source(NULL), + m_syscall_source_idx(SIZE_MAX), + m_next_ruleset_id(0), m_min_priority(falco_common::PRIORITY_DEBUG), m_sampling_ratio(1), m_sampling_multiplier(0), m_replace_container_info(false) @@ -338,7 +340,19 @@ unique_ptr falco_engine::process_event(std::size_t so // imply that concurrent invokers use different and non-switchable values of // source_idx, which means that at any time each filter_ruleset will only // be accessed by a single thread. 
- if(should_drop_evt() || !find_source(source_idx)->ruleset->run(ev, rule, ruleset_id)) + + const falco_source *source; + + if(source_idx == m_syscall_source_idx) + { + source = m_syscall_source; + } + else + { + source = find_source(source_idx); + } + + if(should_drop_evt() || !source || !source->ruleset->run(ev, source->m_rule, ruleset_id)) { return unique_ptr(); } @@ -367,7 +381,15 @@ std::size_t falco_engine::add_source(const std::string &source, // evttype_index_ruleset is the default ruleset implementation std::shared_ptr ruleset_factory( new evttype_index_ruleset_factory(filter_factory)); - return add_source(source, filter_factory, formatter_factory, ruleset_factory); + size_t idx = add_source(source, filter_factory, formatter_factory, ruleset_factory); + + if(source == falco_common::syscall_source) + { + m_syscall_source_idx = idx; + m_syscall_source = find_source(m_syscall_source_idx); + } + + return idx; } std::size_t falco_engine::add_source(const std::string &source, diff --git a/userspace/engine/falco_engine.h b/userspace/engine/falco_engine.h index ce7d6105..b85931d8 100644 --- a/userspace/engine/falco_engine.h +++ b/userspace/engine/falco_engine.h @@ -22,6 +22,7 @@ limitations under the License. #pragma once +#include <atomic> #include #include #include @@ -171,7 +172,7 @@ public: // configured the engine. In particular, invoking this with a source_idx // not previosly-returned by a call to add_source() would cause a // falco_exception to be thrown. - // + // // This method is thread-safe only with the assumption that every invoker // uses a different source_idx. Moreover, each invoker must not switch // source_idx in subsequent invocations of this method. 
@@ -264,6 +265,12 @@ private: const falco_source* find_source(std::size_t index) const; const falco_source* find_source(const std::string& name) const; + // To allow the engine to be extremely fast for syscalls (can + // be > 1M events/sec), we save the syscall source/source_idx + // separately and check it explicitly in process_event() + const falco_source* m_syscall_source; + std::atomic<size_t> m_syscall_source_idx; + // // Determine whether the given event should be matched at all // against the set of rules, given the current sampling