Save syscall source separately and check explicitly in process_event

While testing Falco at very high event volumes (> 1.5M
events/second), I found that the time taken to look up a falco_source
struct made a non-negligible contribution to CPU usage.

So instead of looking up the source from source_idx for every event,
save the syscall source (and its index) directly in the falco_engine
object. The saved copy is only populated, and therefore only used,
after someone calls add_source with source="syscall".

Signed-off-by: Mark Stemm <mark.stemm@gmail.com>
This commit is contained in:
Mark Stemm 2022-09-14 13:41:58 -07:00 committed by poiana
parent 366bcfd7a3
commit e5cd5eacf5
2 changed files with 33 additions and 4 deletions

View File

@ -41,7 +41,9 @@ using namespace std;
using namespace falco; using namespace falco;
falco_engine::falco_engine(bool seed_rng) falco_engine::falco_engine(bool seed_rng)
: m_next_ruleset_id(0), : m_syscall_source(NULL),
m_syscall_source_idx(SIZE_MAX),
m_next_ruleset_id(0),
m_min_priority(falco_common::PRIORITY_DEBUG), m_min_priority(falco_common::PRIORITY_DEBUG),
m_sampling_ratio(1), m_sampling_multiplier(0), m_sampling_ratio(1), m_sampling_multiplier(0),
m_replace_container_info(false) m_replace_container_info(false)
@ -338,7 +340,19 @@ unique_ptr<falco_engine::rule_result> falco_engine::process_event(std::size_t so
// imply that concurrent invokers use different and non-switchable values of // imply that concurrent invokers use different and non-switchable values of
// source_idx, which means that at any time each filter_ruleset will only // source_idx, which means that at any time each filter_ruleset will only
// be accessed by a single thread. // be accessed by a single thread.
if(should_drop_evt() || !find_source(source_idx)->ruleset->run(ev, rule, ruleset_id))
const falco_source *source;
if(source_idx == m_syscall_source_idx)
{
source = m_syscall_source;
}
else
{
source = find_source(source_idx);
}
if(should_drop_evt() || !source || !source->ruleset->run(ev, source->m_rule, ruleset_id))
{ {
return unique_ptr<struct rule_result>(); return unique_ptr<struct rule_result>();
} }
@ -367,7 +381,15 @@ std::size_t falco_engine::add_source(const std::string &source,
// evttype_index_ruleset is the default ruleset implementation // evttype_index_ruleset is the default ruleset implementation
std::shared_ptr<filter_ruleset_factory> ruleset_factory( std::shared_ptr<filter_ruleset_factory> ruleset_factory(
new evttype_index_ruleset_factory(filter_factory)); new evttype_index_ruleset_factory(filter_factory));
return add_source(source, filter_factory, formatter_factory, ruleset_factory); size_t idx = add_source(source, filter_factory, formatter_factory, ruleset_factory);
if(source == falco_common::syscall_source)
{
m_syscall_source_idx = idx;
m_syscall_source = find_source(m_syscall_source_idx);
}
return idx;
} }
std::size_t falco_engine::add_source(const std::string &source, std::size_t falco_engine::add_source(const std::string &source,

View File

@ -22,6 +22,7 @@ limitations under the License.
#pragma once #pragma once
#include <atomic>
#include <string> #include <string>
#include <memory> #include <memory>
#include <set> #include <set>
@ -171,7 +172,7 @@ public:
// configured the engine. In particular, invoking this with a source_idx // configured the engine. In particular, invoking this with a source_idx
// not previously returned by a call to add_source() would cause a // not previously returned by a call to add_source() would cause a
// falco_exception to be thrown. // falco_exception to be thrown.
// //
// This method is thread-safe only with the assumption that every invoker // This method is thread-safe only with the assumption that every invoker
// uses a different source_idx. Moreover, each invoker must not switch // uses a different source_idx. Moreover, each invoker must not switch
// source_idx in subsequent invocations of this method. // source_idx in subsequent invocations of this method.
@ -264,6 +265,12 @@ private:
const falco_source* find_source(std::size_t index) const; const falco_source* find_source(std::size_t index) const;
const falco_source* find_source(const std::string& name) const; const falco_source* find_source(const std::string& name) const;
// To allow the engine to be extremely fast for syscalls (can
// be > 1M events/sec), we save the syscall source/source_idx
// separately and check it explicitly in process_event()
const falco_source* m_syscall_source;
std::atomic<size_t> m_syscall_source_idx;
// //
// Determine whether the given event should be matched at all // Determine whether the given event should be matched at all
// against the set of rules, given the current sampling // against the set of rules, given the current sampling