Improve Falco engine performance when loading rules and creating the rule sets

- replace std::set<uint16_t> with fixed size vector in event types propagation
- rework lists expansion by replacing repetitive string::find in constantly growing expansion string with regex tokenization
- improve json_event parsing by moving const initializations into static routines

Signed-off-by: VadimZy <vadim.zyarko@sysdig.com>
This commit is contained in:
VadimZy 2022-08-10 15:12:09 -07:00 committed by poiana
parent 7d2f82fddc
commit f9ee45b38e
8 changed files with 441 additions and 205 deletions

View File

@ -27,21 +27,17 @@ static bool is_evttype_operator(const string& op)
return op == "==" || op == "=" || op == "!=" || op == "in";
}
void filter_evttype_resolver::visitor::inversion(set<uint16_t>& types)
void filter_evttype_resolver::visitor::inversion(falco_event_types& types)
{
set<uint16_t> all_types;
falco_event_types all_types;
evttypes("", all_types);
if (types != all_types) // we don't invert the "all types" set
{
set<uint16_t> diff = types;
types.clear();
set_difference(
all_types.begin(), all_types.end(), diff.begin(), diff.end(),
inserter(types, types.begin()));
types = all_types.diff(types);
}
}
void filter_evttype_resolver::visitor::evttypes(string evtname, set<uint16_t>& out)
void filter_evttype_resolver::visitor::evttypes(string evtname, falco_event_types& out)
{
// Fill in from 2 to PPM_EVENT_MAX-1. 0 and 1 are excluded as
// those are PPME_GENERIC_E/PPME_GENERIC_X
@ -59,42 +55,38 @@ void filter_evttype_resolver::visitor::evttypes(string evtname, set<uint16_t>& o
void filter_evttype_resolver::evttypes(
ast::expr* filter,
set<uint16_t>& out) const
std::set<uint16_t>& out) const
{
visitor v;
v.m_expect_value = false;
v.m_last_node_evttypes.clear();
filter->accept(&v);
out.insert(v.m_last_node_evttypes.begin(), v.m_last_node_evttypes.end());
v.m_last_node_evttypes.for_each([&out](uint16_t val){out.insert(val); return true;});
}
void filter_evttype_resolver::evttypes(
shared_ptr<ast::expr> filter,
set<uint16_t>& out) const
std::set<uint16_t>& out) const
{
visitor v;
v.m_expect_value = false;
v.m_last_node_evttypes.clear();
filter.get()->accept(&v);
out.insert(v.m_last_node_evttypes.begin(), v.m_last_node_evttypes.end());
v.m_last_node_evttypes.for_each([&out](uint16_t val){out.insert(val); return true;} );
}
// "and" nodes evttypes are the intersection of the evttypes of their children.
// we initialize the set with "all event types"
void filter_evttype_resolver::visitor::visit(ast::and_expr* e)
{
set<uint16_t> types, inters;
falco_event_types types;
evttypes("", types);
m_last_node_evttypes.clear();
for (auto &c : e->children)
{
inters.clear();
falco_event_types inters;
c->accept(this);
set_intersection(
types.begin(), types.end(),
m_last_node_evttypes.begin(), m_last_node_evttypes.end(),
inserter(inters, inters.begin()));
types = inters;
types = types.intersect(m_last_node_evttypes);
}
m_last_node_evttypes = types;
}
@ -102,12 +94,12 @@ void filter_evttype_resolver::visitor::visit(ast::and_expr* e)
// "or" nodes evttypes are the union of the evttypes of their children
void filter_evttype_resolver::visitor::visit(ast::or_expr* e)
{
set<uint16_t> types;
falco_event_types types;
m_last_node_evttypes.clear();
for (auto &c : e->children)
{
c->accept(this);
types.insert(m_last_node_evttypes.begin(), m_last_node_evttypes.end());
types.merge(m_last_node_evttypes);
}
m_last_node_evttypes = types;
}

View File

@ -16,10 +16,130 @@ limitations under the License.
#pragma once
#include <sinsp.h>
#include <filter/parser.h>
#include <string>
#include <set>
#include <memory>
#include <functional>
/*!
	\brief Set of event type codes backed by a fixed-length flag vector.

	The vector holds one byte per code in [0, PPM_EVENT_MAX]; a non-zero
	byte means the code is a member of the set. Every member function
	indexes the vector assuming that full length.
*/
class falco_event_types
{
private:
	using vec_t = std::vector<uint8_t>;
	vec_t m_types{};

	/*!
		\brief Throws std::range_error if e is greater than PPM_EVENT_MAX.
	*/
	static inline void check_range(uint16_t e)
	{
		if(e > PPM_EVENT_MAX)
		{
			throw std::range_error("invalid event type");
		}
	}

public:
	// note: a moved-from object is left with an empty flag vector;
	// call clear() (or assign to it) before using it again.
	falco_event_types(falco_event_types&&) = default;
	falco_event_types(const falco_event_types&) = default;
	falco_event_types& operator=(falco_event_types&&) = default;
	falco_event_types& operator=(const falco_event_types&) = default;

	/*!
		\brief Creates an empty set (all PPM_EVENT_MAX + 1 flags are zero).
	*/
	inline falco_event_types():
		m_types(PPM_EVENT_MAX + 1, 0)
	{
	}

	/*!
		\brief Adds an event type to the set.
		\throws std::range_error if e is greater than PPM_EVENT_MAX
	*/
	inline void insert(uint16_t e)
	{
		check_range(e);
		m_types[e] = 1;
	}

	/*!
		\brief Adds every event type contained in other (set union, in place).
	*/
	void merge(const falco_event_types& other)
	{
		for(size_t i = 0; i <= PPM_EVENT_MAX; ++i)
		{
			m_types[i] |= other.m_types[i];
		}
	}

	/*!
		\brief Adds every event type contained in other.
		\throws std::range_error if any element is greater than PPM_EVENT_MAX
	*/
	void merge(const std::set<uint16_t>& other)
	{
		for(const auto& e : other)
		{
			insert(e);
		}
	}

	/*!
		\brief Returns true if e is a member of the set.
		\throws std::range_error if e is greater than PPM_EVENT_MAX
	*/
	inline bool contains(uint16_t e) const
	{
		check_range(e);
		return m_types[e] != 0;
	}

	/*!
		\brief Removes all event types from the set.

		The flag vector is re-created at its full length rather than
		zeroed element by element, so this also restores the size
		invariant of an object that was previously moved from.
	*/
	void clear()
	{
		m_types.assign(PPM_EVENT_MAX + 1, 0);
	}

	/*!
		\brief Returns true if both sets hold exactly the same flags.
	*/
	bool equals(const falco_event_types& other) const
	{
		return m_types == other.m_types;
	}

	/*!
		\brief Returns a new set with the event types that are in this set
		but not in other. Neither operand is modified.
	*/
	falco_event_types diff(const falco_event_types& other) const
	{
		falco_event_types ret;
		for(size_t i = 0; i <= PPM_EVENT_MAX; ++i)
		{
			if(m_types[i] != 0 && other.m_types[i] == 0)
			{
				ret.m_types[i] = 1;
			}
		}
		return ret;
	}

	/*!
		\brief Returns a new set with the event types that are in both this
		set and other. Neither operand is modified.
	*/
	falco_event_types intersect(const falco_event_types& other) const
	{
		falco_event_types ret;
		for(size_t i = 0; i <= PPM_EVENT_MAX; ++i)
		{
			if(m_types[i] != 0 && other.m_types[i] != 0)
			{
				ret.m_types[i] = 1;
			}
		}
		return ret;
	}

	/*!
		\brief Calls consumer for each event type in the set, in
		ascending order.
		\param consumer Callback receiving the event type; iteration
		stops as soon as it returns false
	*/
	void for_each(std::function<bool(uint16_t)> consumer) const
	{
		for(uint16_t i = 0; i < m_types.size(); ++i)
		{
			if(m_types[i] != 0)
			{
				if(!consumer(i))
				{
					return;
				}
			}
		}
	}
};
/*!
	\brief Equality for falco_event_types, delegating to
	falco_event_types::equals().
*/
inline bool operator==(const falco_event_types& lhs, const falco_event_types& rhs)
{
	const bool same = lhs.equals(rhs);
	return same;
}
/*!
	\brief Inequality for falco_event_types; the exact negation of
	operator==.
*/
inline bool operator!=(const falco_event_types& lhs, const falco_event_types& rhs)
{
	if(lhs == rhs)
	{
		return false;
	}
	return true;
}
/*!
\brief Helper class for finding event types
@ -35,9 +155,12 @@ public:
string is passed, all the available evttypes are collected
\param out The set to be filled with the evttypes
*/
inline void evttypes(std::string evtname, std::set<uint16_t>& out) const
inline void evttypes(std::string evtname, falco_event_types& out) const
{
visitor().evttypes(evtname, out);
falco_event_types evt_types;
visitor().evttypes(evtname, evt_types);
evt_types.for_each([&out](uint16_t val)
{out.insert(val); return true; });
}
/*!
@ -64,7 +187,7 @@ private:
struct visitor : public libsinsp::filter::ast::expr_visitor
{
bool m_expect_value;
std::set<uint16_t> m_last_node_evttypes;
falco_event_types m_last_node_evttypes;
void visit(libsinsp::filter::ast::and_expr* e) override;
void visit(libsinsp::filter::ast::or_expr* e) override;
@ -73,7 +196,7 @@ private:
void visit(libsinsp::filter::ast::list_expr* e) override;
void visit(libsinsp::filter::ast::unary_check_expr* e) override;
void visit(libsinsp::filter::ast::binary_check_expr* e) override;
void inversion(std::set<uint16_t>& types);
void evttypes(std::string evtname, std::set<uint16_t>& out);
void inversion(falco_event_types& types);
void evttypes(std::string evtname, falco_event_types& out);
};
};

View File

@ -61,12 +61,12 @@ void filter_macro_resolver::set_macro(
m_macros[name] = macro;
}
const set<string>& filter_macro_resolver::get_unknown_macros() const
const unordered_set<string>& filter_macro_resolver::get_unknown_macros() const
{
return m_unknown_macros;
}
const set<string>& filter_macro_resolver::get_resolved_macros() const
const unordered_set<string>& filter_macro_resolver::get_resolved_macros() const
{
return m_resolved_macros;
}

View File

@ -18,8 +18,8 @@ limitations under the License.
#include <filter/parser.h>
#include <string>
#include <set>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <memory>
/*!
@ -63,7 +63,7 @@ class filter_macro_resolver
substituted during the last invocation of run(). Should be
non-empty if the last invocation of run() returned true.
*/
const std::set<std::string>& get_resolved_macros() const;
const std::unordered_set<std::string>& get_resolved_macros() const;
/*!
\brief Returns a set containing the names of all the macros
@ -71,10 +71,10 @@ class filter_macro_resolver
A macro remains unresolved if it is found inside the processed
filter but it was not defined with set_macro();
*/
const std::set<std::string>& get_unknown_macros() const;
const std::unordered_set<std::string>& get_unknown_macros() const;
private:
typedef std::map<
typedef std::unordered_map<
std::string,
std::shared_ptr<libsinsp::filter::ast::expr>
> macro_defs;
@ -82,8 +82,8 @@ class filter_macro_resolver
struct visitor : public libsinsp::filter::ast::expr_visitor
{
std::unique_ptr<libsinsp::filter::ast::expr> m_node_substitute;
std::set<std::string>* m_unknown_macros;
std::set<std::string>* m_resolved_macros;
std::unordered_set<std::string>* m_unknown_macros;
std::unordered_set<std::string>* m_resolved_macros;
macro_defs* m_macros;
void visit(libsinsp::filter::ast::and_expr* e) override;
@ -95,7 +95,7 @@ class filter_macro_resolver
void visit(libsinsp::filter::ast::binary_check_expr* e) override;
};
std::set<std::string> m_unknown_macros;
std::set<std::string> m_resolved_macros;
std::unordered_set<std::string> m_unknown_macros;
std::unordered_set<std::string> m_resolved_macros;
macro_defs m_macros;
};

View File

@ -131,5 +131,5 @@ public:
private:
std::vector<T> m_entries;
std::map<std::string, size_t> m_index;
std::unordered_map<std::string, size_t> m_index;
};

View File

@ -515,16 +515,17 @@ int32_t json_event_filter_check::parse_field_name(const char *str, bool alloc_st
size_t idx_len = 0;
for(auto &info : m_info.m_fields)
for(const auto &info : get_info().m_fields)
{
if(m_aliases.find(info.m_name) == m_aliases.end())
auto iter = get_aliases().find(info.m_name);
if( iter == get_aliases().end())
{
throw falco_exception("Could not find alias for field name " + info.m_name);
}
m_uses_paths = info.m_uses_paths;
auto &al = m_aliases[info.m_name];
auto &al = iter->second;
// What follows the match must not be alphanumeric or a dot
if(strncmp(info.m_name.c_str(), str, info.m_name.size()) == 0 &&
@ -692,11 +693,6 @@ size_t json_event_filter_check::parsed_size()
}
}
json_event_filter_check::check_info &json_event_filter_check::get_info()
{
return m_info;
}
void json_event_filter_check::add_extracted_value(const std::string &str)
{
m_evalues.first.emplace_back(json_event_value(str));
@ -793,9 +789,9 @@ std::string jevt_filter_check::s_jevt_rawtime_field = "jevt.rawtime";
std::string jevt_filter_check::s_jevt_value_field = "jevt.value";
std::string jevt_filter_check::s_jevt_obj_field = "jevt.obj";
jevt_filter_check::jevt_filter_check()
const jevt_filter_check::check_info &jevt_filter_check::get_info() const
{
m_info = {"jevt",
static const check_info info = {"jevt",
"generic ways to access json events",
"",
{{s_jevt_time_field, "json event timestamp as a string that includes the nanosecond part"},
@ -803,6 +799,11 @@ jevt_filter_check::jevt_filter_check()
{s_jevt_rawtime_field, "absolute event timestamp, i.e. nanoseconds from epoch."},
{s_jevt_value_field, "General way to access single property from json object. The syntax is [<json pointer expression>]. The property is returned as a string", IDX_REQUIRED, IDX_KEY},
{s_jevt_obj_field, "The entire json object, stringified"}}};
return info;
}
jevt_filter_check::jevt_filter_check()
{
}
jevt_filter_check::~jevt_filter_check()
@ -1282,9 +1283,10 @@ bool k8s_audit_filter_check::extract_any_privileged(const json &j,
return true;
}
k8s_audit_filter_check::k8s_audit_filter_check()
const json_event_filter_check::check_info &k8s_audit_filter_check::get_info() const
{
m_info = {"ka",
static const json_event_filter_check::check_info
info = {"ka",
"Access K8s Audit Log Events",
"Fields with an IDX_ALLOWED annotation can be indexed (e.g. ka.req.containers.image[k] returns the image for the kth container). The index is optional--without any index the field returns values for all items. The index must be numeric with an IDX_NUMERIC annotation, and can be any string with an IDX_KEY annotation. Fields with an IDX_REQUIRED annotation require an index.",
{{"ka.auditid", "The unique id of the audit event"},
@ -1344,9 +1346,14 @@ k8s_audit_filter_check::k8s_audit_filter_check()
{"ka.response.code", "The response code"},
{"ka.response.reason", "The response reason (usually present only for failures)"},
{"ka.useragent", "The useragent of the client who made the request to the apiserver"}}};
return info;
}
const std::unordered_map<std::string, k8s_audit_filter_check::alias> &k8s_audit_filter_check::get_aliases() const
{
m_aliases = {
static const std::unordered_map<std::string, k8s_audit_filter_check::alias>
aliases = {
{"ka.auditid", {{"/auditID"_json_pointer}}},
{"ka.stage", {{"/stage"_json_pointer}}},
{"ka.auth.decision", {{"/annotations/authorization.k8s.io~1decision"_json_pointer}}},
@ -1404,7 +1411,11 @@ k8s_audit_filter_check::k8s_audit_filter_check()
{"ka.response.code", {{"/responseStatus/code"_json_pointer}}},
{"ka.response.reason", {{"/responseStatus/reason"_json_pointer}}},
{"ka.useragent", {{"/userAgent"_json_pointer}}}};
return aliases;
}
k8s_audit_filter_check::k8s_audit_filter_check()
{
}
k8s_audit_filter_check::~k8s_audit_filter_check()
@ -1475,14 +1486,14 @@ std::list<gen_event_filter_factory::filter_fieldclass_info> json_event_filter_fa
for(auto &chk: m_defined_checks)
{
json_event_filter_check::check_info &info = chk->get_info();
const json_event_filter_check::check_info &info = chk->get_info();
gen_event_filter_factory::filter_fieldclass_info cinfo;
cinfo.name = info.m_name;
cinfo.desc = info.m_desc;
cinfo.shortdesc = info.m_shortdesc;
for(auto &field : info.m_fields)
for(const auto &field : info.m_fields)
{
gen_event_filter_factory::filter_field_info info;
info.name = field.m_name;

View File

@ -173,7 +173,7 @@ public:
};
json_event_filter_check();
virtual ~json_event_filter_check();
virtual ~json_event_filter_check() = 0;
virtual int32_t parse_field_name(const char *str, bool alloc_state, bool needed_for_filtering);
void add_filter_value(const char *str, uint32_t len, uint32_t i = 0);
@ -197,7 +197,7 @@ public:
// brackets (e.g. ka.image[foo])
size_t parsed_size();
check_info &get_info();
virtual const check_info &get_info() const = 0;
//
// Allocate a new check of the same type. Must be overridden.
@ -260,9 +260,9 @@ protected:
//
// The version of parse_field_name in this base class will
// check a field specification against all the aliases.
std::map<std::string, struct alias> m_aliases;
virtual const std::unordered_map<std::string, alias> &get_aliases() const = 0;
check_info m_info;
//check_info m_info;
// The actual field name parsed in parse_field_name.
std::string m_field;
@ -315,11 +315,18 @@ public:
int32_t parse_field_name(const char* str, bool alloc_state, bool needed_for_filtering) final;
json_event_filter_check *allocate_new();
json_event_filter_check *allocate_new() override;
const check_info &get_info() const override;
protected:
bool extract_values(json_event *jevt) final;
const std::unordered_map<std::string, alias> &get_aliases() const override
{
static std::unordered_map<std::string, alias> a;
return a;
};
private:
@ -340,7 +347,10 @@ public:
k8s_audit_filter_check();
virtual ~k8s_audit_filter_check();
json_event_filter_check *allocate_new();
json_event_filter_check *allocate_new() override;
const check_info &get_info() const override;
const std::unordered_map<std::string, alias> &get_aliases() const override;
// Extract all images/image repositories from the provided containers
static bool extract_images(const nlohmann::json &j,

View File

@ -21,7 +21,7 @@ limitations under the License.
#include "filter_evttype_resolver.h"
#include "filter_warning_resolver.h"
#include <version.h>
#include <sstream>
#include <regex.h>
#define MAX_VISIBILITY ((uint32_t) -1)
@ -682,77 +682,6 @@ static void build_rule_exception_infos(
}
}
// todo(jasondellaluce): this breaks string escaping in lists
//
// Replaces, in place, every delimited occurrence of list.name inside cnd
// with the list's items joined by ", ".
// - cnd:  condition string, modified in place
// - list: list whose name is searched for and whose items are substituted
// Returns true if at least one occurrence was substituted.
static bool resolve_list(string& cnd, const rule_loader::list_info& list)
{
// whitespace characters that are swallowed around a substituted name
static string blanks = " \t\n\r";
// characters allowed immediately before/after a list name
static string delims = blanks + "(),=";
string new_cnd;
size_t start, end;
bool used = false;
start = cnd.find(list.name);
while (start != string::npos)
{
// the characters surrounding the name must
// be delims or the beginning/end of the string
end = start + list.name.length();
if ((start == 0 || delims.find(cnd[start - 1]) != string::npos)
&& (end >= cnd.length() || delims.find(cnd[end]) != string::npos))
{
// shift pointers to consume all whitespaces
while (start > 0
&& blanks.find(cnd[start - 1]) != string::npos)
{
start--;
}
while (end < cnd.length()
&& blanks.find(cnd[end]) != string::npos)
{
end++;
}
// create substitution string by concatenating all values
// (depends only on list.items, yet is rebuilt for every occurrence)
string sub = "";
for (auto &v : list.items)
{
if (!sub.empty())
{
sub += ", ";
}
sub += v;
}
// if substituted list is empty, we need to
// remove a comma from the left or the right
if (sub.empty())
{
if (start > 0 && cnd[start - 1] == ',')
{
start--;
}
else if (end < cnd.length() && cnd[end] == ',')
{
end++;
}
}
// compose new string with substitution:
// [prefix + " "] + sub + " " + suffix
new_cnd = "";
if (start > 0)
{
new_cnd += cnd.substr(0, start) + " ";
}
new_cnd += sub + " ";
// end never exceeds cnd.length() here, so the suffix is always appended
if (end <= cnd.length())
{
new_cnd += cnd.substr(end);
}
cnd = new_cnd;
// move the cursor past the inserted text so that the next search
// (which resumes at start + 1) does not rescan the substitution
start += sub.length() + 1;
used = true;
}
// look for the next candidate occurrence after the current position
start = cnd.find(list.name, start + 1);
}
return used;
}
static void resolve_macros(
indexed_vector<rule_loader::macro_info>& macros,
shared_ptr<ast::expr>& ast,
@ -782,20 +711,192 @@ static void resolve_macros(
}
}
/*
 * delim_chars
 * helper class to look for delimiters
 *
 * Holds a 256-entry table with one flag per possible byte value, so that
 * membership tests are a single indexed read.
 */
struct delim_chars
{
	// one flag per byte value; non-zero marks a delimiter
	std::vector<char> m_delims = std::vector<char>(256, 0);

	// Marks every character of char_array as a delimiter.
	explicit delim_chars(const std::string &char_array)
	{
		for (const char c : char_array)
		{
			m_delims[index_of(c)] = 1;
		}
	}

	// Returns true if c is one of the delimiters given at construction.
	bool contains(char c) const
	{
		return m_delims[index_of(c)] != 0;
	}

	// Returns the position of the first delimiter found in s,
	// or std::string::npos if s contains none (including when s is empty).
	size_t find_in(const std::string& s) const
	{
		for (size_t i = 0, j = s.size(); i < j; ++i)
		{
			if (contains(s[i]))
			{
				return i;
			}
		}
		return std::string::npos;
	}

private:
	// Maps a char to its table slot. Going through unsigned char keeps the
	// index within [0, 255] even where plain char is signed and the byte
	// is >= 0x80 (e.g. part of a UTF-8 sequence), which would otherwise
	// yield a negative, out-of-bounds index.
	static size_t index_of(char c)
	{
		return static_cast<unsigned char>(c);
	}
};
// Characters treated as separators by list_inserter: tab, newline,
// carriage return, space and comma.
#define LIST_DELMS "\t\n\r ,"
/*
* list_inserter
*/
struct list_inserter
{
using list_info_t = rule_loader::list_info;
using lists_map_t = indexed_vector<list_info_t>;
constexpr static const char* list_full = R"([\(][^()]+[\)])";
constexpr static const char* list_sub = {"[_a-z0-9]+[" LIST_DELMS "]*"};
const delim_chars delims{LIST_DELMS};
regex_t re_list{};
regex_t re_sub{};
list_inserter()
{
if (regcomp(&re_list, list_full, REG_EXTENDED) != 0)
{
ASSERT(false);
}
if (regcomp(&re_sub, list_sub, REG_EXTENDED) != 0)
{
ASSERT(false);
}
}
~list_inserter()
{
regfree(&re_list);
regfree(&re_sub);
}
static bool list_to_ret(std::string& ret, list_info_t* li, bool first)
{
li->used = true;
if (li->items.empty())
{
return true;
}
for (const auto &item : li->items)
{
if (item.empty())
{
continue;
}
if (first)
{
first = false;
}
else
{
ret += ", ";
}
ret += item;
}
return !first;
}
// split string found in insert_lists by delimiters
// concatenate lists expansion
void cat_lists(std::string& ret, const std::string& cond, lists_map_t &lists) const
{
regmatch_t re_match;
size_t start = 0;
bool first = true;
auto put = [&ret, &first](const std::string &item)
{
if (first)
{
first = false;
}
else
{
ret += ", ";
}
ret += item;
};
while (start < cond.size() && regexec(&re_sub, cond.c_str() + start, 1, &re_match, 0) == 0)
{
auto s = cond.substr(start + re_match.rm_so, re_match.rm_eo - re_match.rm_so);
auto tok = s.substr(0, delims.find_in(s));
auto *li = lists.at(tok);
if (li)
{
li->used = true;
for (const auto &item : li->items)
{
if (item.empty())
{
continue;
}
put(item);
}
}
else // not a list
{
put(tok);
}
start += re_match.rm_eo;
}
}
// top level search
// find all "(..)" entries
std::string insert_lists(const std::string &cond, lists_map_t &lists) const
{
std::string ret;
regmatch_t re_match;
size_t start = 0;
while (start < cond.size() && regexec(&re_list, cond.c_str() + start, 1, &re_match, 0)==0)
{
ret += cond.substr(start, re_match.rm_so);
ret += "(";
cat_lists(ret, cond.substr(start + re_match.rm_so + 1, re_match.rm_eo - re_match.rm_so - 2), lists);
ret += ")";
start += re_match.rm_eo;
}
if (start <= cond.size())
{
ret += cond.substr(start);
}
return ret.empty() ? cond : ret;
}
};
// note: there is no visibility order between filter conditions and lists
static shared_ptr<ast::expr> parse_condition(
string condition,
indexed_vector<rule_loader::list_info>& lists,
const rule_loader::context &ctx)
{
for (auto &l : lists)
{
if (resolve_list(condition, l))
{
l.used = true;
}
}
libsinsp::filter::parser p(condition);
static const list_inserter list_inserter;
auto cond = list_inserter.insert_lists(condition, lists);
libsinsp::filter::parser p(cond);
p.set_max_depth(1000);
try
{
@ -804,10 +905,9 @@ static shared_ptr<ast::expr> parse_condition(
}
catch (const sinsp_exception& e)
{
throw rule_loader::rule_load_exception(
load_result::LOAD_ERR_COMPILE_CONDITION,
e.what(),
ctx);
throw falco_exception("Compilation error when compiling \n"
+ condition + "\n"
+ cond + "\n: " + to_string(p.get_pos().col) + ": " + e.what());
}
}