From f9ee45b38e3e33a456d093d843bc9fd4a6f924a9 Mon Sep 17 00:00:00 2001 From: VadimZy Date: Wed, 10 Aug 2022 15:12:09 -0700 Subject: [PATCH] Improve Falco engine performance when loading rules and creating the rule sets - replace std::set with fixed size vector in event types propagation - rework lists expansion by replacing repetitive string::find in constantly growing expansion string with regex tokenization - improve json_event parsing by moving const initializations into static routines Signed-off-by: VadimZy --- userspace/engine/filter_evttype_resolver.cpp | 34 +-- userspace/engine/filter_evttype_resolver.h | 133 ++++++++- userspace/engine/filter_macro_resolver.cpp | 4 +- userspace/engine/filter_macro_resolver.h | 18 +- userspace/engine/indexed_vector.h | 2 +- userspace/engine/json_evt.cpp | 163 +++++------ userspace/engine/json_evt.h | 24 +- userspace/engine/rule_loader.cpp | 268 +++++++++++++------ 8 files changed, 441 insertions(+), 205 deletions(-) diff --git a/userspace/engine/filter_evttype_resolver.cpp b/userspace/engine/filter_evttype_resolver.cpp index cf23b6e4..fa0c0b70 100644 --- a/userspace/engine/filter_evttype_resolver.cpp +++ b/userspace/engine/filter_evttype_resolver.cpp @@ -27,21 +27,17 @@ static bool is_evttype_operator(const string& op) return op == "==" || op == "=" || op == "!=" || op == "in"; } -void filter_evttype_resolver::visitor::inversion(set& types) +void filter_evttype_resolver::visitor::inversion(falco_event_types& types) { - set all_types; + falco_event_types all_types; evttypes("", all_types); if (types != all_types) // we don't invert the "all types" set { - set diff = types; - types.clear(); - set_difference( - all_types.begin(), all_types.end(), diff.begin(), diff.end(), - inserter(types, types.begin())); + types = all_types.diff(types); } } -void filter_evttype_resolver::visitor::evttypes(string evtname, set& out) +void filter_evttype_resolver::visitor::evttypes(string evtname, falco_event_types& out) { // Fill in from 2 to PPM_EVENT_MAX-1. 0 and 1 are excluded as // those are PPM_GENERIC_E/PPME_GENERIC_X @@ -59,42 +55,38 @@ void filter_evttype_resolver::visitor::evttypes(string evtname, set& o void filter_evttype_resolver::evttypes( ast::expr* filter, - set& out) const + std::set& out) const { visitor v; v.m_expect_value = false; v.m_last_node_evttypes.clear(); filter->accept(&v); - out.insert(v.m_last_node_evttypes.begin(), v.m_last_node_evttypes.end()); + v.m_last_node_evttypes.for_each([&out](uint16_t val){out.insert(val); return true;}); } void filter_evttype_resolver::evttypes( shared_ptr filter, - set& out) const + std::set& out) const { visitor v; v.m_expect_value = false; v.m_last_node_evttypes.clear(); filter.get()->accept(&v); - out.insert(v.m_last_node_evttypes.begin(), v.m_last_node_evttypes.end()); + v.m_last_node_evttypes.for_each([&out](uint16_t val){out.insert(val); return true;} ); } // "and" nodes evttypes are the intersection of the evttypes of their children. // we initialize the set with "all event types" void filter_evttype_resolver::visitor::visit(ast::and_expr* e) { - set types, inters; + falco_event_types types; evttypes("", types); m_last_node_evttypes.clear(); for (auto &c : e->children) { - inters.clear(); + falco_event_types inters; c->accept(this); - set_intersection( - types.begin(), types.end(), - m_last_node_evttypes.begin(), m_last_node_evttypes.end(), - inserter(inters, inters.begin())); - types = inters; + types = types.intersect(m_last_node_evttypes); } m_last_node_evttypes = types; } @@ -102,12 +94,12 @@ void filter_evttype_resolver::visitor::visit(ast::and_expr* e) // "or" nodes evttypes are the union of the evttypes their children void filter_evttype_resolver::visitor::visit(ast::or_expr* e) { - set types; + falco_event_types types; m_last_node_evttypes.clear(); for (auto &c : e->children) { c->accept(this); - types.insert(m_last_node_evttypes.begin(), m_last_node_evttypes.end()); + types.merge(m_last_node_evttypes); } m_last_node_evttypes = types; } diff --git a/userspace/engine/filter_evttype_resolver.h b/userspace/engine/filter_evttype_resolver.h index 10b5a45a..a99e8bc3 100644 --- a/userspace/engine/filter_evttype_resolver.h +++ b/userspace/engine/filter_evttype_resolver.h @@ -16,10 +16,130 @@ limitations under the License. #pragma once +#include + #include #include #include #include +#include + +class falco_event_types +{ +private: + using vec_t = std::vector; + vec_t m_types{}; + + static inline void check_range(uint16_t e) + { + if(e > PPM_EVENT_MAX) + { + throw std::range_error("invalid event type"); + } + } + +public: + falco_event_types(falco_event_types&&) = default; + falco_event_types(const falco_event_types&) = default; + falco_event_types& operator=(falco_event_types&&) = default; + falco_event_types& operator=(const falco_event_types&) = default; + + inline falco_event_types(): + m_types(PPM_EVENT_MAX + 1, 0) + { + } + + inline void insert(uint16_t e) + { + check_range(e); + m_types[e] = 1; + } + + void merge(const falco_event_types& other) + { + for(int i = 0; i <= PPM_EVENT_MAX; ++i) + { + m_types[i] |= other.m_types[i]; + } + } + + void merge(const std::set& other) + { + for(const auto& e : other) + { + insert(e); + } + } + + inline bool contains(uint16_t e) const + { + check_range(e); + return m_types[e] != 0; + } + + void clear() + { + for(auto& v : m_types) + { + v = 0; + } + } + + bool equals(const falco_event_types& other) const + { + return m_types == other.m_types; + } + + falco_event_types diff(const falco_event_types& other) + { + falco_event_types ret; + for(size_t i = 0; i <= PPM_EVENT_MAX; ++i) + { + if(m_types[i] == 1 && other.m_types[i] == 0) + { + ret.m_types[i] = 1; + } + } + return ret; + } + + falco_event_types intersect(const falco_event_types& other) + { + falco_event_types ret; + for(size_t i = 0; i <= PPM_EVENT_MAX; ++i) + { + if(m_types[i] == 1 && other.m_types[i] == 1) + { + ret.m_types[i] = 1; + } + } + return ret; + } + + void for_each(std::function consumer) const + { + for(uint16_t i = 0; i < m_types.size(); ++i) + { + if(m_types[i] != 0) + { + if(!consumer(i)) + { + return; + } + } + } + } +}; + +inline bool operator==(const falco_event_types& lhs, const falco_event_types& rhs) +{ + return lhs.equals(rhs); +} + +inline bool operator!=(const falco_event_types& lhs, const falco_event_types& rhs) +{ + return !(lhs == rhs); +} /*! \brief Helper class for finding event types @@ -35,9 +155,12 @@ public: string is passed, all the available evttypes are collected \param out The set to be filled with the evttypes */ - inline void evttypes(std::string evtname, std::set& out) const + inline void evttypes(std::string evtname, falco_event_types& out) const { - visitor().evttypes(evtname, out); + falco_event_types evt_types; + visitor().evttypes(evtname, evt_types); + evt_types.for_each([&out](uint16_t val) + {out.insert(val); return true; }); } /*! @@ -64,7 +187,7 @@ private: struct visitor : public libsinsp::filter::ast::expr_visitor { bool m_expect_value; - std::set m_last_node_evttypes; + falco_event_types m_last_node_evttypes; void visit(libsinsp::filter::ast::and_expr* e) override; void visit(libsinsp::filter::ast::or_expr* e) override; @@ -73,7 +196,7 @@ private: void visit(libsinsp::filter::ast::list_expr* e) override; void visit(libsinsp::filter::ast::unary_check_expr* e) override; void visit(libsinsp::filter::ast::binary_check_expr* e) override; - void inversion(std::set& types); - void evttypes(std::string evtname, std::set& out); + void inversion(falco_event_types& types); + void evttypes(std::string evtname, falco_event_types& out); }; }; diff --git a/userspace/engine/filter_macro_resolver.cpp b/userspace/engine/filter_macro_resolver.cpp index bc1d2982..0e86136f 100644 --- a/userspace/engine/filter_macro_resolver.cpp +++ b/userspace/engine/filter_macro_resolver.cpp @@ -61,12 +61,12 @@ void filter_macro_resolver::set_macro( m_macros[name] = macro; } -const set& filter_macro_resolver::get_unknown_macros() const +const unordered_set& filter_macro_resolver::get_unknown_macros() const { return m_unknown_macros; } -const set& filter_macro_resolver::get_resolved_macros() const +const unordered_set& filter_macro_resolver::get_resolved_macros() const { return m_resolved_macros; } diff --git a/userspace/engine/filter_macro_resolver.h b/userspace/engine/filter_macro_resolver.h index 359f876f..e73bd983 100644 --- a/userspace/engine/filter_macro_resolver.h +++ b/userspace/engine/filter_macro_resolver.h @@ -18,8 +18,8 @@ limitations under the License. #include #include -#include -#include +#include +#include #include /*! @@ -63,7 +63,7 @@ class filter_macro_resolver substituted during the last invocation of run(). Should be non-empty if the last invocation of run() returned true. */ - const std::set& get_resolved_macros() const; + const std::unordered_set& get_resolved_macros() const; /*! \brief Returns a set containing the names of all the macros @@ -71,10 +71,10 @@ class filter_macro_resolver A macro remains unresolved if it is found inside the processed filter but it was not defined with set_macro(); */ - const std::set& get_unknown_macros() const; + const std::unordered_set& get_unknown_macros() const; private: - typedef std::map< + typedef std::unordered_map< std::string, std::shared_ptr > macro_defs; @@ -82,8 +82,8 @@ class filter_macro_resolver struct visitor : public libsinsp::filter::ast::expr_visitor { std::unique_ptr m_node_substitute; - std::set* m_unknown_macros; - std::set* m_resolved_macros; + std::unordered_set* m_unknown_macros; + std::unordered_set* m_resolved_macros; macro_defs* m_macros; void visit(libsinsp::filter::ast::and_expr* e) override; @@ -95,7 +95,7 @@ class filter_macro_resolver void visit(libsinsp::filter::ast::binary_check_expr* e) override; }; - std::set m_unknown_macros; - std::set m_resolved_macros; + std::unordered_set m_unknown_macros; + std::unordered_set m_resolved_macros; macro_defs m_macros; }; diff --git a/userspace/engine/indexed_vector.h b/userspace/engine/indexed_vector.h index 41bacecb..86aa4989 100644 --- a/userspace/engine/indexed_vector.h +++ b/userspace/engine/indexed_vector.h @@ -131,5 +131,5 @@ public: private: std::vector m_entries; - std::map m_index; + std::unordered_map m_index; }; diff --git a/userspace/engine/json_evt.cpp b/userspace/engine/json_evt.cpp index 26d0e2b4..5b9250d9 100644 --- a/userspace/engine/json_evt.cpp +++ b/userspace/engine/json_evt.cpp @@ -515,16 +515,17 @@ int32_t json_event_filter_check::parse_field_name(const char *str, bool alloc_st size_t idx_len = 0; - for(auto &info : m_info.m_fields) + for(const auto &info : get_info().m_fields) { - if(m_aliases.find(info.m_name) == m_aliases.end()) + auto iter = get_aliases().find(info.m_name); + if( iter == get_aliases().end()) { throw falco_exception("Could not find alias for field name " + info.m_name); } m_uses_paths = info.m_uses_paths; - auto &al = m_aliases[info.m_name]; + auto &al = iter->second; // What follows the match must not be alphanumeric or a dot if(strncmp(info.m_name.c_str(), str, info.m_name.size()) == 0 && @@ -692,11 +693,6 @@ size_t json_event_filter_check::parsed_size() } } -json_event_filter_check::check_info &json_event_filter_check::get_info() -{ - return m_info; -} - void json_event_filter_check::add_extracted_value(const std::string &str) { m_evalues.first.emplace_back(json_event_value(str)); @@ -793,9 +789,9 @@ std::string jevt_filter_check::s_jevt_rawtime_field = "jevt.rawtime"; std::string jevt_filter_check::s_jevt_value_field = "jevt.value"; std::string jevt_filter_check::s_jevt_obj_field = "jevt.obj"; -jevt_filter_check::jevt_filter_check() +const jevt_filter_check::check_info &jevt_filter_check::get_info() const { - m_info = {"jevt", + static const check_info info = {"jevt", "generic ways to access json events", "", {{s_jevt_time_field, "json event timestamp as a string that includes the nanosecond part"}, @@ -803,6 +799,11 @@ jevt_filter_check::jevt_filter_check() {s_jevt_rawtime_field, "absolute event timestamp, i.e. nanoseconds from epoch."}, {s_jevt_value_field, "General way to access single property from json object. The syntax is []. The property is returned as a string", IDX_REQUIRED, IDX_KEY}, {s_jevt_obj_field, "The entire json object, stringified"}}}; + return info; +} + +jevt_filter_check::jevt_filter_check() +{ } jevt_filter_check::~jevt_filter_check() @@ -1282,71 +1283,77 @@ bool k8s_audit_filter_check::extract_any_privileged(const json &j, return true; } -k8s_audit_filter_check::k8s_audit_filter_check() +const json_event_filter_check::check_info &k8s_audit_filter_check::get_info() const { - m_info = {"ka", - "Access K8s Audit Log Events", - "Fields with an IDX_ALLOWED annotation can be indexed (e.g. ka.req.containers.image[k] returns the image for the kth container). The index is optional--without any index the field returns values for all items. The index must be numeric with an IDX_NUMERIC annotation, and can be any string with an IDX_KEY annotation. Fields with an IDX_REQUIRED annotation require an index.", - {{"ka.auditid", "The unique id of the audit event"}, - {"ka.stage", "Stage of the request (e.g. RequestReceived, ResponseComplete, etc.)"}, - {"ka.auth.decision", "The authorization decision"}, - {"ka.auth.reason", "The authorization reason"}, - {"ka.user.name", "The user name performing the request"}, - {"ka.user.groups", "The groups to which the user belongs"}, - {"ka.impuser.name", "The impersonated user name"}, - {"ka.verb", "The action being performed"}, - {"ka.uri", "The request URI as sent from client to server"}, - {"ka.uri.param", "The value of a given query parameter in the uri (e.g. when uri=/foo?key=val, ka.uri.param[key] is val).", IDX_REQUIRED, IDX_KEY}, - {"ka.target.name", "The target object name"}, - {"ka.target.namespace", "The target object namespace"}, - {"ka.target.resource", "The target object resource"}, - {"ka.target.subresource", "The target object subresource"}, - {"ka.req.binding.subjects", "When the request object refers to a cluster role binding, the subject (e.g. account/users) being linked by the binding"}, - {"ka.req.binding.role", "When the request object refers to a cluster role binding, the role being linked by the binding"}, - {"ka.req.binding.subject.has_name", "Deprecated, always returns \"N/A\". Only provided for backwards compatibility", IDX_REQUIRED, IDX_KEY}, - {"ka.req.configmap.name", "If the request object refers to a configmap, the configmap name"}, - {"ka.req.configmap.obj", "If the request object refers to a configmap, the entire configmap object"}, - {"ka.req.pod.containers.image", "When the request object refers to a pod, the container's images.", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.container.image", "Deprecated by ka.req.pod.containers.image. Returns the image of the first container only"}, - {"ka.req.pod.containers.image.repository", "The same as req.container.image, but only the repository part (e.g. falcosecurity/falco).", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.container.image.repository", "Deprecated by ka.req.pod.containers.image.repository. Returns the repository of the first container only"}, - {"ka.req.pod.host_ipc", "When the request object refers to a pod, the value of the hostIPC flag."}, - {"ka.req.pod.host_network", "When the request object refers to a pod, the value of the hostNetwork flag."}, - {"ka.req.container.host_network", "Deprecated alias for ka.req.pod.host_network"}, - {"ka.req.pod.host_pid", "When the request object refers to a pod, the value of the hostPID flag."}, - {"ka.req.pod.containers.host_port", "When the request object refers to a pod, all container's hostPort values.", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.containers.privileged", "When the request object refers to a pod, the value of the privileged flag for all containers.", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.container.privileged", "Deprecated by ka.req.pod.containers.privileged. Returns true if any container has privileged=true"}, - {"ka.req.pod.containers.allow_privilege_escalation", "When the request object refers to a pod, the value of the allowPrivilegeEscalation flag for all containers", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.containers.read_only_fs", "When the request object refers to a pod, the value of the readOnlyRootFilesystem flag for all containers", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.run_as_user", "When the request object refers to a pod, the runAsUser uid specified in the security context for the pod. See ....containers.run_as_user for the runAsUser for individual containers"}, - {"ka.req.pod.containers.run_as_user", "When the request object refers to a pod, the runAsUser uid for all containers", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.containers.eff_run_as_user", "When the request object refers to a pod, the initial uid that will be used for all containers. This combines information from both the pod and container security contexts and uses 0 if no uid is specified", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.run_as_group", "When the request object refers to a pod, the runAsGroup gid specified in the security context for the pod. See ....containers.run_as_group for the runAsGroup for individual containers"}, - {"ka.req.pod.containers.run_as_group", "When the request object refers to a pod, the runAsGroup gid for all containers", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.containers.eff_run_as_group", "When the request object refers to a pod, the initial gid that will be used for all containers. This combines information from both the pod and container security contexts and uses 0 if no gid is specified", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.containers.proc_mount", "When the request object refers to a pod, the procMount types for all containers", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.role.rules", "When the request object refers to a role/cluster role, the rules associated with the role"}, - {"ka.req.role.rules.apiGroups", "When the request object refers to a role/cluster role, the api groups associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.role.rules.nonResourceURLs", "When the request object refers to a role/cluster role, the non resource urls associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.role.rules.verbs", "When the request object refers to a role/cluster role, the verbs associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.role.rules.resources", "When the request object refers to a role/cluster role, the resources associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.fs_group", "When the request object refers to a pod, the fsGroup gid specified by the security context."}, - {"ka.req.pod.supplemental_groups", "When the request object refers to a pod, the supplementalGroup gids specified by the security context."}, - {"ka.req.pod.containers.add_capabilities", "When the request object refers to a pod, all capabilities to add when running the container.", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.service.type", "When the request object refers to a service, the service type"}, - {"ka.req.service.ports", "When the request object refers to a service, the service's ports", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.volumes.hostpath", "When the request object refers to a pod, all hostPath paths specified for all volumes", IDX_ALLOWED, IDX_NUMERIC, true}, - {"ka.req.volume.hostpath", "Deprecated by ka.req.pod.volumes.hostpath. Return true if the provided (host) path prefix is used by any volume", IDX_ALLOWED, IDX_KEY}, - {"ka.req.pod.volumes.flexvolume_driver", "When the request object refers to a pod, all flexvolume drivers specified for all volumes", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.req.pod.volumes.volume_type", "When the request object refers to a pod, all volume types for all volumes", IDX_ALLOWED, IDX_NUMERIC}, - {"ka.resp.name", "The response object name"}, - {"ka.response.code", "The response code"}, - {"ka.response.reason", "The response reason (usually present only for failures)"}, - {"ka.useragent", "The useragent of the client who made the request to the apiserver"}}}; + static const json_event_filter_check::check_info + info = {"ka", + "Access K8s Audit Log Events", + "Fields with an IDX_ALLOWED annotation can be indexed (e.g. ka.req.containers.image[k] returns the image for the kth container). The index is optional--without any index the field returns values for all items. The index must be numeric with an IDX_NUMERIC annotation, and can be any string with an IDX_KEY annotation. Fields with an IDX_REQUIRED annotation require an index.", + {{"ka.auditid", "The unique id of the audit event"}, + {"ka.stage", "Stage of the request (e.g. RequestReceived, ResponseComplete, etc.)"}, + {"ka.auth.decision", "The authorization decision"}, + {"ka.auth.reason", "The authorization reason"}, + {"ka.user.name", "The user name performing the request"}, + {"ka.user.groups", "The groups to which the user belongs"}, + {"ka.impuser.name", "The impersonated user name"}, + {"ka.verb", "The action being performed"}, + {"ka.uri", "The request URI as sent from client to server"}, + {"ka.uri.param", "The value of a given query parameter in the uri (e.g. when uri=/foo?key=val, ka.uri.param[key] is val).", IDX_REQUIRED, IDX_KEY}, + {"ka.target.name", "The target object name"}, + {"ka.target.namespace", "The target object namespace"}, + {"ka.target.resource", "The target object resource"}, + {"ka.target.subresource", "The target object subresource"}, + {"ka.req.binding.subjects", "When the request object refers to a cluster role binding, the subject (e.g. account/users) being linked by the binding"}, + {"ka.req.binding.role", "When the request object refers to a cluster role binding, the role being linked by the binding"}, + {"ka.req.binding.subject.has_name", "Deprecated, always returns \"N/A\". Only provided for backwards compatibility", IDX_REQUIRED, IDX_KEY}, + {"ka.req.configmap.name", "If the request object refers to a configmap, the configmap name"}, + {"ka.req.configmap.obj", "If the request object refers to a configmap, the entire configmap object"}, + {"ka.req.pod.containers.image", "When the request object refers to a pod, the container's images.", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.container.image", "Deprecated by ka.req.pod.containers.image. Returns the image of the first container only"}, + {"ka.req.pod.containers.image.repository", "The same as req.container.image, but only the repository part (e.g. falcosecurity/falco).", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.container.image.repository", "Deprecated by ka.req.pod.containers.image.repository. Returns the repository of the first container only"}, + {"ka.req.pod.host_ipc", "When the request object refers to a pod, the value of the hostIPC flag."}, + {"ka.req.pod.host_network", "When the request object refers to a pod, the value of the hostNetwork flag."}, + {"ka.req.container.host_network", "Deprecated alias for ka.req.pod.host_network"}, + {"ka.req.pod.host_pid", "When the request object refers to a pod, the value of the hostPID flag."}, + {"ka.req.pod.containers.host_port", "When the request object refers to a pod, all container's hostPort values.", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.containers.privileged", "When the request object refers to a pod, the value of the privileged flag for all containers.", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.container.privileged", "Deprecated by ka.req.pod.containers.privileged. Returns true if any container has privileged=true"}, + {"ka.req.pod.containers.allow_privilege_escalation", "When the request object refers to a pod, the value of the allowPrivilegeEscalation flag for all containers", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.containers.read_only_fs", "When the request object refers to a pod, the value of the readOnlyRootFilesystem flag for all containers", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.run_as_user", "When the request object refers to a pod, the runAsUser uid specified in the security context for the pod. See ....containers.run_as_user for the runAsUser for individual containers"}, + {"ka.req.pod.containers.run_as_user", "When the request object refers to a pod, the runAsUser uid for all containers", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.containers.eff_run_as_user", "When the request object refers to a pod, the initial uid that will be used for all containers. This combines information from both the pod and container security contexts and uses 0 if no uid is specified", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.run_as_group", "When the request object refers to a pod, the runAsGroup gid specified in the security context for the pod. See ....containers.run_as_group for the runAsGroup for individual containers"}, + {"ka.req.pod.containers.run_as_group", "When the request object refers to a pod, the runAsGroup gid for all containers", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.containers.eff_run_as_group", "When the request object refers to a pod, the initial gid that will be used for all containers. This combines information from both the pod and container security contexts and uses 0 if no gid is specified", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.containers.proc_mount", "When the request object refers to a pod, the procMount types for all containers", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.role.rules", "When the request object refers to a role/cluster role, the rules associated with the role"}, + {"ka.req.role.rules.apiGroups", "When the request object refers to a role/cluster role, the api groups associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.role.rules.nonResourceURLs", "When the request object refers to a role/cluster role, the non resource urls associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.role.rules.verbs", "When the request object refers to a role/cluster role, the verbs associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.role.rules.resources", "When the request object refers to a role/cluster role, the resources associated with the role's rules", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.fs_group", "When the request object refers to a pod, the fsGroup gid specified by the security context."}, + {"ka.req.pod.supplemental_groups", "When the request object refers to a pod, the supplementalGroup gids specified by the security context."}, + {"ka.req.pod.containers.add_capabilities", "When the request object refers to a pod, all capabilities to add when running the container.", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.service.type", "When the request object refers to a service, the service type"}, + {"ka.req.service.ports", "When the request object refers to a service, the service's ports", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.volumes.hostpath", "When the request object refers to a pod, all hostPath paths specified for all volumes", IDX_ALLOWED, IDX_NUMERIC, true}, + {"ka.req.volume.hostpath", "Deprecated by ka.req.pod.volumes.hostpath. Return true if the provided (host) path prefix is used by any volume", IDX_ALLOWED, IDX_KEY}, + {"ka.req.pod.volumes.flexvolume_driver", "When the request object refers to a pod, all flexvolume drivers specified for all volumes", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.req.pod.volumes.volume_type", "When the request object refers to a pod, all volume types for all volumes", IDX_ALLOWED, IDX_NUMERIC}, + {"ka.resp.name", "The response object name"}, + {"ka.response.code", "The response code"}, + {"ka.response.reason", "The response reason (usually present only for failures)"}, + {"ka.useragent", "The useragent of the client who made the request to the apiserver"}}}; + return info; - { - m_aliases = { +} + +const std::unordered_map &k8s_audit_filter_check::get_aliases() const +{ + static const std::unordered_map + aliases = { {"ka.auditid", {{"/auditID"_json_pointer}}}, {"ka.stage", {{"/stage"_json_pointer}}}, {"ka.auth.decision", {{"/annotations/authorization.k8s.io~1decision"_json_pointer}}}, @@ -1404,7 +1411,11 @@ k8s_audit_filter_check::k8s_audit_filter_check() {"ka.response.code", {{"/responseStatus/code"_json_pointer}}}, {"ka.response.reason", {{"/responseStatus/reason"_json_pointer}}}, {"ka.useragent", {{"/userAgent"_json_pointer}}}}; - } + return aliases; +} + +k8s_audit_filter_check::k8s_audit_filter_check() +{ } k8s_audit_filter_check::~k8s_audit_filter_check() @@ -1475,14 +1486,14 @@ std::list json_event_filter_fa for(auto &chk: m_defined_checks) { - json_event_filter_check::check_info &info = chk->get_info(); + const json_event_filter_check::check_info &info = chk->get_info(); gen_event_filter_factory::filter_fieldclass_info cinfo; cinfo.name = info.m_name; cinfo.desc = info.m_desc; cinfo.shortdesc = info.m_shortdesc; - for(auto &field : info.m_fields) + for(const auto &field : info.m_fields) { gen_event_filter_factory::filter_field_info info; info.name = field.m_name; diff --git a/userspace/engine/json_evt.h b/userspace/engine/json_evt.h index aa84f4ea..cd6a5de1 100644 --- a/userspace/engine/json_evt.h +++ b/userspace/engine/json_evt.h @@ -173,7 +173,7 @@ public: }; json_event_filter_check(); - virtual ~json_event_filter_check(); + virtual ~json_event_filter_check() = 0; virtual int32_t parse_field_name(const char *str, bool alloc_state, bool needed_for_filtering); void add_filter_value(const char *str, uint32_t len, uint32_t i = 0); @@ -197,7 +197,7 @@ public: // brackets (e.g. ka.image[foo]) size_t parsed_size(); - check_info &get_info(); + virtual const check_info &get_info() const = 0; // // Allocate a new check of the same type. Must be overridden. @@ -260,9 +260,9 @@ protected: // // The version of parse_field_name in this base class will // check a field specification against all the aliases. - std::map m_aliases; + virtual const std::unordered_map &get_aliases() const = 0; - check_info m_info; + //check_info m_info; // The actual field name parsed in parse_field_name. std::string m_field; @@ -315,11 +315,18 @@ public: int32_t parse_field_name(const char* str, bool alloc_state, bool needed_for_filtering) final; - json_event_filter_check *allocate_new(); + json_event_filter_check *allocate_new() override; + const check_info &get_info() const override; protected: bool extract_values(json_event *jevt) final; + const std::unordered_map &get_aliases() const override + { + static std::unordered_map a; + return a; + }; + private: @@ -340,9 +347,12 @@ public: k8s_audit_filter_check(); virtual ~k8s_audit_filter_check(); - json_event_filter_check *allocate_new(); + json_event_filter_check *allocate_new() override; - // Extract all images/image repositories from the provided containers + const check_info &get_info() const override; + const std::unordered_map &get_aliases() const override; + + // Extract all images/image repositories from the provided containers static bool extract_images(const nlohmann::json &j, json_event_filter_check &jchk); diff --git a/userspace/engine/rule_loader.cpp b/userspace/engine/rule_loader.cpp index b87825e9..f328ceaf 100644 --- a/userspace/engine/rule_loader.cpp +++ b/userspace/engine/rule_loader.cpp @@ -21,7 +21,7 @@ limitations under the License. #include "filter_evttype_resolver.h" #include "filter_warning_resolver.h" #include -#include +#include #define MAX_VISIBILITY ((uint32_t) -1) @@ -682,77 +682,6 @@ static void build_rule_exception_infos( } } -// todo(jasondellaluce): this breaks string escaping in lists -static bool resolve_list(string& cnd, const rule_loader::list_info& list) -{ - static string blanks = " \t\n\r"; - static string delims = blanks + "(),="; - string new_cnd; - size_t start, end; - bool used = false; - start = cnd.find(list.name); - while (start != string::npos) - { - // the characters surrounding the name must - // be delims of beginning/end of string - end = start + list.name.length(); - if ((start == 0 || delims.find(cnd[start - 1]) != string::npos) - && (end >= cnd.length() || delims.find(cnd[end]) != string::npos)) - { - // shift pointers to consume all whitespaces - while (start > 0 - && blanks.find(cnd[start - 1]) != string::npos) - { - start--; - } - while (end < cnd.length() - && blanks.find(cnd[end]) != string::npos) - { - end++; - } - // create substitution string by concatenating all values - string sub = ""; - for (auto &v : list.items) - { - if (!sub.empty()) - { - sub += ", "; - } - sub += v; - } - // if substituted list is empty, we need to - // remove a comma from the left or the right - if (sub.empty()) - { - if (start > 0 && cnd[start - 1] == ',') - { - start--; - } - else if (end < cnd.length() && cnd[end] == ',') - { - end++; - } - } - // compose new string with substitution - new_cnd = ""; - if (start > 0) - { - new_cnd += cnd.substr(0, start) + " "; - } - new_cnd += sub + " "; - if (end <= cnd.length()) - { - new_cnd += cnd.substr(end); - } - cnd = new_cnd; - start += sub.length() + 1; - used = true; - } - start = cnd.find(list.name, start + 1); - } - return used; -} - static void resolve_macros( indexed_vector& macros, shared_ptr& ast, @@ -782,20 +711,192 @@ static void resolve_macros( } } + +/* + * delim_chars + * helper class to look for delimiters + */ +struct delim_chars +{ + std::vector m_delims = std::vector(256, 0); + + explicit delim_chars(const std::string &char_array) + { + for (auto c : char_array) + { + m_delims[c] = 1; + } + } + + bool contains(char c) const + { + return m_delims[c]!=0; + } + + size_t find_in(const std::string& s) const + { + for (size_t i = 0, j = s.size(); i < j; ++i) + { + if (contains(s[i])) + { + return i; + } + } + return std::string::npos; + } +}; + +#define LIST_DELMS "\t\n\r ," + +/* + * list_inserter + */ +struct list_inserter +{ + using list_info_t = rule_loader::list_info; + using lists_map_t = indexed_vector; + + constexpr static const char* list_full = R"([\(][^()]+[\)])"; + constexpr static const char* list_sub = {"[_a-z0-9]+[" LIST_DELMS "]*"}; + + const delim_chars delims{LIST_DELMS}; + + regex_t re_list{}; + regex_t re_sub{}; + + list_inserter() + { + if (regcomp(&re_list, list_full, REG_EXTENDED) != 0) + { + ASSERT(false); + } + + if (regcomp(&re_sub, list_sub, REG_EXTENDED) != 0) + { + ASSERT(false); + } + } + + ~list_inserter() + { + regfree(&re_list); + regfree(&re_sub); + } + + static bool list_to_ret(std::string& ret, list_info_t* li, bool first) + { + li->used = true; + if (li->items.empty()) + { + return true; + } + + for (const auto &item : li->items) + { + if (item.empty()) + { + continue; + } + + if (first) + { + first = false; + } + else + { + ret += ", "; + } + ret += item; + } + return !first; + } + + // split string found in insert_lists by delimiters + // concatenate lists expansion + void cat_lists(std::string& ret, const std::string& cond, lists_map_t &lists) const + { + regmatch_t re_match; + size_t start = 0; + bool first = true; + + auto put = [&ret, &first](const std::string &item) + { + if (first) + { + first = false; + } + else + { + ret += ", "; + } + ret += item; + }; + + while (start < cond.size() && regexec(&re_sub, cond.c_str() + start, 1, &re_match, 0) == 0) + { + auto s = cond.substr(start + re_match.rm_so, re_match.rm_eo - re_match.rm_so); + auto tok = s.substr(0, delims.find_in(s)); + auto *li = lists.at(tok); + + if (li) + { + li->used = true; + for (const auto &item : li->items) + { + if (item.empty()) + { + continue; + } + put(item); + } + } + else // not a list + { + put(tok); + } + + start += re_match.rm_eo; + } + } + + // top level search + // find all "(..)" entries + std::string insert_lists(const std::string &cond, lists_map_t &lists) const + { + std::string ret; + regmatch_t re_match; + size_t start = 0; + while (start < cond.size() && regexec(&re_list, cond.c_str() + start, 1, &re_match, 0)==0) + { + ret += cond.substr(start, re_match.rm_so); + ret += "("; + cat_lists(ret, cond.substr(start + re_match.rm_so + 1, re_match.rm_eo - re_match.rm_so - 2), lists); + ret += ")"; + start += re_match.rm_eo; + } + + if (start <= cond.size()) + { + ret += cond.substr(start); + } + + return ret.empty() ? cond : ret; + } +}; + + // note: there is no visibility order between filter conditions and lists static shared_ptr parse_condition( string condition, indexed_vector& lists, const rule_loader::context &ctx) { - for (auto &l : lists) - { - if (resolve_list(condition, l)) - { - l.used = true; - } - } - libsinsp::filter::parser p(condition); + + static const list_inserter list_inserter; + + auto cond = list_inserter.insert_lists(condition, lists); + + libsinsp::filter::parser p(cond); p.set_max_depth(1000); try { @@ -804,10 +905,9 @@ static shared_ptr parse_condition( } catch (const sinsp_exception& e) { - throw rule_loader::rule_load_exception( - load_result::LOAD_ERR_COMPILE_CONDITION, - e.what(), - ctx); + throw falco_exception("Compilation error when compiling \n" + + condition + "\n" + + cond + "\n: " + to_string(p.get_pos().col) + ": " + e.what()); } }