cleanup(userspace,unit_tests): moved rule schema under engine.

Also, moved yaml_helper under engine/ folder.
Ported rule json schema validation in the engine.

Also, updated rule_loader tests to check for validation.

Signed-off-by: Federico Di Pierro <nierro92@gmail.com>
This commit is contained in:
Federico Di Pierro
2024-09-06 14:52:11 +02:00
committed by poiana
parent 895e50d3a0
commit 5bd2d5a63e
15 changed files with 295 additions and 136 deletions

View File

@@ -47,6 +47,153 @@ limitations under the License.
const std::string falco_engine::s_default_ruleset = "falco-default-ruleset";
// JSON schema (draft-06) describing the accepted structure of a rules
// document: a top-level array whose items are "FalcoRule" objects. Unknown
// keys are rejected for FalcoRule, Exception and Override objects
// ("additionalProperties": false).
// The string is parsed into falco_engine::m_rule_schema by the falco_engine
// constructor.
// NOTE(review): the "Priority" enum only lists five upper-case values;
// confirm it covers every priority spelling the rule loader accepts.
static const std::string rule_schema_string = R"(
{
"$schema": "http://json-schema.org/draft-06/schema#",
"type": "array",
"items": {
"$ref": "#/definitions/FalcoRule"
},
"definitions": {
"FalcoRule": {
"type": "object",
"additionalProperties": false,
"properties": {
"required_engine_version": {
"type": "string"
},
"macro": {
"type": "string"
},
"condition": {
"type": "string"
},
"list": {
"type": "string"
},
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/Item"
}
},
"rule": {
"type": "string"
},
"desc": {
"type": "string"
},
"enabled": {
"type": "boolean"
},
"output": {
"type": "string"
},
"append": {
"type": "boolean"
},
"priority": {
"$ref": "#/definitions/Priority"
},
"exceptions": {
"type": "array",
"items": {
"$ref": "#/definitions/Exception"
}
},
"override": {
"$ref": "#/definitions/Override"
},
"tags": {
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [],
"title": "FalcoRule"
},
"Item": {
"anyOf": [
{
"type": "integer"
},
{
"type": "string"
}
],
"title": "Item"
},
"Exception": {
"type": "object",
"additionalProperties": false,
"properties": {
"name": {
"type": "string"
},
"fields": {},
"comps": {},
"values": {}
},
"required": [
"name",
"values"
],
"title": "Exception"
},
"Priority": {
"type": "string",
"enum": [
"WARNING",
"NOTICE",
"INFO",
"ERROR",
"CRITICAL"
],
"title": "Priority"
},
"OverriddenItem": {
"type": "string",
"enum": [
"append",
"replace"
],
"title": "OverriddenItem"
},
"Override": {
"type": "object",
"additionalProperties": false,
"properties": {
"items": {
"$ref": "#/definitions/OverriddenItem"
},
"desc": {
"$ref": "#/definitions/OverriddenItem"
},
"condition": {
"$ref": "#/definitions/OverriddenItem"
},
"output": {
"$ref": "#/definitions/OverriddenItem"
},
"priority": {
"$ref": "#/definitions/OverriddenItem"
},
"enabled": {
"$ref": "#/definitions/OverriddenItem"
},
"exceptions": {
"$ref": "#/definitions/OverriddenItem"
}
},
"minProperties":1,
"title": "Override"
}
}
}
)";
using namespace falco;
falco_engine::falco_engine(bool seed_rng)
@@ -67,6 +214,8 @@ falco_engine::falco_engine(bool seed_rng)
m_default_ruleset_id = find_ruleset_id(s_default_ruleset);
fill_engine_state_funcs(m_engine_state);
m_rule_schema = nlohmann::json::parse(rule_schema_string);
}
falco_engine::~falco_engine()
@@ -198,7 +347,7 @@ std::unique_ptr<load_result> falco_engine::load_rules(const std::string &rules_c
cfg.extra_output_fields = m_extra_output_fields;
// read rules YAML file and collect its definitions
if(m_rule_reader->read(cfg, *m_rule_collector))
if(m_rule_reader->read(cfg, *m_rule_collector, m_rule_schema))
{
// compile the definitions (resolve macro/list refs, exceptions, ...)
m_last_compile_output = m_rule_compiler->new_compile_output();

View File

@@ -355,6 +355,8 @@ public:
const std::vector<plugin_version_requirement>& plugins,
std::string& err) const;
nlohmann::json m_rule_schema;
private:
// Create a ruleset using the provided factory and set the
// engine state funcs for it.

View File

@@ -87,6 +87,9 @@ public:
// has_warnings() can both be true if there were only warnings.
virtual bool has_warnings() = 0;
// Return json schema validation status.
virtual std::string schema_validation() = 0;
// This represents a set of rules contents as a mapping from
// rules content name (usually filename) to rules content. The
// rules content is actually a reference to the actual string

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include <string>
#include "rule_loader.h"
#include "yaml_helper.h"
static const std::string item_type_strings[] = {
@@ -282,7 +283,8 @@ std::string rule_loader::context::snippet(const falco::load_result::rules_conten
rule_loader::result::result(const std::string &name)
: name(name),
success(true)
success(true),
schema_validation_str(yaml_helper::validation_none)
{
}
@@ -296,6 +298,11 @@ bool rule_loader::result::has_warnings()
return (warnings.size() > 0);
}
// Returns (by value) the JSON schema validation status string stored in
// this result.
std::string rule_loader::result::schema_validation()
{
return schema_validation_str;
}
void rule_loader::result::add_error(load_result::error_code ec, const std::string& msg, const context& ctx)
{
error err = {ec, msg, ctx};
@@ -311,6 +318,11 @@ void rule_loader::result::add_warning(load_result::warning_code wc, const std::s
warnings.push_back(warn);
}
// Stores `status` as the JSON schema validation status of this result,
// replacing any previously stored value.
void rule_loader::result::set_schema_validation_status(const std::string& status)
{
schema_validation_str = status;
}
const std::string& rule_loader::result::as_string(bool verbose, const rules_contents_t& contents)
{
if(verbose)

View File

@@ -247,12 +247,16 @@ namespace rule_loader
void add_warning(falco::load_result::warning_code ec,
const std::string& msg,
const context& ctx);
void set_schema_validation_status(const std::string& status);
std::string schema_validation();
protected:
const std::string& as_summary_string();
const std::string& as_verbose_string(const falco::load_result::rules_contents_t& contents);
std::string name;
bool success;
std::string schema_validation_str;
std::vector<error> errors;
std::vector<warning> warnings;

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "rule_loader_reader.h"
#include "falco_engine_version.h"
#include "rule_loading_messages.h"
#include "yaml_helper.h"
#include <libsinsp/logger.h>
#include <re2/re2.h>
@@ -783,13 +784,15 @@ void rule_loader::reader::read_item(
}
}
bool rule_loader::reader::read(rule_loader::configuration& cfg, collector& collector)
bool rule_loader::reader::read(rule_loader::configuration& cfg, collector& collector, const nlohmann::json& schema)
{
std::vector<YAML::Node> docs;
yaml_helper reader;
std::string schema_validation;
rule_loader::context ctx(cfg.name);
try
{
docs = YAML::LoadAll(cfg.content);
docs = reader.loadall_from_string(cfg.content, schema, &schema_validation);
}
catch (YAML::ParserException& e)
{
@@ -807,7 +810,7 @@ bool rule_loader::reader::read(rule_loader::configuration& cfg, collector& colle
cfg.res->add_error(falco::load_result::LOAD_ERR_YAML_PARSE, "unknown YAML parsing error", ctx);
return false;
}
cfg.res->set_schema_validation_status(schema_validation);
for (auto doc = docs.begin(); doc != docs.end(); doc++)
{
if (doc->IsDefined() && !doc->IsNull())

View File

@@ -43,7 +43,7 @@ public:
\brief Reads the contents of a ruleset and uses a collector to store
the new definitions
*/
virtual bool read(configuration& cfg, collector& loader);
virtual bool read(configuration& cfg, collector& loader, const nlohmann::json& schema={});
/*!
\brief Engine version used to be represented as a simple progressive

View File

@@ -0,0 +1,527 @@
// SPDX-License-Identifier: Apache-2.0
/*
Copyright (C) 2023 The Falco Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once
#include <sys/types.h>
#include <sys/stat.h>
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
#include <yaml-cpp/yaml.h>
#include <string>
#include <vector>
#include <list>
#include <set>
#include <iostream>
#include <fstream>
#include <filesystem>
#include <numeric>
#include <nlohmann/json.hpp>
#include <valijson/adapters/nlohmann_json_adapter.hpp>
#include <valijson/adapters/yaml_cpp_adapter.hpp>
#include <valijson/schema.hpp>
#include <valijson/schema_parser.hpp>
#include <valijson/validator.hpp>
//#include "config_falco.h"
//#include "event_drops.h"
//#include "falco_outputs.h"
class yaml_helper;
class yaml_visitor {
private:
using Callback = std::function<void(YAML::Node&)>;
explicit yaml_visitor(Callback cb): seen(), cb(std::move(cb)) {}
void operator()(YAML::Node &cur) {
seen.push_back(cur);
if (cur.IsMap()) {
for (YAML::detail::iterator_value pair : cur) {
descend(pair.second);
}
} else if (cur.IsSequence()) {
for (YAML::detail::iterator_value child : cur) {
descend(child);
}
} else if (cur.IsScalar()) {
cb(cur);
}
}
void descend(YAML::Node &target) {
if (std::find(seen.begin(), seen.end(), target) == seen.end()) {
(*this)(target);
}
}
std::vector<YAML::Node> seen;
Callback cb;
friend class yaml_helper;
};
/**
* @brief An helper class for reading and editing YAML documents
*/
class yaml_helper
{
public:
inline static const std::string configs_key = "config_files";
inline static const std::string validation_ok = "ok";
inline static const std::string validation_failed = "failed";
inline static const std::string validation_none = "none";
/**
 * Load all the YAML documents represented by the input string.
 * Since this is used by rule loader, does not process env vars.
 *
 * @param input      YAML text, possibly holding several documents.
 * @param schema     JSON schema each document is validated against; an
 *                   empty value disables validation.
 * @param validation Optional out-param. When non-null it always receives a
 *                   status: validation_none if no schema was given or no
 *                   document was validated, validation_ok if every document
 *                   validated, otherwise the failure message of the first
 *                   document that did not validate.
 * @return the parsed documents, whatever the validation outcome.
 */
std::vector<YAML::Node> loadall_from_string(const std::string& input, const nlohmann::json& schema={}, std::string *validation=nullptr)
{
	auto nodes = YAML::LoadAll(input);
	if (validation)
	{
		// Give the out-param a defined value up front: an input that yields
		// no documents would otherwise leave the caller's string untouched.
		*validation = validation_none;
		if(!schema.empty())
		{
			// Validate each node.
			for (const auto& node : nodes)
			{
				*validation = validate_node(node, schema);
				if (*validation != validation_ok)
				{
					// Return first error
					break;
				}
			}
		}
	}
	return nodes;
}
/**
 * Parse the input string as a single YAML document, make it the internal
 * root and pre-process environment variable references in its scalars.
 *
 * When `validation` is non-null it receives the result of validating the
 * root against `schema`, or validation_none when `schema` is empty.
 */
void load_from_string(const std::string& input, const nlohmann::json& schema={}, std::string *validation=nullptr)
{
	m_root = YAML::Load(input);
	pre_process_env_vars(m_root);

	if (validation == nullptr)
	{
		// Caller is not interested in the validation status.
		return;
	}

	if (schema.empty())
	{
		*validation = validation_none;
		return;
	}
	*validation = validate_node(m_root, schema);
}
/**
* Load the YAML document from the given file path and make it the
* internal root. `schema` and `validation` are forwarded unchanged to
* load_from_file_int().
*/
void load_from_file(const std::string& path, const nlohmann::json& schema={}, std::string *validation=nullptr)
{
m_root = load_from_file_int(path, schema, validation);
}
/**
 * Load another config file and copy each of its top-level entries into the
 * internal root, overriding entries that already exist under the same key.
 *
 * `schema` and `validation` are forwarded to load_from_file_int().
 *
 * @throws std::runtime_error if the included file contains the
 *         `config_files` key; entries iterated before it are already merged.
 */
void include_config_file(const std::string& include_file_path, const nlohmann::json& schema={}, std::string *validation=nullptr)
{
	auto included_root = load_from_file_int(include_file_path, schema, validation);
	for(auto entry : included_root)
	{
		const auto &top_level_key = entry.first.Scalar();
		if (top_level_key != configs_key)
		{
			// We allow to override keys.
			// `get_node()` is not needed here since the key is a top-level one.
			m_root[top_level_key] = entry.second;
			continue;
		}

		/*
		 * To avoid recursion hell,
		 * `config_files` directives are not supported in included config
		 * files (they would use load_from_file_int recursively).
		 */
		throw std::runtime_error(
			"Config error: '" + configs_key + "' directive in included config file " + include_file_path + ".");
	}
}
/**
* Clears the internal loaded document by replacing the root with a
* default-constructed YAML::Node.
*/
void clear()
{
m_root = YAML::Node();
}
/**
* Get a scalar value from the node identified by key.
*/
template<typename T>
const T get_scalar(const std::string& key, const T& default_value) const
{
YAML::Node node;
get_node(node, key);
if(node.IsDefined())
{
return node.as<T>(default_value);
}
return default_value;
}
/**
* Set the node identified by key to value.
* The node is looked up through get_node() with can_append=true, so a key
* using an empty index ("[]") may append a new sequence entry.
*/
template<typename T>
void set_scalar(const std::string& key, const T& value)
{
YAML::Node node;
get_node(node, key, true);
// presumably `node` aliases the document node, so assigning through it
// updates the loaded document; verify against yaml-cpp semantics
node = value;
}
/**
* Set the node identified by key to an object value.
* Same lookup as set_scalar(): get_node() is called with can_append=true
* and `value` is then assigned to the resolved node.
*/
void set_object(const std::string& key, const YAML::Node& value)
{
YAML::Node node;
get_node(node, key, true);
node = value;
}
/**
* Get the sequence value from the node identified by key.
*/
template<typename T>
void get_sequence(T& ret, const std::string& key) const
{
YAML::Node node;
get_node(node, key);
return get_sequence_from_node<T>(ret, node);
}
/**
* Return true if the node identified by key is defined.
* The key is resolved through get_node() without append permission.
*/
bool is_defined(const std::string& key) const
{
YAML::Node node;
get_node(node, key);
return node.IsDefined();
}
/**
* Serialize the internal root to a string.
* The root is emitted in flow style with double-quoted strings and
* lower-case null, as the first element of a sequence that is begun but
* never ended; the first character of the emitter output is then skipped.
*/
std::string dump() const
{
YAML::Emitter emitter;
emitter << YAML::DoubleQuoted << YAML::Flow << YAML::LowerNull << YAML::BeginSeq << m_root;
return emitter.c_str() + 1; // drop initial '[' char
}
private:
YAML::Node m_root;
/**
 * Load the YAML document stored at `path`, pre-process environment
 * variable references in its scalars and return it; the internal root is
 * not touched.
 *
 * When `validation` is non-null it receives the result of validating the
 * document against `schema`, or validation_none when `schema` is empty.
 */
YAML::Node load_from_file_int(const std::string& path, const nlohmann::json& schema={}, std::string *validation=nullptr)
{
	auto root = YAML::LoadFile(path);
	pre_process_env_vars(root);

	if (validation != nullptr)
	{
		if (schema.empty())
		{
			*validation = validation_none;
		}
		else
		{
			*validation = validate_node(root, schema);
		}
	}
	return root;
}
/**
 * Validate a YAML node against a JSON schema using valijson with weak
 * type checking.
 *
 * @return validation_ok on success. On failure, validation_failed followed
 *         by " for <context>: <description>" of the first error popped
 *         from the results, or plain validation_failed when no error can
 *         be popped.
 */
std::string validate_node(const YAML::Node &node, const nlohmann::json& schema={})
{
	// Build the valijson schema out of the nlohmann json one.
	valijson::Schema schemaDef;
	valijson::SchemaParser schemaParser;
	valijson::Validator validator(valijson::Validator::kWeakTypes);
	valijson::ValidationResults validationResults;
	valijson::adapters::YamlCppAdapter configAdapter(node);
	valijson::adapters::NlohmannJsonAdapter schemaAdapter(schema);
	schemaParser.populateSchema(schemaAdapter, schemaDef);

	if (validator.validate(schemaDef, configAdapter, &validationResults))
	{
		return validation_ok;
	}

	// Report only the top-most error.
	valijson::ValidationResults::Error first_error;
	if (!validationResults.popError(first_error))
	{
		return validation_failed;
	}

	// Concatenate the context pieces into a single location string.
	std::string location;
	for (const auto &piece : first_error.context)
	{
		location += piece;
	}
	return std::string(validation_failed + " for ") + location + ": " + first_error.description;
}
/*
 * When loading a yaml file,
 * we immediately pre process all scalar values through a visitor private API,
 * and resolve any "${env_var}" to its value (or to an empty string when the
 * variable is not set);
 * moreover, any "$${str}" is resolved to simply "${str}".
 *
 * A '$' that starts neither form is left as is and scanning continues with
 * the rest of the scalar. An unterminated "${" or "$${" stops the scan,
 * leaving the rest of the value untouched.
 *
 * After a "${env_var}" substitution the scan resumes at the beginning of
 * the substituted text, so a value that itself contains "${...}" is
 * expanded again.
 * NOTE(review): a variable whose value references itself (X="${X}") would
 * therefore never terminate; confirm whether that needs guarding.
 */
void pre_process_env_vars(YAML::Node& root)
{
	yaml_visitor([](YAML::Node &scalar) {
		auto value = scalar.as<std::string>();
		auto start_pos = value.find('$');
		while (start_pos != std::string::npos)
		{
			auto substr = value.substr(start_pos);
			// Case 1 -> ${}
			if (substr.rfind("${", 0) == 0)
			{
				auto end_pos = substr.find('}');
				if (end_pos != std::string::npos)
				{
					// Eat "${" and "}" when getting the env var name
					auto env_str = substr.substr(2, end_pos - 2);
					const char* env_value = std::getenv(env_str.c_str()); // Get the environment variable value
					if(env_value)
					{
						// env variable name + "${}"
						value.replace(start_pos, env_str.length() + 3, env_value);
					}
					else
					{
						value.erase(start_pos, env_str.length() + 3);
					}
				}
				else
				{
					// There are no "}" chars anymore; just break leaving rest of value untouched.
					break;
				}
			}
			// Case 2 -> $${}
			else if (substr.rfind("$${", 0) == 0)
			{
				auto end_pos = substr.find('}');
				if (end_pos != std::string::npos)
				{
					// Consume first "$" token
					value.erase(start_pos, 1);
				}
				else
				{
					// There are no "}" chars anymore; just break leaving rest of value untouched.
					break;
				}
				start_pos++; // consume the second '$' token
			}
			else
			{
				// Lone '$': skip only this character. Jumping to the end of
				// the string here would leave any later "${...}" in the
				// same scalar unresolved.
				start_pos++;
			}
			start_pos = value.find("$", start_pos);
		}
		scalar = value;
	})(root);
}
/**
 * Resolve `key` against the loaded document and store the resulting node
 * in `ret`.
 *
 * Key is a string representing a node in the YAML document.
 * The provided key string can navigate the document in its
 * nested nodes, with arbitrary depth. The key string follows
 * this regular language:
 *
 * 		Key     := NodeKey ('.' NodeKey)*
 * 		NodeKey := (any)+ ('[' (integer)+? ']')*
 *
 * If can_append is true, an empty index ("[]") will append a new entry
 * to the sequence, it is rejected otherwise.
 *
 * Some examples of accepted key strings:
 *     - NodeName
 *     - ListValue[3].subvalue
 *     - MatrixValue[1][3]
 *     - value1.subvalue2.subvalue3
 *
 * @throws std::runtime_error prefixed with `Config error at key "<key>": `
 *         when the key is malformed (misplaced '.' or '[', non-numeric
 *         index, '[' without a matching ']') or when any std::exception is
 *         raised while navigating the document.
 */
void get_node(YAML::Node &ret, const std::string &key, bool can_append=false) const
{
	try
	{
		char c;
		bool should_shift;
		std::string nodeKey;
		ret.reset(m_root);
		for(std::string::size_type i = 0; i < key.size(); ++i)
		{
			c = key[i];
			// A node name ends at '.', at '[' or at the last character.
			should_shift = c == '.' || c == '[' || i == key.size() - 1;

			if (c != '.' && c != '[')
			{
				if (i > 0 && nodeKey.empty() && key[i - 1] != '.')
				{
					throw std::runtime_error(
						"Parsing error: expected '.' character at pos "
						+ std::to_string(i - 1));
				}
				nodeKey += c;
			}

			if (should_shift)
			{
				// A '[' right after a ']' chains a further index onto the
				// node selected so far (e.g. "MatrixValue[1][3]"): there is
				// no node name to resolve in between.
				const bool chained_index = c == '[' && i > 0 && key[i - 1] == ']';
				if (nodeKey.empty())
				{
					if (!chained_index)
					{
						throw std::runtime_error(
							"Parsing error: unexpected character at pos "
							+ std::to_string(i));
					}
				}
				else
				{
					ret.reset(ret[nodeKey]);
					nodeKey.clear();
				}
			}
			if (c == '[')
			{
				auto close_param_idx = key.find(']', i);
				if (close_param_idx == std::string::npos)
				{
					// Without this check `i` would be set to npos below and
					// wrap around to 0 on the next increment, rescanning
					// the key forever.
					throw std::runtime_error(
						"Parsing error: expected ']' character after pos "
						+ std::to_string(i));
				}
				std::string idx_str = key.substr(i + 1, close_param_idx - i - 1);
				int nodeIdx;

				bool ret_appendable = !ret.IsDefined() || ret.IsSequence();
				if (idx_str.empty() && ret_appendable && can_append)
				{
					// "[]": append a fresh entry and select it.
					YAML::Node newNode;
					ret.push_back(newNode);
					nodeIdx = ret.size() - 1;
				}
				else
				{
					try
					{
						nodeIdx = std::stoi(idx_str);
					}
					catch(const std::exception& e)
					{
						throw std::runtime_error("Parsing error: expected a numeric index, found '" + idx_str + "'");
					}
				}
				ret.reset(ret[nodeIdx]);
				// Resume scanning after the closing bracket, also skipping
				// a '.' that immediately follows it.
				i = close_param_idx;
				if (i < key.size() - 1 && key[i + 1] == '.')
				{
					i++;
				}
			}
		}
	}
	catch(const std::exception& e)
	{
		throw std::runtime_error("Config error at key \"" + key + "\": " + std::string(e.what()));
	}
}
/**
 * Append to `ret` the value(s) held by `node`, converted to
 * T::value_type: every element of a sequence, or the single value of a
 * scalar. Undefined nodes and nodes of any other type add nothing.
 */
template<typename T>
void get_sequence_from_node(T& ret, const YAML::Node& node) const
{
	if(!node.IsDefined())
	{
		return;
	}

	using value_t = typename T::value_type;
	if(node.IsScalar())
	{
		ret.insert(ret.end(), node.as<value_t>());
		return;
	}

	if(node.IsSequence())
	{
		for(const YAML::Node& element : node)
		{
			ret.insert(ret.end(), element.as<value_t>());
		}
	}
}
};
// define a yaml-cpp conversion function for nlohmann json objects
namespace YAML {
template<>
struct convert<nlohmann::json> {
static bool decode(const Node& node, nlohmann::json& res)
{
int int_val;
double double_val;
bool bool_val;
std::string str_val;
switch (node.Type()) {
case YAML::NodeType::Map:
for (auto &&it: node)
{
nlohmann::json sub{};
YAML::convert<nlohmann::json>::decode(it.second, sub);
res[it.first.as<std::string>()] = sub;
}
break;
case YAML::NodeType::Sequence:
for (auto &&it : node)
{
nlohmann::json sub{};
YAML::convert<nlohmann::json>::decode(it, sub);
res.emplace_back(sub);
}
break;
case YAML::NodeType::Scalar:
if (YAML::convert<int>::decode(node, int_val))
{
res = int_val;
}
else if (YAML::convert<double>::decode(node, double_val))
{
res = double_val;
}
else if (YAML::convert<bool>::decode(node, bool_val))
{
res = bool_val;
}
else if (YAML::convert<std::string>::decode(node, str_val))
{
res = str_val;
}
default:
break;
}
return true;
}
// The "encode" function is not needed here, in fact you can simply YAML::load any json string.
};
}