Restructure rules result to properly support multiple files

The old version of rules_result assumed that all errors/warnings were
related to a single file. That was generally correct for errors, as
rules parsing always stopped at the first error, so there is only one
relevant file.

However, for warnings that was not the case. When reading multiple
files A and B, you might get a warning from file A *only* after
reading file B. For example, B might redefine a rule in such a way
that you could get unused list/macro warnings from file A.

To properly address this, make some changes to how contexts are
managed:

- Instead of creating snippets at the time the error/warning was
  generated, create snippets at the time the error/warning is
  converted into a string. This requires passing all rules contents to
  as_string()/as_json(), so define a
  falco::load_result::rules_contents_t map from filename to rules
  content (reference) and pass it in as_string/as_json(). Snippets are
  now generated from the rules content matching the filename in the
  context.
- When creating warnings/errors, there's no need to pass along the
  rules content. This is only used when converting an error into a
  string/json.

Also change snippet() to handle potentially very long lines. Instead
of always printing the entire line matching a location, print up to
snippet_width(param, with default 160 chars)/2 characters surrounding
the column from the location.

Signed-off-by: Mark Stemm <mark.stemm@gmail.com>
This commit is contained in:
Mark Stemm 2022-08-08 16:25:21 -07:00 committed by poiana
parent 0828296abc
commit 98c1e3d3f1
4 changed files with 95 additions and 53 deletions

View File

@ -16,6 +16,7 @@ limitations under the License.
#pragma once
#include <functional>
#include <string>
#include <nlohmann/json.hpp>
@ -75,20 +76,35 @@ public:
// has_warnings() can both be true if there were only warnings.
virtual bool has_warnings() = 0;
// This represents a set of rules contents as a mapping from
// rules content name (usually filename) to rules content. The
// rules content is actually a reference to the actual string
// to avoid copies. Using reference_wrapper allows the
// reference to be held in the stl map (bare references can't
// be copied/assigned, but reference_wrappers can).
//
// It's used in the as_string/as_json() methods below.
typedef std::map<std::string, std::reference_wrapper<const std::string>> rules_contents_t;
// This contains a human-readable version of the result,
// suitable for display to end users.
//
// The provided rules_contents_t should map from content name
// to rules content (reference) for each rules_content that has
// been passed to rule_loader::compile() or
// rule_reader::load().
//
// When verbose is true, the returned value has full details
// on the result including document locations/context.
//
// When verbose is false, the returned value is a short string
// with the success value and a list of
// errors/warnings. Suitable for simple one-line display.
virtual const std::string& as_string(bool verbose) = 0;
virtual const std::string& as_string(bool verbose, const rules_contents_t& contents) = 0;
// This contains the full result structure as json, suitable
// for automated parsing/interpretation downstream.
virtual const nlohmann::json& as_json() = 0;
virtual const nlohmann::json& as_json(const rules_contents_t& contents) = 0;
};
} // namespace falco

View File

@ -135,46 +135,76 @@ nlohmann::json rule_loader::context::as_json()
return ret;
}
std::string rule_loader::context::snippet(const std::string& content) const
std::string rule_loader::context::snippet(const falco::load_result::rules_contents_t& rules_contents,
size_t snippet_width) const
{
std::string ret;
if(m_locs.empty() || content.size() == 0)
if(m_locs.empty())
{
return "<No context available>\n";
}
rule_loader::context::location loc = m_locs.back();
auto it = rules_contents.find(name);
if(it == rules_contents.end())
{
return "<No context available>\n";
}
const std::string& snip_content = it->second;
size_t from = loc.mark.pos;
// In some cases like this, where the content ends with a
// dangling property value:
// tags:
// The YAML::Mark position can be past the end of the file.
for(; from > 0 && from >= content.size(); from--);
for(; from > 0 && from >= snip_content.size(); from--);
// Add the line that includes the mark and a marker
// at the column number.
size_t to = from;
// The snippet is generally the line that contains the
// position. So walk backwards from pos to the preceding
// newline, and walk forwards from pos to the following
// newline.
//
// However, some lines can be very very long, so the walk
// forwards/walk backwards is capped at a maximum of
// snippet_width/2 characters in either direction.
for(; from > 0 && snip_content.at(from) != '\n' && (loc.mark.pos - from) < (snippet_width/2); from--);
for(; from > 0 && content.at(from) != '\n'; from--);
for(; to < content.size()-1 && content.at(to) != '\n'; to++);
size_t to = loc.mark.pos;
for(; to < snip_content.size()-1 && snip_content.at(to) != '\n' && (to - loc.mark.pos) < (snippet_width/2); to++);
// Don't include the newlines
if(content.at(from) == '\n')
if(snip_content.at(from) == '\n')
{
from++;
}
if(content.at(to) == '\n')
if(snip_content.at(to) == '\n')
{
to--;
}
ret = content.substr(from, to-from+1) + "\n";
ret = snip_content.substr(from, to-from+1);
// Add a blank line with a marker at the column number
ret += std::string(loc.mark.column, ' ') + '^' + "\n";
// Replace the initial/end characters with '...' if the walk
// forwards/backwards was incomplete
if(loc.mark.pos - from >= (snippet_width/2))
{
ret.replace(0, 2, "...");
}
if(to - loc.mark.pos >= (snippet_width/2))
{
ret.replace(ret.size()-2, ret.size(), "...");
}
ret += "\n";
// Add a blank line with a marker at the position within the snippet
ret += std::string(loc.mark.pos-from, ' ') + '^' + "\n";
return ret;
}
@ -195,26 +225,26 @@ bool rule_loader::result::has_warnings()
return (warnings.size() > 0);
}
void rule_loader::result::add_error(load_result::error_code ec, const std::string& msg, const context& ctx, const std::string& rules_content)
void rule_loader::result::add_error(load_result::error_code ec, const std::string& msg, const context& ctx)
{
error err = {ec, msg, ctx, ctx.snippet(rules_content)};
error err = {ec, msg, ctx};
success = false;
errors.push_back(err);
}
void rule_loader::result::add_warning(load_result::warning_code wc, const std::string& msg, const context& ctx, const std::string& rules_content)
void rule_loader::result::add_warning(load_result::warning_code wc, const std::string& msg, const context& ctx)
{
warning warn = {wc, msg, ctx, ctx.snippet(rules_content)};
warning warn = {wc, msg, ctx};
warnings.push_back(warn);
}
const std::string& rule_loader::result::as_string(bool verbose)
const std::string& rule_loader::result::as_string(bool verbose, const rules_contents_t& contents)
{
if(verbose)
{
return as_verbose_string();
return as_verbose_string(contents);
}
else
{
@ -282,7 +312,7 @@ const std::string& rule_loader::result::as_summary_string()
return res_summary_string;
}
const std::string& rule_loader::result::as_verbose_string()
const std::string& rule_loader::result::as_verbose_string(const rules_contents_t& contents)
{
std::ostringstream os;
@ -310,7 +340,7 @@ const std::string& rule_loader::result::as_verbose_string()
os << err.ctx.as_string();
os << "------" << std::endl;
os << err.snippet;
os << err.ctx.snippet(contents);
os << "------" << std::endl;
os << load_result::error_code_str(err.ec)
@ -331,7 +361,7 @@ const std::string& rule_loader::result::as_verbose_string()
os << warn.ctx.as_string();
os << "------" << std::endl;
os << warn.snippet;
os << warn.ctx.snippet(contents);
os << "------" << std::endl;
os << load_result::warning_code_str(warn.wc)
@ -345,7 +375,7 @@ const std::string& rule_loader::result::as_verbose_string()
return res_verbose_string;
}
const nlohmann::json& rule_loader::result::as_json()
const nlohmann::json& rule_loader::result::as_json(const rules_contents_t& contents)
{
nlohmann::json j;
@ -364,7 +394,7 @@ const nlohmann::json& rule_loader::result::as_json()
nlohmann::json jerr;
jerr["context"] = err.ctx.as_json();
jerr["context"]["snippet"] = err.snippet;
jerr["context"]["snippet"] = err.ctx.snippet(contents);
jerr["code"] = load_result::error_code_str(err.ec);
jerr["codedesc"] = load_result::error_desc(err.ec);
@ -380,7 +410,7 @@ const nlohmann::json& rule_loader::result::as_json()
nlohmann::json jwarn;
jwarn["context"] = warn.ctx.as_json();
jwarn["context"]["snippet"] = warn.snippet;
jwarn["context"]["snippet"] = warn.ctx.snippet(contents);
jwarn["code"] = load_result::warning_code_str(warn.wc);
jwarn["codedesc"] = load_result::warning_desc(warn.wc);
@ -842,8 +872,7 @@ void rule_loader::define(configuration& cfg, rule_info& info)
{
cfg.res->add_warning(load_result::LOAD_UNKNOWN_SOURCE,
"Unknown source " + info.source + ", skipping",
info.ctx,
cfg.content);
info.ctx);
return;
}
@ -1044,7 +1073,7 @@ void rule_loader::compile_rule_infos(
{
for (auto &w : warn_codes)
{
cfg.res->add_warning(w, "", r.ctx, cfg.content);
cfg.res->add_warning(w, "", r.ctx);
}
}
@ -1091,8 +1120,7 @@ void rule_loader::compile_rule_infos(
cfg.res->add_warning(
load_result::LOAD_UNKNOWN_FIELD,
e.what(),
r.cond_ctx,
cfg.content);
r.cond_ctx);
}
else
{
@ -1125,8 +1153,7 @@ void rule_loader::compile_rule_infos(
cfg.res->add_warning(
load_result::LOAD_NO_EVTTYPE,
"Rule matches too many evt.type values. This has a significant performance penalty.",
r.ctx,
cfg.content);
r.ctx);
}
}
}
@ -1146,7 +1173,7 @@ void rule_loader::compile(configuration& cfg, indexed_vector<falco_rule>& out) c
}
catch(rule_load_exception &e)
{
cfg.res->add_error(e.ec, e.msg, e.ctx, cfg.content);
cfg.res->add_error(e.ec, e.msg, e.ctx);
}
// print info on any dangling lists or macros that were not used anywhere
@ -1157,8 +1184,7 @@ void rule_loader::compile(configuration& cfg, indexed_vector<falco_rule>& out) c
cfg.res->add_warning(
load_result::LOAD_UNUSED_MACRO,
"Macro not referred to by any other rule/macro",
m.ctx,
cfg.content);
m.ctx);
}
}
for (auto &l : lists)
@ -1168,8 +1194,7 @@ void rule_loader::compile(configuration& cfg, indexed_vector<falco_rule>& out) c
cfg.res->add_warning(
load_result::LOAD_UNUSED_LIST,
"List not referred to by any other rule/macro",
l.ctx,
cfg.content);
l.ctx);
}
}
}

View File

@ -36,6 +36,8 @@ public:
class context
{
public:
static const size_t default_snippet_width = 160;
struct location
{
// The original location in the document
@ -51,7 +53,7 @@ public:
};
context(const std::string& name);
context(const YAML::Node& mark,
context(const YAML::Node& item,
const std::string item_type,
const std::string item_name,
const context& parent);
@ -59,7 +61,9 @@ public:
// Return a snippet of the provided rules content
// corresponding to this context.
std::string snippet(const std::string& content) const;
// Uses the provided rules_contents to look up the original
// rules content for a given location name.
std::string snippet(const falco::load_result::rules_contents_t& rules_contents, size_t snippet_width = default_snippet_width) const;
std::string as_string();
nlohmann::json as_json();
@ -77,7 +81,6 @@ public:
falco::load_result::warning_code wc;
std::string msg;
context ctx;
std::string snippet;
};
struct error
@ -85,7 +88,6 @@ public:
falco::load_result::error_code ec;
std::string msg;
context ctx;
std::string snippet;
};
class rule_load_exception : public std::exception
@ -113,22 +115,21 @@ public:
virtual bool successful() override;
virtual bool has_warnings() override;
virtual const std::string& as_string(bool verbose) override;
virtual const nlohmann::json& as_json() override;
virtual const std::string& as_string(bool verbose, const falco::load_result::rules_contents_t& contents) override;
virtual const nlohmann::json& as_json(const falco::load_result::rules_contents_t& contents) override;
void add_error(falco::load_result::error_code ec,
const std::string& msg,
const context& ctx,
const std::string& rules_content);
const context& ctx);
void add_warning(falco::load_result::warning_code ec,
const std::string& msg,
const context& ctx,
const std::string& rules_content);
const context& ctx);
protected:
const std::string& as_summary_string();
const std::string& as_verbose_string();
const std::string& as_verbose_string(const falco::load_result::rules_contents_t& contents);
std::string name;
bool success;

View File

@ -411,7 +411,7 @@ static void read_item(
else
{
rule_loader::context ctx(item, "unknown", "", parent);
cfg.res->add_warning(load_result::LOAD_UNKNOWN_ITEM, "Unknown top level item", ctx, cfg.content);
cfg.res->add_warning(load_result::LOAD_UNKNOWN_ITEM, "Unknown top level item", ctx);
}
}
@ -425,7 +425,7 @@ bool rule_reader::load(rule_loader::configuration& cfg, rule_loader& loader)
catch(const exception& e)
{
rule_loader::context ctx(cfg.name);
cfg.res->add_error(load_result::LOAD_ERR_YAML_PARSE, e.what(), ctx, cfg.content);
cfg.res->add_error(load_result::LOAD_ERR_YAML_PARSE, e.what(), ctx);
return false;
}
@ -454,7 +454,7 @@ bool rule_reader::load(rule_loader::configuration& cfg, rule_loader& loader)
}
catch (rule_loader::rule_load_exception &e)
{
cfg.res->add_error(e.ec, e.msg, e.ctx, cfg.content);
cfg.res->add_error(e.ec, e.msg, e.ctx);
// Although we *could* continue on to the next doc,
// as it's effectively a new rules file, for