Restructure rules result to properly support multiple files

The old version of rules_result assumed that all errors/warnings were
related to a single file. That was generally correct for errors, as
rules parsing always stopped at the first error, so there is only one
relevant file.

However, for warnings that was not the case. When reading multiple
files A and B, you might get a warning from file A *only* after
reading file B. For example, B might redefine a rule in such a way
that you could get unused list/macro warnings from file A.

To properly address this, make some changes to how contexts are
managed:

- Instead of creating snippets at the time the error/warning was
  generated, create snippets at the time the error/warning is
  converted into a string. This requires passing all rules contents to
  as_string()/as_json(), so define a
  falco::load_result::rules_contents_t map from filename to rules
  content (reference) and pass it in as_string/as_json(). Snippets are
  now generated from the rules content matching the filename in the
  context.
- When creating warnings/errors, there's no need to pass along the
  rules content. This is only used when converting an error into a
  string/json.

Also change snippet() to handle potentially very long lines. Instead
of always printing the entire line matching a location, print up to
snippet_width(param, with default 160 chars)/2 characters surrounding
the column from the location.

Signed-off-by: Mark Stemm <mark.stemm@gmail.com>
This commit is contained in:
Mark Stemm 2022-08-08 16:25:21 -07:00 committed by poiana
parent 0828296abc
commit 98c1e3d3f1
4 changed files with 95 additions and 53 deletions

View File

@ -16,6 +16,7 @@ limitations under the License.
#pragma once #pragma once
#include <functional>
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
@ -75,20 +76,35 @@ public:
// has_warnings() can both be true if there were only warnings. // has_warnings() can both be true if there were only warnings.
virtual bool has_warnings() = 0; virtual bool has_warnings() = 0;
// This represents a set of rules contents as a mapping from
// rules content name (usually filename) to rules content. The
// rules content is actually a reference to the actual string
// to avoid copies. Using reference_wrapper allows the
// reference to be held in the stl map (bare references can't
// be copied/assigned, but reference_wrappers can).
//
// It's used in the as_string/as_json() methods below.
typedef std::map<std::string, std::reference_wrapper<const std::string>> rules_contents_t;
// This contains a human-readable version of the result, // This contains a human-readable version of the result,
// suitable for display to end users. // suitable for display to end users.
// //
// The provided rules_contents_t should map from content name
// to rules content (reference) for each rules_content that has
// been passed to rule_loader::compile() or
// rule_reader::load().
//
// When verbose is true, the returned value has full details // When verbose is true, the returned value has full details
// on the result including document locations/context. // on the result including document locations/context.
// //
// When verbose is false, the returned value is a short string // When verbose is false, the returned value is a short string
// with the success value and a list of // with the success value and a list of
// errors/warnings. Suitable for simple one-line display. // errors/warnings. Suitable for simple one-line display.
virtual const std::string& as_string(bool verbose) = 0; virtual const std::string& as_string(bool verbose, const rules_contents_t& contents) = 0;
// This contains the full result structure as json, suitable // This contains the full result structure as json, suitable
// for automated parsing/interpretation downstream. // for automated parsing/interpretation downstream.
virtual const nlohmann::json& as_json() = 0; virtual const nlohmann::json& as_json(const rules_contents_t& contents) = 0;
}; };
} // namespace falco } // namespace falco

View File

@ -135,46 +135,76 @@ nlohmann::json rule_loader::context::as_json()
return ret; return ret;
} }
std::string rule_loader::context::snippet(const std::string& content) const std::string rule_loader::context::snippet(const falco::load_result::rules_contents_t& rules_contents,
size_t snippet_width) const
{ {
std::string ret; std::string ret;
if(m_locs.empty() || content.size() == 0) if(m_locs.empty())
{ {
return "<No context available>\n"; return "<No context available>\n";
} }
rule_loader::context::location loc = m_locs.back(); rule_loader::context::location loc = m_locs.back();
auto it = rules_contents.find(name);
if(it == rules_contents.end())
{
return "<No context available>\n";
}
const std::string& snip_content = it->second;
size_t from = loc.mark.pos; size_t from = loc.mark.pos;
// In some cases like this, where the content ends with a // In some cases like this, where the content ends with a
// dangling property value: // dangling property value:
// tags: // tags:
// The YAML::Mark position can be past the end of the file. // The YAML::Mark position can be past the end of the file.
for(; from > 0 && from >= content.size(); from--); for(; from > 0 && from >= snip_content.size(); from--);
// Add the line that includes the mark and a marker // The snippet is generally the line that contains the
// at the column number. // position. So walk backwards from pos to the preceding
size_t to = from; // newline, and walk forwards from pos to the following
// newline.
//
// However, some lines can be very very long, so the walk
// forwards/walk backwards is capped at a maximum of
// snippet_width/2 characters in either direction.
for(; from > 0 && snip_content.at(from) != '\n' && (loc.mark.pos - from) < (snippet_width/2); from--);
for(; from > 0 && content.at(from) != '\n'; from--); size_t to = loc.mark.pos;
for(; to < content.size()-1 && content.at(to) != '\n'; to++); for(; to < snip_content.size()-1 && snip_content.at(to) != '\n' && (to - loc.mark.pos) < (snippet_width/2); to++);
// Don't include the newlines // Don't include the newlines
if(content.at(from) == '\n') if(snip_content.at(from) == '\n')
{ {
from++; from++;
} }
if(content.at(to) == '\n') if(snip_content.at(to) == '\n')
{ {
to--; to--;
} }
ret = content.substr(from, to-from+1) + "\n"; ret = snip_content.substr(from, to-from+1);
// Add a blank line with a marker at the column number // Replace the initial/end characters with '...' if the walk
ret += std::string(loc.mark.column, ' ') + '^' + "\n"; // forwards/backwards was incomplete
if(loc.mark.pos - from >= (snippet_width/2))
{
ret.replace(0, 2, "...");
}
if(to - loc.mark.pos >= (snippet_width/2))
{
ret.replace(ret.size()-2, ret.size(), "...");
}
ret += "\n";
// Add a blank line with a marker at the position within the snippet
ret += std::string(loc.mark.pos-from, ' ') + '^' + "\n";
return ret; return ret;
} }
@ -195,26 +225,26 @@ bool rule_loader::result::has_warnings()
return (warnings.size() > 0); return (warnings.size() > 0);
} }
void rule_loader::result::add_error(load_result::error_code ec, const std::string& msg, const context& ctx, const std::string& rules_content) void rule_loader::result::add_error(load_result::error_code ec, const std::string& msg, const context& ctx)
{ {
error err = {ec, msg, ctx, ctx.snippet(rules_content)}; error err = {ec, msg, ctx};
success = false; success = false;
errors.push_back(err); errors.push_back(err);
} }
void rule_loader::result::add_warning(load_result::warning_code wc, const std::string& msg, const context& ctx, const std::string& rules_content) void rule_loader::result::add_warning(load_result::warning_code wc, const std::string& msg, const context& ctx)
{ {
warning warn = {wc, msg, ctx, ctx.snippet(rules_content)}; warning warn = {wc, msg, ctx};
warnings.push_back(warn); warnings.push_back(warn);
} }
const std::string& rule_loader::result::as_string(bool verbose) const std::string& rule_loader::result::as_string(bool verbose, const rules_contents_t& contents)
{ {
if(verbose) if(verbose)
{ {
return as_verbose_string(); return as_verbose_string(contents);
} }
else else
{ {
@ -282,7 +312,7 @@ const std::string& rule_loader::result::as_summary_string()
return res_summary_string; return res_summary_string;
} }
const std::string& rule_loader::result::as_verbose_string() const std::string& rule_loader::result::as_verbose_string(const rules_contents_t& contents)
{ {
std::ostringstream os; std::ostringstream os;
@ -310,7 +340,7 @@ const std::string& rule_loader::result::as_verbose_string()
os << err.ctx.as_string(); os << err.ctx.as_string();
os << "------" << std::endl; os << "------" << std::endl;
os << err.snippet; os << err.ctx.snippet(contents);
os << "------" << std::endl; os << "------" << std::endl;
os << load_result::error_code_str(err.ec) os << load_result::error_code_str(err.ec)
@ -331,7 +361,7 @@ const std::string& rule_loader::result::as_verbose_string()
os << warn.ctx.as_string(); os << warn.ctx.as_string();
os << "------" << std::endl; os << "------" << std::endl;
os << warn.snippet; os << warn.ctx.snippet(contents);
os << "------" << std::endl; os << "------" << std::endl;
os << load_result::warning_code_str(warn.wc) os << load_result::warning_code_str(warn.wc)
@ -345,7 +375,7 @@ const std::string& rule_loader::result::as_verbose_string()
return res_verbose_string; return res_verbose_string;
} }
const nlohmann::json& rule_loader::result::as_json() const nlohmann::json& rule_loader::result::as_json(const rules_contents_t& contents)
{ {
nlohmann::json j; nlohmann::json j;
@ -364,7 +394,7 @@ const nlohmann::json& rule_loader::result::as_json()
nlohmann::json jerr; nlohmann::json jerr;
jerr["context"] = err.ctx.as_json(); jerr["context"] = err.ctx.as_json();
jerr["context"]["snippet"] = err.snippet; jerr["context"]["snippet"] = err.ctx.snippet(contents);
jerr["code"] = load_result::error_code_str(err.ec); jerr["code"] = load_result::error_code_str(err.ec);
jerr["codedesc"] = load_result::error_desc(err.ec); jerr["codedesc"] = load_result::error_desc(err.ec);
@ -380,7 +410,7 @@ const nlohmann::json& rule_loader::result::as_json()
nlohmann::json jwarn; nlohmann::json jwarn;
jwarn["context"] = warn.ctx.as_json(); jwarn["context"] = warn.ctx.as_json();
jwarn["context"]["snippet"] = warn.snippet; jwarn["context"]["snippet"] = warn.ctx.snippet(contents);
jwarn["code"] = load_result::warning_code_str(warn.wc); jwarn["code"] = load_result::warning_code_str(warn.wc);
jwarn["codedesc"] = load_result::warning_desc(warn.wc); jwarn["codedesc"] = load_result::warning_desc(warn.wc);
@ -842,8 +872,7 @@ void rule_loader::define(configuration& cfg, rule_info& info)
{ {
cfg.res->add_warning(load_result::LOAD_UNKNOWN_SOURCE, cfg.res->add_warning(load_result::LOAD_UNKNOWN_SOURCE,
"Unknown source " + info.source + ", skipping", "Unknown source " + info.source + ", skipping",
info.ctx, info.ctx);
cfg.content);
return; return;
} }
@ -1044,7 +1073,7 @@ void rule_loader::compile_rule_infos(
{ {
for (auto &w : warn_codes) for (auto &w : warn_codes)
{ {
cfg.res->add_warning(w, "", r.ctx, cfg.content); cfg.res->add_warning(w, "", r.ctx);
} }
} }
@ -1091,8 +1120,7 @@ void rule_loader::compile_rule_infos(
cfg.res->add_warning( cfg.res->add_warning(
load_result::LOAD_UNKNOWN_FIELD, load_result::LOAD_UNKNOWN_FIELD,
e.what(), e.what(),
r.cond_ctx, r.cond_ctx);
cfg.content);
} }
else else
{ {
@ -1125,8 +1153,7 @@ void rule_loader::compile_rule_infos(
cfg.res->add_warning( cfg.res->add_warning(
load_result::LOAD_NO_EVTTYPE, load_result::LOAD_NO_EVTTYPE,
"Rule matches too many evt.type values. This has a significant performance penalty.", "Rule matches too many evt.type values. This has a significant performance penalty.",
r.ctx, r.ctx);
cfg.content);
} }
} }
} }
@ -1146,7 +1173,7 @@ void rule_loader::compile(configuration& cfg, indexed_vector<falco_rule>& out) c
} }
catch(rule_load_exception &e) catch(rule_load_exception &e)
{ {
cfg.res->add_error(e.ec, e.msg, e.ctx, cfg.content); cfg.res->add_error(e.ec, e.msg, e.ctx);
} }
// print info on any dangling lists or macros that were not used anywhere // print info on any dangling lists or macros that were not used anywhere
@ -1157,8 +1184,7 @@ void rule_loader::compile(configuration& cfg, indexed_vector<falco_rule>& out) c
cfg.res->add_warning( cfg.res->add_warning(
load_result::LOAD_UNUSED_MACRO, load_result::LOAD_UNUSED_MACRO,
"Macro not referred to by any other rule/macro", "Macro not referred to by any other rule/macro",
m.ctx, m.ctx);
cfg.content);
} }
} }
for (auto &l : lists) for (auto &l : lists)
@ -1168,8 +1194,7 @@ void rule_loader::compile(configuration& cfg, indexed_vector<falco_rule>& out) c
cfg.res->add_warning( cfg.res->add_warning(
load_result::LOAD_UNUSED_LIST, load_result::LOAD_UNUSED_LIST,
"List not referred to by any other rule/macro", "List not referred to by any other rule/macro",
l.ctx, l.ctx);
cfg.content);
} }
} }
} }

View File

@ -36,6 +36,8 @@ public:
class context class context
{ {
public: public:
static const size_t default_snippet_width = 160;
struct location struct location
{ {
// The original location in the document // The original location in the document
@ -51,7 +53,7 @@ public:
}; };
context(const std::string& name); context(const std::string& name);
context(const YAML::Node& mark, context(const YAML::Node& item,
const std::string item_type, const std::string item_type,
const std::string item_name, const std::string item_name,
const context& parent); const context& parent);
@ -59,7 +61,9 @@ public:
// Return a snippet of the provided rules content // Return a snippet of the provided rules content
// corresponding to this context. // corresponding to this context.
std::string snippet(const std::string& content) const; // Uses the provided rules_contents to look up the original
// rules content for a given location name.
std::string snippet(const falco::load_result::rules_contents_t& rules_contents, size_t snippet_width = default_snippet_width) const;
std::string as_string(); std::string as_string();
nlohmann::json as_json(); nlohmann::json as_json();
@ -77,7 +81,6 @@ public:
falco::load_result::warning_code wc; falco::load_result::warning_code wc;
std::string msg; std::string msg;
context ctx; context ctx;
std::string snippet;
}; };
struct error struct error
@ -85,7 +88,6 @@ public:
falco::load_result::error_code ec; falco::load_result::error_code ec;
std::string msg; std::string msg;
context ctx; context ctx;
std::string snippet;
}; };
class rule_load_exception : public std::exception class rule_load_exception : public std::exception
@ -113,22 +115,21 @@ public:
virtual bool successful() override; virtual bool successful() override;
virtual bool has_warnings() override; virtual bool has_warnings() override;
virtual const std::string& as_string(bool verbose) override;
virtual const nlohmann::json& as_json() override; virtual const std::string& as_string(bool verbose, const falco::load_result::rules_contents_t& contents) override;
virtual const nlohmann::json& as_json(const falco::load_result::rules_contents_t& contents) override;
void add_error(falco::load_result::error_code ec, void add_error(falco::load_result::error_code ec,
const std::string& msg, const std::string& msg,
const context& ctx, const context& ctx);
const std::string& rules_content);
void add_warning(falco::load_result::warning_code ec, void add_warning(falco::load_result::warning_code ec,
const std::string& msg, const std::string& msg,
const context& ctx, const context& ctx);
const std::string& rules_content);
protected: protected:
const std::string& as_summary_string(); const std::string& as_summary_string();
const std::string& as_verbose_string(); const std::string& as_verbose_string(const falco::load_result::rules_contents_t& contents);
std::string name; std::string name;
bool success; bool success;

View File

@ -411,7 +411,7 @@ static void read_item(
else else
{ {
rule_loader::context ctx(item, "unknown", "", parent); rule_loader::context ctx(item, "unknown", "", parent);
cfg.res->add_warning(load_result::LOAD_UNKNOWN_ITEM, "Unknown top level item", ctx, cfg.content); cfg.res->add_warning(load_result::LOAD_UNKNOWN_ITEM, "Unknown top level item", ctx);
} }
} }
@ -425,7 +425,7 @@ bool rule_reader::load(rule_loader::configuration& cfg, rule_loader& loader)
catch(const exception& e) catch(const exception& e)
{ {
rule_loader::context ctx(cfg.name); rule_loader::context ctx(cfg.name);
cfg.res->add_error(load_result::LOAD_ERR_YAML_PARSE, e.what(), ctx, cfg.content); cfg.res->add_error(load_result::LOAD_ERR_YAML_PARSE, e.what(), ctx);
return false; return false;
} }
@ -454,7 +454,7 @@ bool rule_reader::load(rule_loader::configuration& cfg, rule_loader& loader)
} }
catch (rule_loader::rule_load_exception &e) catch (rule_loader::rule_load_exception &e)
{ {
cfg.res->add_error(e.ec, e.msg, e.ctx, cfg.content); cfg.res->add_error(e.ec, e.msg, e.ctx);
// Although we *could* continue on to the next doc, // Although we *could* continue on to the next doc,
// as it's effectively a new rules file, for // as it's effectively a new rules file, for