--
-- Copyright (C) 2016 Draios inc.
--
-- This file is part of falco.
--
-- falco is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License version 2 as
-- published by the Free Software Foundation.
--
-- falco is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with falco. If not, see <http://www.gnu.org/licenses/>.

--[[
Falco grammar and parser.

Much of the scaffolding and many of the helpers were derived from Andre Murbach
Maidl's Lua parser (https://github.com/andremm/lua-parser).

Parses regular filters following the existing sysdig filter syntax (*), extended
to support "macro" terms, which are simply identifiers.

(*) There is currently one known difference from the syntax implemented in
libsinsp: in libsinsp, field names cannot start with 'a', 'o', or 'n'. With this
parser they can.
--]]
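
-- Illustrative examples of conditions this grammar accepts. The macro name
-- spawned_process below is hypothetical (macros are defined elsewhere, e.g. in
-- a rules file); the fields are ordinary dotted field names:
--
--   spawned_process and proc.name = bash
--   evt.type in (open, openat) and fd.name startswith /etc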

local parser = {}

parser.verbose = false

function parser.set_verbose(verbose)
  parser.verbose = verbose
end

local lpeg = require "lpeg"

lpeg.locale(lpeg)

local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
local Cf, Cg, Cmt, Cp, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Ct
local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
local xdigit = lpeg.xdigit
local space = lpeg.space

-- error message auxiliary functions

-- creates an error message for the input string
local function syntaxerror (errorinfo, pos, msg)
  local error_msg = "%s: syntax error, %s"
  return string.format(error_msg, pos, msg)
end

-- gets the farthest failure position
local function getffp (s, i, t)
  return t.ffp or i, t
end

-- gets the table that contains the error information
local function geterrorinfo ()
  return Cmt(Carg(1), getffp) * (C(V"OneWord") + Cc("EOF")) /
    function (t, u)
      t.unexpected = u
      return t
    end
end

-- creates an error message using the farthest failure position
local function errormsg ()
  return geterrorinfo() /
    function (t)
      local p = t.ffp or 1
      local msg = "unexpected '%s', expecting %s"
      msg = string.format(msg, t.unexpected, t.expected)
      return nil, syntaxerror(t, p, msg)
    end
end

-- reports a syntactic error
local function report_error ()
  return errormsg()
end
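
-- The message produced above has the shape
--   <position>: syntax error, unexpected '<token>', expecting '<name>', '<name>', ...
-- where <position> is the farthest input position the parser reached before failing.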

--- sets the farthest failure position and the expected tokens
local function setffp (s, i, t, n)
  if not t.ffp or i > t.ffp then
    t.ffp = i
    t.list = {} ; t.list[n] = n
    t.expected = "'" .. n .. "'"
  elseif i == t.ffp then
    if not t.list[n] then
      t.list[n] = n
      t.expected = "'" .. n .. "', " .. t.expected
    end
  end
  return false
end

local function updateffp (name)
  return Cmt(Carg(1) * Cc(name), setffp)
end

-- regular combinators and auxiliary functions

local function token (pat, name)
  return pat * V"Skip" + updateffp(name) * P(false)
end

local function symb (str)
  return token (P(str), str)
end

local function kw (str)
  return token (P(str) * -V"idRest", str)
end
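
-- For illustration: symb("(") matches a literal "(" and then consumes trailing
-- whitespace via Skip, while kw("in") matches the keyword "in" only when it is
-- not followed by an identifier character, so a bare value like "inode" is not
-- mistaken for the "in" operator. When a token fails to match, updateffp records
-- the position and token name for error reporting.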

local function list (pat, sep)
  return Ct(pat^-1 * (sep * pat^0)^0) / function(elements) return {type = "List", elements=elements} end
end
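
-- For example, list(V"Value", symb(",")) applied to "a, b, c" produces a node of
-- the form { type = "List", elements = { <a>, <b>, <c> } }.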

-- http://lua-users.org/wiki/StringTrim
function trim(s)
  if (type(s) ~= "string") then return s end
  return (s:gsub("^%s*(.-)%s*$", "%1"))
end

local function terminal (tag)
  -- Rather than trim the whitespace in this way, it would be nicer to exclude it
  -- from the capture. Quoted strings (tag == "String") are kept verbatim so that
  -- conditions matching exact strings (e.g. command lines with leading/trailing
  -- spaces) work properly.
  return token(V(tag), tag) / function (tok) local val = tok; if tag ~= "String" then val = trim(tok) end; return { type = tag, value = val } end
end

local function unaryboolop (op, e)
  return { type = "UnaryBoolOp", operator = op, argument = e }
end

local function unaryrelop (e, op)
  return { type = "UnaryRelOp", operator = op, argument = e }
end

local function binaryop (e1, op, e2)
  if not op then
    return e1
  else
    return { type = "BinaryBoolOp", operator = op, left = e1, right = e2 }
  end
end

local function bool (pat, sep)
  return Cf(pat * Cg(sep * pat)^0, binaryop)
end

local function rel (left, sep, right)
  return left * sep * right / function(e1, op, e2) return { type = "BinaryRelOp", operator = op, left = e1, right = e2 } end
end
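
-- Illustration: bool() left-folds repeated operators, so a condition such as
--   a and b and evt.type = execve
-- yields nested nodes roughly like
--   { type = "BinaryBoolOp", operator = "and",
--     left  = { type = "BinaryBoolOp", operator = "and", left = <a>, right = <b> },
--     right = { type = "BinaryRelOp", operator = "=",
--               left = <evt.type>, right = <execve> } }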

local function fix_str (str)
  str = string.gsub(str, "\\a", "\a")
  str = string.gsub(str, "\\b", "\b")
  str = string.gsub(str, "\\f", "\f")
  str = string.gsub(str, "\\n", "\n")
  str = string.gsub(str, "\\r", "\r")
  str = string.gsub(str, "\\t", "\t")
  str = string.gsub(str, "\\v", "\v")
  str = string.gsub(str, "\\\n", "\n")
  str = string.gsub(str, "\\\r", "\n")
  str = string.gsub(str, "\\'", "'")
  str = string.gsub(str, '\\"', '"')
  str = string.gsub(str, '\\\\', '\\')
  return str
end
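
-- For example, fix_str([[a\tb\n]]) turns the backslash escapes captured from a
-- quoted string into a real tab and newline, and fix_str([[say \"hi\"]])
-- unescapes the embedded quotes.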

-- grammar

local function filter(e)
  return {type = "Filter", value=e}
end

local function rule(filter)
  return {type = "Rule", filter = filter}
end

local G = {
  V"Start", -- Entry rule

  Start = V"Skip" * (V"Comment" + V"Rule" / rule)^-1 * -1 + report_error();

  -- Grammar
  Comment = P"#" * P(1)^0;

  Rule = V"Filter" / filter * ((V"Skip")^-1);

  Filter = V"OrExpression";
  OrExpression =
    bool(V"AndExpression", V"OrOp");

  AndExpression =
    bool(V"NotExpression", V"AndOp");

  NotExpression =
    V"UnaryBoolOp" * V"NotExpression" / unaryboolop +
    V"ExistsExpression";

  ExistsExpression =
    terminal "FieldName" * V"ExistsOp" / unaryrelop +
    V"MacroExpression";

  MacroExpression =
    terminal "Macro" +
    V"RelationalExpression";

  RelationalExpression =
    rel(terminal "FieldName", V"RelOp", V"Value") +
    rel(terminal "FieldName", V"InOp", V"InList") +
    rel(terminal "FieldName", V"PmatchOp", V"InList") +
    V"PrimaryExp";

  PrimaryExp = symb("(") * V"Filter" * symb(")");

  FuncArgs = symb("(") * list(V"Value", symb(",")) * symb(")");

  -- Terminals
  Value = terminal "Number" + terminal "String" + terminal "BareString";

  InList = symb("(") * list(V"Value", symb(",")) * symb(")");

  -- Lexemes
  Space = space^1;
  Skip = (V"Space")^0;
  idStart = alpha + P("_");
  idRest = alnum + P("_");
  Identifier = V"idStart" * V"idRest"^0;
  Macro = V"idStart" * V"idRest"^0 * -P".";
  Int = digit^1;
  PathString = (alnum + S'-_/*?')^1;
  Index = V"Int" + V"PathString";
  FieldName = V"Identifier" * (P"." + V"Identifier")^1 * (P"[" * V"Index" * P"]")^-1;
  Name = C(V"Identifier") * -V"idRest";
  Hex = (P("0x") + P("0X")) * xdigit^1;
  Expo = S("eE") * S("+-")^-1 * digit^1;
  Float = (((digit^1 * P(".") * digit^0) +
            (P(".") * digit^1)) * V"Expo"^-1) +
          (digit^1 * V"Expo");
  Number = C(V"Hex" + V"Float" + V"Int") /
           function (n) return tonumber(n) end;
  String = (P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' +
            P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'") / function (s) return fix_str(s) end;
  BareString = C(((P(1) - S' (),='))^1);

  OrOp = kw("or") / "or";
  AndOp = kw("and") / "and";
  Colon = kw(":");
  RelOp = symb("=") / "=" +
          symb("==") / "==" +
          symb("!=") / "!=" +
          symb("<=") / "<=" +
          symb(">=") / ">=" +
          symb("<") / "<" +
          symb(">") / ">" +
          symb("contains") / "contains" +
          symb("icontains") / "icontains" +
          symb("glob") / "glob" +
          symb("startswith") / "startswith";
  InOp = kw("in") / "in";
  PmatchOp = kw("pmatch") / "pmatch";
  UnaryBoolOp = kw("not") / "not";
  ExistsOp = kw("exists") / "exists";

  -- for error reporting
  OneWord = V"Name" + V"Number" + V"String" + P(1);
}
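
-- Note on the lexemes above: a bare identifier with no dot (e.g. spawned_process)
-- lexes as a Macro, while a dotted name (e.g. proc.name) lexes as a FieldName,
-- optionally followed by a bracketed index such as proc.aname[2].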

--[[
Parses a single filter and returns the AST.
--]]
function parser.parse_filter (subject)
  local errorinfo = { subject = subject }
  lpeg.setmaxstack(1000)
  local ast, error_msg = lpeg.match(G, subject, nil, errorinfo)
  return ast, error_msg
end
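
-- Example usage (illustrative):
--   local ast, err = parser.parse_filter('evt.type = execve and proc.name = bash')
--   if ast == nil then
--     print("parse error: "..err)
--   end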

function print_ast(ast, level)
  local t = ast.type
  level = level or 0
  local prefix = string.rep(" ", level*4)
  level = level + 1

  if t == "Rule" then
    print_ast(ast.filter, level)

  elseif t == "Filter" then
    print_ast(ast.value, level)

  elseif t == "BinaryBoolOp" or t == "BinaryRelOp" then
    print(prefix..ast.operator)
    print_ast(ast.left, level)
    print_ast(ast.right, level)

  elseif t == "UnaryRelOp" or t == "UnaryBoolOp" then
    print(prefix..ast.operator)
    print_ast(ast.argument, level)

  elseif t == "List" then
    for i, v in ipairs(ast.elements) do
      print_ast(v, level)
    end

  elseif t == "FieldName" or t == "Number" or t == "String" or t == "BareString" or t == "Macro" then
    print(prefix..t.." "..ast.value)

  elseif t == "MacroDef" then
    -- don't print for now

  else
    error("Unexpected type in print_ast: "..t)
  end
end
parser.print_ast = print_ast
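
-- For a condition such as 'proc.name = bash', print_ast emits an indented tree
-- roughly like:
--   =
--       FieldName proc.name
--       BareString bash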

-- Traverse the provided ast and call the provided callback function
-- for any nodes of the specified type. The callback function should
-- have the signature:
--     cb(ast_node, ctx)
-- ctx is optional.
function traverse_ast(ast, node_types, cb, ctx)
  local t = ast.type

  if node_types[t] ~= nil then
    cb(ast, ctx)
  end

  if t == "Rule" then
    traverse_ast(ast.filter, node_types, cb, ctx)

  elseif t == "Filter" then
    traverse_ast(ast.value, node_types, cb, ctx)

  elseif t == "BinaryBoolOp" or t == "BinaryRelOp" then
    traverse_ast(ast.left, node_types, cb, ctx)
    traverse_ast(ast.right, node_types, cb, ctx)

  elseif t == "UnaryRelOp" or t == "UnaryBoolOp" then
    traverse_ast(ast.argument, node_types, cb, ctx)

  elseif t == "List" then
    for i, v in ipairs(ast.elements) do
      traverse_ast(v, node_types, cb, ctx)
    end

  elseif t == "MacroDef" then
    traverse_ast(ast.value, node_types, cb, ctx)

  elseif t == "FieldName" or t == "Number" or t == "String" or t == "BareString" or t == "Macro" then
    -- do nothing, no traversal needed

  else
    error("Unexpected type in traverse_ast: "..t)
  end
end
parser.traverse_ast = traverse_ast
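
-- Example usage (illustrative): collect the names of all macro references in a
-- parsed condition.
--   local macros = {}
--   local ast = parser.parse_filter('spawned_process and proc.name = bash')
--   parser.traverse_ast(ast, {Macro=true}, function (node) macros[node.value] = true end)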

return parser