Files
falco/userspace/engine/lua/parser.lua
Mark Stemm 3da5dfa67b Properly parse numbers in condition fields
Falco won't properly parse a rule like this:

---
- rule: Some Rule
  desc: Some Desc
  condition: evt.type=execve and container.image.repository = 271931939120.dkr
  output: Some output
  priority: INFO
---

This is the error when validating the rules:

Tue Mar 30 12:00:40 2021: Validating rules file(s):
Tue Mar 30 12:00:40 2021:    /home/mstemm/test.yaml
1 errors:
Compilation error when compiling "evt.type=execve and container.image.repository = 271931939120.dkr": 63: syntax error, unexpected 'dkr', expecting 'or', 'and'

The parsing of the string on the right hand side stops at the period
before the dkr. The dkr then doesn't match the grammar, resulting in the
error.

Looking at the parser implementation more closely, the problem is in the
definition of "Number":

---
-   Number = C(V "Hex" + V "Float" + V "Int") / function(n)
          return tonumber(n)
       end,
---
Note that it stops after the number, but does not have any requirement
about what follows.

This changes the definition of number to require that what follows the
number is not an identifier character. With this change, values that are
only numbers are parsed as numbers, and values that start with numbers
don't match the Number definition and are parsed as BareStrings instead.

Signed-off-by: Mark Stemm <mark.stemm@gmail.com>
2021-04-07 16:43:44 +02:00

308 lines
9.6 KiB
Lua

-- Copyright (C) 2019 The Falco Authors.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
--[[
Falco grammar and parser.
Much of the scaffolding and helpers was derived from Andre Murbach Maidl's Lua parser (https://github.com/andremm/lua-parser).
While this is based on the sysdig filtering syntax (*), the Falco syntax is extended to support "macro" terms, which are just identifiers.
(*) There is currently one known difference with the syntax implemented in libsinsp: In libsinsp, field names cannot start with 'a', 'o', or 'n'. With this parser they can.
--]]
local parser = {}
local lpeg = require "lpeg"
lpeg.locale(lpeg)
local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
local Cf, Cg, Cmt, Cp, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Ct
local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
local xdigit = lpeg.xdigit
local space = lpeg.space
-- error message auxiliary functions
-- creates an error message for the input string
local function syntaxerror(errorinfo, pos, msg)
local error_msg = "%s: syntax error, %s"
return string.format(error_msg, pos, msg)
end
-- gets the farthest failure position
local function getffp(s, i, t)
return t.ffp or i, t
end
-- gets the table that contains the error information
local function geterrorinfo()
return Cmt(Carg(1), getffp) * (C(V "OneWord") + Cc("EOF")) / function(t, u)
t.unexpected = u
return t
end
end
-- creates an errror message using the farthest failure position
local function errormsg()
return geterrorinfo() / function(t)
local p = t.ffp or 1
local msg = "unexpected '%s', expecting %s"
msg = string.format(msg, t.unexpected, t.expected)
return nil, syntaxerror(t, p, msg)
end
end
-- reports a syntactic error
local function report_error()
return errormsg()
end
--- sets the farthest failure position and the expected tokens
local function setffp(s, i, t, n)
if not t.ffp or i > t.ffp then
t.ffp = i
t.list = {}
t.list[n] = n
t.expected = "'" .. n .. "'"
elseif i == t.ffp then
if not t.list[n] then
t.list[n] = n
t.expected = "'" .. n .. "', " .. t.expected
end
end
return false
end
local function updateffp(name)
return Cmt(Carg(1) * Cc(name), setffp)
end
-- regular combinators and auxiliary functions
local function token(pat, name)
return pat * V "Skip" + updateffp(name) * P(false)
end
local function symb(str)
return token(P(str), str)
end
local function kw(str)
return token(P(str) * -V "idRest", str)
end
local function list(pat, sep)
return Ct(pat ^ -1 * (sep * pat ^ 0) ^ 0) / function(elements)
return {type = "List", elements = elements}
end
end
--http://lua-users.org/wiki/StringTrim
function trim(s)
if (type(s) ~= "string") then
return s
end
return (s:gsub("^%s*(.-)%s*$", "%1"))
end
parser.trim = trim
local function terminal(tag)
-- Rather than trim the whitespace in this way, it would be nicer to exclude it from the capture...
return token(V(tag), tag) / function(tok)
val = tok
if tag ~= "String" then
val = trim(tok)
end
return {type = tag, value = val}
end
end
local function unaryboolop(op, e)
return {type = "UnaryBoolOp", operator = op, argument = e}
end
local function unaryrelop(e, op)
return {type = "UnaryRelOp", operator = op, argument = e}
end
local function binaryop(e1, op, e2)
if not op then
return e1
else
return {type = "BinaryBoolOp", operator = op, left = e1, right = e2}
end
end
local function bool(pat, sep)
return Cf(pat * Cg(sep * pat) ^ 0, binaryop)
end
local function rel(left, sep, right)
return left * sep * right / function(e1, op, e2)
return {type = "BinaryRelOp", operator = op, left = e1, right = e2}
end
end
-- grammar
local function filter(e)
return {type = "Filter", value = e}
end
local function rule(filter)
return {type = "Rule", filter = filter}
end
local G = {
V "Start", -- Entry rule
Start = V "Skip" * (V "Comment" + V "Rule" / rule) ^ -1 * -1 + report_error(),
-- Grammar
Comment = P "#" * P(1) ^ 0,
Rule = V "Filter" / filter * ((V "Skip") ^ -1),
Filter = V "OrExpression",
OrExpression = bool(V "AndExpression", V "OrOp"),
AndExpression = bool(V "NotExpression", V "AndOp"),
NotExpression = V "UnaryBoolOp" * V "NotExpression" / unaryboolop + V "ExistsExpression",
ExistsExpression = terminal "FieldName" * V "ExistsOp" / unaryrelop + V "MacroExpression",
MacroExpression = terminal "Macro" + V "RelationalExpression",
RelationalExpression = rel(terminal "FieldName", V "RelOp", V "Value") +
rel(terminal "FieldName", V "SetOp", V "InList") +
V "PrimaryExp",
PrimaryExp = symb("(") * V "Filter" * symb(")"),
FuncArgs = symb("(") * list(V "Value", symb(",")) * symb(")"),
-- Terminals
Value = terminal "Number" + terminal "String" + terminal "BareString",
InList = symb("(") * list(V "Value", symb(",")) * symb(")"),
-- Lexemes
Space = space ^ 1,
Skip = (V "Space") ^ 0,
idStart = alpha + P("_"),
idRest = alnum + P("_"),
Identifier = V "idStart" * V "idRest" ^ 0,
Macro = V "idStart" * V "idRest" ^ 0 * -P ".",
Int = digit ^ 1,
PathString = (alnum + S ",.-_/*?") ^ 1,
PortRangeString = (V "Int" + S ":,") ^ 1,
Index = V "PortRangeString" + V "Int" + V "PathString",
FieldName = V "Identifier" * (P "." + V "Identifier") ^ 1 * (P "[" * V "Index" * P "]") ^ -1,
Name = C(V "Identifier") * -V "idRest",
Hex = (P("0x") + P("0X")) * xdigit ^ 1,
Expo = S("eE") * S("+-") ^ -1 * digit ^ 1,
Float = (((digit ^ 1 * P(".") * digit ^ 0) + (P(".") * digit ^ 1)) * V "Expo" ^ -1) + (digit ^ 1 * V "Expo"),
Number = C(V "Hex" + V "Float" + V "Int") * - V "idStart" / function(n)
return tonumber(n)
end,
String = (P '"' * C(((P "\\" * P(1)) + (P(1) - P '"')) ^ 0) * P '"' +
P "'" * C(((P "\\" * P(1)) + (P(1) - P "'")) ^ 0) * P "'"),
BareString = C((P(1) - S " (),=") ^ 1),
OrOp = kw("or") / "or",
AndOp = kw("and") / "and",
Colon = kw(":"),
RelOp = symb("=") / "=" + symb("==") / "==" + symb("!=") / "!=" + symb("<=") / "<=" + symb(">=") / ">=" +
symb("<") / "<" +
symb(">") / ">" +
symb("contains") / "contains" +
symb("icontains") / "icontains" +
symb("glob") / "glob" +
symb("startswith") / "startswith" +
symb("endswith") / "endswith",
SetOp = kw("in") / "in" + kw("intersects") / "intersects" + kw("pmatch") / "pmatch",
UnaryBoolOp = kw("not") / "not",
ExistsOp = kw("exists") / "exists",
-- for error reporting
OneWord = V "Name" + V "Number" + V "String" + P(1)
}
--[[
Parses a single filter and returns the AST.
--]]
function parser.parse_filter(subject)
local errorinfo = {subject = subject}
lpeg.setmaxstack(1000)
local ast, error_msg = lpeg.match(G, subject, nil, errorinfo)
return ast, error_msg
end
function print_ast(ast, level)
local t = ast.type
level = level or 0
local prefix = string.rep(" ", level * 4)
level = level + 1
if t == "Rule" then
print_ast(ast.filter, level)
elseif t == "Filter" then
print_ast(ast.value, level)
elseif t == "BinaryBoolOp" or t == "BinaryRelOp" then
print(prefix .. ast.operator)
print_ast(ast.left, level)
print_ast(ast.right, level)
elseif t == "UnaryRelOp" or t == "UnaryBoolOp" then
print(prefix .. ast.operator)
print_ast(ast.argument, level)
elseif t == "List" then
for i, v in ipairs(ast.elements) do
print_ast(v, level)
end
elseif t == "FieldName" or t == "Number" or t == "String" or t == "BareString" or t == "Macro" then
print(prefix .. t .. " " .. ast.value)
elseif t == "MacroDef" then
-- don't print for now
else
error("Unexpected type in print_ast: " .. t)
end
end
parser.print_ast = print_ast
-- Traverse the provided ast and call the provided callback function
-- for any nodes of the specified type. The callback function should
-- have the signature:
-- cb(ast_node, ctx)
-- ctx is optional.
function traverse_ast(ast, node_types, cb, ctx)
local t = ast.type
if node_types[t] ~= nil then
cb(ast, ctx)
end
if t == "Rule" then
traverse_ast(ast.filter, node_types, cb, ctx)
elseif t == "Filter" then
traverse_ast(ast.value, node_types, cb, ctx)
elseif t == "BinaryBoolOp" or t == "BinaryRelOp" then
traverse_ast(ast.left, node_types, cb, ctx)
traverse_ast(ast.right, node_types, cb, ctx)
elseif t == "UnaryRelOp" or t == "UnaryBoolOp" then
traverse_ast(ast.argument, node_types, cb, ctx)
elseif t == "List" then
for i, v in ipairs(ast.elements) do
traverse_ast(v, node_types, cb, ctx)
end
elseif t == "MacroDef" then
traverse_ast(ast.value, node_types, cb, ctx)
elseif t == "FieldName" or t == "Number" or t == "String" or t == "BareString" or t == "Macro" then
-- do nothing, no traversal needed
else
error("Unexpected type in traverse_ast: " .. t)
end
end
parser.traverse_ast = traverse_ast
return parser