mirror of
https://github.com/falcosecurity/falco.git
synced 2025-08-31 14:20:04 +00:00
Grammar for filters and macros
This commit is contained in:
6
lua/README.md
Normal file
6
lua/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
Installation
|
||||
------------
|
||||
|
||||
The sysdig grammar uses the `lpeg` parser. For now install it using luarocks:
|
||||
`luarocks install lpeg`.
|
||||
|
67
lua/parser-smoke.sh
Executable file
67
lua/parser-smoke.sh
Executable file
@@ -0,0 +1,67 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Reports that a filter which should have parsed was rejected, then aborts
# the whole smoke run with status 1.
#   $1 - the filter string that failed to parse
error_exit_good() {
    echo "Error: '$1' did not parse" >&2
    exit 1
}
|
||||
|
||||
# Reports that a filter which should have been rejected parsed successfully,
# then aborts the whole smoke run with status 1.
#   $1 - the filter string that was wrongly accepted
error_exit_bad() {
    echo "Error: incorrect filter '$1' parsed ok" >&2
    exit 1
}
|
||||
|
||||
|
||||
# Asserts that the filter/macro in $1 is accepted by the parser.
# Runs `lua test.lua` on it and aborts the run via error_exit_good when the
# parser exits non-zero.
good() {
    if ! lua test.lua "$1"; then
        error_exit_good "$1"
    fi
}
|
||||
|
||||
# Asserts that the filter/macro in $1 is rejected by the parser.
# Runs `lua test.lua` on it and aborts the run via error_exit_bad when the
# parser unexpectedly exits 0.
#
# Returns 0 when the parser rejected the input (the expected outcome). The
# previous `cmd && fail` form leaked the parser's non-zero status out of the
# function, so a passing check looked like a failure to `set -e` or to any
# caller inspecting $?.
function bad
{
    if lua test.lua "$1"; then
        error_exit_bad "$1"
    fi
    return 0
}
|
||||
|
||||
# Filters

# Filter expressions the parser must accept.
filters_ok=(
    "a"
    "a and b"
    "(a)"
    "(a and b)"
    "(a.a exists and b)"
    "(a.a exists) and (b)"
    "a.a exists and b"
    "a.a=1 or b.b=2 and c"
    "not (a)"
    "not (not (a))"
    "not (a.b=1)"
    "not (a.a exists)"
    "not a"
    "not not a"
    "(not not a)"
    "not a.b=1"
    "not a.a exists"
    "notz"
    "a.b = bla"
    "a.b = 'bla'"
    "a.b = not"
    "a.b contains bla"
    "a.b icontains 'bla'"
    "a.g in ()"
    "a.g in (1, 'a', b)"
    "a.g in ( 1 ,, , b)"
)

# Filter expressions the parser must reject.
filters_rejected=(
    "a.g in (1, 'a', b.c)"
    "a.b = a.a"
    "(a.b = 1"
)

for expr in "${filters_ok[@]}"; do
    good "$expr"
done

for expr in "${filters_rejected[@]}"; do
    bad "$expr"
done

# Macros

# Macro definitions the parser must accept.
macros_ok=(
    "a: a.b exists"
    "a: b and c"
    "a: b"
    "a : b"
    "inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')"
)

# Macro definitions the parser must reject.
macros_rejected=(
    "a:"
)

for expr in "${macros_ok[@]}"; do
    good "$expr"
done

for expr in "${macros_rejected[@]}"; do
    bad "$expr"
done

exit 0
|
251
lua/sysdig-parser.lua
Normal file
251
lua/sysdig-parser.lua
Normal file
@@ -0,0 +1,251 @@
|
||||
--[[
|
||||
Sysdig grammar and parser.
|
||||
|
||||
Much of the scaffolding and helpers was derived from Andre Murbach Maidl's Lua parser (https://github.com/andremm/lua-parser).
|
||||
|
||||
Parses regular filters following the existing sysdig filter syntax (*), as well as "macro" definitions. Macro definitions are written like:
|
||||
|
||||
inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')
|
||||
|
||||
(*) There are a few minor differences with the syntax implemented in libsinsp:
|
||||
|
||||
- (Feature!) In libsinsp, field names cannot start with 'a', 'o', or 'n'. With this parser they can
|
||||
|
||||
- (Bug!) In libsinsp, operator right-hand sides only need to be quoted if they contain spaces or parens. With this parser, they need to be quoted if they contain any non-alphanumeric character. For example:
|
||||
|
||||
(libsinsp) fd.name = mylog or fd.name contains .log and event.dir = <
|
||||
(this parser) fd.name = mylog or fd.name contains '.log' and event.dir = '<'
|
||||
|
||||
]]--
|
||||
|
||||
local parser = {}
|
||||
|
||||
local lpeg = require "lpeg"
|
||||
|
||||
lpeg.locale(lpeg)
|
||||
|
||||
local P, S, V = lpeg.P, lpeg.S, lpeg.V
|
||||
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
|
||||
local Cf, Cg, Cmt, Cp, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Ct
|
||||
local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
|
||||
local xdigit = lpeg.xdigit
|
||||
local space = lpeg.space
|
||||
|
||||
|
||||
-- error message auxiliary functions
|
||||
|
||||
-- Formats the final error string as "<pos>: syntax error, <msg>".
-- `errorinfo` is accepted but not used by this function.
local function syntaxerror (errorinfo, pos, msg)
   return string.format("%s: syntax error, %s", pos, msg)
end
|
||||
|
||||
-- Returns the farthest failure position stored in the error table `t`
-- (or the current position `i` when none has been recorded), followed by
-- `t` itself. Used as the function of a match-time capture.
local function getffp (s, i, t)
   local farthest = t.ffp
   if not farthest then
      farthest = i
   end
   return farthest, t
end
|
||||
|
||||
-- Builds a pattern that produces the error-information table.
-- The table is fetched from the first extra match argument through getffp,
-- then the offending word (the "OneWord" rule, or the literal "EOF" when
-- nothing is left to capture) is stored in its `unexpected` field.
local function geterrorinfo ()
   local function attach_unexpected (info, word)
      info.unexpected = word
      return info
   end

   local error_table = Cmt(Carg(1), getffp)
   local offending = C(V"OneWord") + Cc("EOF")
   return error_table * offending / attach_unexpected
end
|
||||
|
||||
-- Builds a pattern whose captures are `nil` plus a formatted syntax-error
-- message: "unexpected '<word>', expecting <expected tokens>", located at
-- the farthest failure position (defaulting to 1).
local function errormsg ()
   local function describe (info)
      local where = info.ffp or 1
      local text = string.format("unexpected '%s', expecting %s",
                                 info.unexpected, info.expected)
      return nil, syntaxerror(info, where, text)
   end

   return geterrorinfo() / describe
end
|
||||
|
||||
-- Returns the pattern used as the grammar's failure branch; currently a
-- thin alias for errormsg().
local function report_error ()
   local reporter = errormsg()
   return reporter
end
|
||||
|
||||
--- Records a token failure in the error table `t`.
-- When position `i` is beyond the farthest failure seen so far, the record
-- is reset to expect only token `n`. When `i` equals the farthest position,
-- `n` is prepended to the expected list unless already present. Failures at
-- earlier positions are ignored.
-- Always returns false so the enclosing match-time capture fails.
local function setffp (s, i, t, n)
   local farthest = t.ffp
   if not farthest or i > farthest then
      t.ffp = i
      t.list = { [n] = n }
      t.expected = "'" .. n .. "'"
   elseif i == farthest and not t.list[n] then
      t.list[n] = n
      t.expected = "'" .. n .. "', " .. t.expected
   end
   return false
end
|
||||
|
||||
-- Builds a match-time capture that calls setffp with the error table (first
-- extra match argument) and the token `name`.
local function updateffp (name)
   local setffp_args = Carg(1) * Cc(name)
   return Cmt(setffp_args, setffp)
end
|
||||
|
||||
-- regular combinators and auxiliary functions
|
||||
|
||||
-- Wraps `pat` as a token: on success it also consumes trailing whitespace
-- (the "Skip" rule); on failure it records `name` as an expected token via
-- updateffp and then fails.
local function token (pat, name)
   local matched = pat * V"Skip"
   local record_failure = updateffp(name) * P(false)
   return matched + record_failure
end
|
||||
|
||||
-- Token for the literal symbol `str`; the literal doubles as the name used
-- in error messages.
local function symb (str)
   local literal = P(str)
   return token(literal, str)
end
|
||||
|
||||
-- Token for the keyword `str`: the literal must not be immediately followed
-- by an "idRest" character, so a longer identifier that merely starts with
-- the keyword is not mistaken for it.
local function kw (str)
   local whole_word = P(str) * -V"idRest"
   return token(whole_word, str)
end
|
||||
|
||||
|
||||
-- Builds a pattern for zero or more `pat` items interleaved with `sep`
-- separators (items may be missing between separators). The captures are
-- collected into a table and wrapped in a { type = "List" } node.
local function list (pat, sep)
   local function to_node (elements)
      return { type = "List", elements = elements }
   end

   local items = pat^0 * (sep * pat^0)^0
   return Ct(items) / to_node
end
|
||||
|
||||
-- Token for the grammar rule named `tag`; its capture is wrapped in a
-- { type = tag, value = <capture> } node.
local function terminal (tag)
   local function to_node (tok)
      return { type = tag, value = tok }
   end

   return token(V(tag), tag) / to_node
end
|
||||
|
||||
-- AST constructor for a prefix boolean operator applied to expression `e`.
local function unaryboolop (op, e)
   local node = { type = "UnaryBoolOp" }
   node.operator = op
   node.argument = e
   return node
end
|
||||
|
||||
-- AST constructor for a postfix relational operator; note the operand `e`
-- comes first and the operator second.
local function unaryrelop (e, op)
   local node = { type = "UnaryRelOp" }
   node.operator = op
   node.argument = e
   return node
end
|
||||
|
||||
-- Fold step for boolean chains: with an operator it builds a BinaryBoolOp
-- node joining `e1` and `e2`; without one it returns `e1` unchanged.
local function binaryop (e1, op, e2)
   if op then
      return { type = "BinaryBoolOp", operator = op, left = e1, right = e2 }
   end
   return e1
end
|
||||
|
||||
-- Builds a pattern for `pat (sep pat)*` and folds its captures with
-- binaryop, accumulating from the leftmost operand.
local function bool (pat, sep)
   local rest = Cg(sep * pat)^0
   return Cf(pat * rest, binaryop)
end
|
||||
|
||||
-- Builds a pattern for `left sep right` whose three captures become a
-- BinaryRelOp node.
local function rel (left, sep, right)
   local function to_node (e1, op, e2)
      return { type = "BinaryRelOp", operator = op, left = e1, right = e2 }
   end

   return left * sep * right / to_node
end
|
||||
|
||||
-- Replacement text for each recognised escape, keyed by the character that
-- follows the backslash. A backslash before a literal newline or carriage
-- return yields a newline.
local fix_str_escapes = {
   a = "\a", b = "\b", f = "\f", n = "\n", r = "\r", t = "\t", v = "\v",
   ["\n"] = "\n", ["\r"] = "\n",
   ["'"] = "'", ['"'] = '"', ["\\"] = "\\",
}

-- Decodes backslash escape sequences in the body of a quoted string.
--
-- The decoding is done in a single left-to-right pass so each backslash is
-- consumed exactly once. Running one gsub per escape (as before) re-scanned
-- already-decoded text: an escaped backslash followed by "n" was first
-- turned into backslash + newline and then collapsed to a newline, instead
-- of yielding a literal backslash followed by "n".
--
-- Unrecognised escapes are left untouched (the table lookup yields nil, so
-- gsub keeps the original match).
--
-- @param str  raw string contents, without the surrounding quotes
-- @return     the decoded string (exactly one value)
local function fix_str (str)
   -- Assign to a local to drop gsub's second result (the match count).
   local decoded = string.gsub(str, "\\(.)", fix_str_escapes)
   return decoded
end
|
||||
|
||||
-- grammar
|
||||
|
||||
-- AST constructor for the top-level node of a plain filter.
local function filter(e)
   local node = { type = "Filter" }
   node.value = e
   return node
end
|
||||
|
||||
-- AST constructor for a macro definition: `name` is the macro's identifier
-- and `filter` is the parsed expression it stands for.
local function macro (name, filter)
   local node = { type = "MacroDef" }
   node.name = name
   node.value = filter
   return node
end
|
||||
|
||||
-- LPeg grammar for sysdig filters and macro definitions.
--
-- Rules are tried with PEG ordered choice, so where two alternatives share a
-- prefix the longer one must be listed first.
local G = {
   V"Start", -- Entry rule

   -- The input is a macro definition or a bare filter and must be consumed
   -- completely (-1 = end of subject); otherwise the error reporter runs.
   Start = (V"MacroDef" / macro + V"Filter" / filter) * -1 + report_error();

   -- Grammar (lowest to highest precedence: or, and, not, exists/relational)
   Filter = V"OrExpression";
   OrExpression =
      bool(V"AndExpression", V"OrOp");

   AndExpression =
      bool(V"NotExpression", V"AndOp");

   NotExpression =
      V"UnaryBoolOp" * V"NotExpression" / unaryboolop +
      V"ExistsExpression";

   ExistsExpression =
      terminal "FieldName" * V"ExistsOp" / unaryrelop +
      V"MacroExpression";

   MacroExpression =
      terminal "Macro" +
      V"RelationalExpression";

   RelationalExpression =
      rel(terminal "FieldName", V"RelOp", V"Value") +
      rel(terminal "FieldName", V"InOp", V"InList") +
      V"PrimaryExp";

   PrimaryExp = symb("(") * V"Filter" * symb(")");

   MacroDef = (C(V"Macro") * V"Skip" * V"Colon" * (V"Filter"));

   -- Terminals
   Value = terminal "Number" + terminal "String" + terminal "Identifier";

   InList = symb("(") * list(V"Value", symb(",")) * symb(")");


   -- Lexemes
   Space = space^1;
   Skip = (V"Space")^0;
   idStart = alpha + P("_");
   idRest = alnum + P("_");
   Identifier = V"idStart" * V"idRest"^0;
   -- A macro reference is an identifier that is not the start of a field name.
   Macro = V"idStart" * V"idRest"^0 * -P".";
   FieldName = V"Identifier" * (P"." + V"Identifier")^1;
   Name = C(V"Identifier") * -V"idRest";
   Hex = (P("0x") + P("0X")) * xdigit^1;
   Expo = S("eE") * S("+-")^-1 * digit^1;
   Float = (((digit^1 * P(".") * digit^0) +
            (P(".") * digit^1)) * V"Expo"^-1) +
           (digit^1 * V"Expo");
   Int = digit^1;
   Number = C(V"Hex" + V"Float" + V"Int") /
      function (n) return tonumber(n) end;
   String = (P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' + P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'") / function (s) return fix_str(s) end;
   OrOp = kw("or") / "or";
   AndOp = kw("and") / "and";
   -- ":" is punctuation, not a word: use symb so that "name:expr" (no space
   -- after the colon) is accepted. kw would reject it because an identifier
   -- character follows the colon.
   Colon = symb(":");
   -- "==" must be tried before "=": with ordered choice "=" would otherwise
   -- always win and leave the second "=" to break the value that follows.
   RelOp = symb("==") / "eq" +
           symb("=") / "eq" +
           symb("!=") / "ne" +
           symb("<=") / "le" +
           symb(">=") / "ge" +
           symb("<") / "lt" +
           symb(">") / "gt" +
           symb("contains") / "contains" +
           symb("icontains") / "icontains";
   InOp = kw("in") / "in";
   UnaryBoolOp = kw("not") / "not";
   ExistsOp = kw("exists") / "exists";

   -- for error reporting
   OneWord = V"Name" + V"Number" + V"String" + P(1);
}
|
||||
|
||||
|
||||
-- Parses `subject` (a filter or macro definition) with grammar G.
-- A fresh error table holding the subject is passed as the first extra
-- match argument so the grammar can record failure information in it.
-- Returns the first two results of lpeg.match: the AST, and the error
-- message produced by the grammar's error branch.
function parser.parse (subject)
   lpeg.setmaxstack(1000)
   local state = { subject = subject }
   local tree, failure = lpeg.match(G, subject, nil, state)
   return tree, failure
end
|
||||
|
||||
return parser
|
14
lua/test.lua
Normal file
14
lua/test.lua
Normal file
@@ -0,0 +1,14 @@
|
||||
-- Command-line driver for the parser smoke tests.
-- Usage: lua test.lua <string>
-- Exits 0 when the argument parses as a filter or macro definition and 1
-- otherwise (including when the argument count is wrong).
local parser = require "sysdig-parser"

if #arg ~= 1 then
   print("Usage: test.lua <string>")
   os.exit(1)
end

local ast, error_msg = parser.parse(arg[1])
if not ast then
   -- Surface the parser's diagnostic on stderr instead of discarding it, so
   -- a failing case can be understood without re-running by hand.
   io.stderr:write(tostring(error_msg), "\n")
   os.exit(1)
end

os.exit(0)
|
||||
|
32
notes.txt
Normal file
32
notes.txt
Normal file
@@ -0,0 +1,32 @@
|
||||
class sinsp_filter
|
||||
::compile(str)
|
||||
call sinsp_filter::push_expression when entering a new nesting level (e.g. parens)
|
||||
call sinsp_filter::parse_check to parse a single relational expression
|
||||
parse_check creates a sinsp_filter_check 'chk' of right type for field in this expression
|
||||
this 'chk' holds the fieldname, operator, value, and also the boolean op that was "on the left" of the expression (or BO_NONE). Then it is added to the parent sinsp_filter_expression by calling sinsp_filter_expression::add_check
|
||||
|
||||
|
||||
|
||||
|
||||
class sinsp_filter_expression : sinsp_filter_check
|
||||
has a list of sinsp_filter_checks (m_checks)
|
||||
|
||||
|
||||
|
||||
class sinsp_filter_check // represents single relational expression
|
||||
|
||||
|
||||
Summary: what we'll need to do:
|
||||
|
||||
- add a bool arg `lua_parsing` to sinsp::set_filter(const string& filter) (sinsp.cpp:1285)
|
||||
that bool (defaults false) is passed to the sinsp_filter constructor
|
||||
- if true, sinsp_filter constructor will call lua_compile() instead of compile()
|
||||
- add a new method sinsp_filter::lua_compile(const string& filter) (filter.cpp)
|
||||
this method calls up into lua with the string and some handle object that lua parser will use.
|
||||
|
||||
What lua parser can do with said handle:
|
||||
- create a filter_expression
|
||||
- create new sinsp_filter_check by calling g_filterlist.new_filter_check_from_fldname (filter.cpp:1483)
|
||||
- set its comparison operator and previous bool operator (filter.cpp:1504)
|
||||
- parse field name (filter.cpp:1506)
|
||||
- parse value (filter.cpp:1610)
|
Reference in New Issue
Block a user