Files
falco/lua/sysdig-parser.lua
2016-02-14 14:31:10 -08:00

385 lines
9.7 KiB
Lua

--[[
Sysdig grammar and parser.
Much of the scaffolding and helpers was derived from Andre Murbach Maidl's Lua parser (https://github.com/andremm/lua-parser).
Parses regular filters following the existing sysdig filter syntax (*), as well as "macro" definitions. Macro definitions are written like:
inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')
(*) There is currently one known difference with the syntax implemented in libsinsp:
- In libsinsp, field names cannot start with 'a', 'o', or 'n'. With this parser they can
--]]
-- Module table; it is returned at the end of this file. Parser entry points
-- are stored under compiler.parser.
local compiler = {}
compiler.parser = {}
local lpeg = require "lpeg"
-- Have LPeg add its locale-based character classes (alpha, digit, ...) as
-- fields of the lpeg table; they are read a few lines below.
lpeg.locale(lpeg)
-- Short local aliases for the LPeg pattern constructors and captures.
local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
local Cf, Cg, Cmt, Cp, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Ct
-- Character classes made available by the lpeg.locale call above.
local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
local xdigit = lpeg.xdigit
local space = lpeg.space
-- error message auxiliary functions
-- Builds the text of a syntax error: the failure position, then
-- ": syntax error, ", then the detail message.
-- The first argument (the error-information table) is accepted but not used.
local function syntaxerror (errorinfo, pos, msg)
  return string.format("%s: syntax error, %s", pos, msg)
end
-- Returns the farthest failure position stored in t (or the current position
-- i when none has been stored yet), followed by the table t itself.
-- Written with the (subject, position, captures...) signature so it can be
-- passed to Cmt.
local function getffp (s, i, t)
  local farthest = t.ffp
  if not farthest then
    farthest = i
  end
  return farthest, t
end
-- Builds the pattern that produces the error-information table.
-- The table is taken from the first extra argument given to lpeg.match; its
-- `unexpected` field is set to the text matched by the OneWord rule, or to
-- "EOF" when OneWord does not match.
local function geterrorinfo ()
  local function remember (info, found)
    info.unexpected = found
    return info
  end
  local at_failure = Cmt(Carg(1), getffp)
  local offending = C(V"OneWord") + Cc("EOF")
  return at_failure * offending / remember
end
-- Builds the pattern that turns the error-information table into the pair
-- (nil, message), where message is a syntax error positioned at the farthest
-- failure position (1 when none was recorded).
local function errormsg ()
  local function describe (info)
    local where = info.ffp or 1
    local detail = string.format("unexpected '%s', expecting %s", info.unexpected, info.expected)
    return nil, syntaxerror(info, where, detail)
  end
  return geterrorinfo() / describe
end
-- Returns the pattern used to report a syntactic error; it is the pattern
-- built by errormsg.
local function report_error ()
  local failure = errormsg()
  return failure
end
--- Records a failed token in the error-information table t.
-- When position i is beyond the farthest failure seen so far (or none has
-- been seen), the bookkeeping is reset to i with n as the only expected token.
-- When i equals the farthest failure, n is prepended to the expected list
-- unless it is already there. Earlier positions leave t untouched.
-- Always returns false.
local function setffp (s, i, t, n)
  local farthest = t.ffp
  if not farthest or i > farthest then
    t.ffp = i
    t.list = { [n] = n }
    t.expected = "'" .. n .. "'"
    return false
  end
  if i == farthest and not t.list[n] then
    t.list[n] = n
    t.expected = "'" .. n .. "', " .. t.expected
  end
  return false
end
-- Returns a match-time pattern that calls setffp with the error-information
-- table (first extra argument of lpeg.match) and `name`. setffp always
-- returns false, so the resulting pattern never matches; it exists only for
-- its bookkeeping side effect.
local function updateffp (name)
  local args = Carg(1) * Cc(name)
  return Cmt(args, setffp)
end
-- regular combinators and auxiliary functions
-- Wraps `pat` as a token: on success it also consumes the Skip rule that
-- follows; on failure it records `name` as an expected token and then fails.
local function token (pat, name)
  local matched = pat * V"Skip"
  local missed = updateffp(name) * P(false)
  return matched + missed
end
-- Token for the literal string `str`; the literal is also used as the name
-- shown in error messages.
local function symb (str)
  local literal = P(str)
  return token(literal, str)
end
-- Token for the keyword `str`: the literal must not be immediately followed
-- by a character matched by the idRest rule.
local function kw (str)
  local whole_word = P(str) * -V"idRest"
  return token(whole_word, str)
end
-- Pattern for a `sep`-separated run of `pat`; the captures are collected
-- into a table and wrapped as { type = "List", elements = ... }.
-- As written, zero or more `pat` matches are accepted before and after each
-- separator.
local function list (pat, sep)
  local function wrap (elements)
    return { type = "List", elements = elements }
  end
  local items = pat^0 * (sep * pat^0)^0
  return Ct(items) / wrap
end
-- Token for the grammar rule named `tag`, producing the AST leaf
-- { type = tag, value = <capture of the rule> }.
--
-- The function capture is applied to V(tag) itself, before token() appends
-- the Skip rule. For rules that make no capture of their own (FieldName,
-- Macro) LPeg passes the whole match to the function; applying the capture
-- after Skip would therefore include the trailing whitespace in the value
-- (e.g. "evt.type " instead of "evt.type").
local function terminal (tag)
  local function leaf (tok)
    return { type = tag, value = tok }
  end
  return token(V(tag) / leaf, tag)
end
-- Builds a "UnaryBoolOp" AST node from an operator and its operand.
local function unaryboolop (op, e)
  local node = { type = "UnaryBoolOp" }
  node.operator = op
  node.argument = e
  return node
end
-- Builds a "UnaryRelOp" AST node. Note the argument order: operand first,
-- operator second.
local function unaryrelop (e, op)
  local node = { type = "UnaryRelOp" }
  node.operator = op
  node.argument = e
  return node
end
-- Combines two operands into a "BinaryBoolOp" node. When no operator is
-- given the first operand is returned unchanged.
local function binaryop (e1, op, e2)
  if op then
    return { type = "BinaryBoolOp", operator = op, left = e1, right = e2 }
  end
  return e1
end
-- Pattern for one `pat` followed by any number of (`sep` `pat`) pairs; the
-- captures are folded from the left with binaryop.
local function bool (pat, sep)
  local tail = Cg(sep * pat)^0
  return Cf(pat * tail, binaryop)
end
-- Pattern for `left sep right`, producing a "BinaryRelOp" node whose
-- operator is the capture of `sep`.
local function rel (left, sep, right)
  local function node (e1, op, e2)
    return { type = "BinaryRelOp", operator = op, left = e1, right = e2 }
  end
  return (left * sep * right) / node
end
-- Maps the character that follows a backslash to the text the escape
-- sequence stands for. A backslash followed by a raw newline or carriage
-- return yields a newline.
local fix_str_escapes = {
  a = "\a", b = "\b", f = "\f", n = "\n", r = "\r", t = "\t", v = "\v",
  ["\n"] = "\n", ["\r"] = "\n",
  ["'"] = "'", ['"'] = '"', ["\\"] = "\\",
}

-- Decodes the backslash escape sequences in `str` and returns the result.
-- Unknown escapes (a backslash followed by any other character) are left
-- unchanged.
--
-- Decoding is done in a single left-to-right pass so that each backslash is
-- interpreted once. Running one gsub per escape over the whole string would
-- mis-decode an escaped backslash followed by an escape letter: `\\n` must
-- become a backslash plus "n", not a newline.
local function fix_str (str)
  -- A table replacement looks up the first capture; when there is no entry
  -- gsub keeps the original text. The parentheses drop gsub's second result.
  return (string.gsub(str, "\\(.)", fix_str_escapes))
end
-- grammar
-- Wraps a parsed expression in a "Filter" AST node.
local function filter(e)
  local node = { type = "Filter" }
  node.value = e
  return node
end
-- Builds a "MacroDef" AST node from the macro name and the parsed body.
local function macro (name, filter)
  local node = { type = "MacroDef" }
  node.name = name
  node.value = filter
  return node
end
-- LPeg grammar for one line of input. The line must be, in full, either a
-- macro definition (`name: filter`) or a filter expression; otherwise the
-- Start rule falls through to report_error(), which yields nil and a message.
local G = {
  V"Start", -- Entry rule
  Start = (V"MacroDef" / macro + V"Filter" / filter) * -1 + report_error();

  -- Grammar
  -- The expression rules are layered: `or`, then `and`, then `not`, then the
  -- relational forms, with parentheses at the bottom.
  Filter = V"OrExpression";
  OrExpression =
    bool(V"AndExpression", V"OrOp");
  AndExpression =
    bool(V"NotExpression", V"AndOp");
  NotExpression =
    V"UnaryBoolOp" * V"NotExpression" / unaryboolop +
    V"ExistsExpression";
  ExistsExpression =
    terminal "FieldName" * V"ExistsOp" / unaryrelop +
    V"MacroExpression";
  MacroExpression =
    terminal "Macro" +
    V"RelationalExpression";
  RelationalExpression =
    rel(terminal "FieldName", V"RelOp", V"Value") +
    rel(terminal "FieldName", V"InOp", V"InList") +
    V"PrimaryExp";
  PrimaryExp = symb("(") * V"Filter" * symb(")");
  MacroDef = (C(V"Macro") * V"Skip" * V"Colon" * (V"Filter"));

  -- Terminals
  Value = terminal "Number" + terminal "String" + terminal "BareString";
  InList = symb("(") * list(V"Value", symb(",")) * symb(")");

  -- Lexemes
  Space = space^1;
  Skip = (V"Space")^0;
  idStart = alpha + P("_");
  idRest = alnum + P("_");
  Identifier = V"idStart" * V"idRest"^0;
  -- A macro name is an identifier that is not directly followed by a dot.
  Macro = V"idStart" * V"idRest"^0 * -P".";
  FieldName = V"Identifier" * (P"." + V"Identifier")^1;
  Name = C(V"Identifier") * -V"idRest";
  Hex = (P("0x") + P("0X")) * xdigit^1;
  Expo = S("eE") * S("+-")^-1 * digit^1;
  Float = (((digit^1 * P(".") * digit^0) +
            (P(".") * digit^1)) * V"Expo"^-1) +
          (digit^1 * V"Expo");
  Int = digit^1;
  Number = C(V"Hex" + V"Float" + V"Int") /
           function (n) return tonumber(n) end;
  -- Double- or single-quoted string; the text between the quotes is captured
  -- and its escape sequences are decoded by fix_str.
  String = (P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' + P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'") / function (s) return fix_str(s) end;
  -- Unquoted value: a run of characters other than space, parentheses and comma.
  BareString = C(((P(1) - S' (),'))^1);
  OrOp = kw("or") / "or";
  AndOp = kw("and") / "and";
  Colon = kw(":");
  -- Alternatives are tried in order, so every operator must come before any
  -- operator that is a prefix of it: "==" before "=", "<=" before "<",
  -- ">=" before ">".
  RelOp = symb("==") / "eq" +
          symb("=") / "eq" +
          symb("!=") / "ne" +
          symb("<=") / "le" +
          symb(">=") / "ge" +
          symb("<") / "lt" +
          symb(">") / "gt" +
          symb("contains") / "contains" +
          symb("icontains") / "icontains";
  InOp = kw("in") / "in";
  UnaryBoolOp = kw("not") / "not";
  ExistsOp = kw("exists") / "exists";

  -- for error reporting
  OneWord = V"Name" + V"Number" + V"String" + P(1);
}
-- Returns a new array holding f(item) for each item of the sequence `arr`,
-- at the same index. Iteration uses ipairs, so it stops at the first nil.
function map(f, arr)
  local mapped = {}
  for index, item in ipairs(arr) do
    mapped[index] = f(item)
  end
  return mapped
end
-- Folds the values of `arr` into an accumulator: starting from `acc`, each
-- value replaces the accumulator with f(accumulator, value). The final
-- accumulator is returned. Values are visited with pairs, whose traversal
-- order is not specified by the language.
function foldr(f, acc, arr)
  local result = acc
  for _, item in pairs(arr) do
    result = f(result, item)
  end
  return result
end
--[[
Traverses the AST and replaces `in` relational expressions with a sequence of ORs.
For example, `a.b in (1, 2)` is expanded to `a.b = 1 or a.b = 2` (in ASTs).

The tree is rewritten in place and nothing is returned. Both "Filter" and
"MacroDef" roots are descended into, so `in` expressions inside a macro body
are expanded as well.

Raises an error when an `in` list has no elements.
--]]
function expand_in(node)
  local t = node.type
  if t == "Filter" or t == "MacroDef" then
    expand_in(node.value)
  elseif t == "UnaryBoolOp" then
    expand_in(node.argument)
  elseif t == "BinaryBoolOp" then
    expand_in(node.left)
    expand_in(node.right)
  elseif t == "BinaryRelOp" and node.operator == "in" then
    local elements = node.right.elements
    -- The length operator is used rather than table.maxn, which is not
    -- available on Lua 5.3 and later.
    local count = #elements
    if count == 0 then
      error ("In list with zero elements")
    end
    -- Every generated equality shares the same left-hand field node.
    local field = node.left
    local function equality(element)
      return {
        type = "BinaryRelOp",
        operator = "eq",
        left = field,
        right = element
      }
    end
    -- Seed the chain with the last element, then OR in elements 1..count-1.
    local expanded = equality(elements[count])
    for i = 1, count - 1 do
      expanded = {
        type = "BinaryBoolOp",
        operator = "or",
        left = expanded,
        right = equality(elements[i])
      }
    end
    -- Overwrite the `in` node in place so parents keep pointing at it.
    node.type = expanded.type
    node.operator = expanded.operator
    node.left = expanded.left
    node.right = expanded.right
  end
end
-- Prints an AST with print(), one node per line, indenting each line by two
-- spaces per nesting level. "Filter" nodes print nothing themselves but
-- still deepen the indentation of their child; "MacroDef" nodes print
-- nothing at all. Raises an error for any other unknown node type.
function print_ast(node, level)
  local t = node.type
  local prefix = string.rep(" ", level*2)
  local child_level = level + 1

  if t == "Filter" then
    print_ast(node.value, child_level)
    return
  end

  if t == "BinaryBoolOp" or t == "BinaryRelOp" then
    print(prefix..node.operator)
    print_ast(node.left, child_level)
    print_ast(node.right, child_level)
    return
  end

  if t == "UnaryRelOp" or t == "UnaryBoolOp" then
    print (prefix..node.operator)
    print_ast(node.argument, child_level)
    return
  end

  if t == "List" then
    print(prefix.. "List: ")
    for _, element in ipairs(node.elements) do
      print_ast(element, child_level)
    end
    return
  end

  if t == "FieldName" or t == "Number" or t == "String" or t == "BareString" or t == "Macro" then
    print (prefix..t.." "..node.value)
    return
  end

  if t == "MacroDef" then
    -- don't print for now
    return
  end

  error ("Unexpected type: "..t)
end
--[[
Parses one line, which should be either a macro definition or a filter.
Returns the first two results of matching the grammar against the line:
the AST on success, or nil followed by an error message on failure.
--]]
function compiler.parser.parseline (subject)
  lpeg.setmaxstack(1000)
  -- Passed to the grammar as its extra argument; the error-reporting helpers
  -- record failure information in it.
  local errorinfo = { subject = subject }
  local tree, err = lpeg.match(G, subject, nil, errorinfo)
  return tree, err
end
--[[
Creates and returns fresh compiler state.
The state is an opaque value that the client passes back into later compiler
calls and must not modify. It is meant to hold information, such as macro
definitions, that has to survive across calls to the line-oriented compiler.
It currently starts out as an empty table.
--]]
function compiler.init()
  local state = {}
  return state
end
--[[
Compiles a digwatch filter or macro.

Parses `line` and expands its `in` expressions.
Returns three values: the AST, the (unchanged) state, and an error message.
On a parse error the AST is an empty table and the message is set; on success
the message is nil.
--]]
function compiler.compile_line(line, state)
  -- Both results must be locals under these exact names: assigning them to
  -- differently named globals would leave error_msg nil, so parse errors
  -- would go unnoticed and expand_in would be called on a nil AST.
  local ast, error_msg = compiler.parser.parseline(line)
  if (error_msg) then
    return {}, state, error_msg
  end
  expand_in(ast)
  -- extract_macros(ast, state)
  -- expand_macros(ast, state)
  return ast, state, error_msg
end
return compiler