Grammar for filters and macros

This commit is contained in:
Henri DF
2016-02-09 16:29:01 -08:00
parent 4bd8a9d401
commit 41ee6e49a5
5 changed files with 370 additions and 0 deletions

6
lua/README.md Normal file
View File

@@ -0,0 +1,6 @@
Installation
------------
The sysdig grammar uses the `lpeg` parser. For now install it using luarocks:
`luarocks install lpeg`.

67
lua/parser-smoke.sh Executable file
View File

@@ -0,0 +1,67 @@
#!/bin/bash
# Reports on stderr that a filter which should have parsed was rejected,
# then terminates the script with status 1.
#   $1 - the filter string that failed to parse
error_exit_good() {
    printf "Error: '%s' did not parse\n" "$1" >&2
    exit 1
}
# Reports on stderr that a filter which should have been rejected parsed
# successfully, then terminates the script with status 1.
#   $1 - the incorrect filter string that was accepted
error_exit_bad() {
    printf "Error: incorrect filter '%s' parsed ok\n" "$1" >&2
    exit 1
}
# Asserts that a filter parses: runs test.lua on it and aborts through
# error_exit_good when the parser exits with a non-zero status.
#   $1 - the filter string expected to parse
good() {
    if ! lua test.lua "$1"; then
        error_exit_good "$1"
    fi
}
# Asserts that a filter is rejected: runs test.lua on it and aborts through
# error_exit_bad when the parser unexpectedly exits with status 0.
#   $1 - the filter string expected NOT to parse
# Returns 0 when the filter was rejected as expected. (An `a && b` body would
# instead leak the parser's non-zero status as this function's result, which
# misreports success and would abort the script under `set -e`.)
bad() {
    if lua test.lua "$1"; then
        error_exit_bad "$1"
    fi
    return 0
}
# Filters that must parse.
good_filters=(
    "a"
    "a and b"
    "(a)"
    "(a and b)"
    "(a.a exists and b)"
    "(a.a exists) and (b)"
    "a.a exists and b"
    "a.a=1 or b.b=2 and c"
    "not (a)"
    "not (not (a))"
    "not (a.b=1)"
    "not (a.a exists)"
    "not a"
    "not not a"
    "(not not a)"
    "not a.b=1"
    "not a.a exists"
    "notz"
    "a.b = bla"
    "a.b = 'bla'"
    "a.b = not"
    "a.b contains bla"
    "a.b icontains 'bla'"
    "a.g in ()"
    "a.g in (1, 'a', b)"
    "a.g in ( 1 ,, , b)"
)

# Filters that must be rejected.
bad_filters=(
    "a.g in (1, 'a', b.c)"
    "a.b = a.a"
    "(a.b = 1"
)

# Macro definitions that must parse.
good_macros=(
    "a: a.b exists"
    "a: b and c"
    "a: b"
    "a : b"
    "inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')"
)

# Macro definitions that must be rejected.
bad_macros=(
    "a:"
)

# Run every case in the same order as listed above; good/bad terminate the
# script themselves on the first unexpected result.
for input in "${good_filters[@]}"; do
    good "$input"
done
for input in "${bad_filters[@]}"; do
    bad "$input"
done
for input in "${good_macros[@]}"; do
    good "$input"
done
for input in "${bad_macros[@]}"; do
    bad "$input"
done

exit 0

251
lua/sysdig-parser.lua Normal file
View File

@@ -0,0 +1,251 @@
--[[
Sysdig grammar and parser.
Much of the scaffolding and helpers was derived from Andre Murbach Maidl's Lua parser (https://github.com/andremm/lua-parser).
Parses regular filters following the existing sysdig filter syntax (*), as well as "macro" definitions. Macro definitions are written like:
inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')
(*) There are a few minor differences with the syntax implemented in libsinsp:
- (Feature!) In libsinsp, field names cannot start with 'a', 'o', or 'n'. With this parser they can
- (Bug!) In libsinsp, operator right-hand sides only need to be quoted if they contain spaces or parens. With this parser, they need to be quoted if they contain any non-alphanumeric character. For example:
(libsinsp) fd.name = mylog or fd.name contains .log and event.dir = <
(this parser) fd.name = mylog or fd.name contains '.log' and event.dir = '<'
]]--
local parser = {}
local lpeg = require "lpeg"
lpeg.locale(lpeg)
local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
local Cf, Cg, Cmt, Cp, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Ct
local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
local xdigit = lpeg.xdigit
local space = lpeg.space
-- error message auxiliary functions
-- Builds the text of a syntax error: "<pos>: syntax error, <msg>".
-- `errorinfo` is accepted for the caller's convenience but is not read here.
local function syntaxerror (errorinfo, pos, msg)
  return string.format("%s: syntax error, %s", pos, msg)
end
-- Match-time helper: returns the farthest failure position recorded in `t`
-- (falling back to the current position `i` when none is recorded), followed
-- by `t` itself. The subject `s` is not inspected.
local function getffp (s, i, t)
  local farthest = t.ffp
  if not farthest then
    farthest = i
  end
  return farthest, t
end
-- Builds a pattern yielding the error-information table (the first extra
-- argument given to lpeg.match). getffp supplies the position to continue
-- from; the word found there (or "EOF" when OneWord cannot match) is stored
-- in the table's `unexpected` field.
local function geterrorinfo ()
  local function attach_unexpected (info, found)
    info.unexpected = found
    return info
  end

  local at_failure = Cmt(Carg(1), getffp)
  local offending = C(V"OneWord") + Cc("EOF")
  return at_failure * offending / attach_unexpected
end
-- Builds a pattern whose captures are `nil` followed by a formatted syntax
-- error message describing the unexpected word and the expected tokens
-- recorded at the farthest failure position.
local function errormsg ()
  local function build_message (info)
    local where = info.ffp or 1
    local detail = string.format("unexpected '%s', expecting %s",
                                 info.unexpected, info.expected)
    return nil, syntaxerror(info, where, detail)
  end

  return geterrorinfo() / build_message
end
-- Returns the pattern used to report a syntax error; currently this is
-- exactly the pattern built by errormsg().
local function report_error ()
  local failure = errormsg()
  return failure
end
--- Records a failed token in the error-information table `t`.
-- `i` is the position of the failure and `n` the name of the token that was
-- expected there. A failure farther than any seen so far resets the record;
-- a failure at the same position adds `n` to the expected list (once);
-- a failure before the recorded position is ignored.
-- Always returns false.
local function setffp (s, i, t, n)
  local farthest = t.ffp

  if farthest and i < farthest then
    -- an earlier position than the one already recorded: nothing to do
    return false
  end

  if farthest and i == farthest then
    if not t.list[n] then
      t.list[n] = n
      t.expected = "'" .. n .. "', " .. t.expected
    end
    return false
  end

  -- first failure, or a new farthest failure: start a fresh record
  t.ffp = i
  t.list = { [n] = n }
  t.expected = "'" .. n .. "'"
  return false
end
-- Builds a match-time pattern that calls setffp with the error-information
-- table (first extra match argument) and the token name `name`.
local function updateffp (name)
  local info_and_name = Carg(1) * Cc(name)
  return Cmt(info_and_name, setffp)
end
-- regular combinators and auxiliary functions
-- Wraps `pat` as a token: on success it also consumes whatever the Skip rule
-- matches after it; on failure it records `name` as an expected token via
-- updateffp and then fails.
local function token (pat, name)
  local matched = pat * V"Skip"
  local record_failure = updateffp(name) * P(false)
  return matched + record_failure
end
-- Token for the literal string `str`; the literal doubles as the token name
-- used in error messages.
local function symb (str)
  local literal = P(str)
  return token(literal, str)
end
-- Token for the keyword `str`: the literal must not be immediately followed
-- by a character matching the idRest rule.
local function kw (str)
  local whole_word = P(str) * -V"idRest"
  return token(whole_word, str)
end
-- Pattern for a `sep`-separated run of `pat` items. Every position may hold
-- zero or more items, so empty lists and empty slots between separators are
-- accepted. The item captures are collected into a table and wrapped in a
-- node of type "List".
local function list (pat, sep)
  local function to_node (elements)
    return { type = "List", elements = elements }
  end

  local leading = pat^0
  local following = (sep * pat^0)^0
  return Ct(leading * following) / to_node
end
-- Token for the lexical rule named `tag`, producing a node
-- { type = tag, value = <captured value> }.
--
-- The function capture is applied to the lexeme itself, before `token`
-- appends the Skip rule. A function capture receives the whole match when
-- the pattern has no captures, so applying it after Skip would put the
-- trailing whitespace into `value` for capture-less rules.
local function terminal (tag)
  local function to_node (tok)
    return { type = tag, value = tok }
  end
  return token(V(tag) / to_node, tag)
end
-- Builds a "UnaryBoolOp" node applying operator `op` to operand `e`.
local function unaryboolop (op, e)
  local node = { type = "UnaryBoolOp" }
  node.operator = op
  node.argument = e
  return node
end
-- Builds a "UnaryRelOp" node applying operator `op` to operand `e`.
-- Note the argument order: operand first, operator second.
local function unaryrelop (e, op)
  local node = { type = "UnaryRelOp" }
  node.operator = op
  node.argument = e
  return node
end
-- Combines two operands into a "BinaryBoolOp" node. When no operator is
-- given, the left operand is returned unchanged.
local function binaryop (e1, op, e2)
  if op then
    return { type = "BinaryBoolOp", operator = op, left = e1, right = e2 }
  end
  return e1
end
-- Pattern for `pat (sep pat)*`, with the captures folded through binaryop:
-- the first operand is the initial value and each following (operator,
-- operand) group is combined with the result so far.
local function bool (pat, sep)
  local tail = Cg(sep * pat)^0
  return Cf(pat * tail, binaryop)
end
-- Pattern for `left sep right`, producing a "BinaryRelOp" node from the
-- three captured values (left operand, operator, right operand).
local function rel (left, sep, right)
  local function to_node (e1, op, e2)
    return { type = "BinaryRelOp", operator = op, left = e1, right = e2 }
  end

  local sequence = left * sep * right
  return sequence / to_node
end
-- Replacement for each recognised escape, keyed by the character that
-- follows the backslash. A backslash followed by a literal newline or
-- carriage return both become a newline.
local str_escapes = {
  a = "\a", b = "\b", f = "\f", n = "\n", r = "\r", t = "\t", v = "\v",
  ["\n"] = "\n", ["\r"] = "\n",
  ["'"] = "'", ['"'] = '"', ["\\"] = "\\",
}

-- Decodes the backslash escapes in the body of a quoted string and returns
-- the resulting string.
--
-- All escapes are decoded in a single left-to-right pass. Decoding them with
-- one gsub per escape re-scans earlier output, so an escaped backslash
-- followed by "n" would wrongly be decoded as a newline. Unrecognised escapes
-- are left untouched (gsub keeps the original text when the table lookup
-- yields nil).
local function fix_str (str)
  -- parentheses drop gsub's second result (the substitution count)
  return (string.gsub(str, "\\(.)", str_escapes))
end
-- grammar
-- Wraps a parsed expression `e` in a top-level "Filter" node.
local function filter(e)
  local node = { type = "Filter" }
  node.value = e
  return node
end
-- Wraps a macro name and its parsed body in a top-level "MacroDef" node.
local function macro (name, filter)
  local node = { type = "MacroDef" }
  node.name = name
  node.value = filter
  return node
end
-- LPeg grammar for filters and macro definitions. The first entry names the
-- start rule; every other entry defines one rule.
local G = {
  V"Start", -- Entry rule

  -- A complete input is a macro definition or a filter followed by end of
  -- input; anything else produces the error report.
  Start = (V"MacroDef" / macro + V"Filter" / filter) * -1 + report_error();

  -- Grammar
  Filter = V"OrExpression";

  OrExpression =
    bool(V"AndExpression", V"OrOp");

  AndExpression =
    bool(V"NotExpression", V"AndOp");

  NotExpression =
    V"UnaryBoolOp" * V"NotExpression" / unaryboolop +
    V"ExistsExpression";

  ExistsExpression =
    terminal "FieldName" * V"ExistsOp" / unaryrelop +
    V"MacroExpression";

  MacroExpression =
    terminal "Macro" +
    V"RelationalExpression";

  RelationalExpression =
    rel(terminal "FieldName", V"RelOp", V"Value") +
    rel(terminal "FieldName", V"InOp", V"InList") +
    V"PrimaryExp";

  PrimaryExp = symb("(") * V"Filter" * symb(")");

  -- <name> ':' <filter>; the name is captured as plain text
  MacroDef = (C(V"Macro") * V"Skip" * V"Colon" * (V"Filter"));

  -- Terminals
  Value = terminal "Number" + terminal "String" + terminal "Identifier";

  InList = symb("(") * list(V"Value", symb(",")) * symb(")");

  -- Lexemes
  Space = space^1;
  Skip = (V"Space")^0;

  idStart = alpha + P("_");
  idRest = alnum + P("_");
  Identifier = V"idStart" * V"idRest"^0;
  Macro = V"idStart" * V"idRest"^0 * -P".";
  FieldName = V"Identifier" * (P"." + V"Identifier")^1;
  Name = C(V"Identifier") * -V"idRest";
  Hex = (P("0x") + P("0X")) * xdigit^1;
  Expo = S("eE") * S("+-")^-1 * digit^1;
  Float = (((digit^1 * P(".") * digit^0) +
          (P(".") * digit^1)) * V"Expo"^-1) +
          (digit^1 * V"Expo");
  Int = digit^1;
  Number = C(V"Hex" + V"Float" + V"Int") /
           function (n) return tonumber(n) end;
  -- Double- or single-quoted string; a backslash always consumes the
  -- character after it, and the body is decoded with fix_str.
  String = (P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' +
            P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'") /
           function (s) return fix_str(s) end;

  OrOp = kw("or") / "or";
  AndOp = kw("and") / "and";
  -- ':' is punctuation, not a word: use symb so that the macro body may
  -- follow the colon directly (kw would reject "a:b" because an identifier
  -- character follows the colon).
  Colon = symb(":");
  -- Ordered choice commits to the first alternative that matches, so "=="
  -- has to be tried before its prefix "="; otherwise "==" can never match.
  RelOp = symb("==") / "eq" +
          symb("=") / "eq" +
          symb("!=") / "ne" +
          symb("<=") / "le" +
          symb(">=") / "ge" +
          symb("<") / "lt" +
          symb(">") / "gt" +
          symb("contains") / "contains" +
          symb("icontains") / "icontains";
  InOp = kw("in") / "in";
  UnaryBoolOp = kw("not") / "not";
  ExistsOp = kw("exists") / "exists";

  -- for error reporting
  OneWord = V"Name" + V"Number" + V"String" + P(1);
}
-- Parses `subject` (a filter or a macro definition) with grammar G.
-- Returns the two results of lpeg.match: the AST on success, or nil followed
-- by an error message when the input does not parse.
function parser.parse (subject)
  lpeg.setmaxstack(1000)
  -- passed to the grammar as its first extra argument; the error-reporting
  -- helpers record failure information in it
  local errorinfo = { subject = subject }
  local tree, message = lpeg.match(G, subject, nil, errorinfo)
  return tree, message
end
return parser

14
lua/test.lua Normal file
View File

@@ -0,0 +1,14 @@
-- Command-line driver for the parser: parses its single argument and reports
-- the outcome through the exit status (0 = parsed, 1 = usage or parse error).
local parser = require "sysdig-parser"

if #arg ~= 1 then
  print("Usage: test.lua <string>")
  os.exit(1)
end

local ast, error_msg = parser.parse(arg[1])
if not ast then
  -- Surface the parser's diagnostic instead of silently discarding it.
  io.stderr:write(tostring(error_msg), "\n")
  os.exit(1)
end

os.exit(0)

32
notes.txt Normal file
View File

@@ -0,0 +1,32 @@
class sinsp_filter
::compile(str)
call sinsp_filter::push_expression when entering a new nesting level (e.g. parens)
call sinsp_filter::parse_check to parse a single relational expression
parse_check creates a sinsp_filter_check 'chk' of right type for field in this expression
this 'chk' holds the fieldname, operator, value, and also the boolean op that was "on the left" of the expression (or BO_NONE). Then it is added to the parent sinsp_filter_expression by calling sinsp_filter_expression::add_check
class sinsp_filter_expression : sinsp_filter_check
has a list of sinsp_filter_checks (m_checks)
class sinsp_filter_check // represents single relational expression
Summary: what we'll need to do:
- add a bool arg `lua_parsing` to sinsp::set_filter(const string& filter) (sinsp.cpp:1285)
that bool (defaults false) is passed to the sinsp_filter constructor
- if true, sinsp_filter constructor will call lua_compile() instead of compile()
- add a new method sinsp_filter::lua_compile(const string& filter) (filter.cpp)
this method calls up into lua with the string and some handle object that lua parser will use.
What lua parser can do with said handle:
- create a filter_expression
- create new sinsp_filter_check by calling g_filterlist.new_filter_check_from_fldname (filter.cpp:1483)
- set its comparison operator and previous bool operator (filter.cpp:1504)
- parse field name (filter.cpp:1506)
- parse value (filter.cpp:1610)