From 41ee6e49a5da9b31c949443b3407904233612da8 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Tue, 9 Feb 2016 16:29:01 -0800 Subject: [PATCH 1/9] Grammar for filters and macros --- lua/README.md | 6 + lua/parser-smoke.sh | 67 +++++++++++ lua/sysdig-parser.lua | 251 ++++++++++++++++++++++++++++++++++++++++++ lua/test.lua | 14 +++ notes.txt | 32 ++++++ 5 files changed, 370 insertions(+) create mode 100644 lua/README.md create mode 100755 lua/parser-smoke.sh create mode 100644 lua/sysdig-parser.lua create mode 100644 lua/test.lua create mode 100644 notes.txt diff --git a/lua/README.md b/lua/README.md new file mode 100644 index 00000000..5d2016d2 --- /dev/null +++ b/lua/README.md @@ -0,0 +1,6 @@ +Installation +------------ + +The sysdig grammar uses the `lpeg` parser. For now install it using luarocks: +`luarocks install lpeg`. + diff --git a/lua/parser-smoke.sh b/lua/parser-smoke.sh new file mode 100755 index 00000000..ec5f6a11 --- /dev/null +++ b/lua/parser-smoke.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +function error_exit_good +{ + echo "Error: '$1' did not parse" 1>&2 + exit 1 +} + +function error_exit_bad +{ + echo "Error: incorrect filter '$1' parsed ok" 1>&2 + exit 1 +} + + +function good +{ + lua test.lua "$1" || error_exit_good "$1" +} + +function bad +{ + lua test.lua "$1" && error_exit_bad "$1" +} + +# Filters +good "a" +good "a and b" +good "(a)" +good "(a and b)" +good "(a.a exists and b)" +good "(a.a exists) and (b)" +good "a.a exists and b" +good "a.a=1 or b.b=2 and c" +good "not (a)" +good "not (not (a))" +good "not (a.b=1)" +good "not (a.a exists)" +good "not a" +good "not not a" +good "(not not a)" +good "not a.b=1" +good "not a.a exists" +good "notz" +good "a.b = bla" +good "a.b = 'bla'" +good "a.b = not" +good "a.b contains bla" +good "a.b icontains 'bla'" +good "a.g in ()" +good "a.g in (1, 'a', b)" +good "a.g in ( 1 ,, , b)" + +bad "a.g in (1, 'a', b.c)" +bad "a.b = a.a" +bad "(a.b = 1" + +# Macros + +good "a: a.b exists" +good "a: b and c" +good "a: b" 
+good "a : b" +good "inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')" +bad "a:" + +exit 0 diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua new file mode 100644 index 00000000..d645e98f --- /dev/null +++ b/lua/sysdig-parser.lua @@ -0,0 +1,251 @@ +--[[ + Sysdig grammar and parser. + + Much of the scaffolding and helpers was derived from Andre Murbach Maidl's Lua parser (https://github.com/andremm/lua-parser). + + Parses regular filters following the existing sysdig filter syntax (*), as well as "macro" definitions. Macro definitions are written like: + + inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<') + + (*) There are a few minor differences with the syntax implemented in libsinsp: + + - (Feature!) In libsinsp, field names cannot start with 'a', 'o', or 'n'. With this parser they can + + - (Bug!) In libsinsp, operator right-hand sides only need to be quoted if they contain spaces or parens. With this parser, they need to be quoted if they contain any non-alphanumeric character. 
For example: + + (libsinsp) fd.name = mylog or fd.name contains .log and event.dir = < + (this parser) fd.name = mylog or fd.name contains '.log' and event.dir = '<' + +]]-- + +local parser = {} + +local lpeg = require "lpeg" + +lpeg.locale(lpeg) + +local P, S, V = lpeg.P, lpeg.S, lpeg.V +local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc +local Cf, Cg, Cmt, Cp, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Ct +local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum +local xdigit = lpeg.xdigit +local space = lpeg.space + + +-- error message auxiliary functions + +-- creates an error message for the input string +local function syntaxerror (errorinfo, pos, msg) + local error_msg = "%s: syntax error, %s" + return string.format(error_msg, pos, msg) +end + +-- gets the farthest failure position +local function getffp (s, i, t) + return t.ffp or i, t +end + +-- gets the table that contains the error information +local function geterrorinfo () + return Cmt(Carg(1), getffp) * (C(V"OneWord") + Cc("EOF")) / + function (t, u) + t.unexpected = u + return t + end +end + +-- creates an errror message using the farthest failure position +local function errormsg () + return geterrorinfo() / + function (t) + local p = t.ffp or 1 + local msg = "unexpected '%s', expecting %s" + msg = string.format(msg, t.unexpected, t.expected) + return nil, syntaxerror(t, p, msg) + end +end + +-- reports a syntactic error +local function report_error () + return errormsg() +end + +--- sets the farthest failure position and the expected tokens +local function setffp (s, i, t, n) + if not t.ffp or i > t.ffp then + t.ffp = i + t.list = {} ; t.list[n] = n + t.expected = "'" .. n .. "'" + elseif i == t.ffp then + if not t.list[n] then + t.list[n] = n + t.expected = "'" .. n .. "', " .. 
t.expected + end + end + return false +end + +local function updateffp (name) + return Cmt(Carg(1) * Cc(name), setffp) +end + +-- regular combinators and auxiliary functions + +local function token (pat, name) + return pat * V"Skip" + updateffp(name) * P(false) +end + +local function symb (str) + return token (P(str), str) +end + +local function kw (str) + return token (P(str) * -V"idRest", str) +end + + +local function list (pat, sep) + return Ct(pat^0 * (sep * pat^0)^0) / function(elements) return {type = "List", elements=elements} end +end + +local function terminal (tag) + return token(V(tag), tag) / function (tok) return { type = tag, value = tok} end +end + +local function unaryboolop (op, e) + return { type = "UnaryBoolOp", operator = op, argument = e } +end + +local function unaryrelop (e, op) + return { type = "UnaryRelOp", operator = op, argument = e } +end + +local function binaryop (e1, op, e2) + if not op then + return e1 + else + return { type = "BinaryBoolOp", operator = op, left = e1, right = e2 } + end +end + +local function bool (pat, sep) + return Cf(pat * Cg(sep * pat)^0, binaryop) +end + +local function rel (left, sep, right) + return left * sep * right / function(e1, op, e2) return { type = "BinaryRelOp", operator = op, left = e1, right = e2 } end +end + +local function fix_str (str) + str = string.gsub(str, "\\a", "\a") + str = string.gsub(str, "\\b", "\b") + str = string.gsub(str, "\\f", "\f") + str = string.gsub(str, "\\n", "\n") + str = string.gsub(str, "\\r", "\r") + str = string.gsub(str, "\\t", "\t") + str = string.gsub(str, "\\v", "\v") + str = string.gsub(str, "\\\n", "\n") + str = string.gsub(str, "\\\r", "\n") + str = string.gsub(str, "\\'", "'") + str = string.gsub(str, '\\"', '"') + str = string.gsub(str, '\\\\', '\\') + return str +end + +-- grammar + +local function filter(e) + return {type = "Filter", value=e} +end + +local function macro (name, filter) + return {type = "MacroDef", name = name, value = filter} +end + +local G = 
{ + V"Start", -- Entry rule + + Start = (V"MacroDef" / macro + V"Filter" / filter) * -1 + report_error(); + + -- Grammar + Filter = V"OrExpression"; + OrExpression = + bool(V"AndExpression", V"OrOp"); + + AndExpression = + bool(V"NotExpression", V"AndOp"); + + NotExpression = + V"UnaryBoolOp" * V"NotExpression" / unaryboolop + + V"ExistsExpression"; + + ExistsExpression = + terminal "FieldName" * V"ExistsOp" / unaryrelop + + V"MacroExpression"; + + MacroExpression = + terminal "Macro" + + V"RelationalExpression"; + + RelationalExpression = + rel(terminal "FieldName", V"RelOp", V"Value") + + rel(terminal "FieldName", V"InOp", V"InList") + + V"PrimaryExp"; + + PrimaryExp = symb("(") * V"Filter" * symb(")"); + + MacroDef = (C(V"Macro") * V"Skip" * V"Colon" * (V"Filter")); + + -- Terminals + Value = terminal "Number" + terminal "String" + terminal "Identifier"; + + InList = symb("(") * list(V"Value", symb(",")) * symb(")"); + + + -- Lexemes + Space = space^1; + Skip = (V"Space")^0; + idStart = alpha + P("_"); + idRest = alnum + P("_"); + Identifier = V"idStart" * V"idRest"^0; + Macro = V"idStart" * V"idRest"^0 * -P"."; + FieldName = V"Identifier" * (P"." 
+ V"Identifier")^1; + Name = C(V"Identifier") * -V"idRest"; + Hex = (P("0x") + P("0X")) * xdigit^1; + Expo = S("eE") * S("+-")^-1 * digit^1; + Float = (((digit^1 * P(".") * digit^0) + + (P(".") * digit^1)) * V"Expo"^-1) + + (digit^1 * V"Expo"); + Int = digit^1; + Number = C(V"Hex" + V"Float" + V"Int") / + function (n) return tonumber(n) end; + String = (P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' + P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'") / function (s) return fix_str(s) end; + OrOp = kw("or") / "or"; + AndOp = kw("and") / "and"; + Colon = kw(":"); + RelOp = symb("=") / "eq" + + symb("==") / "eq" + + symb("!=") / "ne" + + symb("<=") / "le" + + symb(">=") / "ge" + + symb("<") / "lt" + + symb(">") / "gt" + + symb("contains") / "contains" + + symb("icontains") / "icontains"; + InOp = kw("in") / "in"; + UnaryBoolOp = kw("not") / "not"; + ExistsOp = kw("exists") / "exists"; + + -- for error reporting + OneWord = V"Name" + V"Number" + V"String" + P(1); +} + + +function parser.parse (subject) + local errorinfo = { subject = subject } + lpeg.setmaxstack(1000) + local ast, error_msg = lpeg.match(G, subject, nil, errorinfo) + return ast, error_msg +end + +return parser diff --git a/lua/test.lua b/lua/test.lua new file mode 100644 index 00000000..a34427b1 --- /dev/null +++ b/lua/test.lua @@ -0,0 +1,14 @@ +local parser = require "sysdig-parser" + +if #arg ~= 1 then + print("Usage: test.lua ") + os.exit(1) +end + +local ast, error_msg = parser.parse(arg[1]) +if not ast then + os.exit(1) +end + +os.exit(0) + diff --git a/notes.txt b/notes.txt new file mode 100644 index 00000000..26d9096b --- /dev/null +++ b/notes.txt @@ -0,0 +1,32 @@ +class sinsp_filter + ::compile(str) +call sinsp_filter::push_expression when entering a new nesting level (e.g. 
parens) +call sinsp_filter::parse_check to parse a single relational expression + parse_check creates a sinsp_filter_check 'chk' of right type for field in this expression +this 'chk' holds the fieldname, operator, value, and also the boolean op that was "on the left" of the expression (or BO_NONE). Then it is added to the parent sinsp_filter_expression by calling sinsp_filter_expression::add_check + + + + +class sinsp_filter_expression : sinsp_filter_check +has a list of sinsp_filter_checks (m_checks) + + + +class sinsp_filter_check // represents single relational expression + + +Summary: what we'll need to do: + +- add a bool arg `lua_parsing` to sinsp::set_filter(const string& filter) (sinsp.cpp:1285) + that bool (defaults false) is passed to the sinsp_filter constructor +- if true, sinsp_filter constructor will call lua_compile() instead of compile() +- add a new method sinsp_filter::lua_compile(const string& filter) (filter.cpp) + this method calls up into lua with the string and some handle object that lua parser will use. + +What lua parser can do with said handle: +- create a filter_expression +- create new sinsp_filter_check by calling g_filterlist.new_filter_check_from_fldname (filter.cpp:1483) +- set its comparison operator and previous bool operator (filter.cpp:1504) +- parse field name (filter.cpp:1506) +- parse value (filter.cpp:1610) From 48685f4f2f2084db0e18fac0510c99e3040dc266 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Fri, 12 Feb 2016 15:03:09 -0800 Subject: [PATCH 2/9] Don't require quoting for non-alphanum characters This commit removes the one remaining (known) difference with the sysdig c++ parser: relational expression right-hand sides now _only_ need to be quoted if they contain a paren or a space. So you can now do things like "fd.name contains *.log and event.dir = <" without needing to quote "*.log" or "<". 
--- lua/parser-smoke.sh | 8 +++++--- lua/sysdig-parser.lua | 13 +++++-------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/lua/parser-smoke.sh b/lua/parser-smoke.sh index ec5f6a11..d6882ec7 100755 --- a/lua/parser-smoke.sh +++ b/lua/parser-smoke.sh @@ -50,17 +50,19 @@ good "a.b icontains 'bla'" good "a.g in ()" good "a.g in (1, 'a', b)" good "a.g in ( 1 ,, , b)" +good "evt.dir=> and fd.name=*.log" +good "evt.dir=> and fd.name=/var/log/httpd.log" +good "a.g in (1, 'a', b.c)" +good "a.b = a.a" -bad "a.g in (1, 'a', b.c)" -bad "a.b = a.a" bad "(a.b = 1" - # Macros good "a: a.b exists" good "a: b and c" good "a: b" good "a : b" +good "a : evt.dir=>" good "inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')" bad "a:" diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index d645e98f..d926e1c8 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -7,14 +7,9 @@ inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<') - (*) There are a few minor differences with the syntax implemented in libsinsp: + (*) There is currently one known difference with the syntax implemented in libsinsp: - - (Feature!) In libsinsp, field names cannot start with 'a', 'o', or 'n'. With this parser they can - - - (Bug!) In libsinsp, operator right-hand sides only need to be quoted if they contain spaces or parens. With this parser, they need to be quoted if they contain any non-alphanumeric character. For example: - - (libsinsp) fd.name = mylog or fd.name contains .log and event.dir = < - (this parser) fd.name = mylog or fd.name contains '.log' and event.dir = '<' + - In libsinsp, field names cannot start with 'a', 'o', or 'n'. 
With this parser they can ]]-- @@ -197,7 +192,7 @@ local G = { MacroDef = (C(V"Macro") * V"Skip" * V"Colon" * (V"Filter")); -- Terminals - Value = terminal "Number" + terminal "String" + terminal "Identifier"; + Value = terminal "Number" + terminal "String" + terminal "BareString"; InList = symb("(") * list(V"Value", symb(",")) * symb(")"); @@ -220,6 +215,8 @@ local G = { Number = C(V"Hex" + V"Float" + V"Int") / function (n) return tonumber(n) end; String = (P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' + P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'") / function (s) return fix_str(s) end; + BareString = C(((P(1) - S' (),'))^1); + OrOp = kw("or") / "or"; AndOp = kw("and") / "and"; Colon = kw(":"); From 3dab9edc9df172dbef703d44dd45e8a43516d234 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Fri, 12 Feb 2016 20:30:19 -0800 Subject: [PATCH 3/9] add a function to pretty-print ASTs --- lua/sysdig-parser.lua | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index d926e1c8..55b09366 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -237,6 +237,40 @@ local G = { OneWord = V"Name" + V"Number" + V"String" + P(1); } +function print_ast(node, level) + local t = node.type + local prefix = string.rep(" ", level*2) + level = level + 1 + + if t == "Filter" then + print_ast(node.value, level) + + elseif t == "BinaryBoolOp" or t == "BinaryRelOp" then + print(prefix..node.operator) + print_ast(node.left, level) + print_ast(node.right, level) + + elseif t == "UnaryRelOp" or t == "UnaryBoolOp" then + print (prefix..node.operator) + print_ast(node.argument, level) + + elseif t == "List" then + print(prefix.. "List: ") + for i, v in ipairs(node.elements) do + print_ast(v, level) + end + + elseif t == "FieldName" or t == "Number" or t == "String" or t == "BareString" or t == "Macro" then + print (prefix..t.." 
"..node.value) + + elseif t == "MacroDef" then + -- don't print for now + else + error ("Unexpected type: "..t) + end +end + + function parser.parse (subject) local errorinfo = { subject = subject } From 79cdf31aa7b3767e9f05913aaf0603eaf3e89910 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Fri, 12 Feb 2016 20:30:44 -0800 Subject: [PATCH 4/9] Expand `in` relational expressions For example, `a.b in [1, 2]` is expanded to `a.b = 1 or a.b = 2`. This is done over the AST, not filter text. --- lua/parser-smoke.sh | 8 +++-- lua/sysdig-parser.lua | 72 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/lua/parser-smoke.sh b/lua/parser-smoke.sh index d6882ec7..2f9ee69f 100755 --- a/lua/parser-smoke.sh +++ b/lua/parser-smoke.sh @@ -15,12 +15,12 @@ function error_exit_bad function good { - lua test.lua "$1" || error_exit_good "$1" + lua test.lua "$1" 2> /dev/null || error_exit_good "$1" } function bad { - lua test.lua "$1" && error_exit_bad "$1" + lua test.lua "$1" 2> /dev/null && error_exit_bad "$1" } # Filters @@ -47,7 +47,6 @@ good "a.b = 'bla'" good "a.b = not" good "a.b contains bla" good "a.b icontains 'bla'" -good "a.g in ()" good "a.g in (1, 'a', b)" good "a.g in ( 1 ,, , b)" good "evt.dir=> and fd.name=*.log" @@ -55,6 +54,7 @@ good "evt.dir=> and fd.name=/var/log/httpd.log" good "a.g in (1, 'a', b.c)" good "a.b = a.a" +bad "a.g in ()" bad "(a.b = 1" # Macros @@ -66,4 +66,6 @@ good "a : evt.dir=>" good "inbound: (syscall.type=listen and evt.dir='>') or (syscall.type=accept and evt.dir='<')" bad "a:" +echo +echo "All tests passed." 
exit 0 diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index 55b09366..2a279b13 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -237,6 +237,74 @@ local G = { OneWord = V"Name" + V"Number" + V"String" + P(1); } +function map(f, arr) + local res = {} + for i,v in ipairs(arr) do + res[i] = f(v) + end + return res +end + +function foldr(f, acc, arr) + for i,v in pairs(arr) do + acc = f(acc, v) + end + return acc +end + +--[[ + Traverses the AST and replaces `in` relational expressions with a sequence of ORs. + + For example, `a.b in [1, 2]` is expanded to `a.b = 1 or a.b = 2` (in ASTs) +]]-- +function expand_in(node) + local t = node.type + + if t == "Filter" then + expand_in(node.value) + + elseif t == "UnaryBoolOp" then + expand_in(node.argument) + + elseif t == "BinaryBoolOp" then + expand_in(node.left) + expand_in(node.right) + + elseif t == "BinaryRelOp" and node.operator == "in" then + if (table.maxn(node.right.elements) == 0) then + error ("In list with zero elements") + end + + local mapper = function(element) + return { + type = "BinaryRelOp", + operator = "eq", + left = node.left, + right = element + } + end + + local equalities = map(mapper, node.right.elements) + local lasteq = equalities[table.maxn(equalities)] + equalities[table.maxn(equalities)] = nil + + local folder = function(left, right) + return { + type = "BinaryBoolOp", + operator = "or", + left = left, + right = right + } + end + lasteq = foldr(folder, lasteq, equalities) + + node.type=lasteq.type + node.operator=lasteq.operator + node.left=lasteq.left + node.right=lasteq.right + end +end + function print_ast(node, level) local t = node.type local prefix = string.rep(" ", level*2) @@ -276,6 +344,10 @@ function parser.parse (subject) local errorinfo = { subject = subject } lpeg.setmaxstack(1000) local ast, error_msg = lpeg.match(G, subject, nil, errorinfo) + if (error_msg) then + return ast, error_msg + end + expand_in(ast) return ast, error_msg end From 
0d10a3f39c21f9c3b98b7a219283b35e87ac05e0 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Sat, 13 Feb 2016 15:26:40 -0800 Subject: [PATCH 5/9] Fix multi-line comments --- lua/sysdig-parser.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index 2a279b13..720eac54 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -11,7 +11,7 @@ - In libsinsp, field names cannot start with 'a', 'o', or 'n'. With this parser they can -]]-- +--]] local parser = {} @@ -256,7 +256,7 @@ end Traverses the AST and replaces `in` relational expressions with a sequence of ORs. For example, `a.b in [1, 2]` is expanded to `a.b = 1 or a.b = 2` (in ASTs) -]]-- +--]] function expand_in(node) local t = node.type From aeba0760db69b9ff2f02f9c83101eff4c38cd0c5 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Sun, 14 Feb 2016 14:31:10 -0800 Subject: [PATCH 6/9] Scaffold basic line-oriented compiler API --- lua/sysdig-parser.lua | 44 ++++++++++++++++++++++++++++++++++++------- lua/test.lua | 6 ++++-- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index 720eac54..a68b9e50 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -13,7 +13,8 @@ --]] -local parser = {} +local compiler = {} +compiler.parser = {} local lpeg = require "lpeg" @@ -340,15 +341,44 @@ end -function parser.parse (subject) +--[[ + Parses a single line (which should be either a macro definition or a filter) and returns the AST. +--]] +function compiler.parser.parseline (subject) local errorinfo = { subject = subject } lpeg.setmaxstack(1000) local ast, error_msg = lpeg.match(G, subject, nil, errorinfo) - if (error_msg) then - return ast, error_msg - end - expand_in(ast) return ast, error_msg end -return parser + +--[[ + Sets up compiler state and returns it. + + This is an opaque blob that is passed into subsequent compiler calls and + should not be modified by the client. 
+ + It holds state such as macro definitions that must be kept across calls + to the line-oriented compiler. +--]] +function compiler.init() + return {} +end + +--[[ + Compiles a digwatch filter or macro +--]] +function compiler.compile_line(line, state) + ast, error_message = compiler.parser.parseline(line) + + if (error_msg) then + return {}, state, error_msg + end + expand_in(ast) +-- extract_macros(ast, state) +-- expand_macros(ast, state) + return ast, state, error_msg +end + + +return compiler diff --git a/lua/test.lua b/lua/test.lua index a34427b1..9c50d724 100644 --- a/lua/test.lua +++ b/lua/test.lua @@ -1,11 +1,13 @@ -local parser = require "sysdig-parser" +local compiler = require "sysdig-parser" if #arg ~= 1 then print("Usage: test.lua ") os.exit(1) end -local ast, error_msg = parser.parse(arg[1]) +local state = compiler.init() + +local ast, state, error_msg = compiler.compile_line(arg[1], state) if not ast then os.exit(1) end From c21e8207f8c358e1f3dbb4fcf73d449608283ff9 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Sun, 14 Feb 2016 20:37:32 -0800 Subject: [PATCH 7/9] Don't allow '=' in BareStrings --- lua/parser-smoke.sh | 1 + lua/sysdig-parser.lua | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lua/parser-smoke.sh b/lua/parser-smoke.sh index 2f9ee69f..95f69ec4 100755 --- a/lua/parser-smoke.sh +++ b/lua/parser-smoke.sh @@ -55,6 +55,7 @@ good "a.g in (1, 'a', b.c)" good "a.b = a.a" bad "a.g in ()" +bad "a.b = b = 1" bad "(a.b = 1" # Macros diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index a68b9e50..e27a04ca 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -216,7 +216,7 @@ local G = { Number = C(V"Hex" + V"Float" + V"Int") / function (n) return tonumber(n) end; String = (P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' + P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'") / function (s) return fix_str(s) end; - BareString = C(((P(1) - S' (),'))^1); + BareString = C(((P(1) - S' (),='))^1); OrOp = 
kw("or") / "or"; AndOp = kw("and") / "and"; From b3f7b1d765159f426a011a64162af18707902097 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Sun, 14 Feb 2016 21:26:04 -0800 Subject: [PATCH 8/9] Remove whitespace from token values in AST --- lua/sysdig-parser.lua | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index e27a04ca..5cc7ac7f 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -104,8 +104,15 @@ local function list (pat, sep) return Ct(pat^0 * (sep * pat^0)^0) / function(elements) return {type = "List", elements=elements} end end +--http://lua-users.org/wiki/StringTrim +function trim(s) + if (type(s) ~= "string") then return s end + return (s:gsub("^%s*(.-)%s*$", "%1")) +end + local function terminal (tag) - return token(V(tag), tag) / function (tok) return { type = tag, value = tok} end + -- Rather than trim the whitespace in this way, it would be nicer to exclude it from the capture... + return token(V(tag), tag) / function (tok) return { type = tag, value = trim(tok)} end end local function unaryboolop (op, e) From 6e2d8679c448de1da0196d52ba7ed849be99e969 Mon Sep 17 00:00:00 2001 From: Henri DF Date: Sun, 14 Feb 2016 20:36:54 -0800 Subject: [PATCH 9/9] Macro expansion --- lua/parser-smoke.sh | 6 +- lua/sysdig-parser.lua | 125 ++++++++++++++++++++++++++++++++++++++---- lua/test.lua | 15 ++++- 3 files changed, 130 insertions(+), 16 deletions(-) diff --git a/lua/parser-smoke.sh b/lua/parser-smoke.sh index 95f69ec4..96ebcd65 100755 --- a/lua/parser-smoke.sh +++ b/lua/parser-smoke.sh @@ -15,12 +15,12 @@ function error_exit_bad function good { - lua test.lua "$1" 2> /dev/null || error_exit_good "$1" + lua test.lua "a: x.y=1; b: a and z.x exists; c: b; $1" 2> /dev/null || error_exit_good "$1" } function bad { - lua test.lua "$1" 2> /dev/null && error_exit_bad "$1" + lua test.lua "a: x.y=1; b: a and z.x exists; c: b; $1" 2> /dev/null && error_exit_bad "$1" } # Filters @@ -41,7 +41,7 
@@ good "not not a" good "(not not a)" good "not a.b=1" good "not a.a exists" -good "notz" +good "notz: a and b; notz" good "a.b = bla" good "a.b = 'bla'" good "a.b = not" diff --git a/lua/sysdig-parser.lua b/lua/sysdig-parser.lua index 5cc7ac7f..3aeacbee 100644 --- a/lua/sysdig-parser.lua +++ b/lua/sysdig-parser.lua @@ -168,7 +168,7 @@ end local G = { V"Start", -- Entry rule - Start = (V"MacroDef" / macro + V"Filter" / filter) * -1 + report_error(); + Start = V"Skip" * (V"MacroDef" / macro + V"Filter" / filter) * -1 + report_error(); -- Grammar Filter = V"OrExpression"; @@ -313,8 +313,94 @@ function expand_in(node) end end +--[[ + + Given a map of macro definitions, traverse AST and replace macro references + with their definitions. + + The AST is changed in-place. + + The return value is a boolean which is true if any macro was + substituted. This allows a caller to re-traverse until no more macros are + found, a simple strategy for recursive resolutions (e.g. when a macro + definition uses another macro). + +--]] +function expand_macros(node, defs, changed) + if node.type == "Filter" then + if (node.value.type == "Macro") then + if (defs[node.value.value] == nil) then + tostring = require 'ml'.tstring + error("Undefined macro '".. node.value.value .. "' used in filter.") + end + node.value = defs[node.value.value] + changed = true + end + return expand_macros(node.value, defs, changed) + + elseif node.type == "BinaryBoolOp" then + + if (node.left.type == "Macro") then + if (defs[node.left.value] == nil) then + error("Undefined macro '".. node.left.value .. "' used in filter.") + end + node.left = defs[node.left.value] + changed = true + end + + if (node.right.type == "Macro") then + if (defs[node.right.value] == nil) then + error("Undefined macro ".. node.right.value .. 
"used in filter.") + end + node.right = defs[node.right.value] + changed = true + end + + local changed_left = expand_macros(node.left, defs, false) + local changed_right = expand_macros(node.right, defs, false) + return changed or changed_left or changed_right + + elseif node.type == "UnaryBoolOp" then + if (node.argument.type == "Macro") then + if (defs[node.argument.value] == nil) then + error("Undefined macro ".. node.argument.value .. "used in filter.") + end + node.argument = defs[node.argument.value] + changed = true + end + return expand_macros(node.argument, defs, changed) + end + return changed +end + +function get_macros(node, set) + if (node.type == "Macro") then + set[node.value] = true + return set + end + + if node.type == "Filter" then + return get_macros(node.value, set) + end + + if node.type == "BinaryBoolOp" then + local left = get_macros(node.left, {}) + local right = get_macros(node.right, {}) + + for m, _ in pairs(left) do set[m] = true end + for m, _ in pairs(right) do set[m] = true end + + return set + end + if node.type == "UnaryBoolOp" then + return get_macros(node.argument, set) + end + return set +end + function print_ast(node, level) local t = node.type + level = level or 0 local prefix = string.rep(" ", level*2) level = level + 1 @@ -345,13 +431,13 @@ function print_ast(node, level) error ("Unexpected type: "..t) end end - +compiler.parser.print_ast = print_ast --[[ Parses a single line (which should be either a macro definition or a filter) and returns the AST. --]] -function compiler.parser.parseline (subject) +function compiler.parser.parse_line (subject) local errorinfo = { subject = subject } lpeg.setmaxstack(1000) local ast, error_msg = lpeg.match(G, subject, nil, errorinfo) @@ -369,22 +455,41 @@ end to the line-oriented compiler. 
--]] function compiler.init() - return {} + return {macros={}} end --[[ Compiles a digwatch filter or macro --]] function compiler.compile_line(line, state) - ast, error_message = compiler.parser.parseline(line) + local ast, error_msg = compiler.parser.parse_line(line) if (error_msg) then - return {}, state, error_msg + return nil, error_msg end - expand_in(ast) --- extract_macros(ast, state) --- expand_macros(ast, state) - return ast, state, error_msg + + local macros = get_macros(ast.value, {}) + for m, _ in pairs(macros) do + if state.macros[m] == nil then + error ("Undefined macro '"..m.."' used in '"..line.."'") + end + end + + if (ast.type == "MacroDef") then + state.macros[ast.name] = ast.value + return ast, error_msg + elseif (ast.type == "Filter") then + expand_in(ast) + + repeat + expanded = expand_macros(ast, state.macros, false) + until expanded == false + + else + error("Unexpected top-level AST type: "..ast.type) + end + + return ast, error_msg end diff --git a/lua/test.lua b/lua/test.lua index 9c50d724..3009c5b6 100644 --- a/lua/test.lua +++ b/lua/test.lua @@ -7,9 +7,18 @@ end local state = compiler.init() -local ast, state, error_msg = compiler.compile_line(arg[1], state) -if not ast then - os.exit(1) +local function doit(line) + local ast, error_msg = compiler.compile_line(line, state) + + if not ast then + print("error", error_msg) + os.exit(1) + end + + compiler.parser.print_ast(ast) +end +for str in string.gmatch(arg[1], "([^;]+)") do + doit(str) end os.exit(0)