From 32b35602fb79e0299cf5b5bb502ce7e3b58234ae Mon Sep 17 00:00:00 2001
From: Lionel Sambuc
Date: Thu, 23 May 2019 15:57:37 +0200
Subject: [PATCH] Grammars, v1

---
 .gitignore          |   1 +
 Grammars/JSON.g4    |  81 +++++++++++++
 Grammars/filters.g4 | 287 ++++++++++++++++++++++++++++++++++++++++++++
 Grammars/queries.g4 |  82 +++++++++++++
 4 files changed, 451 insertions(+)
 create mode 100644 Grammars/JSON.g4
 create mode 100644 Grammars/filters.g4
 create mode 100644 Grammars/queries.g4

diff --git a/.gitignore b/.gitignore
index 952013b..9d6fbaf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/gen
 /target
 **/*.rs.bk
 .DS_Store
diff --git a/Grammars/JSON.g4 b/Grammars/JSON.g4
new file mode 100644
index 0000000..51f1dee
--- /dev/null
+++ b/Grammars/JSON.g4
@@ -0,0 +1,81 @@
+
+/** Taken from https://github.com/antlr/grammars-v4/blob/master/json/JSON.g4 */
+/** Taken from "The Definitive ANTLR 4 Reference" by Terence Parr */
+
+// Derived from http://json.org
+grammar JSON;
+
+json
+   : value EOF // the whole input must be one JSON value, no trailing tokens
+   ;
+
+obj
+   : '{' pair (',' pair)* '}'
+   | '{' '}'
+   ;
+
+pair
+   : STRING ':' value
+   ;
+
+array
+   : '[' value (',' value)* ']'
+   | '[' ']'
+   ;
+
+value
+   : STRING
+   | NUMBER
+   | obj
+   | array
+   | 'true'
+   | 'false'
+   | 'null'
+   ;
+
+
+STRING
+   : '"' (ESC | SAFECODEPOINT)* '"'
+   ;
+
+
+fragment ESC
+   : '\\' (["\\/bfnrt] | UNICODE)
+   ;
+
+
+fragment UNICODE
+   : 'u' HEX HEX HEX HEX
+   ;
+
+
+fragment HEX
+   : [0-9a-fA-F]
+   ;
+
+
+fragment SAFECODEPOINT
+   : ~ ["\\\u0000-\u001F]
+   ;
+
+
+NUMBER
+   : '-'? INT ('.' [0-9] +)? EXP?
+   ;
+
+
+fragment INT
+   : '0' | [1-9] [0-9]*
+   ;
+
+// no leading zeros (integer part only)
+
+fragment EXP
+   : [Ee] [+\-]? [0-9]+ // exponent digits MAY have leading zeros, e.g. 1e05
+   ;
+
+// \- since - means "range" inside [...]
+
+WS
+   : [ \t\n\r] + -> skip
+   ;
\ No newline at end of file
diff --git a/Grammars/filters.g4 b/Grammars/filters.g4
new file mode 100644
index 0000000..1e85c64
--- /dev/null
+++ b/Grammars/filters.g4
@@ -0,0 +1,287 @@
+grammar filters;
+
+/**********************************************************************/
+/* SELECTING / FILTERING DATA                                         */
+/**********************************************************************/
+filters
+    : bag_expression EOF // entry rule: reject trailing input after the bag
+    ;
+
+/* All these expressions generate bags. */
+bag_expression
+    // Bag Operators
+    : distinct
+    | filter
+    | complement
+    | intersection
+    | union
+    | bag
+    // Spatial Operators
+    | inside
+    | outside
+    // When used directly here, the inside() operation on the shape is
+    // implied.
+    | shapes
+    ;
+
+/**********************************************************************/
+/* BAG OPERATORS                                                      */
+/**********************************************************************/
+distinct
+    : 'distinct' '(' bag_expression ')'
+    ;
+
+/* Returns all the points which are NOT part of the bag. */
+complement
+    : 'complement' '(' bag_expression ')'
+    ;
+
+/* Returns points which are part of both left and right sets. */
+intersection
+    : 'intersection' '(' bag_expression ',' bag_expression ')'
+    ;
+
+/* Returns points which are either part of left or right sets
+ * (or both). */
+union
+    : 'union' '(' bag_expression ',' bag_expression ')'
+    ;
+
+/* Filters points so that the points in the resulting bag satisfy
+ * the predicate. */
+filter
+    : 'filter' '(' ( bag_expression | predicate ( ',' bag_expression )? ) ')'
+    ;
+
+predicate
+    : less
+    | greater
+    | equal
+    | str_cmp
+    | str_cmp_icase
+    | not
+    | and
+    | or
+    ;
+
+less
+    : '<' '(' position_expr ',' position ')'
+    ;
+
+greater
+    : '>' '(' position_expr ',' position ')'
+    ;
+
+equal
+    : '=' '(' position_expr ',' position ')'
+    ;
+
+not
+    : '!' '(' predicate ')'
+    ;
+
+and
+    : '&' '(' predicate ',' predicate ')'
+    ;
+
+or
+    : '|' '(' predicate ',' predicate ')'
+    ;
+
+/* Arbitrary bag of positions. */
+bag
+    : 'bag' '{' bag_expression (',' bag_expression )* '}'
+    ;
+
+/**********************************************************************/
+/* SPATIAL OPERATORS                                                  */
+/**********************************************************************/
+
+/* Faces | vertices are included to allow selection on a pure plane or
+ * boundary.
+ *
+ * For example:
+ *   intersection(outside(hyperrectangle{[0,0], [1,1]}),
+ *                inside(hyperrectangle{[0,0], [1,1]}))
+ * will be true for any point lying EXACTLY on a face, corner or edge
+ * of the cube [0,0], [1,1].
+ */
+
+/* Returns the set of points outside the shape, (face included) */
+outside
+    : 'outside' '(' shapes ')'
+    ;
+
+/* Returns the set of points inside the shape, (face included) */
+inside
+    : 'inside' '(' shapes ')'
+    ;
+
+/**********************************************************************/
+/* SHAPES                                                             */
+/**********************************************************************/
+shapes
+    : point
+    | hyperrectangle
+    | hypersphere
+    | nifti
+    ;
+
+/* If the hyperrectangle is aligned with the axes, then two points are
+ * enough, if not we need all the points to be specified.
+ */
+hyperrectangle
+    : 'hyperrectangle' '{'
+          position ',' position
+          ( ',' position ',' position )*
+      '}'
+    ;
+
+/* A hypersphere is defined by its center and a radius, independently
+ * of the number of dimensions of the space. Both are mandatory. */
+hypersphere
+    : 'hypersphere' '{' position ',' positive_number '}'
+    ;
+
+point
+    : 'point' '{' position '}'
+    ;
+
+/* Define a shape as the non-zero values in a NIfTI object, defined by
+ * nifti{
+ *   spaceId: string,
+ *   lower_corner: position,  // Optional, default to the origin
+ *   rotation: [ position+ ], // Optional, no rotation by default
+ *   bytes: uri(STRING)       // uri to the NIfTI object
+ * }
+ */
+nifti
+    : 'nifti' '{'
+          STRING ','
+          ( position ',' )?
+          ( '[' position ( ',' position )* ']' ',' )?
+          byte_provider
+      '}'
+    ;
+
+/* TODO: STRING is assumed to be a well-formed URI, fully specify here?
+ *
+ * TODO: Add a provider for in-line raw-byte stream.
+ */
+byte_provider
+    : 'uri' '(' STRING ')'
+    ;
+
+/**********************************************************************/
+/* POSITIONS                                                          */
+/**********************************************************************/
+
+/* Always returns a vector of numbers, a.k.a. a position (a scalar will
+ * be represented as a vector of one element) */
+position_expr
+    : str_cmp_icase
+    | str_cmp
+    | selector
+    | position
+    ;
+
+/* Compare lexicographically two strings, and returns a `position`:
+ *   [-1] : String is lexicographically before,
+ *   [ 0] : is equal,
+ *   [ 1] : is after.
+ */
+str_cmp
+    : 'str_cmp' '(' selector ',' STRING ')'
+    ;
+
+/* Same, but case insensitive. */
+str_cmp_icase
+    : 'str_cmp_ignore_case' '(' selector ',' STRING ')'
+    ;
+
+/* TODO: FIELDS are expected to be existing in the data model. Root Object is
+ * assumed to be the type of the resource on which the POST call was done. */
+selector
+    : ( FIELD )+
+    ;
+
+position
+    : '[' number ( ',' number )* ']'
+    ;
+
+/**********************************************************************/
+/* TOKENS - STRINGS                                                   */
+/**********************************************************************/
+
+/* Accept field descriptors which
+ *  1. start with a dot ('.')
+ *  2. optionally followed by a field name consisting of a letter or
+ *     underscore, followed by letters, numbers or underscore,
+ *  3. optionally followed by brackets enclosing a natural number
+ *     denoting an offset in a list or array. */
+FIELD
+    : '.' ( [a-zA-Z_] [a-zA-Z0-9_]* )? ('[' INTEGER ']')?
+    ;
+
+STRING
+    : '"' (ESC | SAFECODEPOINT)* '"'
+    ;
+
+fragment ESC
+    : '\\' (["\\/bfnrt] | UNICODE)
+    ;
+
+fragment UNICODE
+    : 'u' HEX HEX HEX HEX
+    ;
+
+fragment HEX
+    : [0-9a-fA-F]
+    ;
+
+fragment SAFECODEPOINT
+    : ~ ["\\\u0000-\u001F]
+    ;
+
+/**********************************************************************/
+/* TOKENS - NUMBERS                                                   */
+/**********************************************************************/
+/* We define 3 kinds of number, to avoid ambiguities in the rules. */
+
+/* No optional leading '+' */
+json_number
+    : '-'? NUM
+    ;
+
+positive_number
+    : '+'? NUM
+    ;
+
+number
+    : ( '+' | '-' )? NUM
+    ;
+
+NUM
+    : INTEGER ('.' [0-9]+ )? EXP?
+    ;
+
+fragment EXP
+    : [Ee] [+\-]? [0-9]+ // exponent digits MAY have leading zeros, e.g. 1e05
+    ;
+
+
+/* No leading zeros */
+fragment INTEGER
+    : '0' | [1-9] [0-9]*
+    ;
+
+/**********************************************************************/
+/* WHITESPACES & COMMENTS                                             */
+/**********************************************************************/
+COMMENTS
+    : ( '//' ~[\r\n]* | '/*' .*? '*/' ) -> skip
+    ;
+
+WS
+    : [ \t\r\n]+ -> skip
+    ; // skip spaces, tabs, newlines
diff --git a/Grammars/queries.g4 b/Grammars/queries.g4
new file mode 100644
index 0000000..1b7509e
--- /dev/null
+++ b/Grammars/queries.g4
@@ -0,0 +1,82 @@
+grammar queries;
+import filters;
+
+/**********************************************************************/
+/* FORMATTING DATA                                                    */
+/**********************************************************************/
+queries
+    : projection_operators? EOF // entry rule: reject trailing input
+    ;
+
+projection_operators
+    : nifti_operator
+    | json_operator
+    ;
+
+/* If selector is not provided, one (1) will be used as the values for
+ * each position where there is a point in bag_expression.
+ *
+ * If it is provided, it MUST resolve to a NUMBER. */
+nifti_operator
+    : 'nifti' '(' ( selector ',' )? bag_expression ')'
+    ;
+
+json_operator
+    : 'json' '(' jslt ',' bag_expression ')'
+    ;
+
+jslt
+    : json
+    ;
+
+/**********************************************************************/
+/* JSON                                                               */
+/**********************************************************************/
+
+/**
+ * Taken and adapted from:
+ *  https://github.com/antlr/grammars-v4/blob/master/json/JSON.g4
+ *
+ * Some of the parser / lexer rules are in the imported grammar as well.
+ */
+json
+    : json_value
+    ;
+
+json_obj
+    : '{' json_pair (',' json_pair)* '}'
+    | '{' '}'
+    ;
+
+json_pair
+    : STRING ':' json_value
+    ;
+
+json_array
+    : '[' json_value (',' json_value)* ']'
+    | '[' ']'
+    ;
+
+json_value
+    : STRING
+    | json_number
+    | json_obj
+    | json_array
+    | 'true'
+    | 'false'
+    | 'null'
+    /* Add support to reference values from the selected bag. */
+    | selector
+    | aggregation_expr
+    ;
+
+/* The bag expression is implicit here, as this is the
+ * second argument to the json operator */
+aggregation_expr
+    : 'count' '(' 'distinct'? selector ')'
+    | 'sum' '(' selector ')'
+    | 'min' '(' selector ')'
+    | 'max' '(' selector ')'
+    | 'nifti' '(' selector ')'
+    | 'mbb' '(' ')'
+    ;