From 32b35602fb79e0299cf5b5bb502ce7e3b58234ae Mon Sep 17 00:00:00 2001
From: Lionel Sambuc <lionel.sambuc@epfl.ch>
Date: Thu, 23 May 2019 15:57:37 +0200
Subject: [PATCH] Grammars, v1

---
 .gitignore          |   1 +
 Grammars/JSON.g4    |  81 +++++++++++++
 Grammars/filters.g4 | 287 ++++++++++++++++++++++++++++++++++++++++++++
 Grammars/queries.g4 |  82 +++++++++++++
 4 files changed, 451 insertions(+)
 create mode 100644 Grammars/JSON.g4
 create mode 100644 Grammars/filters.g4
 create mode 100644 Grammars/queries.g4

diff --git a/.gitignore b/.gitignore
index 952013b..9d6fbaf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/gen
 /target
 **/*.rs.bk
 .DS_Store
diff --git a/Grammars/JSON.g4 b/Grammars/JSON.g4
new file mode 100644
index 0000000..51f1dee
--- /dev/null
+++ b/Grammars/JSON.g4
@@ -0,0 +1,81 @@
+
+/** Taken from https://github.com/antlr/grammars-v4/blob/master/json/JSON.g4 */
+/** Taken from "The Definitive ANTLR 4 Reference" by Terence Parr */
+
+// Derived from http://json.org
+grammar JSON;
+
+json   // Start rule: exactly one JSON value followed by end of input.
+   : value EOF   // EOF turns trailing, unparsed input into a syntax error
+   ;
+
+obj   // JSON object: comma-separated pairs enclosed in braces.
+   : '{' pair (',' pair)* '}'
+   | '{' '}'   // empty object
+   ;
+
+pair   // One object member: a STRING key, a colon, then any value.
+   : STRING ':' value
+   ;
+
+array   // JSON array: comma-separated values enclosed in brackets.
+   : '[' value (',' value)* ']'
+   | '[' ']'   // empty array
+   ;
+
+value   // Any JSON value: a scalar token, a nested container or a literal.
+   : STRING
+   | NUMBER
+   | obj
+   | array
+   | 'true'
+   | 'false'
+   | 'null'
+   ;
+
+
+STRING   // Double-quoted string made of escapes and safe code points.
+   : '"' (ESC | SAFECODEPOINT)* '"'
+   ;
+
+
+fragment ESC   // Backslash followed by a one-character escape or a UNICODE escape.
+   : '\\' (["\\/bfnrt] | UNICODE)
+   ;
+
+
+fragment UNICODE   // The letter 'u' followed by exactly four hexadecimal digits.
+   : 'u' HEX HEX HEX HEX
+   ;
+
+
+fragment HEX   // One hexadecimal digit, upper or lower case.
+   : [0-9a-fA-F]
+   ;
+
+
+fragment SAFECODEPOINT   // Anything but a double quote, a backslash or U+0000..U+001F.
+   : ~ ["\\\u0000-\u001F]
+   ;
+
+
+NUMBER   // Optional '-', integer part, optional fraction, optional exponent.
+   : '-'? INT ('.' [0-9] +)? EXP?
+   ;
+
+
+fragment INT   // Either a lone '0', or a non-zero digit followed by any digits.
+   : '0' | [1-9] [0-9]*
+   ;
+
+// no leading zeros
+
+fragment EXP   // Exponent: JSON allows any digit run here, leading zeros included (1e05).
+   : [Ee] [+\-]? [0-9]+
+   ;
+
+// \- since - means "range" inside [...]
+
+WS   // Runs of spaces, tabs, newlines and carriage returns are discarded.
+   : [ \t\n\r] + -> skip
+   ;
\ No newline at end of file
diff --git a/Grammars/filters.g4 b/Grammars/filters.g4
new file mode 100644
index 0000000..1e85c64
--- /dev/null
+++ b/Grammars/filters.g4
@@ -0,0 +1,287 @@
+grammar filters;
+
+/**********************************************************************/
+/* SELECTING / FILTERING DATA                                         */
+/**********************************************************************/
+filters   // Start rule: a single bag expression spanning the whole input.
+    : bag_expression EOF   // EOF turns trailing, unparsed input into a syntax error
+    ;
+
+/* Every alternative of this rule generates a bag. */
+bag_expression
+    // Bag Operators
+    : distinct
+    | filter
+    | complement
+    | intersection
+    | union
+    | bag
+    // Spatial Operators
+    | inside
+    | outside
+    // A shape used directly as a bag expression is shorthand for
+    // inside(shape): the inside() operation is implied.
+    | shapes
+    ;
+
+/**********************************************************************/
+/* BAG OPERATORS                                                      */
+/**********************************************************************/
+distinct   // NOTE(review): presumably drops duplicate points; semantics not stated here.
+    : 'distinct' '(' bag_expression ')'
+    ;
+
+/* Returns all the points which are NOT part of the bag. */
+complement
+    : 'complement' '(' bag_expression ')'
+    ;
+
+/* Returns the points which are part of both the left and right sets. */
+intersection
+    : 'intersection' '(' bag_expression ',' bag_expression ')'
+    ;
+
+/* Returns the points which are part of either the left or the right
+ * set (or both). */
+union
+    : 'union' '(' bag_expression ',' bag_expression ')'
+    ;
+
+/* Keeps only the points which satisfy the predicate. Accepted arguments:
+ * a bag expression alone, or a predicate optionally followed by a bag. */
+filter
+    : 'filter' '(' ( bag_expression | predicate ( ',' bag_expression )? ) ')'
+    ;
+
+predicate   // Comparisons, string comparisons and boolean combinators.
+    : less
+    | greater
+    | equal
+    | str_cmp
+    | str_cmp_icase
+    | not
+    | and
+    | or
+    ;
+
+less   // Prefix notation: <(position_expr, position)
+    : '<' '(' position_expr ',' position ')'
+    ;
+
+greater   // Prefix notation: >(position_expr, position)
+    : '>' '(' position_expr ',' position ')'
+    ;
+
+equal   // Prefix notation: =(position_expr, position)
+    : '=' '(' position_expr ',' position ')'
+    ;
+
+not   // Prefix notation: !(predicate)
+    : '!' '(' predicate ')'
+    ;
+
+and   // Prefix notation: &(predicate, predicate)
+    : '&' '(' predicate ',' predicate ')'
+    ;
+
+or   // Prefix notation: |(predicate, predicate)
+    : '|' '(' predicate ',' predicate ')'
+    ;
+
+/* Arbitrary bag, built from one or more comma-separated bag expressions. */
+bag
+    : 'bag' '{' bag_expression (',' bag_expression )* '}'
+    ;
+
+/**********************************************************************/
+/* SPATIAL OPERATORS                                                  */
+/**********************************************************************/
+
+/* Faces and vertices are included to allow selection on a pure plane or
+ * boundary.
+ *
+ * For example:
+ *   intersection(outside(hyperrectangle{[0,0], [1,1]}),
+ *                inside(hyperrectangle{[0,0], [1,1]}))
+ * selects exactly the points lying on a face, corner or edge of the
+ * hyperrectangle [0,0], [1,1].
+ */
+
+/* Returns the set of points outside the shape (faces included). */
+outside
+    : 'outside' '(' shapes ')'
+    ;
+
+/* Returns the set of points inside the shape (faces included). */
+inside
+    : 'inside' '(' shapes ')'
+    ;
+
+/**********************************************************************/
+/* SHAPES                                                             */
+/**********************************************************************/
+shapes   // Exactly one of the shape literals defined in this section.
+    : point
+    | hyperrectangle
+    | hypersphere
+    | nifti
+    ;
+
+/* If the hyperrectangle is aligned with the axes, then two points are
+ * enough; otherwise all of its points have to be specified.
+ */
+hyperrectangle
+    : 'hyperrectangle' '{'
+          position ',' position
+          ( ',' position ',' position )*   // further positions, added in pairs
+       '}'
+    ;
+
+/* A hypersphere is defined by its center and a radius, independently
+ * of the number of dimensions of the space. */
+hypersphere
+    : 'hypersphere' '{' position ( ',' positive_number ) '}'
+    ;
+
+point   // A single position wrapped as a shape.
+    : 'point' '{' position '}'
+    ;
+
+/* Define a shape as the non-zero values in a NIfTI object, defined by
+ *   nifti{
+ *     spaceId: string,
+ *     lower_corner: position,  // Optional, defaults to the origin
+ *     rotation: [ position+ ], // Optional, no rotation by default
+ *     bytes: uri(STRING)       // URI of the NIfTI object
+ *   }
+ */
+nifti
+    : 'nifti' '{'
+        STRING ','                                    // spaceId
+        (position ',' )?                              // lower_corner (optional)
+        ( '[' position ( ',' position )* ']' ',' )?   // rotation (optional)
+        byte_provider                                 // bytes
+      '}'
+    ;
+
+/* TODO: STRING is assumed to be a well-formed URI; fully specify it here?
+ *
+ * TODO: Add a provider for an in-line raw-byte stream.
+ */
+byte_provider
+    : 'uri' '(' STRING ')'
+    ;
+
+/**********************************************************************/
+/* POSITIONS                                                          */
+/**********************************************************************/
+
+/* Always returns a vector of numbers, a.k.a. a position (a scalar is
+ * represented as a vector of one element). */
+position_expr
+    : str_cmp_icase
+    | str_cmp
+    | selector
+    | position
+    ;
+
+/* Compares two strings lexicographically, and returns a `position`:
+ *  [-1] : the string is lexicographically before,
+ *  [ 0] : is equal,
+ *  [ 1] : is after.
+ */
+str_cmp
+    : 'str_cmp' '(' selector ',' STRING ')'
+    ;
+
+/* Same as str_cmp, but case insensitive. */
+str_cmp_icase
+    : 'str_cmp_ignore_case' '(' selector ',' STRING ')'
+    ;
+
+/* TODO: FIELDs are expected to exist in the data model. The root object is assumed to be the type of the resource on which the POST call was made.
+ */
+selector
+    : ( FIELD )+   // one or more consecutive FIELD tokens
+    ;
+
+position   // Bracketed, comma-separated list of one or more numbers.
+    : '[' number ( ',' number )* ']'
+    ;
+
+/**********************************************************************/
+/* TOKENS - STRINGS                                                   */
+/**********************************************************************/
+
+/* Accepts field descriptors which
+ *  1. start with a dot ('.'),
+ *  2. are optionally followed by a field name consisting of a letter or
+ *     underscore, followed by letters, digits or underscores,
+ *  3. are optionally followed by brackets enclosing a natural number
+ *     denoting an offset in a list or array. */
+FIELD
+    : '.' ( [a-zA-Z_] [a-zA-Z0-9_]* )? ('[' INTEGER ']')?
+    ;
+
+STRING   // Double-quoted string made of escapes and safe code points.
+   : '"' (ESC | SAFECODEPOINT)* '"'
+   ;
+
+fragment ESC   // Backslash followed by a one-character escape or a UNICODE escape.
+   : '\\' (["\\/bfnrt] | UNICODE)
+   ;
+
+fragment UNICODE   // The letter 'u' followed by exactly four hexadecimal digits.
+   : 'u' HEX HEX HEX HEX
+   ;
+
+fragment HEX   // One hexadecimal digit, upper or lower case.
+   : [0-9a-fA-F]
+   ;
+
+fragment SAFECODEPOINT   // Anything but a double quote, a backslash or U+0000..U+001F.
+   : ~ ["\\\u0000-\u001F]
+   ;
+
+/**********************************************************************/
+/* TOKENS - NUMBERS                                                   */
+/**********************************************************************/
+/* We define 3 kinds of numbers, to avoid ambiguities in the rules. */
+
+/* Optional leading '-' only: no optional leading '+'. */
+json_number
+    : '-'? NUM
+    ;
+
+positive_number   // Optional leading '+' only: a '-' sign is not accepted.
+    : '+'? NUM
+    ;
+
+number   // Either sign, or none, in front of NUM.
+    : ( '+' | '-' )? NUM
+    ;
+
+NUM   // Unsigned number: integer part, optional fraction, optional exponent.
+    :  INTEGER ('.' [0-9]+ )? EXP?
+    ;
+
+fragment EXP   // Exponent: any digit run is allowed, leading zeros included (1e05).
+    : [Ee] [+\-]? [0-9]+
+    ;
+
+
+/* No leading zeros: either a lone '0', or a non-zero digit then any digits. */
+fragment INTEGER
+    : '0' | [1-9] [0-9]*
+    ;
+
+/**********************************************************************/
+/* WHITESPACES & COMMENTS                                             */
+/**********************************************************************/
+COMMENTS   // Line comments and (non-greedy) block comments are discarded.
+    : ( '//' ~[\r\n]* | '/*' .*? '*/' ) -> skip
+    ;
+
+WS
+    : [ \t\r\n]+ -> skip
+    ; // skip spaces, tabs, carriage returns and newlines
diff --git a/Grammars/queries.g4 b/Grammars/queries.g4
new file mode 100644
index 0000000..1b7509e
--- /dev/null
+++ b/Grammars/queries.g4
@@ -0,0 +1,82 @@
+grammar queries;
+import filters;
+
+/**********************************************************************/
+/* FORMATTING DATA                                                    */
+/**********************************************************************/
+queries   // Start rule: an optional projection operator, then end of input.
+    :  projection_operators? EOF   // without EOF any input matched as an empty query
+    ;
+
+projection_operators   // Chooses the output format: NIfTI or JSON.
+    : nifti_operator
+    | json_operator
+    ;
+
+/* If the selector is not provided, one (1) is used as the value for
+ * each position where there is a point in bag_expression.
+ *
+ * If it is provided, it MUST resolve to a NUMBER. */
+nifti_operator
+    : 'nifti' '(' ( selector ',' )? bag_expression ')'
+    ;
+
+json_operator   // json(<jslt template>, <bag expression>)
+    : 'json' '(' jslt ',' bag_expression ')'
+    ;
+
+jslt   // Currently just an alias for the json rule of this grammar.
+    : json
+    ;
+
+/**********************************************************************/
+/* JSON                                                               */
+/**********************************************************************/
+
+/**
+ * Taken and adapted from:
+ *  https://github.com/antlr/grammars-v4/blob/master/json/JSON.g4
+ *
+ * Some of the parser / lexer rules come from the imported grammar as well.
+ */
+json   // Embedded through jslt inside json_operator, so it is not a start rule.
+    : json_value
+    ;
+
+json_obj   // JSON object: comma-separated pairs enclosed in braces.
+    : '{' json_pair (',' json_pair)* '}'
+    | '{' '}'   // empty object
+    ;
+
+json_pair   // One object member: a STRING key, a colon, then a json_value.
+    : STRING ':' json_value
+    ;
+
+json_array   // JSON array: comma-separated values enclosed in brackets.
+    : '[' json_value (',' json_value)* ']'
+    | '[' ']'   // empty array
+    ;
+
+json_value   // Plain JSON values, extended with selectors and aggregations.
+    : STRING
+    | json_number
+    | json_obj
+    | json_array
+    | 'true'
+    | 'false'
+    | 'null'
+    /* Adds support for referencing values from the selected bag. */
+    | selector
+    | aggregation_expr
+    ;
+
+/* The bag expression is implicit here, as this is the
+ * second argument to the json operator. */
+aggregation_expr
+    : 'count' '(' 'distinct'? selector ')'
+    | 'sum' '(' selector ')'
+    | 'min' '(' selector ')'
+    | 'max' '(' selector ')'
+    | 'nifti' '(' selector ')'
+    | 'mbb' '(' ')'   // the only aggregation taking no selector
+    ;