Compare commits

...

11 Commits

Author SHA1 Message Date
40786112d0 Updated compiler and dependencies
* Upgraded to rustc 1.80
 * Updated dependencies
 * Fixed most linter warnings
2024-08-09 19:15:37 +02:00
f68ec7af14 Fix compilation with rustc 1.4.2
More adaptations are required to be able to use the most recent version
of Rust, therefore fix the dependencies and the compiler version.
2024-08-08 12:14:21 +02:00
e4cbdf836f Rename library to mercator_parser 2020-04-01 18:14:45 +02:00
242de73053 Adding documentation 2020-04-01 17:06:52 +02:00
98b37e63b4 Silence warning in generated code 2020-03-17 17:18:50 +01:00
e2ea5c9ba4 Introduce Label for search within id
This allows to define a volume using an indexed object.
2020-03-16 13:48:01 +01:00
0dc31c65c6 Remove some .unwrap() calls.
The parser still contains three calls to `unwrap()`, but at this point
I have not yet figured out how to remove them.
2020-01-20 14:53:09 +01:00
e8d931b551 Prevent unsafe blocks for now 2020-01-20 14:53:09 +01:00
b8baee8019 Reduce dependencies. 2020-01-14 18:10:08 +01:00
f95aaa389b Updating test index to new format 2019-11-14 15:30:57 +01:00
a31ce4387f Prevent some allocations 2019-11-13 11:04:29 +01:00
20 changed files with 258 additions and 149 deletions

BIN
10k.index

Binary file not shown.

View File

@@ -21,26 +21,31 @@ include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.r
build = "build.rs" # LALRPOP preprocessing
[lib]
name = "parser"
name = "mercator_parser"
path = "src/lib.rs"
[[bin]]
name = "parser-driver"
path = "src/main.rs"
required-features = ["bin"]
[features]
bin = ["measure_time", "pretty_env_logger"]
[dependencies]
mercator_db = "^0.1"
mercator_db = "0.1"
lalrpop-util = "^0.17"
regex = "^1.2"
measure_time = "^0.6" # To mesure parsing time, only required by binary
lalrpop-util = "0.20"
regex = "1.10"
# Logging macros API
#log = { version = "^0.4", features = ["max_level_trace", "release_max_level_info"] }
log = { version = "^0.4", features = ["max_level_trace", "release_max_level_trace"] }
pretty_env_logger = "^0.3" # Logger implementation
#log = { version = "0.4", features = ["max_level_trace", "release_max_level_info"] }
log = { version = "0.4", features = ["max_level_trace", "release_max_level_trace"] }
# Used for main.rs
pretty_env_logger = { version = "0.5", optional = true } # Logger implementation
measure_time = { version = "0.8", optional = true } # To measure parsing time, only required by binary
[build-dependencies]
lalrpop = "^0.17.1"
lalrpop = "0.20"

View File

@@ -19,6 +19,7 @@ bag_expression
// Spatial Operators
| inside
| outside
//| shape
;
/**********************************************************************/
@@ -113,6 +114,11 @@ inside
: 'inside' '(' shapes ')'
;
/* Returns the set of positions inside the shape, (face included) */
shape
: 'shape' '(' shapes ')'
;
/**********************************************************************/
/* SHAPES */
/**********************************************************************/

View File

@@ -18,7 +18,7 @@ projection_operators
*
* If it is provided, it MUST resolve to a NUMBER. */
nifti_operator
: 'nifti' '(' ( STRING ',' )? ( selector ',' )? bag_expression ')'
: 'nifti' '(' ( selector ',' )? bag_expression ( ',' STRING )? ')'
;
json_operator

View File

@@ -22,40 +22,6 @@ This enables the index implementations to be agnostic from the underlying data s
* Rust: https://www.rust-lang.org
## Quick start
## Building from sources
To build this project, you will need to run the following:
```sh
cargo build --release
```
### Installation
To install the software on the system you can use:
```sh
cargo install --release
```
### Usage
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin vehicula pretium
quam sit amet facilisis. Class aptent taciti sociosqu ad litora torquent per
conubia nostra, per inceptos himenaeos. Curabitur metus sapien, rhoncus vitae
eleifend nec, convallis vel nunc. Nulla metus mauris, porta eu porta eu,
vulputate et est. Suspendisse lacinia leo vel auctor aliquet. Maecenas non arcu
libero. Nulla ut eleifend dui. Cras bibendum pharetra facilisis. Proin mattis
libero non pharetra tristique. Nam massa nulla, ultrices pharetra quam a,
fermentum placerat dolor. Nullam mollis libero et neque lobortis, id dignissim
lectus dignissim. Maecenas ligula enim, congue in ornare vel, volutpat ut ante.
```sh
cargo run --release
```
## Documentation
For more information, please refer to the [documentation](https://epfl-dias.github.io/mercator_parser/).

1
book/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
book

6
book/book.toml Normal file
View File

@@ -0,0 +1,6 @@
[book]
authors = ["Lionel Sambuc"]
language = "en"
multilingual = false
src = "src"
title = "Mercator Parser"

5
book/src/SUMMARY.md Normal file
View File

@@ -0,0 +1,5 @@
# Summary
[Introduction](./introduction.md)
- [Filter Grammar](./filters.md)
- [Query Grammar](./queries.md)

10
book/src/filters.md Normal file
View File

@@ -0,0 +1,10 @@
# Filter Grammar
You will find below the definition of this SDL, for filtering data
from the index.
## filters.g4
```antlr
{{#include ../../Grammars/filters.g4}}
```

7
book/src/introduction.md Normal file
View File

@@ -0,0 +1,7 @@
# Introduction
To support volumetric queries for Mercator, a new domain-specific language (DSL) was created.
ANTLR was used to write and test the SDL, to check that it stays simple
to parse and fast to execute. The actual [parser](https://epfl-dias.github.io/mercator_parser/) and interpreter are
defined in Rust, using [LALRPOP](https://docs.rs/lalrpop/0.18.1/lalrpop/).

9
book/src/queries.md Normal file
View File

@@ -0,0 +1,9 @@
# Query Grammar
You will find below the definition of this SDL, for queries. This builds on top of the [filters](filters.html) grammar.
## queries.g4
```antlr
{{#include ../../Grammars/queries.g4}}
```

2
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,2 @@
[toolchain]
channel = "1.80.0"

View File

@@ -38,12 +38,10 @@ fn complement_helper<'c>(
Ok(points
.into_iter()
.filter_map(|(space, v)| match hashmap.get(space) {
None => None,
Some(list) => {
Some((space, v.into_iter().filter(|t| !list.contains(t)).collect()))
}
})
.filter_map(|(space, v)|
hashmap.get(space).map(|list|
(space, v.into_iter().filter(|t|
!list.contains(t)).collect())))
.collect::<Vec<_>>())
}
}
@@ -74,39 +72,74 @@ fn distinct<'c>(
e @ Err(_) => e,
Ok(mut v) => {
let set: HashSet<_> = v.drain(..).collect(); // dedup
v.extend(set.into_iter());
v.extend(set);
Ok(v)
}
}
}
/// Executes `bag` and keeps only the entries accepted by `predicate`.
///
/// An error returned by `bag.execute` is passed through unchanged. On
/// success, every `(space, positions)` pair of the result is filtered
/// with `predicate.eval`, and pairs left without any position are
/// removed from the returned set.
fn filter_helper<'c>(
predicate: &Predicate,
bag: &Bag,
core_id: &str,
parameters: &CoreQueryParameters<'c>,
) -> mercator_db::ResultSet<'c> {
match bag.execute(core_id, parameters) {
// Propagate execution errors as-is.
e @ Err(_) => e,
Ok(results) => Ok(results
.into_iter()
.filter_map(|(space, positions)| {
// Keep only the positions for which the predicate holds.
let filtered = positions
.into_iter()
.filter(|(position, properties)| predicate.eval((space, position, properties)))
.collect::<Vec<_>>();
// Drop the space entirely when nothing passed the predicate.
if filtered.is_empty() {
None
} else {
Some((space, filtered))
}
})
.collect::<Vec<_>>()),
}
}
fn filter<'c>(
core_id: &str,
parameters: &CoreQueryParameters<'c>,
predicate: &Option<Predicate>,
bag: &Bag,
bag: &Option<Box<Bag>>,
) -> mercator_db::ResultSet<'c> {
match predicate {
None => bag.execute(core_id, parameters),
Some(predicate) => match bag.execute(core_id, parameters) {
e @ Err(_) => e,
Ok(results) => Ok(results
.into_iter()
.filter_map(|(space, positions)| {
let filtered = positions
.into_iter()
.filter(|(position, properties)| {
predicate.eval((space, position, properties))
})
.collect::<Vec<_>>();
if filtered.is_empty() {
None
} else {
Some((space, filtered))
}
})
.collect::<Vec<_>>()),
None => {
if let Some(bag) = bag {
bag.execute(core_id, parameters)
} else {
Err("Filter without predicate nor data set.".to_string())
}
}
Some(predicate) => match bag {
None => {
let (low, high) = space::Space::universe().bounding_box();
let low: Vec<_> = low.into();
let high: Vec<_> = high.into();
let shape = Shape::HyperRectangle(
space::Space::universe().name().clone(),
vec![
LiteralPosition(
low.into_iter()
.map(LiteralNumber::Float)
.collect::<Vec<_>>(),
),
LiteralPosition(
high.into_iter()
.map(LiteralNumber::Float)
.collect::<Vec<_>>(),
),
],
);
filter_helper(predicate, &Bag::Inside(shape), core_id, parameters)
}
Some(bag) => filter_helper(predicate, bag.as_ref(), core_id, parameters),
},
}
}
@@ -248,6 +281,10 @@ fn inside<'c>(
//FIXME: RADIUS IS A LENGTH, HOW TO ENCODE IT INTO THE SPACE?
Ok((space_id, space::Shape::HyperSphere(position, radius)))
}
Shape::Label(_, id) => {
// Not a real shape, so short circuit and return.
return core.get_by_label(parameters, id);
}
Shape::Nifti(_space_id) => Err("Inside-Nifti: not yet implemented".to_string()),
};
@@ -279,7 +316,7 @@ fn outside<'c>(
// Smallest increment possible
let mut increment = Vec::with_capacity(bounding_box[0].dimensions());
for _ in 0..bounding_box[0].dimensions() {
increment.push(std::f64::EPSILON);
increment.push(f64::EPSILON);
}
// Add it to the lower bound
@@ -298,7 +335,7 @@ fn outside<'c>(
Shape::HyperSphere(space_id, center, radius) => {
// Smallest decrement possible, to exclude the surface
let mut radius: f64 = radius.into();
radius -= std::f64::EPSILON;
radius -= f64::EPSILON;
let center: space::Position = center.into();
match core.get_by_shape(
@@ -310,6 +347,7 @@ fn outside<'c>(
Ok(inside) => complement_helper(core, parameters, space_id, inside),
}
}
Shape::Label(_, _) => Err("Label: not yet implemented".to_string()),
Shape::Nifti(_space_id) => Err("Outside-nifti: not yet implemented".to_string()),
}
}
@@ -324,7 +362,7 @@ impl<'e> Executor<'e> for Projection {
) -> Self::ResultSet {
match self {
Projection::Nifti(_, _, _bag) => Err("Proj-Nifti: not yet implemented".to_string()),
Projection::JSON(_, _format, bag) => {
Projection::Json(_, _format, bag) => {
bag.execute(core_id, parameters)
// FIXME: Add projections here
}

View File

@@ -1,15 +1,57 @@
#![forbid(unsafe_code)]
//! # Mercator Parser
//!
//! Query parser for Mercator.
//!
//! ## Mercator: Spatial Index
//!
//! **Mercator** is a spatial *volumetric* index for the
//! [Human Brain Project]. It is a component of the [Knowledge Graph]
//! service, which provides the spatial anchoring for the metadata
//! registered as well as processes the volumetric queries.
//!
//! It is built on top of the Iron Sea database toolkit.
//!
//! ## Iron Sea: Database Toolkit
//! **Iron Sea** provides a set of database engine bricks, which can be
//! combined and applied on arbitrary data structures.
//!
//! Unlike a traditional database, it does not assume a specific
//! physical structure for the tables nor the records, but relies on the
//! developer to provide a set of extractor functions which are used by
//! the specific indices provided.
//!
//! This enables the index implementations to be agnostic from the
//! underlying data structure, and re-used.
//!
//! [Human Brain Project]: http://www.humanbrainproject.eu
//! [Knowledge Graph]: http://www.humanbrainproject.eu/en/explore-the-brain/search/
#[macro_use]
extern crate lalrpop_util;
lalrpop_mod!(#[allow(clippy::all)] pub queries); // synthesized by LALRPOP
lalrpop_mod!(#[allow(clippy::all,unused_parens)] pub queries); // synthesized by LALRPOP
// Note: We do not enable for the whole library deny(missing_docs), as
// it requires the automatically generated parser to be documented
// as well.
// Instead we enable it per modules below, except for the tests.
//#[warn(missing_docs)]
mod evaluators;
//#[warn(missing_docs)]
mod executors;
//#[warn(missing_docs)]
mod expressions;
//#[warn(missing_docs)]
mod predictors;
//#[warn(missing_docs)]
mod validators;
//#[warn(missing_docs)]
mod symbols;
//#[warn(missing_docs)]
mod types;
pub use expressions::Executor;

View File

@@ -1,3 +1,5 @@
#![forbid(unsafe_code)]
#[macro_use]
extern crate measure_time;
@@ -5,11 +7,11 @@ use std::io;
use mercator_db::CoreQueryParameters;
use mercator_db::DataBase;
use parser::Executor;
use parser::FiltersParser;
use parser::Predictor;
use parser::QueryParser;
use parser::Validator;
use mercator_parser::Executor;
use mercator_parser::FiltersParser;
use mercator_parser::Predictor;
use mercator_parser::QueryParser;
use mercator_parser::Validator;
fn main() {
// If RUST_LOG is unset, set it to INFO, otherwise keep it as-is.
@@ -24,7 +26,8 @@ fn main() {
let db;
{
info_time!("Loading database index");
db = DataBase::load(&[&format!("{}.index", core)]).unwrap();
db = DataBase::load(&[&format!("{}.index", core)])
.unwrap_or_else(|e| panic!("Unable to load database '{}': {}", core, e));
}
let parameters = CoreQueryParameters {
@@ -91,7 +94,7 @@ fn main() {
}
if let Ok(r) = execute {
//let r = mercator_db::json::model::to_spatial_objects(&db, r);
//let r = mercator_db::json::model::to_spatial_objects(r);
info!("Execution: \n{:#?}", r);
info!("NB results: {:?}", r.len());
} else {

View File

@@ -1,3 +1,4 @@
use mercator_db::space;
use mercator_db::DataBase;
use super::expressions::Predictor;
@@ -7,7 +8,7 @@ impl Predictor for Projection {
fn predict(&self, db: &DataBase) -> Result<f64, String> {
match self {
Projection::Nifti(_, _, bag) => bag.predict(db),
Projection::JSON(_, _, bag) => bag.predict(db),
Projection::Json(_, _, bag) => bag.predict(db),
}
}
}
@@ -17,7 +18,10 @@ impl Predictor for Bag {
match self {
Bag::ViewPort(bag) => bag.predict(db),
Bag::Distinct(bag) => bag.predict(db),
Bag::Filter(_, bag) => bag.predict(db),
Bag::Filter(_, bag) => match bag {
None => Ok(db.space(space::Space::universe().name())?.volume()),
Some(b) => b.predict(db),
},
Bag::Complement(bag) => Ok(db.space(bag.space())?.volume() - bag.predict(db)?),
Bag::Intersection(lh, rh) => {
let l = lh.predict(db)?;

View File

@@ -51,7 +51,7 @@ JsonOperator: symbols::Projection = {
None => Space::universe().name().clone(),
};
symbols::Projection::JSON(space_id, f, b)
symbols::Projection::Json(space_id, f, b)
}
};
@@ -153,6 +153,8 @@ Bags: symbols::Bag = {
// Spatial Operators
Inside,
Outside,
// returns the positions or volume of the shape, instead of the data points in or outside it.
//Shape,
};
//*********************************************************************/
@@ -187,9 +189,13 @@ Union: symbols::Bag = {
Filter: symbols::Bag = {
// "filter" "(" <p:Predicates> "," <b:Bags> ")" =>
"filter" "(" <b:Bags> ")" =>
symbols::Bag::Filter(None, Box::new(b)),
symbols::Bag::Filter(None, Some(Box::new(b))),
"filter" "(" <p:Predicates> <b:("," <Bags> )?> ")" =>
symbols::get_filter(p, b)
match b {
None => symbols::Bag::Filter(Some(p), None),
Some(b) => symbols::Bag::Filter(Some(p), Some(Box::new(b))),
}
};
Predicates: symbols::Predicate = {
@@ -272,6 +278,12 @@ Inside: symbols::Bag = {
symbols::Bag::Inside(<>)
};
//FIXME: ADD A SHAPE VARIANT WHICH JUST RETURNS ALL THE POSITIONS OF THAT SHAPE
//Shape: symbols::Bag = {
// <Shapes> =>
// symbols::Bag::Shape(<>)
//}
//*********************************************************************/
// SHAPES */
//*********************************************************************/
@@ -282,6 +294,7 @@ Shapes: symbols::Shape = {
Point,
HyperRectangle,
HyperSphere,
Label,
Nifti
};
@@ -333,6 +346,21 @@ Point: symbols::Shape = {
}
};
// Filter by Label, a.k.a use an ID to define a volume, and use that volume to
// select data points.
Label: symbols::Shape = {
"label" "{"
<id:String>
<rs:( "," <String> )?>
"}" => {
let space_id = match rs {
Some(id) => id,
None => Space::universe().name().clone(),
};
symbols::Shape::Label(space_id, id)
}
};
// Define a shape as the non-zero values in a NIfTI object, defined by
// nifti{
// spaceId: string,

View File

@@ -11,14 +11,14 @@ pub use super::types::*;
#[derive(Clone, Debug)]
pub enum Projection {
Nifti(String, LiteralSelector, Bag),
JSON(String, JsonValue, Bag),
Json(String, JsonValue, Bag),
}
impl Projection {
pub fn space(&self) -> &String {
match self {
Projection::Nifti(space, _, _) => &space,
Projection::JSON(space, _, _) => &space,
Projection::Nifti(space, _, _) => space,
Projection::Json(space, _, _) => space,
}
}
}
@@ -61,7 +61,7 @@ pub enum Bag {
ViewPort(Box<Bag>),
// Bags
Distinct(Box<Bag>),
Filter(Option<Predicate>, Box<Bag>),
Filter(Option<Predicate>, Option<Box<Bag>>),
Complement(Box<Bag>),
Intersection(Box<Bag>, Box<Bag>),
Union(Box<Bag>, Box<Bag>),
@@ -77,7 +77,10 @@ impl Bag {
match self {
Bag::ViewPort(bag) => bag.space(),
Bag::Distinct(bag) => bag.space(),
Bag::Filter(_, bag) => bag.space(),
Bag::Filter(_, bag) => match bag {
None => space::Space::universe().name(),
Some(b) => b.space(),
},
Bag::Complement(bag) => bag.space(),
Bag::Intersection(lh, _) => {
// We are assuming lh and rh are in the same space.
@@ -124,6 +127,7 @@ pub enum Shape {
Point(String, LiteralPosition),
HyperRectangle(String, Vec<LiteralPosition>),
HyperSphere(String, LiteralPosition, LiteralNumber),
Label(String, String),
Nifti(String),
}
@@ -133,13 +137,14 @@ impl Shape {
Shape::Point(space, _) => space,
Shape::HyperRectangle(space, _) => space,
Shape::HyperSphere(space, _, _) => space,
Shape::Label(space, _) => space,
Shape::Nifti(space) => space,
}
}
pub fn volume(&self) -> f64 {
match self {
Shape::Point(_, _) => std::f64::EPSILON, // The smallest non-zero volume possible
Shape::Point(_, _) => f64::EPSILON, // The smallest non-zero volume possible
Shape::HyperRectangle(_space, pos) => {
//TODO: At this time, only aligned to the axes, defined by two points, hyperrectangles are supported.
assert_eq!(pos.len(), 2);
@@ -201,9 +206,17 @@ impl Shape {
a * radius.powi(i as i32)
}
Shape::Nifti(_) => unimplemented!(),
Shape::Label(_, _) => {
// FIXME: Needs to find a way to figure out the approximate volume of this specific ID, or return MAX or MIN..
f64::EPSILON
}
Shape::Nifti(_) => unimplemented!("Nifti"),
}
}
/// Placeholder: not implemented yet.
///
/// # Panics
///
/// Always panics through `unimplemented!("rasterize")`.
pub fn rasterize<'e>(&self) -> mercator_db::ResultSet<'e> {
unimplemented!("rasterize")
}
}
/**********************************************************************/
@@ -412,51 +425,21 @@ impl LiteralSelector {
}
// FIXME: THIS IS SOOO WRONG
pub fn str<'e>(&self, object: (&'e String, &'e space::Position, &'e Properties)) -> String {
pub fn str<'e>(&self, object: (&'e String, &'e space::Position, &'e Properties)) -> &'e str {
let LiteralSelector(v) = self;
let last = v.last();
if let Some(Field(name, _)) = last {
if name == "id" {
return object.2.id().into();
return object.2.id();
} else if name == "type" {
return object.2.type_name().into();
return object.2.type_name();
} else if name == "reference_space" {
return object.0.clone();
return object.0;
}
}
println!("LiteralSelector.str(): {:?}", self);
unimplemented!();
}
}
// The logic was getting a bit too complex to be embedded directly into the
// grammar definition.
pub fn get_filter(p: Predicate, b: Option<Bag>) -> Bag {
match b {
Some(b) => Bag::Filter(Some(p), Box::new(b)),
None => {
let (low, high) = space::Space::universe().bounding_box();
let low: Vec<_> = low.into();
let high: Vec<_> = high.into();
let bb = Shape::HyperRectangle(
space::Space::universe().name().clone(),
vec![
LiteralPosition(
low.into_iter()
.map(LiteralNumber::Float)
.collect::<Vec<_>>(),
),
LiteralPosition(
high.into_iter()
.map(LiteralNumber::Float)
.collect::<Vec<_>>(),
),
],
);
Bag::Filter(Some(p), Box::new(Bag::Inside(bb)))
}
unimplemented!("Unknown Field");
}
}

View File

@@ -11,23 +11,10 @@ pub enum LiteralTypes {
impl PartialEq for LiteralTypes {
fn eq(&self, other: &Self) -> bool {
match self {
LiteralTypes::String => match other {
LiteralTypes::String => true,
_ => false,
},
LiteralTypes::Int => match other {
LiteralTypes::Int => true,
_ => false,
},
LiteralTypes::Float => match other {
LiteralTypes::Float => true,
LiteralTypes::Int => true,
_ => false,
},
LiteralTypes::Bag(_) => match other {
LiteralTypes::Bag(_) => true,
_ => false,
},
LiteralTypes::String => matches!(other, LiteralTypes::String),
LiteralTypes::Int => matches!(other, LiteralTypes::Int),
LiteralTypes::Float => matches!(other, LiteralTypes::Float | LiteralTypes::Int),
LiteralTypes::Bag(_) => matches!(other, LiteralTypes::Bag(_)),
LiteralTypes::Vector(v) => match other {
LiteralTypes::Vector(ov) => {
let n = v.len();

View File

@@ -9,7 +9,7 @@ impl Validator for Projection {
fn validate(&self) -> ValidationResult {
match self {
Projection::Nifti(_, _, _) => Err("not yet implemented".to_string()),
Projection::JSON(_, _format, bag) => bag.validate(),
Projection::Json(_, _format, bag) => bag.validate(),
//FIXME: Add support for projections
/* match format.validate() {
Ok(_) => bag.validate(),
@@ -56,7 +56,10 @@ impl Validator for Bag {
match self {
Bag::ViewPort(bag) => bag.validate(),
Bag::Distinct(bag) => bag.validate(),
Bag::Filter(_, bag) => bag.validate(),
Bag::Filter(_, bag) => match bag {
None => Ok(LiteralPosition(vec![]).get_type()),
Some(b) => b.validate(),
},
Bag::Complement(bag) => bag.validate(),
Bag::Intersection(lh, rh) => compare_bag_types(lh, rh),
Bag::Union(lh, rh) => compare_bag_types(lh, rh),
@@ -149,6 +152,10 @@ impl Validator for Shape {
}
}
Shape::HyperSphere(_, pos, _) => pos.validate(),
Shape::Label(_, _) => {
// FIXME: Quick Hack, we need to fix this and return the effective type of the object Id.
Ok(LiteralPosition(vec![]).get_type())
}
Shape::Nifti(_) => Err("not yet implemented".to_string()),
}
}