From 9cab1916c9be0083a2fe52993ad8c766313fd8d1 Mon Sep 17 00:00:00 2001 From: Lionel Sambuc Date: Mon, 23 Mar 2020 10:45:03 +0100 Subject: [PATCH] Adding documentation, some code cleanups * Not re-exporting SpaceSetObject outside of the crate * Removed unused SpaceObject definition * Factored out Point definition. * Remove `pub` visibility on definition not actually requiring it. * Added an assert for indexing into a Position, in the case where the position has only one dimension, the index MUST BE 0. * Commented `highest_resolution()` as this is not yet used. --- README.md | 28 +---- src/database/db_core.rs | 149 +++++++++++++++++++++--- src/database/mod.rs | 58 ++++++--- src/database/space/axis.rs | 109 +++++++++++++++-- src/database/space/coordinate.rs | 21 ++++ src/database/space/coordinate_system.rs | 99 ++++++++++++++-- src/database/space/mod.rs | 80 +++++++++++-- src/database/space/position.rs | 57 ++++++--- src/database/space/shape.rs | 72 ++++++++++-- src/database/space_db.rs | 2 + src/database/space_index.rs | 2 +- src/lib.rs | 30 +++++ src/storage/bincode.rs | 43 +++++++ src/storage/json.rs | 10 ++ src/storage/mod.rs | 5 + src/storage/model.rs | 132 ++++++++++++++++++++- src/storage/xyz.rs | 122 ++++++++++++++++++- 17 files changed, 905 insertions(+), 114 deletions(-) diff --git a/README.md b/README.md index c3a51a7..54560fd 100644 --- a/README.md +++ b/README.md @@ -22,35 +22,9 @@ This enables the index implementations to be agnostic from the underlying data s * Rust: https://www.rust-lang.org -## Quick start - -## Building from sources - -To build this project, you will need to run the following: - -```sh -cargo build --release -``` - -### Installation - -To install the software on the system you can use: - -```sh -cargo install --release -``` - -### Usage - -The binary `db-test` provided is used only as an integration test at this point. It will convert a json input to a binary representation, before building an index over it. 
Once this is achieved, it will run a couple of hard-coded queries over the index. - -```sh -cargo run --release -``` - ## Documentation -For more information, please refer to the [documentation](https://epfl-dias.github.io/PROJECT_NAME/). +For more information, please refer to the [documentation](https://epfl-dias.github.io/mercator_db/). If you want to build the documentation and access it locally, you can use: diff --git a/src/database/db_core.rs b/src/database/db_core.rs index 87be942..3064ef7 100644 --- a/src/database/db_core.rs +++ b/src/database/db_core.rs @@ -9,15 +9,31 @@ use super::space_index::SpaceSetObject; use super::DataBase; use super::ResultSet; +/// Query Parameters. pub struct CoreQueryParameters<'a> { + /// Database to use. pub db: &'a DataBase, + /// Output reference space into which to convert results. pub output_space: Option<&'a str>, + /// Volume value to use to select the index resolution. + //FIXME: IS this necessary given view_port? pub threshold_volume: Option, + /// Full definition of the view port, a.k.a the volume being + /// displayed. pub view_port: &'a Option<(Vec, Vec)>, + /// Index resolution to use. pub resolution: &'a Option>, } impl CoreQueryParameters<'_> { + /// Build a minimum bounding box out of the provided viewport, and + /// rebase it in the target space. + /// + /// # Parameters + /// + /// * `space`: + /// Space to use for the encoded coordinates of the minimum + /// bounding box. pub fn view_port(&self, space: &Space) -> Option { if let Some((low, high)) = self.view_port { let view_port = Shape::BoundingBox(low.into(), high.into()); @@ -31,14 +47,21 @@ impl CoreQueryParameters<'_> { } } +/// Definition of the volumetric objects identifiers. +/// +/// We have two parts to it, first the *kind* and the actual, *id* used +/// to distinguish different objects. // FIXME: Ids are expected unique, irrespective of the enum variant! 
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] pub enum Properties { + /// Spatial Features. Feature(String), + /// Unoptimized arbitrary kind of *identifiers*. Unknown(String, String), } impl Properties { + /// Extract the *identifier* of this spatial object. pub fn id(&self) -> &str { match self { Properties::Feature(id) => id, @@ -46,6 +69,7 @@ impl Properties { } } + /// Extract the *kind* of spatial object. pub fn type_name(&self) -> &str { match self { Properties::Feature(_) => "Feature", @@ -53,6 +77,13 @@ impl Properties { } } + /// Instantiate a new *feature*. + /// + /// # Parameters + /// + /// * `id`: + /// The identifier of the object, which can be converted into a + /// `String`. pub fn feature(id: S) -> Properties where S: Into, @@ -60,6 +91,17 @@ impl Properties { Properties::Feature(id.into()) } + /// Instantiate a new arbitrary kind of object, with the given id. + /// + /// # Parameters + /// + /// * `id`: + /// The identifier of the object, which can be converted into a + /// `String`. + /// + /// * `type_name`: + /// A value which can be converted into a `String`, and + /// represents the **kind** of the object. pub fn unknown(id: S, type_name: S) -> Properties where S: Into, @@ -68,6 +110,7 @@ impl Properties { } } +/// Index over a single dataset #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Core { title: String, @@ -77,6 +120,43 @@ pub struct Core { } impl Core { + /// Instantiate a new index for a dataset. + /// + /// # Parameters + /// + /// * `title`: + /// The title to use for the new dataset. + /// + /// * `version`: + /// The revision of the new dataset. + /// + /// * `spaces`: + /// The list of reference spaces used within the dataset. + /// + /// * `properties`: + /// The *identifiers*, as an ordered list, which is referenced + /// by the `space_objects` by offset within this list. + /// + /// * `space_objects`: + /// A list of links between volumetric positions and + /// identifiers. 
+ /// + /// * `scales`: + /// A list of resolutions for which to build indices. Each value + /// represents the number of bits of precision to **remove** from + /// the coordinates to build the index. + /// + /// * `max_elements`: + /// The minimum number of positions to use as a stopping + /// condition while building automatically multiple resolutions + /// of the index. + /// + /// Each consecutive index will contain at most half the number + /// of data points of the next finer-grained index. + /// + /// The minimum number of elements contained within an index is + /// this value or the number of *identifiers*, whichever is + /// greater. pub fn new( title: S, version: S, @@ -125,14 +205,17 @@ impl Core { }) } + /// Title of the dataset. pub fn name(&self) -> &String { &self.title } + /// Revision of the dataset. pub fn version(&self) -> &String { &self.version } + /// List of *identifiers* contained in this dataset. pub fn keys(&self) -> &Vec { &self.properties } @@ -164,13 +247,26 @@ impl Core { Ok(()) } - // Search by positions defining a volume. - // Positions ARE DEFINED IN F64 VALUES IN THE SPACE. NOT ENCODED! + /// Retrieve everything located at specific positions. + /// + /// # Parameters + /// + /// * `parameters`: + /// Search parameters, see [CoreQueryParameters](struct.CoreQueryParameters.html). + /// + /// * `positions`: + /// Volume to use to filter data points. + /// + /// * `space_id`: + /// *positions* are defined as decoded coordinates in this + /// reference space. + /// + /// [shape]: space/enum.Shape.html pub fn get_by_positions( &self, parameters: &CoreQueryParameters, positions: &[Position], - from: &str, + space_id: &str, ) -> ResultSet { let CoreQueryParameters { db, output_space, .. 
@@ -178,7 +274,7 @@ impl Core { let mut results = vec![]; let count = positions.len(); - let from = db.space(from)?; + let from = db.space(space_id)?; // Filter positions based on the view port, if present let filtered = match parameters.view_port(from) { @@ -211,12 +307,21 @@ impl Core { Ok(results) } - // Search by shape defining a volume: - // * Hyperrectangle (MBB), - // * HyperSphere (radius around a point), - // * Point (Specific position) - - // SHAPE IS DEFINED IN F64 VALUES IN THE SPACE. NOT ENCODED! + /// Search using a [shape] which defines a volume. + /// + /// # Parameters + /// + /// * `parameters`: + /// Search parameters, see [CoreQueryParameters](struct.CoreQueryParameters.html). + /// + /// * `shape`: + /// Volume to use to filter data points. + /// + /// * `space_id`: + /// *shape* is defined as decoded coordinates in this + /// reference space. + /// + /// [shape]: space/enum.Shape.html pub fn get_by_shape( &self, parameters: &CoreQueryParameters, @@ -251,7 +356,16 @@ impl Core { Ok(results) } - // Search by Id, a.k.a values + /// Search by Id, a.k.a retrieve all the positions linked to this id. + /// + /// # Parameters + /// + /// * `parameters`: + /// Search parameters, see [CoreQueryParameters](struct.CoreQueryParameters.html). + /// + /// * `id`: + /// Identifier for which to retrieve its positions. + /// pub fn get_by_id( &self, parameters: &CoreQueryParameters, @@ -305,8 +419,17 @@ impl Core { Ok(results) } - // Search by Label, a.k.a within a volume defined by the positions of an Id. - // FIXME: NEED TO KEEP TRACK OF SPACE IDS AND DO CONVERSIONS + /// Search by label, a.k.a use an identifier to define the search + /// volume. + /// + /// # Parameters + /// + /// * `parameters`: + /// Search parameters, see [CoreQueryParameters](struct.CoreQueryParameters.html). + /// + /// * `id`: + /// Identifier to use to define the search volume. 
+ /// pub fn get_by_label(&self, parameters: &CoreQueryParameters, id: S) -> ResultSet where S: Into, diff --git a/src/database/mod.rs b/src/database/mod.rs index 97928e9..109294f 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -1,12 +1,11 @@ mod db_core; pub mod space; mod space_db; -mod space_index; +pub(crate) mod space_index; use std::collections::HashMap; use ironsea_index::Indexed; -use serde::Serialize; use super::storage; pub use db_core::Core; @@ -14,27 +13,36 @@ pub use db_core::CoreQueryParameters; pub use db_core::Properties; use space::Position; use space::Space; -pub use space_index::SpaceFields; -pub use space_index::SpaceSetObject; -// (Space Name, Position, Fields) +/// Selected tuples matching a query. +/// +/// This is either: +/// * `Err` with a reason stored as a `String` +/// * `Ok`, with a vector of tuples defined as: +/// `(Space Name, [(Position, Properties)])` pub type ResultSet<'r> = Result)>, String>; -pub type ReferenceSpaceIndex = ironsea_index_hashmap::Index; + +type ReferenceSpaceIndex = ironsea_index_hashmap::Index; type CoreIndex = ironsea_index_hashmap::Index; -#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize)] -pub struct SpaceObject { - pub space_id: String, - pub position: Position, - pub value: Properties, -} - +/// Collection of datasets and their reference spaces. pub struct DataBase { reference_spaces: ReferenceSpaceIndex, cores: CoreIndex, } impl DataBase { + /// Instantiate a `DataBase` struct. + /// + /// # Parameters + /// + /// * `spaces`: + /// List of reference spaces. + /// + /// * `cores`: + /// List of datasets (cores) which will be queried through this + /// `DataBase` struct. + // TODO: Replace vectors with iterators? pub fn new(spaces: Vec, cores: Vec) -> Self { DataBase { reference_spaces: ReferenceSpaceIndex::new(spaces.into_iter()), @@ -42,6 +50,12 @@ impl DataBase { } } + /// Load a list of indices. 
+ /// + /// # Parameters + /// + /// * `indices`: + /// The list of index file names to load. pub fn load(indices: &[&str]) -> Result { let mut spaces = HashMap::new(); let mut cores = vec![]; @@ -99,12 +113,17 @@ impl DataBase { } } - // Lookup a space within the reference spaces registered + /// Returns an ordered list of the reference space names registered. pub fn space_keys(&self) -> &Vec { self.reference_spaces.keys() } - // Lookup a space within the reference spaces registered + /// Lookup a space within the reference spaces registered. + /// + /// # Parameters + /// + /// * `name`: + /// The name of the reference space to search for. pub fn space(&self, name: &str) -> Result<&Space, String> { if name == space::Space::universe().name() { Ok(space::Space::universe()) @@ -115,12 +134,17 @@ impl DataBase { } } - // Lookup a space within the reference spaces registered + /// Returns an ordered list of dataset (Core) names registered. pub fn core_keys(&self) -> &Vec { self.cores.keys() } - // Lookup a dataset within the datasets registered + /// Lookup a dataset within the datasets registered. + /// + /// # Parameters + /// + /// * `name`: + /// The name of the dataset (core) to search for. pub fn core(&self, name: &str) -> Result<&Core, String> { let r = self.cores.find(&name.to_string()); diff --git a/src/database/space/axis.rs b/src/database/space/axis.rs index 904d2c9..3aa5328 100644 --- a/src/database/space/axis.rs +++ b/src/database/space/axis.rs @@ -4,11 +4,16 @@ use serde::Serialize; use super::coordinate::Coordinate; use super::position::Position; +/// Mathematical set numbers. #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub enum NumberSet { + /// [Natural numbers](https://en.wikipedia.org/wiki/Natural_number), here including **0**. N, + /// [Integers](https://en.wikipedia.org/wiki/Integer). Z, + /// [Rational](https://en.wikipedia.org/wiki/Rational_number) numbers. Q, + /// [Real](https://en.wikipedia.org/wiki/Real_number) numbers. 
R, } @@ -37,12 +42,19 @@ impl From<&NumberSet> for String { } } +/// Definition of a fixed-precision, finite length axis. #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct Graduation { + /// Set of numbers allowed on the axis. pub set: NumberSet, + /// Minimum value *inclusive*. pub minimum: f64, + /// Maximum value *inclusive*. pub maximum: f64, + /// Number of *ticks* or discrete values between `minimum` and + /// `maximum`. pub steps: u64, + /// Length between two distinct *ticks* on the axis. pub epsilon: f64, } @@ -60,7 +72,7 @@ impl Graduation { #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] #[allow(non_camel_case_types)] -pub enum UnitSI { +enum UnitSI { // Partial list, which is tailored to the use case needs. Prevents possible // confusions between Mm and mm, for example. m, @@ -113,16 +125,45 @@ impl From<&str> for UnitSI { } } +/// Definition of an axis of a base. +/// +/// This links together valid values on this axis, as well as the +/// direction in the Universe of the axis and the base length unit of +/// the `1.0` value. // TODO: In the future this might become an Enum with AffineAxis, ArbitraryAxis, etc... #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct Axis { measurement_unit: UnitSI, graduation: Graduation, - // Coordinates in Universe, expressed in f64, and in the Universe number of dimensions. - pub unit_vector: Position, + // Coordinates in Universe, expressed in f64, and in the Universe + // number of dimensions. + unit_vector: Position, } impl Axis { + /// Instantiate a new Axis definition. + /// + /// # Parameters + /// + /// * `unit`: + /// SI Unit to use on this axis for the `1.0` value. + /// See [measurement_unit](#method.measurement_unit). + /// + /// * `unit_vector`: + /// A vector providing the direction in the Universe space of + /// this axis. + /// + /// * `set`: + /// The valid numbers on this axis. 
+ /// + /// * `minimum`: + /// The minimum value described by this axis *included*. + /// + /// * `maximum`: + /// The maximum value described by this axis *included*. + /// + /// * `steps`: + /// The number of steps, or discrete *ticks* on this axis. pub fn new( unit: &str, unit_vector: Vec, @@ -142,20 +183,48 @@ impl Axis { }) } + /// The unit, as in [SI unit] used on this axis, more specifically, + /// a [metric prefix] of the **meter**. + /// + /// Currently the following values are supported: + /// * `m` + /// * `dm` + /// * `cm` + /// * `mm` + /// * `um` + /// * `nm` + /// * `pm` + /// + /// [SI unit]: https://en.wikipedia.org/wiki/International_System_of_Units + /// [metric prefix]: https://en.wikipedia.org/wiki/Metric_prefix pub fn measurement_unit(&self) -> &str { self.measurement_unit.to_str() } + /// The unit vector of the axis. + /// + /// This vector is expressed in the Universe coordinate system. pub fn unit_vector(&self) -> &Position { &self.unit_vector } + /// The valid number range and properties on this axis. pub fn graduation(&self) -> &Graduation { &self.graduation } - // Project a point expressed in Universe coordinates from the origin of this - // axis on this axis. + /// Project a position on this axis. + /// + /// The resulting coordinate is expressed as an encoded coordinate + /// on this axis. + /// + /// # Parameters + /// + /// * `position`: + /// The position to project on this axis. It must be defined in + /// Universe coordinates, but with any translations already + /// applied so that the origin of the vector is the origin of + /// this axis. pub fn project_in(&self, position: &Position) -> Result { let max = self.graduation.maximum; let min = self.graduation.minimum; @@ -192,7 +261,19 @@ impl Axis { self.encode(d) } - // Convert a value on this axis to Universe coordinates, based from the origin of this axis. + /// Convert an encoded coordinate expressed on this axis into a + /// position. 
+ /// + /// The resulting position is expressed in the Universe reference + /// space, but from the origin of this axis. Any required + /// translation must be applied to this resulting position to obtain + /// an absolute value in the Universe space. + /// + /// # Parameters + /// + /// * `coordinate`: + /// The coordinate to project out of this axis. It must be + /// defined as an encoded coordinate on this axis. pub fn project_out(&self, coordinate: &Coordinate) -> Result { let d = self.decode(coordinate)?; @@ -202,7 +283,13 @@ impl Axis { Ok(&self.unit_vector * d) } - // Value is expressed on the current Axis, not in absolute coordinates! + /// Encode a coordinate expressed on this axis. + /// + /// # Parameters + /// + /// * `val`: + /// The coordinate to encode. It must be defined as a + /// coordinate on this axis. pub fn encode(&self, val: f64) -> Result { let max = self.graduation.maximum; let min = self.graduation.minimum; @@ -229,7 +316,13 @@ impl Axis { Ok(v.into()) } - // Value is expressed on the current Axis, not in absolute coordinates! + /// Decode a coordinate expressed on this axis. + /// + /// # Parameters + /// + /// * `val`: + /// The coordinate to decode. It must be defined as an encoded + /// coordinate on this axis. pub fn decode(&self, val: &Coordinate) -> Result { let max = self.graduation.maximum; let min = self.graduation.minimum; diff --git a/src/database/space/coordinate.rs b/src/database/space/coordinate.rs index a5967b6..2ae4bc2 100644 --- a/src/database/space/coordinate.rs +++ b/src/database/space/coordinate.rs @@ -11,18 +11,36 @@ use std::ops::Sub; use serde::Deserialize; use serde::Serialize; +/// Store efficiently a coordinate. +/// +/// While you can manually create a `Coordinate` value directly, using +/// the `From` trait will automatically choose the most efficient enum +/// member to store the value. This is the recommended way of using this +/// enum. 
#[derive(Clone, Copy, Debug, Deserialize, Serialize)] pub enum Coordinate { + /// Encoded coordinates whose value is in the range `[0; 2^8[`. CoordinateU8(u8), + /// Encoded coordinates whose value is in the range `[0; 2^16[`, + /// but should be used only for the range `[2^8; 2^16[`. CoordinateU16(u16), + /// Encoded coordinates whose value is in the range `[0; 2^32[`, + /// but should be used only for the range `[2^16; 2^32[`. CoordinateU32(u32), + /// Encoded coordinates whose value is in the range `[0; 2^64[`, + /// but should be used only for the range `[2^32; 2^64[`. CoordinateU64(u64), // We currently assume that 2^64 is enough to store encoded position values per axis. //CoordinateU128(u128), + /// Decoded coordinate value expressed as a floating point value over 64 bits. + /// For details on the precision, please see the + /// [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) reference. CoordinateF64(f64), } impl Coordinate { + /// Return the value as a `f64`, this may introduce a loss of + /// precision for encoded values. pub fn f64(&self) -> f64 { match *self { Coordinate::CoordinateU8(v) => f64::from(v), @@ -33,6 +51,7 @@ impl Coordinate { } } + /// Return the value as `u64`, this is valid only on encoded values. pub fn u64(&self) -> u64 { match *self { Coordinate::CoordinateU8(v) => u64::from(v), @@ -43,6 +62,8 @@ impl Coordinate { } } + /// Return the value as `usize`, this is valid only on encoded + /// values. 
pub fn as_usize(&self) -> usize { self.u64() as usize } diff --git a/src/database/space/coordinate_system.rs b/src/database/space/coordinate_system.rs index 56a3d8b..3877703 100644 --- a/src/database/space/coordinate_system.rs +++ b/src/database/space/coordinate_system.rs @@ -6,14 +6,39 @@ use super::coordinate::Coordinate; use super::position::Position; use super::MAX_K; +/// Kinds of space coordinate systems, or bases #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub enum CoordinateSystem { - Universe { origin: Position }, - // Coordinates in Universe, expressed in f64, and in the Universe number of dimensions. - AffineSystem { origin: Position, axes: Vec }, + /// Absolute base, which allows to generate transformation between + /// spaces by anchoring them relative to each other. + Universe { + /// A position which contains zeroes for all its coordinates, + /// but has a coordinate per dimensions of the highest + /// dimensions space referenced. + origin: Position, + }, + /// Base which needs only an affine transformation to map into the Universe. + AffineSystem { + /// Coordinates in Universe, expressed in f64, or decoded, and + /// in the Universe number of dimensions. + origin: Position, + + /// The definition of the coordinate system, through its axes. + axes: Vec, + }, } impl CoordinateSystem { + /// Instantiate a new coordinate system. + /// + /// # Parameters + /// + /// * `origin`: + /// The translation vector in Universe coordinates of this + /// base. + /// + /// * `axes`: + /// The list of axes defining the coordinate system. pub fn new(origin: Vec, axes: Vec) -> Self { CoordinateSystem::AffineSystem { origin: origin.into(), @@ -21,6 +46,7 @@ impl CoordinateSystem { } } + /// The translation vector, in Universe coordinates. pub fn origin(&self) -> &Position { match self { CoordinateSystem::Universe { origin, .. } => origin, @@ -28,6 +54,7 @@ impl CoordinateSystem { } } + /// The axes definition of this base. 
pub fn axes(&self) -> &Vec { match self { CoordinateSystem::Universe { .. } => { @@ -38,6 +65,7 @@ impl CoordinateSystem { } } + /// The number of dimensions of positions within this base. pub fn dimensions(&self) -> usize { match self { CoordinateSystem::Universe { .. } => MAX_K, @@ -45,6 +73,10 @@ impl CoordinateSystem { } } + /// The smallest bounding box containing the whole base, expressed + /// in decoded Universe coordinates. + /// + // FIXME: Add the translation vector! pub fn bounding_box(&self) -> (Position, Position) { let mut low = Vec::with_capacity(self.dimensions()); let mut high = Vec::with_capacity(self.dimensions()); @@ -67,6 +99,9 @@ impl CoordinateSystem { (low.into(), high.into()) } + /// The volume of this space. + /// + // FIXME: This assumes orthogonal spaces! pub fn volume(&self) -> f64 { let (low, high) = self.bounding_box(); let difference: Vec<_> = (high - low).into(); @@ -80,8 +115,19 @@ impl CoordinateSystem { volume } - // The position is expressed in coordinates in the universe, - // return a position in the current coordinate system. + /// Rebase a position in this coordinate space. + /// + /// Each coordinate is encoded individually, and a new `Position` + /// is generated. + /// + /// # Parameters + /// + /// * `position`: + /// expressed in decoded Universe coordinates. + /// + /// # Return value + /// + /// The encoded coordinates within this coordinate system. pub fn rebase(&self, position: &Position) -> Result { match self { CoordinateSystem::Universe { origin } => { @@ -106,8 +152,16 @@ impl CoordinateSystem { } } - // The position is expressed in coordinates in the current coordinate system, - // return a position in Universe coordinates. + /// Express the position in the Universe coordinate system. + /// + /// # Parameters + /// + /// * `position`: + /// expressed as an encoded coordinates in the coordinate system. + /// + /// # Return value + /// + /// The position expressed in Universe decoded coordinates. 
pub fn absolute_position(&self, position: &Position) -> Result { match self { CoordinateSystem::Universe { origin } => { @@ -132,8 +186,19 @@ impl CoordinateSystem { } } - // The position is expressed in the current system - // Encode each coordinate separately and return an encoded Position + /// Encode a position expressed in the current coordinate system. + /// + /// Each coordinate is encoded individually, and a new `Position` + /// is generated. + /// + /// # Parameters + /// + /// * `position`: + /// expressed in the current coordinate system. + /// + /// # Return value + /// + /// The encoded coordinates within this coordinate system. pub fn encode(&self, position: &[f64]) -> Result { let mut encoded = vec![]; @@ -155,8 +220,20 @@ impl CoordinateSystem { Ok(encoded.into()) } - // The position is expressed in the current system as an encoded value, - // return a position in the current system as f64 values. + /// Decode a position expressed in the current coordinate system as + /// an encoded value. + /// + /// Each coordinate is decoded individually. + /// + /// # Parameters + /// + /// * `position`: + /// expressed in the current coordinate system, as encoded + /// values. + /// + /// # Return value + /// + /// The decoded coordinates within this coordinate system. pub fn decode(&self, position: &Position) -> Result, String> { let mut decoded = vec![]; diff --git a/src/database/space/mod.rs b/src/database/space/mod.rs index 9dbf3a6..7eeb459 100644 --- a/src/database/space/mod.rs +++ b/src/database/space/mod.rs @@ -1,3 +1,7 @@ +//! Reference space definitions. +//! +//! This include notions such as shapes, positions, axes, etc… + mod axis; mod coordinate; mod coordinate_system; @@ -18,7 +22,14 @@ pub use coordinate_system::CoordinateSystem; pub use position::Position; pub use shape::Shape; -pub const MAX_K: usize = 3; +// Maximum number of dimensions currently supported. 
+// +// **Note:** This will be deprecated as soon as support is implemented +// in some dependencies. This is linked to limitations in +// [ironsea_index_sfc_dbc]. +// +// [ironsea_index_sfc_dbc]: https://github.com/epfl-dias/ironsea_index_sfc_dbc +const MAX_K: usize = 3; lazy_static! { static ref UNIVERSE: Space = Space { @@ -29,6 +40,7 @@ lazy_static! { }; } +/// A reference space, defined by its name and coordinate system. #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct Space { name: String, @@ -36,6 +48,15 @@ pub struct Space { } impl Space { + /// Instantiate a new space. + /// + /// # Parameters + /// + /// * `name`: + /// Id of the reference space. + /// + /// * `system`: + /// Coordinate system definition of the reference space. pub fn new(name: S, system: CoordinateSystem) -> Self where S: Into, @@ -46,54 +67,93 @@ impl Space { } } + /// Returns the Universe Space. + /// + /// This space contains all of the spaces, and allows us to connect + /// them to each other. pub fn universe() -> &'static Self { &UNIVERSE } + /// Transform a position from space `from` into a position in space `to`. + /// + /// # Parameters + /// + /// * `position`: + /// Position to transform, expressed as encoded coordinates. + /// + /// * `from`: + /// Space in which `position` is defined. + /// + /// * `to`: + /// Target space in which `position` should be expressed. pub fn change_base(position: &Position, from: &Space, to: &Space) -> Result { to.rebase(&from.absolute_position(position)?) } + /// Id of the reference space. pub fn name(&self) -> &String { &self.name } + /// Origin of the space, expressed in Universe. pub fn origin(&self) -> &Position { self.system.origin() } + /// Axes definition of the space. pub fn axes(&self) -> &Vec { self.system.axes() } + /// Returns the bounding box enclosing the whole space. pub fn bounding_box(&self) -> (Position, Position) { self.system.bounding_box() } + /// Total volume of the reference space. 
pub fn volume(&self) -> f64 { self.system.volume() } - // The position is expressed in coordinates in the universe, - // return a position in the current space. - pub fn rebase(&self, position: &Position) -> Result { + // `position` is expressed in the Universe, this return encoded + // coordinates in the current space. + fn rebase(&self, position: &Position) -> Result { self.system.rebase(position) } - // The position is expressed in coordinates in the current space, + // The position is expressed in encoded coordinates in the current space, // return an absolute position in Universe. - pub fn absolute_position(&self, position: &Position) -> Result { + fn absolute_position(&self, position: &Position) -> Result { self.system.absolute_position(position) } - // The position is expressed in the current space as an encoded value, - // return a position in the current system as f64 values + /// Decode coordinates expressed in the current space, to their + /// values within the axes definitions. + /// + /// # Parameters + /// + /// * `position`: + /// expressed in encoded coordinates within the current space. + /// + /// # Return value + /// + /// The decoded position within the space. pub fn decode(&self, position: &Position) -> Result, String> { self.system.decode(position) } - // The position is expressed in the current space, - // return a position expressed in the current space as an encoded value. + /// Encode a position expressed in the current space within the axes + /// value ranges. + /// + /// # Parameters + /// + /// * `position`: + /// expressed in the current space. + /// + /// # Return value + /// + /// The encoded coordinates within the space. 
pub fn encode(&self, position: &[f64]) -> Result { self.system.encode(position) } diff --git a/src/database/space/position.rs b/src/database/space/position.rs index 8cd1c71..aeed36e 100644 --- a/src/database/space/position.rs +++ b/src/database/space/position.rs @@ -18,24 +18,31 @@ use serde::Serialize; use super::coordinate::Coordinate; +/// Store a position as efficiently as possible in terms of space. #[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, Serialize)] pub enum Position { + /// 1 dimension positions. Position1(Coordinate), + /// 2 dimensions positions. Position2([Coordinate; 2]), + /// 3 dimensions positions. Position3([Coordinate; 3]), + /// 4 dimensions positions. Position4([Coordinate; 4]), + /// 5 dimensions positions. Position5([Coordinate; 5]), + /// 6 dimensions positions. Position6([Coordinate; 6]), + /// 7 dimensions positions. Position7([Coordinate; 7]), + /// 8 dimensions positions. Position8([Coordinate; 8]), + /// N dimensions positions. PositionN(Vec), } impl Position { - pub fn new(coordinates: Vec) -> Self { - coordinates.into() - } - + /// Returns the number of dimensions or size of the vector. pub fn dimensions(&self) -> usize { match self { Position::Position1(_) => 1, @@ -50,7 +57,7 @@ impl Position { } } - // Returns ||self|| + /// Compute `||self||`. pub fn norm(&self) -> f64 { if let Position::Position1(coordinates) = self { // the square root of a single number to the square is its positive value, so ensure it is. @@ -68,32 +75,48 @@ impl Position { } } - // Unit / Normalized vector from self. + /// Compute the unit vector pointing in the same direction as `self`. pub fn unit(&self) -> Self { self * (1f64 / self.norm()) } - // This multiplies self^T with other, producing a scalar value - pub fn dot_product(&self, other: &Self) -> f64 { - assert_eq!(self.dimensions(), other.dimensions()); + /// Multiplies `self` with `rhs`, producing a scalar value. 
+ /// + /// `self • rhs = product` + /// + /// **Note:** The two vector sizes must be equal, a.k.a the two + /// vectors must have the same number of dimensions. + /// + /// # Parameters + /// + /// `rhs`: + /// The right-hand side vector. + pub fn dot_product(&self, rhs: &Self) -> f64 { + assert_eq!(self.dimensions(), rhs.dimensions()); let mut product = 0f64; for k in 0..self.dimensions() { - product += (self[k] * other[k]).f64(); + product += (self[k] * rhs[k]).f64(); } product } + /// Remove bits of precision. + /// + /// # Parameters + /// + /// * `scale`: + /// Number of bits of precision to remove from each coordinates. pub fn reduce_precision(&self, scale: u32) -> Self { let mut position = Vec::with_capacity(self.dimensions()); for i in 0..self.dimensions() { - position.push((self[i].u64() >> scale).into()) + position.push(self[i].u64() >> scale) } - Position::new(position) + position.into() } } @@ -153,7 +176,10 @@ impl Index for Position { fn index(&self, k: usize) -> &Self::Output { match self { - Position::Position1(coordinate) => coordinate, + Position::Position1(coordinate) => { + assert_eq!(k, 0); + coordinate + } Position::Position2(coordinates) => &coordinates[k], Position::Position3(coordinates) => &coordinates[k], Position::Position4(coordinates) => &coordinates[k], @@ -169,7 +195,10 @@ impl Index for Position { impl IndexMut for Position { fn index_mut(&mut self, k: usize) -> &mut Self::Output { match self { - Position::Position1(coordinate) => coordinate, + Position::Position1(coordinate) => { + assert_eq!(k, 0); + coordinate + } Position::Position2(coordinates) => &mut coordinates[k], Position::Position3(coordinates) => &mut coordinates[k], Position::Position4(coordinates) => &mut coordinates[k], diff --git a/src/database/space/shape.rs b/src/database/space/shape.rs index aa003ca..8e658d3 100644 --- a/src/database/space/shape.rs +++ b/src/database/space/shape.rs @@ -5,16 +5,33 @@ use super::Coordinate; use super::Position; use super::Space; 
+/// Known shapes descriptions #[derive(Clone, Debug, Deserialize, Serialize)] pub enum Shape { + /// A singular point in space. Point(Position), //HyperRectangle([Position; MAX_K]), + /// A sphere in space. HyperSphere(Position, Coordinate), + + /// Hyperrectangle whose faces have one of the axis as a normal. BoundingBox(Position, Position), //Nifti(nifti_data??), } impl Shape { + /// Convert the encoded coordinates between two reference spaces. + /// + /// The resulting shape is expressed in encoded coordinates in the + /// target space. + /// + /// # Parameters + /// + /// * `from`: + /// Current reference space of the shape. + /// + /// * `to`: + /// Target reference space. pub fn rebase(&self, from: &Space, to: &Space) -> Result { match self { Shape::Point(position) => Ok(Shape::Point(Space::change_base(position, from, to)?)), @@ -36,6 +53,20 @@ impl Shape { } } + /// Decode the coordinates of the shape. + /// + /// The encoded coordinates of the shapes are expressed in the + /// provided space. + /// + /// # Parameters + /// + /// * `space`: + /// Reference space of the shape. It is used to decode the + /// encoded coordinates into positions. + /// + /// # Return value + /// + /// The shape with decoded positions within the space. pub fn decode(&self, space: &Space) -> Result { let s = match self { Shape::Point(position) => Shape::Point(space.decode(position)?.into()), @@ -51,6 +82,19 @@ impl Shape { Ok(s) } + /// Encode the positions of the shape. + /// + /// The positions of the shapes are expressed in the provided space. + /// + /// # Parameters + /// + /// * `space`: + /// Reference space of the shape. It is used to encode the + /// positions into encoded coordinates. + /// + /// # Return value + /// + /// The shape with encoded coordinates within the space. pub fn encode(&self, space: &Space) -> Result { let s = match self { Shape::Point(position) => { @@ -72,6 +116,10 @@ impl Shape { Ok(s) } + /// Compute the minimum bounding box of the shape. 
+ /// + /// This is an hyperrectangle whose faces are perpendicular to an + /// axis of the space, and which minimally covers the shape. pub fn get_mbb(&self) -> (Position, Position) { match self { Shape::Point(position) => (position.clone(), position.clone()), @@ -88,6 +136,12 @@ impl Shape { } } + /// Check if the shape overlaps with the given position. + /// + /// # Parameters + /// + /// * `position`: + /// The position to check. pub fn contains(&self, position: &Position) -> bool { match self { Shape::Point(reference) => reference == position, @@ -178,9 +232,8 @@ impl Shape { results } - // Transform a Shape into a list of Position which approximate the shape. - // Note: - // * All output positions are expressed within the space. + /// Transform a Shape into a list of `Position` which approximate + /// the shape. // TODO: Return an iterator instead, for performance! pub fn rasterise(&self) -> Result, String> { match self { @@ -200,10 +253,14 @@ impl Shape { } } - // Transform a Shape into a list of Position which approximate the shape. - // Note: - // * All input positions are expressed within the space. - // * All output positions are expressed in absolute positions in Universe + /// Transform a Shape into a list of `Position` which approximate + /// the shape, in absolute, or Universe positions. + /// + /// # Parameters + /// + /// * `space`: + /// Reference space in which the shape is expressed. + /// // TODO: Return an iterator instead, for performance! pub fn rasterise_from(&self, space: &Space) -> Result, String> { Ok(self @@ -216,6 +273,7 @@ impl Shape { .collect()) } + /// Compute the volume. 
pub fn volume(&self) -> f64 { match self { Shape::Point(_) => std::f64::EPSILON, // Smallest non-zero volume possible diff --git a/src/database/space_db.rs b/src/database/space_db.rs index d038c9e..a1bbb6a 100644 --- a/src/database/space_db.rs +++ b/src/database/space_db.rs @@ -193,11 +193,13 @@ impl SpaceDB { &self.reference_space } + /* Comment this for now, as this is not yet used. // The smallest volume threshold, which is the highest resolution, will // be at position 0 fn highest_resolution(&self) -> usize { 0 } + */ // The highest volume threshold, which is the lowest resolution, will // be at position len - 1 diff --git a/src/database/space_index.rs b/src/database/space_index.rs index acaa9da..5fdcf6f 100644 --- a/src/database/space_index.rs +++ b/src/database/space_index.rs @@ -139,7 +139,7 @@ impl SpaceIndex { self.index.find_by_value(id) } - /// Inputs and Results are also in encoded space coordinates. + // Inputs and Results are also in encoded space coordinates. pub fn find_by_shape( &self, shape: &Shape, diff --git a/src/lib.rs b/src/lib.rs index 1ee97c8..f157ee8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,33 @@ +#![deny(missing_docs)] + +//! # Mercator DB +//! +//! Database model for the Mercator spatial index. +//! +//! ## Mercator: Spatial Index +//! +//! **Mercator** is a spatial *volumetric* index for the +//! [Human Brain Project]. It is a component of the [Knowledge Graph] +//! service, which provides the spatial anchoring for the metadata +//! registered as well as processes the volumetric queries. +//! +//! It is built on top of the Iron Sea database toolkit. +//! +//! ## Iron Sea: Database Toolkit +//! **Iron Sea** provides a set of database engine bricks, which can be +//! combined and applied on arbitrary data structures. +//! +//! Unlike a traditional database, it does not assume a specific +//! physical structure for the tables nor the records, but relies on the +//! 
developer to provide a set of extractor functions which are used by +//! the specific indices provided. +//! +//! This enables the index implementations to be agnostic from the +//! underlying data structure, and re-used. +//! +//! [Human Brain Project]: http://www.humanbrainproject.eu +//! [Knowledge Graph]: http://www.humanbrainproject.eu/en/explore-the-brain/search/ + #[macro_use] extern crate lazy_static; diff --git a/src/storage/bincode.rs b/src/storage/bincode.rs index d3cb7f3..7ce1f70 100644 --- a/src/storage/bincode.rs +++ b/src/storage/bincode.rs @@ -1,3 +1,5 @@ +//! Bincode support + use std::fs::File; use std::io::BufWriter; use std::io::Error; @@ -9,6 +11,12 @@ use serde::Serialize; use super::model; +/// Deserialize a data structure. +/// +/// # Parameters +/// +/// * `from`: +/// File to read, which contains Bincode data. pub fn load(from: &str) -> Result where T: DeserializeOwned, @@ -26,6 +34,15 @@ where } } +/// Serialize a data structure. +/// +/// # Parameters +/// +/// * `data`: +/// Data to serialize. +/// +/// * `to`: +/// File to use to store the serialized data. pub fn store(data: T, to: &str) -> Result<(), Error> where T: Serialize, @@ -44,6 +61,32 @@ where } } +/// Build an index from the input files. +/// +/// # Parameters +/// +/// * `name`: +/// Index name, this value will also be used to generate file names +/// as such: +/// * `.spaces.bin` and `.objects.bin` will be appended for the +/// input files. +/// * `.index` will be appended for the index file. +/// +/// * `version`: +/// Parameter to distinguish revisions of an index. +/// +/// * `scales`: +/// An optional list of specific index resolutions to generate on +/// top of the full resolution one. 
+/// +/// * `max_elements`: +/// If this is specified, automatically generates scaled indices, by +/// halving the number of elements between resolutions, and stops +/// generating indices either when the number of points remaining is +/// equal to the number of distinct Ids, or smaller or equal to this +/// value. +/// +/// **Note**: `max_elements` is ignored when `scales` is not `None`. pub fn build( name: &str, version: &str, diff --git a/src/storage/json.rs b/src/storage/json.rs index 45992e0..0ddfe08 100644 --- a/src/storage/json.rs +++ b/src/storage/json.rs @@ -1,3 +1,5 @@ +//! JSON support + use std::fs::File; use std::io::BufWriter; use std::io::Error; @@ -29,6 +31,14 @@ where } } +/// Deserialise a JSON file. +/// +/// # Parameters +/// +/// * `name`: +/// Base name of the file, +/// * `.xyz` will be automatically appended for the source file, while +/// * `.bin` will be appended for the output file. pub fn from(name: &str) -> Result<(), Error> where T: Serialize + DeserializeOwned, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 82303bc..197e22b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,3 +1,8 @@ +//! Persistent data functions and types. +//! +//! Serialisation / deserialisation functions and structures used to +//! store and manipulate indices and data. + pub mod bincode; pub mod json; pub mod model; diff --git a/src/storage/model.rs b/src/storage/model.rs index 52dfd92..09b143f 100644 --- a/src/storage/model.rs +++ b/src/storage/model.rs @@ -1,3 +1,8 @@ +//! Model definitions for serialisation. +//! +//! The following definitions are used as part of the serialisation +//! process to exchange objects either through network or to storage. + use std::collections::HashMap; use serde::Deserialize; @@ -5,32 +10,59 @@ use serde::Serialize; use crate::database; use database::space; +use database::space_index::SpaceSetObject; use database::Core; -use database::SpaceSetObject; +/// Reference space definition. 
#[derive(Clone, Debug, Deserialize, Serialize)] pub struct Space { + /// **Id** of the space. pub name: String, + + /// Position of the origin of axis expressed in Universe coordinates. pub origin: Vec, + + /// List of axes of the space. pub axes: Vec, } +/// Reference space axis definition. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Axis { + /// Length unit for the value `1.0`. pub measurement_unit: String, + + /// Define the valid range of number on this axis. pub graduation: Graduation, + + /// Vector which defines the direction of the axis in the Universe pub unit_vector: Vec, } +/// Valid range of numbers on the axis. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Graduation { + /// Mathematical Number Set of numbers allowed. pub set: String, + + /// Minimum value allowed, included. pub minimum: f64, + + /// Maximum value allowed, excluded. pub maximum: f64, + + /// Number of distinct positions between `[min; max[` pub steps: u64, } +/// A single spatial location. +/// +/// This has a value per dimension of the space it is expressed in. +pub type Point = Vec; + pub mod v1 { + //! REST API objects, v1. + use std::collections::HashMap; use serde::Deserialize; @@ -39,25 +71,41 @@ pub mod v1 { use crate::database; use database::space; + use super::Point; use super::Properties; + /// Links Properties to a list of spatial volumes. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct SpatialObject { + /// Definition of the `properties` to tag in space. pub properties: Properties, + + /// List of volumes associated with `properties`. pub shapes: Vec, } + /// Define a Shape, within a specific reference space. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Shape { + /// Type of the shape, which is used to interpret the list of `vertices`. #[serde(rename = "type")] pub type_name: String, + + /// Id of the reference space the points are defined in. 
#[serde(rename = "space")] pub reference_space: String, + + /// List of spatial positions. pub vertices: Vec, } - type Point = Vec; - + /// Convert a list of properties grouped by space id, then positions to a + /// list of Spatial Objects for the rest API v1. + /// + /// # Parameters + /// + /// * `list`: + /// A list of (**Space Id**, [ ( *Spatial position*, `&Properties` ) ]) tuples. pub fn to_spatial_objects( list: Vec<(&String, Vec<(space::Position, &database::Properties)>)>, ) -> Vec { @@ -95,6 +143,8 @@ pub mod v1 { } pub mod v2 { + //! REST API objects, v2. + use std::collections::HashMap; use serde::Deserialize; @@ -103,30 +153,68 @@ pub mod v2 { use crate::database; use database::space; + use super::Point; use super::Properties; + /// Links Properties to a list of spatial volumes. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct SpatialObject { + /// Definition of the `properties` to tag in space. pub properties: Properties, + + /// List of volumes associated with `properties`. pub volumes: Vec, } + /// Defines a volume as the union of geometric shapes. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Volume { + /// Reference space id. pub space: String, + + /// List of geometric shapes defined in the reference space + /// `space`. pub shapes: Vec, } + /// Describes an homogeneous list of geometric shapes. #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(rename_all = "lowercase")] pub enum Shape { + /// List of points. Points(Vec), + + /// List of Bounding boxes or *hyper rectangles* for which each + /// face is perpendicular to one of the axis of the reference + /// space. 
+ /// + /// That property allows us to describe such a hyperrectangle + /// with two corners: + /// + /// * one for which all the coordinates are the smallest among + /// all the corners, per dimension, which is called here + /// *lower corner* + /// + /// * one for which all the coordinates are the greatest among + /// all the corners, per dimension, which is called + /// *higher corner*. + /// + /// The list simply stores tuples of (`lower corner`, + /// `higher corner`), as this is enough to reconstruct all the + /// corners of the bounding box. BoundingBoxes(Vec<(Point, Point)>), + + /// List of hyperspheres, stored as (`center`, radius) tuples. HyperSpheres(Vec<(Point, f64)>), } - type Point = Vec; - + /// Convert a list of properties grouped by space id, then positions to a + /// list of Spatial Objects for the rest API v2. + /// + /// # Parameters + /// + /// * `list`: + /// A list of (**Space Id**, [ ( *Spatial position*, `&Properties` ) ]) tuples. pub fn to_spatial_objects( list: Vec<(&String, Vec<(space::Position, &database::Properties)>)>, ) -> Vec { @@ -163,10 +251,15 @@ pub mod v2 { } } +/// **Properties** which are registered at one or more spatial locations. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Properties { + /// The **type** of *Id*, this allows for different kinds of objects + /// to have the same *Id*, but handled distinctly. #[serde(rename = "type")] pub type_name: String, + + /// An arbitrary string. pub id: String, } @@ -244,6 +337,35 @@ impl From<&&database::Properties> for Properties { pub use v1::SpatialObject; +/// Generate an index. +/// +/// # Parameters +/// +/// * `name`: +/// Name to give to the index. +/// +/// * `version`: +/// Parameter to distinguish revisions of an index. +/// +/// * `spaces`: +/// A list of the reference spaces. Only objects whose reference +/// space is known will be indexed. +/// +/// * `objects`: +/// The data points to index. 
+/// +/// * `scales`: +/// An optional list of specific index resolutions to generate on +/// top of the full resolution one. +/// +/// * `max_elements`: +/// If this is specified, automatically generates scaled indices, by +/// halving the number of elements between resolutions, and stops +/// generating indices either when the number of points remaining is +/// equal to the number of distinct Ids, or smaller or equal to this +/// value. +/// +/// **Note**: `max_elements` is ignored when `scales` is not `None`. pub fn build_index( name: &str, version: &str, diff --git a/src/storage/xyz.rs b/src/storage/xyz.rs index 5c6a50f..36a6d05 100644 --- a/src/storage/xyz.rs +++ b/src/storage/xyz.rs @@ -1,3 +1,113 @@ +//! # XYZ file format +//! +//! This module supports reading files read by the [MeshView] tool used at +//! the [University of Oslo]. +//! +//! # File structure +//! +//! Each file begins with: +//! +//! ```txt +//! RGBA [Red] [Green] [Blue] [Alpha] # RGBA +//! [X],[Y],[Z] # WHS Origin +//! [X],[Y],[Z] # Bregma +//! +//! SCALE [F] +//! ``` +//! +//! * `RGBA [Red] [Green] [Blue] [Alpha]`: defines the color to use for +//! the following points +//! * `[X],[Y],[Z] # WHS Origin`: defines where the Waxholm Origin is +//! in Voxel coordinates. +//! * `[X],[Y],[Z] # Bregma`: same as above, for another reference +//! space. +//! * `SCALE [F]`: **TBC** Size of the voxels. +//! +//! The rest of the file contains (one per line): +//! * coordinate triplets (x, y and z), each representing one point +//! coordinate. +//! * `RGB [Red] [Green] [Blue]`: Which applies from that line +//! until further notice. +//! * A comment Line, starting with `#` +//! +//! ## File Coordinate system +//! +//! Coordinates in MeshView follow RAS (Right-Anterior-Superior) +//! orientation and are expressed in voxels: +//! * First axis `x` starts from the left side of the volume, and +//! points towards the right. +//! * Second axis `y` starts from the backmost position in the volume, +//! 
and points towards the front. +//! * Third axis `z` starts from the bottom of the volume and points +//! towards the top. +//! +//! # Waxholm Space +//! +//! ## Conversion to Waxholm Space +//! +//! The [Waxholm Space Atlas] of the Sprague Dawley Rat Brain (WHS) uses +//! the same axis order and orientation as the MeshView tool, there is +//! only a translation of the origin, and scaling have to be applied. +//! +//! # Example +//! +//! ```txt +//! RGBA 1 0 0 1 # RGBA +//! 244,623,248 # WHS Origin +//! 246,653,440 # Bregma +//! +//! #Aar27s49 26 0 +//! RGB 0.12941176470588237 0.403921568627451 0.1607843137254902 +//! 221.40199877 413.34541500312037 172.79973508489095 +//! 220.5800097805 412.82939421970866 173.56428074436994 +//! +//! #Aar27s48 49 0 +//! RGB 0.12941176470588237 0.403921568627451 0.1607843137254902 +//! 237.35325687425 412.5720395183866 176.6713556605702 +//! ``` +//! +//! ## Conversion to Waxholm +//! +//! Assuming the following extents of "WHS Rat 39 μm" in voxels: +//! +//! * Leftmost sagittal plane: `x = 0` +//! * Backmost coronal plane: `y = 0` +//! * Bottommost horizontal plane: `z = 0` +//! * Rightmost sagittal plane: `x = 511` +//! * Frontmost coronal plane: `y = 1023` +//! * Topmost horizontal plane: `z = 511` +//! +//! **NOTE**: Directions are deliberately matching the default +//! orientation of ​NIfTI​ data. +//! +//! 1. As per the `WHS Origin` directive, it is at 244, 623, 248 voxel +//! coordinates, which means each coordinate must be subtracted with +//! the corresponding value, then +//! 2. the coordinates must be converted to millimeters, a.k.a +//! multiplied by the atlas resolution. For the atlas of this example +//! it is 0.0390625 [mm], isotropic. +//! +//! This gives us the following conversion formula: +//! +//! ```txt +//! ⎡ 0.0390625 0 0 0 ⎤ +//! [ xw yw zw 1 ] = [ xq yq zq 1 ] * ⎢ 0 0.0390625 0 0 ⎥ +//! ⎢ 0 0 0.0390625 0 ⎥ +//! ⎣ -9.53125 -24.3359375 -9.6875 1 ⎦ +//! ``` +//! +//! Where: +//! 
* `[x​w​, y​w​, z​w 1]​` are WHS coordinates (RAS directions, expressed +//! in millimeters). +//! * `[x​q​, y​q, z​q 1]`​ are MeshView coordinates for the **WHS Rat 39 μm** +//! package (RAS directions, expressed in 39.0625 μm voxels). +//! +//! +//! +//! [MeshView]: http://www.nesys.uio.no/MeshView/meshview.html?atlas=WHS_SD_rat_atlas_v2 +//! [University of Oslo]: https://www.med.uio.no/imb/english/research/groups/neural-systems/index.html +//! [Waxholm Space Atlas]: https://www.nitrc.org/projects/whs-sd-atlas + use std::collections::HashMap; use std::fs::File; use std::io::BufReader; @@ -102,8 +212,18 @@ fn convert(string: &str) -> Result, Error> { .collect()) } +/// Read a XYZ file and convert it to the internal format for indexing. +/// +/// This only converts the data point definitions, a reference space +/// needs to be provided as well to be able to build an index. +/// +/// # Parameters +/// +/// * `name`: +/// Base name of the file, +/// * `.xyz` will be automatically appended for the source file, while +/// * `.bin` will be appended for the output file. pub fn from(name: &str) -> Result<(), Error> { - // Convert Reference Space definitions let fn_in = format!("{}.xyz", name); let fn_out = format!("{}.bin", name);