Initial commit

2019-08-27 17:21:26 +02:00
parent 09d79d5d3b
commit 0a24bb441e
17 changed files with 2859 additions and 0 deletions

4
.gitignore vendored

@@ -3,5 +3,9 @@
.DS_Store
.*
*~
test.bin
test.index
*k.json
!.gitignore
!100k.json

50
Cargo.toml Normal file

@@ -0,0 +1,50 @@
[package]
name = "mercator_db"
version = "0.1.0"
authors = ["EPFL-DIAS", "Lionel Sambuc <lionel.sambuc@epfl.ch>"]
edition = "2018"
description = "Database model for mercator."
#homepage = "https://crates.io/crates/mercator_db"
repository = "https://github.com/epfl-dias/mercator_db"
readme = "README.md"
keywords = []
categories = ["database-implementations", "data-structures"]
license = "MIT"
#license-file = "LICENSE"
include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.rs"]
[lib]
name = "mercator_db"
path = "src/lib.rs"
[[bin]]
name = "db-test"
path = "src/main.rs"
[dependencies]
ironsea_index = "^0.1"
ironsea_index_sfc_dbc = "^0.1"
ironsea_index_hashmap = "^0.1"
ironsea_table = "^0.1"
ironsea_table_vector = "^0.1"
memmap = "^0.7"
lazy_static = "^1.3"
arrayref = "^0.3" # For Positions Objects
serde = "^1.0"
serde_derive = "^1.0"
serde_json = "^1.0"
bincode = "^1.1"
# Used for main.rs as integration test
measure_time = "^0.6" # To measure parsing time, only required by the binary
# Logging macros API
log = { version = "^0.4", features = ["max_level_trace", "release_max_level_info"] }
pretty_env_logger = "^0.3" # Logger implementation

66
README.md Normal file

@@ -0,0 +1,66 @@
# Mercator DB
Database model for the Mercator spatial index.
## Mercator: Spatial Index
**Mercator** is a spatial *volumetric* index for the [Human Brain Project](http://www.humanbrainproject.eu). It is a component of the [Knowledge Graph](http://www.humanbrainproject.eu/en/explore-the-brain/search/) service, where it provides spatial anchoring for the registered metadata and processes the volumetric queries.
It is built on top of the Iron Sea database toolkit.
## Iron Sea: Database Toolkit
**Iron Sea** provides a set of database engine bricks, which can be combined and applied on arbitrary data structures.
Unlike a traditional database, it does not assume a specific physical structure for the tables or the records, but relies on the developer to provide a set of extractor functions which are used by the specific indices.
This keeps the index implementations agnostic to the underlying data structure, so they can be reused.
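As a rough illustration of the extractor idea, a record type only has to tell an index how to obtain its key; the index never inspects the rest of the structure. The sketch below (with a made-up `Neuron` type, not part of this crate) uses the `ironsea_index::Record` trait the same way this crate does for its own types:
```rust
use ironsea_index::Record;

// A hypothetical user-defined record; the index stays agnostic to its layout.
struct Neuron {
    id: String,
    position: [f64; 3],
}

// The extractor: the only thing the index needs from a `Neuron`.
impl Record<String> for Neuron {
    fn key(&self) -> String {
        self.id.clone()
    }
}
```
This crate implements the same trait for `Core`, `Space` and `SpaceSetObject` (see `src/database/`).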
## Requirements
### Software
* Rust: https://www.rust-lang.org
## Quick start
### Building from sources
To build this project, you will need to run the following:
```sh
cargo build --release
```
### Installation
To install the software on your system, you can use:
```sh
cargo install --path .
```
### Usage
The provided binary `db-test` is currently used only as an integration test. It converts a JSON input (`test.json`) into a binary representation (`test.bin`), builds an index over it (`test.index`), and then runs a couple of hard-coded queries against that index.
```sh
cargo run --release
```
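The crate can also be used as a library. The sketch below mirrors what `src/main.rs` does and is only an example: it assumes an index named `test.index` has already been built and that the dataset defines the reference space queried here.
```rust
use mercator_db::space::Shape;
use mercator_db::DataBase;

fn main() -> Result<(), String> {
    let db = DataBase::load("test")?;
    let core = db.core("test")?;
    let space = db.space("space0.146629817062")?;

    // Query everything inside an axis-aligned box, expressed in space coordinates.
    let shape = Shape::BoundingBox(
        space.encode(&[0.2, 0.2, 0.2])?,
        space.encode(&[0.8, 0.8, 0.8])?,
    );
    let results = core.get_by_shape(&db, &shape, space.name(), None, 0.0)?;
    println!("{} objects found", results.len());

    Ok(())
}
```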
## Documentation
For more information, please refer to the [documentation](https://epfl-dias.github.io/PROJECT_NAME/).
If you want to build the documentation and access it locally, you can use:
```sh
cargo doc --open
```
## Acknowledgements
This open source software code was developed in part or in whole in the
Human Brain Project, funded from the European Union's Horizon 2020
Framework Programme for Research and Innovation under the Specific Grant
Agreement No. 785907 (Human Brain Project SGA2).

359
src/database/db_core.rs Normal file

@@ -0,0 +1,359 @@
use super::space::Position;
use super::space::Shape;
use super::space::Space;
use super::space_db::SpaceDB;
use super::space_index::SpaceSetObject;
use super::DataBase;
use super::ResultSet;
use crate::SpaceObject;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub enum Properties {
Feature(String),
Unknown(String, String),
}
impl Properties {
pub fn id(&self) -> &String {
match self {
Properties::Feature(id) => id,
Properties::Unknown(id, _) => id,
}
}
pub fn type_name(&self) -> String {
match self {
Properties::Feature(_) => "Feature".into(),
Properties::Unknown(_, type_name) => type_name.into(),
}
}
pub fn feature<S>(id: S) -> Properties
where
S: Into<String>,
{
Properties::Feature(id.into())
}
pub fn unknown<S>(id: S, type_name: S) -> Properties
where
S: Into<String>,
{
Properties::Unknown(id.into(), type_name.into())
}
}
impl PartialEq for Properties {
fn eq(&self, other: &Self) -> bool {
self.id() == other.id() && self.type_name() == other.type_name()
}
}
impl Eq for Properties {}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Core {
title: String,
version: String,
properties: Vec<Properties>,
space_db: Vec<SpaceDB>,
}
impl Core {
pub fn new<S>(
title: S,
version: S,
spaces: &[Space],
properties: Vec<Properties>,
space_objects: Vec<SpaceSetObject>,
) -> Self
//Result<Self, String>
where
S: Into<String>,
{
// Sort out the space, and create a SpaceDB per reference space
let mut space_dbs = vec![];
for space in spaces {
// Filter the points of this space, and encode them before creating the index.
let filtered = space_objects
.iter()
.filter_map(|object| {
if &object.space_id().0 == space.name() {
let position: Vec<f64> = object.position().into();
Some(SpaceSetObject::new(
space.name(),
space.encode(&position).unwrap(),
*object.value(),
))
} else {
None
}
})
.collect();
space_dbs.push(SpaceDB::new(space.name(), filtered))
}
Core {
title: title.into(),
version: version.into(),
properties,
space_db: space_dbs,
}
}
    // Check whether the current core holds no data for the given space_id.
pub fn is_empty<S>(&self, space_id: S) -> bool
where
S: Into<String>,
{
let id = space_id.into();
for s in &self.space_db {
if s.name() == &id {
return s.is_empty();
}
}
// Not found, so the space is empty.
true
}
pub fn name(&self) -> &String {
&self.title
}
pub fn version(&self) -> &String {
&self.version
}
pub fn keys(&self) -> &Vec<Properties> {
&self.properties
}
fn to_space_object(&self, space_id: &str, list: Vec<SpaceSetObject>) -> Vec<SpaceObject> {
list.into_iter()
.map(|o| {
let offset: usize = o.value().into();
let value = self.properties[offset].clone();
SpaceObject {
space_id: space_id.to_string(),
position: o.position().clone(),
value,
}
})
.collect()
}
fn decode_positions(
list: &mut [SpaceObject],
space: &Space,
db: &DataBase,
output_space: Option<&str>,
) -> Result<(), String> {
if let Some(unified_id) = output_space {
let unified = db.space(unified_id)?;
// Rebase the point to the requested output space before decoding.
for o in list {
o.position = unified
.decode(&Space::change_base(&o.position, space, unified)?)?
.into();
o.space_id = unified_id.to_string();
}
} else {
// Decode the positions into f64 values, which are defined in their
// respective reference space.
for o in list {
// Simply decode
o.position = space.decode(&o.position)?.into();
}
}
Ok(())
}
// Search by positions defining a volume.
// Positions ARE DEFINED IN F64 VALUES IN THE SPACE. NOT ENCODED!
pub fn get_by_positions(
&self,
db: &DataBase,
positions: &[Position],
from: &str,
output_space: Option<&str>,
threshold_volume: f64,
) -> ResultSet {
let mut results = vec![];
let count = positions.len();
let from = db.space(from)?;
for s in &self.space_db {
let to = db.space(s.name())?;
let mut p = Vec::with_capacity(count);
for position in positions {
let position: Vec<f64> = Space::change_base(position, from, to)?.into();
p.push(to.encode(&position)?);
}
let r = s.get_by_positions(&p, threshold_volume)?;
let mut r = self.to_space_object(s.name(), r);
Self::decode_positions(&mut r, to, db, output_space)?;
results.append(&mut r);
}
Ok(results)
}
// Search by shape defining a volume:
// * Hyperrectangle (MBB),
// * HyperSphere (radius around a point),
// * Point (Specific position)
// SHAPE IS DEFINED IN F64 VALUES IN THE SPACE. NOT ENCODED!
pub fn get_by_shape(
&self,
db: &DataBase,
shape: &Shape,
space_id: &str,
output_space: Option<&str>,
threshold_volume: f64,
) -> ResultSet {
let mut results = vec![];
let shape_space = db.space(space_id)?;
for s in &self.space_db {
let current_space = db.space(s.name())?;
let current_shape = shape.rebase(shape_space, current_space)?;
// println!("current shape: {:?}", current_shape);
// let current_shape = shape.encode(current_space)?;
// println!("current shape Encoded: {:?}", current_shape);
let r = s.get_by_shape(&current_shape, threshold_volume)?;
let mut r = self.to_space_object(s.name(), r);
Self::decode_positions(&mut r, current_space, db, output_space)?;
results.append(&mut r);
}
Ok(results)
}
// Search by Id, a.k.a values
pub fn get_by_id<S>(
&self,
db: &DataBase,
id: S,
output_space: Option<&str>,
threshold_volume: f64,
) -> ResultSet
where
S: Into<String>,
{
let id: String = id.into();
let mut results = vec![];
// Do we have this ID registered at all?
if let Ok(offset) = self
.properties
.binary_search_by_key(&&id, |properties| properties.id())
{
// Yes, so now let's find all the position linked to it, per
// reference space
for s in &self.space_db {
let current_space = db.space(s.name())?;
let r = s.get_by_id(offset, threshold_volume)?;
let mut r = self.to_space_object(s.name(), r);
Self::decode_positions(&mut r, current_space, db, output_space)?;
results.append(&mut r);
}
}
Ok(results)
}
// Search by Label, a.k.a within a volume defined by the positions of an Id.
// FIXME: NEED TO KEEP TRACK OF SPACE IDS AND DO CONVERSIONS
pub fn get_by_label<S>(
&self,
db: &DataBase,
id: S,
output_space: Option<&str>,
threshold_volume: f64,
) -> ResultSet
where
S: Into<String>,
{
let id: String = id.into();
let mut results = vec![];
if let Ok(offset) = self
.properties
.binary_search_by_key(&&id, |properties| properties.id())
{
// Generate the search volume. Iterate over all reference spaces, to
// retrieve a list of SpaceSetObjects linked to `id`, then iterate
// over the result to generate a list of positions.
let search_volume = self
.space_db
.iter()
.filter_map(|s| match s.get_by_id(offset, threshold_volume) {
Ok(v) => Some(v),
Err(_) => None,
})
.flat_map(|v| v)
.map(|o| o.position().clone())
.collect::<Vec<_>>();
/*
let search_volume = self
.space_db
.iter()
.filter_map(|s| match s.get_by_id(offset, threshold_volume) {
Err(_) => None,
Ok(v) => Some((
s.name(),
v.into_iter().map(|o| o.position()).collect::<Vec<_>>(),
)),
})
.filter_map(|(space_id, list)| match db.space(space_id) {
Err(_) => None,
Ok(space) => Some((
space_id,
list.into_iter()
.map(|o| space.decode(o).into())
.collect::<Vec<Position>>(),
)),
}).filter_map(|(space_id, list)|)
.collect::<Vec<_>>();
*/
// Select based on the volume, and filter out the label position themselves.
for s in &self.space_db {
let to = db.space(s.name())?;
let r = s.get_by_positions(&search_volume, threshold_volume)?;
let mut r = self.to_space_object(s.name(), r);
Self::decode_positions(&mut r, to, db, output_space)?;
results.append(&mut r);
}
}
Ok(results)
}
}
impl ironsea_index::Record<String> for Core {
fn key(&self) -> String {
self.title.clone()
}
}

210
src/database/mod.rs Normal file

@@ -0,0 +1,210 @@
mod db_core;
pub mod space;
mod space_db;
mod space_index;
use std::fs::File;
use std::hash::Hash;
use std::hash::Hasher;
use ironsea_index::Indexed;
use ironsea_table_vector::VectorTable;
use memmap::Mmap;
pub use db_core::Core;
pub use db_core::Properties;
use space::Position;
use space::Space;
pub use space_index::SpaceSetObject;
pub type ResultSet = Result<Vec<SpaceObject>, String>;
pub type ReferenceSpaceIndex = ironsea_index_hashmap::Index<VectorTable<Space>, Space, String>;
type CoreIndex = ironsea_index_hashmap::Index<VectorTable<Core>, Core, String>;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceId(String);
impl SpaceId {
pub fn new<S>(space_name: S) -> Self
where
S: Into<String>,
{
SpaceId(space_name.into())
}
pub fn get(&self, index: &ReferenceSpaceIndex) -> Self {
let s = index.find(&self.0);
assert_eq!(s.len(), 1);
SpaceId(s[0].name().clone())
}
}
impl<S> From<S> for SpaceId
where
S: Into<String>,
{
fn from(id: S) -> Self {
SpaceId(id.into())
}
}
impl PartialEq for SpaceId {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
#[derive(Clone, Debug, Serialize)]
pub struct SpaceObject {
pub space_id: String,
pub position: Position,
pub value: Properties,
}
impl PartialEq for SpaceObject {
fn eq(&self, other: &Self) -> bool {
self.space_id == other.space_id
&& self.value == other.value
&& self.position == other.position
}
}
impl Eq for SpaceObject {}
impl Hash for SpaceObject {
    fn hash<H: Hasher>(&self, _state: &mut H) {
unimplemented!()
}
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct DataBase {
reference_spaces: ReferenceSpaceIndex,
cores: CoreIndex,
}
impl DataBase {
pub fn new(spaces: Vec<Space>, cores: Vec<Core>) -> Self {
DataBase {
reference_spaces: ReferenceSpaceIndex::new(VectorTable::new(spaces)),
cores: CoreIndex::new(VectorTable::new(cores)),
}
}
pub fn load<S>(name: S) -> Result<Self, String>
where
S: Into<String>,
{
let name = name.into();
let fn_index = format!("{}.index", name);
let file_in = match File::open(fn_index) {
Err(e) => return Err(format!("{:?}", e)),
Ok(file) => file,
};
let mmap = match unsafe { Mmap::map(&file_in) } {
Err(e) => return Err(format!("{:?}", e)),
Ok(mmap) => mmap,
};
match bincode::deserialize(&mmap[..]) {
Err(e) => Err(format!("Index deserialization error: {:?}", e)),
Ok(db) => Ok(db),
}
}
    // Check whether the given space_id holds no data in any core of the DB.
fn is_empty<S>(&self, id: S) -> bool
where
S: Into<String>,
{
let id = id.into();
for s in self.cores.keys() {
let core: &Core = self.cores.find(s)[0];
if !core.is_empty(id.clone()) {
return false;
}
}
true
}
fn check_exactly_one<'t, T, S>(list: &[&'t T], name: S, value: S) -> Result<&'t T, String>
where
S: Into<String>,
{
if list.len() > 1 {
Err(format!(
"Multiple {} registered under `{}`: {}",
name.into(),
value.into(),
list.len()
))
} else if list.is_empty() {
Err(format!(
"No {} registered under `{}`: {}",
name.into(),
value.into(),
list.len()
))
} else {
Ok(&list[0])
}
}
pub fn space_id<S>(&self, name: S) -> Result<SpaceId, String>
where
S: Into<String>,
{
let name = name.into();
let r = self.reference_spaces.find(&name);
let s: &Space = Self::check_exactly_one(&r, "spaces", &name)?;
Ok(SpaceId(s.name().clone()))
}
    // List the names of the registered reference spaces
pub fn space_keys(&self) -> &Vec<String> {
self.reference_spaces.keys()
}
// Lookup a space within the reference spaces registered
pub fn space<S>(&self, name: S) -> Result<&Space, String>
where
S: Into<String>,
{
let name = name.into();
if &name == space::Space::universe().name() {
Ok(space::Space::universe())
} else {
let r = self.reference_spaces.find(&name);
Self::check_exactly_one(&r, "spaces", &name)
}
}
    // List the names of the registered datasets (cores)
pub fn core_keys(&self) -> &Vec<String> {
self.cores.keys()
}
// Lookup a dataset within the datasets registered
pub fn core<S>(&self, name: S) -> Result<&Core, String>
where
S: Into<String>,
{
let name = name.into();
let r = self.cores.find(&name);
Self::check_exactly_one(&r, "cores", &name)
}
}
impl ironsea_index::Record<String> for Space {
fn key(&self) -> String {
self.name().clone()
}
}

181
src/database/space/axis.rs Normal file

@@ -0,0 +1,181 @@
use super::coordinate::Coordinate;
use super::position::Position;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub enum NumberSet {
N,
Z,
Q,
R,
}
impl From<String> for NumberSet {
fn from(set: String) -> Self {
match set.as_str() {
"N" => NumberSet::N,
"Z" => NumberSet::Z,
"Q" => NumberSet::Q,
"R" => NumberSet::R,
_ => panic!("Invalid set number: '{}', expected: N, Z, Q, R", set),
}
}
}
impl From<NumberSet> for String {
fn from(set: NumberSet) -> String {
let s = match set {
NumberSet::N => "N",
            NumberSet::Z => "Z",
NumberSet::Q => "Q",
NumberSet::R => "R",
};
s.to_string()
}
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Graduation {
pub set: NumberSet,
pub minimum: f64,
pub maximum: f64,
pub steps: u64,
pub epsilon: f64,
}
impl Graduation {
fn new(set: NumberSet, minimum: f64, maximum: f64, steps: u64) -> Result<Self, String> {
Ok(Graduation {
set,
minimum,
maximum,
steps,
epsilon: (maximum - minimum) / (steps as f64),
})
}
}
// TODO: In the future this might become an Enum with AffineAxis, ArbitraryAxis, etc...
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Axis {
measurement_unit: String,
graduation: Graduation,
// Coordinates in Universe, expressed in f64, and in the Universe number of dimensions.
pub unit_vector: Position,
}
impl Axis {
pub fn new<S>(
unit: S,
unit_vector: Vec<f64>,
set: NumberSet,
minimum: f64,
maximum: f64,
steps: u64,
) -> Result<Self, String>
where
S: Into<String>,
{
// Convert to Position, and ensure it is a unit vector.
let unit_vector = Position::from(unit_vector).unit();
let graduation = Graduation::new(set, minimum, maximum, steps)?;
Ok(Axis {
measurement_unit: unit.into(),
graduation,
unit_vector,
})
}
pub fn measurement_unit(&self) -> &String {
&self.measurement_unit
}
pub fn unit_vector(&self) -> &Position {
&self.unit_vector
}
pub fn graduation(&self) -> &Graduation {
&self.graduation
}
// Project a point expressed from the origin of this axis on this axis.
pub fn project_in(&self, position: &Position) -> Result<Coordinate, String> {
let max = self.graduation.maximum;
let min = self.graduation.minimum;
let d = position.dot_product(&self.unit_vector);
// Ensure it is within allowed range: Upper bound.
if d > max {
return Err(format!("Encode: position out of bounds: {} >= {}", d, max));
}
// Ensure it is within allowed range: Lower bound.
if d < min {
return Err(format!("Encode: position out of bounds: {} < {}", d, min));
}
self.encode(d)
}
// Convert a value on this axis to Universe coordinates, based from the origin of this axis.
pub fn project_out(&self, coordinate: &Coordinate) -> Result<Position, String> {
let d = self.decode(coordinate)?;
Ok(self.unit_vector.clone() * d)
}
// Value is expressed on the current Axis, not in absolute coordinates!
pub fn encode(&self, val: f64) -> Result<Coordinate, String> {
let max = self.graduation.maximum;
let min = self.graduation.minimum;
let mut d = val;
// Ensure it is within allowed range: Upper bound.
if d > max {
return Err(format!("Encode: position out of bounds: {} >= {}", d, max));
}
// Ensure it is within allowed range: Lower bound.
if d < min {
return Err(format!("Encode: position out of bounds: {} < {}", d, min));
}
// Shift range to zero.
d -= min;
// Scale to range.
let v = (d / self.graduation.epsilon) as u64;
// Convert to appropriate type.
Ok(v.into())
}
// Value is expressed on the current Axis, not in absolute coordinates!
pub fn decode(&self, val: &Coordinate) -> Result<f64, String> {
let max = self.graduation.maximum;
let min = self.graduation.minimum;
// Convert to appropriate type.
let mut d = val.f64();
// Scale range back.
d *= self.graduation.epsilon;
// Shift range back to origin.
d += self.graduation.minimum;
// Ensure it is within allowed range: Upper bound.
if d > max {
return Err(format!("Decode: position out of bounds: {} >= {}", d, max));
}
// Ensure it is within allowed range: Lower bound.
if d < min {
return Err(format!("Decode: position out of bounds: {} < {}", d, min));
}
Ok(d)
}
}
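A small worked example of the graduation encoding above, as a sketch against this crate rather than part of the file: with `minimum = 0.0`, `maximum = 1.0` and `steps = 1024`, `epsilon` is `1/1024`, so the axis value `0.25` encodes to the integer `256` and decodes back exactly.
```rust
use mercator_db::space::{Axis, NumberSet};

fn main() -> Result<(), String> {
    // epsilon = (1.0 - 0.0) / 1024 = 0.0009765625
    let axis = Axis::new("m", vec![1.0, 0.0, 0.0], NumberSet::N, 0.0, 1.0, 1024)?;

    let encoded = axis.encode(0.25)?; // (0.25 - 0.0) / epsilon = 256
    assert_eq!(axis.decode(&encoded)?, 0.25); // 256 * epsilon + 0.0 = 0.25

    Ok(())
}
```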

257
src/database/space/coordinate.rs Normal file

@@ -0,0 +1,257 @@
use std::cmp::Ordering;
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::ops::Add;
use std::ops::Mul;
use std::ops::Sub;
#[derive(Clone, Copy, Debug, Deserialize, Serialize)]
pub enum Coordinate {
CoordinateU8(u8),
CoordinateU16(u16),
CoordinateU32(u32),
CoordinateU64(u64),
// We currently assume that 2^64 is enough to store encoded position values per axis.
//CoordinateU128(u128),
CoordinateF64(f64),
}
impl Coordinate {
pub fn f64(&self) -> f64 {
match *self {
Coordinate::CoordinateU8(v) => f64::from(v),
Coordinate::CoordinateU16(v) => f64::from(v),
Coordinate::CoordinateU32(v) => f64::from(v),
Coordinate::CoordinateU64(v) => v as f64,
Coordinate::CoordinateF64(v) => v,
}
}
pub fn u64(&self) -> u64 {
match *self {
Coordinate::CoordinateU8(v) => u64::from(v),
Coordinate::CoordinateU16(v) => u64::from(v),
Coordinate::CoordinateU32(v) => u64::from(v),
Coordinate::CoordinateU64(v) => v,
Coordinate::CoordinateF64(_v) => unreachable!(),
}
}
}
/*
impl Serialize for Coordinate {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self {
Coordinate::CoordinateF64(v) => serializer.serialize_f64(*v),
Coordinate::CoordinateU8(v) => serializer.serialize_u8(*v),
Coordinate::CoordinateU16(v) => serializer.serialize_u16(*v),
Coordinate::CoordinateU32(v) => serializer.serialize_u32(*v),
Coordinate::CoordinateU64(v) => serializer.serialize_u64(*v),
}
}
} */
impl Display for Coordinate {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Coordinate::CoordinateU8(v) => write!(f, "{}", v),
Coordinate::CoordinateU16(v) => write!(f, "{}", v),
Coordinate::CoordinateU32(v) => write!(f, "{}", v),
Coordinate::CoordinateU64(v) => write!(f, "{}", v),
Coordinate::CoordinateF64(v) => write!(f, "{}", v),
}
}
}
impl Add<f64> for Coordinate {
type Output = f64;
fn add(self, rhs: f64) -> Self::Output {
self.f64() + rhs
}
}
impl Add for Coordinate {
type Output = Coordinate;
fn add(self, rhs: Self) -> Self::Output {
if let Coordinate::CoordinateF64(u) = self {
return Coordinate::CoordinateF64(u + rhs.f64());
}
if let Coordinate::CoordinateF64(v) = rhs {
return Coordinate::CoordinateF64(v + self.f64());
}
(self.u64() + rhs.u64()).into()
}
}
impl Sub<f64> for Coordinate {
type Output = f64;
fn sub(self, rhs: f64) -> Self::Output {
self.f64() - rhs
}
}
impl Sub for Coordinate {
type Output = Coordinate;
fn sub(self, rhs: Self) -> Self::Output {
if let Coordinate::CoordinateF64(u) = self {
return Coordinate::CoordinateF64(u - rhs.f64());
}
if let Coordinate::CoordinateF64(v) = rhs {
return Coordinate::CoordinateF64(v - self.f64());
}
        // Unsigned subtraction saturates at zero instead of underflowing.
        let r = rhs.u64();
        let l = self.u64();
        let d = if l < r { 0u64 } else { l - r };
d.into()
}
}
impl Mul<f64> for Coordinate {
type Output = Coordinate;
fn mul(self, rhs: f64) -> Self::Output {
(self.f64() * rhs).into()
}
}
impl Mul for Coordinate {
type Output = Coordinate;
fn mul(self, rhs: Coordinate) -> Self::Output {
if let Coordinate::CoordinateF64(u) = self {
return Coordinate::CoordinateF64(u * rhs.f64());
}
if let Coordinate::CoordinateF64(v) = rhs {
return Coordinate::CoordinateF64(v * self.f64());
}
(self.u64() * rhs.u64()).into()
}
}
impl From<Coordinate> for f64 {
fn from(v: Coordinate) -> Self {
v.f64()
}
}
impl From<&Coordinate> for f64 {
fn from(v: &Coordinate) -> Self {
v.f64()
}
}
impl From<f64> for Coordinate {
fn from(v: f64) -> Self {
Coordinate::CoordinateF64(v)
}
}
impl From<Coordinate> for u64 {
fn from(v: Coordinate) -> Self {
v.u64()
}
}
impl From<&Coordinate> for u64 {
fn from(v: &Coordinate) -> Self {
v.u64()
}
}
impl From<u64> for Coordinate {
fn from(v: u64) -> Self {
// Slight syntax hack, as exclusive ranges are not yet available.
// cf: https://github.com/rust-lang/rust/issues/37854
match v {
_ if v <= u64::from(std::u8::MAX) => Coordinate::CoordinateU8(v as u8),
_ if v <= u64::from(std::u16::MAX) => Coordinate::CoordinateU16(v as u16),
_ if v <= u64::from(std::u32::MAX) => Coordinate::CoordinateU32(v as u32),
            _ => Coordinate::CoordinateU64(v),
/*_ => {
panic!("Out of range {} > {}", v, std::u64::MAX);
} */
}
}
}
impl From<Coordinate> for usize {
fn from(v: Coordinate) -> Self {
(v.u64()) as usize
}
}
impl From<&Coordinate> for usize {
fn from(v: &Coordinate) -> Self {
(v.u64()) as usize
}
}
impl From<usize> for Coordinate {
fn from(v: usize) -> Self {
(v as u64).into()
}
}
impl Ord for Coordinate {
fn cmp(&self, other: &Self) -> Ordering {
// If one hand is a floating value, then messy case of floating point
// values only being partially ordered.
// TODO: Should we allow comparison between u64 and f64 Coordinates?
if let Coordinate::CoordinateF64(_lh) = self {
unimplemented!();
}
if let Coordinate::CoordinateF64(_rh) = other {
unimplemented!();
}
self.u64().cmp(&other.u64())
}
}
impl PartialOrd for Coordinate {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
// If one hand is a floating value, do use floating point comparison,
// otherwise integer.
if let Coordinate::CoordinateF64(lh) = self {
return lh.partial_cmp(&other.f64());
}
if let Coordinate::CoordinateF64(rh) = other {
return self.f64().partial_cmp(rh);
}
self.u64().partial_cmp(&other.u64())
}
}
impl Eq for Coordinate {}
impl PartialEq for Coordinate {
fn eq(&self, other: &Self) -> bool {
// If one hand is a floating value, do use floating point comparison,
// otherwise integer.
if let Coordinate::CoordinateF64(lh) = self {
return lh.eq(&other.f64());
}
if let Coordinate::CoordinateF64(rh) = other {
return self.f64().eq(rh);
}
self.u64() == other.u64()
}
}
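A quick sketch, not part of the file, of the width selection performed by `From<u64>` above: the narrowest unsigned variant that can hold the value is chosen, and `u64()` recovers it regardless of the variant.
```rust
use mercator_db::space::Coordinate;

fn main() {
    // 200 fits in a u8, 70_000 needs a u32; both round-trip through u64().
    assert!(matches!(Coordinate::from(200u64), Coordinate::CoordinateU8(_)));
    assert!(matches!(Coordinate::from(70_000u64), Coordinate::CoordinateU32(_)));
    assert_eq!(Coordinate::from(70_000u64).u64(), 70_000);
}
```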

180
src/database/space/coordinate_system.rs Normal file

@@ -0,0 +1,180 @@
use super::axis::Axis;
use super::coordinate::Coordinate;
use super::position::Position;
use super::MAX_K;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub enum CoordinateSystem {
Universe,
// Coordinates in Universe, expressed in f64, and in the Universe number of dimensions.
AffineSystem { origin: Position, axes: Vec<Axis> },
}
impl CoordinateSystem {
pub fn new(origin: Vec<f64>, axes: Vec<Axis>) -> Self {
CoordinateSystem::AffineSystem {
origin: origin.into(),
axes,
}
}
pub fn origin(&self) -> Position {
match self {
CoordinateSystem::Universe => {
let origin = [0f64; MAX_K].to_vec();
origin.into()
}
CoordinateSystem::AffineSystem { origin, .. } => origin.clone(),
}
}
pub fn axes(&self) -> Vec<Axis> {
match self {
CoordinateSystem::Universe => {
//FIXME: Generate a CoordinateSystem on the fly or store it as part of the Universe Space?
unimplemented!()
}
CoordinateSystem::AffineSystem { axes, .. } => axes.clone(),
}
}
pub fn dimensions(&self) -> usize {
match self {
CoordinateSystem::Universe => MAX_K,
CoordinateSystem::AffineSystem { axes, .. } => axes.len(),
}
}
pub fn bounding_box(&self) -> (Position, Position) {
let mut low = Vec::with_capacity(self.dimensions());
let mut high = Vec::with_capacity(self.dimensions());
match self {
CoordinateSystem::Universe => {
for _ in 0..self.dimensions() {
low.push(std::f64::MAX);
high.push(std::f64::MIN);
}
}
CoordinateSystem::AffineSystem { axes, .. } => {
for a in axes {
low.push(a.graduation().minimum);
high.push(a.graduation().maximum);
}
}
}
(low.into(), high.into())
}
pub fn volume(&self) -> f64 {
let (low, high) = self.bounding_box();
let difference: Vec<_> = (high - low).into();
let mut volume = 1.0;
for l in difference {
volume *= l;
}
volume
}
// The position is expressed in coordinates in the universe,
// return a position in the current coordinate system.
pub fn rebase(&self, position: &Position) -> Result<Position, String> {
match self {
CoordinateSystem::Universe => {
// Ensure the coordinates are encoded into F64 variants of
// coordinates by forcing an addition to the origin position
// which is expressed as F64 variants. The addition will convert
// to F64 automatically.
Ok(self.origin().clone() + position.clone())
}
CoordinateSystem::AffineSystem { origin, axes } => {
let dimensions = axes.len();
let translated = position.clone() - origin.clone();
let mut rebased = Vec::with_capacity(dimensions);
for a in axes.iter().take(dimensions) {
let c = a.project_in(&translated)?;
rebased.push(c);
}
Ok(rebased.into())
}
}
}
// The position is expressed in coordinates in the current coordinate system,
// return a position in Universe coordinates.
pub fn absolute_position(&self, position: &Position) -> Result<Position, String> {
match self {
CoordinateSystem::Universe => {
// Ensure the coordinates are encoded into F64 variants of
// coordinates by forcing an addition to the origin position
// which is expressed as F64 variants. The addition will convert
// to F64 automatically.
Ok(self.origin().clone() + position.clone())
}
CoordinateSystem::AffineSystem { axes, .. } => {
// Start from the base origin.
let mut rebased = self.origin();
// Convert to Universe coordinates
for k in 0..axes.len() {
let c = axes[k].project_out(&position[k])?;
rebased += c;
}
Ok(rebased)
}
}
}
// The position is expressed in the current system
// Encode each coordinate separately and return an encoded Position
pub fn encode(&self, position: &[f64]) -> Result<Position, String> {
let mut encoded = vec![];
match self {
CoordinateSystem::Universe => {
assert_eq!(position.len(), MAX_K);
for c in position {
encoded.push(Coordinate::CoordinateF64(*c));
}
}
CoordinateSystem::AffineSystem { axes, .. } => {
assert_eq!(position.len(), axes.len());
for k in 0..axes.len() {
encoded.push(axes[k].encode(position[k])?);
}
}
};
Ok(encoded.into())
}
// The position is expressed in the current system as an encoded value,
// return a position in the current system as f64 values.
pub fn decode(&self, position: &Position) -> Result<Vec<f64>, String> {
let mut decoded = vec![];
match self {
CoordinateSystem::Universe => {
assert_eq!(position.dimensions(), MAX_K);
for c in 0..position.dimensions() {
decoded.push(position[c].into());
}
}
CoordinateSystem::AffineSystem { axes, .. } => {
assert_eq!(position.dimensions(), axes.len());
for k in 0..axes.len() {
decoded.push(axes[k].decode(&position[k])?);
}
}
};
Ok(decoded)
}
}

95
src/database/space/mod.rs Normal file

@@ -0,0 +1,95 @@
mod axis;
mod coordinate;
mod coordinate_system;
mod position;
mod shape;
#[cfg(test)]
mod tests;
pub use axis::Axis;
pub use axis::Graduation;
pub use axis::NumberSet;
pub use coordinate::Coordinate;
pub use coordinate_system::CoordinateSystem;
pub use position::Position;
pub use shape::Shape;
pub const MAX_K: usize = 3;
lazy_static! {
static ref UNIVERSE: Space = Space {
name: "Universe".into(),
system: CoordinateSystem::Universe,
};
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Space {
name: String,
system: CoordinateSystem,
}
impl Space {
pub fn new<S>(name: S, system: CoordinateSystem) -> Self
where
S: Into<String>,
{
Space {
name: name.into(),
system,
}
}
pub fn universe() -> &'static Self {
&UNIVERSE
}
pub fn change_base(position: &Position, from: &Space, to: &Space) -> Result<Position, String> {
to.rebase(&from.absolute_position(position)?)
}
pub fn name(&self) -> &String {
&self.name
}
pub fn origin(&self) -> Position {
self.system.origin()
}
pub fn axes(&self) -> Vec<Axis> {
self.system.axes()
}
pub fn bounding_box(&self) -> (Position, Position) {
self.system.bounding_box()
}
pub fn volume(&self) -> f64 {
self.system.volume()
}
// The position is expressed in coordinates in the universe,
// return a position in the current space.
pub fn rebase(&self, position: &Position) -> Result<Position, String> {
self.system.rebase(position)
}
// The position is expressed in coordinates in the current space,
// return an absolute position in Universe.
pub fn absolute_position(&self, position: &Position) -> Result<Position, String> {
self.system.absolute_position(position)
}
// The position is expressed in the current space as an encoded value,
// return a position in the current system as f64 values
pub fn decode(&self, position: &Position) -> Result<Vec<f64>, String> {
self.system.decode(position)
}
// The position is expressed in the current space,
// return a position expressed in the current space as an encoded value.
pub fn encode(&self, position: &[f64]) -> Result<Position, String> {
self.system.encode(position)
}
}

295
src/database/space/position.rs Normal file

@@ -0,0 +1,295 @@
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::iter::FromIterator;
use std::ops::Add;
use std::ops::AddAssign;
use std::ops::Index;
use std::ops::IndexMut;
use std::ops::Mul;
use std::ops::MulAssign;
use std::ops::Sub;
use std::ops::SubAssign;
use super::coordinate::Coordinate;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub enum Position {
Position1(Coordinate),
Position2([Coordinate; 2]),
Position3([Coordinate; 3]),
Position4([Coordinate; 4]),
Position5([Coordinate; 5]),
Position6([Coordinate; 6]),
Position7([Coordinate; 7]),
Position8([Coordinate; 8]),
PositionN(Vec<Coordinate>),
}
impl Position {
pub fn new(coordinates: Vec<Coordinate>) -> Self {
coordinates.into()
}
pub fn dimensions(&self) -> usize {
match self {
Position::Position1(_) => 1,
Position::Position2(_) => 2,
Position::Position3(_) => 3,
Position::Position4(_) => 4,
Position::Position5(_) => 5,
Position::Position6(_) => 6,
Position::Position7(_) => 7,
Position::Position8(_) => 8,
Position::PositionN(coordinates) => coordinates.len(),
}
}
// Returns ||self||
pub fn norm(&self) -> f64 {
if let Position::Position1(coordinates) = self {
            // The norm of a 1-D vector is simply the absolute value of its single coordinate.
coordinates.f64().abs()
} else {
let point: Vec<&Coordinate> = self.into();
let mut squared = 0f64;
for c in point {
let t: f64 = c.into();
squared += t * t;
}
squared.sqrt()
}
}
// Unit / Normalized vector from self.
pub fn unit(&self) -> Self {
self.clone() * (1f64 / self.norm())
}
// This multiplies self^T with other, producing a scalar value
pub fn dot_product(&self, other: &Self) -> f64 {
assert_eq!(self.dimensions(), other.dimensions());
let point = self.clone();
let other = other.clone();
let mut product = 0f64;
for k in 0..self.dimensions() {
product += (point[k] * other[k]).f64();
}
product
}
}
impl Display for Position {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let v: Vec<&Coordinate> = self.into();
write!(f, "{:?}", v)
}
}
impl Index<usize> for Position {
type Output = Coordinate;
fn index(&self, k: usize) -> &Self::Output {
match self {
Position::Position1(coordinate) => coordinate,
Position::Position2(coordinates) => &coordinates[k],
Position::Position3(coordinates) => &coordinates[k],
Position::Position4(coordinates) => &coordinates[k],
Position::Position5(coordinates) => &coordinates[k],
Position::Position6(coordinates) => &coordinates[k],
Position::Position7(coordinates) => &coordinates[k],
Position::Position8(coordinates) => &coordinates[k],
Position::PositionN(coordinates) => &coordinates[k],
}
}
}
impl IndexMut<usize> for Position {
fn index_mut(&mut self, k: usize) -> &mut Self::Output {
match self {
Position::Position1(coordinate) => coordinate,
Position::Position2(coordinates) => &mut coordinates[k],
Position::Position3(coordinates) => &mut coordinates[k],
Position::Position4(coordinates) => &mut coordinates[k],
Position::Position5(coordinates) => &mut coordinates[k],
Position::Position6(coordinates) => &mut coordinates[k],
Position::Position7(coordinates) => &mut coordinates[k],
Position::Position8(coordinates) => &mut coordinates[k],
Position::PositionN(coordinates) => &mut coordinates[k],
}
}
}
impl Add for Position {
type Output = Position;
fn add(mut self, rhs: Self) -> Self::Output {
self += rhs;
self
}
}
impl AddAssign for Position {
fn add_assign(&mut self, rhs: Self) {
let dimensions = self.dimensions();
assert_eq!(dimensions, rhs.dimensions());
for k in 0..dimensions {
self[k] = self[k] + rhs[k];
}
}
}
impl Sub for Position {
type Output = Position;
fn sub(mut self, rhs: Self) -> Self::Output {
self -= rhs;
self
}
}
impl SubAssign for Position {
fn sub_assign(&mut self, rhs: Self) {
let dimensions = self.dimensions();
assert_eq!(dimensions, rhs.dimensions());
for k in 0..dimensions {
self[k] = self[k] - rhs[k];
}
}
}
// Scalar product
impl Mul<f64> for Position {
type Output = Position;
fn mul(mut self, rhs: f64) -> Self::Output {
self *= rhs;
self
}
}
// Scalar product
impl MulAssign<f64> for Position {
fn mul_assign(&mut self, rhs: f64) {
for k in 0..self.dimensions() {
self[k] = self[k] * rhs;
}
}
}
// Outer product
impl Mul for Position {
type Output = Vec<Position>;
fn mul(self, rhs: Self) -> Self::Output {
let mut m = Vec::with_capacity(rhs.dimensions());
for i in 0..rhs.dimensions() {
let mut u = Vec::with_capacity(self.dimensions());
for k in 0..self.dimensions() {
                u.push(self[k] * rhs[i]);
            }
            m.push(u.into());
}
m
}
}
impl PartialEq for Position {
    fn eq(&self, other: &Self) -> bool {
        if self.dimensions() != other.dimensions() {
            return false;
        }
        for i in 0..self.dimensions() {
if self[i] != other[i] {
return false;
}
}
true
}
}
impl Eq for Position {}
impl<'s> From<&'s Position> for Vec<&'s Coordinate> {
fn from(position: &'s Position) -> Self {
match position {
Position::Position1(coordinate) => vec![coordinate],
            Position::Position2(coordinates) => coordinates.iter().collect(),
            Position::Position3(coordinates) => coordinates.iter().collect(),
            Position::Position4(coordinates) => coordinates.iter().collect(),
            Position::Position5(coordinates) => coordinates.iter().collect(),
            Position::Position6(coordinates) => coordinates.iter().collect(),
            Position::Position7(coordinates) => coordinates.iter().collect(),
            Position::Position8(coordinates) => coordinates.iter().collect(),
            Position::PositionN(coordinates) => coordinates.iter().collect(),
}
}
}
impl From<Vec<Coordinate>> for Position {
fn from(coordinates: Vec<Coordinate>) -> Self {
match coordinates.len() {
1 => Position::Position1(coordinates[0]),
2 => Position::Position2(*array_ref!(coordinates, 0, 2)),
3 => Position::Position3(*array_ref!(coordinates, 0, 3)),
4 => Position::Position4(*array_ref!(coordinates, 0, 4)),
5 => Position::Position5(*array_ref!(coordinates, 0, 5)),
6 => Position::Position6(*array_ref!(coordinates, 0, 6)),
7 => Position::Position7(*array_ref!(coordinates, 0, 7)),
8 => Position::Position8(*array_ref!(coordinates, 0, 8)),
_ => Position::PositionN(coordinates),
}
}
}
impl From<Vec<f64>> for Position {
fn from(coordinates: Vec<f64>) -> Self {
coordinates
.into_iter()
.map(|c| c.into())
.collect::<Vec<Coordinate>>()
.into()
}
}
impl From<Vec<u64>> for Position {
fn from(coordinates: Vec<u64>) -> Self {
coordinates
.into_iter()
.map(|c| c.into())
.collect::<Vec<Coordinate>>()
.into()
}
}
impl From<Position> for Vec<f64> {
fn from(position: Position) -> Self {
let point: Vec<&Coordinate> = (&position).into();
point.into_iter().map(|c| c.into()).collect()
}
}
impl From<&Position> for Vec<f64> {
fn from(coordinates: &Position) -> Self {
coordinates.clone().into()
}
}
impl FromIterator<f64> for Position {
fn from_iter<I: IntoIterator<Item = f64>>(iter: I) -> Self {
iter.into_iter().collect::<Vec<_>>().into()
}
}
impl FromIterator<Coordinate> for Position {
fn from_iter<I: IntoIterator<Item = Coordinate>>(iter: I) -> Self {
iter.into_iter().collect::<Vec<_>>().into()
}
}

209
src/database/space/shape.rs Normal file

@@ -0,0 +1,209 @@
use super::Coordinate;
use super::Position;
use super::Space;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub enum Shape {
Point(Position),
//HyperRectangle([Position; MAX_K]),
HyperSphere(Position, Coordinate),
BoundingBox(Position, Position),
//Nifti(nifti_data??),
}
impl Shape {
pub fn rebase(&self, from: &Space, to: &Space) -> Result<Shape, String> {
match self {
Shape::Point(position) => Ok(Shape::Point(Space::change_base(position, from, to)?)),
Shape::HyperSphere(center, radius) => {
//FIXME: Is the length properly dealt with? How do we process this for space conversions?
let mut r = Vec::with_capacity(center.dimensions());
for _ in 0..center.dimensions() {
r.push(radius.clone());
}
let r = r.into();
let r = from.absolute_position(&r)?;
let r = to.rebase(&(r))?[0];
Ok(Shape::HyperSphere(Space::change_base(center, from, to)?, r))
}
Shape::BoundingBox(lower, higher) => Ok(Shape::BoundingBox(
Space::change_base(lower, from, to)?,
Space::change_base(higher, from, to)?,
)),
}
}
pub fn decode(&self, space: &Space) -> Result<Shape, String> {
let s = match self {
Shape::Point(position) => Shape::Point(space.decode(position)?.into()),
Shape::HyperSphere(center, radius) => {
//FIXME: Is the length properly dealt with? How do we process this for space conversions?
Shape::HyperSphere(space.decode(center)?.into(), *radius)
}
Shape::BoundingBox(lower, higher) => {
Shape::BoundingBox(space.decode(lower)?.into(), space.decode(higher)?.into())
}
};
Ok(s)
}
pub fn encode(&self, space: &Space) -> Result<Shape, String> {
let s = match self {
Shape::Point(position) => {
let p: Vec<f64> = position.into();
Shape::Point(space.encode(&p)?)
}
Shape::HyperSphere(center, radius) => {
let p: Vec<f64> = center.into();
//FIXME: Is the length properly dealt with? How do we process this for space conversions?
Shape::HyperSphere(space.encode(&p)?, *radius)
}
Shape::BoundingBox(lower, higher) => {
let lower: Vec<f64> = lower.into();
let higher: Vec<f64> = higher.into();
Shape::BoundingBox(space.encode(&lower)?, space.encode(&higher)?)
}
};
Ok(s)
}
pub fn get_mbb(&self) -> (Position, Position) {
match self {
Shape::Point(position) => (position.clone(), position.clone()),
Shape::HyperSphere(center, radius) => {
let dimensions = center.dimensions();
let mut vr = Vec::with_capacity(dimensions);
for _ in 0..dimensions {
vr.push(*radius);
}
let vr: Position = vr.into();
(center.clone() - vr.clone(), center.clone() + vr)
}
Shape::BoundingBox(lower, higher) => (lower.clone(), higher.clone()),
}
}
//pub fn inside(&self) {}
/* Original version proposed by Charles François Rey - 2019
```perl
use strict;
my $conf = [[0, 2], [1, 3], [11, 20], [5, 6]];
my $dim = scalar @{$conf};
sub nxt {
my ($state) = @_;
foreach my $i (0..$dim-1) {
$i = $dim-1-$i;
$state->[$i] = $state->[$i] + 1;
if ($state->[$i] > $conf->[$i]->[-1]) {
$state->[$i] = $conf->[$i]->[0];
# => carry
} else {
return 1;
}
}
return;
}
sub pretty {
my ($state) = @_;
return "(", join(', ', @{$state}), ")";
}
sub first {
return [ map { $_->[0] } @{$conf} ];
}
my $i = 0;
my $s = first;
do {
print $i++, ": ", pretty($s), "\n";
} while (nxt($s))
```*/
fn gen(lower: &Position, higher: &Position) -> Vec<Position> {
fn next(
dimensions: usize,
lower: &Position,
higher: &Position,
state: &mut Position,
) -> bool {
for i in (0..dimensions).rev() {
state[i] = (state[i].u64() + 1).into();
if state[i] >= higher[i] {
state[i] = lower[i];
// => carry
} else {
return true;
}
}
false
}
fn first(lower: &Position) -> Position {
let mut current = vec![];
for i in 0..lower.dimensions() {
current.push(lower[i].u64());
}
current.into()
}
let mut results = vec![];
// Redefine lower as a compacted form of lower for all coordinates.
let lower = first(lower);
// Initialise the current value
let mut current = lower.clone();
        // Add the first Position to the results, as next() will return the following one.
results.push(current.clone());
while next(lower.dimensions(), &lower, higher, &mut current) {
results.push(current.clone())
}
results
}
// Transform a Shape into a list of Position which approximate the shape.
// Note:
// * All output positions are expressed within the space.
// TODO: Return an iterator instead, for performance!
pub fn rasterise(&self) -> Result<Vec<Position>, String> {
match self {
Shape::Point(position) => Ok(vec![position.clone()]),
Shape::HyperSphere(center, radius) => {
let (lower, higher) = self.get_mbb();
let radius = radius.f64();
let positions = Shape::gen(&lower, &higher)
.into_iter()
.filter(|p| (p.clone() - center.clone()).norm() <= radius)
.collect();
Ok(positions)
}
Shape::BoundingBox(lower, higher) => Ok(Shape::gen(lower, higher)),
}
}
// Transform a Shape into a list of Position which approximate the shape.
// Note:
// * All input positions are expressed within the space.
// * All output positions are expressed in absolute positions in Universe
// TODO: Return an iterator instead, for performance!
pub fn rasterise_from(&self, space: &Space) -> Result<Vec<Position>, String> {
Ok(self
.rasterise()?
.into_iter()
.filter_map(|p| match space.absolute_position(&p) {
Ok(p) => Some(p),
Err(_) => None, // Should be impossible, but let's handle the case.
})
.collect())
}
}
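To make the odometer-style enumeration of `gen` concrete, here is a sketch, not part of the file, that rasterises an encoded bounding box; each axis contributes the grid values from `lower` up to but excluding `higher`, so a 2 x 2 x 2 box yields eight positions.
```rust
use mercator_db::space::{Position, Shape};

fn main() -> Result<(), String> {
    // Encoded (integer) coordinates; each axis covers grid cells 0 and 1.
    let lower: Position = vec![0u64, 0, 0].into();
    let higher: Position = vec![2u64, 2, 2].into();

    let positions = Shape::BoundingBox(lower, higher).rasterise()?;
    assert_eq!(positions.len(), 8); // 2 * 2 * 2 grid positions

    Ok(())
}
```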

290
src/database/space/tests.rs Normal file

File diff suppressed because one or more lines are too long

159
src/database/space_db.rs Normal file

@@ -0,0 +1,159 @@
use super::space::Coordinate;
use super::space::Position;
use super::space::Shape;
use super::space_index::SpaceFields;
use super::space_index::SpaceIndex;
use super::space_index::SpaceSetIndex;
use super::space_index::SpaceSetObject;
use ironsea_table_vector::VectorTable;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceDB {
reference_space: String,
values: Vec<Coordinate>,
resolutions: Vec<SpaceIndex>,
}
impl SpaceDB {
pub fn new<S>(reference_space: S, mut space_objects: Vec<SpaceSetObject>) -> Self
where
S: Into<String>,
{
let mut values = space_objects
.iter()
.map(|object| *object.value())
.collect::<Vec<_>>();
values.sort_unstable_by_key(|&c| c.u64());
values.dedup_by_key(|c| c.u64());
space_objects.iter_mut().for_each(|object| {
// Update the values to point into the local (shorter) mapping array.
let val = values.binary_search(object.value()).unwrap();
object.set_value(val.into());
});
// Build the set of SpaceIndices.
// FIXME: Build multiple-scale indices. What is the stopping condition, and what are the parameters?
let max_elem = 2_000;
// We cannot return less that the total number of individual Ids stored
// in the index.
let max = max_elem.max(values.len());
        // Generate indices as long as max is smaller than the number of points located in the whole space.
        // For each new index, reduce the precision by two, and push it to the resolutions vector.
        // When done, go over the array and set the threshold_volumes to (total volume / 8 * i), in reverse order.
//
let index = SpaceSetIndex::new(&VectorTable::new(space_objects), 3, 10);
let mut resolutions = vec![SpaceIndex::new(std::f64::MAX, vec![0, 0, 0], index)];
// Make sure the vector is sorted by threshold volumes, smallest to largest.
// this means indices are sorted form highest resolution to lowest resolution.
// default_resolution() relies on it to find the correct index.
//FIXME: Domain check between f64 <-> u64 XOR implement Ord on f64
resolutions.sort_unstable_by_key(|a| a.threshold() as u64);
SpaceDB {
reference_space: reference_space.into(),
values,
resolutions,
}
}
pub fn name(&self) -> &String {
&self.reference_space
}
// The smallest volume threshold, which is the highest resolution, will
// be at position 0
pub fn highest_resolution(&self) -> usize {
0
}
// The highest volume threshold, which is the lowest resolution, will
// be at position len - 1
pub fn lowest_resolution(&self) -> usize {
self.resolutions.len() - 1
}
// Is this Space DB empty?
pub fn is_empty(&self) -> bool {
self.values.is_empty()
}
// Returns the index to be used by default for the given volume.
// The index chosen by default will be the one with the smallest volume
// threshold which is greater or equal to the query volume.
pub fn default_resolution(&self, volume: f64) -> usize {
for i in 0..self.resolutions.len() {
if volume <= self.resolutions[i].threshold() {
return i;
}
}
        // Fall back to the lowest resolution if the volume exceeds every threshold.
        self.lowest_resolution()
}
// Convert the value back to caller's references
fn decode(&self, mut objects: Vec<SpaceSetObject>) -> Vec<SpaceSetObject> {
for o in &mut objects {
o.set_value(self.values[o.value().u64() as usize]);
}
objects
}
// Search by Id, a.k.a values
pub fn get_by_id(
&self,
id: usize,
threshold_volume: f64,
) -> Result<Vec<SpaceSetObject>, String> {
// Is that ID referenced in the current space?
if let Ok(offset) = self.values.binary_search(&id.into()) {
let resolution = self.default_resolution(threshold_volume);
let mut results = self.resolutions[resolution]
.find_by_value(&SpaceFields::new(self.name().into(), offset.into()));
// Convert the Value back to caller's references
// Here we do not use decode() as we have a single id value to manage.
for o in &mut results {
o.set_value(id.into());
}
Ok(results)
} else {
Ok(vec![])
}
}
// Search by positions defining a volume.
pub fn get_by_positions(
&self,
positions: &[Position],
threshold_volume: f64,
) -> Result<Vec<SpaceSetObject>, String> {
let resolution = self.default_resolution(threshold_volume);
let results = positions
.iter()
.flat_map(|position| self.resolutions[resolution].find(position))
.collect::<Vec<SpaceSetObject>>();
Ok(self.decode(results))
}
// Search by Shape defining a volume:
// * Hyperrectangle (MBB),
// * HyperSphere (radius around a point),
// * Point (Specific position)
pub fn get_by_shape(
&self,
shape: &Shape,
threshold_volume: f64,
) -> Result<Vec<SpaceSetObject>, String> {
let resolution = self.default_resolution(threshold_volume);
Ok(self.decode(self.resolutions[resolution].find_by_shape(&shape)?))
}
}

157
src/database/space_index.rs Normal file

@@ -0,0 +1,157 @@
use ironsea_index::IndexedOwned;
use ironsea_table_vector::VectorTable;
use super::space::Coordinate;
use super::space::Position;
use super::space::Shape;
use super::SpaceId;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceSetObject {
space_id: SpaceId,
position: Position,
value: Coordinate, // Efficiently store the offset within the SpaceDB values vector
}
impl SpaceSetObject {
pub fn new(reference_space: &str, position: Position, value: Coordinate) -> Self {
SpaceSetObject {
space_id: reference_space.into(),
position,
value,
}
}
/*
pub fn eval(&self, _predicate: &Predicate) -> bool {
false
}
*/
pub fn id(&self) -> &Coordinate {
&self.value
}
pub fn space_id(&self) -> &SpaceId {
&self.space_id
}
pub fn position(&self) -> &Position {
&self.position
}
pub fn value(&self) -> &Coordinate {
&self.value
}
pub fn set_value(&mut self, value: Coordinate) {
self.value = value;
}
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceFields {
space_id: SpaceId,
value: Coordinate,
}
impl SpaceFields {
pub fn new(space_id: SpaceId, value: Coordinate) -> Self {
SpaceFields { space_id, value }
}
}
impl PartialEq for SpaceFields {
fn eq(&self, other: &Self) -> bool {
self.space_id == other.space_id && self.value == other.value
}
}
impl ironsea_index::Record<Position> for SpaceSetObject {
fn key(&self) -> Position {
self.position.clone()
}
}
impl ironsea_index::RecordFields<SpaceFields> for SpaceSetObject {
fn fields(&self) -> SpaceFields {
SpaceFields {
space_id: self.space_id().clone(),
value: self.value,
}
}
}
impl ironsea_index::RecordBuild<Position, SpaceFields, SpaceSetObject> for SpaceSetObject {
fn build(key: &Position, fields: &SpaceFields) -> SpaceSetObject {
SpaceSetObject {
space_id: fields.space_id.clone(),
position: key.clone(),
value: fields.value,
}
}
}
pub type SpaceSetIndex = ironsea_index_sfc_dbc::IndexOwned<
VectorTable<SpaceSetObject>,
SpaceSetObject,
Position,
Coordinate,
SpaceFields,
>;
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceIndex {
threshold_volume: f64,
    // lookup_ rounds up, so reverse-sort the list on thresholds and check the last index.
scale: Vec<u32>,
index: SpaceSetIndex,
}
impl SpaceIndex {
pub fn new(threshold_volume: f64, scale: Vec<u32>, index: SpaceSetIndex) -> Self {
SpaceIndex {
threshold_volume,
scale,
index,
}
}
pub fn threshold(&self) -> f64 {
self.threshold_volume
}
pub fn find(&self, key: &Position) -> Vec<SpaceSetObject> {
self.index.find(key)
}
fn find_range(&self, start: &Position, end: &Position) -> Vec<SpaceSetObject> {
self.index.find_range(start, end)
}
pub fn find_by_value(&self, id: &SpaceFields) -> Vec<SpaceSetObject> {
self.index.find_by_value(id)
}
// The shape provided in arguments needs to be expressed in encoded space positions.
// Results are also in encoded space coordinates.
pub fn find_by_shape(&self, shape: &Shape) -> Result<Vec<SpaceSetObject>, String> {
match shape {
Shape::Point(position) => Ok(self.find(position)),
Shape::BoundingBox(lower, higher) => Ok(self.find_range(lower, higher)),
Shape::HyperSphere(center, radius) => {
let (lower, higher) = shape.get_mbb();
                // First narrow down the results with a range query over the MBB,
                // then apply the radius condition, as we are working within
                // a sphere.
let results = self
.find_range(&lower, &higher)
.into_iter()
.filter(|p| (p.position().clone() - center.clone()).norm() <= radius.f64())
.collect();
Ok(results)
}
}
}
}

12
src/lib.rs Normal file

@@ -0,0 +1,12 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate arrayref;
#[macro_use]
extern crate serde_derive;
mod database;
pub use database::*;

97
src/main.rs Normal file

@@ -0,0 +1,97 @@
#[macro_use]
extern crate measure_time;
#[macro_use]
extern crate arrayref;
#[macro_use]
extern crate serde_derive;
mod storage;
use mercator_db::space::Shape;
use mercator_db::DataBase;
fn main() {
// If RUST_LOG is unset, set it to INFO, otherwise keep it as-is.
if std::env::var("RUST_LOG").is_err() {
std::env::set_var("RUST_LOG", "info");
}
pretty_env_logger::init();
// Convert to binary the JSON data:
if true {
info_time!("Converting to binary JSON data");
storage::convert("test");
}
// Build a Database Index:
if true {
info_time!("Building database index");
storage::build("test");
}
// Load a Database:
let db;
{
info_time!("Loading database index");
db = DataBase::load("test").unwrap();
}
if true {
let core = db.core("test").unwrap();
// 100k
let space = db.space("space0.146629817062").unwrap();
//let id = "oid0.606846546049";
let id = "oid0.732128500546";
let r = core.get_by_id(&db, id, None, std::f64::MAX).unwrap();
println!("get_by_id {}: {}", id, r.len());
println!("{}: {:?}\n", id, r[0]);
let r = core.get_by_id(&db, id, None, 0.0).unwrap();
println!("get_by_id {}: {}", id, r.len());
println!("{}: {:?}\n", id, r[0]);
let r = core.get_by_label(&db, id, None, std::f64::MAX).unwrap();
println!("get_by_label {}: {}", id, r.len());
if !r.is_empty() {
println!("{}: {:?}\n", id, r[0]);
}
let lower = space.encode(&[0.2, 0.2, 0.2]).unwrap();
let higher = space.encode(&[0.8, 0.8, 0.8]).unwrap();
let shape = Shape::BoundingBox(lower, higher);
let r = core.get_by_shape(&db, &shape, "std", None, 0.0).unwrap();
println!("get_by_shape {:?}: {}", shape, r.len());
println!("{:?}: {:?}\n", shape, r[0]);
let a = r.iter().filter(|o| o.value.id() == id).collect::<Vec<_>>();
println!("get_by_shape A {:?} filtered on {}: {}", shape, id, a.len());
if !a.is_empty() {
println!("{:?}\n", a[0]);
}
let a = r.iter().filter(|o| o.value.id() != id).collect::<Vec<_>>();
println!(
"get_by_shape !A {:?} filtered on {}: {}",
shape,
id,
a.len()
);
if !a.is_empty() {
println!("{:?}\n", a[0]);
}
println!(
"\nSPACE OBJECT:\n\n{}",
serde_json::to_string_pretty(space).unwrap()
);
println!(
"\nSPATIAL OBJECT:\n\n{}",
serde_json::to_string_pretty(a[0]).unwrap()
);
}
}

238
src/storage.rs Normal file

@@ -0,0 +1,238 @@
use memmap::Mmap;
use serde::Deserialize;
use std::fs::File;
use std::io::BufWriter;
const K: usize = 3;
#[derive(Serialize, Deserialize, Debug)]
pub struct Properties {
pub id: String,
}
#[derive(Serialize, Deserialize, Debug)]
// Geometry is parametric as we have a specific deserializer for the JSON format.
pub struct Shape<'a, G> {
#[serde(rename = "type")]
pub type_name: &'a str,
pub geometry: G,
pub properties: Properties,
}
pub mod json {
use super::*;
use serde::Deserializer;
#[derive(Serialize, Deserialize, Debug)]
pub struct Geometry<'a> {
#[serde(rename = "type")]
pub type_name: &'a str,
#[serde(rename = "referenceSpace")]
pub reference_space: &'a str,
#[serde(deserialize_with = "deserialize_coordinates")]
pub coordinates: Vec<[f64; K]>,
}
fn deserialize_coordinates<'de, D>(deserializer: D) -> Result<Vec<[f64; K]>, D::Error>
where
D: Deserializer<'de>,
{
        // Retrieve a vector of strings from the deserializer; specifying both the element
        // type and `Vec::` is what tells serde the expected shape of the JSON input.
        // Vec<String> corresponds to ["0.1,0.1,0.1", ...] in the input.
let strings: Vec<String> = Vec::deserialize(deserializer)?;
let mut shape_coords = vec![];
        // Each string encodes one point as comma-separated floating point values;
        // the assertion below checks that each point has exactly K dimensions.
        for pos_string in &strings {
            // Split the string on `,`, convert each part to f64, and store the point
            // as a fixed-size array.
let pos_float: Vec<f64> = pos_string
.split(',')
.map(move |a| a.parse::<f64>().unwrap())
.collect();
assert_eq!(pos_float.len(), K);
shape_coords.push(*array_ref![pos_float, 0, K])
}
Ok(shape_coords)
}
pub fn convert(from: &str, to: &str) {
let file_in = File::open(from).unwrap();
let file_out = File::create(to).expect("Unable to create file");
// We create a buffered writer from the file we get
let writer = BufWriter::new(&file_out);
let mmap = unsafe { Mmap::map(&file_in).unwrap() };
let v: Vec<Shape<Geometry>> = serde_json::from_slice(&mmap[..]).unwrap();
bincode::serialize_into(writer, &v).unwrap();
}
}
pub mod bin {
use super::*;
use mercator_db::space;
use mercator_db::Core;
use mercator_db::DataBase;
use mercator_db::Properties;
use mercator_db::SpaceSetObject;
use std::collections::HashMap;
#[derive(Serialize, Deserialize, Debug)]
pub struct Geometry<'a> {
pub type_name: &'a str,
pub reference_space: &'a str,
pub coordinates: Vec<[f64; K]>,
}
pub fn build(from: &str, to: &str) {
let file_in = File::open(from).unwrap();
let file_out = File::create(to).expect("Unable to create file");
// We create a buffered writer from the file we get
let writer = BufWriter::new(&file_out);
let mmap = unsafe { Mmap::map(&file_in).unwrap() };
let v: Vec<Shape<Geometry>> = bincode::deserialize(&mmap[..]).unwrap();
let mut spaces = vec![];
let mut properties = vec![];
let mut space_set_objects = Vec::with_capacity(v.len());
{
let mut properties_hm = HashMap::new();
let mut space_ids = HashMap::new();
let mut properties_ref = Vec::with_capacity(v.len());
// What to write in binary, a vec of json::shape or a Vec of SpaceShape?
for shape in &v {
assert!(shape.type_name == "Feature");
assert!(shape.geometry.type_name == "Point");
space_ids.insert(shape.geometry.reference_space, 1u8);
                // Ensure a Properties object exists for this id (creating it on first
                // sight), and push the id into `properties_ref`. The object's value is
                // the offset into `properties_ref`; it is remapped below to the offset
                // within the final, sorted `properties` vector.
let value = match properties_hm.get(shape.properties.id.as_str()) {
Some(_) => {
properties_ref.push(shape.properties.id.as_str());
properties_ref.len() - 1
}
None => {
properties_hm.insert(
shape.properties.id.as_str(),
Properties::Feature(shape.properties.id.clone()),
);
properties_ref.push(shape.properties.id.as_str());
properties_ref.len() - 1
}
};
space_set_objects.push(SpaceSetObject::new(
shape.geometry.reference_space,
shape.geometry.coordinates[0].to_vec().into(),
value.into(),
));
}
properties.append(&mut properties_hm.drain().map(|(_, v)| v).collect::<Vec<_>>());
spaces.append(
&mut space_ids
.keys()
.map(|&space_name| {
space::Space::new(
space_name,
space::CoordinateSystem::new(
vec![0f64, 0f64, 0f64],
vec![
space::Axis::new(
"m",
vec![1f64, 0f64, 0f64],
space::NumberSet::N,
0.0,
1.0,
1E9 as u64,
)
.unwrap(),
space::Axis::new(
"m",
vec![0f64, 1f64, 0f64],
space::NumberSet::N,
0.0,
1.0,
1E9 as u64,
)
.unwrap(),
space::Axis::new(
"m",
vec![0f64, 0f64, 1f64],
space::NumberSet::N,
0.0,
1.0,
1E9 as u64,
)
.unwrap(),
],
),
)
})
.collect::<Vec<_>>(),
);
properties.sort_unstable_by_key(|p| p.id().clone());
space_set_objects.iter_mut().for_each(|object| {
let id = properties_ref[object.value().u64() as usize];
let value = properties.binary_search_by_key(&id, |p| p.id()).unwrap();
object.set_value(value.into());
});
}
let cores = vec![Core::new(
"test",
"v0.1",
&spaces,
properties,
space_set_objects,
)];
let db = DataBase::new(spaces, cores);
bincode::serialize_into(writer, &db).unwrap();
}
}
pub fn convert<S>(name: S)
where
S: Into<String>,
{
let name = name.into();
let fn_in = format!("{}.json", name);
let fn_out = format!("{}.bin", name);
json::convert(&fn_in, &fn_out);
}
pub fn build<S>(name: S)
where
S: Into<String>,
{
let name = name.into();
let fn_in = format!("{}.bin", name);
let fn_out = format!("{}.index", name);
bin::build(&fn_in, &fn_out);
}
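For reference, here is a sketch, not part of the file, of the JSON layout `json::convert` expects, reconstructed from the serde definitions above; the identifiers mirror the sample values used in `src/main.rs`, and each point is a single comma-separated string.
```rust
use serde_json::Value;

fn main() {
    // One Feature with a single 3-D point; coordinates are strings of comma-separated floats.
    let sample = r#"[
      {
        "type": "Feature",
        "properties": { "id": "oid0.732128500546" },
        "geometry": {
          "type": "Point",
          "referenceSpace": "space0.146629817062",
          "coordinates": ["0.1,0.2,0.3"]
        }
      }
    ]"#;

    let parsed: Value = serde_json::from_str(sample).unwrap();
    assert_eq!(parsed[0]["geometry"]["type"], "Point");
}
```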