Initial commit

This commit is contained in:
2019-08-27 11:39:04 +02:00
parent 8edb80cc29
commit 5a555b6518
6 changed files with 1267 additions and 0 deletions

31
Cargo.toml Normal file
View File

@@ -0,0 +1,31 @@
[package]
name = "ironsea_index_sfc_dbc"
version = "0.1.0"
authors = ["EPFL-DIAS", "Lionel Sambuc <lionel.sambuc@epfl.ch>"]
edition = "2018"
description = "Space-filling Curve over dictionary-based compression, index implementation for the Iron Sea database toolkit."
homepage = "https://crates.io/crates/ironsea_index_sfc_dbc"
repository = "https://github.com/epfl-dias/ironsea_index_sfc_dbc"
readme = "README.md"
keywords = []
categories = ["database-implementations", "data-structures"]
license = "MIT"
#license-file = "LICENSE"
include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.rs"]
[dependencies]
ironsea_index = "^0.1"
ironsea_table = "^0.1"
ironsea_store = "^0.1"
arrayref = "^0.3"
log = { version = "^0.4", features = ["max_level_trace", "release_max_level_info"] }
serde = "^1.0"
serde_derive = "^1.0"
bincode = "^1.1"

37
README.md Normal file
View File

@@ -0,0 +1,37 @@
# Iron Sea - Index SFC DBC
Index for the Iron Sea toolkit, based on a Space Filling Curve (SFC) over dictionary-based compression (DBC), which offers great
performance for range queries over point cloud data while keeping the index storage-efficient.
More details in the paper: https://infoscience.epfl.ch/record/232536?ln=en
## Iron Sea: Database Toolkit
**Iron Sea** provides a set of database engine bricks, which can be combined and applied on arbitrary data structures.
Unlike a traditional database, it does not assume a specific physical structure for the tables nor the records, but relies on the developer to provide a set of extractor functions, which are used by the specific indices provided.
This enables the index implementations to be agnostic from the underlying data structure, and re-used.
## Requirements
### Software
* Rust: https://www.rust-lang.org
## Documentation
For more information, please refer to the [documentation](https://epfl-dias.github.io/ironsea_index_sfc_dbc/).
If you want to build the documentation and access it locally, you can use:
```sh
cargo doc --open
```
## Acknowledgements
This open source software code was developed in part or in whole in the
Human Brain Project, funded from the European Union's Horizon 2020
Framework Programme for Research and Innovation under the Specific Grant
Agreement No. 785907 (Human Brain Project SGA2).

348
src/cell_space.rs Normal file
View File

@@ -0,0 +1,348 @@
use std::fmt::Debug;
use std::marker;
use std::ops::Index;
use ironsea_index::Record;
use ironsea_table::Table;
type Cell<T> = Vec<T>;
#[derive(Clone, Debug, Deserialize, Serialize)]
struct CellDictionary<K, V> {
table: Vec<Cell<V>>,
max_offset: usize,
_marker: marker::PhantomData<(K)>,
}
impl<K, V> CellDictionary<K, V>
where
    V: Clone + Ord + Debug,
    K: Debug + Index<usize, Output = V>,
{
    /// Builds the per-axis dictionary for coordinate `dimension` of `table`.
    ///
    /// The distinct coordinate values are sorted, deduplicated, then split
    /// into consecutive cells of at most `max_offset` values each, where
    /// `max_offset` is derived from `cell_bits`.
    ///
    /// NOTE(review): `distinct[0]` below is indexed unconditionally, so this
    /// panics (in builds where `trace!` arguments are evaluated) when
    /// `table` is empty — confirm callers never index an empty table.
    pub fn new<T, R>(table: &T, dimension: usize, cell_bits: usize) -> Self
    where
        T: Table<R>,
        R: Record<K> + Debug,
    {
        // Do not forget to initialise cells[0]
        let mut cells: Vec<Cell<V>> = vec![vec![]];
        // 1. Retrieve a list of distinct values for the coordinate `dimension`
        let mut distinct = vec![];
        let records = table.get_table();
        for record in records {
            distinct.push(record.key()[dimension].clone());
        }
        // 2. Build sorted, distinct lists
        distinct.sort_unstable();
        distinct.dedup();
        info!(
            "Number of distinct coordinates on dim[{}]: {}",
            dimension,
            distinct.len()
        );
        trace!("min {:?}, max {:?}", distinct[0], distinct.last());
        // 3. Build the dictionary space
        // 3.1. Build dictionary per dimension, add cell and offset
        //      information
        let mut count = 0;
        let mut cell = 0;
        // Beware integer division is rounded towards zero, so add 1 to the
        // result as this is the max number of elements per bucket.
        let max_offset = (distinct.len() / (1 << cell_bits)) + 1;
        for coordinate in distinct {
            //trace!("{:?} {:?} {:?} {:?}", dimension, coordinate, cell, count);
            // Current cell is full: start a new one.
            if count == max_offset {
                count = 0;
                cell += 1;
                cells.push(vec![]);
            }
            cells[cell].push(coordinate);
            count += 1;
        }
        info!(
            "dim[{}]: {} cells, {} max per cell",
            dimension,
            cells.len(),
            max_offset,
        );
        CellDictionary {
            table: cells,
            max_offset,
            _marker: marker::PhantomData,
        }
    }
    /// Maximum number of values stored in any single cell.
    fn max_offset(&self) -> usize {
        self.max_offset
    }
    /// All cells, in ascending coordinate order.
    fn cells(&self) -> &Vec<Cell<V>> {
        &self.table
    }
    /// Linear scan for the cell that would contain `position`.
    ///
    /// Returns `None` when `position` is greater than every stored value
    /// (i.e. past the last cell).
    fn cell_id(&self, position: &V) -> Option<usize>
    where
        V: Clone + Ord + Debug,
    {
        let mut id = 0;
        // If the last value of the current cell is >= than the value, then
        // the value is stored in the cell.
        // If this is the first cell, we will look into it as `id` is
        // still 0.
        for cell in self.cells() {
            // last cell is likely to be only partially full
            match cell.last() {
                Some(x) => {
                    if x >= position {
                        break;
                    }
                }
                None => break,
            };
            id += 1;
        }
        if id >= self.cells().len() {
            None
        } else {
            Some(id)
        }
    }
    /// Exact lookup: `(cell id, offset)` of `position`, or `None` when the
    /// value is not part of the dictionary.
    fn key(&self, position: &V) -> Option<(usize, usize)> {
        let mut result = None;
        if let Some(id) = self.cell_id(position) {
            // Each cell is sorted, so a binary search inside it suffices.
            if let Ok(offset) = self.table[id].binary_search(position) {
                result = Some((id, offset));
            }
        }
        result
    }
    /// Lookup rounded down: the key of `position` itself when present,
    /// otherwise the key of the largest stored value below it (clamped to
    /// `(0, 0)` at the low end, and to `last()` when `position` is past the
    /// end of the dictionary).
    fn key_down(&self, position: &V) -> (usize, usize) {
        match self.cell_id(position) {
            Some(id) => match self.table[id].binary_search(position) {
                Ok(offset) => (id, offset),
                Err(offset) => {
                    if offset > 0 {
                        // Previous value within the same cell.
                        (id, offset - 1)
                    } else if id == 0 {
                        // Nothing below: clamp to the very first value.
                        (0, 0)
                    } else {
                        // First slot of this cell: step back to the last
                        // value of the previous cell.
                        let id = id - 1;
                        (id, self.table[id].len() - 1)
                    }
                }
            },
            None => self.last(),
        }
    }
    /// Key of the very last stored value.
    ///
    /// NOTE(review): panics if the dictionary is empty.
    fn last(&self) -> (usize, usize) {
        let last_id = self.table.len() - 1;
        let last_offset = self.table[last_id].len() - 1;
        (last_id, last_offset)
    }
    /// Lookup rounded up: the key of `position` itself when present,
    /// otherwise the key of the smallest stored value above it.
    fn key_up(&self, position: &V) -> (usize, usize) {
        match self.cell_id(position) {
            Some(id) => match self.table[id].binary_search(position) {
                Ok(offset) => (id, offset),
                Err(offset) => {
                    // NOTE(review): when cell_id() returns Some(id), the
                    // insertion point is always strictly inside the cell, so
                    // the two fallback branches below look unreachable —
                    // confirm before relying on them.
                    if offset < self.max_offset {
                        (id, offset)
                    } else if id < self.table.len() {
                        (id + 1, 0)
                    } else {
                        self.last()
                    }
                }
            },
            None => self.last(),
        }
    }
    /// Returns a clone of the value stored at `(cell_id, offset)`.
    ///
    /// Panics if either index is out of bounds.
    fn value(&self, cell_id: usize, offset: usize) -> V {
        self.table[cell_id][offset].clone()
    }
}
/// Dictionary-compressed, cell-partitioned coordinate space: one
/// `CellDictionary` per dimension.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CellSpace<K, V> {
    // Number of axes; matches the length of the two vectors below.
    dimensions: usize,
    // One dictionary per axis, indexed by dimension number.
    coordinates: Vec<CellDictionary<K, V>>,
    // Cached `max_offset()` of each dictionary, same order as `coordinates`.
    coordinates_max_offsets: Vec<usize>,
}
impl<K, V> CellSpace<K, V>
where
    V: Clone + Ord + Debug,
    K: Debug + Index<usize, Output = V>,
{
    /// Assembles one `CellDictionary` per axis of `table` and caches each
    /// dictionary's maximum cell occupancy.
    pub fn new<T, R>(table: &T, dimensions: usize, cell_bits: usize) -> Self
    where
        T: Table<R>,
        R: Record<K> + Debug,
        V: Clone + Ord + Debug,
    {
        // FIXME: Add check to ensure all positions have the required number of dimensions.
        let coordinates: Vec<_> = (0..dimensions)
            .map(|axis| CellDictionary::new(table, axis, cell_bits))
            .collect();
        let coordinates_max_offsets = coordinates.iter().map(|dic| dic.max_offset()).collect();
        CellSpace {
            dimensions,
            coordinates,
            coordinates_max_offsets,
        }
    }
    /// Exact lookup: per-axis `(cell id, offset)` pairs for `position`.
    ///
    /// Fails as soon as one coordinate is absent from the corresponding
    /// dictionary.
    pub fn key(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
        let mut cells = Vec::with_capacity(self.dimensions);
        let mut offsets = Vec::with_capacity(self.dimensions);
        for axis in 0..self.dimensions {
            let (id, offset) = self.coordinates[axis].key(&position[axis]).ok_or_else(|| {
                format!(
                    "Incorrect value for position[{:?}]: {:?}",
                    axis, &position[axis]
                )
            })?;
            cells.push(id);
            offsets.push(offset);
        }
        Ok((cells, offsets))
    }
    // Round down to the preceding element or self if in the space
    pub fn key_down(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
        Ok((0..self.dimensions)
            .map(|axis| self.coordinates[axis].key_down(&position[axis]))
            .unzip())
    }
    // Round up to the next element or self if in the space
    pub fn key_up(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
        Ok((0..self.dimensions)
            .map(|axis| self.coordinates[axis].key_up(&position[axis]))
            .unzip())
    }
    /// Maps per-axis `(cell id, offset)` pairs back to coordinate values.
    ///
    /// Both input vectors must have exactly `dimensions` entries.
    pub fn value(&self, cells_id: Vec<usize>, offsets: Vec<usize>) -> Result<Vec<V>, String> {
        if self.dimensions != cells_id.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                cells_id.len(),
                cells_id
            ));
        }
        if self.dimensions != offsets.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                offsets.len(),
                offsets
            ));
        }
        Ok((0..self.dimensions)
            .map(|axis| self.coordinates[axis].value(cells_id[axis], offsets[axis]))
            .collect())
    }
}

17
src/lib.rs Normal file
View File

@@ -0,0 +1,17 @@
// Logging macros (info!, trace!, debug!, error!) used throughout the crate.
#[macro_use]
extern crate log;
// array_ref! builds the fixed-size offset arrays in sfc.rs.
#[macro_use]
extern crate arrayref;
// Derive macros for the (De)Serialize implementations on the index types.
#[macro_use]
extern crate serde_derive;
mod cell_space;
mod morton;
mod sfc;
// Public API: record traits re-exported from ironsea_index, plus the
// space-filling-curve index itself, exposed under the name `IndexOwned`.
pub use sfc::Record;
pub use sfc::RecordBuild;
pub use sfc::RecordFields;
pub use sfc::SpaceFillingCurve as IndexOwned;

446
src/morton.rs Normal file
View File

@@ -0,0 +1,446 @@
use std::fmt;
use std::fmt::Debug;
use serde::de;
use serde::de::Deserialize;
use serde::de::Deserializer;
use serde::de::MapAccess;
use serde::de::SeqAccess;
use serde::de::Visitor;
use serde::ser::Serialize;
use serde::ser::SerializeStruct;
use serde::ser::Serializer;
// A full Morton (Z-order) code: all dimensions interleaved into 32 bits.
pub type MortonCode = u32;
// A single per-dimension value before interleaving (at most 10 bits used).
pub type MortonValue = u16;
// Capacity of a MortonCode, in bits.
const MORTON_CODE_BITS: usize = 32;
// Maximum number of bits a single per-dimension value may contribute.
const MORTON_VALUE_BITS: usize = 10;
// Lookup-table width: 2^MORTON_VALUE_BITS entries per dimension.
const MORTON_MAX_VALUES: usize = 1024;
/// Morton (Z-order) encoder: interleaves the bits of `dimensions` values,
/// `cell_bits` bits each, into a single `MortonCode` via a precomputed
/// per-dimension lookup table.
#[derive(Clone)]
pub struct MortonEncoder {
    // Bits used per dimension.
    cell_bits: usize,
    // (1 << cell_bits) - 1; sanitizes input values before table lookup.
    cell_mask: usize,
    // Number of interleaved dimensions.
    dimensions: usize,
    // table[k][v] = value v with its bits spread for dimension k.
    table: Vec<[MortonCode; MORTON_MAX_VALUES]>,
}
impl Debug for MortonEncoder {
    /// Hand-rolled Debug: the lookup table is a `Vec` of large arrays, which
    /// `#[derive(Debug)]` cannot format, so each row is printed manually.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "MortonEncoder {{ cell_bits: {}, cell_mask: {}, dimensions: {}, table: [ ",
            self.cell_bits, self.cell_mask, self.dimensions
        )?;
        for row in &self.table {
            write!(f, "[ ")?;
            for code in row.iter() {
                write!(f, "{}, ", code)?;
            }
            write!(f, "], ")?;
        }
        write!(f, "] }}")
    }
}
impl MortonEncoder {
    /// Builds an encoder for `dimensions` axes using `cell_bits` bits per
    /// axis, precomputing the spread-bits lookup table.
    ///
    /// # Panics
    /// If `cell_bits` exceeds `MORTON_VALUE_BITS`, or the interleaved code
    /// would not fit in `MORTON_CODE_BITS`.
    pub fn new(dimensions: usize, cell_bits: usize) -> Self {
        // Make sure we can store the encoding in a single T.
        // Don't know how to make that test generically
        assert!(MORTON_VALUE_BITS >= cell_bits);
        assert!(MORTON_CODE_BITS >= cell_bits * dimensions);
        //let mut masks = vec![];
        let mut table = vec![];
        let cell_max = 1 << cell_bits;
        let cell_mask = cell_max - 1;
        // Build lookup table & masks
        for k in 0..dimensions {
            table.push([0; MORTON_MAX_VALUES]);
            for i in 0..cell_max {
                let mut v = 0;
                for p in 0..cell_bits {
                    // Note: bit is at position p, so shift it only K-1 p position again below, instead
                    // of K times
                    let bit = i & (1 << p);
                    let new_bit = bit << (p * (dimensions - 1) + k);
                    v |= new_bit;
                }
                table[k][i] = v as MortonCode;
            }
            /*
            let mut v = 0usize;
            for p in 0..cell_bits {
                let new_bit = 1 << p * (dimensions - 1) + k;
                v = v | new_bit;
            }
            masks.push(v as MortonCode);
            */
        }
        MortonEncoder {
            cell_bits,
            cell_mask,
            dimensions,
            table,
            //masks,
        }
    }
    /// Spreads the bits of `v` for dimension `k` via the lookup table.
    fn encode_1(&self, k: usize, v: MortonValue) -> MortonCode {
        // Already done by the array bound checker anyway
        //assert!((v as usize) < MORTON_MAX_VALUES);
        //assert!(k < self.table.len());
        // Ensure we only have valid values in inputs, even when less bits than
        // the maximum is used to define those values.
        let v = v as usize & self.cell_mask;
        self.table[k][v]
    }
    /// Recovers dimension `k`'s value from an interleaved `code` by
    /// gathering every `table.len()`-th bit, starting at bit `k`.
    fn decode_1(&self, k: usize, code: MortonCode) -> MortonValue {
        // Already done by the array bound checker anyway
        //assert!(k < self.table.len());
        let mut v = 0;
        for i in 0..self.cell_bits {
            // Position of value-bit i within the interleaved code.
            let bit_pos = i * self.table.len() + k;
            let bit = code as usize & (1 << bit_pos);
            // Shift the extracted bit back down to position i.
            let bit_pos = bit_pos - i;
            v |= (bit >> bit_pos) as MortonValue;
        }
        v as MortonValue
    }
    /// Interleaves one value per dimension into a single Morton code.
    ///
    /// Fails if `v` does not hold exactly `dimensions` values.
    pub fn encode(&self, v: &[MortonValue]) -> Result<MortonCode, String> {
        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        if self.dimensions != v.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                v.len(),
                v
            ));
        }
        let mut code = 0;
        for (k, i) in v.iter().enumerate().take(self.dimensions) {
            code |= self.encode_1(k, *i);
        }
        Ok(code)
    }
    /// Splits a Morton code back into one value per dimension.
    pub fn decode(&self, code: MortonCode) -> Vec<MortonValue> {
        let mut values = vec![];
        for k in 0..self.dimensions {
            values.push(self.decode_1(k, code));
        }
        values
    }
}
impl Serialize for MortonEncoder {
    /// Serializes only `cell_bits` and `dimensions`; the lookup table is
    /// deterministic and rebuilt on deserialization.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // We serialize the minimum amount of information necessary to
        // deserialize the table.
        // This is the parameters to init(dimensions, cell_bits)
        let mut state = serializer.serialize_struct("MortonEncoder", 2)?;
        state.serialize_field("cell_bits", &self.cell_bits)?;
        state.serialize_field("dimensions", &self.dimensions)?;
        state.end()
    }
}
impl<'de> Deserialize<'de> for MortonEncoder {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
enum Field {
CellBits,
Dimensions,
};
impl<'de> Deserialize<'de> for Field {
fn deserialize<D>(deserializer: D) -> Result<Field, D::Error>
where
D: Deserializer<'de>,
{
struct FieldVisitor;
impl<'de> Visitor<'de> for FieldVisitor {
type Value = Field;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("`cell_bits` or `dimensions`")
}
fn visit_str<E>(self, value: &str) -> Result<Field, E>
where
E: de::Error,
{
match value {
"cell_bits" => Ok(Field::CellBits),
"dimensions" => Ok(Field::Dimensions),
_ => Err(de::Error::unknown_field(value, FIELDS)),
}
}
}
deserializer.deserialize_identifier(FieldVisitor)
}
}
struct MortonEncoderVisitor;
impl<'de> Visitor<'de> for MortonEncoderVisitor {
type Value = MortonEncoder;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct MortonEncoder")
}
fn visit_seq<V>(self, mut seq: V) -> Result<MortonEncoder, V::Error>
where
V: SeqAccess<'de>,
{
let cell_bits = seq
.next_element()?
.ok_or_else(|| de::Error::invalid_length(0, &self))?;
let dimensions = seq
.next_element()?
.ok_or_else(|| de::Error::invalid_length(1, &self))?;
Ok(MortonEncoder::new(dimensions, cell_bits))
}
fn visit_map<V>(self, mut map: V) -> Result<MortonEncoder, V::Error>
where
V: MapAccess<'de>,
{
let mut cell_bits = None;
let mut dimensions = None;
while let Some(key) = map.next_key()? {
match key {
Field::CellBits => {
if cell_bits.is_some() {
return Err(de::Error::duplicate_field("cell_bits"));
}
cell_bits = Some(map.next_value()?);
}
Field::Dimensions => {
if dimensions.is_some() {
return Err(de::Error::duplicate_field("dimensions"));
}
dimensions = Some(map.next_value()?);
}
}
}
let cell_bits = cell_bits.ok_or_else(|| de::Error::missing_field("cell_bits"))?;
let dimensions =
dimensions.ok_or_else(|| de::Error::missing_field("dimensions"))?;
Ok(MortonEncoder::new(dimensions, cell_bits))
}
}
const FIELDS: &[&str] = &["cell_bits", "dimensions"];
deserializer.deserialize_struct("MortonEncoder", FIELDS, MortonEncoderVisitor)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Constructor tests: the assertions in MortonEncoder::new must reject
    // parameter combinations that overflow MortonValue/MortonCode.
    mod init {
        use super::*;
        /* Check the assertions */
        // NOTE(review): name says bit32 but the call passes 31; either still
        // exceeds MORTON_VALUE_BITS (10), so the panic expectation holds.
        #[test]
        #[should_panic]
        fn dim1_bit32() {
            let _m = MortonEncoder::new(1, 31);
        }
        #[test]
        #[should_panic]
        fn dim2_bit16() {
            // Max 10 bit for the codes, even if 16 would fit
            let _m = MortonEncoder::new(2, 16);
        }
        // 33 dimensions * 1 bit = 33 bits > MORTON_CODE_BITS (32).
        #[test]
        #[should_panic]
        fn dim33_bit1() {
            let _m = MortonEncoder::new(33, 1);
        }
        // 17 dimensions * 2 bits = 34 bits > MORTON_CODE_BITS (32).
        #[test]
        #[should_panic]
        fn dim17_bit2() {
            let _m = MortonEncoder::new(17, 2);
        }
        // The remaining combinations all fit and must not panic.
        #[test]
        fn dim1_bit10() {
            let _m = MortonEncoder::new(1, 10);
        }
        #[test]
        fn dim2_bit10() {
            let _m = MortonEncoder::new(2, 10);
        }
        #[test]
        fn dim3_bit10() {
            let _m = MortonEncoder::new(3, 10);
        }
        #[test]
        fn dim4_bit8() {
            let _m = MortonEncoder::new(4, 8);
        }
        #[test]
        fn dim32_bit1() {
            let _m = MortonEncoder::new(32, 1);
        }
        /*
        morton_init();
        // Morton table looks OK
        // for n in 0..10 {
        // println!("{:4}", n);
        // for k in 0..K {
        // println!("{:032b}", unsafe {MORTON[k][n]});
        // }
        // }
        for n in 0..CELL_MAX {
            println!("## {:04}", n);
            let mut c = 0 as Code;
            for k in 0..K {
                // check diagonal
                c = c | morton_encode(k, n as u16);
            }
            let f = n as u16;
            for k in 1..2 {
                // check diagonal
                let p = morton_decode(k, c);
                println!("\n{:04} \n f {:04}\n p {:04}\n 𝚫 {:06}\n", c, f, p, f-p);
            }
        }
        let mut f = 0.0f64;
        // while f < 1.0 {
        // let v = convert_to_fixed(&f);
        // let p = convert_to_f64(&v);
        // println!("\n{:010} \n f {:+0.16e}\n p {:+03.16e}\n 𝚫 {:+03.16e}\n", v, f, p, f - p);
        //
        // f += 0.1e-1;
        // }
        let f =0.000724939184752;
        let v = convert_to_fixed(&f);
        let p = convert_to_f64(&v);
        println!("\n{:010} \n f {:+0.16e}\n p {:+03.16e}\n 𝚫 {:+03.16e}\n", v, f, p, f - p);
        */
    }
    // Encoding tests: the precomputed lookup table must match a direct
    // bit-by-bit interleaving computed independently here.
    mod encode {
        use super::*;
        /* Check the lookup table produced */
        // One dimension: encoding must be the identity.
        #[test]
        fn dim1_bit10() {
            let m = MortonEncoder::new(1, 10);
            for n in 0..MORTON_MAX_VALUES {
                assert_eq!(n as MortonCode, m.encode_1(0, n as MortonValue));
            }
        }
        #[test]
        fn table_dim2_bit10() {
            let m = MortonEncoder::new(2, 10);
            let mut lookup = Vec::<Vec<MortonCode>>::new();
            for k in 0..2 {
                lookup.push(Vec::new());
                for n in 0..MORTON_MAX_VALUES {
                    // Morton numbers are number where the bit are exploded so that we can
                    // interleave them. This means that for each position of a value, we need to
                    // insert dimensions - 1 columns between each bits, and shift that result by the
                    // dimension number so that we can OR all the dimensions together without having
                    // bits colliding.
                    let mut v = 0;
                    for p in 0..MORTON_VALUE_BITS {
                        let b = (n & (1 << p)) >> p;
                        v = v | b << (p * 2 + k);
                    }
                    lookup[k].push(v as MortonCode);
                }
            }
            for k in 0..2 {
                for n in 0..MORTON_MAX_VALUES {
                    assert_eq!(lookup[k][n], m.encode_1(k, n as MortonValue));
                }
            }
        }
        // Shared checker: rebuilds the expected lookup table for the given
        // geometry and compares it entry-by-entry against `m`'s table.
        fn check(dimensions: usize, value_max: usize, value_bits: usize, m: MortonEncoder) -> () {
            let mut lookup = Vec::<Vec<MortonCode>>::new();
            for k in 0..dimensions {
                lookup.push(Vec::new());
                for n in 0..value_max {
                    // Morton numbers are number where the bit are exploded so that we can
                    // interleave them. This means that for each position of a value, we need to
                    // insert dimensions -1 columns between each bits, and shift that result by the
                    // dimension number so that we can OR all the dimensions together without having
                    // bits colliding.
                    let mut v = 0;
                    for p in 0..value_bits {
                        let b = (n & (1 << p)) >> p;
                        v = v | b << (p * dimensions + k);
                    }
                    lookup[k].push(v as MortonCode);
                }
            }
            for k in 0..dimensions {
                for n in 0..value_max {
                    assert_eq!(lookup[k][n], m.encode_1(k, n as MortonValue));
                }
            }
        }
        #[test]
        fn table_dim3_bit10() {
            let m = MortonEncoder::new(3, 10);
            check(3, 1024, 10, m);
        }
        #[test]
        fn table_dim4_bit8() {
            let m = MortonEncoder::new(4, 8);
            check(4, 256, 8, m);
        }
    }
}

388
src/sfc.rs Normal file
View File

@@ -0,0 +1,388 @@
use std::fmt::Debug;
use std::io;
use std::iter::FromIterator;
use std::marker;
use std::ops::Index;
use serde::de::DeserializeOwned;
use serde::Serialize;
pub use ironsea_index::IndexedOwned;
pub use ironsea_index::Record;
pub use ironsea_index::RecordBuild;
pub use ironsea_index::RecordFields;
use ironsea_store::Load;
use ironsea_store::Store;
use ironsea_table::Table;
use super::cell_space::CellSpace;
use super::morton::MortonCode;
use super::morton::MortonEncoder;
use super::morton::MortonValue;
// Space-filling-curve code of a cell (same width as a Morton code).
type SFCCode = u32;
// Offset of a coordinate value within its cell, stored per dimension.
type SFCOffset = u32;
//FIXME: Remove the need for a constant, how can we make it type-checked instead?
// type-num crate?
const MAX_K: usize = 3;
// One bound of a range query: index into the sorted cell list, plus the
// rounded corner position.
#[derive(Debug)]
struct Limit<V> {
    idx: usize,
    position: Vec<V>,
}
// Start/end bounds of a range query, as computed by `limits()`.
#[derive(Debug)]
struct Limits<V> {
    start: Limit<V>,
    end: Limit<V>,
}
// A record as stored in the index: per-dimension offsets within its cell,
// plus the non-key fields needed to rebuild the original record.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct SFCRecord<F> {
    //FIXME: Find a way around hardcoding MAX_K
    offsets: [SFCOffset; MAX_K],
    fields: F,
}
// All records sharing a single Morton code.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct SFCCell<F> {
    code: MortonCode,
    records: Vec<SFCRecord<F>>,
}
/// Space-filling-curve index: records grouped into cells sorted by Morton
/// code, over a dictionary-compressed coordinate space.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceFillingCurve<T, R, K, V, F>
where
    T: Table<R>,
    R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
    // K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
    V: Clone + Ord + Debug + From<usize>,
{
    // Number of coordinate axes indexed.
    dimensions: usize,
    // Interleaves per-axis cell ids into a single Morton code.
    morton: MortonEncoder,
    // Dictionary-compressed coordinate space shared by all records.
    space: CellSpace<K, V>,
    // Cells sorted by Morton code, each holding its records.
    index: Vec<SFCCell<F>>,
    // T and R only appear in the impls' trait bounds, nothing is stored.
    _marker: marker::PhantomData<(T, R)>,
}
impl<T, R, K, V, F> SpaceFillingCurve<T, R, K, V, F>
where
    T: Table<R>,
    R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
    V: Clone + Ord + Debug + From<usize>,
    K: Debug + Index<usize, Output = V> + FromIterator<V>,
{
    /// Builds the index over `table`:
    /// 1. builds the dictionary space and the Morton encoder,
    /// 2. flattens the table into `(code, record)` pairs,
    /// 3. sorts by code and groups records into per-code cells.
    ///
    /// If no record could be indexed, an empty (but usable) index is
    /// returned instead of panicking on `flat_table[0]` as before.
    // FIXME: CellSpace/CellDictionary construction may still panic on an
    // empty table (see cell_space.rs).
    pub fn new(table: &T, dimensions: usize, cell_bits: usize) -> Self {
        // 1. build the dictionary space, called here CellSpace, as well as
        // initialize the morton encoder used to project the multi-dimensional
        // coordinates into a single dimension.
        let mut index = SpaceFillingCurve {
            dimensions,
            morton: MortonEncoder::new(dimensions, cell_bits),
            space: CellSpace::new(table, dimensions, cell_bits),
            index: vec![],
            _marker: marker::PhantomData,
        };
        // 2. Build a flat table of (code, offset, entries)
        let mut flat_table = vec![];
        for record in table.get_table() {
            let position = record.key();
            match index.space.key(&position) {
                Ok((cell_ids, offsets)) => match index.encode(&cell_ids) {
                    Ok(code) => {
                        let offsets = offsets.iter().map(|i| *i as SFCOffset).collect::<Vec<_>>();
                        flat_table.push((
                            code,
                            SFCRecord {
                                offsets: *array_ref!(offsets, 0, MAX_K),
                                fields: record.fields(),
                            },
                        ))
                    }
                    Err(e) => error!("Unable to encode position {:#?}: {}", cell_ids, e),
                },
                Err(e) => error!("Invalid position {:#?}: {}", position, e),
            }
        }
        debug!(
            "Processed {:#?} records into the index",
            table.get_table().len()
        );
        // Nothing indexed (empty table, or every record rejected above):
        // return the empty index instead of panicking on flat_table[0].
        if flat_table.is_empty() {
            return index;
        }
        // 5. Sort by SFCcode
        flat_table.sort_unstable_by(|a, b| a.0.cmp(&b.0));
        let nb_records = flat_table.len();
        let mut current_cell_code = flat_table[0].0;
        let mut count = 0;
        index.index.push(SFCCell {
            code: current_cell_code,
            records: vec![],
        });
        // Group consecutive identical codes into a single cell.
        for (code, record) in flat_table {
            if code == current_cell_code {
                index.index[count].records.push(record);
            } else {
                index.index.push(SFCCell {
                    code,
                    records: vec![record],
                });
                current_cell_code = code;
                count += 1;
            }
        }
        debug!("Inserted {:#?} records into the index", nb_records);
        index
    }
    /// Linear scan over every indexed record, rebuilding each record whose
    /// fields compare equal to `value`.
    pub fn find_by_value(&self, value: &F) -> Vec<R>
    where
        F: std::cmp::PartialEq,
    {
        let mut results = vec![];
        for cell in &self.index {
            for record in &cell.records {
                if &record.fields == value {
                    if let Ok(r) = self.get_record(cell.code, &record) {
                        results.push(r);
                    }
                }
            }
        }
        results
    }
    // Map the cell_ids of a point to its SFCcode
    #[inline]
    fn encode(&self, cell_ids: &[usize]) -> Result<SFCCode, String> {
        let mut t = vec![];
        for v in cell_ids.iter() {
            t.push(*v as MortonValue);
        }
        self.morton.encode(&t)
    }
    // Build coordinate values from encoded value
    fn position(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<K, String> {
        let position = self.space.value(
            self.morton
                .decode(code)
                .iter()
                .map(|e| *e as usize)
                .collect(),
            offsets.iter().map(|e| *e as usize).collect(),
        )?;
        Ok(position.iter().map(|i| (*i).clone()).collect())
    }
    // Rebuild a specific record from its cell code and stored entry.
    fn get_record(&self, code: SFCCode, entry: &SFCRecord<F>) -> Result<R, String> {
        let position = self.position(code, &entry.offsets)?;
        Ok(R::build(&position, &entry.fields))
    }
    /// Computes the cell-index range `[start.idx, end.idx)` covering the
    /// query volume, along with the rounded corner positions.
    fn limits(&self, start: &K, end: &K) -> Result<Limits<V>, String> {
        trace!("limits: {:?} - {:?}", start, end);
        // Round down if not found, for start of range:
        let (cells, offsets) = self.space.key_down(start)?;
        let code = self.encode(&cells)?;
        let idx = match self.index.binary_search_by(|e| e.code.cmp(&code)) {
            Err(e) => {
                if e > 0 {
                    e - 1
                } else {
                    0
                }
            }
            Ok(c) => c,
        };
        let position = self.space.value(cells, offsets)?;
        let start = Limit { idx, position };
        // Round up if not found, for end of range:
        let (cells, offsets) = self.space.key_up(end)?;
        let code = self.encode(&cells)?;
        let idx = match self.index.binary_search_by(|e| e.code.cmp(&code)) {
            Err(e) => {
                if e >= self.index.len() {
                    self.index.len()
                } else {
                    e
                }
            }
            // Exact match: +1 makes the end index exclusive.
            Ok(c) => c + 1,
        };
        let position = self.space.value(cells, offsets)?;
        let end = Limit { idx, position };
        trace!("limits: {:?} - {:?}", start, end);
        Ok(Limits { start, end })
    }
}
impl<T, R, K, V, F> IndexedOwned<T, R, K> for SpaceFillingCurve<T, R, K, V, F>
where
    T: Table<R>,
    R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
    K: Debug + Index<usize, Output = V> + FromIterator<V>,
    V: Clone + Debug + Ord + From<usize> + Debug,
{
    /// Exact-match lookup: rebuilds every record stored exactly at `key`.
    ///
    /// Encoding or reconstruction failures are logged and skipped, so the
    /// result may be empty even when the key exists.
    fn find(&self, key: &K) -> Vec<R> {
        let mut values = vec![];
        if let Ok((cell_ids, offsets)) = self.space.key(key) {
            match self.encode(&cell_ids) {
                Err(e) => error!("{}", e),
                Ok(code) => {
                    if let Ok(cell) = self.index.binary_search_by(|a| a.code.cmp(&code)) {
                        for record in &self.index[cell].records {
                            // A record matches only if every per-dimension
                            // offset equals the key's offsets.
                            let mut select = true;
                            for (k, o) in offsets.iter().enumerate().take(self.dimensions) {
                                select &= record.offsets[k] == (*o as SFCOffset);
                            }
                            if select {
                                match self.get_record(code, record) {
                                    Err(e) => error!("{}", e),
                                    Ok(r) => values.push(r),
                                }
                            }
                        }
                    }
                }
            }
        }
        values
    }
    /// Range query: rebuilds every record whose position lies within
    /// `[start, end]` on every dimension (bounds rounded outwards by
    /// `limits()`).
    fn find_range(&self, start: &K, end: &K) -> Vec<R> {
        let mut values = vec![];
        match self.limits(start, end) {
            Ok(limits) => {
                // Scan all candidate cells in the Morton-code interval.
                for idx in limits.start.idx..limits.end.idx {
                    let code = self.index[idx].code;
                    for record in &self.index[idx].records {
                        let mut select = true;
                        let pos = match self.position(code, &record.offsets) {
                            Err(e) => {
                                error!("{}", e);
                                continue;
                            }
                            Ok(p) => p,
                        };
                        // FIXME: Reduce number of comparison by using the cells boundaries.
                        for k in 0..self.dimensions {
                            select = select
                                && limits.start.position[k] <= pos[k]
                                && limits.end.position[k] >= pos[k];
                        }
                        if select {
                            match self.get_record(code, &record) {
                                Err(e) => error!("{}", e),
                                Ok(r) => values.push(r),
                            }
                        }
                    }
                }
            }
            Err(e) => error!("find_range: limits failed: {}", e),
        };
        values
    }
}
// Rough check, based on per-dimension cell Ids.
/*
// If the cell_ids are between ]pos_start and pos_end[, then the value is within the range,
// If the cell_ids are outside [pos_start, pos_end], then the value is out, stop checking
// Else, check the offsets of each entry to be within [off_start, off_end], then the value is within the range.
let mut rough_in = true;
for k in 0..self.dimensions {
if !(cells[k] > start_limits.cells[k] && cells[k] < end_limits.cells[k]) {
rough_in = false;
}
}
if rough_in {
// This is a cell well within the volume, so all points are a match, add all points,
// go to next cell.
for entry in entries {
values.push(self.get_element(code, entry))
}
continue;
}
let mut rough_out = false;
for k in 0..self.dimensions {
if cells[k] < start_limits.cells[k] || cells[k] > end_limits.cells[k] {
rough_out = false;
}
}
// If rough is not true, then we have nothing to double check.
if rough_out {
continue;
}
*/
impl<T, R, K, V, F> Store for SpaceFillingCurve<T, R, K, V, F>
where
    T: Table<R>,
    R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
    // K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
    K: Serialize,
    V: Clone + Ord + Debug + From<usize> + Serialize,
    F: Serialize,
{
    /// Serializes the whole index into `writer` with bincode, converting
    /// any serialization failure into an `io::Error`.
    fn store<W>(&mut self, writer: W) -> io::Result<()>
    where
        W: std::io::Write,
    {
        bincode::serialize_into(writer, &self)
            .map_err(|e| io::Error::new(io::ErrorKind::WriteZero, e))
    }
}
impl<T, R, K, V, F> Load for SpaceFillingCurve<T, R, K, V, F>
where
T: Table<R>,
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
K: DeserializeOwned,
V: Clone + Ord + Debug + From<usize> + DeserializeOwned,
F: DeserializeOwned,
{
fn load<Re: io::Read>(reader: Re) -> io::Result<Self> {
match bincode::deserialize_from(reader) {
Ok(data) => Ok(data),
Err(e) => Err(io::Error::new(io::ErrorKind::InvalidData, e)),
}
}
// only required for store_mapped_file
fn load_slice(from: &[u8]) -> io::Result<Self> {
match bincode::deserialize(from) {
Ok(data) => Ok(data),
Err(e) => Err(io::Error::new(io::ErrorKind::InvalidData, e)),
}
}
}