Initial commit
This commit is contained in:
31
Cargo.toml
Normal file
31
Cargo.toml
Normal file
@@ -0,0 +1,31 @@
|
||||
[package]
|
||||
name = "ironsea_index_sfc_dbc"
|
||||
version = "0.1.0"
|
||||
authors = ["EPFL-DIAS", "Lionel Sambuc <lionel.sambuc@epfl.ch>"]
|
||||
|
||||
edition = "2018"
|
||||
|
||||
description = "Space-filling Curve over dictionary-based compression, index implementation for the Iron Sea database toolkit."
|
||||
homepage = "https://crates.io/crates/ironsea_index_sfc_dbc"
|
||||
repository = "https://github.com/epfl-dias/ironsea_index_sfc_dbc"
|
||||
readme = "README.md"
|
||||
|
||||
keywords = []
|
||||
categories = ["database-implementations", "data-structures"]
|
||||
|
||||
license = "MIT"
|
||||
#license-file = "LICENSE"
|
||||
|
||||
include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.rs"]
|
||||
|
||||
[dependencies]
|
||||
ironsea_index = "^0.1"
|
||||
ironsea_table = "^0.1"
|
||||
ironsea_store = "^0.1"
|
||||
|
||||
arrayref = "^0.3"
|
||||
log = { version = "^0.4", features = ["max_level_trace", "release_max_level_info"] }
|
||||
|
||||
serde = "^1.0"
|
||||
serde_derive = "^1.0"
|
||||
bincode = "^1.1"
|
||||
37
README.md
Normal file
37
README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Iron Sea - Index SFC DBC
|
||||
|
||||
Index for the Iron Sea toolkit, based on a Space Filling Curve (SFC), over dictionary-based compression (DBC), which offers great
performance for both range queries over point cloud data and at the same time uses a storage-efficient index.
|
||||
|
||||
More details in the paper: https://infoscience.epfl.ch/record/232536?ln=en
|
||||
|
||||
## Iron Sea: Database Toolkit
|
||||
|
||||
**Iron Sea** provides a set of database engine bricks, which can be combined and applied on arbitrary data structures.
|
||||
|
||||
Unlike a traditional database, it does not assume a specific physical structure for the tables nor the records, but relies on the developer to provide a set of extractor functions which are used by the specific indices provided.
|
||||
|
||||
This enables the index implementations to be agnostic from the underlying data structure, and re-used.
|
||||
|
||||
## Requirements
|
||||
|
||||
### Software
|
||||
|
||||
* Rust: https://www.rust-lang.org
|
||||
|
||||
## Documentation
|
||||
|
||||
For more information, please refer to the [documentation](https://epfl-dias.github.io/ironsea_index_sfc_dbc/).
|
||||
|
||||
If you want to build the documentation and access it locally, you can use:
|
||||
|
||||
```sh
|
||||
cargo doc --open
|
||||
```
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
This open source software code was developed in part or in whole in the
|
||||
Human Brain Project, funded from the European Union’s Horizon 2020
|
||||
Framework Programme for Research and Innovation under the Specific Grant
|
||||
Agreement No. 785907 (Human Brain Project SGA2).
|
||||
348
src/cell_space.rs
Normal file
348
src/cell_space.rs
Normal file
@@ -0,0 +1,348 @@
|
||||
use std::fmt::Debug;
|
||||
use std::marker;
|
||||
use std::ops::Index;
|
||||
|
||||
use ironsea_index::Record;
|
||||
use ironsea_table::Table;
|
||||
|
||||
type Cell<T> = Vec<T>;
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
struct CellDictionary<K, V> {
|
||||
table: Vec<Cell<V>>,
|
||||
max_offset: usize,
|
||||
_marker: marker::PhantomData<(K)>,
|
||||
}
|
||||
|
||||
impl<K, V> CellDictionary<K, V>
|
||||
where
|
||||
V: Clone + Ord + Debug,
|
||||
K: Debug + Index<usize, Output = V>,
|
||||
{
|
||||
pub fn new<T, R>(table: &T, dimension: usize, cell_bits: usize) -> Self
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + Debug,
|
||||
{
|
||||
// Do not forget to initialise cells[0]
|
||||
let mut cells: Vec<Cell<V>> = vec![vec![]];
|
||||
|
||||
// 1. Retrieve a list of distinct values for the coordinate `dimension`
|
||||
let mut distinct = vec![];
|
||||
let records = table.get_table();
|
||||
|
||||
for record in records {
|
||||
distinct.push(record.key()[dimension].clone());
|
||||
}
|
||||
|
||||
// 2. Build sorted, distinct lists
|
||||
distinct.sort_unstable();
|
||||
distinct.dedup();
|
||||
|
||||
info!(
|
||||
"Number of distinct coordinates on dim[{}]: {}",
|
||||
dimension,
|
||||
distinct.len()
|
||||
);
|
||||
|
||||
trace!("min {:?}, max {:?}", distinct[0], distinct.last());
|
||||
|
||||
// 3. Build the dictionary space
|
||||
// 3.1. Build dictionnary per dimension, Add cell and offset
|
||||
// informations
|
||||
let mut count = 0;
|
||||
let mut cell = 0;
|
||||
|
||||
// Beware integer division is rounded towards zero, so add 1 to the
|
||||
// result as this is the max number of elements per bucket.
|
||||
let max_offset = (distinct.len() / (1 << cell_bits)) + 1;
|
||||
|
||||
for coordinate in distinct {
|
||||
//trace!("{:?} {:?} {:?} {:?}", dimension, coordinate, cell, count);
|
||||
|
||||
if count == max_offset {
|
||||
count = 0;
|
||||
cell += 1;
|
||||
cells.push(vec![]);
|
||||
}
|
||||
|
||||
cells[cell].push(coordinate);
|
||||
count += 1;
|
||||
}
|
||||
|
||||
info!(
|
||||
"dim[{}]: {} cells, {} max per cell",
|
||||
dimension,
|
||||
cells.len(),
|
||||
max_offset,
|
||||
);
|
||||
|
||||
CellDictionary {
|
||||
table: cells,
|
||||
max_offset,
|
||||
_marker: marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn max_offset(&self) -> usize {
|
||||
self.max_offset
|
||||
}
|
||||
|
||||
fn cells(&self) -> &Vec<Cell<V>> {
|
||||
&self.table
|
||||
}
|
||||
|
||||
fn cell_id(&self, position: &V) -> Option<usize>
|
||||
where
|
||||
V: Clone + Ord + Debug,
|
||||
{
|
||||
let mut id = 0;
|
||||
// If the last value of the current cell is >= than the value, then
|
||||
// the value is stored in the cell.
|
||||
// If this is the first cell, we will look into it as `id` is
|
||||
// still 0.
|
||||
for cell in self.cells() {
|
||||
// last cell is likely to be only partially full
|
||||
match cell.last() {
|
||||
Some(x) => {
|
||||
if x >= position {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None => break,
|
||||
};
|
||||
id += 1;
|
||||
}
|
||||
|
||||
if id >= self.cells().len() {
|
||||
None
|
||||
} else {
|
||||
Some(id)
|
||||
}
|
||||
}
|
||||
|
||||
fn key(&self, position: &V) -> Option<(usize, usize)> {
|
||||
let mut result = None;
|
||||
if let Some(id) = self.cell_id(position) {
|
||||
if let Ok(offset) = self.table[id].binary_search(position) {
|
||||
result = Some((id, offset));
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn key_down(&self, position: &V) -> (usize, usize) {
|
||||
match self.cell_id(position) {
|
||||
Some(id) => match self.table[id].binary_search(position) {
|
||||
Ok(offset) => (id, offset),
|
||||
Err(offset) => {
|
||||
if offset > 0 {
|
||||
(id, offset - 1)
|
||||
} else if id == 0 {
|
||||
(0, 0)
|
||||
} else {
|
||||
let id = id - 1;
|
||||
(id, self.table[id].len() - 1)
|
||||
}
|
||||
}
|
||||
},
|
||||
None => self.last(),
|
||||
}
|
||||
}
|
||||
|
||||
fn last(&self) -> (usize, usize) {
|
||||
let last_id = self.table.len() - 1;
|
||||
let last_offset = self.table[last_id].len() - 1;
|
||||
|
||||
(last_id, last_offset)
|
||||
}
|
||||
|
||||
fn key_up(&self, position: &V) -> (usize, usize) {
|
||||
match self.cell_id(position) {
|
||||
Some(id) => match self.table[id].binary_search(position) {
|
||||
Ok(offset) => (id, offset),
|
||||
Err(offset) => {
|
||||
if offset < self.max_offset {
|
||||
(id, offset)
|
||||
} else if id < self.table.len() {
|
||||
(id + 1, 0)
|
||||
} else {
|
||||
self.last()
|
||||
}
|
||||
}
|
||||
},
|
||||
None => self.last(),
|
||||
}
|
||||
}
|
||||
|
||||
fn value(&self, cell_id: usize, offset: usize) -> V {
|
||||
self.table[cell_id][offset].clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// The full dictionary space: one `CellDictionary` per dimension of the
/// record key.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CellSpace<K, V> {
    // Number of coordinates in a position key.
    dimensions: usize,
    // One dictionary per dimension, in dimension order.
    coordinates: Vec<CellDictionary<K, V>>,
    // Cached `max_offset()` of each dictionary, same order as `coordinates`.
    coordinates_max_offsets: Vec<usize>,
}
|
||||
|
||||
impl<K, V> CellSpace<K, V>
where
    V: Clone + Ord + Debug,
    K: Debug + Index<usize, Output = V>,
{
    /// Builds one `CellDictionary` per dimension from the records in
    /// `table`.
    pub fn new<T, R>(table: &T, dimensions: usize, cell_bits: usize) -> Self
    where
        T: Table<R>,
        R: Record<K> + Debug,
        V: Clone + Ord + Debug,
    {
        let mut space = CellSpace {
            dimensions,
            coordinates: vec![],
            coordinates_max_offsets: vec![],
        };

        // FIXME: Add check to ensure all positions have the required number of dimensions.
        for k in 0..dimensions {
            let dic = CellDictionary::new(table, k, cell_bits);
            let max = dic.max_offset();
            space.coordinates.push(dic);
            space.coordinates_max_offsets.push(max);
        }

        space
    }

    /*
    pub fn cells_id(&self, position: &Vec<V>) -> Result<Vec<Option<usize>>, String> {
        trace!("cells_id: position {:?}", position);
        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        if self.dimensions != position.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                position.len(),
                position
            ));
        }

        let mut cells = vec![];
        for k in 0..self.dimensions {
            cells.push(self.coordinates[k].cell_id(&position[k]));
        }
        trace!("cells_id: cells {:?}", cells);
        Ok(cells)
    }
    */
    /// Exact lookup: per-dimension `(cell ids, offsets)` for `position`;
    /// `Err` when any coordinate is absent from its dictionary.
    pub fn key(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        /* This impose to require ExactSizeIterator, which is not implemented on Vec, and can't be in any easy way.
        if self.dimensions != position.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                position.len(),
                position
            ));
        }*/

        let mut cells = vec![];
        let mut offsets = vec![];
        for k in 0..self.dimensions {
            match self.coordinates[k].key(&position[k]) {
                None => {
                    return Err(format!(
                        "Incorrect value for position[{:?}]: {:?}",
                        k, &position[k]
                    ))
                }
                Some((id, offset)) => {
                    cells.push(id);
                    offsets.push(offset)
                }
            };
        }

        Ok((cells, offsets))
    }

    // Round down to the preceding element or self if in the space
    pub fn key_down(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        /* This impose to require ExactSizeIterator, which is not implemented on Vec, and can't be in any easy way.
        if self.dimensions != position.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                position.len(),
                position
            ));
        }*/

        let mut cells = vec![];
        let mut offsets = vec![];
        for k in 0..self.dimensions {
            let (id, offset) = self.coordinates[k].key_down(&position[k]);
            cells.push(id);
            offsets.push(offset);
        }

        Ok((cells, offsets))
    }

    // Round up to the next element or self if in the space
    pub fn key_up(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        /* This impose to require ExactSizeIterator, which is not implemented on Vec, and can't be in any easy way.
        if self.dimensions != position.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                position.len(),
                position
            ));
        }*/

        let mut cells = vec![];
        let mut offsets = vec![];
        for k in 0..self.dimensions {
            let (id, offset) = self.coordinates[k].key_up(&position[k]);
            cells.push(id);
            offsets.push(offset);
        }

        Ok((cells, offsets))
    }

    /// Resolves per-dimension `(cell id, offset)` pairs back into the
    /// stored coordinate values, one per dimension.
    pub fn value(&self, cells_id: Vec<usize>, offsets: Vec<usize>) -> Result<Vec<V>, String> {
        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        if self.dimensions != cells_id.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                cells_id.len(),
                cells_id
            ));
        }

        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        if self.dimensions != offsets.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                offsets.len(),
                offsets
            ));
        }

        let mut values = vec![];
        for k in 0..self.dimensions {
            values.push(self.coordinates[k].value(cells_id[k], offsets[k]));
        }

        Ok(values)
    }
}
|
||||
17
src/lib.rs
Normal file
17
src/lib.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
// Logging macros (info!, trace!, debug!, error!) used across the crate.
#[macro_use]
extern crate log;

// `array_ref!` is used in sfc.rs to build fixed-size offset arrays.
#[macro_use]
extern crate arrayref;

// Derives for Serialize/Deserialize on the index structures.
#[macro_use]
extern crate serde_derive;

mod cell_space;
mod morton;
mod sfc;

// Public API: re-export the record traits and the index type.
pub use sfc::Record;
pub use sfc::RecordBuild;
pub use sfc::RecordFields;
pub use sfc::SpaceFillingCurve as IndexOwned;
|
||||
446
src/morton.rs
Normal file
446
src/morton.rs
Normal file
@@ -0,0 +1,446 @@
|
||||
use std::fmt;
use std::fmt::Debug;

use serde::de;
use serde::de::Deserialize;
use serde::de::Deserializer;
use serde::de::MapAccess;
use serde::de::SeqAccess;
use serde::de::Visitor;
use serde::ser::Serialize;
use serde::ser::SerializeStruct;
use serde::ser::Serializer;

// Interleaved (Morton) code produced by the encoder.
pub type MortonCode = u32;
// Per-dimension value to be interleaved.
pub type MortonValue = u16;

// Width of a full Morton code, in bits.
const MORTON_CODE_BITS: usize = 32;
// Maximum number of bits usable per dimension value.
const MORTON_VALUE_BITS: usize = 10;
// Lookup-table row length: one entry per possible value (2^10).
const MORTON_MAX_VALUES: usize = 1024;

/// Encodes/decodes multi-dimensional cell ids to/from Morton codes, using
/// a per-dimension lookup table precomputed by `new`.
#[derive(Clone)]
pub struct MortonEncoder {
    // Bits actually used per dimension value.
    cell_bits: usize,
    // `(1 << cell_bits) - 1`, used to clamp input values.
    cell_mask: usize,
    // Number of dimensions interleaved into one code.
    dimensions: usize,
    // One lookup row per dimension: value -> pre-interleaved code bits.
    table: Vec<[MortonCode; MORTON_MAX_VALUES]>,
}
|
||||
|
||||
impl Debug for MortonEncoder {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"MortonEncoder {{ cell_bits: {}, cell_mask: {}, dimensions: {}, table: ",
|
||||
self.cell_bits, self.cell_mask, self.dimensions
|
||||
)?;
|
||||
write!(f, "[ ")?;
|
||||
for k in &self.table {
|
||||
write!(f, "[ ")?;
|
||||
for v in k.iter() {
|
||||
write!(f, "{}, ", v)?;
|
||||
}
|
||||
write!(f, "], ")?;
|
||||
}
|
||||
write!(f, "] }}")
|
||||
}
|
||||
}
|
||||
|
||||
impl MortonEncoder {
    /// Builds the per-dimension lookup tables for interleaving
    /// `cell_bits`-wide values over `dimensions` dimensions.
    ///
    /// # Panics
    /// If `cell_bits` exceeds `MORTON_VALUE_BITS`, or the interleaved
    /// code would not fit in `MORTON_CODE_BITS`.
    pub fn new(dimensions: usize, cell_bits: usize) -> Self {
        // Make sure we can store the encoding in a single T.
        // Don't know how to make that test generically
        assert!(MORTON_VALUE_BITS >= cell_bits);
        assert!(MORTON_CODE_BITS >= cell_bits * dimensions);

        //let mut masks = vec![];
        let mut table = vec![];
        let cell_max = 1 << cell_bits;
        let cell_mask = cell_max - 1;

        // Build lookup table & masks
        for k in 0..dimensions {
            table.push([0; MORTON_MAX_VALUES]);
            for i in 0..cell_max {
                let mut v = 0;
                for p in 0..cell_bits {
                    // Note: bit is at position p, so shift it only K-1 p position again below, instead
                    // of K times
                    let bit = i & (1 << p);
                    let new_bit = bit << (p * (dimensions - 1) + k);
                    v |= new_bit;
                }
                table[k][i] = v as MortonCode;
            }
            /*
            let mut v = 0usize;
            for p in 0..cell_bits {
                let new_bit = 1 << p * (dimensions - 1) + k;
                v = v | new_bit;
            }
            masks.push(v as MortonCode);
            */
        }

        MortonEncoder {
            cell_bits,
            cell_mask,
            dimensions,
            table,
            //masks,
        }
    }

    // Interleave one value of dimension `k` via table lookup.
    fn encode_1(&self, k: usize, v: MortonValue) -> MortonCode {
        // Already done by the array bound checker anyway
        //assert!((v as usize) < MORTON_MAX_VALUES);
        //assert!(k < self.table.len());

        // Ensure we only have valid values in inputs, even when less bits than
        // the maximum is used to define those values.
        let v = v as usize & self.cell_mask;
        self.table[k][v]
    }

    // Extract dimension `k`'s value back out of an interleaved code:
    // bit i of dimension k lives at code position i * dimensions + k.
    fn decode_1(&self, k: usize, code: MortonCode) -> MortonValue {
        // Already done by the array bound checker anyway
        //assert!(k < self.table.len());

        let mut v = 0;

        for i in 0..self.cell_bits {
            let bit_pos = i * self.table.len() + k;
            let bit = code as usize & (1 << bit_pos);
            // Shift the bit back down to position i of the value.
            let bit_pos = bit_pos - i;
            v |= (bit >> bit_pos) as MortonValue;
        }

        v as MortonValue
    }

    /// Interleaves one value per dimension into a single Morton code.
    ///
    /// # Errors
    /// Returns `Err` when `v.len()` does not match `dimensions`.
    pub fn encode(&self, v: &[MortonValue]) -> Result<MortonCode, String> {
        //TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
        if self.dimensions != v.len() {
            return Err(format!(
                "Incorrect number of dimensions, expected {}, got {} for {:?}",
                self.dimensions,
                v.len(),
                v
            ));
        }

        let mut code = 0;

        for (k, i) in v.iter().enumerate().take(self.dimensions) {
            code |= self.encode_1(k, *i);
        }

        Ok(code)
    }

    /// Recovers the per-dimension values from a Morton code.
    pub fn decode(&self, code: MortonCode) -> Vec<MortonValue> {
        let mut values = vec![];

        for k in 0..self.dimensions {
            values.push(self.decode_1(k, code));
        }

        values
    }
}
|
||||
|
||||
impl Serialize for MortonEncoder {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // We serialize the minimum amount of information necessary to
        // deserialize the table.
        // This is the parameters to init(dimensions, cell_bits)
        // (`cell_mask` and `table` are deterministically recomputed by
        // `MortonEncoder::new` on deserialization).
        let mut state = serializer.serialize_struct("MortonEncoder", 2)?;
        state.serialize_field("cell_bits", &self.cell_bits)?;
        state.serialize_field("dimensions", &self.dimensions)?;
        state.end()
    }
}
|
||||
|
||||
impl<'de> Deserialize<'de> for MortonEncoder {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
enum Field {
|
||||
CellBits,
|
||||
Dimensions,
|
||||
};
|
||||
|
||||
impl<'de> Deserialize<'de> for Field {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Field, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
struct FieldVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for FieldVisitor {
|
||||
type Value = Field;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("`cell_bits` or `dimensions`")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, value: &str) -> Result<Field, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match value {
|
||||
"cell_bits" => Ok(Field::CellBits),
|
||||
"dimensions" => Ok(Field::Dimensions),
|
||||
_ => Err(de::Error::unknown_field(value, FIELDS)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_identifier(FieldVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
struct MortonEncoderVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for MortonEncoderVisitor {
|
||||
type Value = MortonEncoder;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("struct MortonEncoder")
|
||||
}
|
||||
|
||||
fn visit_seq<V>(self, mut seq: V) -> Result<MortonEncoder, V::Error>
|
||||
where
|
||||
V: SeqAccess<'de>,
|
||||
{
|
||||
let cell_bits = seq
|
||||
.next_element()?
|
||||
.ok_or_else(|| de::Error::invalid_length(0, &self))?;
|
||||
let dimensions = seq
|
||||
.next_element()?
|
||||
.ok_or_else(|| de::Error::invalid_length(1, &self))?;
|
||||
Ok(MortonEncoder::new(dimensions, cell_bits))
|
||||
}
|
||||
|
||||
fn visit_map<V>(self, mut map: V) -> Result<MortonEncoder, V::Error>
|
||||
where
|
||||
V: MapAccess<'de>,
|
||||
{
|
||||
let mut cell_bits = None;
|
||||
let mut dimensions = None;
|
||||
while let Some(key) = map.next_key()? {
|
||||
match key {
|
||||
Field::CellBits => {
|
||||
if cell_bits.is_some() {
|
||||
return Err(de::Error::duplicate_field("cell_bits"));
|
||||
}
|
||||
cell_bits = Some(map.next_value()?);
|
||||
}
|
||||
Field::Dimensions => {
|
||||
if dimensions.is_some() {
|
||||
return Err(de::Error::duplicate_field("dimensions"));
|
||||
}
|
||||
dimensions = Some(map.next_value()?);
|
||||
}
|
||||
}
|
||||
}
|
||||
let cell_bits = cell_bits.ok_or_else(|| de::Error::missing_field("cell_bits"))?;
|
||||
let dimensions =
|
||||
dimensions.ok_or_else(|| de::Error::missing_field("dimensions"))?;
|
||||
Ok(MortonEncoder::new(dimensions, cell_bits))
|
||||
}
|
||||
}
|
||||
|
||||
const FIELDS: &[&str] = &["cell_bits", "dimensions"];
|
||||
deserializer.deserialize_struct("MortonEncoder", FIELDS, MortonEncoderVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Constructor assertions: valid and invalid (dimensions, cell_bits)
    // combinations against MORTON_VALUE_BITS / MORTON_CODE_BITS.
    mod init {
        use super::*;

        /* Check the assertions */
        #[test]
        #[should_panic]
        fn dim1_bit32() {
            let _m = MortonEncoder::new(1, 31);
        }

        #[test]
        #[should_panic]
        fn dim2_bit16() {
            // Max 10 bit for the codes, even if 16 would fit
            let _m = MortonEncoder::new(2, 16);
        }

        #[test]
        #[should_panic]
        fn dim33_bit1() {
            let _m = MortonEncoder::new(33, 1);
        }

        #[test]
        #[should_panic]
        fn dim17_bit2() {
            let _m = MortonEncoder::new(17, 2);
        }

        #[test]
        fn dim1_bit10() {
            let _m = MortonEncoder::new(1, 10);
        }

        #[test]
        fn dim2_bit10() {
            let _m = MortonEncoder::new(2, 10);
        }

        #[test]
        fn dim3_bit10() {
            let _m = MortonEncoder::new(3, 10);
        }

        #[test]
        fn dim4_bit8() {
            let _m = MortonEncoder::new(4, 8);
        }

        #[test]
        fn dim32_bit1() {
            let _m = MortonEncoder::new(32, 1);
        }

        /*
        morton_init();
        // Morton table looks OK
        // for n in 0..10 {
        //     println!("{:4}", n);
        //     for k in 0..K {
        //         println!("{:032b}", unsafe {MORTON[k][n]});
        //     }
        // }

        for n in 0..CELL_MAX {
            println!("## {:04}", n);
            let mut c = 0 as Code;
            for k in 0..K {
                // check diagonal
                c = c | morton_encode(k, n as u16);
            }
            let f = n as u16;
            for k in 1..2 {
                // check diagonal
                let p = morton_decode(k, c);
                println!("\n{:04} \n f {:04}\n p {:04}\n 𝚫 {:06}\n", c, f, p, f-p);

            }
        }


        let mut f = 0.0f64;
        // while f < 1.0 {
        //     let v = convert_to_fixed(&f);
        //     let p = convert_to_f64(&v);
        //     println!("\n{:010} \n f {:+0.16e}\n p {:+03.16e}\n 𝚫 {:+03.16e}\n", v, f, p, f - p);
        //
        //     f += 0.1e-1;
        // }

        let f =0.000724939184752;
        let v = convert_to_fixed(&f);
        let p = convert_to_f64(&v);
        println!("\n{:010} \n f {:+0.16e}\n p {:+03.16e}\n 𝚫 {:+03.16e}\n", v, f, p, f - p);

        */
    }

    // Lookup-table checks against an independent bit-interleaving
    // reference implementation.
    mod encode {
        use super::*;

        /* Check the lookup table produced */
        #[test]
        fn dim1_bit10() {
            // One dimension: encoding is the identity.
            let m = MortonEncoder::new(1, 10);
            for n in 0..MORTON_MAX_VALUES {
                assert_eq!(n as MortonCode, m.encode_1(0, n as MortonValue));
            }
        }

        #[test]
        fn table_dim2_bit10() {
            let m = MortonEncoder::new(2, 10);
            let mut lookup = Vec::<Vec<MortonCode>>::new();

            for k in 0..2 {
                lookup.push(Vec::new());

                for n in 0..MORTON_MAX_VALUES {
                    // Morton numbers are number where the bit are exploded so that we can
                    // interleave them. This means that for each position of a value, we need to
                    // insert dimensions - 1 columns between each bits, and shift that result by the
                    // dimension number so that we can OR all the dimensions together without having
                    // bits colliding.
                    let mut v = 0;
                    for p in 0..MORTON_VALUE_BITS {
                        let b = (n & (1 << p)) >> p;
                        v = v | b << (p * 2 + k);
                    }
                    lookup[k].push(v as MortonCode);
                }
            }

            for k in 0..2 {
                for n in 0..MORTON_MAX_VALUES {
                    assert_eq!(lookup[k][n], m.encode_1(k, n as MortonValue));
                }
            }
        }

        // Shared reference check for arbitrary (dimensions, bits) pairs.
        fn check(dimensions: usize, value_max: usize, value_bits: usize, m: MortonEncoder) -> () {
            let mut lookup = Vec::<Vec<MortonCode>>::new();

            for k in 0..dimensions {
                lookup.push(Vec::new());

                for n in 0..value_max {
                    // Morton numbers are number where the bit are exploded so that we can
                    // interleave them. This means that for each position of a value, we need to
                    // insert dimensions -1 columns between each bits, and shift that result by the
                    // dimension number so that we can OR all the dimensions together without having
                    // bits colliding.
                    let mut v = 0;
                    for p in 0..value_bits {
                        let b = (n & (1 << p)) >> p;
                        v = v | b << (p * dimensions + k);
                    }
                    lookup[k].push(v as MortonCode);
                }
            }

            for k in 0..dimensions {
                for n in 0..value_max {
                    assert_eq!(lookup[k][n], m.encode_1(k, n as MortonValue));
                }
            }
        }

        #[test]
        fn table_dim3_bit10() {
            let m = MortonEncoder::new(3, 10);
            check(3, 1024, 10, m);
        }

        #[test]
        fn table_dim4_bit8() {
            let m = MortonEncoder::new(4, 8);
            check(4, 256, 8, m);
        }
    }
}
|
||||
388
src/sfc.rs
Normal file
388
src/sfc.rs
Normal file
@@ -0,0 +1,388 @@
|
||||
use std::fmt::Debug;
use std::io;
use std::iter::FromIterator;
use std::marker;
use std::ops::Index;

use serde::de::DeserializeOwned;
use serde::Serialize;

pub use ironsea_index::IndexedOwned;
pub use ironsea_index::Record;
pub use ironsea_index::RecordBuild;
pub use ironsea_index::RecordFields;
use ironsea_store::Load;
use ironsea_store::Store;
use ironsea_table::Table;

use super::cell_space::CellSpace;
use super::morton::MortonCode;
use super::morton::MortonEncoder;
use super::morton::MortonValue;

// Space-filling-curve code (one per cell) and per-dimension in-cell offset.
type SFCCode = u32;
type SFCOffset = u32;

//FIXME: Remove the need for a constant, how can we make it type-checked instead?
// type-num crate?
const MAX_K: usize = 3;

// One end of a query range: index into the SFC cell list, plus the
// (rounded) coordinate values for that end.
#[derive(Debug)]
struct Limit<V> {
    idx: usize,
    position: Vec<V>,
}

// A resolved [start, end] query range.
#[derive(Debug)]
struct Limits<V> {
    start: Limit<V>,
    end: Limit<V>,
}

// A record as stored in the index: per-dimension offsets within the
// Morton cell, plus the record's non-key fields.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct SFCRecord<F> {
    //FIXME: Find a way around hardcoding MAX_K
    offsets: [SFCOffset; MAX_K],
    fields: F,
}

// All records sharing the same Morton code.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct SFCCell<F> {
    code: MortonCode,
    records: Vec<SFCRecord<F>>,
}

/// Space-filling-curve index over dictionary-compressed coordinates.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceFillingCurve<T, R, K, V, F>
where
    T: Table<R>,
    R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
    // K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
    V: Clone + Ord + Debug + From<usize>,
{
    // Number of coordinates per record key.
    dimensions: usize,
    // Projects per-dimension cell ids to/from a single Morton code.
    morton: MortonEncoder,
    // Per-dimension dictionaries (value <-> (cell id, offset)).
    space: CellSpace<K, V>,
    // Cells sorted by Morton code, each holding its records.
    index: Vec<SFCCell<F>>,
    _marker: marker::PhantomData<(T, R)>,
}
|
||||
|
||||
impl<T, R, K, V, F> SpaceFillingCurve<T, R, K, V, F>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
V: Clone + Ord + Debug + From<usize>,
|
||||
K: Debug + Index<usize, Output = V> + FromIterator<V>,
|
||||
{
|
||||
//FIXME: Should accept indexing 0 elements, at least not crash!
|
||||
pub fn new(table: &T, dimensions: usize, cell_bits: usize) -> Self {
|
||||
// 1. build the dictionnary space, called here CellSpace, as well as
|
||||
// initialize the morton encoder used to project the multi-dimensional
|
||||
// coordinates into a single dimension.
|
||||
let mut index = SpaceFillingCurve {
|
||||
dimensions,
|
||||
morton: MortonEncoder::new(dimensions, cell_bits),
|
||||
space: CellSpace::new(table, dimensions, cell_bits),
|
||||
index: vec![],
|
||||
_marker: marker::PhantomData,
|
||||
};
|
||||
|
||||
// 2. Build a flat table of (code, offset, entries)
|
||||
let mut flat_table = vec![];
|
||||
|
||||
for record in table.get_table() {
|
||||
let position = record.key();
|
||||
match index.space.key(&position) {
|
||||
Ok((cell_ids, offsets)) => match index.encode(&cell_ids) {
|
||||
Ok(code) => {
|
||||
let offsets = offsets.iter().map(|i| *i as SFCOffset).collect::<Vec<_>>();
|
||||
flat_table.push((
|
||||
code,
|
||||
SFCRecord {
|
||||
offsets: *array_ref!(offsets, 0, MAX_K),
|
||||
fields: record.fields(),
|
||||
},
|
||||
))
|
||||
}
|
||||
Err(e) => error!("Unable to encode position {:#?}: {}", cell_ids, e),
|
||||
},
|
||||
Err(e) => error!("Invalid position {:#?}: {}", position, e),
|
||||
}
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Processed {:#?} records into the index",
|
||||
table.get_table().len()
|
||||
);
|
||||
|
||||
// 5. Sort by SFCcode
|
||||
flat_table.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
||||
let nb_records = flat_table.len();
|
||||
|
||||
let mut current_cell_code = flat_table[0].0;
|
||||
let mut count = 0;
|
||||
index.index.push(SFCCell {
|
||||
code: current_cell_code,
|
||||
records: vec![],
|
||||
});
|
||||
for (code, record) in flat_table {
|
||||
if code == current_cell_code {
|
||||
index.index[count].records.push(record);
|
||||
} else {
|
||||
index.index.push(SFCCell {
|
||||
code,
|
||||
records: vec![record],
|
||||
});
|
||||
current_cell_code = code;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
debug!("Inserted {:#?} records into the index", nb_records);
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
pub fn find_by_value(&self, value: &F) -> Vec<R>
|
||||
where
|
||||
F: std::cmp::PartialEq,
|
||||
{
|
||||
let mut results = vec![];
|
||||
for cell in &self.index {
|
||||
for record in &cell.records {
|
||||
if &record.fields == value {
|
||||
if let Ok(r) = self.get_record(cell.code, &record) {
|
||||
results.push(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
// Map the cell_ids of a point to its SFCcode
|
||||
#[inline]
|
||||
fn encode(&self, cell_ids: &[usize]) -> Result<SFCCode, String> {
|
||||
let mut t = vec![];
|
||||
for v in cell_ids.iter() {
|
||||
t.push(*v as MortonValue);
|
||||
}
|
||||
|
||||
self.morton.encode(&t)
|
||||
}
|
||||
|
||||
// Build coordinate values from encoded value
|
||||
fn position(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<K, String> {
|
||||
let position = self.space.value(
|
||||
self.morton
|
||||
.decode(code)
|
||||
.iter()
|
||||
.map(|e| *e as usize)
|
||||
.collect(),
|
||||
offsets.iter().map(|e| *e as usize).collect(),
|
||||
)?;
|
||||
|
||||
Ok(position.iter().map(|i| (*i).clone()).collect())
|
||||
}
|
||||
|
||||
// Rebuild a specific record
|
||||
fn get_record(&self, code: SFCCode, entry: &SFCRecord<F>) -> Result<R, String> {
|
||||
let position = self.position(code, &entry.offsets)?;
|
||||
|
||||
Ok(R::build(&position, &entry.fields))
|
||||
}
|
||||
|
||||
    // Compute the cell-index and position bounds for a [start, end] key
    // range. In the returned `Limits`:
    //   - `start.idx` is inclusive and `end.idx` is exclusive (matching the
    //     half-open loop in `find_range`);
    //   - `position` holds the rounded coordinate values of each bound,
    //     used to filter individual records inside boundary cells.
    fn limits(&self, start: &K, end: &K) -> Result<Limits<V>, String> {
        trace!("limits: {:?} - {:?}", start, end);

        // Round down if not found, for start of range:
        let (cells, offsets) = self.space.key_down(start)?;
        let code = self.encode(&cells)?;
        let idx = match self.index.binary_search_by(|e| e.code.cmp(&code)) {
            Err(e) => {
                // `e` is the insertion point: step back one cell so the
                // greatest cell with a smaller code — which may contain the
                // rounded-down start key — is included; clamp at 0.
                if e > 0 {
                    e - 1
                } else {
                    0
                }
            }
            Ok(c) => c,
        };
        let position = self.space.value(cells, offsets)?;
        let start = Limit { idx, position };

        // Round up if not found, for end of range:
        let (cells, offsets) = self.space.key_up(end)?;
        let code = self.encode(&cells)?;
        let idx = match self.index.binary_search_by(|e| e.code.cmp(&code)) {
            Err(e) => {
                // `e` is the insertion point, already one past the last
                // cell with a smaller code, so it serves directly as the
                // exclusive upper bound (clamped to the vector length).
                if e >= self.index.len() {
                    self.index.len()
                } else {
                    e
                }
            }
            // Exact hit: move one past it so the matching cell itself is
            // covered by the exclusive upper bound.
            Ok(c) => c + 1,
        };

        let position = self.space.value(cells, offsets)?;
        let end = Limit { idx, position };

        trace!("limits: {:?} - {:?}", start, end);

        Ok(Limits { start, end })
    }
|
||||
}
|
||||
|
||||
impl<T, R, K, V, F> IndexedOwned<T, R, K> for SpaceFillingCurve<T, R, K, V, F>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
K: Debug + Index<usize, Output = V> + FromIterator<V>,
|
||||
V: Clone + Debug + Ord + From<usize> + Debug,
|
||||
{
|
||||
fn find(&self, key: &K) -> Vec<R> {
|
||||
let mut values = vec![];
|
||||
|
||||
if let Ok((cell_ids, offsets)) = self.space.key(key) {
|
||||
match self.encode(&cell_ids) {
|
||||
Err(e) => error!("{}", e),
|
||||
Ok(code) => {
|
||||
if let Ok(cell) = self.index.binary_search_by(|a| a.code.cmp(&code)) {
|
||||
for record in &self.index[cell].records {
|
||||
let mut select = true;
|
||||
for (k, o) in offsets.iter().enumerate().take(self.dimensions) {
|
||||
select &= record.offsets[k] == (*o as SFCOffset);
|
||||
}
|
||||
|
||||
if select {
|
||||
match self.get_record(code, record) {
|
||||
Err(e) => error!("{}", e),
|
||||
Ok(r) => values.push(r),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
values
|
||||
}
|
||||
|
||||
fn find_range(&self, start: &K, end: &K) -> Vec<R> {
|
||||
let mut values = vec![];
|
||||
|
||||
match self.limits(start, end) {
|
||||
Ok(limits) => {
|
||||
for idx in limits.start.idx..limits.end.idx {
|
||||
let code = self.index[idx].code;
|
||||
for record in &self.index[idx].records {
|
||||
let mut select = true;
|
||||
let pos = match self.position(code, &record.offsets) {
|
||||
Err(e) => {
|
||||
error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
Ok(p) => p,
|
||||
};
|
||||
|
||||
// FIXME: Reduce number of comparison by using the cells boundaries.
|
||||
for k in 0..self.dimensions {
|
||||
select = select
|
||||
&& limits.start.position[k] <= pos[k]
|
||||
&& limits.end.position[k] >= pos[k];
|
||||
}
|
||||
if select {
|
||||
match self.get_record(code, &record) {
|
||||
Err(e) => error!("{}", e),
|
||||
Ok(r) => values.push(r),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => error!("find_range: limits failed: {}", e),
|
||||
};
|
||||
|
||||
values
|
||||
}
|
||||
}
|
||||
// Rough check, based on per-dimension cell Ids.
|
||||
/*
|
||||
// If the cell_ids are between ]pos_start and pos_end[, then the value is within the range,
|
||||
// If the cell_ids are outside [pos_start, pos_end], then the value is out, stop checking
|
||||
// Else, check the offsets of each entry to be within [off_start, off_end], then the value is within the range.
|
||||
let mut rough_in = true;
|
||||
for k in 0..self.dimensions {
|
||||
if !(cells[k] > start_limits.cells[k] && cells[k] < end_limits.cells[k]) {
|
||||
rough_in = false;
|
||||
}
|
||||
}
|
||||
|
||||
if rough_in {
|
||||
// This is a cell well within the volume, so all points are a match, add all points,
|
||||
// go to next cell.
|
||||
for entry in entries {
|
||||
values.push(self.get_element(code, entry))
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut rough_out = false;
|
||||
for k in 0..self.dimensions {
|
||||
if cells[k] < start_limits.cells[k] || cells[k] > end_limits.cells[k] {
|
||||
rough_out = true; // NOTE(review): was `= false`, which made this flag unreachable — the branch is meant to mark the cell as outside the range.
|
||||
}
|
||||
}
|
||||
|
||||
// If rough is not true, then we have nothing to double check.
|
||||
if rough_out {
|
||||
continue;
|
||||
}
|
||||
*/
|
||||
|
||||
impl<T, R, K, V, F> Store for SpaceFillingCurve<T, R, K, V, F>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
// K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
|
||||
K: Serialize,
|
||||
V: Clone + Ord + Debug + From<usize> + Serialize,
|
||||
F: Serialize,
|
||||
{
|
||||
fn store<W>(&mut self, writer: W) -> io::Result<()>
|
||||
where
|
||||
W: std::io::Write,
|
||||
{
|
||||
match bincode::serialize_into(writer, &self) {
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) => Err(io::Error::new(io::ErrorKind::WriteZero, e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, R, K, V, F> Load for SpaceFillingCurve<T, R, K, V, F>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
K: DeserializeOwned,
|
||||
V: Clone + Ord + Debug + From<usize> + DeserializeOwned,
|
||||
F: DeserializeOwned,
|
||||
{
|
||||
fn load<Re: io::Read>(reader: Re) -> io::Result<Self> {
|
||||
match bincode::deserialize_from(reader) {
|
||||
Ok(data) => Ok(data),
|
||||
Err(e) => Err(io::Error::new(io::ErrorKind::InvalidData, e)),
|
||||
}
|
||||
}
|
||||
|
||||
// only required for store_mapped_file
|
||||
fn load_slice(from: &[u8]) -> io::Result<Self> {
|
||||
match bincode::deserialize(from) {
|
||||
Ok(data) => Ok(data),
|
||||
Err(e) => Err(io::Error::new(io::ErrorKind::InvalidData, e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user