Compare commits

...

5 Commits

Author SHA1 Message Date
857534fd50 Updated to new Index API 2019-10-30 15:39:49 +01:00
c35cc5a11f Improve filtering in find_range 2019-10-23 16:27:25 +02:00
ad52da09b7 Add last() on SpaceFillingCurve 2019-10-23 16:27:25 +02:00
b0635d05d7 Add size to dynamic allocations 2019-10-23 16:27:25 +02:00
3ff46aa44c Ensure assumptions hold 2019-10-23 16:27:25 +02:00
4 changed files with 126 additions and 164 deletions

View File

@@ -20,7 +20,6 @@ include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.r
[dependencies]
ironsea_index = "^0.1"
ironsea_table = "^0.1"
ironsea_store = "^0.1"
arrayref = "^0.3"

View File

@@ -5,7 +5,6 @@ use std::marker;
use std::ops::Index;
use ironsea_index::Record;
use ironsea_table::Table;
type Cell<T> = Vec<T>;
@@ -18,20 +17,16 @@ struct CellDictionary<K, V> {
impl<K, V> CellDictionary<K, V>
where
V: Clone + Ord + Debug + Hash,
K: Debug + Index<usize, Output = V>,
V: Clone + Debug + Hash + Ord,
{
pub fn new<T, R>(table: &T, dimension: usize, cell_bits: usize) -> Self
pub fn new<I, R>(iter: I, dimension: usize, cell_bits: usize) -> Self
where
T: Table<R>,
R: Record<K> + Debug,
I: Iterator<Item = R>,
R: Debug + Record<K>,
{
// 1. Retrieve a list of distinct values for the coordinate `dimension`
let mut distinct: HashSet<V> = table
.get_table()
.iter()
.map(|&record| record.key()[dimension].clone())
.collect();
let mut distinct: HashSet<V> = iter.map(|record| record.key()[dimension].clone()).collect();
// 2. Build a sorted list, of distinct elements
let mut distinct = distinct.drain().collect::<Vec<_>>();
@@ -96,10 +91,7 @@ where
&self.table
}
fn cell_id(&self, position: &V) -> Option<usize>
where
V: Clone + Ord + Debug,
{
fn cell_id(&self, position: &V) -> Option<usize> {
let mut id = 0;
// If the last value of the current cell is >= the value, then
// the value is stored in the cell.
@@ -156,7 +148,10 @@ where
}
fn last(&self) -> (usize, usize) {
assert!(!self.table.is_empty());
let last_id = self.table.len() - 1;
assert!(!self.table[last_id].is_empty());
let last_offset = self.table[last_id].len() - 1;
(last_id, last_offset)
@@ -194,14 +189,13 @@ pub struct CellSpace<K, V> {
impl<K, V> CellSpace<K, V>
where
V: Clone + Ord + Debug + Hash,
K: Debug + Index<usize, Output = V>,
V: Clone + Debug + Hash + Ord,
{
pub fn new<T, R>(table: &T, dimensions: usize, cell_bits: usize) -> Self
pub fn new<I, R>(iter: I, dimensions: usize, cell_bits: usize) -> Self
where
T: Table<R>,
R: Record<K> + Debug,
V: Clone + Ord + Debug,
I: Clone + Iterator<Item = R>,
R: Debug + Record<K>,
{
let mut space = CellSpace {
dimensions,
@@ -211,7 +205,7 @@ where
// FIXME: Add check to ensure all positions have the required number of dimensions.
for k in 0..dimensions {
let dic = CellDictionary::new(table, k, cell_bits);
let dic = CellDictionary::new(iter.clone(), k, cell_bits);
let max = dic.max_offset();
space.coordinates.push(dic);
space.coordinates_max_offsets.push(max);
@@ -220,27 +214,18 @@ where
space
}
/*
pub fn cells_id(&self, position: &Vec<V>) -> Result<Vec<Option<usize>>, String> {
trace!("cells_id: position {:?}", position);
//TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
if self.dimensions != position.len() {
return Err(format!(
"Incorrect number of dimensions, expected {}, got {} for {:?}",
self.dimensions,
position.len(),
position
));
}
let mut cells = vec![];
for k in 0..self.dimensions {
cells.push(self.coordinates[k].cell_id(&position[k]));
}
trace!("cells_id: cells {:?}", cells);
Ok(cells)
pub fn last(&self) -> (Vec<usize>, Vec<usize>) {
let mut cells = Vec::with_capacity(self.dimensions);
let mut offsets = Vec::with_capacity(self.dimensions);
for k in 0..self.dimensions {
let (cell_id, offset) = self.coordinates[k].last();
cells.push(cell_id);
offsets.push(offset);
}
*/
(cells, offsets)
}
pub fn key(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
//TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
/* This would impose an ExactSizeIterator bound, which Vec does not implement and which cannot easily be added.
@@ -253,8 +238,8 @@ where
));
}*/
let mut cells = vec![];
let mut offsets = vec![];
let mut cells = Vec::with_capacity(self.dimensions);
let mut offsets = Vec::with_capacity(self.dimensions);
for k in 0..self.dimensions {
match self.coordinates[k].key(&position[k]) {
None => {
@@ -286,8 +271,8 @@ where
));
}*/
let mut cells = vec![];
let mut offsets = vec![];
let mut cells = Vec::with_capacity(self.dimensions);
let mut offsets = Vec::with_capacity(self.dimensions);
for k in 0..self.dimensions {
let (id, offset) = self.coordinates[k].key_down(&position[k]);
cells.push(id);
@@ -310,8 +295,8 @@ where
));
}*/
let mut cells = vec![];
let mut offsets = vec![];
let mut cells = Vec::with_capacity(self.dimensions);
let mut offsets = Vec::with_capacity(self.dimensions);
for k in 0..self.dimensions {
let (id, offset) = self.coordinates[k].key_up(&position[k]);
cells.push(id);
@@ -342,7 +327,7 @@ where
));
}
let mut values = vec![];
let mut values = Vec::with_capacity(self.dimensions);
for k in 0..self.dimensions {
values.push(self.coordinates[k].value(cells_id[k], offsets[k]));
}

View File

@@ -12,6 +12,5 @@ mod morton;
mod sfc;
pub use sfc::Record;
pub use sfc::RecordBuild;
pub use sfc::RecordFields;
pub use sfc::SpaceFillingCurve as IndexOwned;

View File

@@ -1,27 +1,27 @@
#![allow(clippy::type_repetition_in_bounds)]
use std::cmp::PartialEq;
use std::fmt::Debug;
use std::hash::Hash;
use std::io;
use std::iter::FromIterator;
use std::marker;
use std::ops::Index;
use serde::de::DeserializeOwned;
use serde::Serialize;
pub use ironsea_index::IndexedOwned;
pub use ironsea_index::IndexedDestructured;
pub use ironsea_index::Record;
pub use ironsea_index::RecordBuild;
pub use ironsea_index::RecordFields;
use ironsea_store::Load;
use ironsea_store::Store;
use ironsea_table::Table;
use super::cell_space::CellSpace;
use super::morton::MortonCode;
use super::morton::MortonEncoder;
use super::morton::MortonValue;
type SFCCode = u32;
type SFCCode = MortonCode;
type SFCOffset = u32;
//FIXME: Remove the need for a constant, how can we make it type-checked instead?
@@ -54,44 +54,44 @@ struct SFCCell<F> {
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceFillingCurve<T, R, K, V, F>
pub struct SpaceFillingCurve<F, K, V>
where
T: Table<R>,
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
// K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
V: Clone + Ord + Debug + From<usize>,
F: PartialEq,
K: Debug + FromIterator<V> + Index<usize, Output = V>,
V: Clone + Debug + From<usize> + Ord,
{
dimensions: usize,
morton: MortonEncoder,
space: CellSpace<K, V>,
index: Vec<SFCCell<F>>,
_marker: marker::PhantomData<(T, R)>,
}
impl<T, R, K, V, F> SpaceFillingCurve<T, R, K, V, F>
impl<F, K, V> SpaceFillingCurve<F, K, V>
where
T: Table<R>,
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
F: PartialEq,
K: Debug + FromIterator<V> + Index<usize, Output = V>,
V: Clone + Debug + From<usize> + Hash + Ord,
K: Debug + Index<usize, Output = V> + FromIterator<V>,
{
//FIXME: Should accept indexing 0 elements, at least not crash!
pub fn new(table: &T, dimensions: usize, cell_bits: usize) -> Self {
pub fn new<I, R>(iter: I, dimensions: usize, cell_bits: usize) -> Self
where
I: Clone + Iterator<Item = R>,
R: Debug + Record<K> + RecordFields<F>,
{
// 1. build the dictionary space, called here CellSpace, as well as
// initialize the morton encoder used to project the multi-dimensional
// coordinates into a single dimension.
let mut index = SpaceFillingCurve {
dimensions,
morton: MortonEncoder::new(dimensions, cell_bits),
space: CellSpace::new(table, dimensions, cell_bits),
space: CellSpace::new(iter.clone(), dimensions, cell_bits),
index: vec![],
_marker: marker::PhantomData,
};
// 2. Build a flat table of (code, offset, entries)
let mut flat_table = vec![];
for record in table.get_table() {
let (nb_records, _) = iter.size_hint();
for record in iter.into_iter() {
let position = record.key();
match index.space.key(&position) {
Ok((cell_ids, offsets)) => match index.encode(&cell_ids) {
@@ -111,10 +111,7 @@ where
}
}
debug!(
"Processed {:#?} records into the index",
table.get_table().len()
);
debug!("Processed {:#?} records into the index", nb_records);
// 5. Sort by SFCcode
flat_table.sort_unstable_by(|a, b| a.0.cmp(&b.0));
@@ -143,16 +140,13 @@ where
index
}
pub fn find_by_value(&self, value: &F) -> Vec<R>
where
F: std::cmp::PartialEq,
{
pub fn find_by_value(&self, value: &F) -> Vec<K> {
let mut results = vec![];
for cell in &self.index {
for record in &cell.records {
if &record.fields == value {
if let Ok(r) = self.get_record(cell.code, &record) {
results.push(r);
if let Ok(key) = self.position(cell.code, &record.offsets) {
results.push(key);
}
}
}
@@ -172,25 +166,26 @@ where
self.morton.encode(&t)
}
// Build coordinate values from encoded value
fn position(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<K, String> {
let position = self.space.value(
fn last(&self) -> (Vec<usize>, Vec<usize>) {
self.space.last()
}
fn value(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<Vec<&V>, String> {
Ok(self.space.value(
self.morton
.decode(code)
.iter()
.map(|e| *e as usize)
.collect(),
offsets.iter().map(|e| *e as usize).collect(),
)?;
Ok(position.iter().map(|i| (*i).clone()).collect())
)?)
}
// Rebuild a specific record
fn get_record(&self, code: SFCCode, entry: &SFCRecord<F>) -> Result<R, String> {
let position = &self.position(code, &entry.offsets)?;
// Build coordinate values from encoded value
fn position(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<K, String> {
let position = self.value(code, offsets)?;
Ok(R::build(position, &entry.fields))
Ok(position.iter().map(|i| (*i).clone()).collect())
}
fn limits(&self, start: &K, end: &K) -> Result<Limits<V>, String> {
@@ -235,14 +230,13 @@ where
}
}
impl<T, R, K, V, F> IndexedOwned<T, R, K> for SpaceFillingCurve<T, R, K, V, F>
impl<F, K, V> IndexedDestructured<F, K> for SpaceFillingCurve<F, K, V>
where
T: Table<R>,
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
K: Debug + Index<usize, Output = V> + FromIterator<V>,
F: PartialEq,
K: Debug + FromIterator<V> + Index<usize, Output = V>,
V: Clone + Debug + From<usize> + Hash + Ord,
{
fn find(&self, key: &K) -> Vec<R> {
fn find(&self, key: &K) -> Vec<&F> {
let mut values = vec![];
if let Ok((cell_ids, offsets)) = self.space.key(key) {
@@ -257,10 +251,7 @@ where
}
if select {
match self.get_record(code, record) {
Err(e) => error!("{}", e),
Ok(r) => values.push(r),
}
values.push(&record.fields);
}
}
}
@@ -271,33 +262,60 @@ where
values
}
fn find_range(&self, start: &K, end: &K) -> Vec<R> {
fn find_range(&self, start: &K, end: &K) -> Vec<(K, &F)> {
let mut values = vec![];
match self.limits(start, end) {
Ok(limits) => {
for idx in limits.start.idx..limits.end.idx {
let code = self.index[idx].code;
for record in &self.index[idx].records {
let mut select = true;
let pos = match self.position(code, &record.offsets) {
Err(e) => {
error!("{}", e);
continue;
}
Ok(p) => p,
};
// FIXME: Reduce number of comparison by using the cells boundaries.
for k in 0..self.dimensions {
select = select
&& *limits.start.position[k] <= pos[k]
&& *limits.end.position[k] >= pos[k];
let first = match self.value(code, &self.index[idx].records[0].offsets) {
Err(e) => {
error!("Cannot retrieve first value of cell: {}", e);
continue;
}
if select {
match self.get_record(code, &record) {
Err(e) => error!("{}", e),
Ok(r) => values.push(r),
Ok(r) => r,
};
let (cell_ids, last_offsets) = self.last();
let last = match self.space.value(cell_ids, last_offsets) {
Err(e) => {
error!("Cannot retrieve last value of cell: {}", e);
continue;
}
Ok(r) => r,
};
// Check first & last point of the cell, if both are fully
// in the bounding box, then all the points of the cell will
// be.
if limits.start.position <= first
&& first <= limits.end.position
&& limits.start.position <= last
&& last <= limits.end.position
{
for record in &self.index[idx].records {
if let Ok(key) = self.position(code, &record.offsets) {
values.push((key, &record.fields));
}
}
} else {
// We have points which are outside of the bounding box,
// so check every point one by one.
for record in &self.index[idx].records {
let pos = match self.value(code, &record.offsets) {
Err(e) => {
error!("{}", e);
continue;
}
Ok(r) => r,
};
if limits.start.position <= pos && pos <= limits.end.position {
if let Ok(key) = self.position(code, &record.offsets) {
values.push((key, &record.fields));
}
}
}
}
@@ -309,49 +327,12 @@ where
values
}
}
// Rough check, based on per-dimension cell Ids.
/*
// If the cell_ids are between ]pos_start and pos_end[, then the value is within the range,
// If the cell_ids are outside [pos_start, pos_end], then the value is out, stop checking
// Else, check the offsets of each entry to be within [off_start, off_end], then the value is within the range.
let mut rough_in = true;
for k in 0..self.dimensions {
if !(cells[k] > start_limits.cells[k] && cells[k] < end_limits.cells[k]) {
rough_in = false;
}
}
if rough_in {
// This is a cell well within the volume, so all points are a match, add all points,
// go to next cell.
for entry in entries {
values.push(self.get_element(code, entry))
}
continue;
}
let mut rough_out = false;
for k in 0..self.dimensions {
if cells[k] < start_limits.cells[k] || cells[k] > end_limits.cells[k] {
rough_out = false;
}
}
// If rough is not true, then we have nothing to double check.
if rough_out {
continue;
}
*/
impl<T, R, K, V, F> Store for SpaceFillingCurve<T, R, K, V, F>
impl<F, K, V> Store for SpaceFillingCurve<F, K, V>
where
T: Table<R>,
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
// K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
K: Serialize,
V: Clone + Ord + Debug + From<usize> + Serialize,
F: Serialize,
F: PartialEq + Serialize,
K: Debug + Serialize + FromIterator<V> + Index<usize, Output = V>,
V: Clone + Debug + From<usize> + Ord + Serialize,
{
fn store<W>(&mut self, writer: W) -> io::Result<()>
where
@@ -364,13 +345,11 @@ where
}
}
impl<T, R, K, V, F> Load for SpaceFillingCurve<T, R, K, V, F>
impl<F, K, V> Load for SpaceFillingCurve<F, K, V>
where
T: Table<R>,
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
K: DeserializeOwned,
V: Clone + Ord + Debug + From<usize> + DeserializeOwned,
F: DeserializeOwned,
F: PartialEq + DeserializeOwned,
K: Debug + DeserializeOwned + FromIterator<V> + Index<usize, Output = V>,
V: Clone + Debug + DeserializeOwned + From<usize> + Ord,
{
fn load<Re: io::Read>(reader: Re) -> io::Result<Self> {
match bincode::deserialize_from(reader) {