Compare commits
5 Commits
411a6222a1
...
857534fd50
| Author | SHA1 | Date | |
|---|---|---|---|
| 857534fd50 | |||
| c35cc5a11f | |||
| ad52da09b7 | |||
| b0635d05d7 | |||
| 3ff46aa44c |
@@ -20,7 +20,6 @@ include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.r
|
||||
|
||||
[dependencies]
|
||||
ironsea_index = "^0.1"
|
||||
ironsea_table = "^0.1"
|
||||
ironsea_store = "^0.1"
|
||||
|
||||
arrayref = "^0.3"
|
||||
|
||||
@@ -5,7 +5,6 @@ use std::marker;
|
||||
use std::ops::Index;
|
||||
|
||||
use ironsea_index::Record;
|
||||
use ironsea_table::Table;
|
||||
|
||||
type Cell<T> = Vec<T>;
|
||||
|
||||
@@ -18,20 +17,16 @@ struct CellDictionary<K, V> {
|
||||
|
||||
impl<K, V> CellDictionary<K, V>
|
||||
where
|
||||
V: Clone + Ord + Debug + Hash,
|
||||
K: Debug + Index<usize, Output = V>,
|
||||
V: Clone + Debug + Hash + Ord,
|
||||
{
|
||||
pub fn new<T, R>(table: &T, dimension: usize, cell_bits: usize) -> Self
|
||||
pub fn new<I, R>(iter: I, dimension: usize, cell_bits: usize) -> Self
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + Debug,
|
||||
I: Iterator<Item = R>,
|
||||
R: Debug + Record<K>,
|
||||
{
|
||||
// 1. Retrieve a list of distinct values for the coordinate `dimension`
|
||||
let mut distinct: HashSet<V> = table
|
||||
.get_table()
|
||||
.iter()
|
||||
.map(|&record| record.key()[dimension].clone())
|
||||
.collect();
|
||||
let mut distinct: HashSet<V> = iter.map(|record| record.key()[dimension].clone()).collect();
|
||||
|
||||
// 2. Build a sorted list of distinct elements
|
||||
let mut distinct = distinct.drain().collect::<Vec<_>>();
|
||||
@@ -96,10 +91,7 @@ where
|
||||
&self.table
|
||||
}
|
||||
|
||||
fn cell_id(&self, position: &V) -> Option<usize>
|
||||
where
|
||||
V: Clone + Ord + Debug,
|
||||
{
|
||||
fn cell_id(&self, position: &V) -> Option<usize> {
|
||||
let mut id = 0;
|
||||
// If the last value of the current cell is >= the value, then
|
||||
// the value is stored in the cell.
|
||||
@@ -156,7 +148,10 @@ where
|
||||
}
|
||||
|
||||
fn last(&self) -> (usize, usize) {
|
||||
assert!(!self.table.is_empty());
|
||||
let last_id = self.table.len() - 1;
|
||||
|
||||
assert!(!self.table[last_id].is_empty());
|
||||
let last_offset = self.table[last_id].len() - 1;
|
||||
|
||||
(last_id, last_offset)
|
||||
@@ -194,14 +189,13 @@ pub struct CellSpace<K, V> {
|
||||
|
||||
impl<K, V> CellSpace<K, V>
|
||||
where
|
||||
V: Clone + Ord + Debug + Hash,
|
||||
K: Debug + Index<usize, Output = V>,
|
||||
V: Clone + Debug + Hash + Ord,
|
||||
{
|
||||
pub fn new<T, R>(table: &T, dimensions: usize, cell_bits: usize) -> Self
|
||||
pub fn new<I, R>(iter: I, dimensions: usize, cell_bits: usize) -> Self
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + Debug,
|
||||
V: Clone + Ord + Debug,
|
||||
I: Clone + Iterator<Item = R>,
|
||||
R: Debug + Record<K>,
|
||||
{
|
||||
let mut space = CellSpace {
|
||||
dimensions,
|
||||
@@ -211,7 +205,7 @@ where
|
||||
|
||||
// FIXME: Add check to ensure all positions have the required number of dimensions.
|
||||
for k in 0..dimensions {
|
||||
let dic = CellDictionary::new(table, k, cell_bits);
|
||||
let dic = CellDictionary::new(iter.clone(), k, cell_bits);
|
||||
let max = dic.max_offset();
|
||||
space.coordinates.push(dic);
|
||||
space.coordinates_max_offsets.push(max);
|
||||
@@ -220,27 +214,18 @@ where
|
||||
space
|
||||
}
|
||||
|
||||
/*
|
||||
pub fn cells_id(&self, position: &Vec<V>) -> Result<Vec<Option<usize>>, String> {
|
||||
trace!("cells_id: position {:?}", position);
|
||||
//TODO: Should we check inside each objects, or just assume it is correct and/or rely on the bound checks?
|
||||
if self.dimensions != position.len() {
|
||||
return Err(format!(
|
||||
"Incorrect number of dimensions, expected {}, got {} for {:?}",
|
||||
self.dimensions,
|
||||
position.len(),
|
||||
position
|
||||
));
|
||||
}
|
||||
|
||||
let mut cells = vec![];
|
||||
for k in 0..self.dimensions {
|
||||
cells.push(self.coordinates[k].cell_id(&position[k]));
|
||||
}
|
||||
trace!("cells_id: cells {:?}", cells);
|
||||
Ok(cells)
|
||||
pub fn last(&self) -> (Vec<usize>, Vec<usize>) {
|
||||
let mut cells = Vec::with_capacity(self.dimensions);
|
||||
let mut offsets = Vec::with_capacity(self.dimensions);
|
||||
for k in 0..self.dimensions {
|
||||
let (cell_id, offset) = self.coordinates[k].last();
|
||||
cells.push(cell_id);
|
||||
offsets.push(offset);
|
||||
}
|
||||
*/
|
||||
|
||||
(cells, offsets)
|
||||
}
|
||||
|
||||
pub fn key(&self, position: &K) -> Result<(Vec<usize>, Vec<usize>), String> {
|
||||
//TODO: Should we check inside each object, or just assume it is correct and/or rely on the bound checks?
|
||||
/* This would require ExactSizeIterator, which is not implemented on Vec and cannot easily be.
|
||||
@@ -253,8 +238,8 @@ where
|
||||
));
|
||||
}*/
|
||||
|
||||
let mut cells = vec![];
|
||||
let mut offsets = vec![];
|
||||
let mut cells = Vec::with_capacity(self.dimensions);
|
||||
let mut offsets = Vec::with_capacity(self.dimensions);
|
||||
for k in 0..self.dimensions {
|
||||
match self.coordinates[k].key(&position[k]) {
|
||||
None => {
|
||||
@@ -286,8 +271,8 @@ where
|
||||
));
|
||||
}*/
|
||||
|
||||
let mut cells = vec![];
|
||||
let mut offsets = vec![];
|
||||
let mut cells = Vec::with_capacity(self.dimensions);
|
||||
let mut offsets = Vec::with_capacity(self.dimensions);
|
||||
for k in 0..self.dimensions {
|
||||
let (id, offset) = self.coordinates[k].key_down(&position[k]);
|
||||
cells.push(id);
|
||||
@@ -310,8 +295,8 @@ where
|
||||
));
|
||||
}*/
|
||||
|
||||
let mut cells = vec![];
|
||||
let mut offsets = vec![];
|
||||
let mut cells = Vec::with_capacity(self.dimensions);
|
||||
let mut offsets = Vec::with_capacity(self.dimensions);
|
||||
for k in 0..self.dimensions {
|
||||
let (id, offset) = self.coordinates[k].key_up(&position[k]);
|
||||
cells.push(id);
|
||||
@@ -342,7 +327,7 @@ where
|
||||
));
|
||||
}
|
||||
|
||||
let mut values = vec![];
|
||||
let mut values = Vec::with_capacity(self.dimensions);
|
||||
for k in 0..self.dimensions {
|
||||
values.push(self.coordinates[k].value(cells_id[k], offsets[k]));
|
||||
}
|
||||
|
||||
@@ -12,6 +12,5 @@ mod morton;
|
||||
mod sfc;
|
||||
|
||||
pub use sfc::Record;
|
||||
pub use sfc::RecordBuild;
|
||||
pub use sfc::RecordFields;
|
||||
pub use sfc::SpaceFillingCurve as IndexOwned;
|
||||
|
||||
209
src/sfc.rs
209
src/sfc.rs
@@ -1,27 +1,27 @@
|
||||
#![allow(clippy::type_repetition_in_bounds)]
|
||||
|
||||
use std::cmp::PartialEq;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::io;
|
||||
use std::iter::FromIterator;
|
||||
use std::marker;
|
||||
use std::ops::Index;
|
||||
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::Serialize;
|
||||
|
||||
pub use ironsea_index::IndexedOwned;
|
||||
pub use ironsea_index::IndexedDestructured;
|
||||
pub use ironsea_index::Record;
|
||||
pub use ironsea_index::RecordBuild;
|
||||
pub use ironsea_index::RecordFields;
|
||||
use ironsea_store::Load;
|
||||
use ironsea_store::Store;
|
||||
use ironsea_table::Table;
|
||||
|
||||
use super::cell_space::CellSpace;
|
||||
use super::morton::MortonCode;
|
||||
use super::morton::MortonEncoder;
|
||||
use super::morton::MortonValue;
|
||||
|
||||
type SFCCode = u32;
|
||||
type SFCCode = MortonCode;
|
||||
type SFCOffset = u32;
|
||||
|
||||
//FIXME: Remove the need for a constant, how can we make it type-checked instead?
|
||||
@@ -54,44 +54,44 @@ struct SFCCell<F> {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct SpaceFillingCurve<T, R, K, V, F>
|
||||
pub struct SpaceFillingCurve<F, K, V>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
// K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
|
||||
V: Clone + Ord + Debug + From<usize>,
|
||||
F: PartialEq,
|
||||
K: Debug + FromIterator<V> + Index<usize, Output = V>,
|
||||
V: Clone + Debug + From<usize> + Ord,
|
||||
{
|
||||
dimensions: usize,
|
||||
morton: MortonEncoder,
|
||||
space: CellSpace<K, V>,
|
||||
index: Vec<SFCCell<F>>,
|
||||
_marker: marker::PhantomData<(T, R)>,
|
||||
}
|
||||
|
||||
impl<T, R, K, V, F> SpaceFillingCurve<T, R, K, V, F>
|
||||
impl<F, K, V> SpaceFillingCurve<F, K, V>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
F: PartialEq,
|
||||
K: Debug + FromIterator<V> + Index<usize, Output = V>,
|
||||
V: Clone + Debug + From<usize> + Hash + Ord,
|
||||
K: Debug + Index<usize, Output = V> + FromIterator<V>,
|
||||
{
|
||||
//FIXME: Should accept indexing 0 elements, at least not crash!
|
||||
pub fn new(table: &T, dimensions: usize, cell_bits: usize) -> Self {
|
||||
pub fn new<I, R>(iter: I, dimensions: usize, cell_bits: usize) -> Self
|
||||
where
|
||||
I: Clone + Iterator<Item = R>,
|
||||
R: Debug + Record<K> + RecordFields<F>,
|
||||
{
|
||||
// 1. Build the dictionary space, called here CellSpace, as well as
|
||||
// initialize the morton encoder used to project the multi-dimensional
|
||||
// coordinates into a single dimension.
|
||||
let mut index = SpaceFillingCurve {
|
||||
dimensions,
|
||||
morton: MortonEncoder::new(dimensions, cell_bits),
|
||||
space: CellSpace::new(table, dimensions, cell_bits),
|
||||
space: CellSpace::new(iter.clone(), dimensions, cell_bits),
|
||||
index: vec![],
|
||||
_marker: marker::PhantomData,
|
||||
};
|
||||
|
||||
// 2. Build a flat table of (code, offset, entries)
|
||||
let mut flat_table = vec![];
|
||||
|
||||
for record in table.get_table() {
|
||||
let (nb_records, _) = iter.size_hint();
|
||||
for record in iter.into_iter() {
|
||||
let position = record.key();
|
||||
match index.space.key(&position) {
|
||||
Ok((cell_ids, offsets)) => match index.encode(&cell_ids) {
|
||||
@@ -111,10 +111,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Processed {:#?} records into the index",
|
||||
table.get_table().len()
|
||||
);
|
||||
debug!("Processed {:#?} records into the index", nb_records);
|
||||
|
||||
// 5. Sort by SFCcode
|
||||
flat_table.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
||||
@@ -143,16 +140,13 @@ where
|
||||
index
|
||||
}
|
||||
|
||||
pub fn find_by_value(&self, value: &F) -> Vec<R>
|
||||
where
|
||||
F: std::cmp::PartialEq,
|
||||
{
|
||||
pub fn find_by_value(&self, value: &F) -> Vec<K> {
|
||||
let mut results = vec![];
|
||||
for cell in &self.index {
|
||||
for record in &cell.records {
|
||||
if &record.fields == value {
|
||||
if let Ok(r) = self.get_record(cell.code, &record) {
|
||||
results.push(r);
|
||||
if let Ok(key) = self.position(cell.code, &record.offsets) {
|
||||
results.push(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -172,25 +166,26 @@ where
|
||||
self.morton.encode(&t)
|
||||
}
|
||||
|
||||
// Build coordinate values from encoded value
|
||||
fn position(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<K, String> {
|
||||
let position = self.space.value(
|
||||
fn last(&self) -> (Vec<usize>, Vec<usize>) {
|
||||
self.space.last()
|
||||
}
|
||||
|
||||
fn value(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<Vec<&V>, String> {
|
||||
Ok(self.space.value(
|
||||
self.morton
|
||||
.decode(code)
|
||||
.iter()
|
||||
.map(|e| *e as usize)
|
||||
.collect(),
|
||||
offsets.iter().map(|e| *e as usize).collect(),
|
||||
)?;
|
||||
|
||||
Ok(position.iter().map(|i| (*i).clone()).collect())
|
||||
)?)
|
||||
}
|
||||
|
||||
// Rebuild a specific record
|
||||
fn get_record(&self, code: SFCCode, entry: &SFCRecord<F>) -> Result<R, String> {
|
||||
let position = &self.position(code, &entry.offsets)?;
|
||||
// Build coordinate values from encoded value
|
||||
fn position(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<K, String> {
|
||||
let position = self.value(code, offsets)?;
|
||||
|
||||
Ok(R::build(position, &entry.fields))
|
||||
Ok(position.iter().map(|i| (*i).clone()).collect())
|
||||
}
|
||||
|
||||
fn limits(&self, start: &K, end: &K) -> Result<Limits<V>, String> {
|
||||
@@ -235,14 +230,13 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, R, K, V, F> IndexedOwned<T, R, K> for SpaceFillingCurve<T, R, K, V, F>
|
||||
impl<F, K, V> IndexedDestructured<F, K> for SpaceFillingCurve<F, K, V>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
K: Debug + Index<usize, Output = V> + FromIterator<V>,
|
||||
F: PartialEq,
|
||||
K: Debug + FromIterator<V> + Index<usize, Output = V>,
|
||||
V: Clone + Debug + From<usize> + Hash + Ord,
|
||||
{
|
||||
fn find(&self, key: &K) -> Vec<R> {
|
||||
fn find(&self, key: &K) -> Vec<&F> {
|
||||
let mut values = vec![];
|
||||
|
||||
if let Ok((cell_ids, offsets)) = self.space.key(key) {
|
||||
@@ -257,10 +251,7 @@ where
|
||||
}
|
||||
|
||||
if select {
|
||||
match self.get_record(code, record) {
|
||||
Err(e) => error!("{}", e),
|
||||
Ok(r) => values.push(r),
|
||||
}
|
||||
values.push(&record.fields);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -271,33 +262,60 @@ where
|
||||
values
|
||||
}
|
||||
|
||||
fn find_range(&self, start: &K, end: &K) -> Vec<R> {
|
||||
fn find_range(&self, start: &K, end: &K) -> Vec<(K, &F)> {
|
||||
let mut values = vec![];
|
||||
|
||||
match self.limits(start, end) {
|
||||
Ok(limits) => {
|
||||
for idx in limits.start.idx..limits.end.idx {
|
||||
let code = self.index[idx].code;
|
||||
for record in &self.index[idx].records {
|
||||
let mut select = true;
|
||||
let pos = match self.position(code, &record.offsets) {
|
||||
Err(e) => {
|
||||
error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
Ok(p) => p,
|
||||
};
|
||||
|
||||
// FIXME: Reduce number of comparison by using the cells boundaries.
|
||||
for k in 0..self.dimensions {
|
||||
select = select
|
||||
&& *limits.start.position[k] <= pos[k]
|
||||
&& *limits.end.position[k] >= pos[k];
|
||||
let first = match self.value(code, &self.index[idx].records[0].offsets) {
|
||||
Err(e) => {
|
||||
error!("Cannot retrieve first value of cell: {}", e);
|
||||
continue;
|
||||
}
|
||||
if select {
|
||||
match self.get_record(code, &record) {
|
||||
Err(e) => error!("{}", e),
|
||||
Ok(r) => values.push(r),
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
let (cell_ids, last_offsets) = self.last();
|
||||
let last = match self.space.value(cell_ids, last_offsets) {
|
||||
Err(e) => {
|
||||
error!("Cannot retrieve last value of cell: {}", e);
|
||||
continue;
|
||||
}
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
// Check first & last point of the cell, if both are fully
|
||||
// in the bounding box, then all the points of the cell will
|
||||
// be.
|
||||
if limits.start.position <= first
|
||||
&& first <= limits.end.position
|
||||
&& limits.start.position <= last
|
||||
&& last <= limits.end.position
|
||||
{
|
||||
for record in &self.index[idx].records {
|
||||
if let Ok(key) = self.position(code, &record.offsets) {
|
||||
values.push((key, &record.fields));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// We have points which are outside of the bounding box,
|
||||
// so check every point one by one.
|
||||
for record in &self.index[idx].records {
|
||||
let pos = match self.value(code, &record.offsets) {
|
||||
Err(e) => {
|
||||
error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
if limits.start.position <= pos && pos <= limits.end.position {
|
||||
if let Ok(key) = self.position(code, &record.offsets) {
|
||||
values.push((key, &record.fields));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -309,49 +327,12 @@ where
|
||||
values
|
||||
}
|
||||
}
|
||||
// Rough check, based on per-dimension cell Ids.
|
||||
/*
|
||||
// If the cell_ids are between ]pos_start and pos_end[, then the value is within the range,
|
||||
// If the cell_ids are outside [pos_start, pos_end], then the value is out, stop checking
|
||||
// Else, check the offsets of each entry to be within [off_start, off_end], then the value is within the range.
|
||||
let mut rough_in = true;
|
||||
for k in 0..self.dimensions {
|
||||
if !(cells[k] > start_limits.cells[k] && cells[k] < end_limits.cells[k]) {
|
||||
rough_in = false;
|
||||
}
|
||||
}
|
||||
|
||||
if rough_in {
|
||||
// This is a cell well within the volume, so all points are a match, add all points,
|
||||
// go to next cell.
|
||||
for entry in entries {
|
||||
values.push(self.get_element(code, entry))
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut rough_out = false;
|
||||
for k in 0..self.dimensions {
|
||||
if cells[k] < start_limits.cells[k] || cells[k] > end_limits.cells[k] {
|
||||
rough_out = false;
|
||||
}
|
||||
}
|
||||
|
||||
// If rough is not true, then we have nothing to double check.
|
||||
if rough_out {
|
||||
continue;
|
||||
}
|
||||
*/
|
||||
|
||||
impl<T, R, K, V, F> Store for SpaceFillingCurve<T, R, K, V, F>
|
||||
impl<F, K, V> Store for SpaceFillingCurve<F, K, V>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
// K: Debug + ExactSizeIterator + Index<usize, Output = V> + FromIterator<V>,
|
||||
K: Serialize,
|
||||
V: Clone + Ord + Debug + From<usize> + Serialize,
|
||||
F: Serialize,
|
||||
F: PartialEq + Serialize,
|
||||
K: Debug + Serialize + FromIterator<V> + Index<usize, Output = V>,
|
||||
V: Clone + Debug + From<usize> + Ord + Serialize,
|
||||
{
|
||||
fn store<W>(&mut self, writer: W) -> io::Result<()>
|
||||
where
|
||||
@@ -364,13 +345,11 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, R, K, V, F> Load for SpaceFillingCurve<T, R, K, V, F>
|
||||
impl<F, K, V> Load for SpaceFillingCurve<F, K, V>
|
||||
where
|
||||
T: Table<R>,
|
||||
R: Record<K> + RecordFields<F> + RecordBuild<K, F, R> + Debug,
|
||||
K: DeserializeOwned,
|
||||
V: Clone + Ord + Debug + From<usize> + DeserializeOwned,
|
||||
F: DeserializeOwned,
|
||||
F: PartialEq + DeserializeOwned,
|
||||
K: Debug + DeserializeOwned + FromIterator<V> + Index<usize, Output = V>,
|
||||
V: Clone + Debug + DeserializeOwned + From<usize> + Ord,
|
||||
{
|
||||
fn load<Re: io::Read>(reader: Re) -> io::Result<Self> {
|
||||
match bincode::deserialize_from(reader) {
|
||||
|
||||
Reference in New Issue
Block a user