Compare commits
5 Commits
12cfe01a86
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 363e94aa28 | |||
| 09cdd3d569 | |||
| 5af7d8cb4b | |||
| f97f7f18a3 | |||
| a7ac8f0fb2 |
13
Cargo.toml
13
Cargo.toml
@@ -19,12 +19,11 @@ license = "MIT"
|
||||
include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.rs"]
|
||||
|
||||
[dependencies]
|
||||
ironsea_index = "^0.1"
|
||||
#ironsea_store = "^0.1"
|
||||
ironsea_index = "0.1"
|
||||
|
||||
arrayref = "^0.3"
|
||||
#log = { version = "^0.4", features = ["max_level_trace", "release_max_level_info"] }
|
||||
log = { version = "^0.4", features = ["max_level_trace", "release_max_level_trace"] }
|
||||
arrayref = "0.3"
|
||||
#log = { version = "0.4", features = ["max_level_trace", "release_max_level_info"] }
|
||||
log = { version = "0.4", features = ["max_level_trace", "release_max_level_trace"] }
|
||||
|
||||
serde = { version = "^1.0", features = ["derive"] }
|
||||
bincode = "^1.1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
bincode = "1.3"
|
||||
|
||||
25
src/lib.rs
25
src/lib.rs
@@ -1,4 +1,29 @@
|
||||
#![forbid(unsafe_code)]
|
||||
#![deny(missing_docs)]
|
||||
|
||||
//! # Iron Sea - Index SFC DBC
|
||||
//!
|
||||
//! Index for the Iron Sea toolkit, based on a Space Filling Curve (SFC),
|
||||
//! over Dictionary-Based Compression (DBC), which offers great
|
||||
//! performance for range queries over point cloud data, and at the
|
||||
//! same time uses a storage-efficient index.
|
||||
//!
|
||||
//! More details in the [paper].
|
||||
//!
|
||||
//! [paper]: https://infoscience.epfl.ch/record/232536?ln=en
|
||||
//!
|
||||
//! ## Iron Sea: Database Toolkit
|
||||
//! **Iron Sea** provides a set of database engine bricks, which can be
|
||||
//! combined and applied on arbitrary data structures.
|
||||
//!
|
||||
//! Unlike a traditional database, it does not assume a specific
|
||||
//! physical structure for the tables nor the records, but relies on the
|
||||
//! developer to provide a set of extractor functions which are used by
|
||||
//! the specific indices provided.
|
||||
//!
|
||||
//! This enables the index implementations to be agnostic of the
|
||||
//! underlying data structure, and re-used.
|
||||
//!
|
||||
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
|
||||
@@ -171,7 +171,7 @@ impl<'de> Deserialize<'de> for MortonEncoder {
|
||||
enum Field {
|
||||
CellBits,
|
||||
Dimensions,
|
||||
};
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Field {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Field, D::Error>
|
||||
|
||||
197
src/sfc.rs
197
src/sfc.rs
@@ -28,13 +28,13 @@ type SFCOffset = u32;
|
||||
// type-num crate?
|
||||
const MAX_K: usize = 3;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
struct Limit<V> {
|
||||
idx: usize,
|
||||
position: Vec<V>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
struct Limits<'a, V> {
|
||||
start: Limit<&'a V>,
|
||||
end: Limit<&'a V>,
|
||||
@@ -53,6 +53,9 @@ struct SFCCell<F> {
|
||||
records: Vec<SFCRecord<F>>,
|
||||
}
|
||||
|
||||
/// Space Filling Curve-based index.
|
||||
///
|
||||
/// This structure retains the state of the index.
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct SpaceFillingCurve<F, K, V>
|
||||
where
|
||||
@@ -72,6 +75,15 @@ where
|
||||
K: Debug + FromIterator<V> + Index<usize, Output = V>,
|
||||
V: Clone + Debug + From<usize> + Hash + Ord,
|
||||
{
|
||||
/// Creates a new Index from the provided iterator.
|
||||
///
|
||||
/// * `dimensions`: The number of dimensions of the space, a.k.a. the
|
||||
/// length of the vector representing a single
|
||||
/// position.
|
||||
/// * `cell_bits`: The number of bits to reserve for the grid we
|
||||
/// build on top of the coordinate dictionaries.
|
||||
/// We generate 2^`cell_bits` Cells per dimension.
|
||||
///
|
||||
//FIXME: Should accept indexing 0 elements, at least not crash!
|
||||
pub fn new<I, R>(iter: I, dimensions: usize, cell_bits: usize) -> Self
|
||||
where
|
||||
@@ -140,19 +152,27 @@ where
|
||||
index
|
||||
}
|
||||
|
||||
pub fn find_by_value(&self, value: &F) -> Vec<K> {
|
||||
let mut results = vec![];
|
||||
for cell in &self.index {
|
||||
for record in &cell.records {
|
||||
if &record.fields == value {
|
||||
if let Ok(key) = self.position(cell.code, &record.offsets) {
|
||||
results.push(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results
|
||||
/// Returns a vector of keys which have stored values in the index
|
||||
/// equal to `value`.
|
||||
pub fn find_by_value<'s>(&'s self, value: &'s F) -> Box<dyn Iterator<Item = K> + 's> {
|
||||
Box::new(
|
||||
self.index
|
||||
.iter()
|
||||
.map(|cell| (cell, cell.records.iter()))
|
||||
.flat_map(move |(cell, records)| {
|
||||
records.filter_map(move |record| {
|
||||
if &record.fields == value {
|
||||
if let Ok(key) = self.position(cell.code, &record.offsets) {
|
||||
Some(key)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
// Map the cell_ids of a point to its SFCcode
|
||||
@@ -170,14 +190,14 @@ where
|
||||
}
|
||||
|
||||
fn value(&self, code: SFCCode, offsets: &[SFCOffset]) -> Result<Vec<&V>, String> {
|
||||
Ok(self.space.value(
|
||||
self.space.value(
|
||||
self.morton
|
||||
.decode(code)
|
||||
.iter()
|
||||
.map(|e| *e as usize)
|
||||
.collect(),
|
||||
offsets.iter().map(|e| *e as usize).collect(),
|
||||
)?)
|
||||
)
|
||||
}
|
||||
|
||||
// Build coordinate values from encoded value
|
||||
@@ -235,95 +255,100 @@ where
|
||||
K: Debug + FromIterator<V> + Index<usize, Output = V>,
|
||||
V: Clone + Debug + From<usize> + Hash + Ord,
|
||||
{
|
||||
fn find(&self, key: &K) -> Vec<&F> {
|
||||
let mut values = vec![];
|
||||
|
||||
fn find<'i>(&'i self, key: &K) -> Box<dyn Iterator<Item = &F> + 'i> {
|
||||
if let Ok((cell_ids, offsets)) = self.space.key(key) {
|
||||
match self.encode(&cell_ids) {
|
||||
Err(e) => error!("{}", e),
|
||||
Ok(code) => {
|
||||
if let Ok(cell) = self.index.binary_search_by(|a| a.code.cmp(&code)) {
|
||||
for record in &self.index[cell].records {
|
||||
let mut select = true;
|
||||
for (k, o) in offsets.iter().enumerate().take(self.dimensions) {
|
||||
select &= record.offsets[k] == (*o as SFCOffset);
|
||||
}
|
||||
return Box::new(self.index[cell].records.iter().filter_map(
|
||||
move |record| {
|
||||
let mut select = true;
|
||||
for (k, o) in offsets.iter().enumerate().take(self.dimensions) {
|
||||
select &= record.offsets[k] == (*o as SFCOffset);
|
||||
}
|
||||
|
||||
if select {
|
||||
values.push(&record.fields);
|
||||
}
|
||||
}
|
||||
if select {
|
||||
Some(&record.fields)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
values
|
||||
Box::new(Vec::with_capacity(0).into_iter())
|
||||
}
|
||||
|
||||
fn find_range(&self, start: &K, end: &K) -> Vec<(K, &F)> {
|
||||
let mut values = vec![];
|
||||
|
||||
fn find_range<'i>(&'i self, start: &K, end: &K) -> Box<dyn Iterator<Item = (K, &F)> + 'i> {
|
||||
match self.limits(start, end) {
|
||||
Ok(limits) => {
|
||||
for idx in limits.start.idx..limits.end.idx {
|
||||
let code = self.index[idx].code;
|
||||
|
||||
let first = match self.value(code, &self.index[idx].records[0].offsets) {
|
||||
Err(e) => {
|
||||
error!("Cannot retrieve first value of cell: {}", e);
|
||||
continue;
|
||||
let iter = (limits.start.idx..limits.end.idx)
|
||||
.filter_map(move |idx| {
|
||||
match self.value(self.index[idx].code, &self.index[idx].records[0].offsets)
|
||||
{
|
||||
Err(_) => None,
|
||||
Ok(first) => Some((idx, first)),
|
||||
}
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
let (cell_ids, last_offsets) = self.last();
|
||||
let last = match self.space.value(cell_ids, last_offsets) {
|
||||
Err(e) => {
|
||||
error!("Cannot retrieve last value of cell: {}", e);
|
||||
continue;
|
||||
})
|
||||
.filter_map(move |(idx, first)| {
|
||||
let (cell_ids, last_offsets) = self.last();
|
||||
match self.space.value(cell_ids, last_offsets) {
|
||||
Err(_) => None,
|
||||
Ok(last) => Some((idx, first, last)),
|
||||
}
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
// Check first & last point of the cell, if both are fully
|
||||
// in the bounding box, then all the points of the cell will
|
||||
// be.
|
||||
if limits.start.position <= first
|
||||
&& first <= limits.end.position
|
||||
&& limits.start.position <= last
|
||||
&& last <= limits.end.position
|
||||
{
|
||||
for record in &self.index[idx].records {
|
||||
if let Ok(key) = self.position(code, &record.offsets) {
|
||||
values.push((key, &record.fields));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// We have points which are outside of the bounding box,
|
||||
// so check every point one by one.
|
||||
for record in &self.index[idx].records {
|
||||
let pos = match self.value(code, &record.offsets) {
|
||||
Err(e) => {
|
||||
error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
if limits.start.position <= pos && pos <= limits.end.position {
|
||||
})
|
||||
.flat_map(move |(idx, first, last)| {
|
||||
// Check first & last point of the cell, if both are fully
|
||||
// in the bounding box, then all the points of the cell will
|
||||
// be.
|
||||
let limits = limits.clone();
|
||||
let b: Box<dyn Iterator<Item = _>> = if limits.start.position <= first
|
||||
&& first <= limits.end.position
|
||||
&& limits.start.position <= last
|
||||
&& last <= limits.end.position
|
||||
{
|
||||
Box::new(self.index[idx].records.iter().filter_map(move |record| {
|
||||
let code = self.index[idx].code;
|
||||
if let Ok(key) = self.position(code, &record.offsets) {
|
||||
values.push((key, &record.fields));
|
||||
Some((key, &record.fields))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => error!("find_range: limits failed: {}", e),
|
||||
};
|
||||
}))
|
||||
} else {
|
||||
// We have points which are outside of the bounding box,
|
||||
// so check every point one by one.
|
||||
Box::new(self.index[idx].records.iter().filter_map(move |record| {
|
||||
let code = self.index[idx].code;
|
||||
if let Ok(pos) = self.value(code, &record.offsets) {
|
||||
if limits.start.position <= pos && pos <= limits.end.position {
|
||||
if let Ok(key) = self.position(code, &record.offsets) {
|
||||
Some((key, &record.fields))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
};
|
||||
|
||||
values
|
||||
b
|
||||
});
|
||||
Box::new(iter)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("find_range: limits failed: {}", e);
|
||||
Box::new(Vec::with_capacity(0).into_iter())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user