Implement ViewPort & resolution selection

* Implement multi-scale indices generation.

 * Deduplicate values before sorting them while generating an index, to
   reduce the number of points to sort.

 * Use a hashtable to deduplicate values, instead of a sort + dedup call.

 * Minor code cleanups
This commit is contained in:
2019-10-15 19:30:01 +02:00
parent 8fcf1b74e7
commit 0449f5a90a
10 changed files with 334 additions and 60 deletions

View File

@@ -11,7 +11,8 @@ pub struct CoreQueryParameters<'a> {
pub db: &'a DataBase, pub db: &'a DataBase,
pub output_space: Option<&'a str>, pub output_space: Option<&'a str>,
pub threshold_volume: Option<f64>, pub threshold_volume: Option<f64>,
pub resolution: Option<Vec<u64>>, pub view_port: &'a Option<(Vec<f64>, Vec<f64>)>,
pub resolution: Option<Vec<u32>>,
} }
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] #[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
@@ -75,6 +76,8 @@ impl Core {
spaces: &[Space], spaces: &[Space],
properties: Vec<Properties>, properties: Vec<Properties>,
space_objects: Vec<SpaceSetObject>, space_objects: Vec<SpaceSetObject>,
scales: Option<Vec<Vec<u32>>>,
max_elements: Option<usize>,
) -> Self ) -> Self
//Result<Self, String> //Result<Self, String>
where where
@@ -101,7 +104,7 @@ impl Core {
}) })
.collect(); .collect();
space_dbs.push(SpaceDB::new(space.name(), filtered)) space_dbs.push(SpaceDB::new(&space, filtered, scales.clone(), max_elements))
} }
Core { Core {
@@ -195,6 +198,7 @@ impl Core {
output_space, output_space,
threshold_volume, threshold_volume,
resolution, resolution,
..
} = parameters; } = parameters;
let mut results = vec![]; let mut results = vec![];
@@ -238,6 +242,7 @@ impl Core {
output_space, output_space,
threshold_volume, threshold_volume,
resolution, resolution,
..
} = parameters; } = parameters;
let mut results = vec![]; let mut results = vec![];
@@ -272,6 +277,7 @@ impl Core {
output_space, output_space,
threshold_volume, threshold_volume,
resolution, resolution,
..
} = parameters; } = parameters;
let id: String = id.into(); let id: String = id.into();
@@ -310,6 +316,7 @@ impl Core {
output_space, output_space,
threshold_volume, threshold_volume,
resolution, resolution,
..
} = parameters; } = parameters;
let id: String = id.into(); let id: String = id.into();

View File

@@ -21,7 +21,7 @@ pub type ResultSet = Result<Vec<SpaceObject>, String>;
pub type ReferenceSpaceIndex = ironsea_index_hashmap::Index<VectorTable<Space>, Space, String>; pub type ReferenceSpaceIndex = ironsea_index_hashmap::Index<VectorTable<Space>, Space, String>;
type CoreIndex = ironsea_index_hashmap::Index<VectorTable<Core>, Core, String>; type CoreIndex = ironsea_index_hashmap::Index<VectorTable<Core>, Core, String>;
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Hash, PartialEq, Serialize)]
pub struct SpaceId(String); pub struct SpaceId(String);
impl SpaceId { impl SpaceId {
@@ -49,12 +49,6 @@ where
} }
} }
impl PartialEq for SpaceId {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize)] #[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize)]
pub struct SpaceObject { pub struct SpaceObject {
pub space_id: String, pub space_id: String,
@@ -62,18 +56,6 @@ pub struct SpaceObject {
pub value: Properties, pub value: Properties,
} }
// FIXME: Which is faster, the code below or the automatically generated
// implementation?
/*
impl PartialEq for SpaceObject {
fn eq(&self, other: &Self) -> bool {
self.space_id == other.space_id
&& self.value == other.value
&& self.position == other.position
}
}
*/
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]
pub struct DataBase { pub struct DataBase {
reference_spaces: ReferenceSpaceIndex, reference_spaces: ReferenceSpaceIndex,

View File

@@ -82,6 +82,16 @@ impl Position {
product product
} }
/// Returns a new position whose coordinates are those of `self`, each
/// shifted right by `scale` bits (the `scale` least-significant bits are
/// dropped).
///
/// A `scale` that is not smaller than the bit width of the integer returned
/// by `u64()` yields an all-zero position instead of overflowing the shift.
pub fn reduce_precision(&self, scale: u32) -> Self {
    let position = (0..self.dimensions())
        // `checked_shr` returns `None` when the shift amount is too large;
        // a plain `>>` would panic in debug builds and silently mask the
        // shift amount in release builds, producing a wrong coordinate.
        .map(|i| self[i].u64().checked_shr(scale).unwrap_or(0).into())
        .collect::<Vec<_>>();

    Position::new(position)
}
} }
impl Display for Position { impl Display for Position {

View File

@@ -206,4 +206,49 @@ impl Shape {
}) })
.collect()) .collect())
} }
/// Computes the volume covered by this shape.
///
/// * `Point`: `std::f64::EPSILON`, used as the smallest non-zero volume.
/// * `BoundingBox`: product, over every dimension of `low`, of the absolute
///   distance between the two corners along that dimension.
/// * `HyperSphere`: coefficient built by a two-step recurrence, multiplied
///   by the radius raised to the number of dimensions of the center.
pub fn volume(&self) -> f64 {
    match self {
        // A point has no extent; report the smallest non-zero volume.
        Shape::Point(_) => std::f64::EPSILON,
        Shape::BoundingBox(low, high) => (0..low.dimensions())
            .map(|d| {
                let l = low[d].f64();
                let h = high[d].f64();
                // Edge length, whichever corner holds the larger value.
                (h - l).abs()
            })
            .fold(1.0, |acc, edge| acc * edge),
        Shape::HyperSphere(center, radius) => {
            // Formula from https://en.wikipedia.org/wiki/N-sphere#/media/File:N_SpheresVolumeAndSurfaceArea.png
            let dims = center.dimensions();
            let r = radius.f64();
            let two_pi = 2.0 * std::f64::consts::PI;

            // Seed the recurrence with the 2-dimensional coefficient (pi)
            // for an even number of dimensions, and with the 1-dimensional
            // one (2) for an odd number.
            // NOTE(review): with zero dimensions the loop below never runs,
            // so the result is pi * r^2 -- confirm this is never reached.
            let (mut n, mut coefficient) = if dims % 2 == 0 {
                (2, std::f64::consts::PI)
            } else {
                (1, 2.0)
            };

            // Each step adds two dimensions: C(n) = C(n - 2) * 2 * pi / n.
            while n < dims {
                n += 2;
                coefficient = coefficient * two_pi / n as f64;
            }

            coefficient * r.powi(n as i32)
        }
    }
}
} }

View File

@@ -1,13 +1,21 @@
use std::cmp::Ordering;
use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::collections::HashSet;
use std::hash::Hash;
use std::hash::Hasher;
use ironsea_table_vector::VectorTable;
use super::space::Coordinate; use super::space::Coordinate;
use super::space::Position; use super::space::Position;
use super::space::Shape; use super::space::Shape;
use super::space::Space;
use super::space_index::SpaceFields; use super::space_index::SpaceFields;
use super::space_index::SpaceIndex; use super::space_index::SpaceIndex;
use super::space_index::SpaceSetIndex; use super::space_index::SpaceSetIndex;
use super::space_index::SpaceSetObject; use super::space_index::SpaceSetObject;
use ironsea_table_vector::VectorTable;
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceDB { pub struct SpaceDB {
reference_space: String, reference_space: String,
@@ -16,17 +24,24 @@ pub struct SpaceDB {
} }
impl SpaceDB { impl SpaceDB {
pub fn new<S>(reference_space: S, mut space_objects: Vec<SpaceSetObject>) -> Self pub fn new(
where reference_space: &Space,
S: Into<String>, mut space_objects: Vec<SpaceSetObject>,
{ scales: Option<Vec<Vec<u32>>>,
max_elements: Option<usize>,
) -> Self {
//FIXME: Remove hard-coded constants for dimensions & bit length of morton codes.
const DIMENSIONS: usize = 3;
const CELL_BITS: usize = 10;
let mut values = space_objects let mut values = space_objects
.iter() .iter()
.map(|object| *object.value()) .map(|object| *object.value())
.collect::<HashSet<_>>()
.drain()
.collect::<Vec<_>>(); .collect::<Vec<_>>();
values.sort_unstable_by_key(|&c| c.u64()); values.sort_unstable_by_key(|&c| c.u64());
values.dedup_by_key(|c| c.u64());
space_objects.iter_mut().for_each(|object| { space_objects.iter_mut().for_each(|object| {
// Update the values to point into the local (shorter) mapping array. // Update the values to point into the local (shorter) mapping array.
@@ -35,27 +50,174 @@ impl SpaceDB {
}); });
// Build the set of SpaceIndices. // Build the set of SpaceIndices.
// FIXME: Build multiple-scale indices. What is the stopping condition, and what are the parameters? let mut resolutions = vec![];
let max_elem = 2_000; let mut indices = vec![];
// We cannot return less that the total number of individual Ids stored
// in the index. if let Some(scales) = scales {
let max = max_elem.max(values.len()); // We optimize scaling, by iteratively building coarser and coarser
// Generate indices as long as max is smaller than the number of point located in the whole space. // indexes. Powers holds a list of bit shift to apply based on the
// For each new index, reduce precision by two, and push to resolutions vectors. // previous value.
let mut powers = Vec::with_capacity(scales.len());
// Limit temporary values lifetimes
{
// Sort by values, smaller to bigger.
let mut exps = scales.clone();
exps.sort_unstable_by_key(|v| v[0]);
let mut previous = 0u32;
for scale in exps {
// FIXME: Remove these assertions ASAP, and support multi-factor scaling
assert_eq!(scale.len(), DIMENSIONS);
assert!(scale[0] == scale[1] && scale[0] == scale[2]);
powers.push((scale[0], scale[0] - previous));
previous = scale[0];
}
}
// Apply fixed scales
let mut count = 0;
for power in &powers {
space_objects = space_objects
.into_iter()
.map(|mut o| {
let p = o.position().reduce_precision(power.1);
let mut hasher = DefaultHasher::new();
o.set_position(p);
// Hash, AFTER updating the position.
o.hash(&mut hasher);
(hasher.finish(), o)
})
.collect::<HashMap<_, SpaceSetObject>>()
.drain()
.map(|(_k, v)| v)
.collect();
// Make sure we do not shift more position than available
let shift = if count >= 31 { 31 } else { count };
count += 1;
indices.push((
SpaceSetIndex::new(
&VectorTable::new(space_objects.to_vec()),
DIMENSIONS,
CELL_BITS,
),
vec![power.0, power.0, power.0],
shift,
));
}
} else {
// Generate scales, following max_elements
if let Some(max_elements) = max_elements {
// We cannot return fewer than the total number of individual Ids stored
// in the index for a full-volume query.
let max_elements = max_elements.max(values.len());
let mut count = 0;
// The next index should contain at most half the number of
// elements of the current index.
let mut element_count_target = space_objects.len() / 2;
// Insert Full resolution index.
indices.push((
SpaceSetIndex::new(
&VectorTable::new(space_objects.clone()),
DIMENSIONS,
CELL_BITS,
),
vec![count, count, count],
0, // Smallest value => highest resolution
));
// Generate coarser indices, until we reach the expected max_elements
// value or we cannot define a bigger bit shift.
loop {
// Make sure we do not shift more position than available
let shift = if count >= 31 { 31 } else { count };
count += 1;
space_objects = space_objects
.into_iter()
.map(|mut o| {
let p = o.position().reduce_precision(1);
let mut hasher = DefaultHasher::new();
o.set_position(p);
// Hash, AFTER updating the position.
o.hash(&mut hasher);
(hasher.finish(), o)
})
.collect::<HashMap<_, SpaceSetObject>>()
.drain()
.map(|(_k, v)| v)
.collect();
// Skip a resolution if it does not reduce the number of points enough.
// It would be a waste of space to store it.
if element_count_target < space_objects.len() {
continue;
} else {
// The next index should contain at most half the number of
// elements of the current index.
element_count_target = space_objects.len() / 2;
}
indices.push((
SpaceSetIndex::new(
&VectorTable::new(space_objects.to_vec()),
DIMENSIONS,
CELL_BITS,
),
vec![count, count, count],
shift,
));
if space_objects.len() <= max_elements || count == std::u32::MAX {
break;
}
}
// Generate indices as long as max is smaller than the number of point located in the whole space.
// For each new index, reduce precision by two, and push to resolutions vectors.
} else {
// Generate only full-scale.
indices.push((
SpaceSetIndex::new(&VectorTable::new(space_objects), DIMENSIONS, CELL_BITS),
vec![0, 0, 0],
0,
));
}
}
// When done, go over the array, and set the threshold_volumes with Volume total / 8 * i in reverse order // When done, go over the array, and set the threshold_volumes with Volume total / 8 * i in reverse order
// let space_volume = reference_space.volume();
let index = SpaceSetIndex::new(&VectorTable::new(space_objects), 3, 10); let max_shift = match indices.last() {
let mut resolutions = vec![SpaceIndex::new(std::f64::MAX, vec![0, 0, 0], index)]; None => 31,
Some((_, _, x)) => *x,
};
for (index, scale, shift) in indices {
// Compute threshold volume as Vt = V / 2^(max_shift) * 2^shift
// => the smaller shift is, the smaller the threshold is and the higher
// the resolution is.
let volume = space_volume / f64::from(1 << (max_shift - shift));
resolutions.push(SpaceIndex::new(volume, scale, index));
}
// Make sure the vector is sorted by threshold volumes, smallest to largest. // Make sure the vector is sorted by threshold volumes, smallest to largest.
// this means indices are sorted from highest resolution to lowest resolution. // this means indices are sorted from highest resolution to lowest resolution.
// default_resolution() relies on it to find the correct index. // default_resolution() relies on this to find the correct index.
//FIXME: Domain check between f64 <-> u64 XOR implement Ord on f64 resolutions.sort_unstable_by(|a, b| match a.threshold().partial_cmp(&b.threshold()) {
resolutions.sort_unstable_by_key(|a| a.threshold() as u64); Some(o) => o,
None => Ordering::Less, // FIXME: This is most likely incorrect...
});
SpaceDB { SpaceDB {
reference_space: reference_space.into(), reference_space: reference_space.name().clone(),
values, values,
resolutions, resolutions,
} }
@@ -88,29 +250,65 @@ impl SpaceDB {
fn default_resolution(&self, volume: f64) -> usize { fn default_resolution(&self, volume: f64) -> usize {
for i in 0..self.resolutions.len() { for i in 0..self.resolutions.len() {
if volume <= self.resolutions[i].threshold() { if volume <= self.resolutions[i].threshold() {
debug!(
"Selected {:?} -> {:?} vs {:?}",
i,
self.resolutions[i].threshold(),
volume,
);
return i; return i;
} }
} }
self.resolutions.len()
debug!(
"Selected lowest resolution -> {:?} vs {:?}",
self.resolutions[self.lowest_resolution()].threshold(),
volume
);
self.lowest_resolution()
} }
fn find_resolution(&self, _scales: &[u64]) -> usize { fn find_resolution(&self, scale: &[u32]) -> usize {
// FIXME: Implement stuff here! for i in 0..self.resolutions.len() {
if scale <= self.resolutions[i].scale() {
debug!(
"Selected {:?} -> {:?} vs {:?}",
i,
self.resolutions[i].scale(),
scale
);
return i;
}
}
warn!(
"Scale factors {:?} not found, using lowest resolution: {:?}",
scale,
self.resolutions[self.lowest_resolution()].scale()
);
self.lowest_resolution() self.lowest_resolution()
} }
pub fn get_resolution( pub fn get_resolution(
&self, &self,
threshold_volume: &Option<f64>, threshold_volume: &Option<f64>,
resolution: &Option<Vec<u64>>, resolution: &Option<Vec<u32>>,
) -> usize { ) -> usize {
if let Some(threshold_volume) = threshold_volume { // If a specific scale has been set, try to find it, otherwise use the
self.default_resolution(*threshold_volume) // threshold volume to figure a default value, and fall back to the most
} else { // coarse resolution whenever nothing is specified.
match resolution { match resolution {
None => self.lowest_resolution(), None => {
Some(v) => self.find_resolution(v), if let Some(threshold_volume) = threshold_volume {
self.default_resolution(*threshold_volume)
} else {
self.lowest_resolution()
}
} }
Some(v) => self.find_resolution(v),
} }
} }
@@ -128,7 +326,7 @@ impl SpaceDB {
&self, &self,
id: usize, id: usize,
threshold_volume: &Option<f64>, threshold_volume: &Option<f64>,
resolution: &Option<Vec<u64>>, resolution: &Option<Vec<u32>>,
) -> Result<Vec<SpaceSetObject>, String> { ) -> Result<Vec<SpaceSetObject>, String> {
// Is that ID referenced in the current space? // Is that ID referenced in the current space?
if let Ok(offset) = self.values.binary_search(&id.into()) { if let Ok(offset) = self.values.binary_search(&id.into()) {
@@ -154,7 +352,7 @@ impl SpaceDB {
&self, &self,
positions: &[Position], positions: &[Position],
threshold_volume: &Option<f64>, threshold_volume: &Option<f64>,
resolution: &Option<Vec<u64>>, resolution: &Option<Vec<u32>>,
) -> Result<Vec<SpaceSetObject>, String> { ) -> Result<Vec<SpaceSetObject>, String> {
let index = self.get_resolution(threshold_volume, resolution); let index = self.get_resolution(threshold_volume, resolution);
@@ -174,7 +372,7 @@ impl SpaceDB {
&self, &self,
shape: &Shape, shape: &Shape,
threshold_volume: &Option<f64>, threshold_volume: &Option<f64>,
resolution: &Option<Vec<u64>>, resolution: &Option<Vec<u32>>,
) -> Result<Vec<SpaceSetObject>, String> { ) -> Result<Vec<SpaceSetObject>, String> {
let index = self.get_resolution(threshold_volume, resolution); let index = self.get_resolution(threshold_volume, resolution);

View File

@@ -6,7 +6,7 @@ use super::space::Position;
use super::space::Shape; use super::space::Shape;
use super::SpaceId; use super::SpaceId;
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Hash, Serialize)]
pub struct SpaceSetObject { pub struct SpaceSetObject {
space_id: SpaceId, space_id: SpaceId,
position: Position, position: Position,
@@ -34,6 +34,10 @@ impl SpaceSetObject {
&self.position &self.position
} }
pub fn set_position(&mut self, pos: Position) {
self.position = pos;
}
pub fn value(&self) -> &Coordinate { pub fn value(&self) -> &Coordinate {
&self.value &self.value
} }
@@ -97,7 +101,7 @@ pub type SpaceSetIndex = ironsea_index_sfc_dbc::IndexOwned<
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpaceIndex { pub struct SpaceIndex {
threshold_volume: f64, threshold_volume: f64,
// lookup_ rounds up, so reverse sort of the list on threasholds and check for last index. // lookup_ rounds up, so reverse sort of the list on thresholds and check for last index.
scale: Vec<u32>, scale: Vec<u32>,
index: SpaceSetIndex, index: SpaceSetIndex,
} }
@@ -115,6 +119,10 @@ impl SpaceIndex {
self.threshold_volume self.threshold_volume
} }
pub fn scale(&self) -> &Vec<u32> {
&self.scale
}
pub fn find(&self, key: &Position) -> Vec<SpaceSetObject> { pub fn find(&self, key: &Position) -> Vec<SpaceSetObject> {
self.index.find(key) self.index.find(key)
} }

View File

@@ -162,6 +162,8 @@ pub fn build_index(
version: &str, version: &str,
spaces: &[space::Space], spaces: &[space::Space],
objects: &[SpatialObject], objects: &[SpatialObject],
scales: Option<Vec<Vec<u32>>>,
max_elements: Option<usize>,
) -> Core { ) -> Core {
let mut properties = vec![]; let mut properties = vec![];
let mut space_set_objects = vec![]; let mut space_set_objects = vec![];
@@ -210,5 +212,13 @@ pub fn build_index(
object.set_value(value.into()); object.set_value(value.into());
}); });
Core::new(name, version, spaces, properties, space_set_objects) Core::new(
name,
version,
spaces,
properties,
space_set_objects,
scales,
max_elements,
)
} }

View File

@@ -74,7 +74,12 @@ pub fn convert(name: &str) {
from_json::<Vec<model::SpatialObject>>(&fn_in, &fn_out); from_json::<Vec<model::SpatialObject>>(&fn_in, &fn_out);
} }
pub fn build(name: &str, version: &str) { pub fn build(
name: &str,
version: &str,
scales: Option<Vec<Vec<u32>>>,
max_elements: Option<usize>,
) {
let fn_spaces = format!("{}.spaces.bin", name); let fn_spaces = format!("{}.spaces.bin", name);
let fn_objects = format!("{}.objects.bin", name); let fn_objects = format!("{}.objects.bin", name);
let fn_index = format!("{}.index", name); let fn_index = format!("{}.index", name);
@@ -89,6 +94,8 @@ pub fn build(name: &str, version: &str) {
version, version,
&spaces, &spaces,
&load::<Vec<model::SpatialObject>>(&fn_objects), &load::<Vec<model::SpatialObject>>(&fn_objects),
scales,
max_elements,
); );
store((spaces, core), &fn_index); store((spaces, core), &fn_index);

View File

@@ -1,6 +1,9 @@
#[macro_use] #[macro_use]
extern crate lazy_static; extern crate lazy_static;
#[macro_use]
extern crate log;
#[macro_use] #[macro_use]
extern crate arrayref; extern crate arrayref;

View File

@@ -22,7 +22,7 @@ fn main() {
// Build a Database Index: // Build a Database Index:
if true { if true {
info_time!("Building database index"); info_time!("Building database index");
storage::build("10k", "v0.1"); storage::build("10k", "v0.1", None, None);
} }
// Load a Database: // Load a Database:
@@ -40,6 +40,7 @@ fn main() {
db: &db, db: &db,
output_space: None, output_space: None,
threshold_volume: Some(std::f64::MAX), threshold_volume: Some(std::f64::MAX),
view_port: &None,
resolution: None, resolution: None,
}; };
let r = core.get_by_id(&c, id).unwrap(); let r = core.get_by_id(&c, id).unwrap();
@@ -50,6 +51,7 @@ fn main() {
db: &db, db: &db,
output_space: None, output_space: None,
threshold_volume: Some(0.0), threshold_volume: Some(0.0),
view_port: &None,
resolution: None, resolution: None,
}; };
let r = core.get_by_id(&c, id).unwrap(); let r = core.get_by_id(&c, id).unwrap();
@@ -60,6 +62,7 @@ fn main() {
db: &db, db: &db,
output_space: None, output_space: None,
threshold_volume: Some(std::f64::MAX), threshold_volume: Some(std::f64::MAX),
view_port: &None,
resolution: None, resolution: None,
}; };
let r = core.get_by_label(&c, id).unwrap(); let r = core.get_by_label(&c, id).unwrap();
@@ -77,6 +80,7 @@ fn main() {
db: &db, db: &db,
output_space: None, output_space: None,
threshold_volume: Some(0.0), threshold_volume: Some(0.0),
view_port: &None,
resolution: None, resolution: None,
}; };
let r = core.get_by_shape(&c, &shape, "std").unwrap(); let r = core.get_by_shape(&c, &shape, "std").unwrap();