diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..c47c02d --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,180 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "c2-chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cfg-if" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "getrandom" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", + "wasi 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "itoa" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "mercator_data_generator" +version = "0.1.0" +dependencies = [ + "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "proc-macro2" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+dependencies = [ + "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quote" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "getrandom 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_chacha" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "getrandom 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ryu" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_derive" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "syn" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-xid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "wasi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" +"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" +"checksum getrandom 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "fc344b02d3868feb131e8b5fe2b9b0a1cc42942679af493061fc13b853243872" +"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" +"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +"checksum libc 0.2.62 
(registry+https://github.com/rust-lang/crates.io-index)" = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba" +"checksum ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" +"checksum proc-macro2 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4c5c2380ae88876faae57698be9e9775e3544decad214599c3a6266cca6ac802" +"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" +"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" +"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" +"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" +"checksum serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)" = "fec2851eb56d010dc9a21b89ca53ee75e6528bab60c11e89d38390904982da9f" +"checksum serde_derive 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)" = "cb4dc18c61206b08dc98216c98faa0232f4337e1e1b8574551d5bad29ea1b425" +"checksum serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)" = "051c49229f282f7c6f3813f8286cc1e3323e8051823fce42c7ea80fe13521704" +"checksum syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf" +"checksum unicode-xid 0.2.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +"checksum wasi 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fd5442abcac6525a045cc8c795aedb60da7a2e5e89c7bf18a0d5357849bb23c7" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ac403e0 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "mercator_data_generator" +version = "0.1.0" +authors = ["EPFL-DIAS", "Lionel Sambuc "] + +edition = "2018" + +description = "Generate test data for Mercator" +#homepage = "https://crates.io/crates/mercator_data_generator" +repository = "https://github.com/epfl-dias/mercator_data_generator" +readme = "README.md" + +keywords = [] +categories = [ ] + +license = "MIT" +#license-file = "LICENSE" + +include = ["Cargo.toml", "README.md", "LICENSE", "ACKNOWLEDGEMENTS", "src/**/*.rs"] + +[dependencies] +serde = "^1.0" +serde_derive = "^1.0" +serde_json = "^1.0" +rand = "^0.7" + +[workspace] diff --git a/README.md b/README.md new file mode 100644 index 0000000..29c7a5d --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +# Mercator Test data generator + +Small tool to generate test data for Mercator, a spatial index. + +## Mercator: Spatial Index + +**Mercator** is a spatial *volumetric* index for the [Human Brain Project](http://www.humanbrainproject.eu). It is a component of the [Knowledge Graph](http://www.humanbrainproject.eu/en/explore-the-brain/search/) service, which provides the spatial anchoring for the registered metadata and processes the volumetric queries. + +It is built on top of the Iron Sea database toolkit. + +## Iron Sea: Database Toolkit + +**Iron Sea** provides a set of database engine bricks, which can be combined and applied on arbitrary data structures. 
+ +Unlike a traditional database, it does not assume a specific physical structure for the tables nor the records, but relies on the developer to provide a set of extractor functions which are used by the specific indices provided. + +This enables the index implementations to be agnostic of the underlying data structure, and re-used. + +## Requirements + +### Software + + * Rust: https://www.rust-lang.org + +## Quick start + +Adapt the main function to generate the datasets you want, then run: + +```sh +cargo run --release +``` + +## Acknowledgements + +This open source software code was developed in part or in whole in the +Human Brain Project, funded from the European Union’s Horizon 2020 +Framework Programme for Research and Innovation under the Specific Grant +Agreement No. 785907 (Human Brain Project SGA2). diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..883094a --- /dev/null +++ b/src/main.rs @@ -0,0 +1,49 @@ +#[macro_use] +extern crate serde_derive; + +mod storage; + +use rand::distributions::{Distribution, Uniform}; + +use rand::prelude::ThreadRng; +use storage::*; + +const POSITIONS_PER_SHAPE: usize = 1000; + +fn get_point(space_name: &str, rng: &mut ThreadRng, die: &Uniform) -> SpatialObject { + let mut shapes = Vec::with_capacity(POSITIONS_PER_SHAPE); + + for _ in 0..POSITIONS_PER_SHAPE { + shapes.push(Shape { + type_name: "Point".to_string(), + vertices: vec![vec![die.sample(rng), die.sample(rng), die.sample(rng)]], + reference_space: space_name.to_string(), + }); + } + + SpatialObject::new(shapes, format!("oid{}", die.sample(rng))) +} + +fn get_space(nb_points: usize, rng: &mut ThreadRng, die: &Uniform) { + let space_name = "std"; + + let mut objects = Vec::with_capacity(nb_points); + + for _ in 0..nb_points { + objects.push(get_point(&space_name, rng, &die)); + } + + storage::store(format!("{}k", nb_points).as_str(), objects); +} + +fn main() { + let mut rng = rand::thread_rng(); + let die = Uniform::from(0.0..1.0); + + 
get_space(1, &mut rng, &die); + get_space(10, &mut rng, &die); + get_space(100, &mut rng, &die); + get_space(1000, &mut rng, &die); + get_space(10000, &mut rng, &die); + //get_space(40000, &mut rng, &die); +} diff --git a/src/storage.rs b/src/storage.rs new file mode 100644 index 0000000..a661782 --- /dev/null +++ b/src/storage.rs @@ -0,0 +1,102 @@ +use std::fs::File; +use std::io::BufWriter; + +use serde::Serialize; + +#[derive(Clone, Debug, Serialize)] +pub struct Space { + pub name: String, + pub system: CoordinateSystem, +} + +#[derive(Clone, Debug, Serialize)] +pub struct CoordinateSystem { + pub origin: Vec, + pub axes: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Axis { + pub measurement_unit: String, + pub graduation: Graduation, + pub unit_vector: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Graduation { + pub set: String, + pub minimum: f64, + pub maximum: f64, + pub steps: u64, +} + +#[derive(Clone, Debug, Serialize)] +pub struct SpatialObject { + pub properties: Properties, + pub shapes: Vec, +} + +impl SpatialObject { + pub fn new(shapes: Vec, id: String) -> Self { + SpatialObject { + shapes, + properties: Properties { + type_name: "Feature".to_string(), + id, + }, + } + } +} + +#[derive(Clone, Debug, Serialize)] +pub struct Shape { + #[serde(rename = "type")] + pub type_name: String, + #[serde(rename = "space")] + pub reference_space: String, + pub vertices: Vec, +} + +type Point = Vec; + +#[derive(Clone, Debug, Serialize)] +pub struct Properties { + #[serde(rename = "type")] + pub type_name: String, + pub id: String, +} + +mod json { + use super::*; + + pub fn store(data: T, to: &str) + where + T: Serialize, + { + let file_out = + File::create(to).unwrap_or_else(|e| panic!("Unable to create file: {}: {}", to, e)); + + // We create a buffered writer from the file we get + let writer = BufWriter::new(&file_out); + + serde_json::to_writer(writer, &data).unwrap(); + } +} + +pub fn store(name: S, data: T) +where + S: Into, + T: 
Serialize, +{ + let name = name.into(); + /* + // Convert Reference Space definitions + let fn_out = format!("{}.spaces.json", name); + + json::store::>(data, &fn_out); + */ + // Convert Spatial Objects + let fn_out = format!("{}.objects.json", name); + + json::store(data, &fn_out); +}