% Conference paper from the 2016 IEEE International Conference on Big Data.
% Personal names are stored in "Last, First" form so that styles can parse and
% abbreviate them; case-sensitive words in the title are brace-protected.
% The proceedings title is given in booktitle only (no duplicate series field).
@inproceedings{72f942a8fb4b471191af2764ec0cb7f2,
  author    = {Doan, Khoa and Oloso, Amidu O. and Kuo, Kwo Sen and Clune, Thomas L. and Yu, Hongfeng and Nelson, Brian and Zhang, Jian},
  title     = {Evaluating the impact of data placement to {Spark} and {SciDB} with an {Earth Science} use case},
  booktitle = {Proceedings - 2016 {IEEE} International Conference on Big Data, Big Data 2016},
  editor    = {Ak, Ronay and Karypis, George and Xia, Yinglong and Hu, Xiaohua Tony and Yu, Philip S. and Joshi, James and Ungar, Lyle and Liu, Ling and Sato, Aki-Hiro and Suzumura, Toyotaro and Rachuri, Sudarsan and Govindaraju, Rama and Xu, Weijia},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {341--346},
  year      = {2016},
  month     = dec,
  doi       = {10.1109/BigData.2016.7840621},
  language  = {English (US)},
  keywords  = {SciDB, Spark, data layout, multidimensional arrays},
  abstract  = {We investigate the impact of data placement on two Big Data technologies, Spark and SciDB, with a use case from Earth Science where data arrays are multidimensional. Simultaneously, this investigation provides an opportunity to evaluate the performance of the technologies involved. Two datastores, HDFS and Cassandra, are used with Spark for our comparison. It is found that Spark with Cassandra performs better than with HDFS, but SciDB performs better yet than Spark with either datastore. The investigation also underscores the value of having data aligned for the most common analysis scenarios in advance on a shared nothing architecture. Otherwise, repartitioning needs to be carried out on the fly, degrading overall performance.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 4th IEEE International Conference on Big Data, Big Data 2016 ; Conference date: 05-12-2016 Through 08-12-2016},
}