% Conference paper from IGARSS 2016 (exported record, cleaned up).
% The former free-text note has been split into dedicated fields:
%   funding / copyright  - custom fields, kept for provenance only
%   eventtitle / eventdate - biblatex event fields (conference held 10-15 July 2016)
% Classic BibTeX styles ignore fields they do not know, so none of this
% prints as a trailing note in the rendered reference any more.
@inproceedings{99a9ab2caa5e4677996b937b20738009,
  author     = {Kuo, Kwo Sen and Oloso, Amidu and Doan, Khoa and Clune, Thomas L. and Yu, Hongfeng},
  title      = {Implications of data placement strategy to {Big Data} technologies based on shared-nothing architecture for geosciences},
  booktitle  = {2016 {IEEE} International Geoscience and Remote Sensing Symposium, {IGARSS} 2016 - Proceedings},
  series     = {International Geoscience and Remote Sensing Symposium ({IGARSS})},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  pages      = {7605--7607},
  year       = {2016},
  month      = nov,
  day        = {1},
  doi        = {10.1109/IGARSS.2016.7730983},
  language   = {English (US)},
  keywords   = {Big Data, data placement, geoscience, shared-nothing architecture},
  abstract   = {It is found that data placement on the networked nodes of a cluster based on the shared-nothing architecture (SNA) should align in the physical (i.e. spatiotemporal) space for most geoscience Big Data analysis systems in order to minimize data movements and thus achieve optimal performance and efficiency. This is due to the fact that data analysis in geosciences predominantly requires spatiotemporal coincidence. If individual datasets are considered separately in their placement on the cluster nodes, these systems often have to move data between nodes when an analysis involves two or more datasets. In this paper, we first report our discoveries from a data placement alignment experiment with two Big Data technologies, SciDB and Spark+HDFS, and then elucidate some of the far-reaching implications of this discovery.},
  eventtitle = {36th {IEEE} International Geoscience and Remote Sensing Symposium, {IGARSS} 2016},
  eventdate  = {2016-07-10/2016-07-15},
  funding    = {We are grateful to the funding provided by NASA Advanced Information Systems Technology (AIST) program and NSF EarthCube program that made this research possible.},
  copyright  = {{\textcopyright} 2016 IEEE.},
}