% Conference paper on Eirene, published in the proceedings of the 2019 IEEE
% International Conference on Big Data (pp. 423--432).
% The acknowledgment text, copyright line and conference dates are stored in
% the fields funding, copyright, eventtitle and eventdate rather than in note,
% so that they do not appear in the rendered citation.
@inproceedings{e95563c7264b49abae19ca71afa07b80,
title = "Eirene: Improving Short Job Latency Performance with Coordinated Cold Data Migration and Scheduler-Aware Task Cloning",
abstract = "In large-scale enterprise data centers for big data analytics, long batched jobs and short interactive jobs are usually mixed. Hybrid job schedulers, consisting of one centralized scheduler for long jobs and multiple distributed schedulers for short jobs, have become a promising alternative because they can significantly shorten latencies of short jobs via independent and parallelized assignment of short tasks by distributed schedulers and lower chances of head-of-line blocking via a number of performance optimization techniques. However, short jobs are still facing long job latencies under hybrid job schedulers due to workload fluctuation and straggler task problem. In this paper, we propose Eirene to optimize the latency performance of short jobs via two schemes tightly coupled into the general architecture of hybrid job schedulers. Coordinated Cold Data Migration leverages high task waiting time of short jobs under heavily-loaded periods and migrates cold data from disks to local memory for the initial phase of reading input so as to shorten task runtime and queueing time. On the other hand, Scheduler-Aware Task Cloning exploits spare computing resources under lightly-loaded periods and performs proactive task cloning for short jobs to mitigate the straggler problem. We implement a prototype of Eirene based on Eagle, a state-of-the-art hybrid job scheduler. Experimental results show that, under heavy loads, Eirene is able to improve 50-percentile (P50), 75-percentile (P75), 90-percentile (P90) latency performance of short jobs by up to 44.4\%, 80.3\%, 84.1\% respectively compared with Eagle under the Facebook trace with a cluster of 50000 nodes.",
keywords = "Big Data, Job Scheduler, Resource Management",
author = "Zhou, Wei and White, K. Preston and Yu, Hongfeng",
funding = "We thank the anonymous reviewers for their constructive suggestions on this paper. This research has been partially supported by the National Science Foundation through grants IIS-1423487, ICER-1541043, and IIS-1652846. The contents do not necessarily reflect the views and policies of the funding agencies and do not mention of trade names or commercial products constitute endorsement or recommendation for use.",
copyright = "Publisher Copyright: {\textcopyright} 2019 IEEE.",
eventtitle = "2019 IEEE International Conference on Big Data, Big Data 2019",
eventdate = "2019-12-09/2019-12-12",
year = "2019",
month = dec,
doi = "10.1109/BigData47090.2019.9006575",
language = "English (US)",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "423--432",
editor = "Baru, Chaitanya and Huan, Jun and Khan, Latifur and Hu, Xiaohua Tony and Ak, Ronay and Tian, Yuanyuan and Barga, Roger and Zaniolo, Carlo and Lee, Kisung and Ye, Yanfang Fanny",
booktitle = "Proceedings - 2019 IEEE International Conference on Big Data, Big Data 2019",
}