% Conference paper (IEEE Big Data 2014): cross-job framework for MapReduce scheduling.
% No `series` field: the proceedings title belongs in `booktitle` only, so it is not printed twice.
% Braces around MapReduce / IEEE keep their capitalisation under styles that recase titles.
@inproceedings{a2580f470fcc4cce9182b43815fba74c,
  author    = {Xiao, Xuejie and Tang, Jian and Chen, Zhenhua and Xu, Jielong and Wang, Chonggang},
  title     = {A cross-job framework for {MapReduce} scheduling},
  booktitle = {Proceedings - 2014 {IEEE} International Conference on Big Data, {IEEE} Big Data 2014},
  editor    = {Lin, Jimmy and Pei, Jian and Hu, {Xiaohua Tony} and Chang, Wo and Nambiar, Raghunath and Aggarwal, Charu and Cercone, Nick and Honavar, Vasant and Huan, Jun and Mobasher, Bamshad and Pyne, Saumyadipta},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {135--140},
  year      = {2014},
  doi       = {10.1109/BigData.2014.7004222},
  language  = {English (US)},
  keywords  = {Big Data, MapReduce, Resource Management, Task Scheduling},
  abstract  = {In this paper, we present a novel cross-job framework for MapReduce scheduling, which aims to minimize the total processing time of a sequence of related jobs by combining reduce and map phases of two consecutive jobs and streaming data between them. The proposed framework has the following desirable properties: (1) It can accelerate the execution of a sequence of related MapReduce jobs by achieving a good tradeoff between data locality and parallelism. (2) It can support all the existing MapReduce applications with no changes to their source code. (3) It is a general framework, which can work with different scheduling algorithms. We built a new MapReduce runtime system called cross-job Hadoop by integrating the proposed cross-job framework into Hadoop. We conducted extensive experiments to evaluate its performance using PageRank and an Apache Pig application. Our experimental results show that the cross-job Hadoop can significantly reduce both the total processing time of a job sequence and the size of data transferred over the network.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE; 2nd IEEE International Conference on Big Data, IEEE Big Data 2014; Conference date: 27-10-2014 Through 30-10-2014},
}