% Conference paper in the ICIS 2014 proceedings; all names use the "Last, First" form.
% NOTE(review): address holds a country as exported, not a publisher city -- replace with the city if known.
% NOTE(review): month/day (Sep 26) differ from the conference dates recorded in note -- presumably the online publication date; verify.
@inproceedings{23eb11cef17e4da3b1d21feb0f905e43,
  author    = {Kumar, Ashwin T. K. and Kim, Jongyeop and George, K. M. and Park, Nohpill},
  title     = {Dynamic data rebalancing in {Hadoop}},
  booktitle = {2014 {IEEE/ACIS} 13th International Conference on Computer and Information Science, {ICIS} 2014 - Proceedings},
  editor    = {Han, Yan and Song, Wenai and Xu, Simon and Chen, Lichao and Lee, Roger},
  pages     = {315--320},
  publisher = {{Institute of Electrical and Electronics Engineers Inc.}},
  address   = {United States},
  year      = {2014},
  month     = sep,
  day       = {26},
  doi       = {10.1109/ICIS.2014.6912153},
  language  = {English},
  keywords  = {Dynamic Data Rebalancing, Hadoop, Replication, heterogeneity, service time, waiting time},
  abstract  = {Current implementation of Hadoop is based on an assumption that all the nodes in a Hadoop cluster are homogenous. Data in a Hadoop cluster is split into blocks and are replicated based on the replication factor. Service time for jobs that accesses data stored in Hadoop considerably increases when the number of jobs is greater than the number of copies of data and when the nodes in Hadoop cluster differ much in their processing capabilities. This paper addresses dynamic data rebalancing in a heterogeneous Hadoop cluster. Data rebalancing is done by replicating data dynamically with minimum data movement cost based on the number of incoming parallel mapreduce jobs. Our experiments indicate that as a result of dynamic data rebalancing service time of mapreduce jobs were reduced by over 30\% and resource utilization is increased by over 50\% when compared against Hadoop.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE.; 2014 13th IEEE/ACIS International Conference on Computer and Information Science, ICIS 2014 - Proceedings ; Conference date: 04-06-2014 Through 06-06-2014},
}