% Conference paper in the ACIT-CSI 2015 proceedings (IEEE), pages 108--112.
% Maintenance notes:
%   - The exported "series" field duplicated "booktitle" word for word and was
%     dropped so the proceedings title is not printed twice.
%   - Proper nouns in "title" are brace-protected against style recasing.
%   - "abstract" uses LaTeX escapes (\% and $\Omega$) so it is safe to typeset
%     and stays ASCII-only for classic BibTeX.
%   - "address" holds the publisher's country as exported; NOTE(review): the
%     publisher city is not known from this record -- confirm before changing.
@inproceedings{cba2bf6076154313ae0b672bbcca4338,
  author    = {Kim, Jongyeop and Park, Nohpill},
  title     = {Identification of the optimal {Hadoop} configuration parameters set for {MapReduce} computing},
  booktitle = {Proceedings - 3rd International Conference on Applied Computing and Information Technology and 2nd International Conference on Computational Science and Intelligence, ACIT-CSI 2015},
  editor    = {Tsuchida, Kensei and Ishii, Naohiro and Goto, Takaaki and Takahashi, Satoshi},
  pages     = {108--112},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2015},
  month     = nov,
  day       = {23},
  doi       = {10.1109/ACIT-CSI.2015.27},
  language  = {English},
  keywords  = {Benchmarks, Correlation, HDFS, Hadoop, Hadoop configuration, MapReduce, Monte Carlo, Performance tuning, Sampling},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; 3rd International Conference on Applied Computing and Information Technology and 2nd International Conference on Computational Science and Intelligence, ACIT-CSI 2015 ; Conference date: 12-07-2015 Through 16-07-2015},
  abstract  = {This paper investigates on the techniques to search for optimal configuration parameters sets for Hadoop HDFS (Hadoop Distributed File System). An optimization technique, so-called the automated benchmarking configuration methodology (ABCM) [4], has been proposed and demonstrated by employing a two-staged sampling technique in order to mitigate the computational complexity and cost of the search process for the optimal configuration parameters set. In this paper, a few methods are further employed to sample those configuration parameters sets such as random Monte Carlo, correlation approaches (versus sequential approach in ABCM) in an effort to improve the level of the resulting performance from the identified optimal configuration parameters set and the execution time as well. Experiments are conducted to compare the level of the resulting performances, the Monte Carlo and Correlation coefficient-based algorithms are developed and implemented to identify a better set of {$\Omega$} space [4] for a benchmark TestDFSIO in which the number of iterations are kept at the same for comparison purpose, and their resulting performances are compared against the sequential. It is observed that the optimal configuration parameters set identified by the Monte Carlo-based approach reduces the execution time of the benchmark run by 13.84\% compared to the sequential sampling method, while the correlation-based method ended up with an unexpected result suspiciously due to lack of linearity of correlation which to be validated in the future work.},
}