Hadoop: Running a MapReduce Job - WordCount

Source: http://wiki.apache.org/hadoop/WordCount

==Create a Simple Data File==

 cd ~
 touch file01
 touch file02
 echo "Hello World Bye World" > file01
 echo "Hello Hadoop Goodbye Hadoop" > file02

Put the files into HDFS:

 hadoop fs -mkdir /user/hduser/input
 hadoop fs -put file* /user/hduser/input/
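
If the parent directory /user/hduser does not exist yet, the plain -mkdir above may fail. A minimal sketch using the -p flag of the Hadoop 2.x fs shell, which creates any missing parent directories as well:

 # Create the input directory together with any missing parents (-p is supported
 # by the Hadoop 2.x fs shell), then upload the two sample files.
 hadoop fs -mkdir -p /user/hduser/input
 hadoop fs -put file* /user/hduser/input/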

Check:

 hadoop fs -ls /user/hduser/input/

 Found 2 items
 -rw-r--r--   1 hduser supergroup   22 2015-11-09 17:28 /user/hduser/input/file01
 -rw-r--r--   1 hduser supergroup   28 2015-11-09 17:28 /user/hduser/input/file02

 hadoop fs -cat /user/hduser/input/file01
 Hello World Bye World

 hadoop fs -cat /user/hduser/input/file02
 Hello Hadoop Goodbye Hadoop

==Run WordCount==

General usage of the example:

 hadoop fs -rm /user/hduser/output/*
 hadoop fs -rmdir /user/hduser/output/
 cd /usr/local/hadoop
 hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount [-m <#maps>] [-r <#reducers>] <in-dir> <out-dir>
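
The [-m]/[-r] flags above come from the old Hadoop wiki; the wordcount example shipped with Hadoop 2.x reads generic options instead. A minimal sketch for setting the reducer count, assuming the 2.7.1 example parses -D generic options (examples built on GenericOptionsParser do):

 # Hedged sketch: set the number of reducers through a generic -D option instead
 # of the legacy -r flag (assumes the 2.7.1 example parses generic options).
 hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount \
     -D mapreduce.job.reduces=2 /user/hduser/input /user/hduser/output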

Run it:

 hadoop fs -rm /user/hduser/output/*
 hadoop fs -rmdir /user/hduser/output/
 cd /usr/local/hadoop
 hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /user/hduser/input /user/hduser/output
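
The two cleanup commands at the top of the block are only needed when the output directory already exists, because the job refuses to overwrite it. They can usually be collapsed into a single recursive delete; a minimal sketch assuming the Hadoop 2.x fs shell:

 # Recursively remove the previous output directory in one step; -skipTrash
 # bypasses the HDFS trash so the space is freed immediately.
 hadoop fs -rm -r -skipTrash /user/hduser/output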

==Copy the Results==

 cd ~
 hadoop fs -copyToLocal /user/hduser/output .
 more output/part-r-*

The result:

 Bye 1
 Goodbye 1
 Hadoop 2
 Hello 2
 World 2
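
Copying to the local filesystem is convenient but optional; the same output can be read straight out of HDFS:

 # Read the reducer output directly from HDFS without copying it locally.
 hadoop fs -cat /user/hduser/output/part-r-*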

==Testing with a 10 GB Dataset==

Create the dataset:

 cd /usr/local/hadoop
 hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar randomtextwriter /random-text-data
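
By default randomtextwriter generates roughly 1 GB per map task with ten map tasks per node, i.e. about 10 GB per node, which matches the dataset size in this section; this default is an assumption based on the Hadoop 2.x example sources. The total size can be tuned through configuration properties; a hedged sketch, with the property name taken from the Hadoop 2.x RandomTextWriter source (verify it against your version):

 # Hedged sketch: cap the total amount of generated text at ~10 GB.
 # The property name is an assumption based on the Hadoop 2.x RandomTextWriter
 # source and should be checked for the exact Hadoop version in use.
 hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar randomtextwriter \
     -D mapreduce.randomtextwriter.totalbytes=10737418240 /random-text-data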

Run the analysis:

 hadoop fs -rm /user/hduser/output/*
 hadoop fs -rmdir /user/hduser/output
 cd /usr/local/hadoop
 hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /random-text-data /user/hduser/output

The result (job counters, truncated):

 ..
 ..
 ..
 Map-Reduce Framework
         Map input records=26777
         Map output records=100840944
         Map output bytes=1522178351
         Map output materialized bytes=74694575
         Input split bytes=1053
         Combine input records=103318240
         Combine output records=4741732
         Reduce input groups=1577165
         Reduce shuffle bytes=74694575
         Reduce input records=2264436
         Reduce output records=1577165
         Spilled Records=7006168
         Shuffled Maps =9
         Failed Shuffles=0
         Merged Map outputs=9
         GC time elapsed (ms)=3255
         Total committed heap usage (bytes)=4048551936
 Shuffle Errors
         BAD_ID=0
         CONNECTION=0
         IO_ERROR=0
         WRONG_LENGTH=0
         WRONG_MAP=0
         WRONG_REDUCE=0
 File Input Format Counters
         Bytes Read=1102531981
 File Output Format Counters
         Bytes Written=48884623
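
These counters show how much work the combiner saves during the shuffle. Assuming they all come from the same run, a rough back-of-the-envelope calculation:

 Map output records    = 100,840,944
 Reduce input records  =   2,264,436   -> roughly a 45x reduction in records
 Map output bytes      = 1,522,178,351
 Reduce shuffle bytes  =    74,694,575 -> roughly a 20x reduction in bytes shuffled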

Analyze the results:

 cd ~
 rm -Rf output
 hadoop fs -copyToLocal /user/hduser/output .
 more output/part-r-*
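
Paging through the full output is tedious for a dataset this large. A plain shell sketch that sorts the local copy by the count column and shows the ten most frequent words (the part-r-* names are the default MapReduce output file names):

 # Sort on the second (count) column, numerically and descending, top 10 words.
 sort -k2,2nr output/part-r-* | head -n 10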

==References==

* http://wiki.apache.org/hadoop/WordCount