2013-11-08
This post is for learning and sharing; it may not be complete or 100% correct.
Hadoop is a system for building cloud computing environments that provides distributed parallel computation. It is modeled on the Google File System, developed in Java, and provides the HDFS and MapReduce APIs.
Official website
http://hadoop.apache.org/
Installation references
http://hadoop.apache.org/docs/r2.2.0/hadoop-project-dist/hadoop-common/SingleCluster.html
http://hadoop.apache.org/docs/r2.2.0/hadoop-project-dist/hadoop-common/ClusterSetup.html
http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleNodeSetup.html
http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleCluster.html
Download
http://apache.cdpa.nsysu.edu.tw/hadoop/common/hadoop-2.2.0/
Environment
192.168.128.102 (CentOS 6.4 x64)
Hostname: localhost and localhost.localdomain
Installation
# To keep things simple and avoid surprises, disable SELinux (Security-Enhanced Linux) and iptables
# Disable SELinux immediately
setenforce 0
# Keep SELinux disabled after reboot
vi /etc/selinux/config
Find
SELINUX=
and change it to
SELINUX=disabled
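# A quick way to verify the change: getenforce (part of the standard SELinux tools) should
# report Permissive right after setenforce 0, and Disabled after a reboot with SELINUX=disabled
getenforce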
# Stop iptables immediately
service iptables stop
service ip6tables stop
# Keep iptables disabled after reboot
chkconfig iptables off
chkconfig ip6tables off
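# To double-check that the firewall is stopped and will stay off after reboot:
service iptables status
chkconfig --list iptables
chkconfig --list ip6tables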
# Install a JRE or JDK
cd /usr/local/src
# Download Java manually from the official website
# http://www.oracle.com/technetwork/java/javase/downloads/index-jsp-138363.html#javasejdk
rpm -ivh jre-7u45-linux-x64.rpm
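# Verify the Java installation; java -version should report version 1.7.0_45
java -version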
wget http://apache.cdpa.nsysu.edu.tw/hadoop/common/hadoop-2.2.0/hadoop-2.2.0.tar.gz
tar zxvf hadoop-2.2.0.tar.gz -C /usr/local
# Next, refer to the following post to replace the bundled 32-bit native libraries with 64-bit versions, then continue
# [Study] Hadoop 2.2.0 compilation (CentOS 6.4 x64)
# http://shaurong.blogspot.tw/2013/11/hadoop-220-centos-64-x64.html
[root@localhost ~]# tar zxvf hadoop-2.2.0-native_x86_64.tar.gz
native/
native/libhadooppipes.a
native/libhadoop.so.1.0.0
native/libhdfs.so.0.0.0
native/libhadoop.a
native/libhdfs.so
native/libhadoop.so
native/libhadooputils.a
native/libhdfs.a
[root@localhost ~]# cp native/* /usr/local/hadoop-2.2.0/lib/native
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadoop.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadooppipes.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadoop.so'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadoop.so.1.0.0'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadooputils.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhdfs.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhdfs.so'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhdfs.so.0.0.0'? y
# If you installed jre-7u45-linux-x64.rpm
export JAVA_HOME=/usr/java/jre1.7.0_45
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=$JAVA_HOME/lib/ext:$JAVA_HOME/lib/tools.jar
# If you installed jdk-7u45-linux-x64.rpm instead (note the different path)
#export JAVA_HOME=/usr/java/jdk1.7.0_45
#export PATH=$PATH:$JAVA_HOME/bin
#export CLASSPATH=$JAVA_HOME/jre/lib/ext:$JAVA_HOME/lib/tools.jar
export HADOOP_PREFIX=/usr/local/hadoop-2.2.0
export HADOOP_COMMON_HOME=/usr/local/hadoop-2.2.0
export HADOOP_MAPRED_HOME=/usr/local/hadoop-2.2.0
export HADOOP_CONF_DIR=/usr/local/hadoop-2.2.0/etc/hadoop
export HADOOP_HDFS_HOME=/usr/local/hadoop-2.2.0
export HADOOP_YARN_HOME=/usr/local/hadoop-2.2.0
export YARN_CONF_DIR=$HADOOP_CONF_DIR
# Edit .bashrc so that you do not have to run all of the exports above every time you log in as root
[root@localhost ~]# vi /root/.bashrc
Add
export JAVA_HOME=/usr/java/jre1.7.0_45
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=$JAVA_HOME/lib/ext:$JAVA_HOME/lib/tools.jar
#export JAVA_HOME=/usr/java/jdk1.7.0_45
#export PATH=$PATH:$JAVA_HOME/bin
#export CLASSPATH=$JAVA_HOME/jre/lib/ext:$JAVA_HOME/lib/tools.jar
export HADOOP_PREFIX=/usr/local/hadoop-2.2.0
export HADOOP_COMMON_HOME=/usr/local/hadoop-2.2.0
export HADOOP_MAPRED_HOME=/usr/local/hadoop-2.2.0
export HADOOP_CONF_DIR=/usr/local/hadoop-2.2.0/etc/hadoop
export HADOOP_HDFS_HOME=/usr/local/hadoop-2.2.0
export HADOOP_YARN_HOME=/usr/local/hadoop-2.2.0
export YARN_CONF_DIR=$HADOOP_CONF_DIR
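# To apply the new .bashrc in the current shell without logging out again, and to confirm the
# variables took effect, something like this works:
source /root/.bashrc
echo $JAVA_HOME
echo $HADOOP_PREFIX
/usr/local/hadoop-2.2.0/bin/hadoop version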
# ******************************************************************************
# Set JAVA_HOME in hadoop-env.sh, httpfs-env.sh, mapred-env.sh, and yarn-env.sh
# If you have already run export JAVA_HOME and added it to .bashrc, this part can be skipped; it is an alternative method, for reference only
# Configure hadoop-env.sh
[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/hadoop-env.sh
Find
# The java implementation to use.
export JAVA_HOME=${JAVA_HOME}
and add a line below it
export JAVA_HOME=/usr/java/jre1.7.0_45
[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/httpfs-env.sh
Add a line anywhere in the file
export JAVA_HOME=/usr/java/jre1.7.0_45
[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/mapred-env.sh
Find
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
and add a line below it
export JAVA_HOME=/usr/java/jre1.7.0_45
[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/yarn-env.sh
Find
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
and add a line below it
export JAVA_HOME=/usr/java/jre1.7.0_45
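If you would rather not edit each file by hand, GNU sed can insert the same export after the relevant line; this is just a convenience sketch (the JRE path below matches the one used above and should be adjusted to your installation):
cd /usr/local/hadoop-2.2.0/etc/hadoop
# hadoop-env.sh already has an "export JAVA_HOME=..." line; insert the real path right after it
sed -i '/^export JAVA_HOME=/a export JAVA_HOME=/usr/java/jre1.7.0_45' hadoop-env.sh
# mapred-env.sh and yarn-env.sh only have a commented-out example; insert after that comment
sed -i '/^# export JAVA_HOME=/a export JAVA_HOME=/usr/java/jre1.7.0_45' mapred-env.sh yarn-env.sh
# httpfs-env.sh can simply have the line appended
echo 'export JAVA_HOME=/usr/java/jre1.7.0_45' >> httpfs-env.sh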
# *******************************************************************************
# Edit the XML configuration files (only some of them are changed here)
# capacity-scheduler.xml hadoop-policy.xml httpfs-site.xml yarn-site.xml core-site.xml hdfs-site.xml mapred-site.xml
# Reference
# http://hadoop.apache.org/docs/r2.2.0/hadoop-project-dist/hadoop-common/SingleCluster.html
Note: the host values in the .xml files must be the name shown by the hostname -f command; IP addresses will not work.
[root@localhost hadoop-2.2.0]# hostname
localhost.localdomain
[root@localhost hadoop-2.2.0]# hostname -f
localhost
If you want to change the hostname (for example to hadoop01 and hadoop01.hadoopcluster):
Set the hostname (takes effect immediately)
[root@localhost hadoop-2.2.0]# hostname hadoop01.hadoopcluster
Verify:
[root@localhost hadoop-2.2.0]# hostname
hadoop01.hadoopcluster
Edit /etc/sysconfig/network to set the hostname that applies after reboot
[root@localhost hadoop-2.2.0]# vi /etc/sysconfig/network
Find
HOSTNAME=localhost.localdomain
and change it to
HOSTNAME=hadoop01.hadoopcluster
Edit the hosts file
[root@localhost hadoop-2.2.0]# vi /etc/hosts
Add
192.168.128.102 hadoop01 hadoop01.hadoopcluster
Because this post covers only a single-machine installation, I kept localhost and localhost.localdomain and did not change them.
******************************
# Configure mapred-site.xml
cp /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml.template /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml
vi /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml
Add the following between <configuration> and </configuration>:
<property>
<name>mapreduce.cluster.temp.dir</name>
<value></value>
<description>No description</description>
<final>true</final>
</property>
<property>
<name>mapreduce.cluster.local.dir</name>
<value></value>
<description>No description</description>
<final>true</final>
</property>
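After editing, it is worth making sure the file is still well-formed XML; if the libxml2 tools are installed, xmllint can check it:
xmllint --noout /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml && echo OK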
# Configure yarn-site.xml
vi /usr/local/hadoop-2.2.0/etc/hadoop/yarn-site.xml
Add the following between <configuration> and </configuration>
(Note: adjust each host:port to your environment; the host must be the one shown by the hostname -f command)
(The ports can be changed to different ones if you prefer)
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:9001</value>
<description>host is the hostname of the resource manager and
port is the port on which the NodeManagers contact the Resource Manager.
</description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:9002</value>
<description>host is the hostname of the resourcemanager and port is the port
on which the Applications in the cluster talk to the Resource Manager.
</description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
<description>In case you do not want to use the default scheduler</description>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>localhost:9003</value>
<description>the host is the hostname of the ResourceManager and the port is the port on
which the clients can talk to the Resource Manager. </description>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value></value>
<description>the local directories used by the nodemanager</description>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>localhost:9004</value>
<description>the nodemanagers bind to this port</description>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>10240</value>
<description>the amount of memory available on the NodeManager, in MB</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/app-logs</value>
<description>directory on hdfs where the application logs are moved to </description>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value></value>
<description>the directories used by Nodemanagers as log directories</description>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>shuffle service that needs to be set for Map Reduce to run </description>
</property>
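Before starting the daemons it may be worth confirming that nothing else is already listening on the ports chosen above (9001-9004 here); with the net-tools package installed, netstat can check this (no output means the ports are free):
netstat -tln | grep -E ':900[1-4]'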
# Configure capacity-scheduler.xml
vi /usr/local/hadoop-2.2.0/etc/hadoop/capacity-scheduler.xml
Find root.queues and change this block:
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>default</value>
<description>
The queues at the this level (root is the root queue).
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>100</value>
<description>Default queue target capacity.</description>
</property>
to this:
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>unfunded,default</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.unfunded.capacity</name>
<value>50</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>50</value>
</property>
# **************************************************************************
# Startup
[root@localhost hadoop]# cd $HADOOP_MAPRED_HOME
# Start the Resource Manager
[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-root-resourcemanager-localhost.localdomain.out
# Be sure to verify with ps aux | grep resourcemanager: if the command above failed there may be no error message, but the process will not show up here, which means it did not start successfully
[root@localhost hadoop-2.2.0]# ps aux | grep resourcemanager
root 3424 6.4 10.3 1712816 105288 pts/1 Sl 14:44 0:05 /usr/java/jre1.7.0_45/bin/java -Dproc_resourcemanager -Xmx1000m -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.home.dir= -Dyarn.id.str=root -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -Dyarn.policy.file=hadoop-policy.xml -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -classpath /usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/share/hadoop/common/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/common/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/*:/contrib/capacity-scheduler/*.jar:/contrib/capacity-scheduler/*.jar:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/etc/hadoop/rm-config/log4j.properties org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
root 3772 0.0 0.0 103236 872 pts/1 S+ 14:45 0:00 grep resourcemanager
# Start the Node Manager
[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh start nodemanager
starting nodemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-root-nodemanager-localhost.localdomain.out
# Be sure to verify with ps aux | grep nodemanager: if the command above failed there may be no error message, but the process will not show up here, which means it did not start successfully
[root@localhost hadoop-2.2.0]# ps aux | grep nodemanager
root 3659 4.2 9.6 1579176 97232 pts/1 Sl 14:44 0:07 /usr/java/jre1.7.0_45/bin/java -Dproc_nodemanager -Xmx1000m -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.home.dir= -Dyarn.id.str=root -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -Dyarn.policy.file=hadoop-policy.xml -server -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -classpath /usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/share/hadoop/common/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/common/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/*:/contrib/capacity-scheduler/*.jar:/contrib/capacity-scheduler/*.jar:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/etc/hadoop/nm-config/log4j.properties org.apache.hadoop.yarn.server.nodemanager.NodeManager
root 3780 0.0 0.0 103236 868 pts/1 S+ 14:47 0:00 grep nodemanager
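# If you installed the full JDK rather than just the JRE, jps (from $JAVA_HOME/bin) gives a more
# compact view than ps; it should list ResourceManager and NodeManager
jps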
# **************************************************************************
# Testing
# Change directory
[root@localhost hadoop-2.2.0]# cd $HADOOP_COMMON_HOME
# The official page says to use the command below, but it is actually wrong
[root@localhost hadoop-2.2.0]# $HADOOP_COMMON_HOME/bin/hadoop jar hadoop-examples.jar randomwriter out
Not a valid JAR: /usr/local/hadoop-2.2.0/hadoop-examples.jar
# Test it this way instead
[root@localhost hadoop-2.2.0]# $HADOOP_COMMON_HOME/bin/hadoop jar /usr/local/hadoop-2.2.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar randomwriter out
13/11/08 08:52:49 INFO Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
13/11/08 08:52:49 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
Running 10 maps.
Job started: Fri Nov 08 08:52:51 CST 2013
13/11/08 08:52:51 INFO jvm.JvmMetrics: Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
13/11/08 08:52:52 INFO mapreduce.JobSubmitter: number of splits:1
13/11/08 08:52:52 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.output.value.class is deprecated. Instead, use mapreduce.job.output.value.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.map.class is deprecated. Instead, use mapreduce.job.map.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.reduce.class is deprecated. Instead, use mapreduce.job.reduce.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.inputformat.class is deprecated. Instead, use mapreduce.job.inputformat.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.output.dir is deprecated. Instead, use mapreduce.output.fileoutputformat.outputdir
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.outputformat.class is deprecated. Instead, use mapreduce.job.outputformat.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.output.key.class is deprecated. Instead, use mapreduce.job.output.key.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.working.dir is deprecated. Instead, use mapreduce.job.working.dir
13/11/08 08:52:53 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_local58926325_0001
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: dfs.namenode.name.dir; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.cluster.local.dir; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: dfs.datanode.data.dir; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: fs.defaultFS; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.cluster.temp.dir; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: dfs.namenode.name.dir; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.cluster.local.dir; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: dfs.datanode.data.dir; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: fs.defaultFS; Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.cluster.temp.dir; Ignoring.
13/11/08 08:52:53 INFO mapreduce.Job: The url to track the job: http://localhost:8080/
13/11/08 08:52:53 INFO mapreduce.Job: Running job: job_local58926325_0001
13/11/08 08:52:53 INFO mapred.LocalJobRunner: OutputCommitter set in config null
13/11/08 08:52:54 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
13/11/08 08:52:54 INFO mapred.LocalJobRunner: Waiting for map tasks
13/11/08 08:52:54 INFO mapred.LocalJobRunner: Starting task: attempt_local58926325_0001_m_000000_0
13/11/08 08:52:54 INFO mapred.Task: Using ResourceCalculatorProcessTree : [ ]
13/11/08 08:52:54 INFO mapred.MapTask: Processing split: hdfs://localhost:9000/user/root/out/dummy-split-0:0+1
13/11/08 08:52:54 INFO mapreduce.Job: Job job_local58926325_0001 running in uber mode : false
13/11/08 08:52:54 INFO mapreduce.Job: map 0% reduce 0%
13/11/08 08:53:02 INFO mapred.LocalJobRunner: wrote record 11400. 953873410 bytes left. > map
13/11/08 08:53:05 INFO mapred.LocalJobRunner: wrote record 20000. 862727228 bytes left. > map
13/11/08 08:53:08 INFO mapred.LocalJobRunner: wrote record 23200. 828573860 bytes left. > map
13/11/08 08:53:11 INFO mapred.LocalJobRunner: wrote record 33200. 723844502 bytes left. > map
13/11/08 08:53:14 INFO mapred.LocalJobRunner: wrote record 35800. 696712593 bytes left. > map
13/11/08 08:53:17 INFO mapred.LocalJobRunner: wrote record 46000. 589007538 bytes left. > map
13/11/08 08:53:20 INFO mapred.LocalJobRunner: wrote record 50400. 543160330 bytes left. > map
13/11/08 08:53:24 INFO mapred.LocalJobRunner: wrote record 57800. 466576786 bytes left. > map
13/11/08 08:53:27 INFO mapred.LocalJobRunner: wrote record 64600. 394899575 bytes left. > map
13/11/08 08:53:30 INFO mapred.LocalJobRunner: wrote record 69200. 346440937 bytes left. > map
13/11/08 08:53:33 INFO mapred.LocalJobRunner: wrote record 78200. 252285093 bytes left. > map
13/11/08 08:53:36 INFO mapred.LocalJobRunner: wrote record 81000. 223522662 bytes left. > map
13/11/08 08:53:39 INFO mapred.LocalJobRunner: wrote record 90800. 120472094 bytes left. > map
13/11/08 08:53:42 INFO mapred.LocalJobRunner: wrote record 94600. 80375787 bytes left. > map
13/11/08 08:53:44 INFO mapred.LocalJobRunner: wrote record 94600. 80375787 bytes left. > map
13/11/08 08:53:46 INFO mapred.LocalJobRunner: done with 102195 records.
13/11/08 08:53:47 INFO mapreduce.Job: map 100% reduce 0%
13/11/08 08:53:47 INFO mapred.Task: Task:attempt_local58926325_0001_m_000000_0 is done. And is in the process of committing
13/11/08 08:53:47 INFO mapred.LocalJobRunner: done with 102195 records.
13/11/08 08:53:47 INFO mapred.Task: Task attempt_local58926325_0001_m_000000_0 is allowed to commit now
13/11/08 08:53:47 INFO output.FileOutputCommitter: Saved output of task 'attempt_local58926325_0001_m_000000_0' to hdfs://localhost:9000/user/root/out/_temporary/0/task_local58926325_0001_m_000000
13/11/08 08:53:47 INFO mapred.LocalJobRunner: done with 102195 records.
13/11/08 08:53:47 INFO mapred.Task: Task 'attempt_local58926325_0001_m_000000_0' done.
13/11/08 08:53:47 INFO mapred.LocalJobRunner: Finishing task: attempt_local58926325_0001_m_000000_0
13/11/08 08:53:47 INFO mapred.LocalJobRunner: Map task executor complete.
13/11/08 08:53:48 INFO mapreduce.Job: Job job_local58926325_0001 completed successfully
13/11/08 08:53:48 INFO mapreduce.Job: Counters: 25
File System Counters
FILE: Number of bytes read=270385
FILE: Number of bytes written=455985
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=0
HDFS: Number of bytes written=1077281362
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=3
Map-Reduce Framework
Map input records=1
Map output records=102195
Input split bytes=114
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=611
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=25653248
org.apache.hadoop.examples.RandomWriter$Counters
BYTES_WRITTEN=1073744266
RECORDS_WRITTEN=102195
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=1077281362
Job ended: Fri Nov 08 08:53:48 CST 2013
The job took 57 seconds.
[root@localhost hadoop-2.2.0]#
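The randomwriter output lands in HDFS under /user/root/out (see the hdfs://localhost:9000 paths in the log above). To rerun the test, that directory has to be removed first, otherwise the job fails because the output already exists; assuming HDFS is running as set up in the earlier parts of this series:
[root@localhost hadoop-2.2.0]# bin/hadoop fs -ls out
[root@localhost hadoop-2.2.0]# bin/hadoop fs -rm -r out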
# Following the official site's tutorial, Hadoop 1.x provided web interfaces; I tried them here but nothing came up (to be investigated in the follow-up post)
# **************************************************************************
# Shutdown (use the stop argument)
# Stop the Node Manager
[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop nodemanager
no nodemanager to stop
# The message above appears when the Node Manager never started successfully, so there is no Node Manager to stop
# If the Node Manager was running, stop prints the following instead
[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop nodemanager
stopping nodemanager
# Stop the Resource Manager
[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop resourcemanager
no resourcemanager to stop
# The message above appears when the Resource Manager never started successfully, so there is no Resource Manager to stop
# If the Resource Manager was running, stop prints the following instead
[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop resourcemanager
stopping resourcemanager
# **************************************************************************
# Appendix: about the firewall
After everything tests OK, if you would rather not leave the firewall disabled, you can add a few rules instead.
First start iptables and ip6tables, then save the current rules
(they are written to /etc/sysconfig/iptables and /etc/sysconfig/ip6tables)
[root@localhost ~]# service iptables start
[root@localhost ~]# service ip6tables start
[root@localhost ~]# service iptables save
[root@localhost ~]# service ip6tables save
Edit the iptables firewall rules
[root@localhost ~]# vi /etc/sysconfig/iptables
Change
# Firewall configuration written by system-config-firewall
# Manual customization of this file is not recommended.
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT
to the following (add ports according to the ones you configured)
# Firewall configuration written by system-config-firewall
# Manual customization of this file is not recommended.
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 8080 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 50070 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 50030 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9000 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9001 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9002 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9003 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9004 -j ACCEPT
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT
[root@localhost ~]# vi /etc/sysconfig/ip6tables
Modify the ip6tables rules in the same way
Restart iptables and ip6tables (this reloads all the rules)
[root@localhost ~]# service iptables restart
iptables: Flushing firewall rules: [ OK ]
iptables: Setting chains to policy ACCEPT: filter [ OK ]
iptables: Unloading modules: [ OK ]
iptables: Applying firewall rules: [ OK ]
[root@localhost ~]# service ip6tables restart
ip6tables: Flushing firewall rules: [ OK ]
ip6tables: Setting chains to policy ACCEPT: filter [ OK ]
ip6tables: Unloading modules: [ OK ]
ip6tables: Applying firewall rules: [ OK ]
[root@localhost ~]#
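To confirm the new rules are active, list the filter table and look for the added ACCEPT lines:
[root@localhost ~]# iptables -L INPUT -n | grep -E '8080|9000|9001|9002|9003|9004|50030|50070'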
Related
[Study] Hadoop 2.2.0 compilation (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/11/hadoop-220-centos-64-x64.html
[Study] Hadoop 2.2.0 Single Cluster installation (part 2) (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/11/hadoop-220-single-cluster-centos-64-x64_7.html
[Study] Hadoop 2.2.0 Single Cluster installation (part 1) (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/11/hadoop-220-single-cluster-centos-64-x64.html
[Study] Hadoop 1.2.1 (rpm) installation (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/10/hadoop-121-rpm-centos-64-x64.html
[Study] Hadoop 1.2.1 (bin) installation (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/07/hadoop-112-centos-64-x64.html
[Study] Hadoop 1.2.1 installation (CentOS 6.4 x64)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=80035
[Study] Cloud software Hadoop 1.0.0 installation (CentOS 6.2 x86)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=21166
[Study] Cloud software Hadoop 0.20.2 installation (CentOS 5.5 x86)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=18513
[Study] Cloud software Hadoop 0.20.2 installation (CentOS 5.4 x86)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=17974
Comments
Hello, thank you for sharing.
If the ResourceManager keeps failing to start (the NodeManager starts successfully), how can that be fixed?
Are your installation environment and steps identical to the ones here? What is the error message?
Hello, I later used the 64-bit library you shared, redid that step, and it worked. Thanks.
[Study] Hadoop 2.4.1 installation (CentOS 7.0 x86_64)
http://shaurong.blogspot.com/2014/08/hadoop-241-centos-70-x8664.html
[Study] hadoop-2.4.1-src.tar.gz quick compile-and-install script (CentOS 7.0 x86_64)
http://shaurong.blogspot.com/2014/08/hadoop-241-srctargz-centos-70-x8664.html