Friday, November 8, 2013

[Study] Hadoop 2.2.0 Single Cluster Installation (1) (CentOS 6.4 x64)


2013-11-08

This is written as I learn and share; it may not be complete or 100% correct.

Hadoop is a system for building cloud environments that provides distributed parallel computing. It is modeled on the Google File System, is developed in Java, and provides the HDFS and MapReduce APIs.

Official website
http://hadoop.apache.org/

Installation references
http://hadoop.apache.org/docs/r2.2.0/hadoop-project-dist/hadoop-common/SingleCluster.html
http://hadoop.apache.org/docs/r2.2.0/hadoop-project-dist/hadoop-common/ClusterSetup.html
http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleNodeSetup.html
http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleCluster.html

Download
http://apache.cdpa.nsysu.edu.tw/hadoop/common/hadoop-2.2.0/

Environment

192.168.128.102  (CentOS 6.4 x64)
Hostname: localhost and localhost.localdomain

Installation

# To keep things simple and avoid unexpected problems, disable SELinux (Security-Enhanced Linux) and iptables

# Turn off SELinux immediately
setenforce 0 

# Keep SELinux disabled after reboot
vi  /etc/selinux/config
Find
SELINUX=
and change it to
SELINUX=disabled
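
# As an alternative (a sketch of the same edit done non-interactively with sed)
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config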

# Stop iptables immediately
service iptables stop  
service ip6tables stop  

# Keep iptables disabled after reboot
chkconfig iptables off  
chkconfig ip6tables off  

# Install the JRE or JDK

cd /usr/local/src

# Download Java manually from the official website
http://www.oracle.com/technetwork/java/javase/downloads/index-jsp-138363.html#javasejdk

rpm -ivh jre-7u45-linux-x64.rpm
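
# Verify the Java installation (the path below assumes the jre-7u45 RPM from the previous step)
/usr/java/jre1.7.0_45/bin/java -version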

wget http://apache.cdpa.nsysu.edu.tw/hadoop/common/hadoop-2.2.0/hadoop-2.2.0.tar.gz
tar zxvf hadoop-2.2.0.tar.gz -C /usr/local
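
# Confirm the archive was extracted to /usr/local/hadoop-2.2.0
ls -d /usr/local/hadoop-2.2.0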

# Before continuing, first follow the post below to replace the bundled 32-bit native libraries with 64-bit builds, then come back here
# [Study] Building Hadoop 2.2.0 (CentOS 6.4 x64)
# http://shaurong.blogspot.tw/2013/11/hadoop-220-centos-64-x64.html

[root@localhost ~]# tar zxvf hadoop-2.2.0-native_x86_64.tar.gz
native/
native/libhadooppipes.a
native/libhadoop.so.1.0.0
native/libhdfs.so.0.0.0
native/libhadoop.a
native/libhdfs.so
native/libhadoop.so
native/libhadooputils.a
native/libhdfs.a

[root@localhost ~]# cp native/* /usr/local/hadoop-2.2.0/lib/native
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadoop.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadooppipes.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadoop.so'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadoop.so.1.0.0'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhadooputils.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhdfs.a'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhdfs.so'? y
cp: overwrite `/usr/local/hadoop-2.2.0/lib/native/libhdfs.so.0.0.0'? y

# If you installed jre-7u45-linux-x64.rpm
export JAVA_HOME=/usr/java/jre1.7.0_45
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=$JAVA_HOME/lib/ext:$JAVA_HOME/lib/tools.jar

# If you installed jdk-7u45-linux-x64.rpm (note the different path)
#export JAVA_HOME=/usr/java/jdk1.7.0_45
#export PATH=$PATH:$JAVA_HOME/bin
#export CLASSPATH=$JAVA_HOME/jre/lib/ext:$JAVA_HOME/lib/tools.jar

export HADOOP_PREFIX=/usr/local/hadoop-2.2.0
export HADOOP_COMMON_HOME=/usr/local/hadoop-2.2.0
export HADOOP_MAPRED_HOME=/usr/local/hadoop-2.2.0
export HADOOP_CONF_DIR=/usr/local/hadoop-2.2.0/etc/hadoop
export HADOOP_HDFS_HOME=/usr/local/hadoop-2.2.0
export HADOOP_YARN_HOME=/usr/local/hadoop-2.2.0
export YARN_CONF_DIR=$HADOOP_CONF_DIR
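
# A quick sanity check that the variables above are correct; this should print the Hadoop version (2.2.0)
$HADOOP_PREFIX/bin/hadoop version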

# Edit .bashrc so the long list of exports above does not have to be run every time root logs in

[root@localhost ~]# vi  /root/.bashrc
Add the following:
export JAVA_HOME=/usr/java/jre1.7.0_45
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=$JAVA_HOME/lib/ext:$JAVA_HOME/lib/tools.jar

#export JAVA_HOME=/usr/java/jdk1.7.0_45
#export PATH=$PATH:$JAVA_HOME/bin
#export CLASSPATH=$JAVA_HOME/jre/lib/ext:$JAVA_HOME/lib/tools.jar

export HADOOP_PREFIX=/usr/local/hadoop-2.2.0
export HADOOP_COMMON_HOME=/usr/local/hadoop-2.2.0
export HADOOP_MAPRED_HOME=/usr/local/hadoop-2.2.0
export HADOOP_CONF_DIR=/usr/local/hadoop-2.2.0/etc/hadoop
export HADOOP_HDFS_HOME=/usr/local/hadoop-2.2.0
export HADOOP_YARN_HOME=/usr/local/hadoop-2.2.0
export YARN_CONF_DIR=$HADOOP_CONF_DIR
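
# After saving .bashrc, reload it so the variables take effect in the current shell, and check one of them
source /root/.bashrc
echo $HADOOP_PREFIX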

# ******************************************************************************

# Set JAVA_HOME in hadoop-env.sh, httpfs-env.sh, mapred-env.sh, and yarn-env.sh
# If you have already run export JAVA_HOME and added it to .bashrc, this part is not required; it is shown only as an alternative approach, for reference

# Configure hadoop-env.sh

[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/hadoop-env.sh

Find
# The java implementation to use.
export JAVA_HOME=${JAVA_HOME}
and add the following line below it
export JAVA_HOME=/usr/java/jre1.7.0_45

[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/httpfs-env.sh
Add the following line anywhere in the file
export JAVA_HOME=/usr/java/jre1.7.0_45

[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/mapred-env.sh
Find
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
and add the following line below it
export JAVA_HOME=/usr/java/jre1.7.0_45

[root@localhost ~]# vi /usr/local/hadoop-2.2.0/etc/hadoop/yarn-env.sh
Find
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
and add the following line below it
export JAVA_HOME=/usr/java/jre1.7.0_45

# *******************************************************************************

# Edit the XML configuration files (only a few of them are changed here)

# capacity-scheduler.xml  hadoop-policy.xml  httpfs-site.xml  yarn-site.xml  core-site.xml  hdfs-site.xml mapred-site.xml
# Reference
# http://hadoop.apache.org/docs/r2.2.0/hadoop-project-dist/hadoop-common/SingleCluster.html

Note: the host used in the .xml files must be the name reported by the hostname -f command; an IP address cannot be used
[root@localhost hadoop-2.2.0]# hostname
localhost.localdomain

[root@localhost hadoop-2.2.0]# hostname  -f
localhost

If you want to change the hostname (for example, to hadoop01 and hadoop01.hadoopcluster):

Set the hostname (takes effect immediately)
[root@localhost hadoop-2.2.0]# hostname  hadoop01.hadoopcluster

Verify
[root@localhost hadoop-2.2.0]# hostname
hadoop01.hadoopcluster

Edit /etc/sysconfig/network to set the hostname used after reboot
[root@localhost hadoop-2.2.0]# vi   /etc/sysconfig/network
Find
HOSTNAME=localhost.localdomain
and change it to
HOSTNAME=hadoop01.hadoopcluster

Edit the hosts file
[root@localhost hadoop-2.2.0]# vi   /etc/hosts
Add:
192.168.128.102    hadoop01   hadoop01.hadoopcluster

Because this post covers only a single-machine installation, I kept localhost and localhost.localdomain and did not change them.

******************************

# Configure mapred-site.xml

cp  /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml.template  /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml

vi  /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml


<configuration>
</configuration>

Between the two tags above, add:

  <property>
    <name>mapreduce.cluster.temp.dir</name>
    <value></value>
    <description>No description</description>
    <final>true</final>
  </property>

  <property>
    <name>mapreduce.cluster.local.dir</name>
    <value></value>
    <description>No description</description>
    <final>true</final>
  </property>
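
# After editing, a quick well-formedness check (a sketch; assumes xmllint from the libxml2 package is available)
xmllint --noout /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml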


# Configure yarn-site.xml

vi  /usr/local/hadoop-2.2.0/etc/hadoop/yarn-site.xml


<configuration>
</configuration>
Between the two tags above, add the following (note: the host:port values must be adjusted to your environment, and the host must be what hostname -f reports)
(the ports themselves can be changed to other values if you prefer)

  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>localhost:9001</value>
    <description>host is the hostname of the resource manager and 
    port is the port on which the NodeManagers contact the Resource Manager.
    </description>
  </property>

  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>localhost:9002</value>
    <description>host is the hostname of the resourcemanager and port is the port
    on which the Applications in the cluster talk to the Resource Manager.
    </description>
  </property>

  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    <description>In case you do not want to use the default scheduler</description>
  </property>

  <property>
    <name>yarn.resourcemanager.address</name>
    <value>localhost:9003</value>
    <description>the host is the hostname of the ResourceManager and the port is the port on
    which the clients can talk to the Resource Manager. </description>
  </property>

  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value></value>
    <description>the local directories used by the nodemanager</description>
  </property>

  <property>
    <name>yarn.nodemanager.address</name>
    <value>localhost:9004</value>
    <description>the nodemanagers bind to this port</description>
  </property>  

  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>10240</value>
    <description>the amount of memory available on the NodeManager, in MB</description>
  </property>

  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/app-logs</value>
    <description>directory on hdfs where the application logs are moved to </description>
  </property>

   <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value></value>
    <description>the directories used by Nodemanagers as log directories</description>
  </property>

  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>shuffle service that needs to be set for Map Reduce to run </description>
  </property>
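
# Before starting the daemons, you may want to confirm the ports chosen above (9001-9004 here) are not already in use (a sketch)
netstat -tln | grep -E ':900[1-4]'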

# Configure capacity-scheduler.xml

vi  /usr/local/hadoop-2.2.0/etc/hadoop/capacity-scheduler.xml

Find the root.queues section:


  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default</value>
    <description>
      The queues at the this level (root is the root queue).
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>100</value>
    <description>Default queue target capacity.</description>
  </property>

Change it to the following (the capacities of sibling queues under the same parent must total 100, so unfunded and default each get 50):

  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>unfunded,default</value>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.capacity</name>
    <value>100</value>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.unfunded.capacity</name>
    <value>50</value>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>50</value>
  </property>


# **************************************************************************

# Start

[root@localhost hadoop]# cd $HADOOP_MAPRED_HOME

# Start the ResourceManager

[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-root-resourcemanager-localhost.localdomain.out

# Always verify with ps aux | grep resourcemanager: if the command above fails, it may not print an error message, but the process will not be found here, which means it did not start successfully

[root@localhost hadoop-2.2.0]# ps aux | grep resourcemanager
root      3424  6.4 10.3 1712816 105288 pts/1  Sl   14:44   0:05 /usr/java/jre1.7.0_45/bin/java -Dproc_resourcemanager -Xmx1000m -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.home.dir= -Dyarn.id.str=root -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -Dyarn.policy.file=hadoop-policy.xml -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-resourcemanager-localhost.localdomain.log -Dyarn.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -classpath /usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/share/hadoop/common/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/common/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/*:/contrib/capacity-scheduler/*.jar:/contrib/capacity-scheduler/*.jar:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/etc/hadoop/rm-config/log4j.properties org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
root      3772  0.0  0.0 103236   872 pts/1    S+   14:45   0:00 grep resourcemanager
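
# Optionally, also confirm the ResourceManager is listening on the ports set in yarn-site.xml (a sketch)
netstat -tlnp | grep -E ':900[1-3]'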

# Start the NodeManager

[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh start nodemanager
starting nodemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-root-nodemanager-localhost.localdomain.out

Always verify with ps aux | grep nodemanager: if the command above fails, it may not print an error message, but the process will not be found here, which means it did not start successfully

[root@localhost hadoop-2.2.0]#  ps aux | grep nodemanager
root      3659  4.2  9.6 1579176 97232 pts/1   Sl   14:44   0:07 /usr/java/jre1.7.0_45/bin/java -Dproc_nodemanager -Xmx1000m -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.home.dir= -Dyarn.id.str=root -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -Dyarn.policy.file=hadoop-policy.xml -server -Dhadoop.log.dir=/usr/local/hadoop-2.2.0/logs -Dyarn.log.dir=/usr/local/hadoop-2.2.0/logs -Dhadoop.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.log.file=yarn-root-nodemanager-localhost.localdomain.log -Dyarn.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.home.dir=/usr/local/hadoop-2.2.0 -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/usr/local/hadoop-2.2.0/lib/native -classpath /usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/etc/hadoop:/usr/local/hadoop-2.2.0/share/hadoop/common/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/common/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/hdfs/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/lib/*:/usr/local/hadoop-2.2.0/share/hadoop/mapreduce/*:/contrib/capacity-scheduler/*.jar:/contrib/capacity-scheduler/*.jar:/usr/local/hadoop-2.2.0/share/hadoop/yarn/*:/usr/local/hadoop-2.2.0/share/hadoop/yarn/lib/*:/usr/local/hadoop-2.2.0/etc/hadoop/nm-config/log4j.properties org.apache.hadoop.yarn.server.nodemanager.NodeManager
root      3780  0.0  0.0 103236   868 pts/1    S+   14:47   0:00 grep nodemanager
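
# With both daemons running, the NodeManager should also appear as a registered node (a sketch using the yarn CLI)
bin/yarn node -list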

# **************************************************************************

# Test


# Change directory

[root@localhost hadoop-2.2.0]# cd $HADOOP_COMMON_HOME

# The official page says to use the command below, but it is actually wrong

[root@localhost hadoop-2.2.0]# $HADOOP_COMMON_HOME/bin/hadoop jar hadoop-examples.jar randomwriter out
Not a valid JAR: /usr/local/hadoop-2.2.0/hadoop-examples.jar

# The test should be run like this instead

[root@localhost hadoop-2.2.0]# $HADOOP_COMMON_HOME/bin/hadoop jar /usr/local/hadoop-2.2.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar randomwriter out

13/11/08 08:52:49 INFO Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
13/11/08 08:52:49 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
Running 10 maps.
Job started: Fri Nov 08 08:52:51 CST 2013
13/11/08 08:52:51 INFO jvm.JvmMetrics: Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
13/11/08 08:52:52 INFO mapreduce.JobSubmitter: number of splits:1
13/11/08 08:52:52 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.output.value.class is deprecated. Instead, use mapreduce.job.output.value.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.map.class is deprecated. Instead, use mapreduce.job.map.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.reduce.class is deprecated. Instead, use mapreduce.job.reduce.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.inputformat.class is deprecated. Instead, use mapreduce.job.inputformat.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.output.dir is deprecated. Instead, use mapreduce.output.fileoutputformat.outputdir
13/11/08 08:52:52 INFO Configuration.deprecation: mapreduce.outputformat.class is deprecated. Instead, use mapreduce.job.outputformat.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.output.key.class is deprecated. Instead, use mapreduce.job.output.key.class
13/11/08 08:52:52 INFO Configuration.deprecation: mapred.working.dir is deprecated. Instead, use mapreduce.job.working.dir
13/11/08 08:52:53 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_local58926325_0001
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: dfs.namenode.name.dir;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.cluster.local.dir;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: dfs.datanode.data.dir;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: fs.defaultFS;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/staging/root58926325/.staging/job_local58926325_0001/job.xml:an attempt to override final parameter: mapreduce.cluster.temp.dir;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: dfs.namenode.name.dir;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.cluster.local.dir;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: dfs.datanode.data.dir;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: fs.defaultFS;  Ignoring.
13/11/08 08:52:53 WARN conf.Configuration: file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local58926325_0001/job_local58926325_0001.xml:an attempt to override final parameter: mapreduce.cluster.temp.dir;  Ignoring.
13/11/08 08:52:53 INFO mapreduce.Job: The url to track the job: http://localhost:8080/
13/11/08 08:52:53 INFO mapreduce.Job: Running job: job_local58926325_0001
13/11/08 08:52:53 INFO mapred.LocalJobRunner: OutputCommitter set in config null
13/11/08 08:52:54 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
13/11/08 08:52:54 INFO mapred.LocalJobRunner: Waiting for map tasks
13/11/08 08:52:54 INFO mapred.LocalJobRunner: Starting task: attempt_local58926325_0001_m_000000_0
13/11/08 08:52:54 INFO mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
13/11/08 08:52:54 INFO mapred.MapTask: Processing split: hdfs://localhost:9000/user/root/out/dummy-split-0:0+1
13/11/08 08:52:54 INFO mapreduce.Job: Job job_local58926325_0001 running in uber mode : false
13/11/08 08:52:54 INFO mapreduce.Job:  map 0% reduce 0%
13/11/08 08:53:02 INFO mapred.LocalJobRunner: wrote record 11400. 953873410 bytes left. > map
13/11/08 08:53:05 INFO mapred.LocalJobRunner: wrote record 20000. 862727228 bytes left. > map
13/11/08 08:53:08 INFO mapred.LocalJobRunner: wrote record 23200. 828573860 bytes left. > map
13/11/08 08:53:11 INFO mapred.LocalJobRunner: wrote record 33200. 723844502 bytes left. > map
13/11/08 08:53:14 INFO mapred.LocalJobRunner: wrote record 35800. 696712593 bytes left. > map
13/11/08 08:53:17 INFO mapred.LocalJobRunner: wrote record 46000. 589007538 bytes left. > map
13/11/08 08:53:20 INFO mapred.LocalJobRunner: wrote record 50400. 543160330 bytes left. > map
13/11/08 08:53:24 INFO mapred.LocalJobRunner: wrote record 57800. 466576786 bytes left. > map
13/11/08 08:53:27 INFO mapred.LocalJobRunner: wrote record 64600. 394899575 bytes left. > map
13/11/08 08:53:30 INFO mapred.LocalJobRunner: wrote record 69200. 346440937 bytes left. > map
13/11/08 08:53:33 INFO mapred.LocalJobRunner: wrote record 78200. 252285093 bytes left. > map
13/11/08 08:53:36 INFO mapred.LocalJobRunner: wrote record 81000. 223522662 bytes left. > map
13/11/08 08:53:39 INFO mapred.LocalJobRunner: wrote record 90800. 120472094 bytes left. > map
13/11/08 08:53:42 INFO mapred.LocalJobRunner: wrote record 94600. 80375787 bytes left. > map
13/11/08 08:53:44 INFO mapred.LocalJobRunner: wrote record 94600. 80375787 bytes left. > map
13/11/08 08:53:46 INFO mapred.LocalJobRunner: done with 102195 records.
13/11/08 08:53:47 INFO mapreduce.Job:  map 100% reduce 0%
13/11/08 08:53:47 INFO mapred.Task: Task:attempt_local58926325_0001_m_000000_0 is done. And is in the process of committing
13/11/08 08:53:47 INFO mapred.LocalJobRunner: done with 102195 records.
13/11/08 08:53:47 INFO mapred.Task: Task attempt_local58926325_0001_m_000000_0 is allowed to commit now
13/11/08 08:53:47 INFO output.FileOutputCommitter: Saved output of task 'attempt_local58926325_0001_m_000000_0' to hdfs://localhost:9000/user/root/out/_temporary/0/task_local58926325_0001_m_000000
13/11/08 08:53:47 INFO mapred.LocalJobRunner: done with 102195 records.
13/11/08 08:53:47 INFO mapred.Task: Task 'attempt_local58926325_0001_m_000000_0' done.
13/11/08 08:53:47 INFO mapred.LocalJobRunner: Finishing task: attempt_local58926325_0001_m_000000_0
13/11/08 08:53:47 INFO mapred.LocalJobRunner: Map task executor complete.
13/11/08 08:53:48 INFO mapreduce.Job: Job job_local58926325_0001 completed successfully
13/11/08 08:53:48 INFO mapreduce.Job: Counters: 25
        File System Counters
                FILE: Number of bytes read=270385
                FILE: Number of bytes written=455985
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=0
                HDFS: Number of bytes written=1077281362
                HDFS: Number of read operations=4
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=3
        Map-Reduce Framework
                Map input records=1
                Map output records=102195
                Input split bytes=114
                Spilled Records=0
                Failed Shuffles=0
                Merged Map outputs=0
                GC time elapsed (ms)=611
                CPU time spent (ms)=0
                Physical memory (bytes) snapshot=0
                Virtual memory (bytes) snapshot=0
                Total committed heap usage (bytes)=25653248
        org.apache.hadoop.examples.RandomWriter$Counters
                BYTES_WRITTEN=1073744266
                RECORDS_WRITTEN=102195
        File Input Format Counters
                Bytes Read=0
        File Output Format Counters
                Bytes Written=1077281362
Job ended: Fri Nov 08 08:53:48 CST 2013
The job took 57 seconds.
[root@localhost hadoop-2.2.0]#
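
# If HDFS at hdfs://localhost:9000 is available (the log above shows the output being written there),
# the generated files can be listed as a further check (a sketch)
bin/hadoop fs -ls out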

# Hadoop 1.x, per the official tutorial, provided web interfaces; I tried them here, but none of the pages came up (this is investigated in the follow-up post)

# **************************************************************************

# Stop (use the stop argument)

# Stop the NodeManager

[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop nodemanager
no nodemanager to stop

# The message above appears because the NodeManager never started successfully, so there was no NodeManager to stop
# If the NodeManager was running, stop prints the following instead

[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop nodemanager
stopping nodemanager

# Stop the ResourceManager

[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop resourcemanager
no resourcemanager to stop

# The message above appears because the ResourceManager never started successfully, so there was no ResourceManager to stop
# If the ResourceManager was running, stop prints the following instead

[root@localhost hadoop-2.2.0]# sbin/yarn-daemon.sh stop resourcemanager
stopping resourcemanager
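
# As an alternative (a sketch), the bundled scripts stop or start the ResourceManager and NodeManager together
sbin/stop-yarn.sh
sbin/start-yarn.sh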

# **************************************************************************

# Appendix: about the firewall

After testing is OK, if you prefer not to leave the firewall off, you can add a few rules instead.
First start iptables and ip6tables, then save the current rules
(they are written to /etc/sysconfig/iptables and /etc/sysconfig/ip6tables).

[root@localhost ~]# service iptables start

[root@localhost ~]# service ip6tables start

[root@localhost ~]# service iptables save

[root@localhost ~]# service ip6tables save

Edit the iptables firewall rules

[root@localhost ~]# vi /etc/sysconfig/iptables
# Firewall configuration written by system-config-firewall
# Manual customization of this file is not recommended.
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT

Change it to the following (add lines for the ports you configured)
# Firewall configuration written by system-config-firewall
# Manual customization of this file is not recommended.
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 8080 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 50070 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 50030 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9000 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9001 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9002 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9003 -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 9004 -j ACCEPT
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT

[root@localhost ~]# vi /etc/sysconfig/ip6tables
Modify the ip6tables rules in the same way

Restart iptables (this reloads all the rules)

[root@localhost ~]# service iptables restart
iptables: Flushing firewall rules:                         [  OK  ]
iptables: Setting chains to policy ACCEPT: filter          [  OK  ]
iptables: Unloading modules:                               [  OK  ]
iptables: Applying firewall rules:                         [  OK  ]

[root@localhost ~]# service ip6tables restart
ip6tables: Flushing firewall rules:                        [  OK  ]
ip6tables: Setting chains to policy ACCEPT: filter         [  OK  ]
ip6tables: Unloading modules:                              [  OK  ]
ip6tables: Applying firewall rules:                        [  OK  ]
[root@localhost ~]#
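
# A quick check that the new rules are active
iptables -L INPUT -n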

(End)


Related

[Study] Building Hadoop 2.2.0 (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/11/hadoop-220-centos-64-x64.html

[Study] Hadoop 2.2.0 Single Cluster Installation (2) (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/11/hadoop-220-single-cluster-centos-64-x64_7.html

[Study] Hadoop 2.2.0 Single Cluster Installation (1) (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/11/hadoop-220-single-cluster-centos-64-x64.html

[Study] Hadoop 1.2.1 (rpm) Installation (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/10/hadoop-121-rpm-centos-64-x64.html

[Study] Hadoop 1.2.1 (bin) Installation (CentOS 6.4 x64)
http://shaurong.blogspot.tw/2013/07/hadoop-112-centos-64-x64.html

[Study] Hadoop 1.2.1 Installation (CentOS 6.4 x64)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=80035

[Study] Cloud software Hadoop 1.0.0 Installation (CentOS 6.2 x86)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=21166

[Study] Cloud software Hadoop 0.20.2 Installation (CentOS 5.5 x86)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=18513

[Study] Cloud software Hadoop 0.20.2 Installation (CentOS 5.4 x86)
http://forum.icst.org.tw/phpbb/viewtopic.php?t=17974



4 comments:

  1. Hello, thank you for sharing.
     If the resource manager keeps failing to start (while the node manager starts successfully),
     how can this be adjusted?

    1. Is your installation environment and are your steps identical? What is the error message?

    2. Hello, it worked after I switched to the 64-bit library you shared and tried again. Thank you.

    3. [Study] Hadoop 2.4.1 Installation (CentOS 7.0 x86_64)
       http://shaurong.blogspot.com/2014/08/hadoop-241-centos-70-x8664.html

       [Study] Quick build and install of hadoop-2.4.1-src.tar.gz (CentOS 7.0 x86_64)
       http://shaurong.blogspot.com/2014/08/hadoop-241-srctargz-centos-70-x8664.html