HDFS高可用&YARN高可用集群(生产配置)

准备三台机器:CentOSA、CentOSB、CentOSC

CentOSA(192.168.169.140)

CentOSB(192.168.169.141)

CentOSC(192.168.169.142)
 

 

zookeeper
 

zookeeper

zookeeper

zkfc

zkfc

 

nn1

 

nn2

 

 

 

journalnode

journalnode

journalnode

datanode

datanode

datanode

 

rm1

rm2

nodemanager

nodemanager

nodemanager

 

配置主机名和IP映射关系

[root@CentOSX ~]代表3个都配置

[root@CentOSX ~]# vi /etc/hosts
192.168.169.140 CentOSA
192.168.169.141 CentOSB
192.168.169.142 CentOSC

*关闭防火墙

[root@CentOSX ~]# service iptables stop #关闭防火墙
iptables: Setting chains to policy ACCEPT: filter          [  OK  ]
iptables: Flushing firewall rules:                         [  OK  ]
iptables: Unloading modules:                               [  OK  ]
[root@CentOSX ~]# chkconfig iptables off #关闭开机自启
[root@CentOSX ~]# chkconfig --list | grep iptables #查看
iptables       	0:off	1:off	2:off	3:off	4:off	5:off	6:off

 同步CentOSA/B/C上的时钟

[root@CentOSX ~]# date -s '2019-01-09 11:26:00' #设置同步时间
Wed Jan  9 11:26:00 CST 2019
[root@CentOSX ~]# clock -w

配置CentOSA/B/C机器间ssh免密码认证

[root@CentOSX ~]# ssh-keygen -t rsa
[root@CentOSX ~]# ssh-copy-id CentOSA #相互之间免密
[root@CentOSX ~]# ssh-copy-id CentOSB #相互之间免密
[root@CentOSX ~]# ssh-copy-id CentOSC #相互之间免密
[root@CentOSX ~]# ssh CentOSC #看是否成功
[root@CentOSX ~]# ssh CentOSB #看是否成功
[root@CentOSX ~]# ssh CentOSA #看是否成功

 

安装JDK并且配置JAVA_HOME环境变量

 yum install lrzsz -y #运行一个插件 上传文件 rz 选择上传的文件 jdk-8u171-linux-x64.rpm #hadoop-2.6.0_x64.tar.gz zookeeper-3.4.6.tar.gz

[root@CentOSA ~]# ls
anaconda-ks.cfg          install.log.syslog       yes.pub
hadoop-2.6.0_x64.tar.gz  jdk-8u171-linux-x64.rpm  zookeeper-3.4.6.tar.gz
install.log              yes

[root@CentOSA ~]# scp jdk-8u171-linux-x64.rpm CentOSB:~ #把文件给 b 和 c
jdk-8u171-linux-x64.rpm                               100%  167MB  83.6MB/s   00:02    
[root@CentOSA ~]# scp jdk-8u171-linux-x64.rpm CentOSC:~
jdk-8u171-linux-x64.rpm                               100%  167MB  55.7MB/s   00:03    
[root@CentOSA ~]# scp zookeeper-3.4.6.tar.gz CentOSB:~
zookeeper-3.4.6.tar.gz                                100%   17MB  16.9MB/s   00:00    
[root@CentOSA ~]# scp zookeeper-3.4.6.tar.gz CentOSC:~
zookeeper-3.4.6.tar.gz                                100%   17MB  16.9MB/s   00:01    
[root@CentOSA ~]# scp hadoop-2.6.0_x64.tar.gz CentOSB:~/
hadoop-2.6.0_x64.tar.gz                               100%  172MB  85.9MB/s   00:02    
[root@CentOSA ~]# scp hadoop-2.6.0_x64.tar.gz CentOSC:~/
hadoop-2.6.0_x64.tar.gz                               100%  172MB  85.9MB/s   00:02 

[root@CentOSX ~]# rpm -ivh jdk-8u171-linux-x64.rpm 
Preparing...                ########################################### [100%]
   1:jdk1.8                 ########################################### [100%]
Unpacking JAR files...
	tools.jar...
	plugin.jar...
	javaws.jar...
	deploy.jar...
	rt.jar...
	jsse.jar...
	charsets.jar...
	localedata.jar...
[root@CentOSX ~]# vi .bashrc 
JAVA_HOME=/usr/java/latest
PATH=$PATH:$JAVA_HOME/bin
CLASSPATH=.
export JAVA_HOME
export PATH
export CLASSPATH
[root@CentOSX ~]# source .bashrc 

安装配置zookeeper集群

[root@CentOSX ~]# tar -zxf zookeeper-3.4.6.tar.gz -C /usr/
[root@CentOSX ~]# vi /usr/zookeeper-3.4.6/conf/zoo.cfg
tickTime=2000
dataDir=/root/zkdata
clientPort=2181
initLimit=5
syncLimit=2
server.1=CentOSA:2887:3887
server.2=CentOSB:2887:3887
server.3=CentOSC:2887:3887
[root@CentOSX ~]# mkdir /root/zkdata
[root@CentOSA ~]# echo 1 >> zkdata/myid
[root@CentOSB ~]# echo 2 >> zkdata/myid
[root@CentOSC ~]# echo 3 >> zkdata/myid
[root@CentOSX ~]# /usr/zookeeper-3.4.6/bin/zkServer.sh start zoo.cfg

 启动完成zookeeper之后需要用户执行/usr/zookeeper-3.4.6/bin/zkServer.sh status zoo.cfg查看集群状态,一定会有1个节点是leader其他节点是follower

安装配置hadoop

解压配置HADOOP_HOME

[root@CentOSX ~]# tar -zxf hadoop-2.6.0_x64.tar.gz -C /usr/
[root@CentOSX ~]# vi .bashrc

HADOOP_HOME=/usr/hadoop-2.6.0
JAVA_HOME=/usr/java/latest
PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
CLASSPATH=.
export JAVA_HOME
export PATH
export CLASSPATH
export HADOOP_HOME
[root@CentOSX ~]# source .bashrc

配置etc/hadoop/core-site.xml

<!--对namenode做命名服务,该服务会在hdfs-site.xml解释-->
<property>
   <name>fs.defaultFS</name>
   <value>hdfs://mycluster</value>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/usr/hadoop-2.6.0/hadoop-${user.name}</value>  
</property>
<property>
  <name>fs.trash.interval</name>
  <value>30</value>  
</property>
<!--设置hadoop的机架脚本-->
<property>
  <name>net.topology.script.file.name</name>
  <value>/usr/hadoop-2.6.0/etc/hadoop/rack.sh</value>  
</property>

 创建机架脚本

`/usr/hadoop-2.6.0/etc/hadoop/rack.sh`

while [ $# -gt 0 ] ; do
	nodeArg=$1
	exec</usr/hadoop-2.6.0/etc/hadoop/topology.data
	result=""
	while read line ; do
        ar=( $line )
        if [ "${ar[0]}" = "$nodeArg" ] ; then
        result="${ar[1]}"
        fi
    done
    shift
    if [ -z "$result" ] ; then
         echo -n "/default-rack"
    else
    	echo -n "$result "
    fi
done

 给rack.sh添加可执行权限chmod u+x /usr/hadoop-2.6.0/etc/hadoop/rack.sh

创建机架映射文件

`/usr/hadoop-2.6.0/etc/hadoop/topology.data`

192.168.169.140 /rack1
192.168.169.141 /rack1
192.168.169.142 /rack2

 配置hdfs-site.xml

<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
<!--开启自动故障转移-->
<property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
</property>
<!--设置zookeeper服务地址-->
<property> 
    <name>ha.zookeeper.quorum</name>
    <value>CentOSA:2181,CentOSB:2181,CentOSC:2181</value>
</property>
<!--解析core-site.xml中mycluster配置-->
<property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
</property>
<property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>CentOSA:9000</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>CentOSB:9000</value>
</property>
<!--连接日志服务-->
<property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://CentOSA:8485;CentOSB:8485;CentOSC:8485/mycluster</value>
</property>
<!--配置转移处理者-->
<property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!--配置SSH防御信息-->
<property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
</property>
<property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
</property>

 编辑slaves信息

[root@CentOSX ~]# vi /usr/hadoop-2.6.0/etc/hadoop/slaves

CentOSA
CentOSB
CentOSC

启动HDFS服务(第一次初始化)

[root@CentOSX ~]# hadoop-daemon.sh start journalnode //等上10秒钟,再进行下一步操作
[root@CentOSA ~]# hdfs namenode -format
[root@CentOSA ~]# hadoop-daemon.sh start namenode
[root@CentOSB ~]# hdfs namenode -bootstrapStandby (下载active的namenode元数据)
[root@CentOSB ~]# hadoop-daemon.sh start namenode
[root@CentOSA|B ~]# hdfs zkfc -formatZK (可以在CentOSA或者CentOSB任意一台注册namenode信息)
[root@CentOSA ~]# hadoop-daemon.sh start zkfc (哨兵)
[root@CentOSB ~]# hadoop-daemon.sh start zkfc (哨兵)
[root@CentOSX ~]# hadoop-daemon.sh start datanode

日常维护

[root@CentOSA|B|C ~]# start-dfs.sh | stop-dfs.sh

参考:http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html

                                     Resource Manager HA

etc/hadoop/yarn-site.xml


<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<!--Resource Manager-->
<property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
</property>
<property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>CentOSA:2181,CentOSB:2181,CentOSC:2181</value>
</property>
<property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>rmcluster01</value>
</property>
<property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>CentOSB</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>CentOSC</value>
</property>

 etc/hadoop/mapred-site.xml

<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>

 启动yarn

[root@CentOSB ~]# yarn-daemon.sh start|stop resourcemanager
[root@CentOSC ~]# yarn-daemon.sh start|stop resourcemanager
[root@CentOSX ~]# yarn-daemon.sh start|stop nodemanager

 参考:http://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html

猜你喜欢

转载自blog.csdn.net/weixin_43989957/article/details/86184100