Hadoop Cluster Setup (Guaranteed to Work): HA with NameNode Failover

Hadoop high-availability (HA) cluster setup

Preparation

  1. Install three CentOS 7 servers

  2. Set the hostnames: hd01, hd02, hd03

  3. Configure static network addresses

  4. Disable the firewall

hostnamectl set-hostname hd01           # hd02 / hd03 on the other two nodes
vim /etc/hosts
vim /etc/sysconfig/network-scripts/ifcfg-ens33

systemctl stop firewalld.service        # stops the firewall now, but it would come back after a reboot
systemctl disable firewalld.service     # prevent firewalld from starting on boot
systemctl restart network

# configure the static IP on each of the three hosts in turn
# (host parts 220, 230 and 240 in this setup)
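For reference, here is a minimal sketch of the host mapping and static network settings; the 192.168.56.0/24 subnet, gateway and DNS below are assumptions (only the .220/.230/.240 host parts come from the note above), so adjust them to your own environment.

# /etc/hosts (identical on all three nodes)
192.168.56.220 hd01
192.168.56.230 hd02
192.168.56.240 hd03

# /etc/sysconfig/network-scripts/ifcfg-ens33 on hd01 (use .230 / .240 on hd02 / hd03)
BOOTPROTO=static
ONBOOT=yes
IPADDR=192.168.56.220
NETMASK=255.255.255.0
GATEWAY=192.168.56.2
DNS1=114.114.114.114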

Set up SSH keys (on all 3 nodes)

# generate an SSH key pair on hd01 (no passphrase)
ssh-keygen -t rsa -P ''
# copy the public key to hd01, hd02 and hd03
ssh-copy-id root@hd01      # answer "yes" and enter the root password when prompted
ssh-copy-id root@hd02
ssh-copy-id root@hd03
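A quick check that passwordless login works from hd01; each command should print the remote hostname without asking for a password:

ssh hd02 hostname
ssh hd03 hostname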

Synchronize the time on all servers (run on all 3 nodes)

# install chrony
yum -y install chrony
# configure chrony
vim /etc/chrony.conf
# comment out the default "server X.centos.pool.ntp.org iburst" lines and add:
server ntp1.aliyun.com
server ntp2.aliyun.com
server ntp3.aliyun.com
# start chrony
systemctl start chronyd
# check the time
date
# install wget
yum install -y wget
# install psmisc (provides fuser, used during NameNode active/standby switchover; only the two NameNode hosts strictly need it)
yum install -y psmisc
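To confirm that chrony is actually syncing against the Aliyun servers, the standard checks below can be used (output varies by environment):

chronyc sources -v          # lists the configured NTP sources and their reachability
timedatectl                 # shows whether NTP synchronization is active
systemctl enable chronyd    # optional: also start chronyd on boot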

Copy the installation archives into /opt on hd01 (e.g. drag them in via your SFTP/SSH client)

cd /opt/
ls
tar -zxf zookeeper-3.4.5-cdh5.14.2.tar.gz
mv zookeeper-3.4.5-cdh5.14.2 soft/zk345
cd soft/zk345/conf/
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
================1
# change the data directory
dataDir=/opt/soft/zk345/data
# add the three servers; the first port is for follower-to-leader communication, the second is for leader election
server.1=hd01:2888:3888
server.2=hd02:2888:3888
server.3=hd03:2888:3888
================1
cd ..
mkdir data
cd data/
echo "1" > myid
cat myid
# if it prints 1, the myid file is set up correctly
cd ..
cd ..
ls
scp -r zk345/ root@hd02:/opt/soft/
scp -r zk345/ root@hd03:/opt/soft/
# switch to the hd02 terminal
cd /opt/soft/
ls
vim zk345/data/myid
# change the 1 to 2
# switch to the hd03 terminal
cd /opt/soft/
ls
vim zk345/data/myid
# change the 1 to 3

Edit the environment variables on all 3 nodes

vim /etc/profile
#ZK ENV
export ZOOKEEPER_HOME=/opt/soft/zk345
export PATH=$PATH:$ZOOKEEPER_HOME/bin
# apply the changes
source /etc/profile
zkServer.sh start
jps
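Once zkServer.sh start has been run on all three nodes, the status command gives a quick health check; one node should report leader mode and the other two follower mode:

zkServer.sh status    # expect "Mode: leader" on one node and "Mode: follower" on the other two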

Install the Hadoop cluster (on hd01)

cd /opt/
ls
tar -zxf had
mv had soft/hadoop260
cd soft/hadoop260
mkdir tmp
mkdir -p dfs/journalnode_data
mkdir -p dfs/edits
mkdir -p dfs/datanode_data
mkdir -p dfs/namenode_data
ls
cd dfs/
ls
cd ..

#2. Configure hadoop-env.sh
vim etc/hadoop/hadoop-env.sh
=======================1
export JAVA_HOME=/opt/soft/jdk180
export HADOOP_CONF_DIR=/opt/soft/hadoop260/etc/hadoop

:wq
=======================1
#3. Configure core-site.xml
vim etc/hadoop/core-site.xml
=======================2
<configuration>
  <!-- nameservice under which the Hadoop cluster registers in ZooKeeper -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hacluster</value>
  </property>

  <!-- directory for temporary files produced by Hadoop at runtime -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:///opt/soft/hadoop260/tmp</value>
  </property>

  <!-- I/O buffer size; the default is 4 KB -->
  <property>
    <name>io.file.buffer.size</name>
    <value>4096</value>
  </property>

  <!-- ZooKeeper quorum addresses -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hd01:2181,hd02:2181,hd03:2181</value>
  </property>

  <!-- hosts from which the root proxy user may connect -->
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>

  <!-- groups that the root proxy user may impersonate -->
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>
:wq
=======================2
#4. Configure hdfs-site.xml
vim etc/hadoop/hdfs-site.xml
=======================3
<configuration>
  <!-- block size; the default is 128 MB -->
  <property>
    <name>dfs.block.size</name>
    <value>134217728</value>
  </property>

  <!-- replication factor; defaults to 3 if not set -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <!-- where the NameNode stores its metadata -->
  <property>
    <name>dfs.name.dir</name>
    <value>file:///opt/soft/hadoop260/dfs/namenode_data</value>
  </property>

  <!-- where the DataNode stores its block data -->
  <property>
    <name>dfs.data.dir</name>
    <value>file:///opt/soft/hadoop260/dfs/datanode_data</value>
  </property>

  <!-- enable the HDFS web interface (WebHDFS) -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>

  <!-- number of threads a DataNode uses for file transfers -->
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>4096</value>
  </property>

  <!-- name under which the cluster registers in ZooKeeper (the nameservice) -->
  <property>
    <name>dfs.nameservices</name>
    <value>hacluster</value>
  </property>

  <!-- the hacluster nameservice has two NameNodes: nn1 and nn2 -->
  <property>
    <name>dfs.ha.namenodes.hacluster</name>
    <value>nn1,nn2</value>
  </property>

  <!-- RPC, service RPC and HTTP addresses of nn1 -->
  <property>
    <name>dfs.namenode.rpc-address.hacluster.nn1</name>
    <value>hd01:9000</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.hacluster.nn1</name>
    <value>hd01:53310</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.hacluster.nn1</name>
    <value>hd01:50070</value>
  </property>

  <!-- RPC, service RPC and HTTP addresses of nn2 -->
  <property>
    <name>dfs.namenode.rpc-address.hacluster.nn2</name>
    <value>hd02:9000</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.hacluster.nn2</name>
    <value>hd02:53310</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.hacluster.nn2</name>
    <value>hd02:50070</value>
  </property>

  <!-- where the NameNode's shared edit log lives on the JournalNodes -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hd01:8485;hd02:8485;hd03:8485/hacluster</value>
  </property>

  <!-- local directory where each JournalNode keeps its data -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/soft/hadoop260/dfs/journalnode_data</value>
  </property>

  <!-- where the NameNode stores its edit log -->
  <property>
    <name>dfs.namenode.edits.dir</name>
    <value>/opt/soft/hadoop260/dfs/edits</value>
  </property>

  <!-- enable automatic NameNode failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>

  <!-- how clients locate the active NameNode after a failover -->
  <property>
    <name>dfs.client.failover.proxy.provider.hacluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

  <!-- fencing method used during failover -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>

  <!-- sshfence needs passwordless SSH; point it at the private key -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>

  <!-- HDFS permission checking; false disables it -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
=======================3
#5. Configure mapred-site.xml
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
vim etc/hadoop/mapred-site.xml
=======================4
<configuration>
  <!-- run MapReduce on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- job history server address -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hd01:10020</value>
  </property>

  <!-- job history server web UI address -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hd01:19888</value>
  </property>

  <!-- enable uber mode (run small jobs inside the ApplicationMaster JVM) -->
  <property>
    <name>mapreduce.job.ubertask.enable</name>
    <value>true</value>
  </property>
</configuration>
=======================4
#6. Configure yarn-site.xml
vim etc/hadoop/yarn-site.xml
=======================5
<configuration>
  <!-- enable YARN ResourceManager high availability -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>

  <!-- name under which the YARN cluster registers in ZooKeeper -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>hayarn</value>
  </property>

  <!-- logical names of the two ResourceManagers -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>

  <!-- host running rm1 -->
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hd02</value>
  </property>

  <!-- host running rm2 -->
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hd03</value>
  </property>

  <!-- ZooKeeper addresses -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hd01:2181,hd02:2181,hd03:2181</value>
  </property>

  <!-- enable ResourceManager recovery -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>

  <!-- state-store implementation used for ResourceManager recovery -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>

  <!-- primary ResourceManager host -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hd03</value>
  </property>

  <!-- how NodeManagers serve shuffle data -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- enable log aggregation -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <!-- keep aggregated logs for 7 days -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
</configuration>
=======================5

Edit the slaves file (the list of worker nodes)

vim etc/hadoop/slaves
# remove localhost and add the three hosts
hd01
hd02
hd03
:wq

cd ..
scp -r hadoop260/ root@hd02:/opt/soft/
scp -r hadoop260/ root@hd03:/opt/soft/

Start the cluster

Configure the Hadoop environment variables on all 3 nodes (vi /etc/profile)

#hadoop env
export HADOOP_HOME=/opt/soft/hadoop260 
export HADOOP_MAPRED_HOME=$HADOOP_HOME 
export HADOOP_COMMON_HOME=$HADOOP_HOME 
export HADOOP_HDFS_HOME=$HADOOP_HOME 
export YARN_HOME=$HADOOP_HOME 
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native 
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin 
export HADOOP_INSTALL=$HADOOP_HOME

# apply the changes
source /etc/profile
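A quick sanity check that the variables took effect (run on each node):

hadoop version            # should print the Hadoop version banner (2.6.0-cdh5.x in this setup)
which hadoop-daemon.sh    # should resolve to /opt/soft/hadoop260/sbin/hadoop-daemon.sh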

Start ZooKeeper (on all 3 nodes)

zkServer.sh start

Start the JournalNodes (on all 3 nodes)

A common error at this step is "hadoop-deamon.sh: command not found" or "could not find or load main class journode"; both are simply spelling mistakes (the script is hadoop-daemon.sh and the service name is journalnode).

Solution:

hadoop-daemon.sh start journalnode
jps
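After this step, jps on each node should list roughly the following daemons (the process IDs below are only illustrative and will differ):

# 1362 QuorumPeerMain     (ZooKeeper)
# 1785 JournalNode
# 1866 Jps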

Format the NameNode (on hd01 only)

hdfs namenode -format

Copy the NameNode metadata from hd01 to the same location on hd02

scp -r /opt/soft/hadoop260/dfs/namenode_data/current/ root@hd02:/opt/soft/hadoop260/dfs/namenode_data/
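As an alternative to copying the metadata directory by hand, the standby NameNode can fetch it itself; this is the standard HDFS HA approach, sketched here as an option (run on hd02 after the JournalNodes are up):

hdfs namenode -bootstrapStandby    # run on hd02; pulls the formatted namespace from nn1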


Format the zkfc (failover controller) znode in ZooKeeper, on hd01 or hd02

hdfs zkfc -formatZK

Start the HDFS services on hd01

start-dfs.sh 
jps
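If everything is wired correctly, jps on hd01 should now show roughly the daemons below (hd02 looks the same; hd03 has no NameNode or ZKFC):

jps
# NameNode
# DataNode
# DFSZKFailoverController
# JournalNode
# QuorumPeerMain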

Start the YARN services on hd03

start-yarn.sh 
jps

Start the history server on hd01

mr-jobhistory-daemon.sh start historyserver 

Start the ResourceManager on hd02

yarn-daemon.sh start resourcemanager

Check the state of nn1 on hd01

hdfs haadmin -getServiceState nn1

Check the state of nn2 on hd02

hdfs haadmin -getServiceState nn2
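The ResourceManager HA state can be checked the same way; one of rm1/rm2 should report active and the other standby:

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2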

Open the web UIs in the browser
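Based on the ports configured above, the main pages are roughly the following (one NameNode UI shows active, the other standby; 8088 is the YARN default since no web port was set explicitly):

http://hd01:50070     # NameNode nn1 web UI
http://hd02:50070     # NameNode nn2 web UI
http://hd03:8088      # YARN ResourceManager web UI
http://hd01:19888     # MapReduce job history server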


Test failover (kill the NameNode on hd01 and check whether nn2 on hd02 becomes active)
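A minimal sketch of the test, assuming nn1 on hd01 is currently the active NameNode (the pid placeholder is whatever jps reports for NameNode):

jps                                  # on hd01: note the NameNode process id
kill -9 <NameNode-pid>               # kill the active NameNode on hd01
hdfs haadmin -getServiceState nn2    # run on hd02; should now report "active"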


Check the browser again (since the NameNode process on hd01 was killed, its web UI should no longer be reachable).

Start the NameNode on hd01 again

hadoop-daemon.sh start namenode

Reposted from blog.csdn.net/just_learing/article/details/126164192