Hadoop High Availability Cluster Setup
Preparation
- Install 3 CentOS 7 servers
- Set the hostnames to hd01 / hd02 / hd03
- Configure static network addresses
- Turn off the firewall
```bash
hostnamectl set-hostname hd01                    # repeat on hd02/hd03 with their own names
vim /etc/hosts                                   # map hd01, hd02, hd03 to their IP addresses
vim /etc/sysconfig/network-scripts/ifcfg-ens33   # set a static IP
systemctl stop firewalld.service                 # the firewall would still come back after a reboot
systemctl disable firewalld.service              # so also prevent it from starting at boot
systemctl restart network
# configure the 3 machines in sequence with addresses ending in 220, 230, 240
```
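For reference, a minimal sketch of what the two edited files might contain; the 192.168.56.x addresses, gateway, and DNS values below are assumptions, so substitute the values of your own network:

```
# /etc/hosts (same entries on all three machines)
192.168.56.220 hd01
192.168.56.230 hd02
192.168.56.240 hd03

# /etc/sysconfig/network-scripts/ifcfg-ens33 on hd01 (use .230/.240 on hd02/hd03)
BOOTPROTO=static
ONBOOT=yes
IPADDR=192.168.56.220
NETMASK=255.255.255.0
GATEWAY=192.168.56.1
DNS1=114.114.114.114
```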
Configure SSH keys (do this on all 3 nodes)
```bash
# on hd01: generate a passphrase-less SSH key pair
ssh-keygen -t rsa -P ''
# copy the public key to hd01, hd02 and hd03 (answer yes and enter the password when prompted)
ssh-copy-id root@hd01
ssh-copy-id root@hd02
ssh-copy-id root@hd03
```
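A quick way to confirm passwordless login works is to run a remote command from hd01; it should complete without a password prompt:

```bash
ssh root@hd02 hostname   # should print hd02 with no password prompt
ssh root@hd03 hostname
```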
Time synchronization on all servers (all 3 nodes)
```bash
# install chrony
yum -y install chrony
# edit /etc/chrony.conf: comment out the default "server 0.centos.pool.ntp.org iburst" lines
# and use the Aliyun NTP servers instead
vim /etc/chrony.conf
```

```
server ntp1.aliyun.com
server ntp2.aliyun.com
server ntp3.aliyun.com
```

```bash
# check the time and start chrony
date
systemctl start chronyd
# install wget
yum install -y wget
# install psmisc (command-line tools used for NameNode active/standby switching;
# only needs to be installed on the two NameNode nodes)
yum install -y psmisc
```
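To verify that chrony is actually syncing against the Aliyun servers, the chronyc client can list the configured sources and their state:

```bash
systemctl enable chronyd   # optional: also start chrony at boot
chronyc sources -v         # lists the NTP sources and whether one is selected
```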
Upload the installation packages to /opt (on hd01 first)
```bash
cd /opt/
ls
tar -zxf zookeeper-3.4.5-cdh5.14.2.tar.gz
mv zookeeper-3.4.5-cdh5.14.2 soft/zk345
cd soft/zk345/conf/
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
```

In zoo.cfg, change the data directory and add the three servers (the first port is used for cluster communication, the second for leader election):

```
dataDir=/opt/soft/zk345/data
server.1=hd01:2888:3888
server.2=hd02:2888:3888
server.3=hd03:2888:3888
```

```bash
cd ..
mkdir data
cd data/
echo "1" > myid
cat myid                  # seeing 1 means it worked
cd ../..
ls
scp -r zk345/ root@hd02:/opt/soft/
scp -r zk345/ root@hd03:/opt/soft/
# in the hd02 window
cd /opt/soft/
ls
vim zk345/data/myid       # change 1 to 2
# in the hd03 window
cd /opt/soft/
ls
vim zk345/data/myid       # change 1 to 3
```
Modify the environment variables (on all 3 nodes at the same time)
```bash
vim /etc/profile
```

Append:

```bash
#ZK ENV
export ZOOKEEPER_HOME=/opt/soft/zk345
export PATH=$PATH:$ZOOKEEPER_HOME/bin
```

Then activate the profile and start ZooKeeper:

```bash
source /etc/profile
zkServer.sh start
jps
```
Install the Hadoop cluster (on hd01)

1. Unpack Hadoop and create the data directories:

```bash
cd /opt/
ls
tar -zxf had          # "had" abbreviates the Hadoop tarball name in the original
mv had soft/hadoop260
cd soft/hadoop260
mkdir tmp
mkdir -p dfs/journalnode_data
mkdir -p dfs/edits
mkdir -p dfs/datanode_data
mkdir -p dfs/namenode_data
ls
cd dfs/
ls
cd ..
```

2. Configure hadoop-env.sh:

```bash
vim etc/hadoop/hadoop-env.sh
```

```bash
JAVA_HOME=/opt/soft/jdk180
HADOOP_CONF_DIR=/opt/soft/hadoop260/etc/hadoop
```

3. Configure core-site.xml:

```bash
vim etc/hadoop/core-site.xml
```

```xml
<configuration>
  <!-- the name under which the hadoop cluster registers on zookeeper -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hacluster</value>
  </property>
  <!-- directory for temporary files generated while hadoop runs -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:///opt/soft/hadoop260/tmp</value>
  </property>
  <!-- buffer size, 4KB by default -->
  <property>
    <name>io.file.buffer.size</name>
    <value>4096</value>
  </property>
  <!-- zookeeper quorum addresses -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hd01:2181,hd02:2181,hd03:2181</value>
  </property>
  <!-- hosts from which the root user may act as a proxy -->
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <!-- groups whose users root may impersonate -->
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>
```

(If the comments cause errors when the file is parsed, remove them and keep only the properties.)

4. Configure hdfs-site.xml:

```bash
vim etc/hadoop/hdfs-site.xml
```

```xml
<configuration>
  <!-- default block size, 128M -->
  <property>
    <name>dfs.block.size</name>
    <value>134217728</value>
  </property>
  <!-- replication factor, 3 by default if not configured -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- where the namenode stores its data (metadata) -->
  <property>
    <name>dfs.name.dir</name>
    <value>file:///opt/soft/hadoop260/dfs/namenode_data</value>
  </property>
  <!-- where the datanode stores its data -->
  <property>
    <name>dfs.data.dir</name>
    <value>file:///opt/soft/hadoop260/dfs/datanode_data</value>
  </property>
  <!-- enable the hdfs web ui -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <!-- number of threads the datanode uses for file transfers -->
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>4096</value>
  </property>
  <!-- the name under which the hadoop cluster registers on zookeeper -->
  <property>
    <name>dfs.nameservices</name>
    <value>hacluster</value>
  </property>
  <!-- the hacluster nameservice has two namenodes, nn1 and nn2 -->
  <property>
    <name>dfs.ha.namenodes.hacluster</name>
    <value>nn1,nn2</value>
  </property>
  <!-- rpc, servicerpc and http addresses of nn1 -->
  <property>
    <name>dfs.namenode.rpc-address.hacluster.nn1</name>
    <value>hd01:9000</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.hacluster.nn1</name>
    <value>hd01:53310</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.hacluster.nn1</name>
    <value>hd01:50070</value>
  </property>
  <!-- rpc, servicerpc and http addresses of nn2 -->
  <property>
    <name>dfs.namenode.rpc-address.hacluster.nn2</name>
    <value>hd02:9000</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.hacluster.nn2</name>
    <value>hd02:53310</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.hacluster.nn2</name>
    <value>hd02:50070</value>
  </property>
  <!-- where the namenode metadata is stored on the journalnodes -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hd01:8485;hd02:8485;hd03:8485/hacluster</value>
  </property>
  <!-- where the journalnode stores data on local disk -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/soft/hadoop260/dfs/journalnode_data</value>
  </property>
  <!-- where the namenode edit log is stored -->
  <property>
    <name>dfs.namenode.edits.dir</name>
    <value>/opt/soft/hadoop260/dfs/edits</value>
  </property>
  <!-- enable automatic namenode failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- class that implements failover for clients -->
  <property>
    <name>dfs.client.failover.proxy.provider.hacluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- fencing method -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <!-- sshfence needs passwordless ssh; point it at the private key -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
  <!-- hdfs permission checking; false disables it -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
```

5. Configure mapred-site.xml:

```bash
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
vim etc/hadoop/mapred-site.xml
```

```xml
<configuration>
  <!-- run mapreduce on yarn -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- job history server address -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hd01:10020</value>
  </property>
  <!-- job history server web ui address -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hd01:19888</value>
  </property>
  <!-- enable uber mode -->
  <property>
    <name>mapreduce.job.ubertask.enable</name>
    <value>true</value>
  </property>
</configuration>
```

6. Configure yarn-site.xml:

```bash
vim etc/hadoop/yarn-site.xml
```

```xml
<configuration>
  <!-- enable yarn high availability -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <!-- the name under which the yarn cluster registers on zookeeper -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>hayarn</value>
  </property>
  <!-- the ids of the two resourcemanagers -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <!-- host of rm1 -->
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hd02</value>
  </property>
  <!-- host of rm2 -->
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hd03</value>
  </property>
  <!-- zookeeper addresses -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hd01:2181,hd02:2181,hd03:2181</value>
  </property>
  <!-- enable the yarn recovery mechanism -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <!-- class that implements resourcemanager state recovery -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <!-- address of the main resourcemanager -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hd03</value>
  </property>
  <!-- how the nodemanager obtains data -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- enable log aggregation -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- keep aggregated logs for 7 days -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
</configuration>
```
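Before copying the configuration to the other nodes, it can be worth checking that each edited XML file is still well formed, since a stray character or broken comment will make the daemons fail at startup. Assuming xmllint is available (on CentOS 7 it ships with the libxml2 package):

```bash
cd /opt/soft/hadoop260/etc/hadoop
for f in core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml; do
  xmllint --noout "$f" && echo "$f OK"   # prints the offending line if the XML is malformed
done
```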
Edit the cluster slaves file
```bash
vim etc/hadoop/slaves
```

Delete localhost and list the three nodes instead:

```
hd01
hd02
hd03
```

Then copy the installation to the other two nodes:

```bash
cd ..
scp -r hadoop260/ root@hd02:/opt/soft/
scp -r hadoop260/ root@hd03:/opt/soft/
```
Start the cluster
Configure the Hadoop environment variables (vi /etc/profile) on all 3 nodes
```bash
#hadoop env
export HADOOP_HOME=/opt/soft/hadoop260
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_INSTALL=$HADOOP_HOME
```

```bash
# activate
source /etc/profile
```
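A quick way to confirm the variables took effect on each node is to call a Hadoop command that is now on the PATH:

```bash
hadoop version   # should print the installed Hadoop version banner
```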
Start ZooKeeper (on all 3 nodes)
zkServer.sh start
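Once all three nodes have run zkServer.sh start, the election result can be checked on each node; one should be the leader and the other two followers:

```bash
zkServer.sh status   # prints "Mode: leader" on one node and "Mode: follower" on the others
jps                  # each node should show a QuorumPeerMain process
```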
Start the JournalNodes (on all 3 nodes)
Possible problems: `hadoop-deamon.sh: command not found`, or "could not find or load main class journode" — both come from misspelling (the script is hadoop-daemon.sh and the service is journalnode).
Solution:
hadoop-daemon.sh start journalnode
jps
Format namenode (only on hd01 host)
hdfs namenode -format
Copy the metadata of the Namenode on hd01 to the same location on hd02
scp -r /opt/soft/hadoop260/dfs/namenode_data/current/ root@hd02:/opt/soft/hadoop260/dfs/namenode_data/
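As an alternative to copying the directory by hand, HDFS has a bootstrap command for the standby NameNode. It is only an option, and it expects the freshly formatted NameNode on hd01 to already be running (hadoop-daemon.sh start namenode on hd01) so it can pull the namespace from it:

```bash
# on hd02, with the NameNode on hd01 already started
hdfs namenode -bootstrapStandby
```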
Format failover controller zkfc on hd01 or hd02
hdfs zkfc -formatZK
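To confirm the format created the failover znode, a quick look with the ZooKeeper client helps (ZKFC keeps its state under /hadoop-ha):

```bash
zkCli.sh -server hd01:2181
ls /       # should list a hadoop-ha znode containing hacluster
quit
```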
Start the dfs service on hd01
start-dfs.sh
jps
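If everything came up, jps on the three nodes should show roughly the following processes (assuming ZooKeeper and the JournalNodes were started as above; process IDs will differ):

```bash
# hd01: NameNode, DataNode, JournalNode, DFSZKFailoverController, QuorumPeerMain
# hd02: NameNode, DataNode, JournalNode, DFSZKFailoverController, QuorumPeerMain
# hd03: DataNode, JournalNode, QuorumPeerMain
```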
Start yarn service on hd03
start-yarn.sh
jps
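Based on the yarn-site.xml above, jps should now additionally show something like this (the ResourceManager on hd02 is started separately below):

```bash
# hd03: adds ResourceManager and NodeManager
# hd01, hd02: add a NodeManager
```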
Start the history server on hd01
mr-jobhistory-daemon.sh start historyserver
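A quick check on hd01 (the web UI address comes from mapred-site.xml):

```bash
jps   # hd01 should now also list a JobHistoryServer (web UI at hd01:19888)
```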
Start the resourcemanager service on hd02
yarn-daemon.sh start resourcemanager
Check the NameNode state on hd01
hdfs haadmin -getServiceState nn1
Check the NameNode state on hd02
hdfs haadmin -getServiceState nn2
Open the web UIs in a browser: http://hd01:50070 and http://hd02:50070 (one NameNode should show as active, the other as standby)
Test failover (kill the active NameNode on hd01 and see whether hd02 takes over as active)!
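A minimal way to run the test, assuming hd01 currently holds the active NameNode:

```bash
# on hd01: find the NameNode process id and kill it
jps                       # note the pid printed next to "NameNode"
kill -9 <NameNode pid>    # replace <NameNode pid> with that number
# then check which node is active now
hdfs haadmin -getServiceState nn2   # should report active after failover
```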
Check the browser again! (Because the NameNode process on hd01 was killed, hd01:50070 should no longer be accessible, while hd02 should now be active.)
Start the NameNode on hd01 again
hadoop-daemon.sh start namenode