hadoop-env.sh
# JDK install path for all Hadoop daemons; set explicitly because daemons started
# over ssh do not inherit the login shell's JAVA_HOME.
export JAVA_HOME=/opt/jdk-9.0.4
core-site.xml
<configuration>
<!-- HA: the two NameNodes share one logical nameservice URL; the name "bi" is
     arbitrary but must match dfs.nameservices in hdfs-site.xml -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://bi</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/root/hdpdata</value>
</property>
<!-- ZooKeeper quorum address: HA active/standby election (zkfc) and JournalNode
     coordination all rely on ZooKeeper.
     NOTE(review): the deployment notes below install ZooKeeper on machines 5-7,
     but these hosts are mini01-03 — confirm which is intended. -->
<property>
<name>ha.zookeeper.quorum</name>
<value>mini01:2181,mini02:2181,mini03:2181</value>
</property>
</configuration>
hdfs-site.xml
<configuration>
  <!-- Number of block replicas to keep -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <!-- NameNode metadata directories; comma-separated list, no spaces allowed.
       NOTE(review): dfs.name.dir is the deprecated alias of dfs.namenode.name.dir. -->
  <property>
    <name>dfs.name.dir</name>
    <value>/root/name1,/root/name2</value>
  </property>
  <!-- Logical nameservice ID; must match fs.defaultFS in core-site.xml.
       Multiple nameservices may be listed here (e.g. for federation). -->
  <property>
    <name>dfs.nameservices</name>
    <value>bi</value>
  </property>
  <!-- Nameservice "bi" has two NameNodes; nn1 and nn2 are arbitrary IDs -->
  <property>
    <name>dfs.ha.namenodes.bi</name>
    <value>nn1,nn2</value>
  </property>
  <!-- RPC address of nn1 -->
  <property>
    <name>dfs.namenode.rpc-address.bi.nn1</name>
    <value>mini01:9000</value>
  </property>
  <!-- HTTP (web UI) address of nn1 -->
  <property>
    <name>dfs.namenode.http-address.bi.nn1</name>
    <value>mini01:50070</value>
  </property>
  <!-- RPC address of nn2 -->
  <property>
    <name>dfs.namenode.rpc-address.bi.nn2</name>
    <value>mini02:9000</value>
  </property>
  <!-- HTTP (web UI) address of nn2 -->
  <property>
    <name>dfs.namenode.http-address.bi.nn2</name>
    <value>mini02:50070</value>
  </property>
  <!-- Where the NameNodes' shared edit log lives on the JournalNode quorum -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://mini01:8485;mini02:8485;mini03:8485/bi</value>
  </property>
  <!-- Local disk directory where each JournalNode stores its data -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/root/journaldata</value>
  </property>
  <!-- Enable automatic NameNode failover (via zkfc) -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- Proxy provider HDFS clients use to locate the active NameNode of "bi" -->
  <property>
    <name>dfs.client.failover.proxy.provider.bi</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Fencing guarantees only one NameNode is active at a time; one mechanism
       per line, tried in order. shell(/bin/true) is the fallback for when
       sshfence cannot reach the failed node. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/bin/true)
    </value>
  </property>
  <!-- sshfence needs passwordless ssh; private key it should use -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
  <!-- sshfence connection timeout, in milliseconds -->
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
</configuration>
mapred-site.xml
<configuration>
  <!-- Run MapReduce jobs on YARN instead of the classic local framework -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
yarn-site.xml
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>mini01</value>
  </property>
  <!-- Auxiliary service NodeManagers must run for the MapReduce shuffle -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Enable ResourceManager high availability -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <!-- Cluster ID shared by all RMs; with HA there are multiple RMs, so they
       need a common name -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yrc</value>
  </property>
  <!-- Logical IDs of the two ResourceManagers -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <!-- Host of each ResourceManager.
       NOTE(review): the deployment notes place the RMs on machines 3 and 4
       (mini03/mini04), but these values say mini01/mini02 — confirm which
       is intended. -->
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>mini01</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>mini02</value>
  </property>
  <!-- ZooKeeper quorum used for RM leader election and state storage -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>mini01:2181,mini02:2181,mini03:2181</value>
  </property>
</configuration>
Add machines 5, 6 and 7 to the slaves file.
Then copy the entire Hadoop installation directory to the other six machines.
Seven machines are needed in total:
machines 1 and 2 run the NameNodes,
machines 3 and 4 run the ResourceManagers,
and machines 5, 6 and 7 each run ZooKeeper + quorum JournalNode + DataNode.
That is, ZooKeeper only needs to be installed on machines 5, 6 and 7.
The startup sequence below must be followed strictly.
1. First start ZooKeeper on machines 5, 6 and 7: cd /opt/zk/bin (wherever ZooKeeper is installed),
then run ./zkServer.sh start (run source /etc/profile first if logged in remotely over ssh),
because NameNode state switching (performed by zkfc) and edit-log coordination depend on ZooKeeper.
2. Start the JournalNodes:
cd into the Hadoop directory and run
sbin/hadoop-daemon.sh start journalnode — running jps on machines 5, 6 and 7 should now show a JournalNode process.
The JournalNodes store the shared NameNode edit log.
3. Format HDFS. Because HA has two NameNodes, first run hdfs namenode -format on only one of them.
Then copy the metadata directory generated on NameNode 1 to NameNode 2, so that both NNs start from an identical initial state; from then on they stay in sync through the JournalNodes.
4. Format zkfc. Run it once on either NameNode, because all it does is create a znode in the ZooKeeper cluster:
hdfs zkfc -formatZK
It creates a parent znode in ZooKeeper named after the nameservice shared by the two NameNodes.
Initialization is now complete.
Startup: the prerequisite is that passwordless ssh from the NameNode to all other machines has been configured in advance.
Run start-dfs.sh on NameNode 1.
The log will show that two NameNodes, three DataNodes, three JournalNodes, and two zkfc processes have been started.
Then start YARN; the prerequisite is that passwordless ssh from the YARN master to the other machines is configured.
Run start-yarn.sh on machine 3; machines 5, 6 and 7 will also start NodeManagers because they are listed in the slaves file.
There are two ResourceManagers, but the second one must be started manually: yarn-daemon.sh start resourcemanager.
Web UIs: mini01:50070 shows the active NameNode, mini02:50070 the standby.
A client IDE may report an error saying it cannot resolve "bi" — it treats "bi" as a hostname, when it is really just the nameservice's logical name.
The solution is to put the HDFS configuration files into the IDE project's configuration directory (classpath).
YARN ResourceManager UIs: mini03:8088
and mini04:8088.