HA configuration

hadoop-env.sh

export JAVA_HOME=/opt/jdk-9.0.4

core-site.xml

<configuration>
    <!-- In HA mode the two NameNodes share one nameservice; fs.defaultFS points to that nameservice (named bi here, which can be chosen arbitrarily) rather than a single NameNode URL -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://bi</value>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>/root/hdpdata</value>
    </property>
    <!-- ZooKeeper quorum address; the HA components (ZKFC for active/standby election, etc.) all depend on ZooKeeper -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>mini01:2181,mini02:2181,mini03:2181</value>
    </property>
</configuration>

hdfs-site.xml


<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/root/name1,/root/name2</value>
    </property>
    
    
    <!-- Set the nameservice of HDFS to bi; this must match core-site.xml. Multiple nameservices can be configured, as in federation -->
    <property>
        <name>dfs.nameservices</name>
        <value>bi</value>
    </property>

    <!-- The nameservice bi has two NameNodes; nn1 and nn2 are just logical IDs -->
    <property>
        <name>dfs.ha.namenodes.bi</name>
        <value>nn1,nn2</value>
    </property>

    <!-- RPC address of nn1 -->
    <property>
        <name>dfs.namenode.rpc-address.bi.nn1</name>
        <value>mini01:9000</value>
    </property>
    <!-- HTTP address of nn1 -->
    <property>
        <name>dfs.namenode.http-address.bi.nn1</name>
        <value>mini01:50070</value>
    </property>

    
    <!-- RPC address of nn2 -->
    <property>
        <name>dfs.namenode.rpc-address.bi.nn2</name>
        <value>mini02:9000</value>
    </property>
    <!-- HTTP address of nn2 -->
    <property>
        <name>dfs.namenode.http-address.bi.nn2</name>
        <value>mini02:50070</value>
    </property>

    <!-- Location on the JournalNodes where the NameNode's edit log is stored -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://mini01:8485;mini02:8485;mini03:8485/bi</value>
    </property>
    <!-- Local disk directory where the JournalNode stores its data -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/root/journaldata</value>
    </property>
    <!-- Enable automatic NameNode failover -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!-- Failover proxy provider that clients use to locate the active NameNode -->
    <property>
        <name>dfs.client.failover.proxy.provider.bi</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <!-- Fencing methods that ensure only one NameNode is active at a time; multiple methods are separated by newlines, i.e. one method per line -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>

    </property>
    <!-- Passwordless SSH login is required when using the sshfence fencing method -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <!-- Timeout for the sshfence fencing method -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>



</configuration>


mapred-site.xml


<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>


</configuration>

yarn-site.xml


<configuration>

<!-- Site specific YARN configuration properties -->

    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>mini01</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>


    <!--Enable RM high availability-->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <!-- Specify the cluster-id of RM; because there can be multiple RMs, they need a common cluster name -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yrc</value>
    </property>
    <!-- Logical IDs of the two RMs -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <!-- Specify the hostname of each RM -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>mini01</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>mini02</value>
    </property>

    <!--Specify zk cluster address-->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>mini01:2181,mini02:2181,mini03:2181</value>
    </property>

</configuration>


Add machines 5, 6 and 7 to the slaves file

Then copy the entire Hadoop directory to the other six machines, e.g. with scp, as sketched below.
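A minimal sketch of these two steps, assuming Hadoop is installed under /opt/hadoop and machines 5, 6 and 7 are reachable as mini05, mini06 and mini07 (the text only numbers the machines, so hostnames beyond those in the configs above are placeholders):

# Assumed hostnames for machines 5, 6 and 7 ("567"); adjust to the real ones
cat >> /opt/hadoop/etc/hadoop/slaves <<'EOF'
mini05
mini06
mini07
EOF

# Copy the whole Hadoop directory to the other six machines
for host in mini02 mini03 mini04 mini05 mini06 mini07; do
    scp -r /opt/hadoop root@$host:/opt/
done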




Seven machines are needed:

Machines 1 and 2: namenode

Machines 3 and 4: resourcemanager

Machines 5, 6 and 7: zookeeper + journalnode (quorum journal) + datanode

That is, ZooKeeper only needs to be installed on machines 5, 6 and 7.


The startup sequence below must be followed strictly.


1. First start ZooKeeper on machines 5, 6 and 7: cd /opt/zk/bin (or wherever ZooKeeper is installed)

./zkServer.sh start (run source /etc/profile first if executing over a remote ssh session)

ZooKeeper must come up first because the NameNode active/standby switching (handled by ZKFC) depends on it.
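A sketch of step 1, assuming ZooKeeper lives in /opt/zk on machines 5, 6 and 7 (mini05-mini07 are assumed hostnames):

# Start ZooKeeper on each ZK machine; sourcing /etc/profile picks up JAVA_HOME over ssh
for host in mini05 mini06 mini07; do
    ssh root@$host "source /etc/profile; /opt/zk/bin/zkServer.sh start"
done

# One node should report "leader", the other two "follower"
for host in mini05 mini06 mini07; do
    ssh root@$host "/opt/zk/bin/zkServer.sh status"
done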

2. Start the JournalNodes

cd into the Hadoop installation directory

sbin/hadoop-daemon.sh start journalnode. Run jps afterwards and a new JournalNode process should appear on machines 5, 6 and 7.

The JournalNodes store the shared edit log.
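A sketch of step 2. Note that the qjournal URL in hdfs-site.xml above lists mini01, mini02 and mini03, while the machine layout puts the JournalNodes on machines 5, 6 and 7; run the command on whichever hosts actually appear in dfs.namenode.shared.edits.dir:

# Assumed Hadoop home /opt/hadoop; hosts taken from the qjournal URL above
for host in mini01 mini02 mini03; do
    ssh root@$host "/opt/hadoop/sbin/hadoop-daemon.sh start journalnode"
    ssh root@$host "jps | grep JournalNode"    # a JournalNode process should now be listed
done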

3. Format HDFS. Because there are two NameNodes in HA, run hdfs namenode -format on one NameNode only.

Then copy the metadata directory generated on namenode1 over to nn2, so that the initial state of the two NNs is identical; from then on they stay in sync through the JournalNodes.
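A sketch of step 3, run on nn1 (mini01); /root/hdpdata is the hadoop.tmp.dir from core-site.xml and mini02 hosts nn2:

# On nn1 only: format the namespace (the JournalNodes must already be running)
hdfs namenode -format

# Copy the freshly formatted metadata directory to nn2 so both NNs start out identical
scp -r /root/hdpdata root@mini02:/root/

# Alternative: start nn1 first, then run this on nn2 instead of copying by hand
#   hdfs namenode -bootstrapStandby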


4. Format ZKFC. Execute this once, on either NN, because all it does is create a znode in the ZooKeeper cluster.

hdfs zkfc -formatZK

What it actually creates is a znode in the ZooKeeper cluster named after the nameservice shared by the two NNs.
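A sketch of step 4 plus a quick check; /hadoop-ha is the default parent znode under which a child named after the nameservice is created:

# On one NameNode only
hdfs zkfc -formatZK

# Verify from any machine with the ZooKeeper client: "bi" should be listed
/opt/zk/bin/zkCli.sh -server mini01:2181 ls /hadoop-ha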


This completes the initialization.

Startup. The precondition is that passwordless SSH from the NN to the other machines has been configured in advance.

Run start-dfs.sh on NN1.

The log will show that the two NNs, three DataNodes, three JournalNodes, and two ZKFCs have been started.
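To confirm which NN came up active, hdfs haadmin can be queried with the logical IDs configured in hdfs-site.xml:

hdfs haadmin -getServiceState nn1    # expected: active
hdfs haadmin -getServiceState nn2    # expected: standby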


Then start YARN; the precondition is that passwordless SSH from the YARN machine to the others has been configured.

Run start-yarn.sh on machine 3; machines 5, 6 and 7 will then start NodeManagers because they are listed in the slaves file.

Because there are two RMs but only one is started automatically, start the other one manually: yarn-daemon.sh start resourcemanager
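A sketch of the YARN startup and a state check; rm1 and rm2 are the IDs from yarn-site.xml (the layout text puts the RMs on machines 3 and 4, while yarn-site.xml names mini01 and mini02, so run these on whichever hosts really carry the RMs):

# On the first RM host: starts the RM here and NodeManagers on the hosts in the slaves file
start-yarn.sh

# On the second RM host: the standby RM is not started automatically
yarn-daemon.sh start resourcemanager

# Check which RM is active
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2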


NameNode web UIs: mini01:50070 (active), mini02:50070 (standby)


Accessing the cluster from an IDE will report an error saying it does not know bi, because the client treats bi as a hostname when it is actually just a nameservice ID.

The solution is to put the HDFS configuration files into the IDE project's configuration directory, as sketched below.
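A sketch, assuming a Maven-style project whose src/main/resources directory ends up on the client classpath (the project path is a placeholder):

# Copy the cluster-side configs into the project so the client can resolve the nameservice "bi"
cp /opt/hadoop/etc/hadoop/core-site.xml \
   /opt/hadoop/etc/hadoop/hdfs-site.xml \
   /path/to/project/src/main/resources/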

YARN web UIs: mini03:8088 and mini04:8088
