#vi /etc/hosts
192.168.0.50 kf23   # master
192.168.0.51 kf2    # slave1
192.168.0.52 kf3    # slave2
1 Password-free login
Cluster or single node mode requires SSH login
#yum install openssh-server    # installs ssh
#ssh kf23    # test whether the ssh command is available
(1) Set ssh to log in without password
#cd ~/.ssh
#ssh-keygen -t rsa    # when prompted, press Enter to accept all defaults
#cat id_rsa.pub >> ./authorized_keys    # add the key to the authorized list
#ssh kf23    # test ssh login without a password
(2) Password-free login to each node
Let kf23 node SSH login to each kf2, kf3 node without password.
(2-1) On the kf23 node, transmit the public key to the kf2 and kf3 nodes
scp /root/.ssh/id_rsa.pub root@kf2:/root
#scp /root/.ssh/id_rsa.pub root@kf3:/root
(2-2) Add the ssh public key to the authorization on the kf2 and kf3 nodes
#mkdir ~/.ssh    # if the folder already exists, skip this command
#cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
#rm ~/id_rsa.pub    # delete the copied key file after use
(2-3) On kf23, ssh to the kf2 and kf3 nodes without a password
[root@kf23 ~]# ssh kf2
[root@kf23 ~]# ssh kf3
2 Install the Java environment
The Java JDK needs to be installed on all three machines.
#tar -xzvf jdk-8u181-linux-x64.tar.gz -C /usr/local/
#vi /root/.bashrc
export JAVA_HOME=/usr/local/jdk1.8.0_181
export PATH=$PATH:$JAVA_HOME/bin
#source /root/.bashrc
#java -version
3 Install hadoop
All three machines need to be installed
3.1 Download and install
#tar -xzvf hadoop-2.8.0.tar.gz -C /usr/local
#cd /usr/local
#mv ./hadoop-2.8.0/ ./hadoop
#vi /root/.bashrc
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
export HADOOP_CLASSPATH=/usr/local/hadoop/share/hadoop/common/lib
#source /root/.bashrc
3.2 Modify the configuration file
The cluster distributed environment needs to modify 5 configuration files in /usr/local/hadoop/etc/hadoop. Only the necessary setting items for normal startup are set here.
#cd /usr/local/hadoop/etc/hadoop/
3.2.1 slaves file
Here let the kf23 node be used only as the NameNode, so the original localhost in the file is deleted, and only two lines of content kf2 and kf3 are added.
#vim slaves
kf2
kf3
3.2.2 core-site.xml file
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://kf23:9000</value>
<description>The name of the default file system. </description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>
</configuration>
3.2.3 hdfs-site.xml file
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/tmp/dfs/data</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>kf23:50090</value>
</property>
</configuration>
Standalone does not need to configure the following two files:
mapred-site.xml file and yarn-site.xml file
3.2.4 mapred-site.xml file [required if you start yarn]
#cp ./mapred-site.xml.template ./mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>kf23:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>kf23:19888</value>
</property>
</configuration>
3.2.5 yarn-site.xml file [Required if yarn is started]
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>kf23</value>
</property>
</configuration>
In order to run YARN, a machine needs to be designated as a resource manager. The easiest way is to set the property yarn.resourcemanager.hostname to the hostname or IP address of the machine used to run the resource manager. Basically, the address of the resource manager server can be obtained from this attribute.
3.2.6 hadoop-env.sh file
#vi hadoop-env.sh
Configure the following line; the rest can be kept at the defaults.
#The java implementation to use.
export JAVA_HOME=/usr/local/jdk1.8.0_181
4 Start the cluster
For the first start, you need to format the NameNode on kf23:
#hdfs namenode -format
#start-dfs.sh    # start the cluster
http://kf23:50070 View the webpage
#stop-dfs.sh Stop the cluster