Environment version: hadoop 3.1.2 version, CentOS7.8
1. Preparation phase (hduser)
Use hduser account to log in and
upload files.
Unzip the file: tar -zxvf ~/hadoop-3.1.2.tar.gz
modify the file name to hadoop: mv hadoop-3.1.2 hadoop
Note: When we don’t know what the unzipped file name is, we can use the ls command to list the file name or folder name of the current directory
2. Configure hadoop environment variables (hduser)
Compile the sudo vi /etc/profile file and add the following at the end:
# Root of the Hadoop installation; the other settings below are derived from it.
export HADOOP_HOME="/home/hduser/hadoop"
# Location of Hadoop's native libraries inside the installation.
export JAVA_LIBRARY_PATH="${HADOOP_HOME}/lib/native"
# Put the Hadoop client commands (bin) and the cluster scripts (sbin) ahead of the existing PATH.
export PATH="${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${PATH}"
3. Cluster configuration (hduser)
3.0, enter the configuration storage directory
3.1, core-site.xml placement
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- core-site.xml: settings shared by every Hadoop daemon and client. -->
<configuration>
  <!-- Default file system: HDFS served from node1 on port 9000. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:9000</value>
  </property>
  <!-- I/O buffer size in bytes (131072 = 128 KiB). -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <!--
    Base directory for Hadoop's temporary data. This must be a plain local
    path, not a file: URI: other defaults are built from it by substitution
    (for example file://${hadoop.tmp.dir}/dfs/namesecondary), so a URI here
    would produce a malformed "file://file:/..." location.
  -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hduser/hadoop/tmp</value>
  </property>
</configuration>
3.2, hadoop-env.sh configuration
at the top,
add one more line of code
# JDK location used by the Hadoop scripts. NOTE(review): confirm /usr/java/jdk exists on every node.
export JAVA_HOME=/usr/java/jdk
3.3, hdfs-site.xml configuration
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- hdfs-site.xml: HDFS storage locations and daemon tuning. -->
<configuration>

  <!-- NameNode: where the namespace metadata is kept, and RPC handler threads. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/hduser/hadoop/dfs/name</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
  </property>

  <!-- SecondaryNameNode web address (hosted on node1). -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>node1:50090</value>
  </property>

  <!-- DataNode: where block data is stored on each worker. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/hduser/hadoop/dfs/data</value>
  </property>

  <!-- Block size in bytes (268435456 = 256 MiB) and copies kept of each block. -->
  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>

  <!-- NOTE(review): WebHDFS appears to be always enabled in Hadoop 3.x, so this flag may be ignored; confirm before relying on it. -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>

</configuration>
3.4, mapred-site.xml placement
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- mapred-site.xml: MapReduce execution framework, history server and task memory. -->
<configuration>

  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- JobHistory server endpoints on node1: IPC address and web UI. -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
  </property>

  <!-- Container memory (MB) requested for each map and reduce task. -->
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>4096</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>8192</value>
  </property>

  <!--
    JVM heap for the task processes; each is set below the matching container
    size above (3072 of 4096 MB, 6144 of 8192 MB). Values carry no
    surrounding whitespace.
  -->
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx3072m</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx6144m</value>
  </property>

</configuration>
3.5, yarn-site.xml placement
<?xml version="1.0" encoding="UTF-8"?>
<!-- yarn-site.xml: ResourceManager endpoints, NodeManager shuffle service and application classpaths. -->
<configuration>

  <!-- ResourceManager host and its individual service endpoints (all on node1). -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>node1:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>node1:8030</value>
  </property>
  <!-- NOTE(review): Hadoop's default resource-tracker port is 8031; 8035 is kept from the original and must be identical on every node. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>node1:8035</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>node1:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>node1:8088</value>
  </property>

  <!--
    Auxiliary shuffle service required by MapReduce. The class key must embed
    the service name exactly as declared in yarn.nodemanager.aux-services
    (mapreduce_shuffle, with an underscore).
  -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>

  <!--
    Classpath handed to YARN applications: the configuration directory
    followed by the jar directories of the installation under
    /home/hduser/hadoop. Written as one comma-separated line so that no
    line breaks or indentation end up inside the value.
  -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/home/hduser/hadoop/etc/hadoop,/home/hduser/hadoop/share/hadoop/common/*,/home/hduser/hadoop/share/hadoop/common/lib/*,/home/hduser/hadoop/share/hadoop/mapreduce/*,/home/hduser/hadoop/share/hadoop/mapreduce/lib/*,/home/hduser/hadoop/share/hadoop/hdfs/*,/home/hduser/hadoop/share/hadoop/hdfs/lib/*,/home/hduser/hadoop/share/hadoop/yarn/*,/home/hduser/hadoop/share/hadoop/yarn/lib/*</value>
  </property>

  <!-- Same entries for MapReduce tasks. NOTE(review): this property is conventionally set in mapred-site.xml; confirm it is picked up from here. -->
  <property>
    <name>mapreduce.application.classpath</name>
    <value>/home/hduser/hadoop/etc/hadoop,/home/hduser/hadoop/share/hadoop/common/*,/home/hduser/hadoop/share/hadoop/common/lib/*,/home/hduser/hadoop/share/hadoop/mapreduce/*,/home/hduser/hadoop/share/hadoop/mapreduce/lib/*,/home/hduser/hadoop/share/hadoop/hdfs/*,/home/hduser/hadoop/share/hadoop/hdfs/lib/*,/home/hduser/hadoop/share/hadoop/yarn/*,/home/hduser/hadoop/share/hadoop/yarn/lib/*</value>
  </property>

</configuration>
3.6. Yarn-env.sh configuration
At the top,
add one more line of code
# JDK location used by the YARN scripts. NOTE(review): confirm /usr/java/jdk exists on every node.
export JAVA_HOME=/usr/java/jdk
3.7, workers configuration