安装Hadoop(Linux CentOS)
1、前期环境准备工作
1.1、安装小工具
sudo yum install -y epel-release
sudo yum install -y psmisc nc net-tools rsync vim lrzsz ntp libzstd openssl-static
1.2、修改主机名
sudo hostnamectl set-hostname hadoop101
1.3、修改主机名映射
sudo vim /etc/hosts
192.168.1.100 hadoop100
192.168.1.101 hadoop101
192.168.1.102 hadoop102
192.168.1.103 hadoop103
192.168.1.104 hadoop104
1.4、关闭防火墙
sudo systemctl stop firewalld
sudo systemctl disable firewalld
1.5、创建atguigu用户
sudo useradd atguigu
sudo passwd atguigu
1.6、重启
reboot
1.7、配置atguigu用户具有root权限
visudo
#修改/etc/sudoers文件,91行左右,在root下面添加
root ALL=(ALL) ALL
atguigu ALL=(ALL) NOPASSWD:ALL
1.8、在/opt 目录下创建文件夹
# 在/opt 目录下创建 module、software 文件夹
sudo mkdir /opt/module /opt/software
# 修改module、software 文件夹的所有者
sudo chown atguigu:atguigu /opt/module /opt/software
2、安装jdk
安装jdk详情: https://blog.csdn.net/Asia1752/article/details/104505189
3、安装hadoop
sudo vim /etc/profile.d/my_env.sh
## Hadoop_home
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
# 修改后需执行 source /etc/profile.d/my_env.sh(或重新登录)使环境变量生效
检验:
hadoop version
hadoop checknative
4、hadoop集群配置
4.0、需要配置的文件列表:cd $HADOOP_HOME/etc/hadoop
core-site.xml
hdfs-site.xml
mapred-site.xml
yarn-site.xml
workers
4.1、vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop102:8020</value>
</property>
<property>
<name>hadoop.data.dir</name>
<value>/opt/module/hadoop-3.1.3/data</value>
</property>
<property>
<name>hadoop.proxyuser.atguigu.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.atguigu.groups</name>
<value>*</value>
</property>
</configuration>
4.2、vi hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file://${hadoop.data.dir}/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file://${hadoop.data.dir}/data</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file://${hadoop.data.dir}/namesecondary</value>
</property>
<property>
<name>dfs.client.datanode-restart.timeout</name>
<value>30</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop104:9868</value>
</property>
</configuration>
4.3、vi mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
4.4、vi yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop103</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
</configuration>
4.5、vi workers
# 配置所有的从机
hadoop102
hadoop103
hadoop104
4.6、集群启动前-格式化(仅首次启动前执行一次,在 NameNode 所在节点 hadoop102 上执行)
hdfs namenode -format
4.7、启动
start-dfs.sh
# 启动顺序:namenodes、datanodes、secondary namenodes
# 启动resourcemanager(在 ResourceManager 所在节点 hadoop103 上执行)
start-yarn.sh
# 查看启动情况
jps
停止
stop-dfs.sh
stop-yarn.sh
5、hadoop集群扩展配置
5.1、配置历史服务器
建议与日志的聚集同节点
vi mapred-site.xml
<!-- 历史服务器端地址 -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop102:10020</value>
</property>
<!-- 历史服务器web端地址 -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop102:19888</value>
</property>
启动历史服务器
mapred --daemon start historyserver
5.2、配置日志的聚集
建议与历史服务器同节点
vi yarn-site.xml
<!-- 开启日志聚集功能 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 日志服务器 -->
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop102:19888/jobhistory/logs</value>
</property>
<!-- 日志保持一星期 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>