Hadoop 2.7.3 分布式集群安装

1. 集群规划:
192.168.1.252 palo252 Namenode+Datanode
192.168.1.253 palo253 YarnManager+Datanode+SecondaryNameNode
192.168.1.254 palo254 Datanode

2. 设定固定IP地址
vi /etc/sysconfig/network-scripts/ifcfg-eth0

TYPE=Ethernet
BOOTPROTO=static
DEFROUTE=yes
NAME=eth0
UUID=7ac09286-c35b-4f15-a9ba-701c093832bf
DEVICE=eth0
IPV4_FAILURE_FATAL=no
IPV6INIT=yes
IPV6_AUTOCONF=yes
IPV6_DEFROUTE=yes
IPV6_FAILURE_FATAL=no
IPV6_ADDR_GEN_MODE=stable-privacy
IPV6_PEERDNS=yes
IPV6_PEERROUTES=yes
IPV6_PRIVACY=no
ONBOOT=yes
DNS1=192.168.1.1
IPADDR=192.168.1.252 #三台机器都要分别设置
PREFIX=24
GATEWAY=192.168.1.1

3. 修改主机名:
192.168.1.252
hostnamectl set-hostname palo252
hostnamectl --static set-hostname palo252

192.168.1.253
hostnamectl set-hostname palo253
hostnamectl --static set-hostname palo253

192.168.1.254
hostnamectl set-hostname palo254
hostnamectl --static set-hostname palo254

4. 修改hosts文件
vi /etc/hosts

127.0.0.1 localhost
::1 localhost


192.168.1.252 palo252
192.168.1.253 palo253
192.168.1.254 palo254

5. 安装JDK(所有节点)
具体到oracle官网下载

6. SSH免密登录

A) 每台机器生成访问密钥,复制到192.168.1.252:/home/workspace目录下
192.168.1.252:
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
cp ~/.ssh/authorized_keys /home/workspace/authorized_keys252 #本机即为汇总主机252,本地复制即可,不能对cp使用远程host:path语法
rm -f ~/.ssh/authorized_keys #删除本机的authorized_keys文件,稍后统一分发合并后的文件

192.168.1.253:
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
scp ~/.ssh/authorized_keys 192.168.1.252:/home/workspace/authorized_keys253
rm -f ~/.ssh/authorized_keys #删除本机的authorized_keys文件,稍后统一分发合并后的文件

192.168.1.254:
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
scp ~/.ssh/authorized_keys 192.168.1.252:/home/workspace/authorized_keys254
rm -f ~/.ssh/authorized_keys #删除本机的authorized_keys文件,稍后统一分发合并后的文件

B) 在192.168.1.252上将所有的公钥合并成一个公钥文件
cat /home/workspace/authorized_keys252 >> /home/workspace/authorized_keys
cat /home/workspace/authorized_keys253 >> /home/workspace/authorized_keys
cat /home/workspace/authorized_keys254 >> /home/workspace/authorized_keys

C) 将合并后的公钥文件复制到集群中的各个主机中
scp /home/workspace/authorized_keys 192.168.1.253:~/.ssh/
scp /home/workspace/authorized_keys 192.168.1.254:~/.ssh/
cp /home/workspace/authorized_keys ~/.ssh/ #因为目前在252主机中,所以使用的命令为cp而不是scp

D) 每台机器:
chmod 600 ~/.ssh/authorized_keys #authorized_keys的访问权限
chmod 700 ~/.ssh/ #.ssh目录必须是700:目录需要执行位才能进入,600会导致SSH无法读取其中的文件

7. 配置hadoop
7-1) 解压
下载地址:https://archive.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
tar xzvf hadoop-2.7.3.tar.gz -C /opt/

7-2) 创建存放数据的目录(必须事先创建好,否则会报错)
mkdir -p /opt/hadoop-2.7.3/data/full/tmp/
mkdir -p /opt/hadoop-2.7.3/data/full/tmp/dfs/name
mkdir -p /opt/hadoop-2.7.3/data/full/tmp/dfs/data

7-3) 配置/opt/hadoop-2.7.3/etc/hadoop下面的配置文件
cd /opt/hadoop-2.7.3/etc/hadoop #定位到配置文件目录(注意使用绝对路径)

7-3-1) core-site.xml
<configuration>
<!-- 指定HDFS中NameNode的地址 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://palo252:9000</value>
</property>

<!-- 指定hadoop运行时产生文件的存储目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>file:/opt/hadoop-2.7.3/data/full/tmp</value>
</property>
</configuration>

7-3-2) yarn-site.xml
<configuration>
<!-- reducer获取数据的方式 -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 指定YARN的ResourceManager的地址 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>palo253</value>
</property>
</configuration>

7-3-3) slaves
palo252
palo253
palo254

7-3-4) mapred-site.xml
<configuration>
<!-- 指定mr运行在yarn上 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>palo252:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>palo252:19888</value>
</property>
</configuration>

7-3-5) hdfs-site.xml
<configuration>
<!-- 设置dfs副本数 -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- 设置secondname的端口 -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>palo253:50090</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/opt/hadoop-2.7.3/data/full/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/opt/hadoop-2.7.3/data/full/tmp/dfs/data</value>
</property>
</configuration>

8. 配置环境变量(每台机器都必须做)
vi /etc/profile

##### set jdk environment
export JAVA_HOME=/usr/java/jdk1.8.0_172-amd64
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH

##### set hadoop_home environment
export HADOOP_HOME=/opt/hadoop-2.7.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

source /etc/profile ####make the env variable to take effect right now.

9. 启动:
NameNode:(master 252)
#格式化namenode
hdfs namenode -format

#启动dfs 
start-dfs.sh # (master 252)

#启动Yarn:  yarn节点(253)
#注意:Namenode和ResourceManger如果不是同一台机器,
#不能在NameNode上启动 yarn,
#应该在ResouceManager所在的机器上启动yarn。
start-yarn.sh

#验证启动情况:
jps #查看java进程
http://palo252:50070/ #NameNode(palo252)的HDFS Web管理界面

猜你喜欢

转载自www.cnblogs.com/lenmom/p/9567846.html