Hadoop + HBase Deployment in Practice

ifconfig -a                  # check the IP addresses
vi /etc/profile              # edit environment variables, then: source /etc/profile
mv                           # rename files
tar -xzvf jdk.jar            # extract the JDK archive

export JAVA_HOME=/home/linkedcare/jdk1.8.0_181
export PATH=/usr/bin:/usr/sbin:/bin:/sbin:/usr/X11R6/bin
export PATH=.:$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
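
# A quick sanity check after editing /etc/profile (a minimal sketch; it assumes the JDK was extracted to /home/linkedcare/jdk1.8.0_181 as above):
source /etc/profile
echo $JAVA_HOME      # should print /home/linkedcare/jdk1.8.0_181
java -version        # should report version 1.8.0_181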


#Passwordless SSH login
ssh-keygen -t rsa
cd /root/.ssh
cat id_rsa.pub>>authorized_keys
ssh-copy-id -i .ssh/id_rsa.pub [email protected]
ssh-copy-id -i .ssh/id_rsa.pub [email protected]
ssh-copy-id -i .ssh/id_rsa.pub [email protected]
vi /etc/hosts

10.25.172.xxx Trade-Test-Spark001
10.51.237.xxx Trade-Test-Spark002
10.31.121.xxx Trade-Test-Spark003
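
# Name resolution and passwordless login can be verified with a quick check (a sketch using the hostnames above):
ping -c 1 Trade-Test-Spark002        # resolves via /etc/hosts
ssh Trade-Test-Spark002 hostname     # should print the remote hostname without prompting for a password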

#Disable the Linux firewall (iptables)
[root@djt002 ~]# service iptables status    # check firewall status
[root@djt002 ~]# chkconfig iptables off     # permanently disable the firewall (at boot)
[root@djt002 ~]# service iptables stop      # temporarily stop the firewall
[root@djt002 ~]# service iptables status

#Basic firewalld usage
Start: systemctl start firewalld
Start as a service: systemctl start firewalld.service
Stop: systemctl stop firewalld
Check status: systemctl status firewalld
Disable at boot: systemctl disable firewalld
Enable at boot: systemctl enable firewalld

#Check open ports
netstat -ant
netstat -anptlu | grep 51071
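
# For this deployment, the ports worth watching are the ones configured later in this guide (NameNode RPC 9000, NameNode web 50070, ZooKeeper 2181, JournalNode 8485, JobHistory 10020/19888); a combined check, as a sketch:
netstat -antp | grep -E ':(9000|50070|2181|8485|10020|19888)'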


#zookeeper
tar -zxvf /home/linkedcare/zookeeper-3.4.5-cdh5.8.0.tar.gz
Edit the configuration:

cd /home/linkedcare/zookeeper-3.4.5-cdh5.8.0/conf/
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg

# note: 0.0.0.0 is used for the local node's own entry; on the other nodes this entry should use Trade-Test-Spark001 and their own entry should be 0.0.0.0
server.1=0.0.0.0:2888:3888
server.2=Trade-Test-Spark002:2888:3888
server.3=Trade-Test-Spark003:2888:3888
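
# For reference, a minimal zoo.cfg consistent with this layout might look like the following (a sketch; the dataDir value is an assumption and must match the directory that holds the myid file written below):
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/home/linkedcare/zookeeper-3.4.5-cdh5.8.0/data
clientPort=2181
server.1=0.0.0.0:2888:3888
server.2=Trade-Test-Spark002:2888:3888
server.3=Trade-Test-Spark003:2888:3888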

# create the data directory (where zoo.cfg's dataDir points and where myid lives)
mkdir /home/linkedcare/zookeeper-3.4.5-cdh5.8.0/data

# write a unique myid on each node, matching the server.N entries above
echo "1" > /home/linkedcare/zookeeper-3.4.5-cdh5.8.0/data/myid   # on Trade-Test-Spark001
echo "2" > /home/linkedcare/zookeeper-3.4.5-cdh5.8.0/data/myid   # on Trade-Test-Spark002
echo "3" > /home/linkedcare/zookeeper-3.4.5-cdh5.8.0/data/myid   # on Trade-Test-Spark003

#Copy the configured ZooKeeper (and Phoenix) to the other nodes
scp -r /home/linkedcare/zookeeper-3.4.5-cdh5.8.0 Trade-Test-Spark002:/home/linkedcare/
scp -r /home/linkedcare/zookeeper-3.4.5-cdh5.8.0 Trade-Test-Spark003:/home/linkedcare/
scp -r /home/linkedcare/phoenix-4.8.0-cdh5.8.0 Trade-Test-Spark002:/home/linkedcare/


scp /home/linkedcare/spark-2.2.0-bin-hadoop2.6.tgz Trade-Test-Spark001:/home/linkedcare/
scp /home/linkedcare/apache-phoenix-4.9.0-HBase-1.2-bin/phoenix-4.9.0-HBase-1.2-server.jar Trade-Test-Spark002:/home/linkedcare/hbase-1.2.0-cdh5.8.0/lib
jps    # list the running Java processes


#Hadoop configuration
cd /home/linkedcare/hadoop-2.6.0-cdh5.8.0/etc/hadoop
vi hadoop-env.sh
export JAVA_HOME=/home/linkedcare/jdk1.8.0_181


Edit yarn-env.sh:
#export JAVA_HOME=/home/y/libexec/jdk1.6.0/
export JAVA_HOME=/home/linkedcare/jdk1.8.0_181


#Edit core-site.xml
<configuration>
<!-- Set the HDFS nameservice to mycluster -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- Hadoop temporary directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/tmp</value>
</property>

<!-- ZooKeeper quorum addresses -->
<property>
<name>ha.zookeeper.quorum</name>
<value>Trade-Test-Spark001:2181,Trade-Test-Spark002:2181,Trade-Test-Spark003:2181</value>
</property>
</configuration>

#Edit hdfs-site.xml
<configuration>
<!-- NameNode metadata storage path -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/namenode</value>
</property>

<!-- DataNode data storage path -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/datanode</value>
</property>

<!-- Replication factor -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>

<!-- Disable permission checking -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>

<!-- Enable WebHDFS (REST-based interface) -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>

<property>
<name>dfs.http.address</name>
<value>0.0.0.0:50070</value>
</property>

<!-- The HDFS nameservice is mycluster; it must match core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!-- The mycluster nameservice has two NameNodes: nn1 and nn2 -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>Trade-Test-Spark001:9000</value>
</property>
<!-- HTTP address of nn1 -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>Trade-Test-Spark001:50070</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>Trade-Test-Spark002:9000</value>
</property>
<!-- HTTP address of nn2 -->
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>Trade-Test-Spark002:50070</value>
</property>
<!-- Location of the NameNode edits metadata on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://Trade-Test-Spark001:8485;Trade-Test-Spark002:8485;Trade-Test-Spark003:8485/mycluster</value>
</property>
<!-- Local directory where each JournalNode stores its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/journal</value>
</property>
<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Failover proxy provider used by clients to locate the active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods; multiple methods are separated by newlines, one per line -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- sshfence requires passwordless SSH login -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/linkedcare/hadoop-2.6.0-cdh5.8.0/.ssh/id_rsa</value>
</property>
<!-- Timeout (in ms) for the sshfence mechanism -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
</configuration>
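
# Once HDFS HA is up (the start-up steps come later in this guide), the active/standby state of each NameNode can be checked with haadmin; a sketch using the nn1/nn2 ids defined above:
hdfs haadmin -getServiceState nn1      # prints active or standby
hdfs haadmin -getServiceState nn2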


#Edit mapred-site.xml
<configuration>
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>

<!-- JobHistory server RPC address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>Trade-Test-Spark001:10020</value>
</property>

<!-- JobHistory server web UI address (normally on the same host as the RPC address) -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>Trade-Test-Spark002:19888</value>
</property>

<!-- Enable uber mode (an optimization for small jobs) -->
<property>
<name>mapreduce.job.ubertask.enable</name>
<value>true</value>
</property>

<!-- Maximum number of maps for a job to qualify for uber mode -->
<property>
<name>mapreduce.job.ubertask.maxmaps</name>
<value>9</value>
</property>

<!-- Maximum number of reduces for a job to qualify for uber mode -->
<property>
<name>mapreduce.job.ubertask.maxreduces</name>
<value>1</value>
</property>

</configuration>
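
# The JobHistory server configured above is not started by start-yarn.sh; assuming the standard Hadoop sbin layout, it is started separately on the host named in mapreduce.jobhistory.address:
sbin/mr-jobhistory-daemon.sh start historyserver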


#Edit yarn-site.xml
<configuration>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- ResourceManager cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<!-- Logical ids of the ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Hostname of each ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>Trade-Test-Spark001</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>Trade-Test-Spark002</value>
</property>
<!-- ZooKeeper quorum addresses -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>Trade-Test-Spark001:2181,Trade-Test-Spark002:2181,Trade-Test-Spark003:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
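
# After YARN is started, the ResourceManager HA state can be verified with rmadmin; a sketch using the rm1/rm2 ids defined above:
yarn rmadmin -getServiceState rm1      # prints active or standby
yarn rmadmin -getServiceState rm2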

scp -r /home/linkedcare/hadoop-2.6.0-cdh5.8.0 Trade-Test-Spark002:/home/linkedcare/
scp -r /home/linkedcare/hadoop-2.6.0-cdh5.8.0 Trade-Test-Spark003:/home/linkedcare/


##Start the ZooKeeper ensemble (run on each of the three nodes)
cd /home/linkedcare/zookeeper-3.4.5-cdh5.8.0/bin/
./zkServer.sh start
#Check the status: one leader and two followers
./zkServer.sh status
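
# A further liveness check (a sketch; it assumes nc is installed) uses ZooKeeper's four-letter commands:
echo ruok | nc Trade-Test-Spark001 2181    # a healthy server answers "imok"
echo stat | nc Trade-Test-Spark001 2181    # shows the mode (leader/follower) and client connections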

##Start the JournalNodes (run on each of the three nodes)
cd /home/linkedcare/hadoop-2.6.0-cdh5.8.0
sbin/hadoop-daemon.sh start journalnode
#Verify with jps: each node should now show a JournalNode process

#HBase
cd conf
#Add the Java environment variable to hbase-env.sh
vi hbase-env.sh
export JAVA_HOME=/home/linkedcare/jdk1.8.0_181
export HBASE_MANAGES_ZK=false
#Edit hbase-site.xml and add the following configuration:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <!-- use the HA nameservice from core-site.xml; this requires core-site.xml and hdfs-site.xml in HBase's conf directory (copied later in this guide) -->
    <value>hdfs://mycluster/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>Trade-Test-Spark001,Trade-Test-Spark002,Trade-Test-Spark003</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/linkedcare/hbase-1.2.0-cdh5.8.0/data</value>
  </property>
</configuration>

#Edit the regionservers file in the conf directory:

vi regionservers

Add the following lines:

Trade-Test-Spark002
Trade-Test-Spark003

#Copy HBase to the other nodes
scp -r /home/linkedcare/hbase-1.2.0-cdh5.8.0 Trade-Test-Spark002:/home/linkedcare/
scp -r /home/linkedcare/hbase-1.2.0-cdh5.8.0 Trade-Test-Spark003:/home/linkedcare/


#Starting Hadoop
#Start the JournalNode cluster (from node01)
[root@node01 hadoop]# hadoop-daemons.sh start journalnode

#Format the ZKFC state in ZooKeeper (on node01)
[root@node01 hadoop]# hdfs zkfc -formatZK

#Format HDFS (on node01)
[root@node01 hadoop]# hadoop namenode -format

#Start the NameNode on node01
/home/hadoop/hadoop-2.7.1/sbin/hadoop-daemon.sh start namenode

#On node02, bootstrap the standby NameNode with the metadata from node01, then start it
[root@node02 hadoop]# hdfs namenode -bootstrapStandby
[root@node02 hadoop]# hadoop-daemon.sh start namenode

#Start the DataNodes (from node01)
[root@node01 hadoop]# hadoop-daemons.sh start datanode

#Start YARN (on node03)
[root@node03 hadoop]# start-yarn.sh
stop-yarn.sh

#Start the DFSZKFailoverController
ssh hadoop31
/home/hadoop/hadoop-2.7.1/sbin/hadoop-daemon.sh start zkfc

#Start the DataNodes
#Note: hadoop-daemons.sh starts DataNodes on all slave nodes, while hadoop-daemon.sh starts only the local DataNode
ssh hadoop31
/home/hadoop/hadoop-2.7.1/sbin/hadoop-daemons.sh start datanode
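
# To confirm that the DataNodes have registered with the active NameNode, a quick sketch:
hdfs dfsadmin -report      # lists live DataNodes and their capacity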

#Start YARN
#This starts the ResourceManager on hadoop31 and NodeManagers on hadoop31 through hadoop35
ssh hadoop31
/home/hadoop/hadoop-2.7.1/sbin/start-yarn.sh

#Start the standby ResourceManager on hadoop32
ssh hadoop32
/home/hadoop/hadoop-2.7.1/sbin/yarn-daemon.sh start resourcemanager

#Start a RegionServer
hbase-daemon.sh start regionserver
#Start the HMaster on the designated master node
hbase-daemon.sh start master

#Start/stop the entire Hadoop cluster
start-all.sh
stop-all.sh
start-dfs.sh
stop-dfs.sh
#Stop an individual daemon
hadoop-daemon.sh stop zkfc
[root@hadoop01 ~]# /root/apps/hadoop/sbin/hadoop-daemon.sh stop zkfc
stopping zkfc
[root@hadoop01 ~]# /root/apps/hadoop/sbin/hadoop-daemon.sh stop journalnode
stopping journalnode
[root@hadoop01 ~]# /root/apps/hadoop/sbin/hadoop-daemon.sh stop datanode
stopping datanode
[root@hadoop01 ~]# /root/apps/hadoop/sbin/hadoop-daemon.sh stop namenode
stopping namenode
yarn-daemon.sh stop resourcemanager
stop-yarn.sh
#Copy the ZooKeeper and Hadoop configuration files into HBase's conf directory
scp /home/linkedcare/zookeeper-3.4.5-cdh5.8.0/conf/zoo.cfg Trade-Test-Spark001:/home/linkedcare/hbase-1.2.0-cdh5.8.0/conf
scp /home/linkedcare/hadoop-2.6.0-cdh5.8.0/etc/hadoop/core-site.xml Trade-Test-Spark002:/home/linkedcare/hbase-1.2.0-cdh5.8.0/conf
scp /home/linkedcare/hadoop-2.6.0-cdh5.8.0/etc/hadoop/hdfs-site.xml Trade-Test-Spark001:/home/linkedcare/hbase-1.2.0-cdh5.8.0/conf

#Before reformatting the NameNode, clear the name and data directories under dfs, otherwise the DataNodes may fail to start
#Fixing "Incompatible clusterIDs in /home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/datanode"
Edit the VERSION files of the NameNode and DataNode under the data directory:
/home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/datanode/current/VERSION
/home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/namenode/current/VERSION
Change one of the clusterID values so that the two match.
After the change, the DataNode starts successfully.
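
# A quick way to compare the two IDs before editing (a sketch using the paths above):
grep clusterID /home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/namenode/current/VERSION
grep clusterID /home/linkedcare/hadoop-2.6.0-cdh5.8.0/data/datanode/current/VERSION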


#HBase
Copy the two Hadoop configuration files (core-site.xml and hdfs-site.xml) into HBase's conf directory so that HBase can find the Hadoop configuration.
Start the HBase cluster:

bin/start-hbase.sh
Stop the HBase cluster:

bin/stop-hbase.sh
Start a single HMaster process:
bin/hbase-daemon.sh start master
Stop a single HMaster process:

bin/hbase-daemon.sh stop master
Start a single HRegionServer process:

bin/hbase-daemon.sh start regionserver
Stop a single HRegionServer process:

bin/hbase-daemon.sh stop regionserver
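
# A quick health check once the cluster is up (a sketch; run from the HBase home directory):
echo "status" | bin/hbase shell      # reports the number of live and dead RegionServers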

#Processes (example jps output)
18688 SqlLine
2689 QuorumPeerMain
23795 Jps
10515 ResourceManager
9957 NameNode
11829 Main
10245 JournalNode
10617 NodeManager
10058 DataNode
10426 DFSZKFailoverController
25983 HRegionServer
11881 HMaster

#In an HA cluster the SecondaryNameNode is not used; the standby NameNode takes over its role

#hosts
10.25.172.xxx  Trade-Test-Spark001
10.51.237.xxx Trade-Test-Spark002
xx.31.121.xxx Trade-Test-Spark003


#Mapping an internal address to the external network
Enable IP forwarding:
#vim /etc/sysctl.conf
net.ipv4.ip_forward = 1 # default is 0; set to 1 to allow forwarding
#sysctl -p
Allow related and established packets through:
#iptables -A FORWARD -m state --state ESTABLISHED,RELATED -j ACCEPT
#DNAT rule forwarding external port 50070 to the internal host
iptables -t nat -A PREROUTING -d xxx.178.104.xx -p tcp --dport 50070 -j DNAT --to-destination 10.25.172.171:50070

Check the configuration:
#iptables -t nat --list -n
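
# On systems that use the iptables service (as the commands above assume), these rules are lost on reboot unless saved; a sketch:
service iptables save      # writes the current rules to /etc/sysconfig/iptables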


#Hadoop history server (mapred-site.xml)
#JobTracker and TaskTracker no longer exist in Hadoop 2.
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop-senior01.beifeng.com:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop-senior01.beifeng.com:19888</value>
</property>
<!-- legacy MRv1 setting; ignored when mapreduce.framework.name is yarn -->
<property>
<name>mapred.job.tracker</name>
<value>trade-test-spark001:8021</value>
</property>
#In MRv1 the JobTracker was normally deployed on a dedicated machine; the JobTracker and TaskTrackers communicate via heartbeats.


Reprinted from www.cnblogs.com/junglecat/p/10018707.html