Building a big data platform: Hadoop-3.1.3 + Hive-3.1.2 + HBase-2.2.3 + Zookeeper-3.5.7 + Kafka_2.11-2.4.0 + Spark-2.4.5

1. Framework Selection

hadoop-3.1.3

hive-3.1.2

zookeeper-3.5.7

hbase-2.2.3

kafka_2.11-2.4.0

spark-2.4.5-bin-hadoop2.7

2. Preparation Before Installation

1. Turn off the firewall

2. Install the JDK

3. Install Scala

4. Set up passwordless SSH

5. Configure IP-to-hostname mapping

6. Install MySQL (a minimal sketch of these preparation steps follows this list)
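The commands below are a minimal sketch of these preparation steps, assuming CentOS 7 / firewalld and the example IP addresses shown; adapt hostnames, addresses, and versions to your environment:

# Turn off the firewall (CentOS 7 / firewalld assumed)
systemctl stop firewalld
systemctl disable firewalld

# Passwordless SSH: generate a key on each node and copy it to all nodes
ssh-keygen -t rsa
ssh-copy-id hadoop101
ssh-copy-id hadoop102
ssh-copy-id hadoop103

# IP-to-hostname mapping in /etc/hosts (example addresses, adjust to your network)
# 192.168.1.101 hadoop101
# 192.168.1.102 hadoop102
# 192.168.1.103 hadoop103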

3. Installation

3.1 Hadoop installation

1.hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
2.hdfs-site.xml
 <property>
        <name>dfs.replication</name>
        <value>2</value>
  </property>

  <!-- Specify the host for the Hadoop secondary NameNode -->
  <property>
      <name>dfs.namenode.secondary.http-address</name>
      <value>hadoop102:50090</value>
  </property>

  <property>
       <name>dfs.namenode.name.dir</name>
       <value>/opt/module/hadoop-3.1.3/data/dfs/nn</value>
  </property>
  <property>
       <name>dfs.datanode.data.dir</name>
       <value>/opt/module/hadoop-3.1.3/data/dfs/dn</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
3.yarn-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
4.yarn-site.xml
<!-- How reducers obtain data -->
  <property>
	<name>yarn.nodemanager.aux-services</name>
	<value>mapreduce_shuffle</value>
  </property>

  <!-- Specify the address of the YARN ResourceManager -->
  <property>
	<name>yarn.resourcemanager.hostname</name>
	<value>hadoop103</value>
  </property>

  <!-- Enable log aggregation -->
  <property>
 	<name>yarn.log-aggregation-enable</name>
	<value>true</value>
  </property>

  <!-- Keep aggregated logs for 3 days -->
  <property>
	<name>yarn.log-aggregation.retain-seconds</name>
	<value>259200</value>
  </property>
  
  <property>
	<name>yarn.nodemanager.vmem-check-enabled</name>
	<value>false</value>
  </property>
  <property>
	<name>yarn.nodemanager.vmem-pmem-ratio</name>
	<value>5</value>
  </property>
5.mapred-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
6.mapred-site.xml
<!-- Run MapReduce on YARN -->
  <property>
	<name>mapreduce.framework.name</name>
	<value>yarn</value>
  </property>
  
  <!-- Configure the job history server -->
  <property>
	<name>mapreduce.jobhistory.address</name>
	<value>hadoop103:10020</value>
  </property>
  <property>
	<name>mapreduce.jobhistory.webapp.address</name>
	<value>hadoop103:19888</value>
  </property>

  <!-- In Hadoop 3.x the MapReduce runtime environment must be specified -->
  <property>
	<name>yarn.app.mapreduce.am.env</name>
	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
	<name>mapreduce.map.env</name>
	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
	<name>mapreduce.reduce.env</name>
	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>

  <property>
	<name>mapreduce.map.memory.mb</name>
	<value>1024</value>
  </property>
7.workers (Hadoop 3.x uses the workers file in place of slaves)
hadoop101
hadoop102
hadoop103
8. Configure environment variables in /etc/profile
#Java
export JAVA_HOME=/opt/module/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin

#Scala
export SCALA_HOME=/opt/module/scala-2.11.12
export PATH=$PATH:$SCALA_HOME/bin

#Hadoop
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
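With the configuration in place, the sketch below distributes Hadoop and performs the first start; the paths are the ones used above, and the NameNode format must only be run once, on hadoop101:

# Distribute Hadoop to the other nodes
rsync -av /opt/module/hadoop-3.1.3/ hadoop102:/opt/module/hadoop-3.1.3/
rsync -av /opt/module/hadoop-3.1.3/ hadoop103:/opt/module/hadoop-3.1.3/

# Format the NameNode (first start only, on hadoop101)
hdfs namenode -format

# Start HDFS on hadoop101, YARN on hadoop103 (the ResourceManager host configured above)
start-dfs.sh
start-yarn.sh

# Start the MapReduce job history server on hadoop103
mapred --daemon start historyserver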

3.2 Hive installation

1.hive-env.sh
HADOOP_HOME=/opt/module/hadoop-3.1.3

export HIVE_CONF_DIR=/opt/module/hive-3.1.2/conf

export HIVE_AUX_JARS_PATH=/opt/module/hive-3.1.2/auxlib
2.hive-site.xml
        <property>
	  <name>javax.jdo.option.ConnectionURL</name>
	  <value>jdbc:mysql://hadoop101:3306/metastore?createDatabaseIfNotExist=true</value>
	</property>

	<property>
	  <name>javax.jdo.option.ConnectionDriverName</name>
	  <value>com.mysql.jdbc.Driver</value>
	</property>

	<property>
	  <name>javax.jdo.option.ConnectionUserName</name>
	  <value>root</value>
	</property>

	<property>
	  <name>javax.jdo.option.ConnectionPassword</name>
	  <value>123456</value>
	</property>

	<property>
	  <name>hive.cli.print.header</name>
	  <value>true</value>
	</property>

	<property>
	  <name>hive.cli.print.current.db</name>
	  <value>true</value>
	</property>

	<property>
	    <name>hive.metastore.uris</name>
	    <value>thrift://hadoop101:9083</value>
	</property>

	<property>
		<name>hive.server2.webui.host</name>
		<value>hadoop101</value>
	</property>

	<property>
		<name>hive.server2.webui.port</name>
		<value>10002</value>
	</property>

	<!-- Hive 3.x enables ACID by default; Spark cannot read ACID Hive tables, so ACID must be disabled -->
        <property>
                <name>hive.strict.managed.tables</name>
                <value>false</value>
        </property>
        <property>
                <name>hive.create.as.insert.only</name>
                <value>false</value>
        </property>
        <property>
                <name>metastore.create.as.acid</name>
                <value>false</value>
        </property>

	<!-- Disable schema version verification -->
        <property>
                <name>hive.metastore.schema.verification</name>
                <value>false</value>
        </property>
3. Create the HIVE_AUX_JARS_PATH directory
mkdir -p /opt/module/hive-3.1.2/auxlib
4. Copy mysql-connector-java-5.1.27-bin.jar to /opt/module/hive-3.1.2/lib
cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar  /opt/module/hive-3.1.2/lib
5. Configure environment variables in /etc/profile
#HIVE_HOME
export HIVE_HOME=/opt/module/hive-3.1.2
export PATH=$PATH:$HIVE_HOME/bin
6. Initialize the metastore schema before the first run
schematool -dbType mysql -initSchema
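Once the schema is initialized, the metastore and HiveServer2 can be started in the background; a minimal sketch based on the configuration above:

# Start the metastore service (hive.metastore.uris points to hadoop101:9083)
nohup hive --service metastore &

# Start HiveServer2 (web UI at hadoop101:10002 as configured above)
nohup hive --service hiveserver2 &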

3.3 Zookeeper installation

1.zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/module/zookeeper-3.5.7/zkData
clientPort=2181

# The three cluster nodes
server.1=hadoop101:2888:3888
server.2=hadoop102:2888:3888
server.3=hadoop103:2888:3888
2. Create the Zookeeper data directory
mkdir -p /opt/module/zookeeper-3.5.7/zkData
3. Create a myid file in /opt/module/zookeeper-3.5.7/zkData to identify the current host
echo "1" > /opt/module/zookeeper-3.5.7/zkData/myid
4. Configure environment variables in /etc/profile
#Zookeeper
export ZOOKEEPER_HOME=/opt/module/zookeeper-3.5.7
export PATH=$PATH:$ZOOKEEPER_HOME/bin
5. Distribute Zookeeper to the other nodes; note that each node must have a unique myid (see the sketch below)
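A minimal sketch of the per-node myid values and the ensemble start, following server.1/2/3 in zoo.cfg:

# On hadoop102 and hadoop103, write 2 and 3 respectively
echo "2" > /opt/module/zookeeper-3.5.7/zkData/myid    # on hadoop102
echo "3" > /opt/module/zookeeper-3.5.7/zkData/myid    # on hadoop103

# Start and check Zookeeper on every node
zkServer.sh start
zkServer.sh status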

3.4 HBase installation

1.hbase-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121

# Whether to use HBase's built-in Zookeeper; set to false to use the Zookeeper configured earlier
export HBASE_MANAGES_ZK=false
2.hbase-site.xml
  <!-- Directory on HDFS where HBase stores its data -->
  <property>
    	<name>hbase.rootdir</name>
   	<value>hdfs://hadoop101:9000/hbase</value>
  </property>
  <!-- Whether to run in fully distributed (cluster) mode -->
  <property>
    	<name>hbase.cluster.distributed</name>
    	<value>true</value>
  </property>
  <property>
	<name>hbase.tmp.dir</name>
	<value>/opt/module/hbase-2.2.3/tmp</value>
  </property>
  <!-- Configure Zookeeper -->
  <property>
    	<name>hbase.zookeeper.quorum</name>
    	<value>hadoop101,hadoop102,hadoop103</value>
  </property>
  <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
  </property>
  <!-- The Zookeeper dataDir directory -->
  <property>
    	<name>hbase.zookeeper.property.dataDir</name>
    	<value>/opt/module/zookeeper-3.5.7/zkData</value>
  </property>
  <property>
        <name>zookeeper.znode.parent</name>
        <value>/hbase</value>
  </property>

  <property>
       <name>hbase.unsafe.stream.capability.enforce</name>
       <value>false</value>
   </property>
3.regionservers
hadoop101
hadoop102
hadoop103
4. Configure environment variables in /etc/profile
#HBase
export HBASE_HOME=/opt/module/hbase-2.2.3
export PATH=$PATH:$HBASE_HOME/bin
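With HDFS and Zookeeper already running, HBase can be started and checked as sketched below (the HMaster is assumed to run on hadoop101):

# Start HBase from the HMaster node
start-hbase.sh

# Verify from the HBase shell
hbase shell
# hbase(main):001:0> list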

3.5 Kafka installation

1.server.properties
broker.id=0
log.dirs=/opt/module/kafka_2.11-2.4.0/logs
zookeeper.connect=hadoop101:2181,hadoop102:2181,hadoop103:2181/kafka
2. Distribute Kafka to the other nodes; note that broker.id in server.properties must be globally unique
3. Configure environment variables in /etc/profile
#KAFKA_HOME
export KAFKA_HOME=/opt/module/kafka_2.11-2.4.0
export PATH=$PATH:$KAFKA_HOME/bin
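After distributing Kafka and assigning a unique broker.id on each node (e.g. 0, 1, 2), the brokers can be started and verified with a test topic; the topic name and the default listener port 9092 are assumptions:

# Start the broker on every node
kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties

# Create and list a test topic
kafka-topics.sh --create --bootstrap-server hadoop101:9092 --replication-factor 2 --partitions 3 --topic test
kafka-topics.sh --list --bootstrap-server hadoop101:9092
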
3.6 Spark installation
1.spark-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121

export SCALA_HOME=/opt/module/scala-2.11.12

export SPARK_MASTER_IP=hadoop101

export HADOOP_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop

# spark.history.retainedApplications keeps the given number of application histories in memory (1 here)
export SPARK_HISTORY_OPTS="-Dspark.history.retainedApplications=1 -Dspark.history.fs.logDirectory=hdfs://hadoop101:9000/spark/log/"
2.spark-defaults.conf
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://hadoop101:9000/spark/log/
spark.yarn.historyServer.address hadoop102:18080
3.slaves
hadoop101
hadoop102
hadoop103
4. Create soft links to core-site.xml, hdfs-site.xml, and hive-site.xml under /opt/module/spark-2.4.5-bin-hadoop2.7/conf
cd /opt/module/spark-2.4.5-bin-hadoop2.7/conf
ln -s /opt/module/hadoop-3.1.3/etc/hadoop/core-site.xml
ln -s /opt/module/hadoop-3.1.3/etc/hadoop/hdfs-site.xml
ln -s /opt/module/hive-3.1.2/conf/hive-site.xml
5. Copy mysql-connector-java-5.1.27-bin.jar to /opt/module/spark-2.4.5-bin-hadoop2.7/jars
cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/module/spark-2.4.5-bin-hadoop2.7/jars
6. Configure environment variables in /etc/profile
#Spark
export SPARK_HOME=/opt/module/spark-2.4.5-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin

# Fixes the Spark warning: unable to load native-hadoop library for your platform... using builtin-java classes where applicable
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native/:$LD_LIBRARY_PATH
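The sketch below starts the standalone cluster and the history server and runs the bundled SparkPi example as a smoke test; the event-log directory configured above must exist on HDFS first:

# Create the event log directory on HDFS
hadoop fs -mkdir -p /spark/log

# Start the standalone master/workers and the history server from hadoop101
$SPARK_HOME/sbin/start-all.sh
$SPARK_HOME/sbin/start-history-server.sh

# Quick smoke test on YARN
spark-submit --master yarn --class org.apache.spark.examples.SparkPi \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.5.jar 100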

4. Summary

All of the installations above have been tested and are working.

Origin www.cnblogs.com/wuning/p/12595930.html