Big data platform build: Hadoop-3.1.3 + Hive-3.1.2 + HBase-2.2.3 + Zookeeper-3.5.7 + Kafka_2.11-2.4.0 + Spark-2.4.5

1. Framework Selection

hadoop-3.1.3

hive-3.1.2

zookeeper-3.5.7

hbase-2.2.3

kafka_2.11-2.4.0

spark-2.4.5-bin-hadoop2.7

2. Preparation before installation

1. Turn off the firewall

2. Install the JDK

3. Install Scala

4. Set up passwordless SSH between the nodes

5. Configure the IP-to-hostname mapping

6. Install MySQL
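
A minimal sketch of these preparation steps, assuming three CentOS 7 nodes named hadoop101, hadoop102 and hadoop103 (hostnames and IP addresses below are placeholders, not from the original article):

# Turn off the firewall on every node
systemctl stop firewalld && systemctl disable firewalld

# Passwordless SSH: generate a key and copy it to every node (repeat on each node)
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
ssh-copy-id hadoop101; ssh-copy-id hadoop102; ssh-copy-id hadoop103

# IP-to-hostname mapping in /etc/hosts on every node
# 192.168.1.101 hadoop101
# 192.168.1.102 hadoop102
# 192.168.1.103 hadoop103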

3. Installation

3.1 Hadoop installation

1.hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
2.hdfs-site.xml
 <property>
        <name>dfs.replication</name>
        <value>2</value>
  </property>

  <!-- Specify the Secondary NameNode host -->
  <property>
      <name>dfs.namenode.secondary.http-address</name>
      <value>hadoop102:50090</value>
  </property>

  <property>
       <name>dfs.namenode.name.dir</name>
       <value>/opt/module/hadoop-3.1.3/data/dfs/nn</value>
  </property>
  <property>
       <name>dfs.datanode.data.dir</name>
       <value>/opt/module/hadoop-3.1.3/data/dfs/dn</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
3.yarn-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
4.yarn-site.xml
<!-- How reducers fetch data (shuffle service) -->
  <property>
	<name>yarn.nodemanager.aux-services</name>
	<value>mapreduce_shuffle</value>
  </property>

  <!-- Specify the YARN ResourceManager host -->
  <property>
	<name>yarn.resourcemanager.hostname</name>
	<value>hadoop103</value>
  </property>

  <!-- Enable log aggregation -->
  <property>
 	<name>yarn.log-aggregation-enable</name>
	<value>true</value>
  </property>

  <!-- Keep aggregated logs for 3 days -->
  <property>
	<name>yarn.log-aggregation.retain-seconds</name>
	<value>259200</value>
  </property>
  
  <property>
	<name>yarn.nodemanager.vmem-check-enabled</name>
	<value>false</value>
  </property>
  <property>
	<name>yarn.nodemanager.vmem-pmem-ratio</name>
	<value>5</value>
  </property>
5.mapred-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121
6.mapred-site.xml
<!-- Run MapReduce on YARN -->
  <property>
	<name>mapreduce.framework.name</name>
	<value>yarn</value>
  </property>
  
  <!-- Configure the job history server -->
  <property>
	<name>mapreduce.jobhistory.address</name>
	<value>hadoop103:10020</value>
  </property>
  <property>
	<name>mapreduce.jobhistory.webapp.address</name>
	<value>hadoop103:19888</value>
  </property>

  <!-- In Hadoop 3.x the MapReduce runtime environment must be set explicitly -->
  <property>
	<name>yarn.app.mapreduce.am.env</name>
	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
	<name>mapreduce.map.env</name>
	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
	<name>mapreduce.reduce.env</name>
	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>

  <property>
	<name>mapreduce.map.memory.mb</name>
	<value>1024</value>
  </property>
7.workers (Hadoop 3.x renamed the slaves file to workers)
hadoop101
hadoop102
hadoop103
8. Configure environment variables in /etc/profile
#Java
export JAVA_HOME=/opt/module/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin

#Scala
export SCALA_HOME=/opt/module/scala-2.11.12
export PATH=$PATH:$SCALA_HOME/bin

#Hadoop
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
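
After distributing the configured hadoop-3.1.3 directory to all three nodes (for example with scp or rsync), a hedged sketch of the first start-up; the node assignments follow the configuration above (NameNode on hadoop101, ResourceManager and history server on hadoop103):

# On hadoop101: format the NameNode (only once) and start HDFS
hdfs namenode -format
start-dfs.sh

# On hadoop103: start YARN and the MapReduce JobHistory server (Hadoop 3.x daemon syntax)
start-yarn.sh
mapred --daemon start historyserver

# Quick check of the cluster
hdfs dfsadmin -report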

3.2 Hive installation

1.hive-env.sh
HADOOP_HOME=/opt/module/hadoop-3.1.3

export HIVE_CONF_DIR=/opt/module/hive-3.1.2/conf

export HIVE_AUX_JARS_PATH=/opt/module/hive-3.1.2/auxlib
2.hive-site.xml
        <property>
	  <name>javax.jdo.option.ConnectionURL</name>
	  <value>jdbc:mysql://hadoop101:3306/metastore?createDatabaseIfNotExist=true</value>
	</property>

	<property>
	  <name>javax.jdo.option.ConnectionDriverName</name>
	  <value>com.mysql.jdbc.Driver</value>
	</property>

	<property>
	  <name>javax.jdo.option.ConnectionUserName</name>
	  <value>root</value>
	</property>

	<property>
	  <name>javax.jdo.option.ConnectionPassword</name>
	  <value>123456</value>
	</property>

	<property>
	  <name>hive.cli.print.header</name>
	  <value>true</value>
	</property>

	<property>
	  <name>hive.cli.print.current.db</name>
	  <value>true</value>
	</property>

	<property>
	    <name>hive.metastore.uris</name>
	    <value>thrift://hadoop101:9083</value>
	</property>

	<property>
		<name>hive.server2.webui.host</name>
		<value>hadoop101</value>
	</property>

	<property>
		<name>hive.server2.webui.port</name>
		<value>10002</value>
	</property>

	<!-- Hive 3.x enables ACID by default; Spark cannot read ACID Hive tables, so disable it -->
        <property>
                <name>hive.strict.managed.tables</name>
                <value>false</value>
        </property>
        <property>
                <name>hive.create.as.insert.only</name>
                <value>false</value>
        </property>
        <property>
                <name>metastore.create.as.acid</name>
                <value>false</value>
        </property>

	<!-- Disable schema version verification -->
        <property>
                <name>hive.metastore.schema.verification</name>
                <value>false</value>
        </property>
3. Create the HIVE_AUX_JARS_PATH directory
mkdir -p /opt/module/hive-3.1.2/auxlib
4. Copy mysql-connector-java-5.1.27-bin.jar to /opt/module/hive-3.1.2/lib
cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar  /opt/module/hive-3.1.2/lib
5. Set environment variables in /etc/profile
#HIVE_HOME
export HIVE_HOME=/opt/module/hive-3.1.2
export PATH=$PATH:$HIVE_HOME/bin
6. Initialize the metastore schema before the first run
schematool -dbType mysql -initSchema
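
With the schema initialized, a sketch of how the metastore and HiveServer2 can be started on hadoop101 (hosts and ports follow hive-site.xml above; the log paths are arbitrary placeholders):

# Start the metastore (thrift://hadoop101:9083) and HiveServer2 in the background
nohup hive --service metastore   > /tmp/metastore.log   2>&1 &
nohup hive --service hiveserver2 > /tmp/hiveserver2.log 2>&1 &

# Verify with the Hive CLI
hive -e "show databases;"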

3.3 Zookeeper installation

1.zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/module/zookeeper-3.5.7/zkData
clientPort=2181

# 3 machines
server.1=hadoop101:2888:3888
server.2=hadoop102:2888:3888
server.3=hadoop103:2888:3888
2. Create the Zookeeper data directory
mkdir -p /opt/module/zookeeper-3.5.7/zkData
3. Create myid in /opt/module/zookeeper-3.5.7/zkData to identify the current host
echo "1" > /opt/module/zookeeper-3.5.7/zkData/myid
4. Configure environment variables in /etc/profile
#Zookeeper
export ZOOKEEPER_HOME=/opt/module/zookeeper-3.5.7
export PATH=$PATH:$ZOOKEEPER_HOME/bin
5. Distribute Zookeeper to the other nodes; note: every node must have a unique myid (see the sketch below)
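
A sketch of that distribution and of the first start-up, assuming scp is used for copying and myid values 1, 2 and 3 on hadoop101-103:

# From hadoop101: copy Zookeeper to the other nodes
scp -r /opt/module/zookeeper-3.5.7 hadoop102:/opt/module/
scp -r /opt/module/zookeeper-3.5.7 hadoop103:/opt/module/

# Overwrite myid on the copies
echo "2" > /opt/module/zookeeper-3.5.7/zkData/myid    # on hadoop102
echo "3" > /opt/module/zookeeper-3.5.7/zkData/myid    # on hadoop103

# On every node: start the server and check the quorum state
zkServer.sh start
zkServer.sh status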

3.4 HBase installation

1.hbase-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121

# Whether to use HBase's built-in Zookeeper. Set to false to use the Zookeeper configured earlier
export HBASE_MANAGES_ZK=false
2.hbase-site.xml
  <!-- Directory where HBase stores its data on HDFS -->
  <property>
    	<name>hbase.rootdir</name>
   	<value>hdfs://hadoop101:9000/hbase</value>
  </property>
  <!-- Whether to run in fully distributed (cluster) mode -->
  <property>
    	<name>hbase.cluster.distributed</name>
    	<value>true</value>
  </property>
  <property>
	<name>hbase.tmp.dir</name>
	<value>/opt/module/hbase-2.2.3/tmp</value>
  </property>
  <!-- Zookeeper quorum configuration -->
  <property>
    	<name>hbase.zookeeper.quorum</name>
    	<value>hadoop101,hadoop102,hadoop103</value>
  </property>
  <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
  </property>
  <!-- Zookeeper dataDir directory -->
  <property>
    	<name>hbase.zookeeper.property.dataDir</name>
    	<value>/opt/module/zookeeper-3.5.7/zkData</value>
  </property>
  <property>
        <name>zookeeper.znode.parent</name>
        <value>/hbase</value>
  </property>

  <property>
       <name>hbase.unsafe.stream.capability.enforce</name>
       <value>false</value>
   </property>
3.regionservers
hadoop101
hadoop102
hadoop103
4. Configure environment variables in /etc/profile
#HBase
export HBASE_HOME=/opt/module/hbase-2.2.3
export PATH=$PATH:$HBASE_HOME/bin
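
With HDFS and Zookeeper already running, a sketch of the HBase start-up and a quick check, assuming it is launched from hadoop101 (the HMaster node):

# Start the HMaster and the RegionServers listed in regionservers
start-hbase.sh

# Verify from the HBase shell (the master web UI defaults to port 16010)
hbase shell
# hbase(main):001:0> status
# hbase(main):002:0> list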

3.5 Kafka installation

1.server.properties
broker.id=0
log.dirs=/opt/module/kafka_2.11-2.4.0/logs
zookeeper.connect=hadoop101:2181,hadoop102:2181,hadoop103:2181/kafka
2. Distribute Kafka to the other nodes; note that broker.id in server.properties must be globally unique
3. Configure environment variables in /etc/profile
#KAFKA_HOME
export KAFKA_HOME=/opt/module/kafka_2.11-2.4.0
export PATH=$PATH:$KAFKA_HOME/bin
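
A sketch of bringing the brokers up and a smoke test, assuming broker.id is set to 0, 1 and 2 on hadoop101-103; because zookeeper.connect uses the /kafka chroot, all Kafka metadata lives under that znode:

# On each node, after setting its unique broker.id in server.properties
kafka-server-start.sh -daemon /opt/module/kafka_2.11-2.4.0/config/server.properties

# Create and list a test topic (Kafka 2.4 supports --bootstrap-server)
kafka-topics.sh --create --bootstrap-server hadoop101:9092 --replication-factor 2 --partitions 3 --topic test
kafka-topics.sh --list --bootstrap-server hadoop101:9092
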
3.6 Spark installation
1.spark-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_121

export SCALA_HOME=/opt/module/scala-2.11.12

export SPARK_MASTER_IP=hadoop101

export HADOOP_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop

# spark.history.retainedApplications: number of application histories kept in memory (set to 1 below)
export SPARK_HISTORY_OPTS="-Dspark.history.retainedApplications=1 -Dspark.history.fs.logDirectory=hdfs://hadoop101:9000/spark/log/"
2.spark-defaults.conf
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://hadoop101:9000/spark/log/
spark.yarn.historyServer.address hadoop102:18080
3.slaves
hadoop101
hadoop102
hadoop103
4. From inside /opt/module/spark-2.4.5-bin-hadoop2.7/conf, create soft links to core-site.xml, hdfs-site.xml and hive-site.xml
ln -s /opt/module/hadoop-3.1.3/etc/hadoop/core-site.xml
ln -s /opt/module/hadoop-3.1.3/etc/hadoop/hdfs-site.xml
ln -s /opt/module/hive-3.1.2/conf/hive-site.xml
5. Copy mysql-connector-java-5.1.27-bin.jar to /opt/module/spark-2.4.5-bin-hadoop2.7/jars
cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/module/spark-2.4.5-bin-hadoop2.7/jars
6. Configure environment variables in /etc/profile
#Spark
export SPARK_HOME=/opt/module/spark-2.4.5-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin

# Fixes the Spark warning: unable to load native-hadoop library for your platform... using builtin-java classes where applicable
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native/:$LD_LIBRARY_PATH
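
Finally, a sketch of the Spark start-up and a smoke test; the HDFS event log directory must exist before the history server starts, and the master/history-server placement follows spark-env.sh and spark-defaults.conf above:

# Create the event log directory on HDFS
hdfs dfs -mkdir -p /spark/log

# On hadoop101: start the standalone master and the workers listed in slaves
/opt/module/spark-2.4.5-bin-hadoop2.7/sbin/start-all.sh

# On hadoop102: start the history server (spark.yarn.historyServer.address points here)
/opt/module/spark-2.4.5-bin-hadoop2.7/sbin/start-history-server.sh

# Smoke test: run SparkPi on YARN
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.5.jar 100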

4. Summary

The installation has been tested and every component is working.

Source: www.cnblogs.com/wuning/p/12595930.html