Complete installation of every component of a big data environment

First install the CentOS system, then adjust the network configuration:

My three machines:

master 192.168.179.10

slave1 192.168.179.11

slave2 192.168.179.12

View the port number of each component: http://www.gaohongwei.cn/530/

Detailed steps for configuring NAT-mode networking on CentOS 7 (personally tested): https://blog.csdn.net/Jackson_mvp/article/details/100856966

1. Restart the network

service network restart

# or: systemctl restart network.service

Permanently change the hostname

hostnamectl set-hostname master1

vi /etc/hostname

Set the shared directory /mnt/hdfs

 

2. Modify /etc/hosts (must be configured on every node)

vi /etc/hosts

192.168.179.10 master

192.168.179.11 slave1

192.168.179.12 slave2

 

3. System firewall and kernel firewall configuration (run on every node)

# Temporarily turn off the kernel firewall (SELinux)

setenforce 0

# Permanently turn off the kernel firewall (SELinux)

vi /etc/selinux/config

SELINUX=disabled

# Temporarily turn off the system firewall

systemctl stop firewalld.service

# Permanently turn off the system firewall

systemctl disable firewalld.service
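
To confirm both are really off (getenforce reports Permissive after setenforce 0, or Disabled after a reboot once the config change is in place):

getenforce

systemctl is-active firewalld.service

systemctl is-enabled firewalld.service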

4. Password-free login

# Generate a key pair (run on every node)

ssh-keygen -t rsa

# Write the master's own public key into authorized_keys (run on the master node)

cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys

chmod 600 /root/.ssh/authorized_keys

# Append the public keys of the other nodes (run on the master node)

ssh slave1 cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys

ssh slave2 cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys

 

# Copy the public key to other nodes (executed by the master node)

scp /root/.ssh/authorized_keys root@slave1:/root/.ssh/authorized_keys

scp /root/.ssh/authorized_keys root@slave2:/root/.ssh/authorized_keys

Test:

ssh slave1

exit
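
If everything is wired up, a short loop should log in to each node and print its hostname without any password prompt:

for h in master slave1 slave2; do ssh $h hostname; done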


For reference, below is the complete /etc/profile from a finished installation (if you have not installed a component yet, you do not need its entry; adjust the paths to your own directories).

vim /etc/profile

export JAVA_HOME=/usr/local/jdk1.8.0_144

export PATH=$PATH:$JAVA_HOME/bin

export HADOOP_HOME=/usr/local/hadoop/hadoop-2.8.1

export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

export SCALA_HOME=/usr/local/scala-2.11.6

export PATH=$PATH:$SCALA_HOME/bin

#export SPARK_HOME=/usr/local/spark-2.3.0-bin-hadoop2.7

#export PATH=$PATH:$SPARK_HOME/bin

export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.5

export PATH=$PATH:$ZOOKEEPER_HOME/bin

export HBASE_HOME=/usr/local/hbase-2.0.0-alpha3

export PATH=$PATH:$HBASE_HOME/bin

export HIVE_HOME=/usr/local/hive

export HIVE_CONF_DIR=${HIVE_HOME}/conf

export PATH=$HIVE_HOME/bin:$PATH

export FLUME_HOME=/usr/local/flume1.8

export PATH=$PATH:$FLUME_HOME/bin

#set sqoop

export SQOOP_HOME=/usr/local/sqoop1.4.7

export PATH=$SQOOP_HOME/bin:$PATH
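
After editing /etc/profile, reload it and spot-check a couple of the variables:

source /etc/profile

echo $JAVA_HOME $HADOOP_HOME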


JDK installation

# Check for a preinstalled OpenJDK

rpm -qa | grep java

# Remove it if present (the package name below is an example; use what the query returns)

rpm -e --nodeps java-1.8.0-openjdk-1.8.0.161-2.b14.el7.x86_64

 

# Configure environment variables, add at the end of the configuration

vim ~/.bashrc

export JAVA_HOME=/usr/local/src/jdk1.8.0_172

export JRE_HOME=${JAVA_HOME}/jre

export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib

export PATH=${JAVA_HOME}/bin:$PATH

 

# Copy environment variables to other nodes

scp ~/.bashrc    root@slave1:~/

scp ~/.bashrc    root@slave2:~/

# Copy the JDK package to other nodes

scp -r  /usr/local/src/jdk1.8.0_172   root@slave1:/usr/local/src/

scp -r /usr/local/src/jdk1.8.0_172  root@slave2:/usr/local/src/

# Reload environment variables

source ~/.bashrc

# Test environment is configured successfully

java -version
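
The expected output looks roughly like this (the build string will match the JDK you installed):

java version "1.8.0_172"

Java(TM) SE Runtime Environment (build 1.8.0_172-b11)

Java HotSpot(TM) 64-Bit Server VM (build 25.172-b11, mixed mode)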


Hadoop installation (prerequisite: JDK installed)

1. Unzip the installation package and enter the etc/hadoop directory to modify the configuration files

vim core-site.xml

<configuration>

 <property>

  <name>fs.defaultFS</name>

<value>hdfs://master:9000</value>

   </property>

 <property>

    <name>hadoop.tmp.dir</name>

     <value>file:/usr/local/src/hadoop-2.6.5/tmp</value>

</property>

</configuration>

 

vim hdfs-site.xml

 <configuration>

<property>

<name>dfs.namenode.secondary.http-address</name>

<value>master:9001</value>

</property>

<property>

<name>dfs.namenode.name.dir</name>

 <value>file:/usr/local/src/hadoop-2.6.5/dfs/name</value>

</property>

<property>

<name>dfs.datanode.data.dir</name>

 <value>file:/usr/local/src/hadoop-2.6.5/dfs/data</value>

      </property>

<property>

<name>dfs.replication</name>

             <value>3</value>

</property>

</configuration>

# Modify the configuration file: add the MapReduce configuration

cp mapred-site.xml.template mapred-site.xml

vim mapred-site.xml

<configuration>

<property>

<name>mapreduce.framework.name</name>

<value>yarn</value>

</property>

</configuration>

# Modify configuration file Add resource management configuration

vim yarn-site.xml

<configuration>

<property>

<name>yarn.nodemanager.aux-services</name>

<value>mapreduce_shuffle</value>

</property>

<property>

<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value>

</property>

<property>

<name>yarn.resourcemanager.address</name>

<value>master:8032</value>

</property>

<property>

<name>yarn.resourcemanager.scheduler.address</name>

<value>master:8030</value>

</property>

<property>

<name>yarn.resourcemanager.resource-tracker.address</name>

<value>master:8035</value>

</property>

<property>

<name>yarn.resourcemanager.admin.address</name>

<value>master:8033</value>

</property>

<property>

<name>yarn.resourcemanager.webapp.address</name>

<value>master:8088</value>

  </property>

</configuration>

 

# Create a temporary directory and file directory

mkdir /usr/local/src/hadoop-2.6.5/tmp

mkdir -p /usr/local/src/hadoop-2.6.5/dfs/name

mkdir -p /usr/local/src/hadoop-2.6.5/dfs/data

 

# Configure environment variables, add at the end of the configuration

vim ~/.bashrc

export HADOOP_HOME=/usr/local/src/hadoop-2.6.5

export PATH=$PATH:$HADOOP_HOME/bin

# Copy environment variables to the slave node

scp    -r ~/.bashrc root@slave1:~/

scp  -r ~/.bashrc  root@slave2:~/

# Copy the Hadoop package to the slave node

scp  -r   /usr/local/src/hadoop-2.6.5  root@slave1:/usr/local/src/

scp  -r   /usr/local/src/hadoop-2.6.5  root@slave2:/usr/local/src/

# Reload environment variables

source ~/.bashrc

# Format Namenode

hadoop namenode -format

A successful format prints:

common.Storage: Storage directory /usr/local/src/hadoop-2.6.5/dfs/name has been successfully formatted.

Run the jps command on the master and slaves to check whether the processes have started.
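
After the start commands (see "Start commands for each component" below), the rough expectation for this configuration is NameNode, SecondaryNameNode and ResourceManager on the master, with DataNode and NodeManager on the slaves. A quick HDFS smoke test:

hadoop fs -mkdir -p /tmp/test

hadoop fs -ls /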


hive1.2.2

# Unzip the Hive package

cd /usr/local/src

tar zxvf apache-hive-1.2.2-bin.tar.gz

# Modify the configuration file: configure the metastore connection

cd apache-hive-1.2.2-bin/conf

vim hive-site.xml

<configuration>

 <property>

<name>javax.jdo.option.ConnectionURL</name>

<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>

</property>

<property>

<name>javax.jdo.option.ConnectionDriverName</name>

<value>com.mysql.jdbc.Driver</value>

</property>

<property>

 <name>javax.jdo.option.ConnectionUserName</name>

  <value>root</value>

</property>

<property>

 <name>javax.jdo.option.ConnectionPassword</name>

<value>123456</value>

</property>

</configuration>
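
The connection above assumes a MySQL server on master with user root and password 123456; the hive database itself is created automatically thanks to createDatabaseIfNotExist=true. A minimal grant, run inside the MySQL shell (MySQL 5.x syntax):

mysql -uroot -p

GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '123456';

FLUSH PRIVILEGES;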

 

# Increase environment variables

vim ~/.bashrc

export HIVE_HOME=/usr/local/src/hive1.2.2

export PATH=$HIVE_HOME/bin:$PATH

# Reload environment variables

source ~/.bashrc

# Configure JDBC connection tool

cd /usr/local/src

tar zxvf mysql-connector-java-5.1.44.tar.gz

# Copy the JDBC library file to Hive

cp mysql-connector-java-5.1.44/mysql-connector-java-5.1.44-bin.jar apache-hive-1.2.2-bin/lib

# Start the Hive service

hive

Starting Hive may fail with a jline version-conflict error.

Solution:

Hadoop 2.6.5 ships jline-0.9.94 under share/hadoop/yarn/lib, while Hive 1.2.2 ships jline-2.12; replace Hadoop's copy with the jar from Hive so the two versions match.

Run the command in the Hadoop root directory:

find ../ -name 'jline*'

search results:

../hadoop-2.6.5/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jline-0.9.94.jar

../hadoop-2.6.5/share/hadoop/yarn/lib/jline-0.9.94.jar

../hadoop-2.6.5/share/hadoop/kms/tomcat/webapps/kms/WEB-INF/lib/jline-0.9.94.jar

../scala-2.11.4/lib/jline-2.12.jar

../hive1.2.2/lib/jline-2.12.jar

../zookeeper-3.4.11/lib/jline-0.9.94.jar

../zookeeper-3.4.11/lib/jline-0.9.94.LICENSE.txt

../zookeeper-3.4.11/src/java/lib/jline-0.9.94.LICENSE.txt

Copy jline-2.12.jar from Hive's lib directory into Hadoop's share/hadoop/yarn/lib, replacing jline-0.9.94.jar.

Reference: https://blog.csdn.net/wanghaiyuan1993/article/details/46272553
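
A minimal sketch of the swap, using the paths found above (back up the old jar first):

cd /usr/local/src

mv hadoop-2.6.5/share/hadoop/yarn/lib/jline-0.9.94.jar ~/jline-0.9.94.jar.bak

cp hive1.2.2/lib/jline-2.12.jar hadoop-2.6.5/share/hadoop/yarn/lib/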


Zookeeper3.4.11

# Unzip the Zookeeper package

cd /usr/local/src

tar zxvf zookeeper-3.4.11.tar.gz

# Modify the configuration file: comment out line 12 (the default dataDir) and add the following at the end

cd /usr/local/src/zookeeper-3.4.11/conf

cp zoo_sample.cfg zoo.cfg

vim zoo.cfg

dataDir=/usr/local/src/zookeeper-3.4.11/data

dataLogDir=/usr/local/src/zookeeper-3.4.11/log

server.1=master:2888:3888

server.2=slave1:2888:3888

server.3=slave2:2888:3888

# Create log folder and data folder

mkdir /usr/local/src/zookeeper-3.4.11/data

mkdir /usr/local/src/zookeeper-3.4.11/log

 

# Configure environment variables

vim ~/.bashrc

export ZOOKEEPER_HOME=/usr/local/src/zookeeper-3.4.11

export PATH=$PATH:$ZOOKEEPER_HOME/bin

# Copy environment variables to other nodes

scp ~/.bashrc     root@slave1:~/

scp ~/.bashrc     root@slave2:~/

 # Reload environment variables

source ~/.bashrc

# Copy the Zookeeper package to the slave node

scp -r /usr/local/src/zookeeper-3.4.11 root@slave1:/usr/local/src/

scp -r /usr/local/src/zookeeper-3.4.11 root@slave2:/usr/local/src/

#Add the myid on each node; the id must be unique per node and match the server.N entries in zoo.cfg

#Master

echo "1">/usr/local/src/zookeeper-3.4.11/data/myid

#Slave1

echo "2">/usr/local/src/zookeeper-3.4.11/data/myid

#Slave2

echo "3" >  /usr/local/src/zookeeper-3.4.11/data/myid

#Start the Zookeeper service (must be started on every node)

zkServer.sh start

zkServer.sh stop

zkServer.sh status

On every node, jps should show QuorumPeerMain.
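
On a healthy three-node ensemble, zkServer.sh status reports one leader and two followers; the output looks along these lines:

ZooKeeper JMX enabled by default

Using config: /usr/local/src/zookeeper-3.4.11/bin/../conf/zoo.cfg

Mode: follower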


HBASE0.9.8

 

# Unzip the HBase package

cd /usr/local/src

tar zxvf hbase-1.3.1-bin.tar.gz

# Configure regionservers hostname

cd hbase0.9.8/conf

vim regionservers

master

slave1

slave2

# Configure environment variables and disable the Zookeeper instance bundled with HBase

vim hbase-env.sh

export JAVA_HOME=/usr/local/src/jdk1.8.0_172

export HBASE_MANAGES_ZK=false

(These two settings sit around lines 29 and 124 of hbase-env.sh.)

 

# Configure Hbase core parameters

vim hbase-site.xml

<configuration>

<property>

<name>hbase.tmp.dir</name>

<value>/usr/local/src/hbase0.9.8/tmp/hbase</value>

  </property>

<property>

<name>hbase.rootdir</name>

               <value>hdfs://master:9000/hbase</value>

</property>

<property>

  <name>hbase.cluster.distributed</name>

<value>true</value>

</property>

<property>

<name>hbase.zookeeper.quorum</name>

<value>master,slave1,slave2</value>

</property>

<property>

<name>hbase.zookeeper.property.dataDir</name>

<value>/usr/local/src/zookeeper-3.4.11/tmp/zookeeper</value>

</property>

 <property>

<name>hbase.master.info.port</name>

<value>60010</value>

</property>

</configuration>

 

# Create a temporary directory

mkdir -p /usr/local/src/hbase0.9.8/tmp/hbase

mkdir -p /usr/local/src/zookeeper-3.4.11/tmp/zookeeper

# Copy HBase package to other nodes

scp  -r   /usr/local/src/hbase0.9.8  root@slave1:/usr/local/src/

scp  -r   /usr/local/src/hbase0.9.8  root@slave2:/usr/local/src/

# Configure environment variables

vim ~/.bashrc

export HBASE_HOME=/usr/local/src/hbase0.9.8

export PATH=$PATH:$HBASE_HOME/bin

#Copy environment variables to other nodes

scp     ~/.bashrc root@slave1:~/

scp     ~/.bashrc root@slave2:~/

#Reload environment variables

source ~/.bashrc

 

Start commands for each component:

Start HDFS

cd /usr/local/src/hadoop-2.6.5/sbin

./start-all.sh

Start Zookeeper (run on every node)

zkServer.sh start

zkServer.sh status

Start Spark (from any path); if you are only starting HBase, Spark does not need to be running

start-all.sh

Start hbase

start-hbase.sh

Close hbase

stop-hbase.sh

zkServer.sh stop

cd /usr/local/src/hadoop-2.6.5/sbin

./stop-all.sh

 

jps

Master: HMaster HRegionServer

Slave:  HRegionServer
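
A quick smoke test from the HBase shell (the table and column-family names are just examples):

hbase shell

create 'test', 'cf'

put 'test', 'row1', 'cf:a', 'value1'

scan 'test'

exit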


Flume 1.6

# Unzip the Flume package

cd /usr/local/src

tar zxvf apache-flume-1.6.0-bin.tar.gz

 

# Modify the configuration file, configure the working mode

cd apache-flume-1.6.0-bin/conf

cp flume-env.sh.template flume-env.sh

vi flume-env.sh

export JAVA_HOME=/usr/local/src/jdk1.8.0_172

export JAVA_OPTS="-Xms100m -Xmx2000m -Dcom.sun.management.jmxremote"

 

 

# Modify the configuration file, configure the working mode

cd apache-flume-1.6.0-bin/conf

#NetCat

vim flume-netcat.conf

# Name the components on this agent

agent.sources = r1

agent.sinks = k1

agent.channels = c1

# Describe/configuration the source

agent.sources.r1.type = netcat

agent.sources.r1.bind = 127.0.0.1

agent.sources.r1.port = 44444

# Describe the sink

agent.sinks.k1.type = logger

# Use a channel which buffers events in memory

agent.channels.c1.type = memory

agent.channels.c1.capacity = 1000

agent.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel

agent.sources.r1.channels = c1

agent.sinks.k1.channel = c1

# Verification: server side

Run the following from the flume-1.6.0 directory:

flume-ng agent --conf conf --conf-file conf/flume-netcat.conf --name agent -Dflume.root.logger=INFO,console

# Client (on Windows)

telnet master 44444
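
Whatever is typed into the telnet session should show up as an event in the agent's console; the logger sink prints each event in roughly this shape (illustrative output):

INFO sink.LoggerSink: Event: { headers:{} body: 68 65 6C 6C 6F hello }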


kafka_2.11-0.10.2.1

Refer to the following installation:

1.https://blog.csdn.net/qq_43605654/article/details/90786063?depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-1&utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-1

 

2.https://blog.csdn.net/CarolRain/article/details/78376642?depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-6&utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-6

 

# Unzip the Kafka package

cd /usr/local/src

tar zxvf kafka_2.11-0.10.2.1.tgz

# Point zookeeper.connect at every host in the cluster where Zookeeper is deployed

cd kafka_2.11-0.10.2.1/config

vim server.properties

Line 119: zookeeper.connect=master:2181,slave1:2181,slave2:2181

# Increase environment variables

vim ~/.bashrc

export KAFKA_HOME=/usr/local/src/kafka_2.11-0.10.2.1

export PATH=$KAFKA_HOME/bin:$PATH

# Copy Kafka package to slave node

scp -r /usr/local/src/kafka_2.11-0.10.2.1 root@slave1:/usr/local/src/

scp -r  /usr/local/src/kafka_2.11-0.10.2.1   root@slave2:/usr/local/src/

#Copy environment variables to other nodes

scp ~/.bashrc root@slave1:~/

scp ~/.bashrc root@slave2:~/

 # Reload environment variables

source ~/.bashrc

#Modify Kafka broker id, each broker id in the cluster must be unique

#Master

vim /usr/local/src/kafka_2.11-0.10.2.1/config/server.properties

broker.id=0

#Slave1

broker.id=1

#Slave2

broker.id=2

# Create a startup script for Zookeeper (skip this step if you start the Zookeeper cluster installed earlier)

vim /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka-zookeeper.sh

/usr/local/src/kafka_2.11-0.10.2.1/bin/zookeeper-server-start.sh /usr/local/src/kafka_2.11-0.10.2.1/config/zookeeper.properties

# Grant execution permissions

chmod +x /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka-zookeeper.sh

 

# Start the Zookeeper cluster that comes with Kafka (all nodes need to be started separately)

start-kafka-zookeeper.sh

 

# Create Kafka startup script

vim /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh

/usr/local/src/kafka_2.11-0.10.2.1/bin/kafka-server-start.sh -daemon /usr/local/src/kafka_2.11-0.10.2.1/config/server.properties

# Grant execution permissions

chmod +x /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh

# Copy the Kafka startup script to the slave node

scp -r   /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh   root@slave1:/usr/local/src/kafka_2.11-0.10.2.1/bin/

scp -r /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh   root@slave2:/usr/local/src/kafka_2.11-0.10.2.1/bin/

# Start Kafka cluster (all nodes need to be started separately)

start-kafka.sh

# or run in the foreground from the Kafka directory:

bin/kafka-server-start.sh config/server.properties
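
To verify the cluster end to end, create a replicated topic, then produce and consume a few messages (the topic name test is only an example; run producer and consumer in separate terminals):

kafka-topics.sh --create --zookeeper master:2181 --replication-factor 3 --partitions 3 --topic test

kafka-console-producer.sh --broker-list master:9092 --topic test

kafka-console-consumer.sh --bootstrap-server master:9092 --topic test --from-beginning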


Spark installation 1.6.3

# Unzip Spark and Scala packages

cd /usr/local/src

tar zxvf spark-2.0.2-bin-hadoop2.6.tgz

tar zxvf scala-2.11.8.tgz

# Modify the configuration file Configure environment variables

cd spark-2.0.2-bin-hadoop2.6/conf

cp spark-env.sh.template spark-env.sh

vim spark-env.sh

export SCALA_HOME=/usr/local/src/scala-2.11.4

export JAVA_HOME=/usr/local/src/jdk1.8.0_172

export HADOOP_HOME=/usr/local/src/hadoop-2.6.5

export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

SPARK_MASTER_IP=master

SPARK_LOCAL_DIRS=/usr/local/src/spark1.6.3

SPARK_DRIVER_MEMORY=1G

# Modify the configuration file to add the host name of the slave node

  cp slaves.template slaves

vim slaves

slave1

slave2

# Configure environment variables: vim ~/.bashrc, add at the end

export SCALA_HOME=/usr/local/src/scala

export PATH=$PATH:$SCALA_HOME/bin

export SPARK_HOME=/usr/local/src/spark1.6.3

export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin

# Copy environment variables to other nodes

scp -r ~/.bashrc root@slave1:~/

scp -r ~/.bashrc root@slave2:~/

# Copy the Scala package to the slave node

scp -r  /usr/local/src/scala-2.11.4   root@slave1:/usr/local/src/

scp -r  /usr/local/src/scala-2.11.4 root@slave2:/usr/local/src/

# Copy the Spark package to the slave node

scp   -r   /usr/local/src/spark1.6.3   root@slave1:/usr/local/src/

scp   -r   /usr/local/src/spark1.6.3 root@slave2:/usr/local/src/

# Reload environment variables

source ~/.bashrc

# Start the cluster (either start-all.sh, or the master and slaves separately)

start-all.sh

start-master.sh

start-slaves.sh

Shut down:

stop-all.sh

stop-master.sh

stop-slaves.sh

Web UI:

http://master:8080/
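
As a quick sanity check, run the bundled SparkPi example against the standalone master (run-example lives in $SPARK_HOME/bin; pointing the MASTER variable at the cluster is the form documented for standalone mode):

MASTER=spark://master:7077 run-example SparkPi 10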
