After installing the CentOS system, modify the network configuration:
My three machines:
master 192.168.179.10
slave1 192.168.179.11
slave2 192.168.179.12
View the port number of each component: http://www.gaohongwei.cn/530/
CentOS7 configuration NAT mode network detailed steps (pro-test version) https://blog.csdn.net/Jackson_mvp/article/details/100856966
1. Restart the network
service network restart | systemctl restart network.service
Permanently change the name
hostnamectl set-hostname master1
vi /etc/hostname
Set the shared directory /mnt/hdfs
2. Modify hosts, each must be configured
vi /etc/hosts
192.168.179.10 master
192.168.179.11 slave1
192.168.179.12 slave2
3. System firewall and kernel firewall configuration (each node needs to be executed)
vi /etc/selinux/config
SELINUX=disabled
setenforce 0
systemctl stop firewalld.service
systemctl disable firewalld.service
# Temporarily turn off the kernel firewall
setenforce 0
# Permanently turn off the kernel firewall
vi /etc/selinux/config
SELINUX=disabled
# Temporarily turn off the system firewall
systemctl stop firewalld.service
#Permanently turn off the kernel firewall
systemctl disable firewalld.service
4. Password-free login
ssh-keygen -t rsa executes each
# Generate the public key (run on the master node; already executed on 192.168.179.9 and 192.168.179.10)
cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys
# Copy the public keys of other nodes (executed by the master node) 10 executed
ssh slave1 cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
ssh slave2 cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
# Copy the public key to other nodes (executed by the master node)
scp /root/.ssh/authorized_keys root@slave1:/root/.ssh/authorized_keys
scp /root/.ssh/authorized_keys root@slave2:/root/.ssh/authorized_keys
test
ssh slave1
exit
First, check the installed system configuration file (if you haven't installed it, you don't need to write it yourself, modify the location to your own directory).
vim /etc/profile
export JAVA_HOME=/usr/local/jdk1.8.0_144
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.8.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export SCALA_HOME=/usr/local/scala-2.11.6
export PATH=$PATH:$SCALA_HOME/bin
#export SPARK_HOME=/usr/local/spark-2.3.0-bin-hadoop2.7
#export PATH=$PATH:$SPARK_HOME/bin
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.5
export PATH=$PATH:$ZOOKEEPER_HOME/bin
export HBASE_HOME=/usr/local/hbase-2.0.0-alpha3
export PATH=$PATH:$HBASE_HOME/bin
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export PATH=$HIVE_HOME/bin:$PATH
export FLUME_HOME=/usr/local/flume1.8
export PATH=$PATH:$FLUME_HOME/bin
#set sqoop
export SQOOP_HOME=/usr/local/sqoop1.4.7
export PATH=$SQOOP_HOME/bin:$PATH
JDK installation
rpm -qa | grep java
rpm -e --nodeps java-1.8.0-openjdk-1.8.0.161-2.b14.el7.x86_64
# Configure environment variables, add at the end of the configuration
vim ~/.bashrc
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
# Copy environment variables to other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
# Copy the JDK package to other nodes
scp -r /usr/local/src/jdk1.8.0_172 root@slave1:/usr/local/src/
scp -r /usr/local/src/jdk1.8.0_172 root@slave2:/usr/local/src/
# Reload environment variables
source ~/.bashrc
# Test environment is configured successfully
java -version
Hadoop installation (prerequisite jdk is installed)
1. Unzip the installation package and enter the conf directory to modify the configuration file
vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/src/hadoop-2.6.5/tmp</value>
</property>
</configuration>
vim hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9001</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/src/hadoop-2.6.5/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/src/hadoop-2.6.5/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
#Modify configuration file Add MR configuration
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
# Modify configuration file Add resource management configuration
vim yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8035</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
</configuration>
# Create a temporary directory and file directory
mkdir /usr/local/src/hadoop-2.6.5/tmp
mkdir -p /usr/local/src/hadoop-2.6.5/dfs/name
mkdir -p /usr/local/src/hadoop-2.6.5/dfs/data
# Configure environment variables, add at the end of the configuration
vim ~/.bashrc
export HADOOP_HOME=/usr/local/src/hadoop-2.6.5
export PATH=$PATH:$HADOOP_HOME/bin
# Copy environment variables to the slave node
scp -r ~/.bashrc root@slave1:~/
scp -r ~/.bashrc root@slave2:~/
# Copy the Hadoop package to the slave node
scp -r /usr/local/src/hadoop-2.6.5 root@slave1:/usr/local/src/
scp -r /usr/local/src/hadoop-2.6.5 root@slave2:/usr/local/src/
# Reload environment variables
source ~/.bashrc
# Format Namenode
hadoop namenode -format
common.Storage: Storage directory /usr/local/src/hadoop-2.6.5/dfs/name has been successfully formatted.
jps command to check whether the process is started in master and slave
hive1.2.2
# Unzip the Hive package
cd /usr/local/src
tar zxvf apache-hive-1.2.2-bin.tar.gz
# Modify the configuration file Configure metadata related information
cd apache-hive-1.2.2-bin/conf
vim hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
</configuration>
# Increase environment variables
vim ~/.bashrc
export HIVE_HOME=/usr/local/src/hive1.2.2
export PATH=$HIVE_HOME/bin:$PATH
# Reload environment variables
source ~/.bashrc
# Configure JDBC connection tool
cd /usr/local/src
tar zxvf mysql-connector-java-5.1.44.tar.gz
# Copy the JDBC library file to Hive
cp mysql-connector-java-5.1.44/mysql-connector-java-5.1.44-bin.jar apache-hive-1.2.2-bin/lib
# Start Hive service
hive error
Solution:
Replace the jline jar under Hadoop's share/hadoop/yarn/lib directory with the matching jline jar shipped with Hive: i.e. replace hadoop-2.6.5/share/hadoop/yarn/lib/jline-0.9.94.jar with the jline-2.12.jar found under Hive's lib directory.
In short: Hadoop's old jline jar must be replaced by the newer jline jar from Hive.
Run the command in the Hadoop root directory:
find ../ -name jline*
search results:
../hadoop-2.6.5/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jline-0.9.94.jar
../hadoop-2.6.5/share/hadoop/yarn/lib/jline-0.9.94.jar
../hadoop-2.6.5/share/hadoop/kms/tomcat/webapps/kms/WEB-INF/lib/jline-0.9.94.jar
../scala-2.11.4/lib/jline-2.12.jar
../hive1.2.2/lib/jline-2.12.jar
../zookeeper-3.4.11/lib/jline-0.9.94.jar
../zookeeper-3.4.11/lib/jline-0.9.94.LICENSE.txt
../zookeeper-3.4.11/src/java/lib/jline-0.9.94.LICENSE.txt
Copy jline-2.12.jar from Hive's lib directory into Hadoop's share/hadoop/yarn/lib directory, removing the old jline-0.9.94.jar there.
Can refer to: https://blog.csdn.net/wanghaiyuan1993/article/details/46272553
Zookeeper3.4.11
# Unzip the Zookeeper package
cd /usr/local/src
tar zxvf zookeeper-3.4.11.tar.gz
# Modify the configuration file Comment line 12, add configuration at the end
cd /usr/local/src/zookeeper-3.4.11/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
dataDir=/usr/local/src/zookeeper-3.4.11/data
dataLogDir=/usr/local/src/zookeeper-3.4.11/log
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
# Create log folder and data folder
mkdir /usr/local/src/zookeeper-3.4.11/data
mkdir /usr/local/src/zookeeper-3.4.11/log
# Configure environment variables
vim ~/.bashrc
export ZOOKEEPER_HOME=/usr/local/src/zookeeper-3.4.11
export PATH=$PATH:$ZOOKEEPER_HOME/bin
# Copy environment variables to other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
# Reload environment variables
source ~/.bashrc
# Copy the Zookeeper package to the slave node
scp -r /usr/local/src/zookeeper-3.4.11 root@slave1:/usr/local/src/
scp -r /usr/local/src/zookeeper-3.4.11 root@slave2:/usr/local/src/
#Add ID separately, the id of each node cannot be repeated and must be consistent with the configuration file
#Master
echo "1">/usr/local/src/zookeeper-3.4.11/data/myid
#Slave1
echo "2">/usr/local/src/zookeeper-3.4.11/data/myid
#Slave2
echo "3" > /usr/local/src/zookeeper-3.4.11/data/myid
#Start Zookeeper service (each node must be started)
zkServer.sh start
zkServer.sh stop
zkServer.sh status
All: QuorumPeerMain
HBASE0.9.8
# Unzip the HBase package
cd /usr/local/src
tar zxvf hbase-1.3.1-bin.tar.gz
# Configure regionservers hostname
cd hbase0.9.8/conf
vim regionservers
master
slave1
slave2
# Configure environment variables and disable the Zookeeper service that comes with Hbase
vim hbase-env.sh
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export HBASE_MANAGES_ZK=false
(These settings correspond to lines 29 and 124 of hbase-env.sh.)
# Configure Hbase core parameters
vim hbase-site.xml
<configuration>
<property>
<name>hbase.tmp.dir</name>
<value>/usr/local/src/hbase0.9.8/tmp/hbase</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master,slave1,slave2</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/usr/local/src/zookeeper-3.4.11/tmp/zookeeper</value>
</property>
<property>
<name>hbase.master.info.port</name>
<value>60010</value>
</property>
</configuration>
# Create a temporary directory
mkdir -p /usr/local/src/hbase0.9.8/tmp/hbase
mkdir -p /usr/local/src/zookeeper-3.4.11/tmp/zookeeper
# Copy HBase package to other nodes
scp -r /usr/local/src/hbase0.9.8 root@slave1:/usr/local/src/
scp -r /usr/local/src/hbase0.9.8 root@slave2:/usr/local/src/
# Configure environment variables
vim ~/.bashrc
export HBASE_HOME=/usr/local/src/hbase0.9.8
export PATH=$PATH:$HBASE_HOME/bin
#Copy environment variables to other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
#Reload environment variables
source ~/.bashrc
Start command of each component:
Start HDFS
cd /usr/local/src/hadoop-2.6.5/sbin
./start-all.sh
Start Zookeeper, each node writes
zkServer.sh start
zkServer.sh status
Start spark (under any path), start hbase without starting spark
start-all.sh
Start hbase
start-hbase.sh
Close hbase
stop-hbase.sh
zkServer.sh stop
cd /usr/local/src/hadoop-2.6.5/sbin
./stop-all.sh
jps
Master: HMaster HRegionServer
Slave: HRegionServer
Flume 1.6
# Unzip the Flume package
cd /usr/local/src
tar zxvf apache-flume-1.6.0-bin.tar.gz
# Modify the configuration file, configure the working mode
cd apache-flume-1.6.0-bin/conf
cp flume-env.sh.template flume-env.sh
vi flume-env.sh
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export JAVA_OPTS="-Xms100m -Xmx2000m -Dcom.sun.management.jmxremote"
# Modify the configuration file, configure the working mode
cd apache-flume-1.6.0-bin/conf
#NetCat
vim flume-netcat.conf
# Name the components on this agent
agent.sources = r1
agent.sinks = k1
agent.channels = c1
# Describe/configuration the source
agent.sources.r1.type = netcat
agent.sources.r1.bind = 127.0.0.1
agent.sources.r1.port = 44444
# Describe the sink
agent.sinks.k1.type = logger
# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1
#Verification#Server
Knock in the flume1.6.0 directory
flume-ng agent --conf conf --conf-file conf/flume-netcat.conf --name agent -Dflume.root.logger=INFO,console
#Client (run on Windows)
telnet master 44444
kafka_2.11-0.10.2.1
Refer to the following installation:
# Unzip the Kafka package
cd /usr/local/src
tar zxvf kafka_2.11-0.10.2.1.tgz
# Configure Zookeeper as all hosts in the cluster where zookeeper is deployed
cd kafka_2.11-0.10.2.1/config
vim server.properties
Line 119: zookeeper.connect=master:2181,slave1:2181,slave2:2181
# Increase environment variables
vim ~/.bashrc
export KAFKA_HOME=/usr/local/src/kafka_2.11-0.10.2.1
export PATH=$KAFKA_HOME/bin:$PATH
# Copy Kafka package to slave node
scp -r /usr/local/src/kafka_2.11-0.10.2.1 root@slave1:/usr/local/src/
scp -r /usr/local/src/kafka_2.11-0.10.2.1 root@slave2:/usr/local/src/
#Copy environment variables to other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
# Reload environment variables
source ~/.bashrc
#Modify Kafka broker id, each broker id in the cluster must be unique
#Master
vim /usr/local/src/kafka_2.11-0.10.2.1/config/server.properties
broker.id=0
#Slave1
broker.id=1
#Slave2
broker.id=2
# Create a startup script for Zookeeper (if you start the Zookeeper cluster, skip this step)
vim /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka-zookeeper.sh
/usr/local/src/kafka_2.11-0.10.2.1/bin/zookeeper-server-start.sh
/usr/local/src/kafka_2.11-0.10.2.1/config/zookeeper.properties
# Grant execution permissions
chmod +x /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka-zookeeper.sh
# Start the Zookeeper cluster that comes with Kafka (all nodes need to be started separately)
start-kafka-zookeeper.sh
# Create Kafka startup script
vim /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh
/usr/local/src/kafka_2.11-0.10.2.1/bin/kafka-server-start.sh -daemon
/usr/local/src/kafka_2.11-0.10.2.1/config/server.properties
# Grant execution permissions
chmod +x /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh
# Copy the Kafka startup script to the slave node
scp -r /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh root@slave1:/usr/local/src/kafka_2.11-0.10.2.1/bin/
scp -r /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh root@slave2:/usr/local/src/kafka_2.11-0.10.2.1/bin/
# Start Kafka cluster (all nodes need to be started separately)
start-kafka.sh
bin/kafka-server-start.sh config/server.properties
Spark installation 1.6.3
# Unzip Spark and Scala packages
cd /usr/local/src
tar zxvf spark-2.0.2-bin-hadoop2.6.tgz
tar zxvf scala-2.11.8.tgz
# Modify the configuration file Configure environment variables
cd spark-2.0.2-bin-hadoop2.6/conf
cp spark-env.sh.template spark-env.sh
vim spark-env.sh
export SCALA_HOME=/usr/local/src/scala-2.11.4
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export HADOOP_HOME=/usr/local/src/hadoop-2.6.5
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
SPARK_MASTER_IP=master
SPARK_LOCAL_DIRS=/usr/local/src/spark1.6.3
SPARK_DRIVER_MEMORY=1G
# Modify the configuration file to add the host name of the slave node
cp slaves.template slaves
vim slaves
slave1
slave2
# Configure environment variables (vim ~/.bashrc), add at the end of the file
# Copy environment variables to other nodes
export SCALA_HOME=/usr/local/src/scala
export PATH=$PATH:$SCALA_HOME/bin
export SPARK_HOME=/usr/local/src/spark1.6.3
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
scp -r ~/.bashrc root@slave1:~/
scp -r ~/.bashrc root@slave2:~/
# Copy the Scala package to the slave node
scp -r /usr/local/src/scala-2.11.4 root@slave1:/usr/local/src/
scp -r /usr/local/src/scala-2.11.4 root@slave2:/usr/local/src/
#
Copy the Spark package to the slave node
scp -r /usr/local/src/spark1.6.3 root@slave1:/usr/local/src/
scp -r /usr/local/src/spark1.6.3 root@slave2:/usr/local/src/
# Reload environment variables
source ~/.bashrc
# Start the cluster
start-all.sh
start-master.sh
start-slaves.sh
shut down:
stop-all.sh
stop-master.sh
stop-slaves.sh
WEBUI view: