New chapter-Hadoop basic ecosystem cluster construction (pure command version without picture)

Three machine simulation:
Insert picture description here

tar.gz preparation

jdk1.8.0_144 + scala-2.11.12 + apache-zookeeper-3.5.8-bin + hadoop-2.7.2 + spark-2.4.0-bin-hadoop2.7 + apache-hive-1.2.1 + apache-tez-0.9.1 + hbase-2.1.10 + apache-phoenix-5.0.0-HBase-2.0-bin

1. jdk1.8 and above

2. scala 2.11.12

3. Secret-free configuration

cd ~/.ssh/
ssh-keygen -t rsa
ssh-copy-id ip

4. zk cluster configuration

4.1 vim conf/zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
clientPort=2181
dataLogDir=/opt/soft/zookeeper/logs
dataDir=/opt/soft/zookeeper/data
autopurge.snapRetainCount=500
autopurge.purgeInterval=24
server.1=golaxy-124-224:2888:3888
server.2=golaxy-124-225:2888:3888
server.3=golaxy-124-226:2888:3888

4.2 创建myid
echo "1" > /opt/soft/zookeeper/data/myid   # on golaxy-124-224 (server.1)
echo "2" > /opt/soft/zookeeper/data/myid   # on golaxy-124-225 (server.2)
echo "3" > /opt/soft/zookeeper/data/myid   # on golaxy-124-226 (server.3)

4.3
Start the service: ./bin/zkServer.sh start
Check service status: ./bin/zkServer.sh status

5. Hadoop cluster configuration

5.1 Configure environment variables

5.2 Create folder
mkdir hdfs
mkdir hdfs/tmp

5.3 vim hadoop-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0_144

5.4 vim core-site.xml

<!-- 指定HDFS中NameNode的地址 -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://172.31.124.224:9000</value>
</property>
<!-- 指定Hadoop运行时产生文件的存储目录 -->
<property>
    <name>hadoop.tmp.dir</name>
<value>/opt/soft/hadoop-2.7.2/hdfs/tmp</value>
</property>

5.5 vim hdfs-site.xml

<property>
	<name>dfs.replication</name>
	<value>1</value>
</property>
<!-- 指定Hadoop辅助名称节点主机配置 -->
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>172.31.124.226:50090</value>
</property>

5.6 vim yarn-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0_144

5.7 vim yarn-site.xml

<property>
	<name>yarn.nodemanager.aux-services</name>
	<value>mapreduce_shuffle</value>
</property>
<!-- 指定YARN的ResourceManager的地址 -->
<property>
	<name>yarn.resourcemanager.hostname</name>
	<value>golaxy-124-225</value>
</property>

5.8 vim mapred-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0_144

5.9 mv mapred-site.xml.template mapred-site.xml
vim mapred-site.xml

<!-- 指定MR运行在Yarn上 -->
<property>
	<name>mapreduce.framework.name</name>
	<value>yarn</value>
</property>

5.10 vim slaves
golaxy-124-224
golaxy-124-225
golaxy-124-226

5.11 Distribution of configuration file information
xsync /opt/soft/hadoop-2.7.2 [email protected]:/opt/soft/
xsync /opt/soft/hadoop-2.7.2 [email protected]:/opt/soft/

5.12 Format the NameNode (run once, on the NameNode machine only)
./bin/hdfs namenode -format

5.13 Execute on namenode (start namenode datanode SecondaryNameNode)
./bin/start-dfs.sh

5.14 Execute on the machine of resourcemanager (start yarn -> reourceManage and nodeManage)
sbin/start-yarn.sh

5.15 hadoop namenode stop the execution
./sbin/stop-dfs.sh

5.16 Stop yarn on resourceManage execution
./sbin/stop-yarn.sh

5.17

hadoop-nameNode -> http://172.31.124.224:50070/explorer.html#/
hadoop -yarn -> http://172.31.124.225:8088/cluster
hadoop-secondNameNode -> http://172.31.124.226:50090/status.html

6. Spark Standalone cluster mode construction

6.1 Spark environment variable configuration

6.2 mv spark-env.sh.template spark-env.sh

6.3 vim spark-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0_144
export SCALA_HOME=/usr/local/scala/scala-2.11.12
export SPARK_MASTER_IP=172.31.124.224
export SPARK_WORKER_MEMORY=2g
export SPARK_WORKER_CORES=7
export SPARK_WORKER_INSTANCES=1

6.4 vim slaves
172.31.124.225

spark-master -> webUi http://172.31.124.224:8080/

6.5 vim spark-defaults.conf (create a corresponding folder on hdfs )
spark.eventLog.enabled true
spark.eventLog.dir hdfs://master:9000/history
spark.eventLog.compress true

6.6 vim spark-env.sh
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.retainedApplications=3 -Dspark.history.fs.logDirectory=hdfs://172.16.0.180:9000/lwdEventlog"

6.7 start-history-server.sh

spark-history-server -> http://172.31.124.224:18080/

6.8 Write scalaDemo and run
/opt/soft/spark-2.4.0-bin-hadoop2.7/bin/spark-submit --class cn.golaxy.SparkTest \
  --master spark://172.31.124.224:7077 \
  --executor-memory 1G \
  --total-executor-cores 7 \
  /home/crawler/scala_jars/scala-0.0.1-SNAPSHOT-jar-with-dependencies.jar

7. hive configuration

7.1 mv apache-hive-1.2.1-bin/ hive
7.2 mv conf/hive-env.sh.template conf/hive-env.sh
export HADOOP_HOME=/opt/soft/hadoop-2.7.2
export HIVE_CONF_DIR=/opt/soft/hive/conf
7.3 cd ../hadoop-2.7.2/
./bin/hadoop fs -mkdir /tmp
./bin/hadoop fs -mkdir -p /user/hive/warehouse
./bin/hadoop fs -chmod g+w /tmp
./bin/hadoop fs -chmod g+w /user/hive/warehouse
7.4 cd ../hive/
./bin/hive
7.5 测试sql
create table student(id int, name string);
insert into student values(1000, 'ss');
select * from student;
7.6 安装mysql
unzip mysql-libs.zip
rpm -ivh MySQL-server-5.6.24-1.el6.x86_64.rpm
(遇到的坑
报错。 … conflicts with file from package mariadb-libs-1:5.5.60-1.el7_5.x86_64
rpm -e mariadb-libs-1:5.5.60-1.el7_5.x86_64
报错。 error: Failed dependencies:
libmysqlclient.so.18()(64bit) is needed by (installed) postfix-2:2.10.1-7.0.1.el7.centos.x86_64
libmysqlclient.so.18(libmysqlclient_18)(64bit) is needed by (installed) postfix-2:2.10.1-7.0.1.el7.centos.x86_64

rpm -ev --nodeps mariadb-libs-1:5.5.60-1.el7_5.x86_64

Uninstall, then re-run rpm -ivh MySQL... The
error log reports: /usr/sbin/mysqld: Table 'mysql.plugin' doesn't exist

Check out the installed package rpm -qa | grep MySQL
View the configuration path of the installation package rpm -ql MySQL-server-5.6.24-1.el6.x86_64.rpm

/usr/bin/mysql_install_db --user=mysql

service mysql start succeeded
)

7.7 安装client
rpm -ivh MySQL-client-5.6.24-1.el6.x86_64.rpm
mysql
SET PASSWORD=PASSWORD('<your-root-password>');
quit;

7.8 MySql中user表中主机配置
mysql -uroot -p   # enter the root password set above when prompted
use mysql;
update user set host='%' where host='localhost';
delete from user where Host='golaxy-124-225';
delete from user where Host='127.0.0.1';
delete from user where Host='::1';
flush privileges;
quit;

7.9 Hive original data configuration to mysql
tar -zxvf mysql-connector-java-5.1.27.tar.gz
cp mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/soft/hive/lib/
cd ../hive/

7.10 vim conf/hive-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
	<property>
		<name>javax.jdo.option.ConnectionURL</name>
		<value>jdbc:mysql://172.31.124.225:3306/metastore?createDatabaseIfNotExist=true</value>
		<description>JDBC connect string for a JDBC metastore</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionDriverName</name>
		<value>com.mysql.jdbc.Driver</value>
		<description>Driver class name for a JDBC metastore</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionUserName</name>
		<value>root</value>
		<description>username to use against metastore database</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionPassword</name>
		<value>[email protected]</value>
		<description>password to use against metastore database</description>
	</property>

	<property>
		<name>hive.cli.print.header</name>
		<value>true</value>
	</property>

	<property>
		<name>hive.cli.print.current.db</name>
		<value>true</value>
	</property>
</configuration>

7.11 hive日志
mv conf/hive-log4j.properties.template conf/hive-log4j.properties
vim conf/hive-log4j.properties
hive.log.dir=/opt/soft/hive/logs

8. Compute Engine-tez replace mr configuration

8.1 Close the original data to check vim conf/hive-site.xml

<property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
</property>

8.2
Go to the hadoop directory: ./bin/hadoop fs -mkdir /tez
./bin/hadoop fs -put /opt/soft/apache-tez-0.9.1-bin.tar.gz /tez

8.3
tar -zxvf apache-tez-0.9.1-bin.tar.gz
mv apache-tez-0.9.1-bin tez-0.9.1

8.4 Go to the hive directory vim conf/tez-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
	<property>
		<name>tez.lib.uris</name>
	    <value>${fs.defaultFS}/tez/apache-tez-0.9.1-bin.tar.gz</value>
	</property>
	<property>
	     <name>tez.use.cluster.hadoop-libs</name>
	     <value>true</value>
	</property>
	<property>
	     <name>tez.history.logging.service.class</name>
	     <value>org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService</value>
	</property>
</configuration>

8.5 vim conf/hive-env.sh
export HADOOP_HOME=/opt/soft/hadoop-2.7.2

export HIVE_CONF_DIR=/opt/soft/hive/conf

export TEZ_HOME=/opt/soft/tez-0.9.1    # the directory tez was extracted into
export TEZ_JARS=""
for jar in `ls $TEZ_HOME | grep jar`; do
    export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/$jar
done
for jar in `ls $TEZ_HOME/lib`; do
    export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/lib/$jar
done

export HIVE_AUX_JARS_PATH=/opt/soft/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar$TEZ_JARS

8.6 vim conf/hive-site.xml

<property>
    <name>hive.execution.engine</name>
    <value>tez</value>
</property>

8.7 hadoop-lzo-0.4.20.jar
synchronized to hadoop-2.7.2/share/hadoop/common/

9. hbase cluster configuration

9.1 tar -zxvf hbase-2.1.10-bin.tar.gz
9.2 vim conf/hbase-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0_144
export HBASE_MANAGES_ZK=false
9.3 vim conf/hbase-site.xml

<configuration>
	<property>     
		<name>hbase.rootdir</name>     
		<value>hdfs://golaxy-124-224:9000/HBase</value>   
	</property>

	<property>   
		<name>hbase.cluster.distributed</name>
		<value>true</value>
	</property>

   <!-- 0.98后的新变动,之前版本没有.port,默认端口为60000 -->
	<property>
		<name>hbase.master.port</name>
		<value>16000</value>
	</property>

	<property>    
		<name>hbase.zookeeper.quorum</name>
	     <value>golaxy-124-224:2181,golaxy-124-225:2181,golaxy-124-226:2181</value>
	</property>

	<property>   
		<name>hbase.zookeeper.property.dataDir</name>
	     <value>/opt/soft/zookeeper/data</value>
	</property>
</configuration>

9.4 vim conf/regionservers
golaxy-124-224
golaxy-124-225
golaxy-124-226
9.5 Symlink the hadoop configs into hbase
ln -s /opt/soft/hadoop-2.7.2/etc/hadoop/core-site.xml /opt/soft/hbase-2.1.10/conf/core-site.xml
ln -s /opt/soft/hadoop-2.7.2/etc/hadoop/hdfs-site.xml /opt/soft/hbase-2.1.10/conf/hdfs-site.xml
9.6 Synchronize configuration information to other servers
9.7 Start
Method one (per-daemon):
./bin/hbase-daemon.sh start master
./bin/hbase-daemon.sh start regionserver
Method two (whole cluster):
./bin/start-hbase.sh

9.8 webUi view

http://golaxy-124-224:16010/master-status

10. phoenix configuration-hbase query tool

10.1 Unzip and configure environment variables
tar -zxvf apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
mv apache-phoenix-5.0.0-HBase-2.0-bin phoenix-5.0.0
configuration Environment variable

10.2 Copy the required jar packages into hbase
cp phoenix-5.0.0-HBase-2.0-server.jar /opt/soft/hbase-2.1.10/lib/
cp phoenix-5.0.0-HBase-2.0-client.jar /opt/soft/hbase-2.1.10/lib/

Synchronize to the other two servers

10.3
Start: ./bin/sqlline.py golaxy-124-224,golaxy-124-225,golaxy-124-226:2181

Guess you like

Origin blog.csdn.net/weixin_45657738/article/details/111684905