Contents
1. Environment variables: /etc/profile
2. ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
3. ${HADOOP_HOME}/etc/hadoop/core-site.xml
4. ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml
5. ${HADOOP_HOME}/etc/hadoop/capacity-scheduler.xml
6. ${HADOOP_HOME}/etc/hadoop/yarn-site.xml
7. ${HADOOP_HOME}/etc/hadoop/mapred-site.xml
8. ${HADOOP_HOME}/sbin
9. ${HIVE_HOME}/conf/hive-env.sh
10. ${HIVE_HOME}/conf/hive-site.xml
11. ${HIVE_HOME}/conf/spark-defaults.conf
12. ${ZOOKEEPER_HOME}/conf/zoo.cfg
13. ${KAFKA_HOME}/config/server.properties
14. ${HBASE_HOME}/conf/hbase-env.sh
15. ${HBASE_HOME}/conf/hbase-site.xml
Building the VMs with Vagrant + VirtualBox: https://blog.csdn.net/appearbeauty/category_10799817.html
1. Software Versions

| Software | Version |
| --- | --- |
| linux | centos7 |
| jdk | 1.8 |
| hadoop | 2.7.3 |
| hive | 2.1.1 |
| kafka | 2.7.0 |
| zookeeper | 3.6.2 |
| sqoop | 1.4.7 |
| flume | 1.9.0 |
| mysql | 5.6 |
| hbase | 2.2.6 |
2. Software Installation
1. JDK
yum -y update
yum -y install java-1.8.0-openjdk-devel
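To confirm the install and locate the exact JRE path used for JAVA_HOME in the configs below (the version suffix changes with OpenJDK updates):
java -version
readlink -f "$(which java)"   # prints .../jre/bin/java; JAVA_HOME is that jre directory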
2. MySQL
wget http://repo.mysql.com/mysql-community-release-el7-5.noarch.rpm
rpm -ivh mysql-community-release-el7-5.noarch.rpm
yum update
yum -y install mysql-server
systemctl start mysqld
mysqladmin -u root password "123456"
mysql -u root -p
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456' ;
FLUSH PRIVILEGES;
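Back in the shell, the remote grant can be checked by connecting over TCP rather than the local socket (the VM IP 192.168.33.10 is assumed from the rest of this guide):
mysql -h 192.168.33.10 -u root -p123456 -e "SELECT VERSION();"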
3. mysql-connector-java
yum -y install mysql-connector-java.noarch
cp /usr/share/java/mysql-connector-java.jar /root/soft/apache-hive-2.1.1-bin/lib
3. Configuration Files
1. Environment variables: /etc/profile
export TZ='Asia/Shanghai'
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre
export HADOOP_HOME=/root/soft/hadoop-2.7.3
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export FLUME_HOME=/root/soft/apache-flume-1.9.0-bin
export HIVE_HOME=/root/soft/apache-hive-2.1.1-bin
export SPARK_HOME=/root/soft/spark-3.0.0-bin-hadoop2.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
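After editing /etc/profile, reload it and confirm the tools resolve:
source /etc/profile
hadoop version
hive --version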
2.${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre
3.${HADOOP_HOME}/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.33.10:9000</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/var/hadoop</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
</configuration>
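Snappy and LZO need native libraries (and, for LZO, the hadoop-lzo jar on the classpath), or jobs will fail at runtime; hadoop checknative shows what this build can actually load:
hadoop checknative -a
# snappy should report true; LZO additionally requires liblzo2 and the hadoop-lzo jar, which checknative does not list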
4.${HADOOP_HOME}/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
5.${HADOOP_HOME}/etc/hadoop/capacity-scheduler.xml
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>default,hive</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>20</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.hive.capacity</name>
<value>80</value>
</property>
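root.queues and root.default.capacity override values already present in capacity-scheduler.xml, while root.hive.capacity is new. Queue changes can be pushed to a running ResourceManager without a restart:
yarn rmadmin -refreshQueues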
6.${HADOOP_HOME}/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>192.168.33.10</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
<value>3600</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/var/logs</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs/</value>
</property>
</configuration>
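With log aggregation enabled, logs of finished applications are pulled back from HDFS with yarn logs (the application id below is a placeholder):
yarn logs -applicationId <application_id>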
7.${HADOOP_HOME}/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
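The configs above address the same machine both by IP and by the hostname master, so master must resolve on every node; for this single-VM setup that means an /etc/hosts entry like:
192.168.33.10 master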
8.${HADOOP_HOME}/sbin
Add at the top of start-dfs.sh and stop-dfs.sh:
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
HDFS_DATANODE_USER=root
Add at the top of start-yarn.sh and stop-yarn.sh:
YARN_RESOURCEMANAGER_USER=root
YARN_NODEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
9.${HIVE_HOME}/conf/hive-env.sh
export HADOOP_HOME=/root/soft/hadoop-2.7.3/
export SPARK_HOME=/root/soft/spark-3.0.0-bin-hadoop2.7
export HIVE_CONF_DIR=/root/soft/apache-hive-2.1.1-bin/conf/
10.${HIVE_HOME}/conf/hive-site.xml
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.33.10:3306/hive_metadata?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;verifyServerCertificate=false&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>system:java.io.tmpdir</name>
<value>/home/root/hdp/tmpdir</value>
</property>
<property>
<name>system:user.name</name>
<value>hive</value>
</property>
<property>
<name>beeline.hs2.connection.user</name>
<value>hive</value>
</property>
<property>
<name>beeline.hs2.connection.password</name>
<value>hive</value>
</property>
<property>
<name>beeline.hs2.connection.hosts</name>
<value>192.168.33.10:10000</value>
</property>
<property>
<name>spark.yarn.jars</name>
<value>hdfs://192.168.33.10:9000/spark-jars/*</value>
</property>
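spark.yarn.jars points into HDFS, so Spark's jars must be uploaded once before Hive-on-Spark jobs can run (the path matches the value above; HDFS must already be up):
hdfs dfs -mkdir -p /spark-jars
hdfs dfs -put $SPARK_HOME/jars/*.jar /spark-jars/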
11.${HIVE_HOME}/conf/spark-defaults.conf
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs://192.168.33.10:9000/spark-history
spark.executor.memory 512m
spark.driver.memory 512m
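The event-log directory is not created automatically, so make it before the first job:
hdfs dfs -mkdir -p /spark-history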
12.${ZOOKEEPER_HOME}/conf/zoo.cfg
dataDir=/var/zookeeper
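ZooKeeper ships only a sample config, so zoo.cfg is created from it, and the dataDir above must exist:
cd /root/soft/apache-zookeeper-3.6.2-bin
cp conf/zoo_sample.cfg conf/zoo.cfg
mkdir -p /var/zookeeper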
13.${KAFKA_HOME}/config/server.properties
log.dirs=/var/kafka-logs
listeners=PLAINTEXT://192.168.33.10:9092
advertised.listeners=PLAINTEXT://192.168.33.10:9092
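Once ZooKeeper and the broker are running (section 4), a minimal smoke test (the topic name test is arbitrary):
bin/kafka-topics.sh --create --topic test --partitions 1 --replication-factor 1 --bootstrap-server 192.168.33.10:9092
bin/kafka-topics.sh --list --bootstrap-server 192.168.33.10:9092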
14.${HBASE_HOME}/conf/hbase-env.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre
export HBASE_MANAGES_ZK=false
15.${HBASE_HOME}/conf/hbase-site.xml
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master:2181</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/root/soft/apache-zookeeper-3.6.2-bin/zkData</value>
</property>
</configuration>
4. Starting the Services
Format HDFS: bin/hdfs namenode -format
Start HDFS: sbin/start-dfs.sh
Start YARN: sbin/start-yarn.sh
Start the JobHistory server: sbin/mr-jobhistory-daemon.sh start historyserver
Initialize the Hive metastore schema: schematool -dbType mysql -initSchema
Start HiveServer2: nohup bin/hive --service hiveserver2 &
Start the Hive metastore service: nohup bin/hive --service metastore &
Connect with Beeline (credentials match beeline.hs2.connection.user/password above): beeline -u "jdbc:hive2://192.168.33.10:10000" -n hive -p hive
Start the ZooKeeper server: bin/zkServer.sh start
Start the ZooKeeper client: bin/zkCli.sh -server 192.168.33.10:2181
Start Kafka: nohup bin/kafka-server-start.sh config/server.properties >> /dev/null 2>&1 &
Start the HBase master: bin/hbase-daemon.sh start master
Start the HBase regionserver: bin/hbase-daemon.sh start regionserver
Start the HBase shell: bin/hbase shell
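With everything up, jps gives a quick sanity check; for this stack the JVM names should look roughly like this (RunJar appears twice, for HiveServer2 and the metastore):
jps
# NameNode, DataNode, SecondaryNameNode  (HDFS)
# ResourceManager, NodeManager           (YARN)
# JobHistoryServer                       (MapReduce history)
# RunJar, RunJar                         (HiveServer2, Hive metastore)
# QuorumPeerMain                         (ZooKeeper)
# Kafka                                  (Kafka broker)
# HMaster, HRegionServer                 (HBase)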
5. Miscellaneous
1. Guava version conflict
Run find ./ -name "guava*" and replace the older copies with the newest version found, as sketched below.
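A sketch of the replacement, assuming (as is typical for this stack) that Hadoop and Hive ship different guava versions; the jar names below are illustrative, keep whichever jar is newest on your machine:
find /root/soft -name "guava*.jar"
# illustrative: if Hive's jar is newer, copy it over Hadoop's older copy and delete the old jar
cp $HIVE_HOME/lib/guava-14.0.1.jar $HADOOP_HOME/share/hadoop/common/lib/
rm $HADOOP_HOME/share/hadoop/common/lib/guava-11.0.2.jar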
References
- https://www.runoob.com/mysql/mysql-install.html
- http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleCluster.html
- https://cwiki.apache.org/confluence/display/Hive/GettingStarted
- https://kafka.apache.org/quickstart
- https://zookeeper.apache.org/doc/r3.3.3/zookeeperStarted.html