Simple Hadoop Cluster Setup and Configuration (CDH Version)

  1. core-site.xml
    vim /export/servers/hadoop-2.6/etc/hadoop/core-site.xml
<!-- Default filesystem URI; fs.defaultFS is the current name for the deprecated fs.default.name -->
<property>
		<name>fs.defaultFS</name>
		<value>hdfs://cdh-node1:8020</value>
</property>


<!-- Directory where Hadoop stores files generated at runtime; default is /tmp/hadoop-${user.name} -->

<property>
		<name>hadoop.tmp.dir</name>
		<value>/export/servers/hadoop-2.6/hadoopDatas/tempDatas</value>
</property>


<!-- I/O buffer size; in production, tune this according to server performance -->

<property>
		<name>io.file.buffer.size</name>
		<value>4096</value>
</property>

<!-- Enable the HDFS trash: deleted data can be recovered from the trash; the interval is in minutes (10080 = 7 days) -->

<property>
		<name>fs.trash.interval</name>
		<value>10080</value>
</property>
<!-- Proxy-user settings (configuration for Impala) -->
<property>
	<name>hadoop.proxyuser.root.hosts</name>
	<value>*</value>
</property>
<property>
	<name>hadoop.proxyuser.root.groups</name>
	<value>*</value>
</property>

Create the Hadoop data directory: mkdir -p /export/data/hddata
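With fs.trash.interval set to 10080 minutes (7 days), a deleted file is moved to the current user's trash rather than removed at once. A quick illustration, assuming you operate as root (the file path is hypothetical):

    # Moved to /user/root/.Trash/Current/demo/data.txt, not deleted outright
    hdfs dfs -rm /demo/data.txt

    # Restore it any time within the 7-day retention window
    hdfs dfs -mv /user/root/.Trash/Current/demo/data.txt /demo/data.txt

    # Or empty the trash immediately
    hdfs dfs -expunge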

  2. hdfs-site.xml
    vim /export/servers/hadoop-2.6/etc/hadoop/hdfs-site.xml

<!-- Path where the NameNode stores metadata; in production, first determine the disk mount directories, then separate multiple paths with commas -->
<!--   Dynamic commissioning/decommissioning of cluster nodes (include/exclude host lists)
<property>
		<name>dfs.hosts</name>
		<value>/export/servers/hadoop-2.6/etc/hadoop/accept_host</value>
</property>
	<property>
		<name>dfs.hosts.exclude</name>
		<value>/export/servers/hadoop-2.6/etc/hadoop/deny_host</value>
</property>
-->
	 
<property>
		<name>dfs.namenode.secondary.http-address</name>
		<value>cdh-node1:50090</value>
</property>

<property>
		<name>dfs.namenode.http-address</name>
		<value>cdh-node1:50070</value>
</property>
<property>
		<name>dfs.namenode.name.dir</name>
		<value>file:///export/servers/hadoop-2.6/hadoopDatas/namenodeDatas</value>
</property>

<!-- Where the DataNode stores data blocks; in production, first determine the disk mount directories, then separate multiple paths with commas -->
<property>
		<name>dfs.datanode.data.dir</name>
		<value>file:///export/servers/hadoop-2.6/hadoopDatas/datanodeDatas</value>
</property>
	
<property>
		<name>dfs.namenode.edits.dir</name>
		<value>file:///export/servers/hadoop-2.6/hadoopDatas/dfs/nn/edits</value>
</property>
<property>
		<name>dfs.namenode.checkpoint.dir</name>
		<value>file:///export/servers/hadoop-2.6/hadoopDatas/dfs/snn/name</value>
</property>
<property>
		<name>dfs.namenode.checkpoint.edits.dir</name>
		<value>file:///export/servers/hadoop-2.6/hadoopDatas/dfs/nn/snn/edits</value>
</property>
	
<!-- Number of HDFS replicas -->
<property>
		<name>dfs.replication</name>
		<value>1</value>
</property>
<!-- Disable HDFS permission checks (convenient for testing; not recommended in production) -->
<property>
		<name>dfs.permissions</name>
		<value>false</value>
</property>
<!-- HDFS block size: 134217728 bytes = 128 MB -->
<property>
		<name>dfs.blocksize</name>
		<value>134217728</value>
</property>

<!-- Enable WebHDFS (REST API) on NameNodes and DataNodes (configuration for Impala) -->
<property>
	<name>dfs.webhdfs.enabled</name>
	<value>true</value>
</property>
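Before the first start, create the local directories that the two files above reference, then format the NameNode on cdh-node1. A sketch, assuming you kept the paths as configured:

    mkdir -p /export/servers/hadoop-2.6/hadoopDatas/tempDatas
    mkdir -p /export/servers/hadoop-2.6/hadoopDatas/namenodeDatas
    mkdir -p /export/servers/hadoop-2.6/hadoopDatas/datanodeDatas
    mkdir -p /export/servers/hadoop-2.6/hadoopDatas/dfs/nn/edits
    mkdir -p /export/servers/hadoop-2.6/hadoopDatas/dfs/snn/name
    mkdir -p /export/servers/hadoop-2.6/hadoopDatas/dfs/nn/snn/edits

    # Format the NameNode once, before the first start
    # (reformatting destroys existing HDFS metadata)
    hdfs namenode -format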

  3. mapred-site.xml
    vim /export/servers/hadoop-2.6/etc/hadoop/mapred-site.xml
    (if mapred-site.xml does not exist yet, copy it from mapred-site.xml.template first)

     <!-- Run MapReduce on YARN -->
     <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
     </property>
     
     <!-- Uber mode: run sufficiently small jobs inside the ApplicationMaster JVM to reduce scheduling overhead -->
     <property>
     		<name>mapreduce.job.ubertask.enable</name>
     		<value>true</value>
     </property>
     	
     <!-- JobHistory server RPC address -->
     <property>
     		<name>mapreduce.jobhistory.address</name>
     		<value>cdh-node1:10020</value>
     </property>

     <!-- JobHistory server web UI -->
     <property>
     		<name>mapreduce.jobhistory.webapp.address</name>
     		<value>cdh-node1:19888</value>
     </property>
     
     <!-- Compress map output; Snappy is a native-library codec and is efficient.
          Note: XML comments cannot nest, so the whole block is wrapped in a single comment. -->
     <!--
     <property>
               <name>mapreduce.map.output.compress</name>
               <value>true</value>
     </property>
     <property>
              <name>mapreduce.map.output.compress.codec</name>
              <value>org.apache.hadoop.io.compress.SnappyCodec</value>
     </property>
     -->
     <!-- Compress reduce output -->
     <!--
     <property>
              <name>mapreduce.output.fileoutputformat.compress</name>
              <value>true</value>
     </property>
     <property>
              <name>mapreduce.output.fileoutputformat.compress.type</name>
              <value>RECORD</value>
     </property>
     <property>
              <name>mapreduce.output.fileoutputformat.compress.codec</name>
              <value>org.apache.hadoop.io.compress.SnappyCodec</value>
     </property>
     -->
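The compression settings above are left commented out; before enabling them, it's worth verifying that the native Snappy library is actually loadable on every node:

    # Lists each native library and whether Hadoop can load it;
    # 'snappy: true' must appear, or Snappy-compressed jobs will fail
    hadoop checknative -a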
    

  4. yarn-site.xml
    vim /export/servers/hadoop-2.6/etc/hadoop/yarn-site.xml
<!-- Host that runs the ResourceManager -->
<property>
       <name>yarn.resourcemanager.hostname</name>
       <value>cdh-node1</value>
</property>

<!-- Auxiliary service run by the NodeManager; must be set to mapreduce_shuffle for MapReduce jobs to run (default: "") -->

<property>
       <name>yarn.nodemanager.aux-services</name>
       <value>mapreduce_shuffle</value>
</property>

<!-- Aggregate container logs to HDFS after an application finishes -->
<property>
		<name>yarn.log-aggregation-enable</name>
		<value>true</value>
</property>
<!-- Keep aggregated logs for 604800 seconds (7 days) -->
<property>
		<name>yarn.log-aggregation.retain-seconds</name>
		<value>604800</value>
</property>
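With log aggregation enabled, container logs are collected into HDFS when an application finishes and retained for 7 days, after which they can be read from any node (the application ID below is hypothetical):

    yarn logs -applicationId application_1526100291229_0001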

  5. slaves
    vim /export/servers/hadoop-2.6/etc/hadoop/slaves
    List the hostnames of the slave nodes, one per line (only 3 nodes are configured here):

    node1
    node2
    node3

    (node4
    node5
    …)
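With all five files configured, a typical finish looks like this (a sketch, assuming passwordless SSH from cdh-node1 to every slave node and Hadoop's bin/ and sbin/ directories on the PATH):

    # Push the configured Hadoop directory to the slave nodes
    scp -r /export/servers/hadoop-2.6 node1:/export/servers/
    scp -r /export/servers/hadoop-2.6 node2:/export/servers/
    scp -r /export/servers/hadoop-2.6 node3:/export/servers/

    # On cdh-node1: start HDFS, YARN, and the JobHistory server
    start-dfs.sh
    start-yarn.sh
    mr-jobhistory-daemon.sh start historyserver

    # Verify the daemons on each node
    jps

If everything came up, the NameNode web UI is at http://cdh-node1:50070 and the ResourceManager UI at the YARN default, http://cdh-node1:8088.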

Reposted from blog.csdn.net/Thomson617/article/details/83141752