Alibaba Cloud MapReduce remote configuration (accessing the cluster over the public network)

1. hosts
Linux /etc/hosts: map the server's internal (private) network IP to its hostname.
Windows hosts file: add the server's external (public) network IP and the hostname:
47.xxx hostname
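
For example, the two files might look like this (the IPs below are placeholders rather than the real addresses of this cluster; "zs" is the hostname used throughout this post):

Linux /etc/hosts on the cloud server (internal/private IP):
172.16.0.10    zs

Windows C:\Windows\System32\drivers\etc\hosts on the development machine (external/public IP):
47.xxx.xxx.xxx    zs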

2. etc/hadoop/slaves
Enter the Linux hostname of the worker node(s).
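
For the single-node setup in this post, the slaves file simply contains the one hostname (shown here for illustration):

zs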
3. The four XML configuration files (the hostname "zs" is used in the values)

yarn-site.xml
On the server, the YARN addresses must be bound to 0.0.0.0 so that port 8088 (the YARN cluster web UI) and the other YARN ports can be reached from the external network.
In the copy placed in the IDEA project's resources folder, change 0.0.0.0 to the hostname "zs" (i.e. the public IP); a partial snippet of that copy is shown after the listing below.

<configuration>

    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>zs</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>0.0.0.0:18040</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>0.0.0.0:18030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>0.0.0.0:18025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>0.0.0.0:18141</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>0.0.0.0:8088</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>file:///home/zzh/data/nm</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>yarn.log.server.url</name>
        <value>http://0.0.0.0:19888/jobhistory/logs/</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.web-proxy.address</name>
        <value>0.0.0.0:20000</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>-1</value>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/logs</value>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
</configuration>
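
For reference, this is roughly how the corresponding entries look in the copy of yarn-site.xml placed under the IDEA project's resources folder — only the bind address changes from 0.0.0.0 to the hostname "zs" (partial snippet for illustration, not the full file):

<!-- client-side copy in the IDEA resources folder: 0.0.0.0 replaced by the hostname "zs" -->
<property>
    <name>yarn.resourcemanager.address</name>
    <value>zs:18040</value>
</property>
<property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>zs:18030</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>zs:18025</value>
</property>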

mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>zs:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>zs:19888</value>
    </property>
    <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
    </property>
</configuration>

core-site.xml

<configuration>
 <property>
     <name>fs.defaultFS</name>
     <value>hdfs://zs:9000</value>
 </property>
 <!-- Newly added configuration -->
 <property>
     <name>hadoop.proxyuser.zzh.hosts</name>
     <value>*</value>
 </property>
 <property>
     <name>hadoop.proxyuser.zzh.groups</name>
     <value>*</value>
 </property>
 <property>
     <name>hadoop.http.staticuser.user</name>
     <value>zzh</value>
 </property>
</configuration>

hdfs-site.xml

<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>zs-cluster</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>64M</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///home/zzh/data/nn</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///home/zzh/data/dn</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:///home/zzh/data/snn</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.edits.dir</name>
        <value>file:///home/zzh/data/snn</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.web.ugi</name>
        <value>zzh,zzh</value>
    </property>
    <property>
        <name>fs.permissions.umask-mode</name>
        <value>000</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.permissions.superusergroup</name>
        <value>zzh</value>
    </property>
    <property>
        <name>dfs.namenode.safemode.threshold-pct</name>
        <value>0f</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir.restore</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.cluster.administrators</name>
        <value>*</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>zs:9001</value>
    </property>
</configuration>
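
With dfs.webhdfs.enabled set to true, a quick sanity check that the NameNode is reachable from outside is a WebHDFS call — a sketch assuming the Hadoop 2.x default NameNode HTTP port 50070 and that the port is open in the Alibaba Cloud security group:

curl "http://zs:50070/webhdfs/v1/?op=LISTSTATUS&user.name=zzh"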
                   

IDEA MapReduce code

Compared with running against a local virtual machine, the only addition is the line conf.set("dfs.client.use.datanode.hostname", "true");, which tells the HDFS client to connect to DataNodes by hostname (resolvable via the hosts file above) instead of the internal IPs reported by the NameNode.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.StringTokenizer;

public class WordCount extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new WordCount(),args));
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf=this.getConf();
        
        conf.set("dfs.client.use.datanode.hostname", "true");//重点配置

        Path in = new Path("/data/test/wordcount/");
        Path out=new Path("./word1");

        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(this.getClass());


        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job,in);

        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job,out);

        return job.waitForCompletion(true)?0:1;

    }
    static class WordCountMapper extends Mapper <LongWritable,Text,Text,IntWritable> {

        private final static IntWritable one=new IntWritable(1); // constant 1 emitted for each word occurrence
        private Text word=new Text(); // reusable Text object holding the current word

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            StringTokenizer words=new StringTokenizer(value.toString());
            while(words.hasMoreTokens()){
                word.set(words.nextToken());
                context.write(word, one);
            }
        }
    }

    static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private Text k3=new Text();
        private IntWritable v3=new IntWritable();

        @Override
        protected void reduce(Text k2, Iterable<IntWritable> v2s, Context context) throws IOException, InterruptedException {
            this.k3.set(k2.toString());

            int sum=0;
            for(IntWritable v2: v2s){
                sum+=v2.get();
            }
            this.v3.set(sum);

            context.write(this.k3,this.v3);
        }
    }


}
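
Because the output path above is the relative "./word1", the results land under the submitting user's HDFS home directory. A quick way to inspect them from the server (a sketch; the single default reducer writes part-r-00000):

hdfs dfs -ls word1
hdfs dfs -cat word1/part-r-*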
