hadoop详细笔记(十一) mapreduce数据分析案例之线段重叠案例

1 数据

1,4
2,5
3,4
2,5
2,4
3,4
2,6

2 需求

统计x轴上每个点线段重叠的次数

3 代码实现

package com._51doit.mr.line;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Author:   多易教育-行哥
 * Date:     2020/7/10
 * Description:
 */
/**
 * Author:   多易教育-行哥
 * Date:     2020/7/10
 * Description: MapReduce job that counts, for every integer point on the x axis,
 * how many input line segments cover it.
 *
 * Input lines look like "2,5" (inclusive start and end). The mapper expands each
 * segment into one (point, 1) pair per covered point; the reducer sums the counts.
 */
public class LineDemo {

    static class LineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused across map() calls so we don't allocate a Text/IntWritable per record.
        private final Text k = new Text();
        private final IntWritable v = new IntWritable(1);

        /**
         * Called once per input line. Example: "2,5" --> (2,1) (3,1) (4,1) (5,1).
         *
         * @param key     byte offset of the line within the input split (unused)
         * @param value   one input line, "start,end"
         * @param context sink for the (point, 1) pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().split(",");
            // trim() tolerates stray whitespace around the numbers
            int a1 = Integer.parseInt(split[0].trim());
            int a2 = Integer.parseInt(split[1].trim());
            for (int x = a1; x <= a2; x++) {
                k.set(Integer.toString(x)); // reuse the same Text instead of creating many
                context.write(k, v);
            }
        }
    }

    static class LineReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable v = new IntWritable();

        /**
         * Sums the counts for one x-axis point. Summing value.get() (rather than
         * counting iterations) stays correct even if a combiner is added later.
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            v.set(count);
            context.write(key, v);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Second argument is the job name; no instance of LineDemo is needed for it.
        Job job = Job.getInstance(conf, LineDemo.class.getSimpleName());

        job.setMapperClass(LineMapper.class);
        job.setReducerClass(LineReducer.class);
        // Map-phase output types. They happen to equal the final output types here,
        // but setting them explicitly avoids relying on that coincidence.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Final (reduce-phase) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(2);  // run 2 reduce tasks --> 2 output part files

        // Input data and job output directory (the output dir must not already exist).
        FileInputFormat.setInputPaths(job, new Path("D:\\data\\line\\input\\"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\data\\line\\res4\\"));

        // Propagate job success/failure to the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

4 知识点

4.1 添加log4j日志配置文件

# Set root logger level to INFO and its only appender to A1.
log4j.rootLogger=INFO, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

4.2 控制台观察日志

D:\tools\jdk1.8.0_65\bin\java.exe "-javaagent:D:\apps\IntelliJ IDEA 2020_01\IntelliJ IDEA 2020.1.3\lib\idea_rt.jar=12052:D:\apps\IntelliJ IDEA 2020_01\IntelliJ IDEA 2020.1.3\bin" -Dfile.encoding=UTF-8 -classpath D:\tools\jdk1.8.0_65\jre\lib\charsets.jar;D:\tools\jdk1.8.0_65\jre\lib\deploy.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\access-bridge-64.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\cldrdata.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\dnsns.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\jaccess.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\jfxrt.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\localedata.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\nashorn.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunec.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunjce_provider.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunmscapi.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunpkcs11.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\zipfs.jar;D:\tools\jdk1.8.0_65\jre\lib\javaws.jar;D:\tools\jdk1.8.0_65\jre\lib\jce.jar;D:\tools\jdk1.8.0_65\jre\lib\jfr.jar;D:\tools\jdk1.8.0_65\jre\lib\jfxswt.jar;D:\tools\jdk1.8.0_65\jre\lib\jsse.jar;D:\tools\jdk1.8.0_65\jre\lib\management-agent.jar;D:\tools\jdk1.8.0_65\jre\lib\plugin.jar;D:\tools\jdk1.8.0_65\jre\lib\resources.jar;D:\tools\jdk1.8.0_65\jre\lib\rt.jar;F:\codes\doit16\hdp\target\classes;D:\doit16-maven\repository\org\apache\hadoop\hadoop-common\2.8.5\hadoop-common-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-annotations\2.8.5\hadoop-annotations-2.8.5.jar;D:\doit16-maven\repository\com\google\guava\guava\11.0.2\guava-11.0.2.jar;D:\doit16-maven\repository\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;D:\doit16-maven\repository\org\apache\commons\commons-math3\3.1.1\commons-math3-3.1.1.jar;D:\doit16-maven\repository\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;D:\doit16-maven\repository\org\apache\httpcomponents\httpclient\4.5.2\httpclient-4.5.2.jar;D:\doit16-maven\repository\org\apache\httpcomponents\httpcore\4.4.4\httpcore-4.4.4.jar;D:\doit16-maven\repository\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;D:\do
it16-maven\repository\commons-io\commons-io\2.4\commons-io-2.4.jar;D:\doit16-maven\repository\commons-net\commons-net\3.1\commons-net-3.1.jar;D:\doit16-maven\repository\commons-collections\commons-collections\3.2.2\commons-collections-3.2.2.jar;D:\doit16-maven\repository\javax\servlet\servlet-api\2.5\servlet-api-2.5.jar;D:\doit16-maven\repository\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;D:\doit16-maven\repository\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;D:\doit16-maven\repository\org\mortbay\jetty\jetty-sslengine\6.1.26\jetty-sslengine-6.1.26.jar;D:\doit16-maven\repository\javax\servlet\jsp\jsp-api\2.1\jsp-api-2.1.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;D:\doit16-maven\repository\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;D:\doit16-maven\repository\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;D:\doit16-maven\repository\asm\asm\3.1\asm-3.1.jar;D:\doit16-maven\repository\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;D:\doit16-maven\repository\net\java\dev\jets3t\jets3t\0.9.0\jets3t-0.9.0.jar;D:\doit16-maven\repository\com\jamesmurty\utils\java-xmlbuilder\0.4\java-xmlbuilder-0.4.jar;D:\doit16-maven\repository\commons-lang\commons-lang\2.6\commons-lang-2.6.jar;D:\doit16-maven\repository\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;D:\doit16-maven\repository\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;D:\doit16-maven\repository\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;D:\doit16-maven\repository\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutil
s-core-1.8.0.jar;D:\doit16-maven\repository\org\slf4j\slf4j-api\1.7.10\slf4j-api-1.7.10.jar;D:\doit16-maven\repository\org\slf4j\slf4j-log4j12\1.7.10\slf4j-log4j12-1.7.10.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;D:\doit16-maven\repository\org\apache\avro\avro\1.7.4\avro-1.7.4.jar;D:\doit16-maven\repository\com\thoughtworks\paranamer\paranamer\2.3\paranamer-2.3.jar;D:\doit16-maven\repository\org\xerial\snappy\snappy-java\1.0.4.1\snappy-java-1.0.4.1.jar;D:\doit16-maven\repository\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;D:\doit16-maven\repository\com\google\code\gson\gson\2.2.4\gson-2.2.4.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-auth\2.8.5\hadoop-auth-2.8.5.jar;D:\doit16-maven\repository\com\nimbusds\nimbus-jose-jwt\4.41.1\nimbus-jose-jwt-4.41.1.jar;D:\doit16-maven\repository\com\github\stephenc\jcip\jcip-annotations\1.0-1\jcip-annotations-1.0-1.jar;D:\doit16-maven\repository\net\minidev\json-smart\2.3\json-smart-2.3.jar;D:\doit16-maven\repository\net\minidev\accessors-smart\1.2\accessors-smart-1.2.jar;D:\doit16-maven\repository\org\ow2\asm\asm\5.0.4\asm-5.0.4.jar;D:\doit16-maven\repository\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar;D:\doit16-maven\repository\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar;D:\doit16-maven\repository\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar;D:\doit16-maven\repository\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar;D:\doit16-maven\repository\org\apache\curator\curator-framework\2.7.1\curator-framework-2.7.1.jar;D:\doit16-maven\repository\com\jcraft\jsch\0.1.54\jsch-0.1.54.jar;D:\doit16-maven\repository\org\apache\curator\curator-client\2.7.1\curator-client-2.7.1.jar;D:\doit16-maven\repository\org
\apache\curator\curator-recipes\2.7.1\curator-recipes-2.7.1.jar;D:\doit16-maven\repository\com\google\code\findbugs\jsr305\3.0.0\jsr305-3.0.0.jar;D:\doit16-maven\repository\org\apache\htrace\htrace-core4\4.0.1-incubating\htrace-core4-4.0.1-incubating.jar;D:\doit16-maven\repository\org\apache\zookeeper\zookeeper\3.4.6\zookeeper-3.4.6.jar;D:\doit16-maven\repository\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;D:\doit16-maven\repository\org\tukaani\xz\1.0\xz-1.0.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-hdfs\2.8.5\hadoop-hdfs-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-hdfs-client\2.8.5\hadoop-hdfs-client-2.8.5.jar;D:\doit16-maven\repository\com\squareup\okhttp\okhttp\2.4.0\okhttp-2.4.0.jar;D:\doit16-maven\repository\com\squareup\okio\okio\1.4.0\okio-1.4.0.jar;D:\doit16-maven\repository\commons-daemon\commons-daemon\1.0.13\commons-daemon-1.0.13.jar;D:\doit16-maven\repository\io\netty\netty\3.6.2.Final\netty-3.6.2.Final.jar;D:\doit16-maven\repository\io\netty\netty-all\4.0.23.Final\netty-all-4.0.23.Final.jar;D:\doit16-maven\repository\xerces\xercesImpl\2.9.1\xercesImpl-2.9.1.jar;D:\doit16-maven\repository\xml-apis\xml-apis\1.3.04\xml-apis-1.3.04.jar;D:\doit16-maven\repository\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-mapreduce-client-core\2.8.5\hadoop-mapreduce-client-core-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-common\2.8.5\hadoop-yarn-common-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-api\2.8.5\hadoop-yarn-api-2.8.5.jar;D:\doit16-maven\repository\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar;D:\doit16-maven\repository\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar;D:\doit16-maven\repository\javax\activation\activation\1.1\activation-1.1.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar;D:\doit16-maven\repository\com\google\inject\guice\3.0\guice
-3.0.jar;D:\doit16-maven\repository\javax\inject\javax.inject\1\javax.inject-1.jar;D:\doit16-maven\repository\aopalliance\aopalliance\1.0\aopalliance-1.0.jar;D:\doit16-maven\repository\com\sun\jersey\contribs\jersey-guice\1.9\jersey-guice-1.9.jar;D:\doit16-maven\repository\com\google\inject\extensions\guice-servlet\3.0\guice-servlet-3.0.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-mapreduce-client-common\2.8.5\hadoop-mapreduce-client-common-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-client\2.8.5\hadoop-yarn-client-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-server-common\2.8.5\hadoop-yarn-server-common-2.8.5.jar;D:\doit16-maven\repository\log4j\log4j\1.2.17\log4j-1.2.17.jar com._51doit.mr.line.LineDemo
0    [main] INFO  org.apache.hadoop.conf.Configuration.deprecation  - session.id is deprecated. Instead, use dfs.metrics.session-id
2    [main] INFO  org.apache.hadoop.metrics.jvm.JvmMetrics  - Initializing JVM Metrics with processName=JobTracker, sessionId=
667  [main] WARN  org.apache.hadoop.mapreduce.JobResourceUploader  - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
715  [main] WARN  org.apache.hadoop.mapreduce.JobResourceUploader  - No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
953  [main] INFO  org.apache.hadoop.mapreduce.lib.input.FileInputFormat  - Total input files to process : 3
1000 [main] INFO  org.apache.hadoop.mapreduce.JobSubmitter  - number of splits:3
1130 [main] INFO  org.apache.hadoop.mapreduce.JobSubmitter  - Submitting tokens for job: job_local190908434_0001
1542 [main] INFO  org.apache.hadoop.mapreduce.Job  - The url to track the job: http://localhost:8080/
1543 [main] INFO  org.apache.hadoop.mapreduce.Job  - Running job: job_local190908434_0001
1545 [Thread-6] INFO  org.apache.hadoop.mapred.LocalJobRunner  - OutputCommitter set in config null
1550 [Thread-6] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - File Output Committer Algorithm version is 1
1550 [Thread-6] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
1550 [Thread-6] INFO  org.apache.hadoop.mapred.LocalJobRunner  - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
1595 [Thread-6] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Waiting for map tasks
1596 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Starting task: attempt_local190908434_0001_m_000000_0
1629 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - File Output Committer Algorithm version is 1
1630 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
1640 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.yarn.util.ProcfsBasedProcessTree  - ProcfsBasedProcessTree currently is supported only on Linux.
1683 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@6c404c50
1691 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Processing split: file:/D:/data/line/input/line - 副本 (2).txt:0+33
1712 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - (EQUATOR) 0 kvi 26214396(104857584)
1712 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - mapreduce.task.io.sort.mb: 100
1712 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - soft limit at 83886080
1713 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufvoid = 104857600
1713 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396; length = 6553600
1717 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
1726 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 
1726 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Starting flush of map output
1726 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Spilling map output
1726 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufend = 144; bufvoid = 104857600
1726 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396(104857584); kvend = 26214304(104857216); length = 93/6553600
1843 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Finished spill 0
1852 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task:attempt_local190908434_0001_m_000000_0 is done. And is in the process of committing
1872 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - map
1872 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task 'attempt_local190908434_0001_m_000000_0' done.
1876 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Final Counters for attempt_local190908434_0001_m_000000_0: Counters: 17
    File System Counters
        FILE: Number of bytes read=430
        FILE: Number of bytes written=373873
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=7
        Map output records=24
        Map output bytes=144
        Map output materialized bytes=204
        Input split bytes=111
        Combine input records=0
        Spilled Records=24
        Failed Shuffles=0
        Merged Map outputs=0
        GC time elapsed (ms)=0
        Total committed heap usage (bytes)=514850816
    File Input Format Counters 
        Bytes Read=33
1877 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Finishing task: attempt_local190908434_0001_m_000000_0
1877 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Starting task: attempt_local190908434_0001_m_000001_0
1878 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - File Output Committer Algorithm version is 1
1878 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
1878 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.yarn.util.ProcfsBasedProcessTree  - ProcfsBasedProcessTree currently is supported only on Linux.
1941 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@7f711871
1943 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Processing split: file:/D:/data/line/input/line - 副本.txt:0+33
1987 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - (EQUATOR) 0 kvi 26214396(104857584)
1987 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - mapreduce.task.io.sort.mb: 100
1987 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - soft limit at 83886080
1987 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufvoid = 104857600
1987 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396; length = 6553600
1988 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
1990 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 
1990 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Starting flush of map output
1990 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Spilling map output
1990 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufend = 144; bufvoid = 104857600
1990 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396(104857584); kvend = 26214304(104857216); length = 93/6553600
1997 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Finished spill 0
2006 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task:attempt_local190908434_0001_m_000001_0 is done. And is in the process of committing
2010 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - map
2010 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task 'attempt_local190908434_0001_m_000001_0' done.
2012 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Final Counters for attempt_local190908434_0001_m_000001_0: Counters: 17
    File System Counters
        FILE: Number of bytes read=798
        FILE: Number of bytes written=374133
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=7
        Map output records=24
        Map output bytes=144
        Map output materialized bytes=204
        Input split bytes=107
        Combine input records=0
        Spilled Records=24
        Failed Shuffles=0
        Merged Map outputs=0
        GC time elapsed (ms)=0
        Total committed heap usage (bytes)=514850816
    File Input Format Counters 
        Bytes Read=33
2012 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Finishing task: attempt_local190908434_0001_m_000001_0
2013 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Starting task: attempt_local190908434_0001_m_000002_0
2014 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - File Output Committer Algorithm version is 1
2014 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2015 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.yarn.util.ProcfsBasedProcessTree  - ProcfsBasedProcessTree currently is supported only on Linux.
2055 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@2114082e
2057 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Processing split: file:/D:/data/line/input/line.txt:0+33
2103 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - (EQUATOR) 0 kvi 26214396(104857584)
2104 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - mapreduce.task.io.sort.mb: 100
2104 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - soft limit at 83886080
2104 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufvoid = 104857600
2104 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396; length = 6553600
2111 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2113 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 
2114 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Starting flush of map output
2114 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Spilling map output
2114 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufend = 144; bufvoid = 104857600
2114 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396(104857584); kvend = 26214304(104857216); length = 93/6553600
2220 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Finished spill 0
2224 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task:attempt_local190908434_0001_m_000002_0 is done. And is in the process of committing
2226 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - map
2226 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task 'attempt_local190908434_0001_m_000002_0' done.
2226 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Final Counters for attempt_local190908434_0001_m_000002_0: Counters: 17
    File System Counters
        FILE: Number of bytes read=1166
        FILE: Number of bytes written=374393
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=7
        Map output records=24
        Map output bytes=144
        Map output materialized bytes=204
        Input split bytes=98
        Combine input records=0
        Spilled Records=24
        Failed Shuffles=0
        Merged Map outputs=0
        GC time elapsed (ms)=6
        Total committed heap usage (bytes)=514850816
    File Input Format Counters 
        Bytes Read=33
2226 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Finishing task: attempt_local190908434_0001_m_000002_0
2226 [Thread-6] INFO  org.apache.hadoop.mapred.LocalJobRunner  - map task executor complete.
2228 [Thread-6] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Waiting for reduce tasks
2229 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Starting task: attempt_local190908434_0001_r_000000_0
2241 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - File Output Committer Algorithm version is 1
2241 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2241 [pool-4-thread-1] INFO  org.apache.hadoop.yarn.util.ProcfsBasedProcessTree  - ProcfsBasedProcessTree currently is supported only on Linux.
2280 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@41c38664
2282 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.ReduceTask  - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@25868440
2298 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - MergerManager: memoryLimit=5328129024, maxSingleShuffleLimit=1332032256, mergeThreshold=3516565248, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2300 [EventFetcher for fetching Map Completion Events] INFO  org.apache.hadoop.mapreduce.task.reduce.EventFetcher  - attempt_local190908434_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2343 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.LocalFetcher  - localfetcher#1 about to shuffle output of map attempt_local190908434_0001_m_000001_0 decomp: 90 len: 94 to MEMORY
2350 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput  - Read 90 bytes from map-output for attempt_local190908434_0001_m_000001_0
2351 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - closeInMemoryFile -> map-output of size: 90, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->90
2356 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.LocalFetcher  - localfetcher#1 about to shuffle output of map attempt_local190908434_0001_m_000000_0 decomp: 90 len: 94 to MEMORY
2358 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput  - Read 90 bytes from map-output for attempt_local190908434_0001_m_000000_0
2358 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - closeInMemoryFile -> map-output of size: 90, inMemoryMapOutputs.size() -> 2, commitMemory -> 90, usedMemory ->180
2363 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.LocalFetcher  - localfetcher#1 about to shuffle output of map attempt_local190908434_0001_m_000002_0 decomp: 90 len: 94 to MEMORY
2364 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput  - Read 90 bytes from map-output for attempt_local190908434_0001_m_000002_0
2364 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - closeInMemoryFile -> map-output of size: 90, inMemoryMapOutputs.size() -> 3, commitMemory -> 180, usedMemory ->270
2365 [EventFetcher for fetching Map Completion Events] INFO  org.apache.hadoop.mapreduce.task.reduce.EventFetcher  - EventFetcher is interrupted.. Returning
2366 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 3 / 3 copied.
2366 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - finalMerge called with 3 in-memory map-outputs and 0 on-disk map-outputs
2376 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Merging 3 sorted segments
2377 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Down to the last merge-pass, with 3 segments left of total size: 258 bytes
2380 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merged 3 segments, 270 bytes to disk to satisfy reduce memory limit
2381 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merging 1 files, 270 bytes from disk
2382 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merging 0 segments, 0 bytes from memory into reduce
2382 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Merging 1 sorted segments
2383 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Down to the last merge-pass, with 1 segments left of total size: 262 bytes
2384 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 3 / 3 copied.
2391 [pool-4-thread-1] INFO  org.apache.hadoop.conf.Configuration.deprecation  - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2396 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task:attempt_local190908434_0001_r_000000_0 is done. And is in the process of committing
2398 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 3 / 3 copied.
2398 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task attempt_local190908434_0001_r_000000_0 is allowed to commit now
2400 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - Saved output of task 'attempt_local190908434_0001_r_000000_0' to file:/D:/data/line/res4/_temporary/0/task_local190908434_0001_r_000000
2401 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - reduce > reduce
2401 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task 'attempt_local190908434_0001_r_000000_0' done.
2402 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Final Counters for attempt_local190908434_0001_r_000000_0: Counters: 24
    File System Counters
        FILE: Number of bytes read=2216
        FILE: Number of bytes written=374688
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Combine input records=0
        Combine output records=0
        Reduce input groups=3
        Reduce shuffle bytes=282
        Reduce input records=33
        Reduce output records=3
        Spilled Records=33
        Shuffled Maps =3
        Failed Shuffles=0
        Merged Map outputs=3
        GC time elapsed (ms)=0
        Total committed heap usage (bytes)=514850816
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Output Format Counters 
        Bytes Written=25
2403 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Finishing task: attempt_local190908434_0001_r_000000_0
2403 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Starting task: attempt_local190908434_0001_r_000001_0
2404 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - File Output Committer Algorithm version is 1
2404 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2406 [pool-4-thread-1] INFO  org.apache.hadoop.yarn.util.ProcfsBasedProcessTree  - ProcfsBasedProcessTree currently is supported only on Linux.
2447 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@173b77b0
2447 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.ReduceTask  - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@1126b874
2447 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - MergerManager: memoryLimit=5328129024, maxSingleShuffleLimit=1332032256, mergeThreshold=3516565248, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2448 [EventFetcher for fetching Map Completion Events] INFO  org.apache.hadoop.mapreduce.task.reduce.EventFetcher  - attempt_local190908434_0001_r_000001_0 Thread started: EventFetcher for fetching Map Completion Events
2453 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.LocalFetcher  - localfetcher#2 about to shuffle output of map attempt_local190908434_0001_m_000001_0 decomp: 106 len: 110 to MEMORY
2454 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput  - Read 106 bytes from map-output for attempt_local190908434_0001_m_000001_0
2454 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - closeInMemoryFile -> map-output of size: 106, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->106
2458 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.LocalFetcher  - localfetcher#2 about to shuffle output of map attempt_local190908434_0001_m_000000_0 decomp: 106 len: 110 to MEMORY
2458 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput  - Read 106 bytes from map-output for attempt_local190908434_0001_m_000000_0
2458 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - closeInMemoryFile -> map-output of size: 106, inMemoryMapOutputs.size() -> 2, commitMemory -> 106, usedMemory ->212
2462 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.LocalFetcher  - localfetcher#2 about to shuffle output of map attempt_local190908434_0001_m_000002_0 decomp: 106 len: 110 to MEMORY
2463 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput  - Read 106 bytes from map-output for attempt_local190908434_0001_m_000002_0
2463 [localfetcher#2] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - closeInMemoryFile -> map-output of size: 106, inMemoryMapOutputs.size() -> 3, commitMemory -> 212, usedMemory ->318
2463 [EventFetcher for fetching Map Completion Events] INFO  org.apache.hadoop.mapreduce.task.reduce.EventFetcher  - EventFetcher is interrupted.. Returning
2464 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 3 / 3 copied.
2464 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - finalMerge called with 3 in-memory map-outputs and 0 on-disk map-outputs
2470 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Merging 3 sorted segments
2470 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Down to the last merge-pass, with 3 segments left of total size: 306 bytes
2473 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merged 3 segments, 318 bytes to disk to satisfy reduce memory limit
2474 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merging 1 files, 318 bytes from disk
2474 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merging 0 segments, 0 bytes from memory into reduce
2474 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Merging 1 sorted segments
2475 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Down to the last merge-pass, with 1 segments left of total size: 310 bytes
2475 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 3 / 3 copied.
2481 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task:attempt_local190908434_0001_r_000001_0 is done. And is in the process of committing
2482 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 3 / 3 copied.
2482 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task attempt_local190908434_0001_r_000001_0 is allowed to commit now
2489 [pool-4-thread-1] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - Saved output of task 'attempt_local190908434_0001_r_000001_0' to file:/D:/data/line/res4/_temporary/0/task_local190908434_0001_r_000001
2490 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - reduce > reduce
2490 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task 'attempt_local190908434_0001_r_000001_0' done.
2491 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.Task  - Final Counters for attempt_local190908434_0001_r_000001_0: Counters: 24
    File System Counters
        FILE: Number of bytes read=3032
        FILE: Number of bytes written=375032
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Combine input records=0
        Combine output records=0
        Reduce input groups=3
        Reduce shuffle bytes=330
        Reduce input records=39
        Reduce output records=3
        Spilled Records=39
        Shuffled Maps =3
        Failed Shuffles=0
        Merged Map outputs=3
        GC time elapsed (ms)=0
        Total committed heap usage (bytes)=514850816
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Output Format Counters 
        Bytes Written=26
2491 [pool-4-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Finishing task: attempt_local190908434_0001_r_000001_0
2491 [Thread-6] INFO  org.apache.hadoop.mapred.LocalJobRunner  - reduce task executor complete.
2545 [main] INFO  org.apache.hadoop.mapreduce.Job  - Job job_local190908434_0001 running in uber mode : false
2548 [main] INFO  org.apache.hadoop.mapreduce.Job  -  map 100% reduce 100%
2550 [main] INFO  org.apache.hadoop.mapreduce.Job  - Job job_local190908434_0001 completed successfully
2560 [main] INFO  org.apache.hadoop.mapreduce.Job  - Counters: 30
    File System Counters
        FILE: Number of bytes read=7642
        FILE: Number of bytes written=1872119
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=21
        Map output records=72
        Map output bytes=432
        Map output materialized bytes=612
        Input split bytes=316
        Combine input records=0
        Combine output records=0
        Reduce input groups=6
        Reduce shuffle bytes=612
        Reduce input records=72
        Reduce output records=6
        Spilled Records=144
        Shuffled Maps =6
        Failed Shuffles=0
        Merged Map outputs=6
        GC time elapsed (ms)=6
        Total committed heap usage (bytes)=2574254080
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters 
        Bytes Read=99
    File Output Format Counters 
        Bytes Written=51

Process finished with exit code 0
 

4.3 maptask和reducetask个数

  1. maptask的个数与待处理文件的个数以及文件大小有关，具体计算逻辑详见MR原理加强篇
  2. reducetask的个数可以通过job.setNumReduceTasks(n)设置

猜你喜欢

转载自blog.csdn.net/qq_37933018/article/details/107251344
今日推荐