1 数据
1,4
2,5
3,4
2,5
2,4
3,4
2,6
2 需求
统计x轴上每个点线段重叠的次数
3 代码实现
package com._51doit.mr.line;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
* Author: 多易教育-行哥
* Date: 2020/7/10
* Description:
*/
public class LineDemo {

    /**
     * Mapper: expands each input line "x1,x2" (a segment on the x axis) into
     * one (point, 1) pair for every integer point the segment covers.
     * e.g. "2,5" -> (2,1) (3,1) (4,1) (5,1)
     */
    static class LineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused output objects so we don't allocate a Text/IntWritable per record.
        private final Text k = new Text();
        private final IntWritable v = new IntWritable(1);

        /**
         * Called once per input line.
         *
         * @param key     byte offset of the line within the split (unused)
         * @param value   one input line, e.g. "2,5"
         * @param context output collector
         * @throws IOException          on write failure
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of failing on parts[1]
            }
            String[] parts = line.split(",");
            int start = Integer.parseInt(parts[0].trim());
            int end = Integer.parseInt(parts[1].trim());
            for (int x = start; x <= end; x++) {
                k.set(Integer.toString(x));
                context.write(k, v);
            }
        }
    }

    /**
     * Reducer: sums the counts emitted for each x-axis point, i.e. the number
     * of segments overlapping that point.
     */
    static class LineReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable v = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            // Sum the actual values rather than counting iterations so the
            // reducer stays correct if a combiner is ever added to the job.
            for (IntWritable value : values) {
                count += value.get();
            }
            v.set(count);
            context.write(key, v);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Second argument is the job name; use the class literal instead of
        // instantiating LineDemo just to call getClass().
        Job job = Job.getInstance(conf, LineDemo.class.getSimpleName());

        job.setMapperClass(LineMapper.class);
        job.setReducerClass(LineReducer.class);

        // Map-phase output types: set them explicitly — leaving them unset
        // only worked because they happened to match the final output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setNumReduceTasks(2); // launch 2 reduce tasks (original comment wrongly said 3)

        // Input/output paths: allow overriding from the command line while
        // keeping the original hard-coded defaults for backward compatibility.
        String input = args.length > 0 ? args[0] : "D:\\data\\line\\input\\";
        String output = args.length > 1 ? args[1] : "D:\\data\\line\\res4\\";
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Propagate job success/failure as the process exit code instead of
        // discarding waitForCompletion's return value.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
4 知识点
4.1 添加log4j日志配置文件
# Set root logger level to DEBUG and its only appender to A1.
log4j.rootLogger=INFO, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
4.2 控制台观察日志
D:\tools\jdk1.8.0_65\bin\java.exe "-javaagent:D:\apps\IntelliJ IDEA 2020_01\IntelliJ IDEA 2020.1.3\lib\idea_rt.jar=12052:D:\apps\IntelliJ IDEA 2020_01\IntelliJ IDEA 2020.1.3\bin" -Dfile.encoding=UTF-8 -classpath D:\tools\jdk1.8.0_65\jre\lib\charsets.jar;D:\tools\jdk1.8.0_65\jre\lib\deploy.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\access-bridge-64.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\cldrdata.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\dnsns.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\jaccess.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\jfxrt.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\localedata.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\nashorn.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunec.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunjce_provider.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunmscapi.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\sunpkcs11.jar;D:\tools\jdk1.8.0_65\jre\lib\ext\zipfs.jar;D:\tools\jdk1.8.0_65\jre\lib\javaws.jar;D:\tools\jdk1.8.0_65\jre\lib\jce.jar;D:\tools\jdk1.8.0_65\jre\lib\jfr.jar;D:\tools\jdk1.8.0_65\jre\lib\jfxswt.jar;D:\tools\jdk1.8.0_65\jre\lib\jsse.jar;D:\tools\jdk1.8.0_65\jre\lib\management-agent.jar;D:\tools\jdk1.8.0_65\jre\lib\plugin.jar;D:\tools\jdk1.8.0_65\jre\lib\resources.jar;D:\tools\jdk1.8.0_65\jre\lib\rt.jar;F:\codes\doit16\hdp\target\classes;D:\doit16-maven\repository\org\apache\hadoop\hadoop-common\2.8.5\hadoop-common-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-annotations\2.8.5\hadoop-annotations-2.8.5.jar;D:\doit16-maven\repository\com\google\guava\guava\11.0.2\guava-11.0.2.jar;D:\doit16-maven\repository\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;D:\doit16-maven\repository\org\apache\commons\commons-math3\3.1.1\commons-math3-3.1.1.jar;D:\doit16-maven\repository\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;D:\doit16-maven\repository\org\apache\httpcomponents\httpclient\4.5.2\httpclient-4.5.2.jar;D:\doit16-maven\repository\org\apache\httpcomponents\httpcore\4.4.4\httpcore-4.4.4.jar;D:\doit16-maven\repository\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;D:\do
it16-maven\repository\commons-io\commons-io\2.4\commons-io-2.4.jar;D:\doit16-maven\repository\commons-net\commons-net\3.1\commons-net-3.1.jar;D:\doit16-maven\repository\commons-collections\commons-collections\3.2.2\commons-collections-3.2.2.jar;D:\doit16-maven\repository\javax\servlet\servlet-api\2.5\servlet-api-2.5.jar;D:\doit16-maven\repository\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;D:\doit16-maven\repository\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;D:\doit16-maven\repository\org\mortbay\jetty\jetty-sslengine\6.1.26\jetty-sslengine-6.1.26.jar;D:\doit16-maven\repository\javax\servlet\jsp\jsp-api\2.1\jsp-api-2.1.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;D:\doit16-maven\repository\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;D:\doit16-maven\repository\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;D:\doit16-maven\repository\asm\asm\3.1\asm-3.1.jar;D:\doit16-maven\repository\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;D:\doit16-maven\repository\net\java\dev\jets3t\jets3t\0.9.0\jets3t-0.9.0.jar;D:\doit16-maven\repository\com\jamesmurty\utils\java-xmlbuilder\0.4\java-xmlbuilder-0.4.jar;D:\doit16-maven\repository\commons-lang\commons-lang\2.6\commons-lang-2.6.jar;D:\doit16-maven\repository\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;D:\doit16-maven\repository\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;D:\doit16-maven\repository\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;D:\doit16-maven\repository\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutil
s-core-1.8.0.jar;D:\doit16-maven\repository\org\slf4j\slf4j-api\1.7.10\slf4j-api-1.7.10.jar;D:\doit16-maven\repository\org\slf4j\slf4j-log4j12\1.7.10\slf4j-log4j12-1.7.10.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;D:\doit16-maven\repository\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;D:\doit16-maven\repository\org\apache\avro\avro\1.7.4\avro-1.7.4.jar;D:\doit16-maven\repository\com\thoughtworks\paranamer\paranamer\2.3\paranamer-2.3.jar;D:\doit16-maven\repository\org\xerial\snappy\snappy-java\1.0.4.1\snappy-java-1.0.4.1.jar;D:\doit16-maven\repository\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;D:\doit16-maven\repository\com\google\code\gson\gson\2.2.4\gson-2.2.4.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-auth\2.8.5\hadoop-auth-2.8.5.jar;D:\doit16-maven\repository\com\nimbusds\nimbus-jose-jwt\4.41.1\nimbus-jose-jwt-4.41.1.jar;D:\doit16-maven\repository\com\github\stephenc\jcip\jcip-annotations\1.0-1\jcip-annotations-1.0-1.jar;D:\doit16-maven\repository\net\minidev\json-smart\2.3\json-smart-2.3.jar;D:\doit16-maven\repository\net\minidev\accessors-smart\1.2\accessors-smart-1.2.jar;D:\doit16-maven\repository\org\ow2\asm\asm\5.0.4\asm-5.0.4.jar;D:\doit16-maven\repository\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar;D:\doit16-maven\repository\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar;D:\doit16-maven\repository\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar;D:\doit16-maven\repository\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar;D:\doit16-maven\repository\org\apache\curator\curator-framework\2.7.1\curator-framework-2.7.1.jar;D:\doit16-maven\repository\com\jcraft\jsch\0.1.54\jsch-0.1.54.jar;D:\doit16-maven\repository\org\apache\curator\curator-client\2.7.1\curator-client-2.7.1.jar;D:\doit16-maven\repository\org
\apache\curator\curator-recipes\2.7.1\curator-recipes-2.7.1.jar;D:\doit16-maven\repository\com\google\code\findbugs\jsr305\3.0.0\jsr305-3.0.0.jar;D:\doit16-maven\repository\org\apache\htrace\htrace-core4\4.0.1-incubating\htrace-core4-4.0.1-incubating.jar;D:\doit16-maven\repository\org\apache\zookeeper\zookeeper\3.4.6\zookeeper-3.4.6.jar;D:\doit16-maven\repository\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;D:\doit16-maven\repository\org\tukaani\xz\1.0\xz-1.0.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-hdfs\2.8.5\hadoop-hdfs-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-hdfs-client\2.8.5\hadoop-hdfs-client-2.8.5.jar;D:\doit16-maven\repository\com\squareup\okhttp\okhttp\2.4.0\okhttp-2.4.0.jar;D:\doit16-maven\repository\com\squareup\okio\okio\1.4.0\okio-1.4.0.jar;D:\doit16-maven\repository\commons-daemon\commons-daemon\1.0.13\commons-daemon-1.0.13.jar;D:\doit16-maven\repository\io\netty\netty\3.6.2.Final\netty-3.6.2.Final.jar;D:\doit16-maven\repository\io\netty\netty-all\4.0.23.Final\netty-all-4.0.23.Final.jar;D:\doit16-maven\repository\xerces\xercesImpl\2.9.1\xercesImpl-2.9.1.jar;D:\doit16-maven\repository\xml-apis\xml-apis\1.3.04\xml-apis-1.3.04.jar;D:\doit16-maven\repository\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-mapreduce-client-core\2.8.5\hadoop-mapreduce-client-core-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-common\2.8.5\hadoop-yarn-common-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-api\2.8.5\hadoop-yarn-api-2.8.5.jar;D:\doit16-maven\repository\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar;D:\doit16-maven\repository\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar;D:\doit16-maven\repository\javax\activation\activation\1.1\activation-1.1.jar;D:\doit16-maven\repository\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar;D:\doit16-maven\repository\com\google\inject\guice\3.0\guice
-3.0.jar;D:\doit16-maven\repository\javax\inject\javax.inject\1\javax.inject-1.jar;D:\doit16-maven\repository\aopalliance\aopalliance\1.0\aopalliance-1.0.jar;D:\doit16-maven\repository\com\sun\jersey\contribs\jersey-guice\1.9\jersey-guice-1.9.jar;D:\doit16-maven\repository\com\google\inject\extensions\guice-servlet\3.0\guice-servlet-3.0.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-mapreduce-client-common\2.8.5\hadoop-mapreduce-client-common-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-client\2.8.5\hadoop-yarn-client-2.8.5.jar;D:\doit16-maven\repository\org\apache\hadoop\hadoop-yarn-server-common\2.8.5\hadoop-yarn-server-common-2.8.5.jar;D:\doit16-maven\repository\log4j\log4j\1.2.17\log4j-1.2.17.jar com._51doit.mr.line.LineDemo
0 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - session.id is deprecated. Instead, use dfs.metrics.session-id
2 [main] INFO org.apache.hadoop.metrics.jvm.JvmMetrics - Initializing JVM Metrics with processName=JobTracker, sessionId=
667 [main] WARN org.apache.hadoop.mapreduce.JobResourceUploader - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
715 [main] WARN org.apache.hadoop.mapreduce.JobResourceUploader - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
953 [main] INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input files to process : 3
1000 [main] INFO org.apache.hadoop.mapreduce.JobSubmitter - number of splits:3
1130 [main] INFO org.apache.hadoop.mapreduce.JobSubmitter - Submitting tokens for job: job_local190908434_0001
1542 [main] INFO org.apache.hadoop.mapreduce.Job - The url to track the job: http://localhost:8080/
1543 [main] INFO org.apache.hadoop.mapreduce.Job - Running job: job_local190908434_0001
1545 [Thread-6] INFO org.apache.hadoop.mapred.LocalJobRunner - OutputCommitter set in config null
1550 [Thread-6] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - File Output Committer Algorithm version is 1
1550 [Thread-6] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
1550 [Thread-6] INFO org.apache.hadoop.mapred.LocalJobRunner - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
1595 [Thread-6] INFO org.apache.hadoop.mapred.LocalJobRunner - Waiting for map tasks
1596 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - Starting task: attempt_local190908434_0001_m_000000_0
1629 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - File Output Committer Algorithm version is 1
1630 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
1640 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.yarn.util.ProcfsBasedProcessTree - ProcfsBasedProcessTree currently is supported only on Linux.
1683 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@6c404c50
1691 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Processing split: file:/D:/data/line/input/line - 副本 (2).txt:0+33
1712 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - (EQUATOR) 0 kvi 26214396(104857584)
1712 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - mapreduce.task.io.sort.mb: 100
1712 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - soft limit at 83886080
1713 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - bufstart = 0; bufvoid = 104857600
1713 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - kvstart = 26214396; length = 6553600
1717 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
1726 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner -
1726 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Starting flush of map output
1726 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Spilling map output
1726 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - bufstart = 0; bufend = 144; bufvoid = 104857600
1726 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - kvstart = 26214396(104857584); kvend = 26214304(104857216); length = 93/6553600
1843 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Finished spill 0
1852 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Task:attempt_local190908434_0001_m_000000_0 is done. And is in the process of committing
1872 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - map
1872 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Task 'attempt_local190908434_0001_m_000000_0' done.
1876 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Final Counters for attempt_local190908434_0001_m_000000_0: Counters: 17
File System Counters
FILE: Number of bytes read=430
FILE: Number of bytes written=373873
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=7
Map output records=24
Map output bytes=144
Map output materialized bytes=204
Input split bytes=111
Combine input records=0
Spilled Records=24
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=0
Total committed heap usage (bytes)=514850816
File Input Format Counters
Bytes Read=33
1877 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - Finishing task: attempt_local190908434_0001_m_000000_0
1877 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - Starting task: attempt_local190908434_0001_m_000001_0
1878 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - File Output Committer Algorithm version is 1
1878 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
1878 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.yarn.util.ProcfsBasedProcessTree - ProcfsBasedProcessTree currently is supported only on Linux.
1941 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@7f711871
1943 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Processing split: file:/D:/data/line/input/line - 副本.txt:0+33
1987 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - (EQUATOR) 0 kvi 26214396(104857584)
1987 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - mapreduce.task.io.sort.mb: 100
1987 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - soft limit at 83886080
1987 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - bufstart = 0; bufvoid = 104857600
1987 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - kvstart = 26214396; length = 6553600
1988 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
1990 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner -
1990 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Starting flush of map output
1990 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Spilling map output
1990 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - bufstart = 0; bufend = 144; bufvoid = 104857600
1990 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - kvstart = 26214396(104857584); kvend = 26214304(104857216); length = 93/6553600
1997 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Finished spill 0
2006 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Task:attempt_local190908434_0001_m_000001_0 is done. And is in the process of committing
2010 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - map
2010 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Task 'attempt_local190908434_0001_m_000001_0' done.
2012 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Final Counters for attempt_local190908434_0001_m_000001_0: Counters: 17
File System Counters
FILE: Number of bytes read=798
FILE: Number of bytes written=374133
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=7
Map output records=24
Map output bytes=144
Map output materialized bytes=204
Input split bytes=107
Combine input records=0
Spilled Records=24
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=0
Total committed heap usage (bytes)=514850816
File Input Format Counters
Bytes Read=33
2012 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - Finishing task: attempt_local190908434_0001_m_000001_0
2013 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - Starting task: attempt_local190908434_0001_m_000002_0
2014 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - File Output Committer Algorithm version is 1
2014 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2015 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.yarn.util.ProcfsBasedProcessTree - ProcfsBasedProcessTree currently is supported only on Linux.
2055 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@2114082e
2057 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Processing split: file:/D:/data/line/input/line.txt:0+33
2103 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - (EQUATOR) 0 kvi 26214396(104857584)
2104 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - mapreduce.task.io.sort.mb: 100
2104 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - soft limit at 83886080
2104 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - bufstart = 0; bufvoid = 104857600
2104 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - kvstart = 26214396; length = 6553600
2111 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2113 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner -
2114 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Starting flush of map output
2114 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Spilling map output
2114 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - bufstart = 0; bufend = 144; bufvoid = 104857600
2114 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - kvstart = 26214396(104857584); kvend = 26214304(104857216); length = 93/6553600
2220 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask - Finished spill 0
2224 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Task:attempt_local190908434_0001_m_000002_0 is done. And is in the process of committing
2226 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - map
2226 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Task 'attempt_local190908434_0001_m_000002_0' done.
2226 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task - Final Counters for attempt_local190908434_0001_m_000002_0: Counters: 17
File System Counters
FILE: Number of bytes read=1166
FILE: Number of bytes written=374393
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=7
Map output records=24
Map output bytes=144
Map output materialized bytes=204
Input split bytes=98
Combine input records=0
Spilled Records=24
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=6
Total committed heap usage (bytes)=514850816
File Input Format Counters
Bytes Read=33
2226 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner - Finishing task: attempt_local190908434_0001_m_000002_0
2226 [Thread-6] INFO org.apache.hadoop.mapred.LocalJobRunner - map task executor complete.
2228 [Thread-6] INFO org.apache.hadoop.mapred.LocalJobRunner - Waiting for reduce tasks
2229 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - Starting task: attempt_local190908434_0001_r_000000_0
2241 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - File Output Committer Algorithm version is 1
2241 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2241 [pool-4-thread-1] INFO org.apache.hadoop.yarn.util.ProcfsBasedProcessTree - ProcfsBasedProcessTree currently is supported only on Linux.
2280 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@41c38664
2282 [pool-4-thread-1] INFO org.apache.hadoop.mapred.ReduceTask - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@25868440
2298 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - MergerManager: memoryLimit=5328129024, maxSingleShuffleLimit=1332032256, mergeThreshold=3516565248, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2300 [EventFetcher for fetching Map Completion Events] INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher - attempt_local190908434_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2343 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher - localfetcher#1 about to shuffle output of map attempt_local190908434_0001_m_000001_0 decomp: 90 len: 94 to MEMORY
2350 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput - Read 90 bytes from map-output for attempt_local190908434_0001_m_000001_0
2351 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - closeInMemoryFile -> map-output of size: 90, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->90
2356 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher - localfetcher#1 about to shuffle output of map attempt_local190908434_0001_m_000000_0 decomp: 90 len: 94 to MEMORY
2358 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput - Read 90 bytes from map-output for attempt_local190908434_0001_m_000000_0
2358 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - closeInMemoryFile -> map-output of size: 90, inMemoryMapOutputs.size() -> 2, commitMemory -> 90, usedMemory ->180
2363 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher - localfetcher#1 about to shuffle output of map attempt_local190908434_0001_m_000002_0 decomp: 90 len: 94 to MEMORY
2364 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput - Read 90 bytes from map-output for attempt_local190908434_0001_m_000002_0
2364 [localfetcher#1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - closeInMemoryFile -> map-output of size: 90, inMemoryMapOutputs.size() -> 3, commitMemory -> 180, usedMemory ->270
2365 [EventFetcher for fetching Map Completion Events] INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher - EventFetcher is interrupted.. Returning
2366 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - 3 / 3 copied.
2366 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - finalMerge called with 3 in-memory map-outputs and 0 on-disk map-outputs
2376 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Merging 3 sorted segments
2377 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Down to the last merge-pass, with 3 segments left of total size: 258 bytes
2380 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - Merged 3 segments, 270 bytes to disk to satisfy reduce memory limit
2381 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - Merging 1 files, 270 bytes from disk
2382 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - Merging 0 segments, 0 bytes from memory into reduce
2382 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Merging 1 sorted segments
2383 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Down to the last merge-pass, with 1 segments left of total size: 262 bytes
2384 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - 3 / 3 copied.
2391 [pool-4-thread-1] INFO org.apache.hadoop.conf.Configuration.deprecation - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2396 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Task:attempt_local190908434_0001_r_000000_0 is done. And is in the process of committing
2398 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - 3 / 3 copied.
2398 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Task attempt_local190908434_0001_r_000000_0 is allowed to commit now
2400 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - Saved output of task 'attempt_local190908434_0001_r_000000_0' to file:/D:/data/line/res4/_temporary/0/task_local190908434_0001_r_000000
2401 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - reduce > reduce
2401 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Task 'attempt_local190908434_0001_r_000000_0' done.
2402 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Final Counters for attempt_local190908434_0001_r_000000_0: Counters: 24
File System Counters
FILE: Number of bytes read=2216
FILE: Number of bytes written=374688
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Combine input records=0
Combine output records=0
Reduce input groups=3
Reduce shuffle bytes=282
Reduce input records=33
Reduce output records=3
Spilled Records=33
Shuffled Maps =3
Failed Shuffles=0
Merged Map outputs=3
GC time elapsed (ms)=0
Total committed heap usage (bytes)=514850816
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Output Format Counters
Bytes Written=25
2403 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - Finishing task: attempt_local190908434_0001_r_000000_0
2403 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - Starting task: attempt_local190908434_0001_r_000001_0
2404 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - File Output Committer Algorithm version is 1
2404 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2406 [pool-4-thread-1] INFO org.apache.hadoop.yarn.util.ProcfsBasedProcessTree - ProcfsBasedProcessTree currently is supported only on Linux.
2447 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@173b77b0
2447 [pool-4-thread-1] INFO org.apache.hadoop.mapred.ReduceTask - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@1126b874
2447 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - MergerManager: memoryLimit=5328129024, maxSingleShuffleLimit=1332032256, mergeThreshold=3516565248, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2448 [EventFetcher for fetching Map Completion Events] INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher - attempt_local190908434_0001_r_000001_0 Thread started: EventFetcher for fetching Map Completion Events
2453 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher - localfetcher#2 about to shuffle output of map attempt_local190908434_0001_m_000001_0 decomp: 106 len: 110 to MEMORY
2454 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput - Read 106 bytes from map-output for attempt_local190908434_0001_m_000001_0
2454 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - closeInMemoryFile -> map-output of size: 106, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->106
2458 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher - localfetcher#2 about to shuffle output of map attempt_local190908434_0001_m_000000_0 decomp: 106 len: 110 to MEMORY
2458 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput - Read 106 bytes from map-output for attempt_local190908434_0001_m_000000_0
2458 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - closeInMemoryFile -> map-output of size: 106, inMemoryMapOutputs.size() -> 2, commitMemory -> 106, usedMemory ->212
2462 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher - localfetcher#2 about to shuffle output of map attempt_local190908434_0001_m_000002_0 decomp: 106 len: 110 to MEMORY
2463 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput - Read 106 bytes from map-output for attempt_local190908434_0001_m_000002_0
2463 [localfetcher#2] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - closeInMemoryFile -> map-output of size: 106, inMemoryMapOutputs.size() -> 3, commitMemory -> 212, usedMemory ->318
2463 [EventFetcher for fetching Map Completion Events] INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher - EventFetcher is interrupted.. Returning
2464 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - 3 / 3 copied.
2464 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - finalMerge called with 3 in-memory map-outputs and 0 on-disk map-outputs
2470 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Merging 3 sorted segments
2470 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Down to the last merge-pass, with 3 segments left of total size: 306 bytes
2473 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - Merged 3 segments, 318 bytes to disk to satisfy reduce memory limit
2474 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - Merging 1 files, 318 bytes from disk
2474 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl - Merging 0 segments, 0 bytes from memory into reduce
2474 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Merging 1 sorted segments
2475 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Merger - Down to the last merge-pass, with 1 segments left of total size: 310 bytes
2475 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - 3 / 3 copied.
2481 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Task:attempt_local190908434_0001_r_000001_0 is done. And is in the process of committing
2482 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - 3 / 3 copied.
2482 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Task attempt_local190908434_0001_r_000001_0 is allowed to commit now
2489 [pool-4-thread-1] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter - Saved output of task 'attempt_local190908434_0001_r_000001_0' to file:/D:/data/line/res4/_temporary/0/task_local190908434_0001_r_000001
2490 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - reduce > reduce
2490 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Task 'attempt_local190908434_0001_r_000001_0' done.
2491 [pool-4-thread-1] INFO org.apache.hadoop.mapred.Task - Final Counters for attempt_local190908434_0001_r_000001_0: Counters: 24
File System Counters
FILE: Number of bytes read=3032
FILE: Number of bytes written=375032
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Combine input records=0
Combine output records=0
Reduce input groups=3
Reduce shuffle bytes=330
Reduce input records=39
Reduce output records=3
Spilled Records=39
Shuffled Maps =3
Failed Shuffles=0
Merged Map outputs=3
GC time elapsed (ms)=0
Total committed heap usage (bytes)=514850816
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Output Format Counters
Bytes Written=26
2491 [pool-4-thread-1] INFO org.apache.hadoop.mapred.LocalJobRunner - Finishing task: attempt_local190908434_0001_r_000001_0
2491 [Thread-6] INFO org.apache.hadoop.mapred.LocalJobRunner - reduce task executor complete.
2545 [main] INFO org.apache.hadoop.mapreduce.Job - Job job_local190908434_0001 running in uber mode : false
2548 [main] INFO org.apache.hadoop.mapreduce.Job - map 100% reduce 100%
2550 [main] INFO org.apache.hadoop.mapreduce.Job - Job job_local190908434_0001 completed successfully
2560 [main] INFO org.apache.hadoop.mapreduce.Job - Counters: 30
File System Counters
FILE: Number of bytes read=7642
FILE: Number of bytes written=1872119
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=21
Map output records=72
Map output bytes=432
Map output materialized bytes=612
Input split bytes=316
Combine input records=0
Combine output records=0
Reduce input groups=6
Reduce shuffle bytes=612
Reduce input records=72
Reduce output records=6
Spilled Records=144
Shuffled Maps =6
Failed Shuffles=0
Merged Map outputs=6
GC time elapsed (ms)=6
Total committed heap usage (bytes)=2574254080
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=99
File Output Format Counters
Bytes Written=51

Process finished with exit code 0
4.3 maptask和reducetask个数
- maptask的个数和处理文件的个数以及大小有关，具体计算逻辑在MR原理加强篇中讲解
- reducetask的个数可以使用job.setNumReduceTasks(n)设置