Job的初始化
1.WordCount.java
public static void main(String[] args) throws Exception { // 初始化Configuration Configuration conf = new Configuration(); --> Configuration.java 默认构造函数执行this(true);即 public Configuration(boolean loadDefaults) { // 设定加载默认设置 this.loadDefaults = loadDefaults; // 记录每个key最近一次是由哪个资源(配置文件)设置或加载的 updatingResource = new HashMap<String, String>(); // 将Configuration放入一个WeakHashMap中 synchronized(Configuration.class) { REGISTRY.put(this, null); } } ... // 初始化Job Job job = new Job(conf, "word count"); --> JobConf.java conf作为构造参数传入,将其封装为JobConf private Credentials credentials = new Credentials(); // 初始化credentials public JobConf(Configuration conf) { super(conf);// JobConf继承自Configuration,即将传进来的conf复制到另一个Configuration中 if (conf instanceof JobConf) { // 该条件不符合 ... } checkAndWarnDeprecation(); // 在conf中获取已经废弃的key,如果存在则向用户警告使用了过期的参数 } --> JobContextImpl.java Job的构造方法最终调用父类JobContextImpl的构造方法 public JobContextImpl(Configuration conf, JobID jobId) { if (conf instanceof JobConf) { this.conf = (JobConf)conf; // 初始化JobConf } else { this.conf = new JobConf(conf); } this.jobId = jobId; this.credentials = this.conf.getCredentials(); // 将JobConf中初始化的credentials赋给JobContextImpl try { this.ugi = UserGroupInformation.getCurrentUser(); // 初始化UGI } catch (IOException e) { throw new RuntimeException(e); } } job.setJarByClass(WordCount.class); --> JobConf.java 该方法会最终调用到JobConf的setJarByClass() public void setJarByClass(Class cls) { String jar = findContainingJar(cls); // 通过class名字获取到class所在的jar包 if (jar != null) { setJar(jar); // 调用Configuration的set(JobContext.JAR, jar);方法 } } job.setMapperClass(TokenizerMapper.class); // 调用Configuration的setClass();方法 ... 
FileInputFormat.addInputPath(job, new Path(otherArgs[0])); --> FileInputFormat.java中的addInputPath方法 public static void addInputPath(Job job, Path path) throws IOException { Configuration conf = job.getConfiguration(); path = path.getFileSystem(conf).makeQualified(path); // 获取文件系统,并补全路径 String dirStr = StringUtils.escapeString(path.toString()); String dirs = conf.get(INPUT_DIR); conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } 2. Job.java public boolean waitForCompletion(boolean verbose ) throws IOException, InterruptedException, ClassNotFoundException { if (state == JobState.DEFINE) { submit(); // 提交Job --> Job.java submit()方法 public void submit() throws IOException, InterruptedException, ClassNotFoundException { ensureState(JobState.DEFINE); setUseNewAPI(); connect(); // 初始化Cluster类,代理所有Job的操作和JobSubmitter的submitInternal操作(加载yarn-site.xml,mapred-site.xml等配置文件,其中指定的框架名字用于ClientProtocolProvider选择并加载对应的服务;同时初始化Cluster需要的UGI),该类用于确定当前的job运行在何种模式(Yarn、Local、JobTracker)通过ServiceLoader<ClientProtocolProvider> frameworkLoader加载ClientProtocolProvider,通过ClientProtocolProvider启动运行的框架(Yarn等等) final JobSubmitter submitter = getJobSubmitter(cluster.getFileSystem(), cluster.getClient()); // JobSubmitter:1、检查input和output 2、计算map数 3、设置DistributedCache,即用户Map/Reduce程序运行需要的jar包 4、复制job的相关信息到hdfs上 5、提交Job status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() { public JobStatus run() throws IOException, InterruptedException, ClassNotFoundException { return submitter.submitJobInternal(Job.this, cluster); } }); state = JobState.RUNNING; LOG.info("The url to track the job: " + getTrackingURL()); } } if (verbose) { monitorAndPrintJob(); } else { // get the completion poll interval from the client. 
int completionPollIntervalMillis = Job.getCompletionPollInterval(cluster.getConf()); while (!isComplete()) { try { Thread.sleep(completionPollIntervalMillis); } catch (InterruptedException ie) { } } } return isSuccessful(); } public boolean monitorAndPrintJob() throws IOException, InterruptedException { String lastReport = null; // 记录最近一次的report Job.TaskStatusFilter filter; Configuration clientConf = getConfiguration(); filter = Job.getTaskOutputFilter(clientConf); JobID jobId = getJobID(); LOG.info("Running job: " + jobId); int eventCounter = 0; boolean profiling = getProfileEnabled(); IntegerRanges mapRanges = getProfileTaskRange(true); IntegerRanges reduceRanges = getProfileTaskRange(false); int progMonitorPollIntervalMillis = Job.getProgressPollInterval(clientConf); /* make sure to report full progress after the job is done */ boolean reportedAfterCompletion = false; boolean reportedUberMode = false; while (!isComplete() || !reportedAfterCompletion) { if (isComplete()) { reportedAfterCompletion = true; } else { Thread.sleep(progMonitorPollIntervalMillis); } if (status.getState() == JobStatus.State.PREP) { continue; } if (!reportedUberMode) { reportedUberMode = true; LOG.info("Job " + jobId + " running in uber mode : " + isUber()); } String report = (" map " + StringUtils.formatPercent(mapProgress(), 0)+ " reduce " + StringUtils.formatPercent(reduceProgress(), 0)); if (!report.equals(lastReport)) { LOG.info(report); lastReport = report; } TaskCompletionEvent[] events = getTaskCompletionEvents(eventCounter, 10); eventCounter += events.length; printTaskEvents(events, filter, profiling, mapRanges, reduceRanges); } boolean success = isSuccessful(); if (success) { LOG.info("Job " + jobId + " completed successfully"); } else { LOG.info("Job " + jobId + " failed with state " + status.getState() + " due to: " + status.getFailureInfo()); } Counters counters = getCounters(); if (counters != null) { LOG.info(counters.toString()); } return success; }