Classic MapReduce (MapReduce 1) - Job submission

Note that the old and new MapReduce APIs are not the same thing as the classic and YARN-based MapReduce implementations (MapReduce 1 and MapReduce 2, respectively). The APIs are user-facing, client-side features that determine how you write MapReduce programs, whereas the implementations are just different ways of running MapReduce programs.
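
To make the distinction concrete, here is a minimal driver-side sketch; it is not taken from the Hadoop source, the class and job names are made up, and the mapper/reducer/path setup is omitted. The old API configures and submits through JobConf and JobClient, while the new API goes through Job, which on MapReduce 1 still delegates to JobClient internally, as the listings below show.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapreduce.Job;

  public class ApiComparisonDriver {

    // Old API: configure with JobConf, submit with JobClient.
    static void runWithOldApi() throws Exception {
      JobConf conf = new JobConf(ApiComparisonDriver.class);
      conf.setJobName("old-api-example");
      // ... set mapper, reducer, input and output paths here ...
      JobClient.runJob(conf);                 // submits and blocks until completion
    }

    // New API: configure and submit through Job; on MapReduce 1 this still
    // calls JobClient.submitJobInternal() under the covers (see below).
    static void runWithNewApi() throws Exception {
      Job job = new Job(new Configuration(), "new-api-example");
      job.setJarByClass(ApiComparisonDriver.class);
      // ... set mapper, reducer, input and output paths here ...
      job.waitForCompletion(true);            // submit() plus progress polling
    }
  }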

The submit() method on Job creates an internal JobClient instance and calls submitJobInternal() on it:

  /**
   * Submit the job to the cluster and return immediately.
   * @throws IOException
   */
  public void submit() throws IOException, InterruptedException, 
                              ClassNotFoundException {
    ensureState(JobState.DEFINE);
    setUseNewAPI();
    
    // Connect to the JobTracker and submit the job
    connect();
    info = jobClient.submitJobInternal(conf);
    super.setJobID(info.getID());
    state = JobState.RUNNING;
   }

Having submitted the job, waitForCompletion() polls the job's progress once per second and reports the progress to the console if it has changed since the last report. When the job completes successfully, the job counters are displayed. Otherwise, the error that caused the job to fail is logged to the console.
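
As a rough illustration of that loop, the sketch below uses only the public Job API (it is not the actual waitForCompletion() implementation): it submits an already-configured job, polls it about once per second, prints progress only when it has changed, and dumps the counters on success.

  import org.apache.hadoop.mapreduce.Job;

  public class ProgressPoller {
    /** Submit the given (already configured) job and poll it to completion. */
    public static boolean submitAndPoll(Job job) throws Exception {
      job.submit();                             // returns immediately
      String lastReport = null;
      while (!job.isComplete()) {
        String report = String.format("map %.0f%% reduce %.0f%%",
            job.mapProgress() * 100, job.reduceProgress() * 100);
        if (!report.equals(lastReport)) {       // report only when progress changed
          System.out.println(report);
          lastReport = report;
        }
        Thread.sleep(1000);                     // poll roughly once per second
      }
      if (job.isSuccessful()) {
        System.out.println(job.getCounters());  // display the job counters
      }
      return job.isSuccessful();
    }
  }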

JobClient submits the job with the following code:

  /**
   * Internal method for submitting jobs to the system.
   * @param job the configuration to submit
   * @return a proxy object for the running job
   * @throws FileNotFoundException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   * @throws IOException
   */
  public 
  RunningJob submitJobInternal(final JobConf job
                               ) throws FileNotFoundException, 
                                        ClassNotFoundException,
                                        InterruptedException,
                                        IOException {
    /*
     * configure the command line options correctly on the submitting dfs
     */
    return ugi.doAs(new PrivilegedExceptionAction<RunningJob>() {
      public RunningJob run() throws FileNotFoundException, 
      ClassNotFoundException,
      InterruptedException,
      IOException{
        JobConf jobCopy = job;
        Path jobStagingArea = JobSubmissionFiles.getStagingDir(JobClient.this,
            jobCopy);
        JobID jobId = jobSubmitClient.getNewJobId();
        Path submitJobDir = new Path(jobStagingArea, jobId.toString());
        jobCopy.set("mapreduce.job.dir", submitJobDir.toString());
        JobStatus status = null;
        try {
          populateTokenCache(jobCopy, jobCopy.getCredentials());

          copyAndConfigureFiles(jobCopy, submitJobDir);

          // get delegation token for the dir
          TokenCache.obtainTokensForNamenodes(jobCopy.getCredentials(),
                                              new Path [] {submitJobDir},
                                              jobCopy);

          Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
          int reduces = jobCopy.getNumReduceTasks();
          InetAddress ip = InetAddress.getLocalHost();
          if (ip != null) {
            job.setJobSubmitHostAddress(ip.getHostAddress());
            job.setJobSubmitHostName(ip.getHostName());
          }
          JobContext context = new JobContext(jobCopy, jobId);

          // Check the output specification
          if (reduces == 0 ? jobCopy.getUseNewMapper() : 
            jobCopy.getUseNewReducer()) {
            org.apache.hadoop.mapreduce.OutputFormat<?,?> output =
              ReflectionUtils.newInstance(context.getOutputFormatClass(),
                  jobCopy);
            output.checkOutputSpecs(context);
          } else {
            jobCopy.getOutputFormat().checkOutputSpecs(fs, jobCopy);
          }
          
          jobCopy = (JobConf)context.getConfiguration();

          // Create the splits for the job
          FileSystem fs = submitJobDir.getFileSystem(jobCopy);
          LOG.debug("Creating splits at " + fs.makeQualified(submitJobDir));
          int maps = writeSplits(context, submitJobDir);
          jobCopy.setNumMapTasks(maps);

          // write "queue admins of the queue to which job is being submitted"
          // to job file.
          String queue = jobCopy.getQueueName();
          AccessControlList acl = jobSubmitClient.getQueueAdmins(queue);
          jobCopy.set(QueueManager.toFullPropertyName(queue,
              QueueACL.ADMINISTER_JOBS.getAclName()), acl.getACLString());

          // Write job file to JobTracker's fs        
          FSDataOutputStream out = 
            FileSystem.create(fs, submitJobFile,
                new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));

          try {
            jobCopy.writeXml(out);
          } finally {
            out.close();
          }
          //
          // Now, actually submit the job (using the submit name)
          //
          printTokens(jobId, jobCopy.getCredentials());
          status = jobSubmitClient.submitJob(
              jobId, submitJobDir.toString(), jobCopy.getCredentials());
          JobProfile prof = jobSubmitClient.getJobProfile(jobId);
          if (status != null && prof != null) {
            return new NetworkedJob(status, prof, jobSubmitClient);
          } else {
            throw new IOException("Could not launch job");
          }
        } finally {
          if (status == null) {
            LOG.info("Cleaning up the staging area " + submitJobDir);
            if (fs != null && submitJobDir != null)
              fs.delete(submitJobDir, true);
          }
        }
      }
    });
  }
  •  Asks the jobtracker for a new job ID by calling JobTracker#getNewJobId()
    /**
       * Allocates a new JobId string.
       */
      public synchronized JobID getNewJobId() throws IOException {
        return new JobID(getTrackerIdentifier(), nextJobId++);
      }
     
  • Checks the output specification of the job. For example, if the output directory has not been specified or it already exists, the job is not submitted and an error is thrown.
    public void checkOutputSpecs(FileSystem ignored, JobConf job) 
        throws FileAlreadyExistsException, 
               InvalidJobConfException, IOException {
        // Ensure that the output directory is set and not already there
        Path outDir = getOutputPath(job);
        if (outDir == null && job.getNumReduceTasks() != 0) {
          throw new InvalidJobConfException("Output directory not set in JobConf.");
        }
        if (outDir != null) {
          FileSystem fs = outDir.getFileSystem(job);
          // normalize the output directory
          outDir = fs.makeQualified(outDir);
          setOutputPath(job, outDir);
          
          // get delegation token for the outDir's file system
          TokenCache.obtainTokensForNamenodes(job.getCredentials(), 
                                              new Path[] {outDir}, job);
          
          // check its existence
          if (fs.exists(outDir)) {
            throw new FileAlreadyExistsException("Output directory " + outDir + 
                                                 " already exists");
          }
        }
      }
     
  • Computes the input splits for the job. If the splits cannot be computed (because the input paths don't exist, for example), the job is not submitted and an error is thrown.
      private int writeSplits(org.apache.hadoop.mapreduce.JobContext job,
          Path jobSubmitDir) throws IOException,
          InterruptedException, ClassNotFoundException {
        JobConf jConf = (JobConf)job.getConfiguration();
        int maps;
        if (jConf.getUseNewMapper()) {
          maps = writeNewSplits(job, jobSubmitDir);
        } else {
          maps = writeOldSplits(jConf, jobSubmitDir);
        }
        return maps;
      }
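
    From the driver's point of view, the output-specification check and the split computation hinge on the output and input paths set on the job. The following hypothetical new-API fragment (path names invented for illustration) sets both; if the input directory is missing the splits cannot be computed, and if the output directory already exists checkOutputSpecs() rejects the job.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class PathSetupExample {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "path-setup-example");
        job.setJarByClass(PathSetupExample.class);
        // The input path must exist, otherwise computing the splits fails
        // and the job is never submitted.
        FileInputFormat.addInputPath(job, new Path("/user/demo/input"));
        // The output path must NOT exist yet: checkOutputSpecs() throws
        // FileAlreadyExistsException if it does.
        FileOutputFormat.setOutputPath(job, new Path("/user/demo/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }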
    
     
  • Copies the resources needed to run the job, including the job JAR file, the configuration file, and the computed input splits, to the jobtracker's filesystem in a directory named after the job ID. The job JAR is copied with a high replication factor (controlled by the mapred.submit.replication property, which defaults to 10) so that there are lots of copies across the cluster for the tasktrackers to access when they run tasks for the job.
      /** 
   * Write out the non-default properties in this configuration to the given
       * {@link OutputStream}.
       * 
       * @param out the output stream to write to.
       */
      public void writeXml(OutputStream out) throws IOException {
        Properties properties = getProps();
        try {
          Document doc =
            DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
          Element conf = doc.createElement("configuration");
          doc.appendChild(conf);
          conf.appendChild(doc.createTextNode("\n"));
          for (Enumeration e = properties.keys(); e.hasMoreElements();) {
            String name = (String)e.nextElement();
            Object object = properties.get(name);
            String value = null;
            if (object instanceof String) {
              value = (String) object;
            }else {
              continue;
            }
            Element propNode = doc.createElement("property");
            conf.appendChild(propNode);
          
            Element nameNode = doc.createElement("name");
            nameNode.appendChild(doc.createTextNode(name));
            propNode.appendChild(nameNode);
          
            Element valueNode = doc.createElement("value");
            valueNode.appendChild(doc.createTextNode(value));
            propNode.appendChild(valueNode);
    
            conf.appendChild(doc.createTextNode("\n"));
          }
        
          DOMSource source = new DOMSource(doc);
          StreamResult result = new StreamResult(out);
          TransformerFactory transFactory = TransformerFactory.newInstance();
          Transformer transformer = transFactory.newTransformer();
          transformer.transform(source, result);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
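
    For reference, the job file that writeXml() produces is a flat list of <property> elements under a single <configuration> root, one element per non-default property. A trimmed, schematic fragment (values are placeholders) would look roughly like this:

    <configuration>
    <property><name>mapreduce.job.dir</name><value>...staging directory for this job...</value></property>
    <property><name>...</name><value>...</value></property>
    ...
    </configuration>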
     
      /**
       * configure the jobconf of the user with the command line options of 
       * -libjars, -files, -archives
       * @param job the JobConf
       * @param submitJobDir
       * @throws IOException
       */
      private void copyAndConfigureFiles(JobConf job, Path jobSubmitDir) 
      throws IOException, InterruptedException {
        short replication = (short)job.getInt("mapred.submit.replication", 10);
        copyAndConfigureFiles(job, jobSubmitDir, replication);
    
        // Set the working directory
        if (job.getWorkingDirectory() == null) {
          job.setWorkingDirectory(fs.getWorkingDirectory());          
        }
      }
      
      private void copyAndConfigureFiles(JobConf job, Path submitJobDir, 
          short replication) throws IOException, InterruptedException {
        
        if (!(job.getBoolean("mapred.used.genericoptionsparser", false))) {
          LOG.warn("Use GenericOptionsParser for parsing the arguments. " +
                   "Applications should implement Tool for the same.");
        }
    
        // Retrieve command line arguments placed into the JobConf
        // by GenericOptionsParser.
        String files = job.get("tmpfiles");
        String libjars = job.get("tmpjars");
        String archives = job.get("tmparchives");
    
        //
        // Figure out what fs the JobTracker is using.  Copy the
        // job to it, under a temporary name.  This allows DFS to work,
        // and under the local fs also provides UNIX-like object loading 
        // semantics.  (that is, if the job file is deleted right after
        // submission, we can still run the submission to completion)
        //
    
        // Create a number of filenames in the JobTracker's fs namespace
        FileSystem fs = submitJobDir.getFileSystem(job);
        LOG.debug("default FileSystem: " + fs.getUri());
        if (fs.exists(submitJobDir)) {
          throw new IOException("Not submitting job. Job directory " + submitJobDir
              +" already exists!! This is unexpected.Please check what's there in" +
              " that directory");
        }
        submitJobDir = fs.makeQualified(submitJobDir);
        FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
        FileSystem.mkdirs(fs, submitJobDir, mapredSysPerms);
        Path filesDir = JobSubmissionFiles.getJobDistCacheFiles(submitJobDir);
        Path archivesDir = JobSubmissionFiles.getJobDistCacheArchives(submitJobDir);
        Path libjarsDir = JobSubmissionFiles.getJobDistCacheLibjars(submitJobDir);
        // add all the command line files/ jars and archive
        // first copy them to jobtrackers filesystem 
        
        if (files != null) {
          FileSystem.mkdirs(fs, filesDir, mapredSysPerms);
          String[] fileArr = files.split(",");
          for (String tmpFile: fileArr) {
            URI tmpURI;
            try {
              tmpURI = new URI(tmpFile);
            } catch (URISyntaxException e) {
              throw new IllegalArgumentException(e);
            }
            Path tmp = new Path(tmpURI);
            Path newPath = copyRemoteFiles(fs,filesDir, tmp, job, replication);
            try {
              URI pathURI = getPathURI(newPath, tmpURI.getFragment());
              DistributedCache.addCacheFile(pathURI, job);
            } catch(URISyntaxException ue) {
              //should not throw a uri exception 
              throw new IOException("Failed to create uri for " + tmpFile, ue);
            }
            DistributedCache.createSymlink(job);
          }
        }
        
        if (libjars != null) {
          FileSystem.mkdirs(fs, libjarsDir, mapredSysPerms);
          String[] libjarsArr = libjars.split(",");
          for (String tmpjars: libjarsArr) {
            Path tmp = new Path(tmpjars);
            Path newPath = copyRemoteFiles(fs, libjarsDir, tmp, job, replication);
            DistributedCache.addArchiveToClassPath
              (new Path(newPath.toUri().getPath()), job, fs);
          }
        }
        
        
        if (archives != null) {
         FileSystem.mkdirs(fs, archivesDir, mapredSysPerms); 
         String[] archivesArr = archives.split(",");
         for (String tmpArchives: archivesArr) {
           URI tmpURI;
           try {
             tmpURI = new URI(tmpArchives);
           } catch (URISyntaxException e) {
             throw new IllegalArgumentException(e);
           }
           Path tmp = new Path(tmpURI);
           Path newPath = copyRemoteFiles(fs, archivesDir, tmp, job, replication);
           try {
             URI pathURI = getPathURI(newPath, tmpURI.getFragment());
             DistributedCache.addCacheArchive(pathURI, job);
           } catch(URISyntaxException ue) {
             //should not throw an uri exception
             throw new IOException("Failed to create uri for " + tmpArchives, ue);
           }
           DistributedCache.createSymlink(job);
         }
        }
        
        // First we check whether the cached archives and files are legal.
        TrackerDistributedCacheManager.validate(job);
        //  set the timestamps of the archives and files
        TrackerDistributedCacheManager.determineTimestamps(job);
        //  set the public/private visibility of the archives and files
        TrackerDistributedCacheManager.determineCacheVisibilities(job);
        // get DelegationTokens for cache files
        TrackerDistributedCacheManager.getDelegationTokens(job, 
                                                           job.getCredentials());
    
        String originalJarPath = job.getJar();
    
        if (originalJarPath != null) {           // copy jar to JobTracker's fs
          // use jar name if job is not named. 
          if ("".equals(job.getJobName())){
            job.setJobName(new Path(originalJarPath).getName());
          }
          Path submitJarFile = JobSubmissionFiles.getJobJar(submitJobDir);
          job.setJar(submitJarFile.toString());
          fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
          fs.setReplication(submitJarFile, replication);
          fs.setPermission(submitJarFile, 
              new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
        } else {
          LOG.warn("No job jar file set.  User classes may not be found. "+
                   "See JobConf(Class) or JobConf#setJar(String).");
        }
      }
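
    The GenericOptionsParser warning above points at the standard Tool/ToolRunner pattern: when a driver is launched through ToolRunner, options such as -files, -libjars and -archives are parsed and placed into the tmpfiles, tmpjars and tmparchives properties that copyAndConfigureFiles() reads. A minimal sketch (class and job names are made up):

    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    public class ToolExampleDriver extends Configured implements Tool {

      @Override
      public int run(String[] args) throws Exception {
        Job job = new Job(getConf(), "tool-example");
        job.setJarByClass(ToolExampleDriver.class);
        // ... set mapper, reducer, input and output paths here ...
        return job.waitForCompletion(true) ? 0 : 1;
      }

      public static void main(String[] args) throws Exception {
        // ToolRunner runs GenericOptionsParser over args, so -files, -libjars
        // and -archives end up in the configuration for the copy step above.
        System.exit(ToolRunner.run(new ToolExampleDriver(), args));
      }
    }

    Such a driver would then be invoked along the lines of: hadoop jar myjob.jar ToolExampleDriver -files lookup.dat input output (all arguments invented for illustration).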
     
  • Tells the jobtracker that the job is ready for execution by calling submitJob() on JobTracker.
      /**
       * JobTracker.submitJob() kicks off a new job.  
       *
       * Create a 'JobInProgress' object, which contains both JobProfile
       * and JobStatus.  Those two sub-objects are sometimes shipped outside
       * of the JobTracker.  But JobInProgress adds info that's useful for
       * the JobTracker alone.
       */
      public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
          throws IOException {
        return submitJob(jobId, jobSubmitDir, null, ts, false);
      }
    
      /**
       * JobTracker.submitJob() kicks off a new job.
       * 
       * Create a 'JobInProgress' object, which contains both JobProfile and
       * JobStatus. Those two sub-objects are sometimes shipped outside of the
       * JobTracker. But JobInProgress adds info that's useful for the JobTracker
       * alone.
       */
      public JobStatus submitJob(JobID jobId, String jobSubmitDir,
          UserGroupInformation ugi, Credentials ts, boolean recovered)
          throws IOException {
        // Check for safe-mode
        checkSafeMode();
        
        JobInfo jobInfo = null;
        if (ugi == null) {
          ugi = UserGroupInformation.getCurrentUser();
        }
        synchronized (this) {
          if (jobs.containsKey(jobId)) {
            // job already running, don't start twice
            return jobs.get(jobId).getStatus();
          }
          jobInfo = new JobInfo(jobId, new Text(ugi.getShortUserName()),
              new Path(jobSubmitDir));
        }
        
        // Create the JobInProgress, do not lock the JobTracker since
        // we are about to copy job.xml from HDFS
        JobInProgress job = null;
        try {
          job = new JobInProgress(this, this.conf, jobInfo, 0, ts);
        } catch (Exception e) {
          throw new IOException(e);
        }
        
        synchronized (this) {
          // check if queue is RUNNING
          String queue = job.getProfile().getQueueName();
          if (!queueManager.isRunning(queue)) {
            throw new IOException("Queue \"" + queue + "\" is not running");
          }
          try {
            aclsManager.checkAccess(job, ugi, Operation.SUBMIT_JOB);
          } catch (IOException ioe) {
            LOG.warn("Access denied for user " + job.getJobConf().getUser()
                + ". Ignoring job " + jobId, ioe);
            job.fail();
            throw ioe;
          }
    
          // Check the job if it cannot run in the cluster because of invalid memory
          // requirements.
          try {
            checkMemoryRequirements(job);
          } catch (IOException ioe) {
            throw ioe;
          }
    
          if (!recovered) {
            // Store the information in a file so that the job can be recovered
            // later (if at all)
            Path jobDir = getSystemDirectoryForJob(jobId);
            FileSystem.mkdirs(fs, jobDir, new FsPermission(SYSTEM_DIR_PERMISSION));
            FSDataOutputStream out = fs.create(getSystemFileForJob(jobId));
            jobInfo.write(out);
            out.close();
          }
    
          try {
            this.taskScheduler.checkJobSubmission(job);
          } catch (IOException ioe){
            LOG.error("Problem in submitting job " + jobId, ioe);
            throw ioe;
          }
    
          // Submit the job
          JobStatus status;
          try {
            status = addJob(jobId, job);
          } catch (IOException ioe) {
            LOG.info("Job " + jobId + " submission failed!", ioe);
            status = job.getStatus();
            status.setFailureInfo(StringUtils.stringifyException(ioe));
            failJob(job);
            throw ioe;
          }
          return status;
        }
      }
     


Reposted from puffsun.iteye.com/blog/1896329