hadoop-day06

1 Introduction to YARN

2 YARN installation

3 Submitting an MR program to YARN

3.1 Submitting an MR program from Windows

1 Add a mapred-site.xml configuration file to the project (typically under src/main/resources, so that it ends up on the classpath)

<configuration>
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
</configuration>

2 Set the following parameters in the driver program

   1) HADOOP_USER_NAME

   2) The run mode: yarn

   3) The location of the ResourceManager

   4) The default file system

   5) The cross-platform submission parameter

   6) The path to the jar package

 // Set the Hadoop username
 System.setProperty("HADOOP_USER_NAME", "root");
 // Run in YARN cluster mode
 // 1 Configuration object
 Configuration conf = new Configuration();
 // Set the MR program run mode
 conf.set("mapreduce.framework.name", "yarn");
 // Location of the YARN ResourceManager
 conf.set("yarn.resourcemanager.hostname", "linux01");
 // Process data in HDFS
 conf.set("fs.defaultFS", "hdfs://linux01:8020");
 // Set the cross-platform submission parameter
 conf.set("mapreduce.app-submission.cross-platform", "true");
 // 2 Get a Job object
 Job job = Job.getInstance(conf, "wordcount");
 // Set the location of the jar package
 job.setJar("C:\\Users\\Administrator\\Desktop\\demo.jar");

The complete driver class:
package com._51doit.mr.day06.yarn;

import com._51doit.mr.day03.WordCountMapper;
import com._51doit.mr.day03.WordCountReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @Classname WcDriver
 * @Date 2020/9/26 10:48
 * @Created by 多易教育-DOIT18
 * @Description: MR programs run locally by default; the settings below make the program run on YARN:
 * 1 Set the MR run mode to yarn
 * 2 Tell the program where YARN (the ResourceManager) is
 * 3 Have the MR program process file data in HDFS
 * 4 Set the cross-platform parameter for submitting from Windows
 * 5 The program is submitted to YARN --> scheduled --> runs on different nodes [jar package],
 *   so set the location of the jar package
 * 6 Add mapred-site.xml, which configures the MR runtime environment
 */
public class WcDriver {
    public static void main(String[] args) throws Exception {
        // Set the Hadoop username
        System.setProperty("HADOOP_USER_NAME", "root");
        // Run in YARN cluster mode
        // 1 Configuration object
        Configuration conf = new Configuration();
        // Set the MR program run mode
        conf.set("mapreduce.framework.name", "yarn");
        // Location of the YARN ResourceManager
        conf.set("yarn.resourcemanager.hostname", "linux01");
        // Process data in HDFS
        conf.set("fs.defaultFS", "hdfs://linux01:8020");
        // Set the cross-platform submission parameter
        conf.set("mapreduce.app-submission.cross-platform", "true");
        // 2 Get a Job object
        Job job = Job.getInstance(conf, "wordcount");
        // Set the location of the jar package
        job.setJar("C:\\Users\\Administrator\\Desktop\\demo.jar");
        // 3 Set the Mapper and Reducer classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 4 Set the output key/value types of the map and reduce tasks
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5 Set the number of reduce tasks
        job.setNumReduceTasks(2);
        // 6 Set the input path of the data to process (in HDFS)
        FileInputFormat.setInputPaths(job, new Path("/word.txt"));
        // 7 Set the output path for the results (must not already exist)
        FileOutputFormat.setOutputPath(job, new Path("/wc2/"));
        // 8 Submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
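
Once the job finishes, the output can be checked from a shell on the cluster. A minimal sketch, assuming the output path /wc2 and the two reduce tasks configured above:

 # list the output directory; with 2 reduce tasks there should be part-r-00000 and part-r-00001
 hdfs dfs -ls /wc2
 # print the word counts
 hdfs dfs -cat /wc2/part-r-00000 /wc2/part-r-00001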


3.2 Submitting an MR program from Linux

Only the run mode (and the ResourceManager address) needs to be set in the driver class. The other settings can be omitted, because `hadoop jar` runs the program with the cluster's configuration and the jar itself on the classpath.

  // Setting the Hadoop username is not needed when submitting from the cluster
  // System.setProperty("HADOOP_USER_NAME", "root");
  // Run in YARN cluster mode
  // 1 Configuration object
  Configuration conf = new Configuration();
  // Set the MR program run mode
  conf.set("mapreduce.framework.name", "yarn");
  // Location of the YARN ResourceManager
  conf.set("yarn.resourcemanager.hostname", "linux01");
  // The default file system comes from the cluster configuration
  // conf.set("fs.defaultFS", "hdfs://linux01:8020");
  // The cross-platform parameter is not needed on Linux
  // conf.set("mapreduce.app-submission.cross-platform", "true");
  // 2 Get a Job object
  Job job = Job.getInstance(conf, "wordcount");
  // A hard-coded jar path is not needed:
  // job.setJar("/demo.jar");
  // The jar is on the classpath when started with
  // hadoop jar /demo.jar com._51doit.mr.day06.yarn.WcDriver2
  // so it can be located from the driver class:
  job.setJarByClass(WcDriver2.class);

The complete driver class:
package com._51doit.mr.day06.yarn;

import com._51doit.mr.day03.WordCountMapper;
import com._51doit.mr.day03.WordCountReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @Classname WcDriver2
 * @Date 2020/9/26 10:48
 * @Created by 多易教育-DOIT18
 * @Description: Submitted from Linux with
 *   hadoop jar /demo.jar com._51doit.mr.day06.yarn.WcDriver2
 * Only the run mode and the ResourceManager address are set in code;
 * the remaining settings come from the cluster configuration.
 */
public class WcDriver2 {
    public static void main(String[] args) throws Exception {
        // Setting the Hadoop username is not needed when submitting from the cluster
        // System.setProperty("HADOOP_USER_NAME", "root");
        // Run in YARN cluster mode
        // 1 Configuration object
        Configuration conf = new Configuration();
        // Set the MR program run mode
        conf.set("mapreduce.framework.name", "yarn");
        // Location of the YARN ResourceManager
        conf.set("yarn.resourcemanager.hostname", "linux01");
        // The default file system comes from the cluster configuration
        // conf.set("fs.defaultFS", "hdfs://linux01:8020");
        // The cross-platform parameter is not needed on Linux
        // conf.set("mapreduce.app-submission.cross-platform", "true");
        // 2 Get a Job object
        Job job = Job.getInstance(conf, "wordcount");
        // A hard-coded jar path is not needed:
        // job.setJar("/demo.jar");
        // The jar is on the classpath when started with
        // hadoop jar /demo.jar com._51doit.mr.day06.yarn.WcDriver2
        // so it can be located from the driver class:
        job.setJarByClass(WcDriver2.class);
        // 3 Set the Mapper and Reducer classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 4 Set the output key/value types of the map and reduce tasks
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5 Set the number of reduce tasks
        job.setNumReduceTasks(2);
        // 6 Set the input path of the data to process (in HDFS)
        FileInputFormat.setInputPaths(job, new Path("/word.txt"));
        // 7 Set the output path for the results (must not already exist)
        FileOutputFormat.setOutputPath(job, new Path("/wc3/"));
        // 8 Submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
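
Before the program can be started, the project has to be packaged into demo.jar and copied to the Linux machine. A minimal sketch, assuming the compiled classes sit under target/classes (a hypothetical build layout) and the jar should end up at / on linux01 as in the start command below:

 # package the compiled classes into demo.jar
 jar -cf demo.jar -C target/classes .
 # copy the jar to the cluster node
 scp demo.jar root@linux01:/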

Configure the $HADOOP_HOME/etc/hadoop/mapred-site.xml file on the Linux machine

<configuration>
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>

</configuration>
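
If the cluster has more than one NodeManager, the same mapred-site.xml must be present on every node, or tasks scheduled there will not find the MapReduce runtime. A minimal sketch, assuming two additional worker hosts named linux02 and linux03 (hypothetical hostnames):

 scp /opt/apps/hadoop-3.1.1/etc/hadoop/mapred-site.xml root@linux02:/opt/apps/hadoop-3.1.1/etc/hadoop/
 scp /opt/apps/hadoop-3.1.1/etc/hadoop/mapred-site.xml root@linux03:/opt/apps/hadoop-3.1.1/etc/hadoop/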

Start the program:

hadoop jar /demo.jar com._51doit.mr.day06.yarn.WcDriver2
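
The job can then be watched and its result inspected from the command line. A sketch assuming the output path /wc3 set in the driver above:

 # list running YARN applications
 yarn application -list
 # print the word counts once the job completes
 hdfs dfs -cat /wc3/part-r-00000 /wc3/part-r-00001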
