1 Introduction to YARN
2 YARN installation
3 Submit the MR program to YARN
3.1 Submit MR program on Windows
1 Add the mapred-site.xml configuration file to this project
<configuration> <property> <name>yarn.app.mapreduce.am.env</name> <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value> </property> <property> <name>mapreduce.map.env</name> <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value> </property> <property> <name>mapreduce.reduce.env</name> <value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value> </property> </configuration>
2 Set some parameters in the startup program
1) HADOOP_USER_NAME
2) The operating mode is yarn
3) Location of the resourcemanager
4) File system for default operation
5) Cross-platform parameters
6) Path to Jar package
// Set the username of hadoop System.setProperty("HADOOP_USER_NAME", "root"); // yarn cluster mode operation list // 1 configuration object Configuration conf = new Configuration(); //Set MR program operation mode conf.set("mapreduce.framework.name" , "yarn"); //The location of the program yarn conf.set("yarn.resourcemanager.hostname", "linux01"); // Process data in HDFS conf.set("fs.defaultFS", "hdfs://linux01:8020"); // Set cross-platform parameters conf.set("mapreduce.app-submission.cross-platform","true"); // Set the location of the jar package // 2 Get a Job object Job job = Job.getInstance(conf, "wordcount"); job.setJar("C:\\Users\\Administrator\\Desktop\\demo.jar");
package com._51doit.mr.day06.yarn;
import com._51doit.mr.day03.WordCountMapper;
import com._51doit.mr.day03.WordCountReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * @Classname WcDriver
 * @Date 2020/9/26 0026 10:48
 * @Created by 多易教育-DOIT18
 * @Description: MR programs run locally by default; these settings make the
 * job run on a YARN cluster, submitted from a Windows machine:
 *   1 set the MR execution framework to yarn
 *   2 tell the program where the YARN ResourceManager is
 *   3 make the MR program read/write files in HDFS
 *   4 set the cross-platform flag needed when submitting from Windows
 *   5 set the location of the jar that YARN ships to the worker nodes
 *   6 add mapred-site.xml, which configures the MR runtime environment
 */
public class WcDriver {
    public static void main(String[] args) throws Exception {
        // Run as "root" so HDFS/YARN accept the submission from Windows.
        System.setProperty("HADOOP_USER_NAME", "root");
        // 1 Configuration object for YARN cluster mode.
        Configuration conf = new Configuration();
        // Run the MR program on YARN instead of the default local runner.
        conf.set("mapreduce.framework.name" , "yarn");
        // Hostname of the YARN ResourceManager.
        conf.set("yarn.resourcemanager.hostname", "linux01");
        // Process data stored in HDFS.
        conf.set("fs.defaultFS", "hdfs://linux01:8020");
        // Required when submitting from Windows to a Linux cluster.
        conf.set("mapreduce.app-submission.cross-platform","true");
        // 2 Get a Job object and point it at the pre-built jar on this machine.
        Job job = Job.getInstance(conf, "wordcount");
        job.setJar("C:\\Users\\Administrator\\Desktop\\demo.jar");
        // 3 Mapper and reducer task classes.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 4 Output key/value types for the map and reduce phases.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5 Number of reduce tasks.
        job.setNumReduceTasks(2);
        // 6 Input path (in HDFS).
        FileInputFormat.setInputPaths(job, new Path("/word.txt"));
        // 7 Output path; NOTE: the job fails if this directory already exists.
        FileOutputFormat.setOutputPath(job, new Path("/wc2/"));
        // 8 Submit the job and wait. FIX: the original discarded the boolean
        // result, so the JVM always exited 0 even when the job failed;
        // propagate success/failure as the process exit code.
        boolean ok = job.waitForCompletion(true);
        System.exit(ok ? 0 : 1);
    }
}
3.2 Submit MR program on Linux
You only need to set the program's operating mode in the startup class
// Set the username of hadoop
// System.setProperty("HADOOP_USER_NAME", "root");
// yarn cluster mode operation list
// 1 configuration object
Configuration conf = new Configuration();
//Set MR program operation mode
conf.set("mapreduce.framework.name" , "yarn");
//The location of the program yarn
conf.set("yarn.resourcemanager.hostname", "linux01");
// Process data in HDFS
// conf.set("fs.defaultFS", "hdfs://linux01:8020");
// Set cross-platform parameters
// conf.set("mapreduce.app-submission.cross-platform","true");
// Set the location of the jar package
// 2 Get a Job object
Job job = Job.getInstance(conf, "wordcount");
//job.setJar("/demo.jar");
/**
* java -cp /demo.jar com._51doit.day06.uarn.WcDriver
*
*/
job.setJarByClass(WcDriver2.class);
package com._51doit.mr.day06.yarn;
import com._51doit.mr.day03.WordCountMapper;
import com._51doit.mr.day03.WordCountReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @Classname WcDriver
 * @Date 2020/9/26 0026 10:48
 * @Created by 多易教育-DOIT18
 * @Description: MR programs run locally by default; this driver is the
 * Linux-side variant for YARN submission:
 *   1 set the MR execution framework to yarn
 *   2 tell the program where the YARN ResourceManager is
 *   3 HDFS settings are inherited from the cluster config (commented out)
 *   4 the Windows cross-platform flag is not needed on Linux (commented out)
 *   5 the jar is located via setJarByClass instead of an explicit path
 *   6 mapred-site.xml on the cluster configures the MR runtime environment
 * Run with: hadoop jar /demo.jar com._51doit.mr.day06.yarn.WcDriver2
 */
public class WcDriver2 {
    public static void main(String[] args) throws Exception {
        // Not needed on Linux: the job runs as the submitting cluster user.
        // System.setProperty("HADOOP_USER_NAME", "root");
        // 1 Configuration object for YARN cluster mode.
        Configuration conf = new Configuration();
        // Run the MR program on YARN instead of the default local runner.
        conf.set("mapreduce.framework.name" , "yarn");
        // Hostname of the YARN ResourceManager.
        conf.set("yarn.resourcemanager.hostname", "linux01");
        // Picked up from the cluster's own configuration files on Linux.
        // conf.set("fs.defaultFS", "hdfs://linux01:8020");
        // Only needed when submitting from Windows.
        // conf.set("mapreduce.app-submission.cross-platform","true");
        // 2 Get a Job object; locate the jar from the driver class itself
        // (works when launched via: hadoop jar /demo.jar <main-class>).
        Job job = Job.getInstance(conf, "wordcount");
        // job.setJar("/demo.jar");
        job.setJarByClass(WcDriver2.class);
        // 3 Mapper and reducer task classes.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 4 Output key/value types for the map and reduce phases.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5 Number of reduce tasks.
        job.setNumReduceTasks(2);
        // 6 Input path (in HDFS).
        FileInputFormat.setInputPaths(job, new Path("/word.txt"));
        // 7 Output path; NOTE: the job fails if this directory already exists.
        FileOutputFormat.setOutputPath(job, new Path("/wc3/"));
        // 8 Submit the job and wait. FIX: the original discarded the boolean
        // result, so the JVM always exited 0 even when the job failed;
        // propagate success/failure as the process exit code.
        boolean ok = job.waitForCompletion(true);
        System.exit(ok ? 0 : 1);
    }
}
Configure the HADOOP_HOME/etc/hadoop/mapred-site.xml file on the Linux machine
<configuration>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
</configuration>
starting program
hadoop jar /demo.jar com._51doit.mr.day06.yarn.WcDriver2