Submitting Hadoop jobs remotely from IDEA

  • Create a new Maven project and add the following dependencies:
<dependencies>
     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-common</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-mapreduce-client-core</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-hdfs</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
         <version>2.7.1</version>
     </dependency>
 </dependencies>
  • Write the Map processing
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
      @Override
      protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
          String line = value.toString();
          System.out.println("line value: " + line);
          // TextInputFormat already delivers one line per map() call; the outer
          // tokenizer only guards against stray embedded newlines.
          StringTokenizer tokenizer = new StringTokenizer(line, "\n");
          while (tokenizer.hasMoreTokens()) {
              // each record is expected to have the form "name score"
              StringTokenizer tokenizerLine = new StringTokenizer(tokenizer.nextToken());
              String strName = tokenizerLine.nextToken();
              String strScore = tokenizerLine.nextToken();
              Text name = new Text(strName);
              int score = Integer.parseInt(strScore);
              context.write(name, new IntWritable(score));   // emit (name, score)
          }
      }
  }
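To make the mapper's output concrete, here is a tiny standalone sketch (not part of the job; the class name and the sample line are illustrative) that applies the same parsing to one input line:

import java.util.StringTokenizer;

public class MapLogicDemo {
    public static void main(String[] args) {
        String line = "陈洲立 67";                                 // one "name score" line of the input file
        StringTokenizer tokenizerLine = new StringTokenizer(line);
        String strName = tokenizerLine.nextToken();                // "陈洲立"
        int score = Integer.parseInt(tokenizerLine.nextToken());   // 67
        // the real mapper writes this pair with context.write(new Text(strName), new IntWritable(score))
        System.out.println(strName + " -> " + score);              // prints: 陈洲立 -> 67
    }
}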
  • Write the Reduce processing
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
      @Override
      protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
          int sum = 0;
          int count = 0;
          Iterator<IntWritable> iterator = values.iterator();
          while (iterator.hasNext()) {
              sum += iterator.next().get();              // accumulate every score for this student
              count++;
          }
          int average = sum / count;                     // integer average over all of the student's scores
          context.write(key, new IntWritable(average));
      }
  }
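Because a combiner may be applied to map output before it reaches the reducer, reusing this averaging Reduce as a combiner would average partial averages, which is not the overall average. A hedged, illustrative alternative is sketched below (the class names SumCountCombiner and AverageReducer and the "sum,count" encoding are not from the original article; the mapper would then emit new Text(score + ",1") instead of an IntWritable). The simpler option, used in the main function below, is to not set a combiner at all.

public static class SumCountCombiner extends Reducer<Text, Text, Text, Text> {
      @Override
      protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
          long sum = 0, count = 0;
          for (Text v : values) {                          // each value is a partial "sum,count" pair
              String[] parts = v.toString().split(",");
              sum += Long.parseLong(parts[0]);
              count += Long.parseLong(parts[1]);
          }
          context.write(key, new Text(sum + "," + count)); // still partial, safe to merge again
      }
  }

  public static class AverageReducer extends Reducer<Text, Text, Text, IntWritable> {
      @Override
      protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
          long sum = 0, count = 0;
          for (Text v : values) {
              String[] parts = v.toString().split(",");
              sum += Long.parseLong(parts[0]);
              count += Long.parseLong(parts[1]);
          }
          context.write(key, new IntWritable((int) (sum / count))); // divide only once, at the end
      }
  }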
  • Main function
System.setProperty("HADOOP_USER_NAME", "wujinlei");           // user to submit the job as
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000");
conf.set("mapreduce.app-submission.cross-platform", "true");  // submitting from Windows to a Linux cluster
conf.set("mapred.jar", "E:\\JackManWu\\hadoo-ptest\\target\\hadoop-test-1.0-SNAPSHOT.jar"); // local path of the packaged job jar
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
Job job = Job.getInstance(conf, "student_score");
job.setJarByClass(StudentScore.class);  // the class contained in the jar to execute

job.setMapperClass(Map.class);
// Note: the averaging Reduce is not a safe combiner (an average of partial averages
// is not the overall average), so no combiner is set here; see the combiner-safe
// sketch above if map-side aggregation is wanted.
// job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/input"));
FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
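The snippets above omit the enclosing class and imports. A minimal skeleton tying them together (assuming the class is named StudentScore, which job.setJarByClass above already references, and that main is declared with throws Exception so the Hadoop calls compile) could look like this:

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StudentScore {

    // ... the Map and Reduce classes from the sections above ...

    public static void main(String[] args) throws Exception {
        // ... the main-function body from this section ...
    }
}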
  • Prepare the /home/wujinlei/work/student/input input file. Refer to the "Create Input File" section in the first hadoop program - WordCount, and stage the input file on the cluster in advance (note: /home/wujinlei/work/student/output does not need to be created; the framework generates the output directory itself). A Java sketch for uploading the file follows the sample file below.
    • Sample input file:
    陈洲立 67
    陈东伟 98
    李宁 87
    杨森 86
    刘东奇 78
    谭果 94
    盖盖 83
    陈洲立 68
    陈东伟 96
    李宁 82
    杨森 85
    刘东奇 72
    谭果 97
    盖盖 82
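
If you would rather stage the input from code than with the HDFS shell, a minimal sketch using the HDFS Java API is shown below (the local file name student_scores.txt, assumed to contain the sample lines above, and the standalone class name are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadStudentInput {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "wujinlei");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");

        FileSystem fs = FileSystem.get(conf);
        Path inputDir = new Path("/home/wujinlei/work/student/input");
        fs.mkdirs(inputDir);                                    // create the input directory if it is missing
        // upload the local sample file into the job's input directory
        fs.copyFromLocalFile(new Path("student_scores.txt"), new Path(inputDir, "student_scores.txt"));
        fs.close();
    }
}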
    
  • Run the main function, follow the Hadoop log output, check the job's execution status on the cluster's job tracking page, and verify the final result (a small sketch for reading the output from HDFS is shown below).
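To inspect the result from code, the reducer output can be read back from HDFS; a minimal sketch (assuming a single reduce task, so the output file is part-r-00000, and an illustrative class name) is:

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PrintStudentOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");

        FileSystem fs = FileSystem.get(conf);
        Path result = new Path("/home/wujinlei/work/student/output/part-r-00000");
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(result), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);   // each line is "name<TAB>average score"
            }
        }
        fs.close();
    }
}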
