Hadoop HDFS及MapReduce示例程序

1、实现将本地文件合并,并上传至HDFS
本地文件在D:/hadooptest下

merge1.txt
----------------------
hadooptest

merge2.txt
-----------------------
hadoop
map
reduce

Hadoop集群namenode：hdfs://192.168.31.225:9000

/**
 * Merges the files found directly under a local directory into one file on HDFS.
 *
 * <p>The local directory, the HDFS namenode address and the target HDFS path are
 * hard-coded below. File contents are appended byte-for-byte in the order returned
 * by {@code listStatus}; no separator is inserted between files.
 */
public class PutMerge {

    /** Size in bytes of the buffer used to copy data from local files to HDFS. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Copies every regular file under {@code D:/hadooptest} into
     * {@code /user/root/example.txt} on the configured HDFS cluster.
     *
     * @param args ignored
     * @throws IOException if listing, reading or writing fails; the failure is
     *         propagated so the process ends with a non-zero exit status
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.31.225:9000");
        FileSystem hdfs = FileSystem.get(conf);
        FileSystem local = FileSystem.getLocal(conf);

        Path inputDir = new Path("D:/hadooptest");
        Path hdfsFile = new Path("hdfs://192.168.31.225:9000/user/root/example.txt");

        // List the inputs first so that a bad input directory fails before the
        // target file is created on HDFS.
        FileStatus[] inputFiles = local.listStatus(inputDir);

        // try-with-resources closes both streams even when a read/write throws.
        try (FSDataOutputStream out = hdfs.create(hdfsFile)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (FileStatus inputFile : inputFiles) {
                if (inputFile.isDirectory()) {
                    // Directories cannot be opened as a stream; skip them.
                    continue;
                }
                System.out.println(inputFile.getPath().getName());
                try (FSDataInputStream in = local.open(inputFile.getPath())) {
                    int bytesRead;
                    while ((bytesRead = in.read(buffer)) > 0) {
                        out.write(buffer, 0, bytesRead);
                    }
                }
            }
        }
        System.out.println("end---------->");
    }
}

2、统计学生的科目成绩及总分

chinese.txt
---------------------
chinese|lily|80
chinese|zhangsan|90
chinese|lisi|70
chinese|lucy|85

math.txt
----------------------
math|lily|89
math|zhangsan|97
math|lisi|99
math|lucy|85

实现代码：

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job (old {@code mapred} API) that groups score records by student and
 * emits, per student, the list of {@code subject:score} entries plus the total score.
 *
 * <p>Input lines have the form {@code subject|studentName|score}.
 * Usage: {@code StatisticDemo <input path> <output path>}.
 */
public class StatisticDemo extends Configured implements Tool{

    /**
     * Emits each well-formed input line keyed by the student name (second field).
     * Lines with fewer than three {@code |}-separated fields are dropped.
     */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text>{

        @Override
        public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // For data such as chinese|zhangsan|10, split("|") does not give the
            // expected result because '|' is a regex metacharacter. Use
            // split("\\|") or the character-class form below instead.
            String[] fields = value.toString().split("[|]");
            if (fields.length >= 3) {
                String studentName = fields[1];

                // The whole line is forwarded so the reducer can read subject and score.
                output.collect(new Text(studentName), value);
            }
        }

    }

    /**
     * Builds one output record per student: the concatenated {@code subject:score}
     * entries followed by the sum of all scores.
     */
    public static class Reduce extends MapReduceBase implements Reducer<Text, Text, Text, Text>{

        @Override
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // StringBuilder avoids re-copying the accumulated string on every value.
            StringBuilder summary = new StringBuilder();
            long sum = 0;
            while (values.hasNext()) {
                // Values are the lines emitted by MapClass, which only forwards
                // lines with at least three fields.
                String[] fields = values.next().toString().split("[|]");

                String subject = fields[0];
                String score = fields[2];
                // NOTE(review): entries are appended without a separator
                // (e.g. "chinese:90math:97"); kept as-is to preserve the output
                // format - confirm whether a delimiter was intended.
                summary.append(subject).append(":").append(score);
                sum += Long.parseLong(score);
            }
            output.collect(key, new Text(summary + "  总分：" + sum));
        }

    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 on success, 2 if the required arguments are missing
     * @throws Exception if the job cannot be submitted or fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: StatisticDemo <input path> <output path>");
            return 2;
        }

        Configuration conf = getConf();
        JobConf job = new JobConf(conf,StatisticDemo.class);

        Path in = new Path(args[0]);
        Path out = new Path(args[1]);
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("StatisticDemo");

        job.setMapperClass(MapClass.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        JobClient.runJob(job);
        return 0;
    }

    /** Entry point: runs the tool through {@link ToolRunner} and exits with its return code. */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new StatisticDemo(), args));
    }

}

结果：
（原文此处为运行结果截图，图片未保留）

3、使用Combiner处理

要处理的数据

data.txt
----------------------
ch,12
jp,13
us,123
en,456
as,12
ds,45
cv,123
ch,13
jp,21

代码：

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


/**
 * MapReduce job (old {@code mapred} API) that computes the average value per key
 * and uses a combiner to pre-aggregate partial sums on the map side.
 *
 * <p>Input lines have the form {@code key,value}. Intermediate values are encoded
 * as {@code sum,count} text so that the combiner and the reducer can both merge them.
 * Usage: {@code CombineDemo <input path> <output path>}.
 */
public class CombineDemo extends Configured implements Tool{

    /**
     * Emits {@code (country, "value,1")} for each input line that has at least two
     * comma-separated fields and whose second field is non-empty and does not start
     * with a double quote.
     */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>{

        @Override
        public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
            String[] fields = value.toString().split(",");
            if (fields.length >= 2) {
                String country = fields[0];
                String numClaims = fields[1];
                if (numClaims.length() > 0 && !numClaims.startsWith("\"")) {
                    // The trailing ",1" is the record count carried with the value.
                    output.collect(new Text(country), new Text(numClaims + ",1"));
                }
            }
        }

    }

    /**
     * Combiner: merges {@code sum,count} pairs for a key into a single
     * {@code sum,count} pair, keeping the same encoding the mapper emits.
     */
    public static class Combine extends MapReduceBase implements Reducer<Text, Text, Text, Text>{

        @Override
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            double sum = 0;
            int count = 0;
            while (values.hasNext()) {
                String[] fields = values.next().toString().split(",");
                sum += Double.parseDouble(fields[0]);
                count += Integer.parseInt(fields[1]);
            }
            output.collect(key, new Text(sum + "," + count));
        }

    }

    /**
     * Reducer: merges all {@code sum,count} pairs for a key and emits
     * {@code sum / count} as the average.
     */
    public static class Reduce extends MapReduceBase implements Reducer<Text, Text, Text, DoubleWritable>{

        @Override
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
                throws IOException {

            double sum = 0;
            int count = 0;
            while (values.hasNext()) {
                String[] fields = values.next().toString().split(",");
                sum += Double.parseDouble(fields[0]);
                count += Integer.parseInt(fields[1]);
            }
            output.collect(key, new DoubleWritable(sum / count));
        }

    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 on success, 2 if the required arguments are missing
     * @throws Exception if the job cannot be submitted or fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: CombineDemo <input path> <output path>");
            return 2;
        }

        Configuration conf = getConf();
        JobConf job = new JobConf(conf,CombineDemo.class);

        Path in = new Path(args[0]);
        Path out = new Path(args[1]);
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("CombineDemo");
        job.setMapperClass(MapClass.class);
        job.setCombinerClass(Combine.class); // register the combiner
        job.setReducerClass(Reduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        JobClient.runJob(job);
        return 0;
    }

    /**
     * Entry point: runs the tool through {@link ToolRunner} and exits with its
     * return code, or with 1 if the run throws.
     */
    public static void main(String[] args) {
        int res;
        try {
            res = ToolRunner.run(new Configuration(), new CombineDemo(), args);
        } catch (Exception ex) {
            ex.printStackTrace();
            // Without this, a failed job would fall through and exit with status 0.
            res = 1;
        }
        System.exit(res);
    }
}

处理结果：
（原文此处为运行结果截图，图片未保留）

Hadoop HDFS及MapReduce示例程序

猜你喜欢