MapReduce(2)

I. Removing Duplicate Records

Source file:
192.168.234.21
192.168.234.22
192.168.234.21
192.168.234.21
192.168.234.23
192.168.234.21
192.168.234.21
192.168.234.21
192.168.234.25
192.168.234.21
192.168.234.21
192.168.234.26
192.168.234.21

1. Create the HDFS path distinct and upload the file distinct.txt
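
If you prefer to prepare HDFS from Java instead of the hdfs shell, a minimal sketch using the Hadoop FileSystem API could look like this (the class name UploadDistinct is made up for illustration; the NameNode address and file name are taken from the Driver below):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadDistinct {

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Connect to the same NameNode used by the Driver below
		FileSystem fs = FileSystem.get(new URI("hdfs://192.168.76.131:9000"), conf);
		// Create /distinct and upload the local source file into it
		fs.mkdirs(new Path("/distinct"));
		fs.copyFromLocalFile(new Path("distinct.txt"), new Path("/distinct/distinct.txt"));
		fs.close();
	}
}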

2.Mapper.java

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DistinctMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)
			throws IOException, InterruptedException {
		// Emit the whole line as the key; NullWritable serves as an empty value
		context.write(value, NullWritable.get());
	}
}



Because there is nothing to aggregate for each record, the map output value is left empty, which is exactly what NullWritable is for.

3.Reducer.java

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class DistinctReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

	@Override
	protected void reduce(Text key, Iterable<NullWritable> values,
			Reducer<Text, NullWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException {

		// The shuffle has already grouped identical keys, so writing each key once yields the distinct set
		context.write(key, NullWritable.get());
	}
}


4.Driver

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DistinctDriver {

	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		// Entry class, Mapper and Reducer
		job.setJarByClass(DistinctDriver.class);
		job.setMapperClass(DistinctMapper.class);
		job.setReducerClass(DistinctReducer.class);
		
		// Mapper output key/value types
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(NullWritable.class);
		
		// Reducer output key/value types
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);
		
		// Input file and output directory
		FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.76.131:9000/distinct/distinct.txt"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.76.131:9000/distinct/result"));
	
		job.waitForCompletion(true);
	}

}
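
Running this job against the sample input should leave one line per distinct address in /distinct/result (the keys come out sorted by the shuffle), i.e. something like:

192.168.234.21
192.168.234.22
192.168.234.23
192.168.234.25
192.168.234.26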




II. Object-Oriented Programming (Custom Writable)

Source file:
phone number   region   name   data usage (flow)
13877779999 bj zs 2145
13766668888 sh ls 1028
13766668888 sh ls 9987
13877779999 bj zs 5678
13544445555 sz ww 10577
13877779999 sh zs 2145
13766668888 sh ls 9987

1. Object-oriented programming: create the bean class
package com.study.flow.day01;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FlowBean implements Writable{

	private String phone ;
	private String addr ;
	private String name ;
	private Integer flow ;
	
	
	
	public String getPhone() {
		return phone;
	}
	public void setPhone(String phone) {
		this.phone = phone;
	}
	public String getAddr() {
		return addr;
	}
	public void setAddr(String addr) {
		this.addr = addr;
	}
	public String getName() {
		return name;
	}
	public void setName(String name) {
		this.name = name;
	}
	public Integer getFlow() {
		return flow;
	}
	public void setFlow(Integer flow) {
		this.flow = flow;
	}
	
	// Deserialization: read the fields back in exactly the same order they were written
	@Override
	public void readFields(DataInput input) throws IOException {
		
		this.phone = input.readUTF();
		this.addr = input.readUTF();
		this.name = input.readUTF();
		this.flow = input.readInt();
	}
	
	// Serialization: write the fields out in a fixed order
	@Override
	public void write(DataOutput output) throws IOException {
		output.writeUTF(phone);
		output.writeUTF(addr);
		output.writeUTF(name);
		output.writeInt(flow);
		
	}
	@Override
	public String toString() {
		return "FlowBean [phone=" + phone + ", addr=" + addr + ", name=" + name + ", flow=" + flow + "]";
	}
	
	
}
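
The write()/readFields() pair is the entire serialization contract. As a quick sanity check (this small class is not part of the original job, just an illustration), the bean can be round-tripped through a byte stream with plain java.io streams:

package com.study.flow.day01;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {

	public static void main(String[] args) throws IOException {
		FlowBean in = new FlowBean();
		in.setPhone("13877779999");
		in.setAddr("bj");
		in.setName("zs");
		in.setFlow(2145);

		// Serialize: write() pushes the fields onto a DataOutput in a fixed order
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		in.write(new DataOutputStream(buffer));

		// Deserialize: readFields() must read them back in exactly the same order
		FlowBean out = new FlowBean();
		out.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

		System.out.println(out);
		// FlowBean [phone=13877779999, addr=bj, name=zs, flow=2145]
	}
}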



2.Mapper

package com.study.flow.day01;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, FlowBean>.Context context)
			throws IOException, InterruptedException {
		// Parse one space-separated record: phone region name flow
		String line = value.toString();
		String [] datas = line.split(" ");
		FlowBean bean = new FlowBean();
		bean.setPhone(datas[0]);
		bean.setAddr(datas[1]);
		bean.setName(datas[2]);
		bean.setFlow(Integer.valueOf(datas[3]));
		
		// Group by name so the reducer can total each person's flow
		context.write(new Text(bean.getName()), bean);
	}
}


3.Reducer


package com.study.flow.day01;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

	@Override
	protected void reduce(Text key, Iterable<FlowBean> values, Reducer<Text, FlowBean, Text, FlowBean>.Context context)
			throws IOException, InterruptedException {
		
		Integer sum = 0 ;
		FlowBean bean = new FlowBean();
		for(FlowBean flowBean : values){
			// Total the flow for this name; copy the other fields from the grouped records
			sum = sum + flowBean.getFlow();
			bean.setPhone(flowBean.getPhone());
			bean.setAddr(flowBean.getAddr());
			bean.setName(flowBean.getName());
		}
		bean.setFlow(sum);
		
		context.write(key, bean);
	}
}



4.Driver

package com.study.flow.day01;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowDriver {

	public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {

		// Job configuration
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		// Set the entry class, Mapper and Reducer
		job.setJarByClass(FlowDriver.class);
		job.setMapperClass(FlowMapper.class);
		job.setReducerClass(FlowReducer.class);
		// Set the Mapper output key/value types
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);
		// Set the Reducer output key/value types
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);
		// Set the input and output paths
		FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.76.131:9000/flow"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.76.131:9000/flow/result"));
		
		job.waitForCompletion(true);
		
	}

}
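
For the sample input, the reducer totals the flow per name: ls = 1028 + 9987 + 9987 = 21002, ww = 10577, and zs = 2145 + 5678 + 2145 = 9968. Note that the phone and addr written to the output are simply copied from whichever grouped record was read last, so for zs the addr could come out as either bj or sh.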



Reposted from mingyundezuoan.iteye.com/blog/2377710