统计一年最高温度的三个月
工程结构:
WeatherKey:
package com.test.weather;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite map-output key: (year, month, temperature).
 * Natural ordering matches the job's sort comparator: year ascending,
 * month ascending, temperature DESCENDING (hottest first).
 */
public class WeatherKey implements WritableComparable<WeatherKey> {
	private int year;   // year of the reading
	private int month;  // month of the reading
	private double hot; // temperature value

	public int getYear() {
		return year;
	}
	public void setYear(int year) {
		this.year = year;
	}
	public int getMonth() {
		return month;
	}
	public void setMonth(int month) {
		this.month = month;
	}
	public double getHot() {
		return hot;
	}
	public void setHot(double hot) {
		this.hot = hot;
	}

	/** Deserializes fields in the same order {@link #write} emits them. */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.year = in.readInt();
		this.month = in.readInt();
		this.hot = in.readDouble();
	}

	/** Serializes fields; order must stay in sync with {@link #readFields}. */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(year);
		out.writeInt(month);
		out.writeDouble(hot);
	}

	/**
	 * Contract-compliant ordering (the original {@code this==arg0?0:-1}
	 * violated symmetry/transitivity): year asc, month asc, temperature desc,
	 * consistent with the MySort comparator.
	 */
	@Override
	public int compareTo(WeatherKey other) {
		int c = Integer.compare(year, other.year);
		if (c == 0) {
			c = Integer.compare(month, other.month);
		}
		if (c == 0) {
			c = -Double.compare(hot, other.hot); // hotter sorts first
		}
		return c;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof WeatherKey)) {
			return false;
		}
		WeatherKey other = (WeatherKey) obj;
		return year == other.year
				&& month == other.month
				&& Double.doubleToLongBits(hot) == Double.doubleToLongBits(other.hot);
	}

	@Override
	public int hashCode() {
		int result = 31 * year + month;
		long bits = Double.doubleToLongBits(hot);
		return 31 * result + (int) (bits ^ (bits >>> 32));
	}
}
|
WeatherMapper:
package com.test.weather;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Parses one weather record per line. The input key is a timestamp string
 * ("yyyy-MM-dd HH:mm:ss"), the value is a temperature with a trailing unit
 * character (e.g. "38c"). Emits a (year, month, temperature) composite key
 * and the original line as the value.
 */
public class WeatherMapper extends Mapper<Text, Text, WeatherKey, Text> {

	// A map task is single-threaded, so sharing the (non-thread-safe)
	// SimpleDateFormat and Calendar across records is safe and avoids
	// allocating them once per input line.
	private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
	private final Calendar calendar = Calendar.getInstance();

	@Override
	protected void map(Text key, Text value, Context context)
			throws IOException, InterruptedException {
		String dateString = key.toString();
		try {
			Date date = dateFormat.parse(dateString);
			calendar.setTime(date);
			int year = calendar.get(Calendar.YEAR);
			// NOTE(review): Calendar.MONTH is 0-based (January == 0). The value
			// is only used as a grouping/sorting key, so it is internally
			// consistent, but confirm before printing it as a month number.
			int month = calendar.get(Calendar.MONTH);
			String raw = value.toString();
			// Strip the trailing unit character to get the numeric temperature.
			double hot = Double.parseDouble(raw.substring(0, raw.length() - 1));
			WeatherKey outKey = new WeatherKey();
			outKey.setYear(year);
			outKey.setMonth(month);
			outKey.setHot(hot);
			context.write(outKey, new Text(dateString + "\t" + raw));
		} catch (Exception e) {
			// Malformed record: report it and skip rather than failing the task.
			e.printStackTrace();
		}
	}
}
|
WeatherReduer:
package com.test.weather;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Emits the first TOP_N records of each group. Values arrive pre-sorted by
 * the sort comparator (temperature descending within a month), so the first
 * three are the hottest.
 *
 * NOTE(review): the grouping comparator groups by (year, month), so each
 * reduce() call covers ONE month and this emits the 3 hottest readings per
 * month — not the 3 hottest months per year as the job title suggests.
 * Confirm which output is actually intended.
 */
public class WeatherReduer extends Reducer<WeatherKey, Text, NullWritable, Text> {

	/** Number of records to emit per group. */
	private static final int TOP_N = 3;

	@Override
	protected void reduce(WeatherKey key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		int emitted = 0;
		for (Text record : values) {
			if (emitted >= TOP_N) {
				break; // remaining values are cooler; nothing more to emit
			}
			context.write(NullWritable.get(), record);
			emitted++;
		}
	}
}
|
MyGroup:
package com.test.weather;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
* 分组
* @author root
*
*/
public class MyGroup extends WritableComparator{
public MyGroup(){
super(WeatherKey.class,true);
}
public int compare(WritableComparable a, WritableComparable b) {
WeatherKey k1 =(WeatherKey) a;
WeatherKey k2=(WeatherKey) b;
int r1 =Integer.compare(k1.getYear(), k2.getYear());
if(r1==0){
int r2 =Integer.compare(k1.getMonth(), k2.getMonth());
return r2;
}
return r1;
}
}
|
MyPartioner:
package com.test.weather;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
public class MyPartioner extends HashPartitioner<WeatherKey,Text>{
public int getPartition(WeatherKey key, Text value, int numReduceTasks) {
return (key.getYear()-1949)%numReduceTasks;
}
}
|
MySort:
package com.test.weather;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
* 排序
* @author root
*
*/
public class MySort extends WritableComparator{
public MySort(){
super(WeatherKey.class,true);
}
public int compare(WritableComparable a, WritableComparable b) {
WeatherKey k1 =(WeatherKey) a;
WeatherKey k2=(WeatherKey) b;
int r1 =Integer.compare(k1.getYear(), k2.getYear());
if(r1==0){
int r2 =Integer.compare(k1.getMonth(), k2.getMonth());
if(r2==0){
return -Double.compare(k1.getHot(), k2.getHot());
}
return r2;
}
return r1;
}
}
|
RunJob:
package com.test.weather;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* 统计每年温度最高的三个月份
* @author root
*
*/
/**
 * Job driver: wires mapper, reducer, custom partitioner, sort and grouping
 * comparators, then submits the job and waits for completion.
 */
public class RunJob {
	public static void main(String[] args) {
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://node1:9000");
		// KeyValueTextInputFormat: split each input line at this separator —
		// left side becomes the map key, right side the map value.
		conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
		try {
			FileSystem fs = FileSystem.get(conf);
			Job job = Job.getInstance(conf);
			job.setJobName("Weather");
			job.setJarByClass(RunJob.class);
			job.setMapperClass(WeatherMapper.class);
			job.setReducerClass(WeatherReduer.class);
			job.setMapOutputKeyClass(WeatherKey.class);
			job.setMapOutputValueClass(Text.class);
			job.setPartitionerClass(MyPartioner.class);
			job.setSortComparatorClass(MySort.class);
			job.setGroupingComparatorClass(MyGroup.class);
			// Three reducers (default is one) — one partition per year, see MyPartioner.
			job.setNumReduceTasks(3);
			// KeyValueTextInputFormat: text left of the first separator is the key,
			// text right of it is the value.
			job.setInputFormatClass(KeyValueTextInputFormat.class);
			// Input data must be uploaded to /usr/input/weather on HDFS;
			// results are written to /usr/output/weather.
			FileInputFormat.addInputPath(job, new Path("/usr/input/weather"));
			// The output directory must not pre-exist (the job creates it and
			// fails otherwise), so remove any leftover from a previous run.
			Path output = new Path("/usr/output/weather");
			if (fs.exists(output)) {
				fs.delete(output, true);
			}
			FileOutputFormat.setOutputPath(job, output);
			boolean succeeded = job.waitForCompletion(true);
			if (succeeded) {
				System.out.println("job执行成功!");
			} else {
				// Previously the failure case was silent; report it explicitly.
				System.err.println("Weather job failed; see job logs for details.");
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
|