- 数据准备
下载 us-counties.csv 数据集（列格式：date,county,state,cases,deaths），并上传到作业输入目录 input/covid
链接:https://pan.baidu.com/s/1YRUpIeZNmfO8_g504Q8Vnw
提取码:8fv8
- CovidCountBean编程
package com.covid.bean;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Hadoop Writable value object carrying a pair of COVID-19 counters:
 * cumulative case count and cumulative death count.
 *
 * <p>Serialization order is fixed at (cases, deaths); {@link #write} and
 * {@link #readFields} must stay symmetric or shuffled records will be
 * corrupted.
 */
public class CovidCountBean implements Writable {

    private long cases;
    private long deaths;

    /** No-arg constructor required by the Hadoop serialization framework. */
    public CovidCountBean() {
    }

    public CovidCountBean(long cases, long deaths) {
        this.cases = cases;
        this.deaths = deaths;
    }

    /** Resets both counters at once so a single instance can be reused per record. */
    public void set(long cases, long deaths) {
        this.cases = cases;
        this.deaths = deaths;
    }

    public long getCases() {
        return cases;
    }

    public long getDeaths() {
        return deaths;
    }

    public void setCases(long cases) {
        this.cases = cases;
    }

    public void setDeaths(long deaths) {
        this.deaths = deaths;
    }

    /** Tab-separated "cases\tdeaths" — this is the exact text written to the job output. */
    @Override
    public String toString() {
        return cases + "\t" + deaths;
    }

    /** Serializes cases then deaths; must mirror {@link #readFields}. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(cases);
        dataOutput.writeLong(deaths);
    }

    /** Deserializes in the same (cases, deaths) order used by {@link #write}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.cases = dataInput.readLong();
        this.deaths = dataInput.readLong();
    }
}
- map编程
package com.covid.sum;
import com.covid.bean.CovidCountBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the COVID summing job.
 *
 * <p>Each input line is one CSV record of us-counties.csv with the layout
 * {@code date,county,state,cases,deaths}. For every valid record it emits
 * {@code (state, CovidCountBean(cases, deaths))}.
 *
 * <p>Fix: the original parsed fields[3]/fields[4] unconditionally, so the CSV
 * header row (or any truncated/corrupt line) threw NumberFormatException /
 * ArrayIndexOutOfBoundsException and killed the whole task. Such lines are
 * now skipped.
 */
public class CovidSumMapper extends Mapper<LongWritable, Text, Text, CovidCountBean> {
    // Reused output objects — standard Hadoop idiom to avoid per-record allocation.
    Text outKey = new Text();
    CovidCountBean outValue = new CovidCountBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        // Guard against short/empty lines before indexing columns 2..4.
        if (fields.length < 5) {
            return;
        }
        String state = fields[2];
        long cases;
        long deaths;
        try {
            cases = Long.parseLong(fields[3].trim());
            deaths = Long.parseLong(fields[4].trim());
        } catch (NumberFormatException e) {
            // Header row ("date,county,state,cases,deaths") or corrupt record — skip it.
            return;
        }
        outKey.set(state);
        outValue.set(cases, deaths);
        context.write(outKey, outValue);
    }
}
- reduce编程
package com.covid.sum;
import com.covid.bean.CovidCountBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the COVID summing job.
 *
 * <p>Receives one group per state key and sums the case and death counters of
 * every {@link CovidCountBean} in the group, emitting a single
 * {@code (state, totals)} pair.
 */
public class CovidSumReducer extends Reducer<Text, CovidCountBean, Text, CovidCountBean> {
    // Single reusable output bean — avoids allocating one per key group.
    CovidCountBean outvalue = new CovidCountBean();

    @Override
    protected void reduce(Text key, Iterable<CovidCountBean> values, Context context) throws IOException, InterruptedException {
        long caseSum = 0L;
        long deathSum = 0L;
        // Note: Hadoop reuses the bean instance across iterations, so the
        // counters must be read inside the loop, not collected for later use.
        for (CovidCountBean bean : values) {
            caseSum += bean.getCases();
            deathSum += bean.getDeaths();
        }
        outvalue.set(caseSum, deathSum);
        context.write(key, outvalue);
    }
}
- Driver编程
package com.covid.sum;
import com.covid.bean.CovidCountBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that configures and submits the COVID summing MapReduce job.
 *
 * <p>Reads CSV input from {@code input/covid}, writes per-state totals to
 * {@code output/covid/sum}, deleting any stale output directory first so
 * reruns do not fail with FileAlreadyExistsException.
 */
public class CovidSumDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Job wiring: jar, mapper/reducer classes, and both key/value type pairs.
        Job job = Job.getInstance(conf, CovidSumDriver.class.getSimpleName());
        job.setJarByClass(CovidSumDriver.class);
        job.setMapperClass(CovidSumMapper.class);
        job.setReducerClass(CovidSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CovidCountBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CovidCountBean.class);

        // Input/output locations (relative to the default filesystem).
        Path inputPath = new Path("input/covid");
        Path outputPath = new Path("output/covid/sum");
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // MapReduce refuses to start if the output directory already exists;
        // remove leftovers from a previous run before submitting.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // Block until the job finishes; exit 0 on success, 1 on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
- 运行Driver