什么是序列化?
序列化:把对象转换为字节序列的过程称为对象的序列化。
反序列化:把字节序列恢复为对象的过程称为对象的反序列化。
上面求最大值最小值,关于多个value值的问题我们用\001分隔符进行连接。但是这里只有两个值。要是有很多个值我们怎么解决?
我可以使用自定义序列化类,将这个类作为我们value的输出类型。再通过创建这个类的对象,获得封装对象的属性来实现多个value值。
实例:
自定义序列化类:
package com.hnxy.mr.entity;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/*
* 自定义序列化类
*/
public class WordWritable implements Writable {

    // Record name (e.g. a product name).
    private String keyName = "";
    // Numeric value associated with the name (e.g. a price).
    private Double keyval = 0D;

    /**
     * Serialization: memory -> stream.
     * Field order here must exactly match {@link #readFields(DataInput)}.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(keyName);
        out.writeDouble(keyval);
    }

    /**
     * Deserialization: stream -> memory, reading fields in the same
     * order they were written by {@link #write(DataOutput)}.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.keyName = in.readUTF();
        this.keyval = in.readDouble();
    }

    public String getKeyName() {
        return keyName;
    }

    public void setKeyName(String keyName) {
        this.keyName = keyName;
    }

    public Double getKeyval() {
        return keyval;
    }

    public void setKeyval(Double keyval) {
        this.keyval = keyval;
    }

    @Override
    public String toString() {
        // %s on a Double yields Double.toString, identical to concatenation.
        return String.format(" [keyName=%s, keyval=%s]", keyName, keyval);
    }
}
封装了商品的名称和价格属性
实现类:
package com.hnxy.mr.max;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.hnxy.mr.entity.WordWritable;
public class MaxMinWord extends Configured implements Tool {

    /**
     * Mapper: scans tab-separated rows (exactly 7 columns expected), taking the
     * product name from column 3 and the price from column 6, and tracks the
     * local maximum and minimum over this mapper's split. Both candidates are
     * emitted once, in cleanup(), under the single key "max_min" so that one
     * reducer call sees every mapper's candidates.
     */
    private static class MWMapper extends Mapper<LongWritable, Text, Text, WordWritable> {
        // Reused output objects (Hadoop idiom: avoid per-record allocation).
        private Text outkey = new Text();
        private WordWritable outval = new WordWritable();
        // Running max/min state across all map() calls of this task.
        private String maxkey = "";
        private Double maxval = 0D;
        private String minkey = "";
        private Double minval = 0D;

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, WordWritable>.Context context)
                throws IOException, InterruptedException {
            String[] str = value.toString().split("\t");
            // Only well-formed rows with exactly 7 columns are processed.
            if (null != str && str.length == 7) {
                // Parse once instead of three times.
                double price = Double.parseDouble(str[6]);
                // Track the maximum.
                if (maxval < price) {
                    maxval = price;
                    maxkey = str[3];
                }
                // Seed the minimum with the first positive value seen.
                if (minval <= 0D) {
                    minval = price;
                    minkey = str[3];
                }
                // BUG FIX: the original compared with '<', which tracked a
                // maximum; a minimum must be replaced by a SMALLER value.
                if (minval > price) {
                    minval = price;
                    minkey = str[3];
                }
            } else {
                // Count malformed rows instead of failing the task.
                context.getCounter("错误数据", "记录").increment(1);
            }
        }

        @Override
        protected void cleanup(Mapper<LongWritable, Text, Text, WordWritable>.Context context)
                throws IOException, InterruptedException {
            // Emit this split's maximum candidate...
            outkey.set("max_min");
            outval.setKeyName(maxkey);
            outval.setKeyval(maxval);
            context.write(outkey, outval);
            // ...and its minimum candidate under the same key.
            outkey.set("max_min");
            outval.setKeyName(minkey);
            outval.setKeyval(minval);
            context.write(outkey, outval);
        }
    }

    /**
     * Reducer: receives every mapper's max/min candidates under the shared
     * "max_min" key and reduces them to the global maximum and minimum,
     * writing each as a WordWritable key with a NullWritable value.
     */
    public static class MWReducer extends Reducer<Text, WordWritable, WordWritable, NullWritable> {
        // Reused output object.
        private WordWritable outkey = new WordWritable();
        // Global max/min state.
        private String maxkey = "";
        private Double maxval = 0D;
        private String minkey = "";
        private Double minval = 0D;
        // Scratch holder for the current value (kept from the original code).
        private Double tmpval = 0D;

        @Override
        protected void reduce(Text key, Iterable<WordWritable> values,
                Reducer<Text, WordWritable, WordWritable, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // Fold all candidates into one global maximum and minimum.
            for (WordWritable w : values) {
                // Global maximum.
                if (maxval < w.getKeyval()) {
                    maxval = w.getKeyval();
                    maxkey = w.getKeyName();
                }
                // Seed the minimum with the first positive value seen.
                if (minval <= 0D) {
                    minval = w.getKeyval();
                    minkey = w.getKeyName();
                }
                // Global minimum.
                if (minval > w.getKeyval()) {
                    minval = w.getKeyval();
                    minkey = w.getKeyName();
                }
            }
            // Write the maximum, then the minimum.
            outkey.setKeyName(maxkey);
            outkey.setKeyval(maxval);
            context.write(outkey, NullWritable.get());
            outkey.setKeyName(minkey);
            outkey.setKeyval(minval);
            context.write(outkey, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure (the original always returned 0,
     *         so a failed job still exited with a success code)
     */
    @Override
    public int run(String[] args) throws Exception {
        // Process exit code; flipped to 1 below if the job fails.
        int count = 0;
        Configuration conf = this.getConf();
        Job job = Job.getInstance(conf, "maxword");
        // Jar lookup anchor for cluster submission.
        job.setJarByClass(MaxMinWord.class);
        // Mapper / reducer wiring.
        job.setMapperClass(MWMapper.class);
        job.setReducerClass(MWReducer.class);
        // Map-stage and reduce-stage output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(WordWritable.class);
        job.setOutputKeyClass(WordWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // Input/output paths from the command line.
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);
        FileSystem fs = FileSystem.get(conf);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        // Delete a pre-existing output directory so the job can rerun.
        if (fs.exists(out)) {
            fs.delete(out, true);
            System.out.println(job.getJobName() + "'s Path Output is deleted");
        }
        // Run synchronously with progress reporting.
        boolean con = job.waitForCompletion(true);
        if (con) {
            System.out.println("成功");
        } else {
            System.out.println("失败");
            // BUG FIX: propagate failure to the caller / shell.
            count = 1;
        }
        return count;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MaxMinWord(), args));
    }
}