@[TOC](Simulating a data-extraction job with Hadoop MapReduce)
Create a new OrderBean, the key class the Hadoop job uses to extract order data
package com.cevent.hadoop.mapreduce.order;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
-
Order bean, inherited serialization comparison
-
@author cevent
-
@date 2020年4月7日
*/
public class OrderBean implements WritableComparable{private String orderID; // Order id
private Double price; // Product price
// Mandatory empty parameter, parameterized, getter / setter
public OrderBean () {
super ();
}public OrderBean(String orderID, Double price) {
super();
this.orderID = orderID;
this.price = price;
}public String getOrderID() {
return orderID;
}public void setOrderID(String orderID) {
this.orderID = orderID;
}public Double getPrice() {
return price;
}public void setPrice(Double price) {
this.price = price;
}@Override
public String toString() {
return orderID + “\t” + price;
}@Override
public void write(DataOutput output) throws IOException {
// 1.写入类型设置(序列化)
output.writeUTF(orderID);
output.writeDouble(price);}
// Override method
@Override
public void readFields (DataInput input) throws IOException {
// 2. Set the read type (deserialization)
this.orderID = input.readUTF ();
this.price = input.readDouble () ;
}@Override
public int compareTo (OrderBean o) {
// twice sort
//1.ID sort, return 0 is equal, return -1 is less than, return + 1–> positive order
int compareResult = this.orderID.compareTo (o. getOrderID ());
if (compareResult == 0) {
// 2. Price ordering-> Reverse order
compareResult = this.price> o.getPrice ()?-1: 1;
}return compareResult;
}
}
OrderMapper
package com.cevent.hadoop.mapreduce.order;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that splits each input line on tab characters and emits an {@link OrderBean} key
 * with a {@link NullWritable} value.
 *
 * @author cevent
 * @date 2020-04-07
 */
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

    // A single bean instance is reused for every map() call.
    OrderBean orderBean = new OrderBean();

    /**
     * Parses one line and writes the resulting key.
     *
     * @param key the input key supplied by the framework (not used here)
     * @param value one line of input text
     * @param context the context the output record is written to
     * @throws IOException if writing the output record fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Read the line.
        String mapLine = value.toString();

        // 2. Split it into tab-separated fields.
        String[] fields = mapLine.split("\t");

        // 3. Populate the bean. Example line: Order_0000001 Pdt_01 222.8
        //    The product ID at index 1 is skipped.
        orderBean.setOrderID(fields[0]);
        orderBean.setPrice(Double.parseDouble(fields[2]));

        // 4. Emit the record.
        context.write(orderBean, NullWritable.get());
    }
}
OrderPartitioner
package com.cevent.hadoop.mapreduce.order;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Partitioner that assigns records to partitions by order ID.
 *
 * @author cevent
 * @date 2020-04-07
 */
public class OrderPartitioner extends Partitioner<OrderBean, NullWritable> {

    /**
     * Chooses the partition from the hash code of the key's order ID.
     *
     * @param key the map output key
     * @param value the map output value (not used)
     * @param numPartitions the total number of partitions
     * @return a partition index in the range {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
        // Masking with Integer.MAX_VALUE clears the sign bit so the remainder is never negative.
        return (key.getOrderID().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
OrderGroupingComparator
package com.cevent.hadoop.mapreduce.order;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator that compares two {@link OrderBean} keys by order ID only, so keys
 * with the same order ID compare as equal regardless of price.
 */
public class OrderGroupingComparator extends WritableComparator {

    /**
     * No-argument constructor (the original author notes it is required). Registers
     * {@link OrderBean} as the key class and passes {@code true} as the parent constructor's
     * second argument.
     */
    public OrderGroupingComparator() {
        super(OrderBean.class, true);
    }

    /**
     * Compares the order IDs of the two keys.
     *
     * @param a the first key; must be an {@link OrderBean}
     * @param b the second key; must be an {@link OrderBean}
     * @return the result of comparing the first key's order ID with the second's
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final String leftId = ((OrderBean) a).getOrderID();
        final String rightId = ((OrderBean) b).getOrderID();
        return leftId.compareTo(rightId);
    }
}
OrderReducer
package com.cevent.hadoop.mapreduce.order;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
/**
-
Reducer receives mapper data
-
@author cevent
-
@date 2020 年 4 月 7 日
* /
public class OrderReducer extends Reduce <OrderBean, NullWritable, OrderBean, NullWritable> {@Override
protected void reduce(OrderBean bean, Iterable values,Context context)
throws IOException, InterruptedException {
// 写出数据
context.write(bean,NullWritable.get());
}
}
OrderDriver
Link: link .
package com.cevent.hadoop.mapreduce.order;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that configures and submits the order job.
 *
 * @author cevent
 * @date 2020-04-07
 */
public class OrderDriver {

    /**
     * Configures the job, runs it, and exits with status 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: OrderDriver <input path> <output path>");
            System.exit(2);
        }

        // 1. Obtain the configuration and create the job.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // 2. Set the class used to locate the job jar.
        job.setJarByClass(OrderDriver.class);

        // 3. Set the mapper and reducer classes.
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);

        // 4. Set the key and value types of the map output.
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 5. Set the key and value types of the final output.
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // 6. Set the input and output paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Register the grouping comparator.
        job.setGroupingComparatorClass(OrderGroupingComparator.class);

        // 7. Set the partitioner.
        job.setPartitionerClass(OrderPartitioner.class);

        // 8. Set the number of reduce tasks.
        job.setNumReduceTasks(3);

        // 9. Submit the job and wait for it to finish.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
However, running the job produced an unexpected result ...