Hadoop: simulating the problem of fetching data (top price per order)


First, create an OrderBean for the simulated data-capture job: a custom Writable key that carries the order ID and the price.

package com.cevent.hadoop.mapreduce.order;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;
/**
 * Order bean: a Writable key that serializes and compares itself.
 * @author cevent
 * @date 2020-04-07
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private String orderID; // order id
    private Double price;   // product price

    // Hadoop requires a no-arg constructor for deserialization
    public OrderBean() {
        super();
    }

    public OrderBean(String orderID, Double price) {
        super();
        this.orderID = orderID;
        this.price = price;
    }

    public String getOrderID() {
        return orderID;
    }

    public void setOrderID(String orderID) {
        this.orderID = orderID;
    }

    public Double getPrice() {
        return price;
    }

    public void setPrice(Double price) {
        this.price = price;
    }

    @Override
    public String toString() {
        return orderID + "\t" + price;
    }

    @Override
    public void write(DataOutput output) throws IOException {
        // 1. serialize the fields in a fixed order
        output.writeUTF(orderID);
        output.writeDouble(price);
    }

    @Override
    public void readFields(DataInput input) throws IOException {
        // 2. deserialize the fields in the same order they were written
        this.orderID = input.readUTF();
        this.price = input.readDouble();
    }

    @Override
    public int compareTo(OrderBean o) {
        // secondary sort:
        // 1. by order ID, ascending (0 = equal, negative = less than, positive = greater than)
        int compareResult = this.orderID.compareTo(o.getOrderID());
        if (compareResult == 0) {
            // 2. within the same order, by price, descending
            compareResult = this.price > o.getPrice() ? -1 : 1;
        }
        return compareResult;
    }
}
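Off-cluster, the write/readFields pair can be sanity-checked by round-tripping the bean through an in-memory buffer. A minimal sketch, assuming it sits in the same package as OrderBean (this test class is hypothetical, not part of the original post):

package com.cevent.hadoop.mapreduce.order;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class OrderBeanRoundTrip {
    public static void main(String[] args) throws Exception {
        OrderBean before = new OrderBean("Order_0000001", 222.8);

        // serialize into an in-memory buffer
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        before.write(new DataOutputStream(buffer));

        // deserialize a fresh bean from the same bytes
        OrderBean after = new OrderBean();
        after.readFields(new DataInputStream(
                new ByteArrayInputStream(buffer.toByteArray())));

        System.out.println(after); // expected: Order_0000001	222.8
    }
}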

OrderMapper

package com.cevent.hadoop.mapreduce.order;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper: reads the input line by line and emits an OrderBean key.
 * @author cevent
 * @date 2020-04-07
 */
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

    // reused across map() calls; safe because context.write() serializes immediately
    OrderBean orderBean = new OrderBean();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. read the line
        String mapLine = value.toString();
        // 2. split it on tabs
        String[] fields = mapLine.split("\t");

        // 3. fill the bean. Input: Order_0000001	Pdt_01	222.8
        //    the product id at index 1 is skipped
        orderBean.setOrderID(fields[0]);
        orderBean.setPrice(Double.parseDouble(fields[2]));
        // 4. emit
        context.write(orderBean, NullWritable.get());
    }
}

OrderPartitioner

package com.cevent.hadoop.mapreduce.order;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Partitioner: routes records by order ID.
 * @author cevent
 * @date 2020-04-07
 */
public class OrderPartitioner extends Partitioner<OrderBean, NullWritable> {

    @Override
    public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
        // like the default HashPartitioner, mask the sign bit and take the remainder,
        // but hash only the order ID so every record of an order reaches the same reducer
        return (key.getOrderID().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
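For comparison, Hadoop's stock HashPartitioner applies the same mask-and-modulo, but to the whole key's hashCode(); hashing only the order ID above guarantees that all records of one order land in the same partition. The built-in version is essentially:

public class HashPartitioner<K, V> extends Partitioner<K, V> {
    // mask the sign bit so the modulo result is never negative
    public int getPartition(K key, V value, int numReduceTasks) {
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}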

OrderGroupingComparator

package com.cevent.hadoop.mapreduce.order;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class OrderGroupingComparator extends WritableComparator {

    // a no-arg constructor is required; passing true asks WritableComparator
    // to create OrderBean instances for the comparison
    public OrderGroupingComparator() {
        super(OrderBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // compare by order id only: keys with the same id form one reduce group
        OrderBean asBean = (OrderBean) a;
        OrderBean bsBean = (OrderBean) b;
        return asBean.getOrderID().compareTo(bsBean.getOrderID());
    }
}
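Two pieces cooperate here. During the shuffle, OrderBean.compareTo sorts the keys by order ID ascending and, within the same ID, by price descending. This grouping comparator then compares only the order ID, so all keys of one order are handed to a single reduce() call, and the first key in that group is the order's highest-priced record.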

OrderReducer

package com.cevent.hadoop.mapreduce.order;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer: receives the mapper's keys, one group per order.
 * @author cevent
 * @date 2020-04-07
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    @Override
    protected void reduce(OrderBean bean, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // write out the group's first key: the highest price for this order
        context.write(bean, NullWritable.get());
    }
}
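Because the keys in a group arrive sorted price-descending and reduce() writes only the first one, each order contributes exactly one output line: its maximum price. (A detail worth knowing if this is extended to a top-N job: advancing the values iterator also advances the fields of the key object, since Hadoop reuses one key instance per group.)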

OrderDriver


package com.cevent.hadoop.mapreduce.order;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver
 * @author cevent
 * @date 2020-04-07
 */
public class OrderDriver {
    public static void main(String[] args) throws Exception {
        // 1. load the configuration and create the job
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // 2. set the jar by its driver class
        job.setJarByClass(OrderDriver.class);

        // 3. set the mapper and reducer classes
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);

        // 4. set the map output key and value types
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 5. set the final output key and value types
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // 6. set the input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // attach the groupingComparator/comparable
        job.setGroupingComparatorClass(OrderGroupingComparator.class);

        // 7. set the partitioner
        job.setPartitionerClass(OrderPartitioner.class);

        // 8. set the number of reduce tasks
        job.setNumReduceTasks(3);

        // 9. submit and wait
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
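A quick way to exercise the job (the jar name, HDFS paths, and all sample rows except the first are hypothetical; the record format is taken from the mapper comment):

hadoop jar order.jar com.cevent.hadoop.mapreduce.order.OrderDriver /order/input /order/output

Given tab-separated input like

Order_0000001	Pdt_01	222.8
Order_0000001	Pdt_05	25.8
Order_0000002	Pdt_03	522.8
Order_0000002	Pdt_04	122.4
Order_0000003	Pdt_06	232.8

the expected result is one line per order carrying its highest price, spread across the three part files by the partitioner:

Order_0000001	222.8
Order_0000002	522.8
Order_0000003	232.8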

But I ran into an ugly result ...
[screenshot of the job output]

Origin blog.csdn.net/weixin_37056888/article/details/105460257