✌✌✌Como decían los antiguos: la mejor memoria no vale tanto como el peor bolígrafo. Un viaje de mil millas comienza con un solo paso. Escribir mil líneas de código cada día es esencial, y también escribir un resumen todos los días. Si mantienes la esperanza de entrar en una gran empresa, serás invencible. ¡Jajaja! ✌✌✌

Inserte la descripción de la imagen aquí

1. ✌Requisitos de la pregunta

Tabla de registros:

ID	Número de ciudad	Índice de aire
001	03	245
002	02	655
003	05	743
004	04	246
005	02	956
006	01	637
007	05	831
008	03	683
009	02	349

Tabla de ciudades:

Número de ciudad	Nombre de la ciudad
01	Changsha
02	Zhuzhou
03	Xiangtan
04	Huaihua
05	Yueyang

Tabla de destino:

ID	Nombre de la ciudad	Índice de aire
001	Xiangtan	245
002	Zhuzhou	655
003	Yueyang	743
004	Huaihua	246
005	Zhuzhou	956
006	Changsha	637
007	Yueyang	831
008	Xiangtan	683
009	Zhuzhou	349

2. ✌Idea de implementación

Usamos el campo común a ambas tablas (el número de ciudad) como clave de salida del Map; los demás atributos se encapsulan en el Bean, que se emite como valor.
Después de pasar por la fase Map, los datos tienen el siguiente formato:

Número de ciudad	ID	Nombre de la ciudad	Índice de aire	Tipo de archivo
1	006		637	registro
1		Changsha		ciudad
2	002		655	registro
2	005		956	registro
2	009		349	registro
2		Zhuzhou		ciudad
3	001		245	registro
3	008		683	registro
3		Xiangtan		ciudad
4	004		246	registro
4		Huaihua		ciudad
5	003		743	registro
5	007		831	registro
5		Yueyang		ciudad

1. Encapsular todas las propiedades en un objeto y realizar la serialización al mismo tiempo.
2. Los tipos de entrada del Map (clave, valor) deben ser LongWritable, Text
3. Los tipos de salida del Map (clave, valor) deben ser Text, Bean
4. Los tipos de salida del Reduce (clave, valor) deben ser Bean, NullWritable

3. ✌Implementación del código

1.✌Clase Bean

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Value object exchanged between the map and reduce phases of the join.
 *
 * <p>A single instance represents one row from either input table. Fields that
 * do not apply to a given row are filled with an empty string (or {@code 0} for
 * {@link #amount}) by the mapper in this file.
 *
 * <p>Serialization is null-safe: a {@code null} string field is written as an
 * empty string, so a bean created through the no-arg constructor can be
 * serialized without a {@link NullPointerException}. Consequently a
 * {@code null} field is read back as {@code ""}.
 */
public class Bean implements Writable {

    // Record id; the mapper sets it to "" for rows of the city table.
    private String id;
    // City number: the field shared by both tables and used as the join key.
    private String pid;
    // Air index; the mapper sets it to 0 for rows of the city table.
    private int amount;
    // City name; the mapper sets it to "" for rows of the record table.
    private String pname;
    // Source-table marker; the mapper in this file uses "record" or "city".
    private String type;

    /**
     * No-arg constructor, needed so that instances can be created reflectively
     * before {@link #readFields(DataInput)} populates them.
     */
    public Bean() {
        super();
    }

    /**
     * Creates a fully populated bean.
     *
     * @param id     record id
     * @param pid    city number (join key)
     * @param amount air index
     * @param pname  city name
     * @param type   source-table marker
     */
    public Bean(String id, String pid, int amount, String pname, String type) {
        this.id = id;
        this.pid = pid;
        this.amount = amount;
        this.pname = pname;
        this.type = type;
    }

    /**
     * Renders the bean as {@code id<TAB>pname<TAB><TAB>amount}.
     *
     * @return the tab-separated text form of this bean
     */
    @Override
    public String toString() {
        return id + "\t" + pname + "\t\t" + amount;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getPid() {
        return pid;
    }

    public void setPid(String pid) {
        this.pid = pid;
    }

    public int getAmount() {
        return amount;
    }

    public void setAmount(int amount) {
        this.amount = amount;
    }

    public String getPname() {
        return pname;
    }

    public void setPname(String pname) {
        this.pname = pname;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    /**
     * Serializes the fields in the order id, pid, amount, pname, type.
     * The order must match {@link #readFields(DataInput)}.
     *
     * @param out sink to write the fields to
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // writeUTF(null) throws NullPointerException, so nulls are written as "".
        out.writeUTF(nullToEmpty(id));
        out.writeUTF(nullToEmpty(pid));
        out.writeInt(amount);
        out.writeUTF(nullToEmpty(pname));
        out.writeUTF(nullToEmpty(type));
    }

    /**
     * Restores the fields in the same order in which {@link #write(DataOutput)}
     * emitted them.
     *
     * @param in source to read the fields from
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readUTF();
        pid = in.readUTF();
        amount = in.readInt();
        pname = in.readUTF();
        type = in.readUTF();
    }

    // Maps null to "" so that DataOutput.writeUTF never receives null.
    private static String nullToEmpty(String s) {
        return s == null ? "" : s;
    }

}

2.✌Clase Map

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Mapper of the join: tags every input row with the table it came from and
 * emits it under its city number so that matching rows of both tables reach
 * the same reduce call.
 *
 * <p>Files whose name starts with {@code "record"} are parsed as
 * {@code id<TAB>cityNumber<TAB>airIndex}; every other file is parsed as
 * {@code cityNumber<TAB>cityName}.
 */
public class Map extends Mapper<LongWritable, Text, Text, Bean> {

    // Name of the file behind the current input split; assigned in setup().
    String fileName;

    /**
     * Remembers the name of the file being read. Both tables live in the same
     * input directory, so the file name is what tells them apart in
     * {@link #map}.
     *
     * @param context task context; its input split is cast to {@link FileSplit}
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        FileSplit split = (FileSplit) context.getInputSplit();
        fileName = split.getPath().getName();
    }

    /**
     * Converts one line of text into a {@link Bean} keyed by city number.
     *
     * <p>Blank lines are skipped. A non-blank line with too few tab-separated
     * fields is rejected with an {@link IOException} that names the file, the
     * offset and the offending line.
     *
     * @param key     offset of the line within the file
     * @param value   the line itself
     * @param context sink for the (city number, bean) pairs
     * @throws IOException           if the line has too few fields or writing fails
     * @throws NumberFormatException if the air index of a record row is not an integer
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();

        // A trailing or accidental empty line must not abort the whole job.
        if (line.trim().isEmpty()) {
            return;
        }

        String[] words = line.split("\t");

        if (fileName.startsWith("record")) {
            requireFields(words, 3, key, line);
            int amount = Integer.parseInt(words[2].trim());
            context.write(new Text(words[1]), new Bean(words[0], words[1], amount, "", "record"));
        } else {
            requireFields(words, 2, key, line);
            context.write(new Text(words[0]), new Bean("", words[0], 0, words[1], "city"));
        }
    }

    // Fails with a descriptive message when a row has fewer fields than expected.
    private void requireFields(String[] words, int expected, LongWritable offset, String line) throws IOException {
        if (words.length < expected) {
            throw new IOException("Malformed line in " + fileName + " at offset " + offset.get()
                    + ": expected at least " + expected + " tab-separated fields but found "
                    + words.length + ": '" + line + "'");
        }
    }
}

3.✌Clase Reduce

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;

/**
 * Reducer of the join: for one city number it receives the record rows and the
 * city row, and emits every record row with the city name filled in.
 */
public class Reduce extends Reducer<Text, Bean, Bean, NullWritable> {

    /**
     * Joins the rows that share one city number.
     *
     * <p>Each record row is copied field by field before it is stored, because
     * Hadoop reuses the same value instance while iterating; storing the
     * iterated reference would leave the list full of identical rows.
     *
     * <p>If no city row arrives for the key, the record rows are emitted with
     * the city name they already carry instead of having it overwritten.
     *
     * @param key     the city number
     * @param values  all beans emitted by the mapper for that city number
     * @param context sink for the joined rows
     * @throws IOException          if writing an output row fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Bean> values, Context context) throws IOException, InterruptedException {

        // Independent copies of every record row seen for this key.
        ArrayList<Bean> records = new ArrayList<>();

        // City name taken from the city-table row; null until one is seen.
        String cityName = null;

        for (Bean value : values) {
            if ("record".equals(value.getType())) {
                // Explicit copy: no reflection, so no failure can be silently swallowed.
                records.add(new Bean(value.getId(), value.getPid(), value.getAmount(),
                        value.getPname(), value.getType()));
            } else {
                cityName = value.getPname();
            }
        }

        for (Bean record : records) {
            if (cityName != null) {
                record.setPname(cityName);
            }
            context.write(record, NullWritable.get());
        }
    }

}

4.✌Clase Driver

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;

import java.io.IOException;

/**
 * Entry point that configures and submits the join job.
 */
public class Driver {

    // Local paths used when no paths are given on the command line.
    private static final String DEFAULT_INPUT = "D:/input";
    private static final String DEFAULT_OUTPUT = "D:/output";

    /**
     * Builds the job, waits for it to finish and exits with status 0 on
     * success or 1 on failure.
     *
     * @param args {@code args[0]} is the input directory and {@code args[1]} the
     *             output directory; when fewer than two arguments are supplied
     *             the local defaults {@code D:/input} and {@code D:/output}
     *             are used
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // Fall back to the local paths only when the caller did not supply any,
        // so the same driver works from the IDE and from the command line.
        if (args == null || args.length < 2) {
            args = new String[]{DEFAULT_INPUT, DEFAULT_OUTPUT};
        }

        BasicConfigurator.configure();

        // Job configuration
        Configuration conf = new Configuration();

        // Obtain the job object
        Job job = Job.getInstance(conf);

        // Wire up the driver, mapper and reducer classes
        job.setJarByClass(Driver.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        // Map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Bean.class);

        // Final output types
        job.setOutputKeyClass(Bean.class);
        job.setOutputValueClass(NullWritable.class);

        // Input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and block until it completes
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}

、

Aprendizaje de Hadoop: MapReduce para fusionar dos tablas