Save the output of MapReduce to MySQL

Coding approach

Taking word frequency statistics (word count) as an example, this section shows how to save the output of a MapReduce job to MySQL. The Map task is essentially unchanged; the implementation effort is mainly in the output of the Reduce task. The output key of the Reduce task is a javaBean class corresponding to the output data table. This class must implement both the org.apache.hadoop.io.Writable interface and the org.apache.hadoop.mapreduce.lib.db.DBWritable interface. The output value is NullWritable, that is, an empty value.
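Concretely, implementing both interfaces means providing four methods. The skeleton below is only an orientation, with empty method bodies as placeholders; the full implementation is given in the javaBean class section further down.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

public class KeyWordTable implements Writable, DBWritable {
    // Writable: (de)serialize the record when Hadoop passes it between tasks
    public void write(DataOutput out) throws IOException { /* see full class below */ }
    public void readFields(DataInput in) throws IOException { /* ... */ }
    // DBWritable: bind the fields to the generated INSERT, or read them from a query result
    public void write(PreparedStatement statement) throws SQLException { /* ... */ }
    public void readFields(ResultSet resultSet) throws SQLException { /* ... */ }
}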

Code

Map class
package mapreduce_demo.mapreduce4;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.io.Text;

public class Map extends Mapper<LongWritable, Text, Text, IntWritable> {

	@Override
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Split each input line on spaces and emit (word, 1) for every word
		String line = value.toString();
		String[] words = line.split(" ");
		for (int i = 0; i < words.length; i++) {
			context.write(new Text(words[i]), new IntWritable(1));
		}
	}
}

The javaBean class corresponding to the output data table
package mapreduce_demo.mapreduce4;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

public class KeyWordTable implements Writable, DBWritable {

	private String keyword;
	private int sum;

	public KeyWordTable() {
		super();
	}

	public KeyWordTable(String keyword, int sum) {
		super();
		this.keyword = keyword;
		this.sum = sum;
	}

	public String getKeyword() {
		return keyword;
	}

	public void setKeyword(String keyword) {
		this.keyword = keyword;
	}

	public int getSum() {
		return sum;
	}

	public void setSum(int sum) {
		this.sum = sum;
	}

	// Implement the Writable interface: serialization between map and reduce
	public void write(DataOutput out) throws IOException {
		out.writeInt(this.sum);
		out.writeUTF(this.keyword);
	}

	public void readFields(DataInput in) throws IOException {
		// Fields must be read back in the same order they were written
		this.sum = in.readInt();
		this.keyword = in.readUTF();
	}

	// Implement the DBWritable interface
	// Note: JDBC PreparedStatement parameters and ResultSet columns are indexed from 1
	public void write(PreparedStatement statement) throws SQLException {
		statement.setString(1, this.keyword);
		statement.setInt(2, this.sum);
	}

	/**
	 * DBWritable: read the data from the ResultSet and set the fields.
	 * @param resultSet
	 * @throws SQLException
	 */
	public void readFields(ResultSet resultSet) throws SQLException {
		this.keyword = resultSet.getString(1);
		this.sum = resultSet.getInt(2);
	}
}
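Because the Writable methods must read fields back in exactly the order they were written, a quick local round-trip test can catch ordering mistakes early. The following is a minimal sketch, not part of the original project; it assumes it sits in the same package as KeyWordTable.

package mapreduce_demo.mapreduce4;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class KeyWordTableCheck {
	public static void main(String[] args) throws IOException {
		// Serialize a sample record ...
		KeyWordTable before = new KeyWordTable("hadoop", 3);
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		before.write(new DataOutputStream(bos));

		// ... and read it back; prints "hadoop -> 3" if the field order matches
		KeyWordTable after = new KeyWordTable();
		after.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
		System.out.println(after.getKeyword() + " -> " + after.getSum());
	}
}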
Reduce class
package mapreduce_demo.mapreduce4;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Red extends Reducer<Text, IntWritable, KeyWordTable, NullWritable> {

	@Override
	public void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		// Sum the counts for this word
		int sum = 0;
		for (IntWritable val : values) {
			sum += val.get();
		}
		// Wrap the word and its total in the javaBean and emit it as the key;
		// the value is an empty NullWritable
		KeyWordTable keywordTable = new KeyWordTable(key.toString(), sum);
		context.write(keywordTable, NullWritable.get());
	}
}
Job class
package mapreduce_demo.mapreduce4;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;

public class App {

    public static String driverClass = "com.mysql.jdbc.Driver";
    public static String dbUrl = "jdbc:mysql://Master:3306/mydatabase";
    public static String userName = "root";
    public static String passwd = "root";
    public static String inputFilePath = "hdfs://Master:9000/usr/local/hadoop/input_demo4";
    public static String tableName = "keyword";
    public static String[] fields = {"keyword", "total"};

    public static void main(String[] args) {
        // Store the JDBC driver, URL, and credentials in the job configuration
        Configuration conf = new Configuration();
        DBConfiguration.configureDB(conf, driverClass, dbUrl, userName, passwd);
        try {
            Job job = Job.getInstance(conf);

            job.setJarByClass(App.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setMapperClass(Map.class);
            job.setReducerClass(Red.class);

            job.setJobName("MyWordCountDB");

            // Read input from HDFS; write output to the MySQL table via DBOutputFormat
            FileInputFormat.setInputPaths(job, new Path(inputFilePath));
            DBOutputFormat.setOutput(job, tableName, fields);

            job.waitForCompletion(true);
        } catch (IOException | InterruptedException | ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
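Two calls do the database wiring here. DBConfiguration.configureDB stores the JDBC driver class, URL, and credentials in the job configuration so that every task can open a connection, and DBOutputFormat.setOutput sets DBOutputFormat as the job's output format and records the table name and field list. From the table name and fields, DBOutputFormat builds a parameterized INSERT statement, roughly:

INSERT INTO keyword (keyword, total) VALUES (?, ?)

Each reduce output key is then written by calling its write(PreparedStatement) method, which is why the parameter indices in KeyWordTable.write must follow the same order as the fields array.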

After the code is written, package the project into a jar and upload it to the Master node of the Hadoop cluster.
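A possible upload command (an assumption: the jar was built locally as mapreduce_demo4.jar and the Master node accepts SSH as root; the target path matches the run command below):

scp mapreduce_demo4.jar root@Master:/usr/local/code/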

Preparations before running the jar package

  • Log in to MySQL and create the corresponding data table
CREATE TABLE `keyword` (
  `keyword` varchar(10) NOT NULL,
  `total` int(10) NOT NULL
);
  • Upload the MySQL JDBC driver jar package to the share/hadoop/common/lib directory under the Hadoop installation directory on each node of the cluster. Otherwise, a java.io.IOException: com.mysql.jdbc.Driver exception will be reported. (A quick connectivity check is sketched after this list.)
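It may also be worth confirming that MySQL is reachable from the cluster with the credentials configured in the App class (a hypothetical check; the host, user, and database names come from the dbUrl above):

mysql -h Master -u root -p mydatabase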

Run the jar package

hadoop jar /usr/local/code/mapreduce_demo4.jar
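Note that this form relies on the jar's manifest declaring a Main-Class. If the manifest does not specify one, append the driver class name explicitly:

hadoop jar /usr/local/code/mapreduce_demo4.jar mapreduce_demo.mapreduce4.App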


After the job finishes, log in to MySQL to view the corresponding data table.
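Any SELECT on the keyword table shows the stored word counts, for example:

SELECT * FROM keyword ORDER BY total DESC;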


MapReduce output results are successfully stored in MySQL

Origin: blog.csdn.net/atuo200/article/details/108326053