使用MapReduce读取HBase数据存储到MySQL

Mapper读取HBase数据

package MapReduce;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Mapper that reads one HBase row per call record and emits it as a
 * {@link phoneInfoDBWritable} output key.
 *
 * <p>For every row it picks the {@code caller}, {@code callee}, {@code time},
 * {@code dur} and {@code flag} columns (matched by qualifier only, the column
 * family is not checked), copies them into a {@link phoneInfo} and writes the
 * wrapped record to the context. Columns missing from a row are emitted as
 * empty strings.
 */
public class CallMapper extends TableMapper<phoneInfoDBWritable,phoneInfoDBWritable>{

    /**
     * Converts a single HBase row into one output record.
     *
     * @param key     row key of the current row; not used, every emitted field
     *                comes from the row's cells
     * @param value   the scanned row
     * @param context task context the record is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {

        Cell[] cells = value.rawCells();
        if (cells == null) {
            // Result.rawCells() is documented to return null for an empty result;
            // there is nothing to store for such a row.
            return;
        }

        // Defaults used when a column is absent from the row.
        String caller = "";
        String callee = "";
        String time = "";
        String dur = "";
        String flag = "";

        for (Cell cell : cells) {
            String qualifier = decode(CellUtil.cloneQualifier(cell));

            if ("caller".equals(qualifier)) {
                // number that placed the call
                caller = decode(CellUtil.cloneValue(cell));
            } else if ("callee".equals(qualifier)) {
                // number that received the call
                callee = decode(CellUtil.cloneValue(cell));
            } else if ("time".equals(qualifier)) {
                // date/time of the call
                time = decode(CellUtil.cloneValue(cell));
            } else if ("dur".equals(qualifier)) {
                // call duration
                dur = decode(CellUtil.cloneValue(cell));
            } else if ("flag".equals(qualifier)) {
                // flag column, copied through unchanged
                flag = decode(CellUtil.cloneValue(cell));
            }
        }

        // A fresh bean per row, so no emitted record shares state with another.
        phoneInfo info = new phoneInfo();
        info.setCaller(caller);
        info.setCallee(callee);
        info.setTime(time);
        info.setDur(dur);
        info.setFlag(flag);

        // The record travels as the output key; the value is intentionally null.
        // NOTE(review): relies on the configured output format persisting only the key — confirm.
        context.write(new phoneInfoDBWritable(info), null);
    }

    /**
     * Decodes raw cell bytes as UTF-8 so the result does not depend on the
     * JVM's platform default charset.
     * NOTE(review): assumes the table was populated with UTF-8 encoded strings — confirm.
     */
    private static String decode(byte[] raw) {
        return new String(raw, StandardCharsets.UTF_8);
    }
}

Driver配置分发任务

package MapReduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;

/**
 * Driver for the map-only job that scans the HBase table {@code phone:log}
 * with {@link CallMapper} and inserts every record into the MySQL table
 * {@code phone} through {@link DBOutputFormat}.
 */
public class MRRunner {

    /**
     * Configures and submits the job, then exits the JVM with status 0 on
     * success and 1 on failure.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {

        // HBase client configuration: where to find ZooKeeper.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1,hadoop2,hadoop3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        Job job = Job.getInstance(conf, "db store");
        job.setJarByClass(MRRunner.class);

        // JDBC connection used by the output format.
        // NOTE(review): driver class, URL and credentials are hard-coded; consider
        // moving them to configuration before running outside a local setup.
        DBConfiguration.configureDB(job.getConfiguration(), "com.mysql.jdbc.Driver", "jdbc:mysql://localhost:3306/callphone", "root","root");
        // Target table and the column order the output record binds its values in.
        DBOutputFormat.setOutput(job, "phone", "caller", "callee", "time", "dur","flag");
        job.setOutputFormatClass(DBOutputFormat.class);

        // Full-table scan: fetch 500 rows per RPC and keep the one-off scan
        // out of the region servers' block cache.
        Scan scan = new Scan();
        scan.setCacheBlocks(false);
        scan.setCaching(500);

        // Wires the table input and the mapper; the mapper class is passed here,
        // so no separate setMapperClass call is needed.
        TableMapReduceUtil.initTableMapperJob(
                "phone:log",
                scan,
                CallMapper.class,
                phoneInfoDBWritable.class,
                phoneInfoDBWritable.class,
                job);

        // Map-only job: mapper output goes straight to the output format.
        job.setNumReduceTasks(0);
        // Mappers write directly to MySQL, so a speculative duplicate attempt of
        // the same split could insert the same rows twice; turn speculation off.
        job.setMapSpeculativeExecution(false);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

phoneInfo封装读取到的HBase数据

package MapReduce;

/**
 * 构建phoneInfo类,将HBase表中的数据存储到phoneInfo对象中
 * 实现封装数据
 */
/**
 * Mutable value holder for one call record: the five text fields
 * {@code caller}, {@code callee}, {@code time}, {@code dur} and {@code flag}.
 *
 * <p>All fields are plain strings, start out as {@code null} and are stored
 * and returned without any validation or conversion.
 */
public class phoneInfo{

    /** Number that placed the call. */
    private String caller;
    /** Number that received the call. */
    private String callee;
    /** Date/time of the call, kept as text. */
    private String time;
    /** Call duration, kept as text. */
    private String dur;
    /** Flag column value, kept as text. */
    private String flag;

    // ---- accessors ----

    /** @return the stored caller, or {@code null} if never set */
    public String getCaller() { return this.caller; }

    /** @return the stored callee, or {@code null} if never set */
    public String getCallee() { return this.callee; }

    /** @return the stored time, or {@code null} if never set */
    public String getTime() { return this.time; }

    /** @return the stored duration, or {@code null} if never set */
    public String getDur() { return this.dur; }

    /** @return the stored flag, or {@code null} if never set */
    public String getFlag() { return this.flag; }

    // ---- mutators ----

    /** @param caller new caller value, stored as given */
    public void setCaller(String caller) { this.caller = caller; }

    /** @param callee new callee value, stored as given */
    public void setCallee(String callee) { this.callee = callee; }

    /** @param time new time value, stored as given */
    public void setTime(String time) { this.time = time; }

    /** @param dur new duration value, stored as given */
    public void setDur(String dur) { this.dur = dur; }

    /** @param flag new flag value, stored as given */
    public void setFlag(String flag) { this.flag = flag; }
}

phoneInfoDBWritable实现DBWritable用于存放phoneInfo对象

package MapReduce;

import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * 编写phoneInfoDBWritable类实现DBWritable,完成HBase的数据写入到指定的MySQL的序列化
 */
/**
 * {@link DBWritable} wrapper around a {@link phoneInfo}, used to bind the
 * record's five fields to a JDBC {@link PreparedStatement}.
 *
 * <p>Only the write direction is implemented; {@link #readFields(ResultSet)}
 * does nothing.
 */
public class phoneInfoDBWritable implements DBWritable {

    /** Record to be written; stays {@code null} when the no-arg constructor is used. */
    private phoneInfo phoneinfo;

    /** Creates a wrapper without a record. */
    public phoneInfoDBWritable() { }

    /**
     * Creates a wrapper around the given record.
     *
     * @param phoneinfo record whose fields {@link #write(PreparedStatement)} binds;
     *                  the reference is kept, not copied
     */
    public phoneInfoDBWritable(phoneInfo phoneinfo) {
        this.phoneinfo = phoneinfo;
    }

    /**
     * Binds the record's fields as string parameters 1 to 5, in the order
     * caller, callee, time, dur, flag.
     *
     * @param statement statement whose parameters are set
     * @throws SQLException if setting a parameter fails
     */
    public void write(PreparedStatement statement) throws SQLException {
        final phoneInfo record = this.phoneinfo;
        // Values listed in parameter order; JDBC parameter indexes are 1-based.
        final String[] columns = {
                record.getCaller(),
                record.getCallee(),
                record.getTime(),
                record.getDur(),
                record.getFlag()
        };
        for (int i = 0; i < columns.length; i++) {
            statement.setString(i + 1, columns[i]);
        }
    }

    /**
     * Intentionally empty: nothing is read from the result set.
     *
     * @param resultSet ignored
     * @throws SQLException never thrown by this implementation
     */
    public void readFields(ResultSet resultSet) throws SQLException {
        // no-op
    }
}

猜你喜欢

转载自www.cnblogs.com/gentle-awen/p/10087199.html
今日推荐