14_ Write pulsar data to HBase based on Flink

3.7. Write data to HBase based on Flink

3.7.1. Write a Flink job that writes the data to HBase, backing the data up and facilitating subsequent ad hoc queries and offline analysis

3.7.1.1. HBase basic introduction

HBase is software based on the Bigtable paper published by Google. It is a NoSQL database: it does not support SQL, does not support join operations, has no relationships between tables, and does not support multi-row transactions. HBase is built on HDFS and is written in Java.

There are generally three ways to query HBase data: lookup by primary key (row key), range retrieval over primary keys, and querying all data (a full scan).

All data is stored as bytes; HBase can store both structured and semi-structured data.

Features of HBase tables: large, column-oriented storage, and sparse.

3.7.1.2. Application scenarios

1) Random read and write operations are required.
2) The amount of data is relatively large.
3) The data is relatively sparse.

3.7.1.3. HBase installation

The HBase version installed this time is 2.2.7; you can refer to the detailed installation manual. Note that starting HBase depends on ZooKeeper and HDFS,
so you need to install Hadoop and ZooKeeper first.
insert image description here

  • 1- Create target table in Hbase
# Create table 'itcast_h_ems' with one GZ-compressed column family 'f1', pre-split into 6 regions using HexStringSplit.
create 'itcast_h_ems', {NAME=>'f1',COMPRESSION=>'GZ'},{NUMREGIONS=>6, SPLITALGO=>'HexStringSplit'}
  • 2- Write Flink code to complete writing to Hbase
import com.itheima.pojo.PulsarTopicPojo;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.pulsar.FlinkPulsarSource;
import org.apache.flink.streaming.connectors.pulsar.internal.JsonDeser;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.util.Properties;

// 基于Flink消费Pulsar数据, 然后将数据灌入到HBase中, 完成数据备份, 以及后续即席查询和离线分析
public class ItcastFlinkToHBase {
    
    

    public static void main(String[] args) throws Exception {
    
    

        //1. 创建Flinnk流式处理核心环境类对象 和 Table API 核心环境类对象
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        //2. 添加Source组件, 从Pulsar中读取消息数据
        Properties props = new Properties();
        props.setProperty("topic","persistent://public/default/itcast_ems_tab");
        props.setProperty("partition.discovery.interval-millis","5000");
        FlinkPulsarSource<PulsarTopicPojo> pulsarSource = new FlinkPulsarSource<PulsarTopicPojo>(
                "pulsar://node1:6650,node2:6650,node3:6650","http://node1:8080,node2:8080,node3:8080",
                JsonDeser.of(PulsarTopicPojo.class),props);
        //2.1 设置pulsarSource组件在消费数据的时候, 默认从什么位置开始消费
        pulsarSource.setStartFromLatest();

        DataStreamSource<PulsarTopicPojo> dataStreamSource = env.addSource(pulsarSource);


        //2.2 转换为Flink Table

        Schema schema = Schema.newBuilder()
                .column("id", DataTypes.INT())
                .column("sid", DataTypes.STRING())
                .column("ip", DataTypes.STRING())
                .column("session_id", DataTypes.STRING())
                .column("create_time", DataTypes.STRING())
                .column("yearInfo", DataTypes.STRING())
                .column("monthInfo", DataTypes.STRING())
                .column("dayInfo", DataTypes.STRING())
                .column("hourInfo", DataTypes.STRING())
                .column("seo_source", DataTypes.STRING())
                .column("area", DataTypes.STRING())
                .column("origin_channel", DataTypes.STRING())
                .column("msg_count", DataTypes.INT())
                .column("from_url", DataTypes.STRING())
                .build();


        tableEnv.createTemporaryView("itcast_ems",dataStreamSource,schema);


        //2.3: 定义HBase的目标表
        String hTable = "create table itcast_h_ems("+
                "rowkey int,"+
                "f1 ROW<sid STRING,ip STRING,session_id STRING,create_time STRING,yearInfo STRING,monthInfo STRING,dayInfo STRING,hourInfo STRING,seo_source STRING,area STRING,origin_channel STRING,msg_count INT,from_url STRING>,"+
                "primary key(rowkey) NOT ENFORCED" +
                ") WITH ("+
                "'connector'='hbase-2.2',"+
                "'table-name'='itcast_h_ems',"+
                "'zookeeper.quorum'='node1:2181,node2:2181,node3:2181'"+
                ")";
        //4. 执行操作
        tableEnv.executeSql(hTable);

        tableEnv.executeSql("insert into itcast_h_ems select id,ROW(sid,ip,session_id,create_time,yearInfo,monthInfo,dayInfo,hourInfo,seo_source,area,origin_channel,msg_count,from_url) from itcast_ems");

    }

}

PulsarTopicPojo

/**
 * Mutable bean holding the fields of one Pulsar topic message.
 *
 * <p>Provides a public no-argument constructor, an all-fields constructor, a bulk
 * {@link #setData} method, and a getter/setter pair per field. Every field may be {@code null}.
 */
public class PulsarTopicPojo {

    private Integer id;
    private String sid;
    private String ip;
    private String session_id;
    private String create_time;
    private String yearInfo;
    private String monthInfo;
    private String dayInfo;
    private String hourInfo;
    private String seo_source;
    private String area;
    private String origin_channel;
    private Integer msg_count;
    private String from_url;

    /** Creates an instance with every field left {@code null}. */
    public PulsarTopicPojo() {
    }

    /** Creates an instance with every field set to the corresponding argument. */
    public PulsarTopicPojo(Integer id, String sid, String ip, String session_id, String create_time, String yearInfo, String monthInfo, String dayInfo, String hourInfo, String seo_source, String area, String origin_channel, Integer msg_count, String from_url) {
        this.id = id;
        this.sid = sid;
        this.ip = ip;
        this.session_id = session_id;
        this.create_time = create_time;
        this.yearInfo = yearInfo;
        this.monthInfo = monthInfo;
        this.dayInfo = dayInfo;
        this.hourInfo = hourInfo;
        this.seo_source = seo_source;
        this.area = area;
        this.origin_channel = origin_channel;
        this.msg_count = msg_count;
        this.from_url = from_url;
    }

    /** Overwrites every field of this instance in a single call. */
    public void setData(Integer id, String sid, String ip, String session_id, String create_time, String yearInfo, String monthInfo, String dayInfo, String hourInfo, String seo_source, String area, String origin_channel, Integer msg_count, String from_url) {
        this.id = id;
        this.sid = sid;
        this.ip = ip;
        this.session_id = session_id;
        this.create_time = create_time;
        this.yearInfo = yearInfo;
        this.monthInfo = monthInfo;
        this.dayInfo = dayInfo;
        this.hourInfo = hourInfo;
        this.seo_source = seo_source;
        this.area = area;
        this.origin_channel = origin_channel;
        this.msg_count = msg_count;
        this.from_url = from_url;
    }

    /** @return the {@code id} field */
    public Integer getId() {
        return id;
    }

    /** @param id new value of the {@code id} field */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the {@code sid} field */
    public String getSid() {
        return sid;
    }

    /** @param sid new value of the {@code sid} field */
    public void setSid(String sid) {
        this.sid = sid;
    }

    /** @return the {@code ip} field */
    public String getIp() {
        return ip;
    }

    /** @param ip new value of the {@code ip} field */
    public void setIp(String ip) {
        this.ip = ip;
    }

    /** @return the {@code session_id} field */
    public String getSession_id() {
        return session_id;
    }

    /** @param session_id new value of the {@code session_id} field */
    public void setSession_id(String session_id) {
        this.session_id = session_id;
    }

    /** @return the {@code create_time} field */
    public String getCreate_time() {
        return create_time;
    }

    /** @param create_time new value of the {@code create_time} field */
    public void setCreate_time(String create_time) {
        this.create_time = create_time;
    }

    /** @return the {@code yearInfo} field */
    public String getYearInfo() {
        return yearInfo;
    }

    /** @param yearInfo new value of the {@code yearInfo} field */
    public void setYearInfo(String yearInfo) {
        this.yearInfo = yearInfo;
    }

    /** @return the {@code monthInfo} field */
    public String getMonthInfo() {
        return monthInfo;
    }

    /** @param monthInfo new value of the {@code monthInfo} field */
    public void setMonthInfo(String monthInfo) {
        this.monthInfo = monthInfo;
    }

    /** @return the {@code dayInfo} field */
    public String getDayInfo() {
        return dayInfo;
    }

    /** @param dayInfo new value of the {@code dayInfo} field */
    public void setDayInfo(String dayInfo) {
        this.dayInfo = dayInfo;
    }

    /** @return the {@code hourInfo} field */
    public String getHourInfo() {
        return hourInfo;
    }

    /** @param hourInfo new value of the {@code hourInfo} field */
    public void setHourInfo(String hourInfo) {
        this.hourInfo = hourInfo;
    }

    /** @return the {@code seo_source} field */
    public String getSeo_source() {
        return seo_source;
    }

    /** @param seo_source new value of the {@code seo_source} field */
    public void setSeo_source(String seo_source) {
        this.seo_source = seo_source;
    }

    /** @return the {@code area} field */
    public String getArea() {
        return area;
    }

    /** @param area new value of the {@code area} field */
    public void setArea(String area) {
        this.area = area;
    }

    /** @return the {@code origin_channel} field */
    public String getOrigin_channel() {
        return origin_channel;
    }

    /** @param origin_channel new value of the {@code origin_channel} field */
    public void setOrigin_channel(String origin_channel) {
        this.origin_channel = origin_channel;
    }

    /** @return the {@code msg_count} field */
    public Integer getMsg_count() {
        return msg_count;
    }

    /** @param msg_count new value of the {@code msg_count} field */
    public void setMsg_count(Integer msg_count) {
        this.msg_count = msg_count;
    }

    /** @return the {@code from_url} field */
    public String getFrom_url() {
        return from_url;
    }

    /** @param from_url new value of the {@code from_url} field */
    public void setFrom_url(String from_url) {
        this.from_url = from_url;
    }

    /**
     * Renders all fields as {@code PulsarTopicPojo{id=..., sid='...', ...}}; string fields are
     * wrapped in single quotes and {@code null} values are printed as {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("PulsarTopicPojo{");
        text.append("id=").append(id);
        appendQuoted(text, "sid", sid);
        appendQuoted(text, "ip", ip);
        appendQuoted(text, "session_id", session_id);
        appendQuoted(text, "create_time", create_time);
        appendQuoted(text, "yearInfo", yearInfo);
        appendQuoted(text, "monthInfo", monthInfo);
        appendQuoted(text, "dayInfo", dayInfo);
        appendQuoted(text, "hourInfo", hourInfo);
        appendQuoted(text, "seo_source", seo_source);
        appendQuoted(text, "area", area);
        appendQuoted(text, "origin_channel", origin_channel);
        text.append(", msg_count=").append(msg_count);
        appendQuoted(text, "from_url", from_url);
        return text.append('}').toString();
    }

    /** Appends {@code , label='value'} to {@code out}. */
    private static void appendQuoted(StringBuilder out, String label, String value) {
        out.append(", ").append(label).append("='").append(value).append('\'');
    }
}

Guess you like

Origin blog.csdn.net/toto1297488504/article/details/132175399
Recommended