Flink实时数仓任务开启步骤

# 分流日志数据到不同kafka topic
# com.elaiza.gmall.realtime.app.dwd.BaseLogApp
1.开启hdfs
[root@master bin]# start-all.sh
[root@master bin]# hdfs dfsadmin -safemode leave
[root@master bin]# mr-jobhistory-daemon.sh start historyserver

2.开启zk和kafka
[root@master bin]# /usr/local/src/sh/zk.sh start
[root@master bin]# /usr/local/src/sh/kf.sh start

3.开启任务
[root@master bin]# flink run -d -c com.elaiza.gmall.realtime.app.dwd.BaseLogApp /usr/local/src/sh/gmall-flink/sub-jar/gmall2021-realtime-1.0.jar

4.开启日志模拟和日志接收器
[root@master bin]# java -jar /usr/local/src/sh/gmall-flink/mock-log/gmall2021-log-sink-kafka.jar
[root@master bin]# java -jar /usr/local/src/sh/gmall-flink/mock-log/gmall2021-mock-log.jar

4.或者开启kafka生产者来自己生产数据
[root@master bin]# java -jar /usr/local/src/sh/gmall-flink/mock-log/gmall2021-log-sink-kafka.jar
[root@master bin]# kafka-console-producer.sh --broker-list master:9092 --topic ods_base_log

# 测试数据
# dwd_page_log topic
{"common":{"ar":"110000","uid":"4","os":"Android 11.0","ch":"web","is_new":"0","md":"Xiaomi 10 Pro ","mid":"mid_18","vc":"v2.1.134","ba":"Xiaomi"},"page":{"page_id":"good_detail","item":"6","during_time":6802,"item_type":"sku_id","last_page_id":"home","source_type":"query"},"displays":[{"display_type":"query","item":"7","item_type":"sku_id","pos_id":4,"order":1},{"display_type":"query","item":"6","item_type":"sku_id","pos_id":4,"order":2},{"display_type":"query","item":"1","item_type":"sku_id","pos_id":2,"order":3},{"display_type":"query","item":"6","item_type":"sku_id","pos_id":5,"order":4}],"actions":[{"item":"1","action_id":"get_coupon","item_type":"coupon_id","ts":1608272790401}],"ts":1608272787000}

# dwd_start_log topic
{"common":{"ar":"110000","uid":"49","os":"iOS 13.2.3","ch":"Appstore","is_new":"0","md":"iPhone Xs","mid":"mid_3","vc":"v2.1.134","ba":"iPhone"},"start":{"entry":"icon","open_ad_skip_ms":3347,"open_ad_ms":4737,"loading_time":9640,"open_ad_id":8},"ts":1608272783000}





# 动态分流事实表到kafka topic 和 维度表到Phoenix
# com.elaiza.gmall.realtime.app.dwd.BaseDBApp
1.新建配置mysql数据库gmall2021_realtime
CREATE TABLE `table_process` (
`source_table` varchar(200) NOT NULL COMMENT '来源表',
`operate_type` varchar(200) NOT NULL COMMENT '操作类型 insert,update,delete',
`sink_type` varchar(200) DEFAULT NULL COMMENT '输出类型 hbase kafka',
`sink_table` varchar(200) DEFAULT NULL COMMENT '输出表(主题)',
`sink_columns` varchar(2000) DEFAULT NULL COMMENT '输出字段',
`sink_pk` varchar(200) DEFAULT NULL COMMENT '主键字段',
`sink_extend` varchar(200) DEFAULT NULL COMMENT '建表扩展',
PRIMARY KEY (`source_table`,`operate_type`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8

2.在mysql配置文件中为 gmall2021 和 gmall2021_realtime 两个库开启Binlog
[root@master bin]# vim /etc/my.cnf
 * server-id = 1
 * log-bin=mysql-bin
 * binlog_format=row
 * binlog-do-db=gmall2021
 * binlog-do-db=gmall2021_realtime

3.重启mysql
[root@master bin]# sudo systemctl restart mysqld

4.开启hdfs
[root@master bin]# start-all.sh
[root@master bin]# hdfs dfsadmin -safemode leave
[root@master bin]# mr-jobhistory-daemon.sh start historyserver

5.开启zk和kafka
[root@master bin]# /usr/local/src/sh/zk.sh start
[root@master bin]# /usr/local/src/sh/kf.sh start

6.开启hbase
[root@master bin]# /usr/local/src/hbase-1.3.1/bin/start-hbase.sh

7.进入并创建Phoenix数据库
[root@master bin]# source activate dev
[root@master bin]# source deactivate
[root@master bin]# source activate env
[root@master bin]# /usr/local/src/apache-phoenix-4.14.3-HBase-1.3-bin/bin/sqlline.py master:2181

create schema IF NOT EXISTS "GMALL2021_REALTIME";

8.开启 com.elaiza.gmall.realtime.app.ods.FlinkCDC 配置库 cdc
[root@master bin]# flink run -d -c com.elaiza.gmall.realtime.app.ods.FlinkCDC /usr/local/src/sh/gmall-flink/sub-jar/gmall2021-realtime-1.0.jar

9.开启 com.elaiza.gmall.realtime.app.dwd.BaseDBApp
[root@master bin]# flink run -d -c com.elaiza.gmall.realtime.app.dwd.BaseDBApp /usr/local/src/sh/gmall-flink/sub-jar/gmall2021-realtime-1.0.jar





# kafka操作
1.查看主题:
[root@master bin]# kafka-topics.sh --list --zookeeper master:2181

2.创建主题为first,分区数为2,副本为2
[root@master bin]# kafka-topics.sh --create --zookeeper master:2181 --topic first --partitions 2 --replication-factor 2

3.描述主题:
[root@master bin]# kafka-topics.sh --describe --topic first --zookeeper master:2181 

4.删除主题:
[root@master bin]# kafka-topics.sh --delete --zookeeper master:2181 --topic order_info 

5.开启生产者(测试):
[root@master bin]# kafka-console-producer.sh --topic dwd_page_log --broker-list master:9092

6.开启消费者(测试):
[root@master bin]# kafka-console-consumer.sh --bootstrap-server master:9092 --topic first

7.开启消费者(测试) 从头开始获取数据 默认7天的存储时间
[root@master bin]# kafka-console-consumer.sh --bootstrap-server master:9092 --topic dwd_order_info --from-beginning


# 存档、读档、删除sv和cp
1.存档
[root@master bin]# flink savepoint b99afaa23462ae4732747f95bdf43b8b hdfs://master:9000/gmall-flink/sv

2.读档
[root@master bin]# flink run -m master:8081 -s hdfs://master:9000/gmall-flink/sv/savepoint-b99afa-61b6e07bbfa1 -c com.elaiza.gmall.realtime.app.ods.FlinkCDC /usr/local/src/sh/gmall-flink/sub-jar/gmall2021-realtime-1.0.jar

3.删除 sv cp
[root@master bin]# hadoop fs -rm -r /gmall2021


复制代码

Guess you like

Origin juejin.im/post/7054365618509709348