1. Create the Kafka table
JSON message format:
{"id":"10001","ts_server":"1629444027000","params":{"adid":"","click_id":"","aid":""}}
SET allow_experimental_map_type = 1;

CREATE TABLE kafka.dadian_raw
(
    `id` Nullable(String),
    `ts_server` Nullable(String),
    `params` Map(String, String)
)
ENGINE = Kafka
SETTINGS kafka_broker_list = '172.17.32.10:9092',
    kafka_topic_list = 'dadian-json',
    kafka_group_name = 'clickhouse-dadian-raw',
    kafka_format = 'JSONEachRow',
    kafka_max_block_size = 20240,
    kafka_num_consumers = 12,
    kafka_skip_broken_messages = 20560,
    kafka_thread_per_consumer = 1;
The Map type is only supported from version 21.3.4.25; version 20.x does not have it. A Map column is a convenient place for free-form custom parameters, but `SET allow_experimental_map_type = 1` must be issued before a table containing a Map column can be created. `kafka_thread_per_consumer = 1` is likewise 21.x-only: setting it to 1 gives each consumer its own thread for consuming and flushing to disk, which raises consumption parallelism and can speed up ingestion considerably.
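As a quick illustration of what the Map column buys, here is a throwaway sketch; the demo_map table name and sample values are hypothetical, and it assumes a 21.x server with the experimental flag enabled:

SET allow_experimental_map_type = 1;

CREATE TABLE demo_map
(
    `id` String,
    `params` Map(String, String)
)
ENGINE = MergeTree
ORDER BY id;

-- map() builds a Map value from key/value pairs.
INSERT INTO demo_map VALUES ('10001', map('adid', 'a123', 'click_id', 'c456'));

SELECT
    id,
    params['adid'] AS adid,  -- 'a123'
    params['aid']  AS aid    -- '' (absent keys return the value type's default)
FROM demo_map;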
2. Create the target table
CREATE TABLE IF NOT EXISTS ods.dadian_raw
(
    `ts_server_time` DateTime64(3, 'Asia/Shanghai') COMMENT 'Timestamp at which the server received the event, millisecond precision',
    `id` Nullable(String),
    `kafka_partition` Nullable(UInt64) COMMENT 'Partition of the Kafka topic',
    `kafka_offset` Nullable(UInt64) COMMENT 'Offset of the message',
    `kafka_timestamp` Nullable(DateTime('Asia/Shanghai')) COMMENT 'Timestamp of the message',
    `etl_time` DateTime('Asia/Shanghai') DEFAULT now() COMMENT 'Ingestion time',
    `dt` Date DEFAULT toDate(now()) COMMENT 'Date received by the server'
)
ENGINE = MergeTree()
PARTITION BY dt
ORDER BY (id, ts_server_time)  -- source ts_server is renamed here; id is Nullable, hence allow_nullable_key
TTL dt + toIntervalMonth(3)
SETTINGS allow_nullable_key = 1,
    index_granularity = 8192;
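Once data lands, the daily partitioning and the three-month TTL can be sanity-checked from system.parts; this is a routine operational query, not part of the pipeline itself:

-- One row per daily partition; parts whose dt is more than
-- three months old are dropped in the background by the TTL.
SELECT
    partition,
    sum(rows) AS rows,
    formatReadableSize(sum(bytes_on_disk)) AS size_on_disk
FROM system.parts
WHERE database = 'ods'
  AND table = 'dadian_raw'
  AND active
GROUP BY partition
ORDER BY partition;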
3. Create the materialized view
CREATE MATERIALIZED VIEW ods.view_dadian TO ods.dadian_raw
(
    `id` String,
    `ts_server_time` Nullable(DateTime64(3, 'Asia/Shanghai')),
    `kafka_partition` UInt64,
    `kafka_offset` UInt64,
    `kafka_timestamp` Nullable(DateTime),
    `etl_time` DateTime,
    `dt` Nullable(Date)
) AS
SELECT
    id,
    fromUnixTimestamp64Milli(toInt64OrZero(ts_server), 'Asia/Shanghai') AS ts_server_time,
    _partition AS kafka_partition,
    _offset AS kafka_offset,
    _timestamp AS kafka_timestamp,
    now() AS etl_time,
    toDate(fromUnixTimestamp64Milli(toInt64OrZero(ts_server), 'Asia/Shanghai')) AS dt
FROM kafka.dadian_raw;
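As soon as this view exists, the Kafka table starts consuming and rows flow into ods.dadian_raw. A common operational pattern (standard ClickHouse DDL, shown here as a sketch) is to pause consumption before changing the view or the target table:

-- Pause consumption; the consumer group keeps its committed offsets.
DETACH TABLE kafka.dadian_raw;

-- ... recreate the materialized view or ALTER the target table ...

-- Resume consumption from the committed offsets.
ATTACH TABLE kafka.dadian_raw;

-- Quick sanity check on ingestion for today's partition.
SELECT count() FROM ods.dadian_raw WHERE dt = today();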
One ClickHouse gotcha to note: in the view's SELECT, the column name read from the source must not be reused as the name written to the target; give the derived value a different alias (here the expression built from ts_server is aliased to ts_server_time), otherwise the insert misbehaves. Hive and MySQL never exhibited this problem.
Columns prefixed with '_' are virtual columns of the Kafka engine table.
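Besides _partition, _offset and _timestamp used above, the Kafka engine also exposes _topic and _key. The sketch below shows how they would be pulled in; it is written as a bare SELECT for brevity, but in practice it belongs inside the materialized view's SELECT, since selecting directly from a Kafka engine table consumes messages:

SELECT
    _topic     AS kafka_topic,      -- String, source topic name
    _key       AS kafka_key,        -- String, message key
    _partition AS kafka_partition,  -- UInt64
    _offset    AS kafka_offset,     -- UInt64
    _timestamp AS kafka_timestamp   -- Nullable(DateTime)
FROM kafka.dadian_raw;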