Repost: Flink SQL writing to local files and HDFS, partitioned by time

For details, see: https://www.aboutyun.com/forum.php?mod=viewthread&tid=29104

Straight to the code:

Writing with the json, parquet, and other formats has been verified, and files were generated on the local disk. Be careful to import the required dependencies:
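The exact artifacts depend on your Flink version; for the Flink 1.11-era API used here (with Scala 2.11 builds), that would typically mean flink-streaming-java_2.11, flink-table-api-java-bridge_2.11, and flink-table-planner-blink_2.11, plus flink-json for the JSON format and flink-parquet_2.11 for Parquet.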

package flinksql;



import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.sql.Timestamp;

/**
 * @author zhangjun. Follow my WeChat official account [大数据技术与应用实战] for more hands-on content.
 * <p>
 * Writes streaming data to a file sink via SQL.
 */
public class StreamingWriteFile {
   public static void main(String[] args) throws Exception{

      StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
      EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
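      // Checkpointing is required: the streaming filesystem sink only promotes in-progress files to finished files when a checkpoint completes.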
      bsEnv.enableCheckpointing(10000);
      StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
      DataStream<UserInfo> dataStream = bsEnv.addSource(new MySource());
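      // DDL for a filesystem sink table; each partition column (dt, h, m) becomes one directory level under 'path'.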
      String sql = "CREATE TABLE fs_table (\n" +
                   "  user_id STRING,\n" +
                   "  order_amount DOUBLE,\n" +
                   "  dt STRING," +
                   "  h string," +
                   "  m string  \n" +
                   ") PARTITIONED BY (dt,h,m) WITH (\n" +
                   "  'connector'='filesystem',\n" +
                   "  'path'='file:///G:\\汪小剑的文件夹\\flink_test\\',\n" +
                   "  'format'='json'\n" +
                   ")";
      bsTableEnv.executeSql(sql);
      bsTableEnv.createTemporaryView("users", dataStream);
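      // DATE_FORMAT on the event timestamp supplies the dt/h/m partition values for every row.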
      String insertSql = "insert into  fs_table SELECT userId, amount, " +
                         " DATE_FORMAT(ts, 'yyyy-MM-dd'), DATE_FORMAT(ts, 'HH'), DATE_FORMAT(ts, 'mm') FROM users";

      // Submit the INSERT job; executeSql returns once the job is submitted, not when it finishes.
      bsTableEnv.executeSql(insertSql);

   }

   // Unbounded test source that emits a random UserInfo roughly every 100 ms.
   public static class MySource implements SourceFunction<UserInfo>{

      String[] userids = {
            "4760858d-2bec-483c-a535-291de04b2247", "67088699-d4f4-43f2-913c-481bff8a2dc5",
            "72f7b6a8-e1a9-49b4-9a0b-770c41e01bfb", "dfa27cb6-bd94-4bc0-a90b-f7beeb9faa8b",
            "aabbaa50-72f4-495c-b3a1-70383ee9d6a4", "3218bbb9-5874-4d37-a82d-3e35e52d1702",
            "3ebfb9602ac07779||3ebfe9612a007979", "aec20d52-c2eb-4436-b121-c29ad4097f6c",
            "e7e896cd939685d7||e7e8e6c1930689d7", "a4b1e1db-55ef-4d9d-b9d2-18393c5f59ee"
      };

      @Override
      public void run(SourceContext<UserInfo> sourceContext) throws Exception{
         while (true){
            String userid = userids[(int) (Math.random() * userids.length)];
            UserInfo userInfo = new UserInfo();
            userInfo.setUserId(userid);
            userInfo.setAmount(Math.random() * 100);
            userInfo.setTs(new Timestamp(System.currentTimeMillis()));
            sourceContext.collect(userInfo);
            Thread.sleep(100);
         }
      }

      @Override
      public void cancel(){

      }
   }

   public static class UserInfo implements java.io.Serializable{
      private String userId;
      private Double amount;
      private Timestamp ts;

      public String getUserId(){
         return userId;
      }

      public void setUserId(String userId){
         this.userId = userId;
      }

      public Double getAmount(){
         return amount;
      }

      public void setAmount(Double amount){
         this.amount = amount;
      }

      public Timestamp getTs(){
         return ts;
      }

      public void setTs(Timestamp ts){
         this.ts = ts;
      }
   }
}
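After the job has run for a while you should see Hive-style partition directories under the configured path, e.g. (illustrative) dt=2020-07-13/h=10/m=30/part-<uuid>-<n>, with files finalized at each checkpoint. If you also want a marker file per finished partition, the filesystem connector accepts the same sink.partition-commit.* options as the Hive example below, using 'sink.partition-commit.policy.kind'='success-file' on a plain filesystem.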

Creating an empty table in Hive with Flink SQL

// TODO: 2020/7/13 create the table
String hiveSql = "CREATE external TABLE test_table (\n" +
        "  aaa STRING,\n" +
        "  bbb DOUBLE" +
        ") partitioned by (dt string,h string,m string) " +
        "stored as ORC " +
        "TBLPROPERTIES (\n" +
        "  'partition.time-extractor.timestamp-pattern'='$dt $h:$m:00',\n" +
        "  'sink.partition-commit.delay'='0s',\n" +
        "  'sink.partition-commit.trigger'='partition-time',\n" +
        "  'sink.partition-commit.policy.kind'='metastore'" +
        ")";
tEnv.executeSql(hiveSql);
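Note that this DDL only parses when the table environment uses a HiveCatalog and the Hive SQL dialect. A minimal sketch, assuming Flink 1.11+ with the flink-connector-hive dependency on the classpath (the catalog name, database, and hive-conf directory below are placeholders):

import org.apache.flink.table.api.SqlDialect;
import org.apache.flink.table.catalog.hive.HiveCatalog;

// Register a HiveCatalog backed by an existing Hive Metastore (names and paths are placeholders).
HiveCatalog hiveCatalog = new HiveCatalog("myhive", "default", "/path/to/hive-conf");
tEnv.registerCatalog("myhive", hiveCatalog);
tEnv.useCatalog("myhive");

// Switch to the Hive dialect so STORED AS ORC and the TBLPROPERTIES above parse as Hive DDL.
tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);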

Reposted from blog.csdn.net/qq_31866793/article/details/107316722