批量日志数据库外表写入

# Create a writable external table that maps to the log file location.

# Target location for this batch; the batch id is the last path segment.
audience_attributes_path = "gphdfs://xxx/audience_attributes/#{batch_id}"

# DDL: three columns (opxpid, hash_key, hash_value), located at the path
# above and declared with format 'text'.
create_sql = <<-SQL
        create writable external table #{audience_attributes_table} (
          opxpid varchar(60),
          hash_key varchar(50),
          hash_value text
        )
       
        location ('#{audience_attributes_path}')
        format 'text';
        SQL

AudienceStructure.connection.execute(create_sql)

# Insert data into the table.
#
# Selects the rows of the master table whose hash_key is 'aids' and whose
# hash_value is one of the requested ids, then writes one row per
# (opxpid, hash_key) group with the matching ids aggregated, sorted and
# joined into a comma-separated string.

audience_ids = aids.uniq

# An empty id list would render as `in ()`, which is not valid SQL, so the
# insert is skipped when there is nothing to select.
unless audience_ids.empty?
  # NOTE(review): the ids are interpolated into the statement unquoted and
  # unescaped -- confirm that `aids` never carries untrusted input.
  insert_sql = <<-sql
        insert into #{audience_attributes_table}
        select opxpid, hash_key, array_to_string(jx_array_sort(madlib.array_agg(audience_id)), ',')
        from (
        select opxpid, hash_key, hash_value audience_id
        from #{master_table}
        where hash_key = 'aids'
        and hash_value in (#{audience_ids.join(",")})
        ) m
        group by 1, 2;
        sql

  AudienceStructure.connection.execute(insert_sql)
end


# Keep 12 tables; any beyond 12 are dropped.

# Builds one "drop external table" statement per pg_class relation whose name
# matches the pattern, sorted by the generated statement text (column 1).
stale_tables_sql = "select 'drop external table workspace.' || relname || ';' cmd from pg_class where relname like 'audience_segments_hdfs_%' order by 1"
drop_tables(stale_tables_sql)

# Runs the statements produced by +sql+, except for the last +keep+ of them.
#
# +sql+ is executed with select_all; each returned row is expected to hold a
# ready-to-run statement under +col_name+. Rows are processed in the order the
# query returns them, so the rows that survive are the final +keep+ rows of
# that ordering.
#
# @param sql [String] query returning one statement per row
# @param col_name [String] key of the column holding the statement text
# @param keep [Integer] number of trailing rows to leave untouched
# @return [Array, nil] the rows whose statements were executed, or nil when
#   the query returned no more than +keep+ rows
# @raise [ArgumentError] if +keep+ is negative
def drop_tables(sql, col_name = "cmd", keep = 12)
  raise ArgumentError, "keep must be non-negative" if keep < 0

  # Materialize the result so counting and slicing operate on a plain Array.
  drop_sqls = AudienceStructure.connection.select_all(sql).to_a

  excess = drop_sqls.size - keep
  return if excess <= 0

  # Only the leading rows beyond the retention count are executed.
  drop_sqls.first(excess).each do |drop_sql|
    AudienceStructure.connection.execute(drop_sql[col_name])
  end
end

猜你喜欢

转载自schooltop.iteye.com/blog/2219811