119.Spark大型电商项目-广告点击流量实时统计-计算每天各广告最近1小时滑动窗口内的点击趋势

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/someby/article/details/89035937

目录

代码

AdClickTrend.java

IAdClickTrendDAO.java

AdClickTrendDAOImpl.java

DAOFactory.java

AdClickTrendQueryResult.java

AdClickRealTimeStatSpark.java


本篇文章记录广告点击流量实时统计-计算每天各广告最近1小时滑动窗口内的点击趋势。

代码

domain

AdClickTrend.java

package graduation.java.domain;

/**
 * FileName: AdClickTrend
 * Author:   hadoop
 * Email:    [email protected]
 * Date:     19-4-4 8:33 PM
 * Description:
 * Entity holding one data point of the one-hour ad click trend:
 * the click count of a single ad for a given date, hour and minute.
 */
public class AdClickTrend {

    /** Date part of the data point. */
    private String date;
    /** Hour part of the data point. */
    private String hour;
    /** Minute part of the data point. */
    private String minute;
    /** Identifier of the ad. */
    private long adid;
    /** Number of clicks recorded for this ad in this minute. */
    private long clickCount;

    // ---- accessors ----

    public String getDate() {
        return this.date;
    }

    public String getHour() {
        return this.hour;
    }

    public String getMinute() {
        return this.minute;
    }

    public long getAdid() {
        return this.adid;
    }

    public long getClickCount() {
        return this.clickCount;
    }

    // ---- mutators ----

    public void setDate(String date) {
        this.date = date;
    }

    public void setHour(String hour) {
        this.hour = hour;
    }

    public void setMinute(String minute) {
        this.minute = minute;
    }

    public void setAdid(long adid) {
        this.adid = adid;
    }

    public void setClickCount(long clickCount) {
        this.clickCount = clickCount;
    }

    /**
     * Renders all fields in the form
     * {@code AdClickTrend{date='..', hour='..', minute='..', adid=.., clickCount=..}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("AdClickTrend{");
        text.append("date='").append(date).append('\'');
        text.append(", hour='").append(hour).append('\'');
        text.append(", minute='").append(minute).append('\'');
        text.append(", adid=").append(adid);
        text.append(", clickCount=").append(clickCount);
        text.append('}');
        return text.toString();
    }
}

dao

IAdClickTrendDAO.java

package graduation.java.dao;

import graduation.java.domain.AdClickTrend;

import java.util.List;

/**
 * FileName: IAdClickTrendDAO
 * Author:   hadoop
 * Email:    [email protected]
 * Date:     19-4-4 8:36 PM
 * Description:
 * DAO interface for the one-hour ad click trend data.
 */
public interface IAdClickTrendDAO {
    /**
     * Batch-updates or batch-inserts ad click trend data points
     * for the one-hour window.
     *
     * @param adClickTrendList the trend data points to persist
     */
    void updateBatch(List<AdClickTrend> adClickTrendList);
}

impl

AdClickTrendDAOImpl.java

package graduation.java.domain;

import com.mysql.jdbc.JDBC4CallableStatement;
import graduation.java.dao.IAdClickTrendDAO;
import graduation.java.jdbc.JDBCHelper;
import graduation.java.model.AdClickTrendQueryResult;

import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;

/**
 * FileName: AdClickTrendDAOImpl
 * Author:   hadoop
 * Email:    [email protected]
 * Date:     19-4-4 8:39 PM
 * Description:
 * JDBC-backed implementation of the one-hour ad click trend DAO.
 */
public class AdClickTrendDAOImpl implements IAdClickTrendDAO {

    /**
     * Persists the given trend data points into the {@code ad_click_trend} table.
     *
     * <p>For every element the table is first queried by
     * (date, hour, minute, ad_id); elements that already have a row are
     * batch-updated, the remaining ones are batch-inserted.
     *
     * @param adClickTrendList the trend data points to persist; a {@code null}
     *                         or empty list is a no-op
     */
    @Override
    public void updateBatch(List<AdClickTrend> adClickTrendList) {
        // Nothing to persist: avoid touching the database at all.
        if (adClickTrendList == null || adClickTrendList.isEmpty()) {
            return;
        }

        JDBCHelper jdbcHelper = JDBCHelper.getInstance();

        // Separate the records that must be inserted from those that must be updated.
        // Note: records sharing the same key normally end up in the same partition,
        // so duplicate inserts are not expected in general.
        //
        // This depends on the business requirements, though. If duplicate inserts
        // of the same key are possible, one option is to add a create_time column
        // and have the consuming (J2EE) system read only the latest row, which
        // sidesteps the duplicate-insert problem.
        List<AdClickTrend> updateAdClickTrendList = new ArrayList<AdClickTrend>();
        List<AdClickTrend> insertAdClickTrendList = new ArrayList<AdClickTrend>();

        String selectSQL = "SELECT " +
                "count(*) " +
                "FROM ad_click_trend " +
                "WHERE date=? " +
                "AND hour=? " +
                "AND minute=? " +
                "AND ad_id=?";
        for (AdClickTrend adClickTrend : adClickTrendList) {
            // Holder object so the anonymous callback can hand the count back.
            final AdClickTrendQueryResult queryResult = new AdClickTrendQueryResult();
            Object[] params = new Object[]{
                    adClickTrend.getDate(),
                    adClickTrend.getHour(),
                    adClickTrend.getMinute(),
                    adClickTrend.getAdid()
            };
            jdbcHelper.executeQuery(selectSQL, params, new JDBCHelper.QueryCallback() {
                @Override
                public void process(ResultSet rs) throws Exception {
                    // count(*) yields a single row holding the number of matches.
                    if (rs.next()) {
                        queryResult.setCount(rs.getInt(1));
                    }
                }
            });

            if (queryResult.getCount() > 0) {
                updateAdClickTrendList.add(adClickTrend);
            } else {
                insertAdClickTrendList.add(adClickTrend);
            }
        }

        // Batch update of the rows that already exist.
        if (!updateAdClickTrendList.isEmpty()) {
            String updateSQL = "UPDATE ad_click_trend SET click_count=? " +
                    "WHERE date=? " +
                    "AND hour=? " +
                    "AND minute=? " +
                    "AND ad_id=?";

            List<Object[]> updateParamsList = new ArrayList<Object[]>(updateAdClickTrendList.size());
            for (AdClickTrend adClickTrend : updateAdClickTrendList) {
                updateParamsList.add(new Object[]{
                        adClickTrend.getClickCount(),
                        adClickTrend.getDate(),
                        adClickTrend.getHour(),
                        adClickTrend.getMinute(),
                        adClickTrend.getAdid()
                });
            }
            jdbcHelper.executeBatch(updateSQL, updateParamsList);
        }

        // Batch insert of the rows that do not exist yet.
        if (!insertAdClickTrendList.isEmpty()) {
            // Must target ad_click_trend, the same table the existence check and the
            // UPDATE use; inserting elsewhere would make every later check miss.
            // NOTE(review): positional VALUES assumes the column order
            // (date, hour, minute, ad_id, click_count) - confirm against the schema.
            String insertSQL = "INSERT INTO ad_click_trend VALUES(?,?,?,?,?)";

            List<Object[]> insertParamsList = new ArrayList<Object[]>(insertAdClickTrendList.size());
            for (AdClickTrend adClickTrend : insertAdClickTrendList) {
                insertParamsList.add(new Object[]{
                        adClickTrend.getDate(),
                        adClickTrend.getHour(),
                        adClickTrend.getMinute(),
                        adClickTrend.getAdid(),
                        adClickTrend.getClickCount()
                });
            }
            jdbcHelper.executeBatch(insertSQL, insertParamsList);
        }
    }
}

factory

DAOFactory.java

/**
     * Factory method for the one-hour ad click trend DAO.
     * The method name ends in "ADO" (sic); it is kept as-is because
     * callers reference it under this name.
     *
     * @return a new {@link IAdClickTrendDAO} implementation instance
     */
    public static IAdClickTrendDAO getAdClickTrendADO() {
        IAdClickTrendDAO adClickTrendDAO = new AdClickTrendDAOImpl();
        return adClickTrendDAO;
    }

model

AdClickTrendQueryResult.java

package graduation.java.model;

/**
 * FileName: AdClickTrendQueryResult
 * Author:   hadoop
 * Email:    [email protected]
 * Date:     19-4-4 8:42 PM
 * Description:
 * Holder for the result of a one-hour ad click trend query:
 * a single integer count.
 */
public class AdClickTrendQueryResult {

    /** The count produced by the query; defaults to 0 until set. */
    private int count;

    public void setCount(int count) {
        this.count = count;
    }

    public int getCount() {
        return this.count;
    }

    /**
     * Renders the holder in the form {@code AdClickTrendQueryResult{count=..}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("AdClickTrendQueryResult{");
        text.append("count=").append(count);
        text.append('}');
        return text.toString();
    }
}

spark.ad

AdClickRealTimeStatSpark.java


    /**
     * Computes the ad click trend over a sliding window covering the last hour.
     *
     * <p>Every log line is mapped to a {@code <minuteKey_adid, 1L>} pair, the pairs
     * are summed per key over a 60-minute window that slides every 10 seconds,
     * and the resulting per-minute, per-ad click counts are persisted through
     * {@link IAdClickTrendDAO#updateBatch}.
     *
     * @param adRealTimeLogDStream real-time ad click log stream; the value of each
     *                             tuple is a space-separated line of the form
     *                             {@code timestamp province city userid adid}
     */
    private static void calculateAdClickCountByWindow(JavaPairInputDStream<String, String> adRealTimeLogDStream) {
        // Map each log line to <yyyyMMddHHmm_adid, 1L>.
        JavaPairDStream<String, Long> pairDStream = adRealTimeLogDStream.mapToPair(
                new PairFunction<Tuple2<String, String>, String, Long>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, Long> call(Tuple2<String, String> tuple) throws Exception {
                // Field layout: timestamp province city userid adid
                String[] logSplited = tuple._2.split(" ");
                // NOTE(review): formatTimeMinute is assumed to yield yyyyMMddHHmm,
                // which the substring calls further down rely on - confirm.
                String timeMinute = DateUtils.formatTimeMinute(new Date(Long.parseLong(logSplited[0])));
                // The ad id is the fifth field (index 4); index 3 is the user id.
                long adid = Long.parseLong(logSplited[4]);

                String key = timeMinute + "_" + adid;
                // Each log line counts as exactly one click, so emit 1L
                // (not the ad id) as the value to be summed.
                return new Tuple2<String, Long>(key, 1L);
            }
        });

        // Every incoming batch RDD has been mapped to <yyyyMMddHHmm_adid, 1L>.
        // Whenever a new batch arrives, all batches of the last hour are taken
        // and reduced by key, giving the click count of every ad for every
        // minute within the last hour: the ad click trend of the one-hour
        // sliding window (suitable for a dot chart / line chart).
        JavaPairDStream<String, Long> aggrRDD = pairDStream.reduceByKeyAndWindow(
                new Function2<Long, Long, Long>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Long call(Long v1, Long v2) throws Exception {
                return v1 + v2;
            }
        }, Durations.minutes(60), Durations.seconds(10));

        // aggrRDD: for the last hour, the click count of each ad in each
        // minute (yyyyMMddHHmm).
        aggrRDD.foreachRDD(new VoidFunction<JavaPairRDD<String, Long>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(JavaPairRDD<String, Long> rdd) throws Exception {
                rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Long>>>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public void call(Iterator<Tuple2<String, Long>> iterator) throws Exception {
                        List<AdClickTrend> adClickTrends = new ArrayList<AdClickTrend>();
                        while (iterator.hasNext()) {
                            Tuple2<String, Long> tuple = iterator.next();
                            String[] keySplited = tuple._1.split("_");
                            // yyyyMMddHHmm
                            String dateKey = keySplited[0];

                            String date = dateKey.substring(0, 8);
                            String hour = dateKey.substring(8, 10);
                            String minute = dateKey.substring(10);

                            long adid = Long.parseLong(keySplited[1]);
                            long clickCount = tuple._2;

                            AdClickTrend adClickTrend = new AdClickTrend();
                            adClickTrend.setDate(date);
                            adClickTrend.setHour(hour);
                            adClickTrend.setMinute(minute);
                            adClickTrend.setAdid(adid);
                            adClickTrend.setClickCount(clickCount);

                            adClickTrends.add(adClickTrend);
                        }

                        // Empty partition: nothing to write, skip the DAO round trip.
                        if (adClickTrends.isEmpty()) {
                            return;
                        }

                        IAdClickTrendDAO adClickTrendDAO = DAOFactory.getAdClickTrendADO();
                        adClickTrendDAO.updateBatch(adClickTrends);
                    }
                });
            }
        });

    }

猜你喜欢

转载自blog.csdn.net/someby/article/details/89035937
今日推荐