45.Spark大型电商项目-用户访问session分析-top10热门品类之计算各品类点击、下单和支付的次数

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/someby/article/details/88749726

目录

代码

UserVisitSessionAnalyzeSpark.java


本篇文章将记录用户访问session分析-top10热门品类之计算各品类点击、下单和支付的次数。

代码

UserVisitSessionAnalyzeSpark.java

    /**
     * Builds the intermediate RDDs needed to rank the top-10 categories.
     *
     * <p>Steps performed here:
     * <ol>
     *   <li>join the session aggregate RDD with the action RDD and keep only the
     *       action rows, keyed by session id;</li>
     *   <li>collect every category id that was clicked, ordered or paid for;</li>
     *   <li>compute the click, order and pay counts per category.</li>
     * </ol>
     *
     * <p>NOTE(review): the RDDs built in this method are neither returned nor
     * consumed yet; presumably later steps (join / sort / take top 10) will use them.
     *
     * @param filteredSessionid2AggrInfoRDD session id -> aggregate info; only its keys are
     *                                      used, to restrict the action rows via the join
     * @param session2actionRDD             session id -> action detail row
     */
    private static void getTop10Category(JavaPairRDD<String, String> filteredSessionid2AggrInfoRDD, JavaPairRDD<String, Row> session2actionRDD) {

        // Inner join keeps only the sessions present in both RDDs; the aggregate
        // info string is dropped and only the detail row is carried forward.
        JavaPairRDD<String,Row> sessionid2detailRDD = filteredSessionid2AggrInfoRDD
                .join(session2actionRDD)
                .mapToPair(
                        new PairFunction<Tuple2<String, Tuple2<String, Row>>, String, Row>() {

                            private static final long serialVersionUID = 1L;

                            @Override
                            public Tuple2<String, Row> call(Tuple2<String, Tuple2<String, Row>> tuple) throws Exception {
                                return new Tuple2<String,Row>(tuple._1,tuple._2._2);
                            }
                });


        // Step 1: collect all category ids visited by the sessions.
        // "Visited" means the category was clicked, ordered or paid for.

        JavaPairRDD<Long,Long> categoryidRDD = sessionid2detailRDD.flatMapToPair(
                new PairFlatMapFunction<Tuple2<String, Row>, Long, Long>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public Iterator<Tuple2<Long, Long>> call(Tuple2<String, Row> tuple) throws Exception {

                        Row row = tuple._2;
                        List<Tuple2<Long,Long>> list = new ArrayList<>();

                        // Row.getLong throws NullPointerException on a null column, so
                        // rows without a click category must be skipped explicitly.
                        if (!row.isNullAt(6)) {
                            long clickCategoryId = row.getLong(6);
                            // NOTE(review): 10240 looks like a sentinel for "no click
                            // category" in the source data - confirm against the data
                            // generator before relying on it.
                            long maxid = 10240L;
                            if (clickCategoryId != maxid){
                                list.add(new Tuple2<Long,Long>(clickCategoryId,clickCategoryId));
                            }
                        }

                        // Column 8: comma-separated ids of the ordered categories.
                        String orderCategoryIds = row.getString(8);
                        if (orderCategoryIds != null){
                            String[] orderCategoryIdsSplited = orderCategoryIds.split(",");
                            for (String orderCategory: orderCategoryIdsSplited){
                                list.add(new Tuple2<Long,Long>(Long.valueOf(orderCategory),Long.valueOf(orderCategory)));
                            }
                        }

                        // Column 10: comma-separated ids of the paid categories.
                        String payCategoryIds = row.getString(10);
                        if (payCategoryIds != null){
                            String[] payCategoryIdsSplited = payCategoryIds.split(",");
                            for (String payCategoryId : payCategoryIdsSplited){
                                list.add(new Tuple2<Long,Long>(Long.valueOf(payCategoryId),Long.valueOf(payCategoryId)));
                            }
                        }


                        return list.iterator();
                    }
                }


        );
        /**
         * Step 2: compute the click, order and pay counts of each category.
         */

        // The detail rows contain three kinds of actions: click, order and pay.
        // Each helper filters the rows for one action kind and then counts per
        // category with map / reduceByKey.

        // Click count per category
        JavaPairRDD<Long, Long> clickCategoryId2CountRDD =
                getClickCategoryId2CountRDD(sessionid2detailRDD);
        // Order count per category
        JavaPairRDD<Long, Long> orderCategoryId2CountRDD =
                getOrderCategoryId2CountRDD(sessionid2detailRDD);
        // Pay count per category
        JavaPairRDD<Long, Long> payCategoryId2CountRDD =
                getPayCategoryId2CountRDD(sessionid2detailRDD);

    }

    /**
     * Computes the number of clicks per category.
     *
     * <p>Rows whose click category column (index 6) is null are dropped; every
     * remaining row contributes {@code (clickCategoryId, 1)}, and the ones are
     * summed per category id.
     *
     * <p>NOTE(review): getTop10Category treats the id 10240 as "no click"; this
     * method does not exclude it - confirm whether it should.
     *
     * @param sessionid2detailRDD session id -> action detail row
     * @return category id -> click count
     */

    private static JavaPairRDD<Long,Long> getClickCategoryId2CountRDD(
            JavaPairRDD<String,Row> sessionid2detailRDD
    ) {
        JavaPairRDD<String,Row> clickActionRDD = sessionid2detailRDD.filter(
                new Function<Tuple2<String, Row>, Boolean>() {
                    private static final long serialVersionUID = 1L;
                    @Override
                    public Boolean call(Tuple2<String, Row> tuple) throws Exception {
                        Row row = tuple._2;
                        // Row.getLong returns a primitive and throws on a null column,
                        // so a "!= null" test on its result can never filter anything;
                        // ask the row directly whether the column is null.
                        return !row.isNullAt(6);
                    }
                }
        );

        JavaPairRDD<Long,Long> clickCategoryIdRDD = clickActionRDD.mapToPair(new PairFunction<Tuple2<String, Row>, Long, Long>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<Long, Long> call(Tuple2<String, Row> tuple) throws Exception {
                long clickCategoryId = tuple._2.getLong(6);
                // Each click row counts once for its category.
                return new Tuple2<Long,Long>(clickCategoryId,1L);
            }
        });

        JavaPairRDD<Long,Long> clickCategoryId2CountRDD = clickCategoryIdRDD.reduceByKey(
                new Function2<Long, Long, Long>() {
                    private static final long serialVersionUID = 1L;
                    @Override
                    public Long call(Long v1, Long v2) throws Exception {
                        return v1+v2;
                    }
                }
        );
        return clickCategoryId2CountRDD;

    }

   
 /**
     * Computes the number of orders per category.
     *
     * <p>Rows whose order category column (index 8) is null are dropped. The column
     * holds a comma-separated list of category ids; each id contributes
     * {@code (categoryId, 1)}, and the ones are summed per category id.
     *
     * @param sessionid2detailRDD session id -> action detail row
     * @return category id -> order count
     */
    private static JavaPairRDD<Long,Long> getOrderCategoryId2CountRDD(JavaPairRDD<String,Row> sessionid2detailRDD){
        JavaPairRDD<String,Row> orderAction = sessionid2detailRDD.filter(
                new Function<Tuple2<String, Row>, Boolean>() {
                    private static final long serialVersionUID = 1L;
                    @Override
                    public Boolean call(Tuple2<String, Row> tuple) throws Exception {
                        Row row = tuple._2;
                        return row.getString(8) != null;

                    }
                }
        );

        JavaPairRDD<Long,Long> orderCategoryIdRDD = orderAction.flatMapToPair(new PairFlatMapFunction<Tuple2<String, Row>, Long, Long>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Iterator<Tuple2<Long, Long>> call(Tuple2<String, Row> tuple) throws Exception {
                Row row = tuple._2;
                String orderCategoryIds = row.getString(8);
                String[] orderCategoryIdsSplited = orderCategoryIds.split(",");
                List<Tuple2<Long,Long>> list = new ArrayList<Tuple2<Long, Long>>();
                for (String orderCategoryid: orderCategoryIdsSplited){
                    // The value must be 1, not the id itself: reduceByKey below sums
                    // the values, and summing ids would not yield a count.
                    list.add(new Tuple2<Long,Long>(Long.valueOf(orderCategoryid),1L));
                }
                return list.iterator();
            }
        });
        JavaPairRDD<Long,Long> orderCategoryId2CountRDD = orderCategoryIdRDD.reduceByKey(
                new Function2<Long, Long, Long>() {
                    private static final long serialVersionUID = 1L;
                    @Override
                    public Long call(Long v1, Long v2) throws Exception {
                        return v1+v2;
                    }
                }
        );

        return orderCategoryId2CountRDD;
    }


    /**
     * Computes the number of payments per category.
     *
     * <p>Rows whose pay category column (index 10) is null are dropped. The column
     * holds a comma-separated list of category ids; each id contributes
     * {@code (categoryId, 1)}, and the ones are summed per category id.
     *
     * @param sessionid2detailRDD session id -> action detail row
     * @return category id -> pay count
     */
    private static JavaPairRDD<Long,Long> getPayCategoryId2CountRDD(JavaPairRDD<String,Row> sessionid2detailRDD){
        JavaPairRDD<String,Row> payAction = sessionid2detailRDD.filter(
                new Function<Tuple2<String, Row>, Boolean>() {
                    private static final long serialVersionUID = 1L;
                    @Override
                    public Boolean call(Tuple2<String, Row> tuple) throws Exception {
                        Row row = tuple._2;
                        // Must test the pay column (10), the same one the mapper below
                        // splits; testing the order column (8) lets rows with a null
                        // pay column through and drops pay-only rows.
                        return row.getString(10) != null;

                    }
                }
        );

        JavaPairRDD<Long,Long> payCategoryIdRDD = payAction.flatMapToPair(new PairFlatMapFunction<Tuple2<String, Row>, Long, Long>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Iterator<Tuple2<Long, Long>> call(Tuple2<String, Row> tuple) throws Exception {
                Row row = tuple._2;
                String payCategoryIds = row.getString(10);
                String[] payCategoryIdsSplited = payCategoryIds.split(",");
                List<Tuple2<Long,Long>> list = new ArrayList<Tuple2<Long, Long>>();
                for (String payCategoryid: payCategoryIdsSplited){
                    // The value must be 1, not the id itself: reduceByKey below sums
                    // the values, and summing ids would not yield a count.
                    list.add(new Tuple2<Long,Long>(Long.valueOf(payCategoryid),1L));
                }
                return list.iterator();
            }
        });

        JavaPairRDD<Long,Long> payCategoryId2CountRDD = payCategoryIdRDD.reduceByKey(
                new Function2<Long, Long, Long>() {
                    private static final long serialVersionUID = 1L;
                    @Override
                    public Long call(Long v1, Long v2) throws Exception {
                        return v1+v2;
                    }
                }
        );

        return payCategoryId2CountRDD;
    }

猜你喜欢

转载自blog.csdn.net/someby/article/details/88749726
今日推荐