Spark-Java分组取TopN的优化

版权声明:有一种生活,不去经历不知其中艰辛;有一种艰辛,不去体会不会知道其中快乐;有一种快乐,没有拥有不知其中纯粹。 https://blog.csdn.net/wwwzydcom/article/details/84643772

代码

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Iterator;

/**
 * Prints the highest scores of every group found in {@code ./data/scores.txt}.
 *
 * <p>Each input line is split on a single space: the first field is used as the
 * group key and the second field is parsed as an integer score. Scores are grouped
 * by key and, for each group, up to {@link #TOP_N} of the largest scores are
 * printed in descending order.
 */
public class TestTN {
    /** Number of highest scores kept per group. */
    private static final int TOP_N = 3;

    /**
     * Runs the job on a local Spark master and prints the result to standard output.
     *
     * @param args command-line arguments (not used)
     */
    public static void main (String[] args){
        SparkConf conf = new SparkConf();
        conf.setMaster("local").setAppName("TestTN");

        // try-with-resources closes the context even when the job throws,
        // instead of leaving it open until the JVM exits.
        try (JavaSparkContext context = new JavaSparkContext(conf)) {
            JavaRDD<String> lineRDD = context.textFile("./data/scores.txt");

            // "<key> <score>" -> (key, score)
            JavaPairRDD<String, Integer> pairRDD = lineRDD.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) throws Exception {
                    String[] split = s.split(" ");
                    String topType = split[0];
                    Integer score = Integer.valueOf(split[1]);
                    return new Tuple2<String, Integer>(topType, score);
                }
            });

            JavaPairRDD<String, Iterable<Integer>> groupRDD = pairRDD.groupByKey();

            groupRDD.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
                @Override
                public void call(Tuple2<String, Iterable<Integer>> tuple2) throws Exception {
                    String className = tuple2._1;

                    // Dump of the whole group, kept from the original program.
                    System.out.println(tuple2);

                    Integer[] top = topScores(tuple2._2, TOP_N);

                    System.out.println("TopType : "+ className);

                    for (Integer score : top) {
                        if (score == null) {
                            // Unused slots are always at the tail: the group had
                            // fewer than TOP_N scores, so there is nothing more to print.
                            break;
                        }
                        System.out.println(score);
                    }
                }
            });
        }
    }

    /**
     * Selects the largest values from {@code scores} in a single pass, without
     * sorting the whole group.
     *
     * @param scores the values to scan
     * @param limit  maximum number of values to keep
     * @return an array of length {@code limit} holding the largest values in
     *         descending order; trailing slots are {@code null} when fewer than
     *         {@code limit} values were supplied
     */
    private static Integer[] topScores(Iterable<Integer> scores, int limit) {
        Integer[] top = new Integer[limit];
        for (Integer score : scores) {
            for (int i = 0; i < top.length; i++) {
                if (top[i] == null) {
                    // First free slot: everything before it is >= score.
                    top[i] = score;
                    break;
                }
                if (score > top[i]) {
                    // Shift the smaller values one slot to the right (dropping the
                    // last one) and insert the new score at its ranked position.
                    System.arraycopy(top, i, top, i + 1, top.length - 1 - i);
                    top[i] = score;
                    break;
                }
            }
        }
        return top;
    }
}

优化:不对每组的全部成绩排序,而是用一个长度为 3 的数组做插入,只保留当前最大的三个值,遍历一次即可得到 Top3。核心代码如下:

 Integer[] top3 = new Integer[3];
	                while (iterator.hasNext()){
	                    Integer next = iterator.next();
	                    for (int i = 0; i < top3.length; i++) {
	                        if (top3[i] == null){
	                            top3[i] = next;
	                            break;
	                        }else if (next > top3[i]){
	                            for (int j = top3.length-1; j>i;j--){
	                                top3[j] = top3[j-1];
	                            }
	                            top3[i] = next;
	                            break;
	                        }

猜你喜欢

转载自blog.csdn.net/wwwzydcom/article/details/84643772
今日推荐