Spark 高级编程(三):topn

版权声明: https://blog.csdn.net/weixin_39966065/article/details/89714043

1.目的

例子:

  将上面图片中的数据,以班级为维度,获取到前三名的信息

2.思路

(1)先依据 className(班级名)使用 groupByKey 进行分组聚合

(2)对每个班级的分数,维护一个长度为 3 的降序数组,逐个将分数插入到合适的位置来筛选前三名(重点;严格来说这是有序插入,而不是冒泡排序)

3.代码

package cn.spark.study.core;

import java.util.Arrays;
import java.util.Iterator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/**
 * 分组取top3
 * @author Administrator
 *
 */
/**
 * Grouped top-N example: reads "className score" lines from a text file,
 * groups the scores by class name and prints the highest {@link #TOP_N}
 * scores of every class in descending order.
 */
public class ClassTop3 {

	/** Number of highest scores kept per class. */
	private static final int TOP_N = 3;

	/**
	 * Runs the job on a local Spark master and prints the result to stdout.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		SparkConf conf = new SparkConf()
				.setAppName("ClassTop3")
				.setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);

		// Release the context even when one of the jobs below fails.
		try {
			JavaRDD<String> lines = sc.textFile("E://spark//classTop3.txt");

			// Parse every line into a (className, score) pair.
			JavaPairRDD<String, Integer> pairs = lines.mapToPair(

					new PairFunction<String, String, Integer>() {

						private static final long serialVersionUID = 1L;

						@Override
						public Tuple2<String, Integer> call(String line) throws Exception {
							// Tolerate leading/trailing blanks and repeated separators.
							String[] fields = line.trim().split("\\s+");
							if (fields.length < 2) {
								throw new IllegalArgumentException(
										"Expected \"<className> <score>\" but got: \"" + line + "\"");
							}
							return new Tuple2<String, Integer>(fields[0],
									Integer.valueOf(fields[1]));
						}

					});

			// Collect all scores that belong to the same class.
			JavaPairRDD<String, Iterable<Integer>> groupedPairs = pairs.groupByKey();

			// Reduce every group to its highest TOP_N scores.
			JavaPairRDD<String, Iterable<Integer>> top3Score = groupedPairs.mapToPair(

					new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {

						private static final long serialVersionUID = 1L;

						@Override
						public Tuple2<String, Iterable<Integer>> call(
								Tuple2<String, Iterable<Integer>> classScores)
								throws Exception {
							return new Tuple2<String, Iterable<Integer>>(
									classScores._1, topScores(classScores._2, TOP_N));
						}

					});

			top3Score.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {

				private static final long serialVersionUID = 1L;

				@Override
				public void call(Tuple2<String, Iterable<Integer>> t) throws Exception {
					System.out.println("class: " + t._1);
					for (Integer score : t._2) {
						System.out.println(score);
					}
					System.out.println("=======================================");
				}

			});
		} finally {
			sc.close();
		}
	}

	/**
	 * Selects the highest {@code limit} values of {@code scores} in descending order.
	 *
	 * <p>Each score is inserted at its ordered position in a small fixed-size
	 * buffer (ordered insertion, not a bubble sort), so the full group is never
	 * sorted. A score equal to one already kept is placed after it.
	 *
	 * @param scores the scores of one class
	 * @param limit  maximum number of scores to keep
	 * @return at most {@code limit} scores, highest first; shorter than
	 *         {@code limit} when the group has fewer scores, never padded with
	 *         {@code null}
	 */
	private static Iterable<Integer> topScores(Iterable<Integer> scores, int limit) {
		Integer[] best = new Integer[limit];
		int filled = 0;

		for (Integer score : scores) {
			// Skip every kept score that is not smaller than the candidate.
			int pos = 0;
			while (pos < filled && score <= best[pos]) {
				pos++;
			}
			if (pos >= limit) {
				// Buffer is full and the candidate beats none of its entries.
				continue;
			}

			// Shift the tail one slot to the right; the last entry falls off
			// once the buffer is full.
			int last = Math.min(filled, limit - 1);
			for (int j = last; j > pos; j--) {
				best[j] = best[j - 1];
			}
			best[pos] = score;

			if (filled < limit) {
				filled++;
			}
		}

		// Trim unused slots so small groups do not yield null entries.
		return Arrays.asList(Arrays.copyOf(best, filled));
	}

}

猜你喜欢

转载自blog.csdn.net/weixin_39966065/article/details/89714043
今日推荐