package com.sparktest; import java.io.Serializable; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.api.java.function.VoidFunction; import scala.Tuple2; import scala.math.Ordered; /** * Secondary sorting (ascending by the first value, descending by the second value) * Some raw data: * 5 6 4 1 6 7 6 4 7 2 4 1 */ public class SecondSort { public static void main(String[] args) { SparkConf conf = new SparkConf () .setAppName("SecondSort") .setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<String> linesRDD = sc.textFile("d://spark_data//secondSort.txt"); /** * Convert each line to a tuple: (num1, num2) */ JavaPairRDD<MySortKey, String> tupleRDD = linesRDD.mapToPair(new PairFunction<String, MySortKey, String>() { @Override public Tuple2<MySortKey, String> call(String line) throws Exception { String data[] = line.split(" "); int num1 = Integer.parseInt(data[0]); int num2 = Integer.parseInt(data[1]); return new Tuple2<MySortKey, String>(new MySortKey(num1,num2),line); } }); JavaPairRDD<MySortKey, String> sortByKeyRDD = tupleRDD.sortByKey(); JavaRDD<String> sortedRDD = sortByKeyRDD.map(new Function<Tuple2<MySortKey,String>, String>() { @Override public String call(Tuple2<MySortKey, String> tuple) throws Exception { return tuple._2; } }); sortedRDD.foreach(new VoidFunction<String>() { @Override public void call(String line) throws Exception { System.out.println(line); } }); sc.close(); } } /** * Custom sort key, ascending by the first value, descending by the second value */ class MySortKey implements Ordered<MySortKey>,Serializable{ /** * (the meaning, purpose, function of the domain) */ private static final long serialVersionUID = -8363444941537305530L; private int first; private int second; public MySortKey(int first,int second) { this.first = first; this.second = second; } public int getFirst() { return first; } public void setFirst(int first) { this.first = first; } public int getSecond() { return second; } public void setSecond(int second) { this.second = second; } @Override public boolean $greater(MySortKey other) { if(this.first > other.getFirst()){ return true; }else if(this.first == other.getFirst() && this.second < other.getSecond()){ return true; } return false; } @Override public boolean $greater$eq(MySortKey other) { if(this.$greater(other)){ return true; }else if(this.first == other.getFirst() && this.second == other.getSecond()){ return true; } return false; } @Override public boolean $less(MySortKey other) { return !this.$greater$eq(other); } @Override public boolean $less$eq(MySortKey other) { return !this.$greater(other); } @Override public int compare(MySortKey other) { if(this.first - other.first != 0){ return this.first - other.getFirst(); }else{ return -(this.second - other.getSecond()); } } @Override public int compareTo(MySortKey other) { if(this.first - other.first != 0){ return this.first - other.getFirst(); }else{ return -(this.second - other.getSecond()); } } }
Spark secondary sorting (java)
Guess you like
Origin http://43.154.161.224:23101/article/api/json?id=325506697&siteId=291194637
Ranking