Custom Sorting I: Custom Sort Class

package com.ws.demo
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Custom sort
  * Sort rule: score descending; if scores are equal, age ascending
  */
object CustomSort {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[4]")

    val sc = new SparkContext(conf)

    val data = Array("ws 18 150", "tt 19 145", "nn 20 130", "bb 16 120", "cb 19 150")

    val dataRdd: RDD[String] = sc.parallelize(data)

    // Split each line into name, age and score
    val studentRdd: RDD[Student] = dataRdd.map(line => {
      val lineArr = line.split(" ")
      val name = lineArr(0)
      val age = lineArr(1).toInt
      val score = lineArr(2).toInt
      new Student(name, age, score)
    })
    // Sort using the Ordered implementation on Student
    val sort: RDD[Student] = studentRdd.sortBy(s => s)

    val students: Array[Student] = sort.collect()

    println(students.toBuffer)

    sc.stop()
  }
}

/**
  * The class must implement the ordering trait and Serializable, otherwise an error is thrown
  * (similar to the requirement in Hadoop)
  */
class Student(val name: String, val age: Int, val score: Int) extends Ordered[Student] with Serializable {

  // Override the comparison rule: score descending, then age ascending
  override def compare(that: Student): Int = {
    if (this.score == that.score) {
      this.age - that.age
    } else {
      that.score - this.score
    }
  }
  override def toString: String = s"{ name : $name , age : $age , score : $score } \n"
}
Program output:
ArrayBuffer(
{ name : ws , age : 18 , score : 150 } ,
{ name : cb , age : 19 , score : 150 } ,
{ name : tt , age : 19 , score : 145 } ,
{ name : nn , age : 20 , score : 130 } ,
{ name : bb , age : 16 , score : 120 } 
)
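
For comparison, here is a minimal sketch of the same ordering expressed without a custom Ordered class, by giving sortBy a tuple key (this variant is not in the original code; it reuses the studentRdd built above). Negating the score yields descending order while age stays ascending. Student still has to be Serializable because sortBy shuffles records, but no compare method is needed.

// Sort by a (negated score, age) tuple key instead of extending Ordered[Student]
val sortedByKey: RDD[Student] = studentRdd.sortBy(s => (-s.score, s.age))
println(sortedByKey.collect().toBuffer)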

Reposted from blog.csdn.net/bb23417274/article/details/82937620