Spark: customizing sort order (part 3)

Specifying an implicit Ordering value for sorting


Data
Array("mimi1 21 85", "mimi2 22 87", "mimi3 23 87")
Fields per record: name, age, face value (fv)


import org.apache.spark.{
    
    SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

object CustomSort_4 {

  /**
   * Sorts user records of shape (name, age, faceValue) by declaring an
   * implicit Ordering in scope and then calling `sortBy` with the identity
   * function, which picks up that implicit.
   *
   * The ordering is built with `Ordering[(Int, Int)].on[(String, Int, Int)]`:
   *  - `(Int, Int)` is the key type the base ordering actually compares;
   *  - `(String, Int, Int)` is the element type being sorted;
   *  - the key function maps each record to `(-fv, age)`, i.e. face value
   *    descending (negation flips the order), then age ascending.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName(this.getClass.getName).setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      // Raw text data: one record per line, "name age faceValue".
      val userInfo: RDD[String] =
        sc.parallelize(Array("mimi1 21 85", "mimi2 22 87", "mimi3 23 87"))

      // Split each line into a (name, age, fv) tuple.
      val personRDD: RDD[(String, Int, Int)] = userInfo.map { line =>
        val arr = line.split(" ")
        (arr(0), arr(1).toInt, arr(2).toInt)
      }

      // Implicit ordering used by sortBy below: face value descending
      // (hence the minus sign), ties broken by age ascending.
      implicit val ord: Ordering[(String, Int, Int)] =
        Ordering[(Int, Int)].on[(String, Int, Int)](x => (-x._3, x._2))

      val sorted: RDD[(String, Int, Int)] = personRDD.sortBy(x => x)
      println(sorted.collect.toBuffer)
    } finally {
      // Always release Spark resources, even if the job throws.
      sc.stop()
    }
  }
}

result

ArrayBuffer((mimi2,22,87), (mimi3,23,87), (mimi1,21,85))

Related posts

Origin blog.csdn.net/qq_42706464/article/details/108355171