Implementing secondary sorting in Spark

Secondary sorting means sorting the rows by the first field, then sorting rows that share the same first-field value by the second field, taking care not to disturb the order established by the first sort.
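The idea is easiest to see on a plain Scala collection first (a minimal sketch, not part of the original post): sorting by a composite key whose second component is negated gives first-field-ascending, second-field-descending order in a single pass.

// Minimal illustration of secondary sorting on a local collection.
val rows = List((60, 51), (70, 58), (60, 61), (70, 54))
val sorted = rows.sortBy { case (first, second) => (first, -second) }
// sorted == List((60,61), (60,51), (70,58), (70,54))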

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

object Demo02 {

  def main(args: Array[String]): Unit = {
    // Suppress verbose Spark logging
    Logger.getLogger("org").setLevel(Level.ERROR)
    val conf = new SparkConf().setAppName("twosort").setMaster("local")
    val sc = new SparkContext(conf)

    // Secondary sort: first column ascending, second column descending
    val file = sc.textFile("E:/spark笔记/day00/sparkData/twosort.txt")

    // Group the rows by the first field, sort the keys ascending,
    // then sort each key's values descending
    val res1 = file.map(line => {
      val splited = line.split(" ")
      (splited(0), splited(1))
    }).groupByKey()
      .sortByKey(true)
      .map(x => (x._1, x._2.toList.sortWith(_ > _)))
      // e.g. (70,List(58, 58, 57, 54))
      // .foreach(println(_))

    // Flatten each (key, sorted-value-list) pair back into individual tuples
    val res3 = res1.flatMap { x =>
      // length of the value list, e.g. List(58, 58, 57, 54) has length 4
      val length01 = x._2.length
      val res2 = new Array[(String, String)](length01)
      // indices 0, 1, 2, 3
      for (i <- 0 until length01) {
        res2(i) = (x._1, x._2(i))
      }
      res2
    }
    res3.foreach(println(_))
    sc.stop()
  }
}

Data:
20 21
50 51
50 54
60 51
60 53
70 58
60 61
70 54
70 57
70 58
10 55
Output result:
(10,55)
(20,21)
(50,54)
(50,51)
(60,61)
(60,53)
(60,51)
(70,58)
(70,58)
(70,57)
(70,54)
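Two caveats about the program above are worth noting. First, groupByKey pulls every value for a key into memory at once, which can be a problem for skewed keys. Second, the fields are compared as strings; that happens to work here because every field has two digits, but with mixed widths ("9" vs "10") string comparison would misorder them. A sketch of an alternative (reusing sc and file from the program above; res4 is a name introduced here) parses the fields as Int and sorts the whole RDD by a composite key, avoiding groupByKey entirely:

// Sort the whole RDD by (first ascending, second descending) without grouping.
val res4 = file.map { line =>
  val fields = line.split(" ")
  (fields(0).toInt, fields(1).toInt) // parse as Int so "9" sorts before "10"
}.sortBy { case (first, second) => (first, -second) }
res4.foreach(println(_))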

Origin blog.csdn.net/Poolweet_/article/details/108483148