Implementing WordCount with Scala collections

Analysis of the implementation approach

Insert picture description here

Code

Insert picture description here

package com.zxl.chapter10

/**
 * WordCount implemented with plain Scala collections.
 *
 * Starting from a fixed list of (line, count) pairs, `main` splits every line
 * into words, sums the counts per word, sorts the totals in descending order
 * and prints each intermediate result followed by the top three words.
 */
object Scala09_WordCount {
  def main(args: Array[String]): Unit = {

    // Input data: each entry pairs a line of text with a count.
    val list: List[(String, Int)] = List(
      "Hello Scala World" -> 4,
      "Hello World" -> 3,
      "Hello Hadoop" -> 2,
      "Hello HBase" -> 1
    )

    // Step 1 (flatMap): break every line into single words, each word
    // carrying the count of the line it came from.
    //   ("Hello Scala World", 4) => (Hello,4), (Scala,4), (World,4)
    val flatMapList: List[(String, Int)] = list.flatMap { case (line, count) =>
      line.split(" ").map(word => (word, count))
    }
    println(s"拆分成单个单词:$flatMapList")

    // Step 2 (groupBy): collect the (word, count) pairs under their word.
    val groupByMap: Map[String, List[(String, Int)]] =
      flatMapList.groupBy { case (word, _) => word }
    println(s"按照单词进行分组:$groupByMap")

    // Step 3 (map): collapse every group into (word, sum of its counts).
    // The source also mentioned a shorter `mapValues` variant, marked there
    // as deprecated and not recommended:
    //   groupByMap.mapValues(datas => datas.map(tt => tt._2).sum)
    val wordToSumMap: Map[String, Int] = groupByMap.map { case (word, pairs) =>
      val total = pairs.map { case (_, count) => count }.sum
      word -> total
    }
    println(s"将分组后的数据进行结构的转换:$wordToSumMap")

    // Step 4: turn the map into a list and order it by total, largest first.
    val countList: List[(String, Int)] = wordToSumMap.toList
    println(s"map转换为list:$countList")
    val sortList: List[(String, Int)] =
      countList.sortBy { case (_, total) => total }(Ordering[Int].reverse)
    println(s"将统计的结果进行降序排列:$sortList")

    // Step 5: keep only the first three entries of the sorted list.
    val resultList: List[(String, Int)] = sortList.take(3)
    println(s"从排序后的集合中获取前3条:$resultList")
  }
}

Test Results

Insert picture description here

D:\develop\Java\jdk-8u101\bin\java.exe "-javaagent:D:\Program Files\JetBrains\IntelliJ IDEA 2020.1\lib\idea_rt.jar=51749:D:\Program Files\JetBrains\IntelliJ IDEA 2020.1\bin" -Dfile.encoding=UTF-8 -classpath D:\develop\Java\jdk-8u101\jre\lib\charsets.jar;D:\develop\Java\jdk-8u101\jre\lib\deploy.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\access-bridge-64.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\cldrdata.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\dnsns.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\jaccess.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\jfxrt.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\localedata.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\nashorn.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\sunec.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\sunjce_provider.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\sunmscapi.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\sunpkcs11.jar;D:\develop\Java\jdk-8u101\jre\lib\ext\zipfs.jar;D:\develop\Java\jdk-8u101\jre\lib\javaws.jar;D:\develop\Java\jdk-8u101\jre\lib\jce.jar;D:\develop\Java\jdk-8u101\jre\lib\jfr.jar;D:\develop\Java\jdk-8u101\jre\lib\jfxswt.jar;D:\develop\Java\jdk-8u101\jre\lib\jsse.jar;D:\develop\Java\jdk-8u101\jre\lib\management-agent.jar;D:\develop\Java\jdk-8u101\jre\lib\plugin.jar;D:\develop\Java\jdk-8u101\jre\lib\resources.jar;D:\develop\Java\jdk-8u101\jre\lib\rt.jar;D:\develop\workspace\scala-demo\Scala-atguigu-study\out\production\Scala-atguigu-study;D:\develop\scala-2.13.1\lib\scala-library.jar;D:\develop\scala-2.13.1\lib\scala-reflect.jar com.zxl.chapter10.Scala09_WordCount
拆分成单个单词:List((Hello,4), (Scala,4), (World,4), (Hello,3), (World,3), (Hello,2), (Hadoop,2), (Hello,1), (HBase,1))
按照单词进行分组:HashMap(Scala -> List((Scala,4)), HBase -> List((HBase,1)), Hello -> List((Hello,4), (Hello,3), (Hello,2), (Hello,1)), Hadoop -> List((Hadoop,2)), World -> List((World,4), (World,3)))
将分组后的数据进行结构的转换:HashMap(Scala -> 4, HBase -> 1, Hello -> 10, Hadoop -> 2, World -> 7)
map转换为list:List((Scala,4), (HBase,1), (Hello,10), (Hadoop,2), (World,7))
将统计的结果进行降序排列:List((Hello,10), (World,7), (Scala,4), (Hadoop,2), (HBase,1))
从排序后的集合中获取前3条:List((Hello,10), (World,7), (Scala,4))
Published 1792 original articles · 1135 praises · 960,000 views

Guess you like

Origin blog.csdn.net/a772304419/article/details/105578134