Word count in Scala

package com.bjsxt.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
/**
 * Counts how often each word occurs in a text file and prints the counts,
 * most frequent word first.
 *
 * The job runs on a `local` Spark master under the application name `wc`.
 */
object WordCount {

  /** Input file read when no path is supplied on the command line. */
  private val DefaultInputPath = "./words"

  /**
   * Splits one line of text into words.
   *
   * Words are separated by runs of whitespace. Empty tokens (produced by blank
   * lines or leading whitespace) are dropped so that the empty string is never
   * counted as a word.
   *
   * @param line a single line of input text
   * @return the non-empty words of `line`, in order of appearance
   */
  private def tokenize(line: String): Array[String] =
    line.split("\\s+").filter(_.nonEmpty)

  /**
   * Runs the word count and prints one `(word,count)` tuple per line.
   *
   * @param args optional; `args(0)` is the path of the file to read.
   *             Defaults to `./words` when absent.
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val conf = new SparkConf().setMaster("local").setAppName("wc")
    val sc = new SparkContext(conf)

    // Always stop the context, even when a stage fails (e.g. missing input file).
    try {
      val lines: RDD[String] = sc.textFile(inputPath)
      val words: RDD[String] = lines.flatMap(line => tokenize(line))

      // Pair every word with 1, then sum the ones per distinct word.
      val pairWords: RDD[(String, Int)] = words.map(word => (word, 1))
      val counts: RDD[(String, Int)] = pairWords.reduceByKey(_ + _)

      // Highest count first.
      val sorted: RDD[(String, Int)] = counts.sortBy(_._2, ascending = false)

      println("%%%%%")
      // Bring the result to the driver before printing: RDD.foreach runs inside
      // tasks, which guarantees neither the sorted order nor driver-side output.
      sorted.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326169681&siteId=291194637