Spark Operator Examples in Scala (take, saveAsTextFile, countByKey)

package day06
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import scala.collection.Map

object MyAction_Scala_2 {
  // take(n) returns the first n elements of the RDD to the driver as an Array
  def myTake(sc: SparkContext): Unit = {
    val rdd1 = sc.parallelize(List("aa", "bb", "cc"))
    val array: Array[String] = rdd1.take(2)
    println(array.toList)
  }


  // saveAsTextFile writes each element of the RDD as a line of text under the given path;
  // the target directory must not already exist, otherwise the job fails
  def mySaveAsTextFile(sc: SparkContext): String = {
    val parallelizeRDD: RDD[String] = sc.parallelize(List("hello world", "hello scala"))
    val flatMapRDD: RDD[String] = parallelizeRDD.flatMap(_.split(" "))
    // flatMapRDD.saveAsTextFile("D://3.txt")
    flatMapRDD.saveAsTextFile("hdfs://hadoop-1707-001:9000/save/test000")
    "Save succeeded"
  }
  // countByKey counts how many times each key occurs in a pair RDD
  def myCountByKey(sc: SparkContext): Unit = {
    val array: Array[(String, String)] = Array(("class2", "liao"), ("class2", "ao"), ("class2", "li"), ("class1", "lao"))
    val parallelizeRDD: RDD[(String, String)] = sc.parallelize(array)
    val countByKeyResult: Map[String, Long] = parallelizeRDD.countByKey()
    for ((k, v) <- countByKeyResult) {
      println(s"key=$k, value=$v")
    }
  }

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("MyAction_scala_2")
    val sc: SparkContext = new SparkContext(conf)
    myCountByKey(sc)
    // myTake(sc)
    // mySaveAsTextFile(sc)
    sc.stop()
  }
}
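As a quick check, running myCountByKey on the sample data above should print the two lines below (the order may differ, since countByKey returns an unordered Map):

key=class2, value=3
key=class1, value=1

To verify what mySaveAsTextFile wrote, the output directory can be read back as an RDD. This is a minimal sketch, assuming the same HDFS path as above and a reachable cluster:

// read the saved part files back and print each line to the console
sc.textFile("hdfs://hadoop-1707-001:9000/save/test000").collect().foreach(println)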

Reposted from blog.csdn.net/wjn19921104/article/details/80230283