A simple example of using Spark transformation operators (map, filter, flatMap, groupByKey)

package day05
 import org.apache.spark.rdd.RDD
 import org.apache.spark. {SparkConf, SparkContext}

/**
 * Small demonstrations of basic Spark RDD transformations.
 *
 * Each method builds an RDD from a hard-coded local array via `parallelize`,
 * applies a single transformation, collects the result and prints it as a `List`.
 */
class MyTransformation_scala {

  /**
   * Uses `map` to multiply each element of the collection by 2 and prints the result.
   *
   * @param sc the SparkContext used to create the RDD
   */
  def myMap(sc: SparkContext): Unit = {
    val numbers: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9)
    val doubled: RDD[Int] = sc.parallelize(numbers).map(_ * 2)
    // collect() is an action; call it exactly once so only one job is run.
    println(doubled.collect().toList)
  }

  /**
   * Uses `filter` to drop the odd numbers from the collection, keeping the even
   * ones, and prints the result.
   *
   * @param sc the SparkContext used to create the RDD
   */
  def myFilter(sc: SparkContext): Unit = {
    val numbers: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9)
    val evens: RDD[Int] = sc.parallelize(numbers).filter(_ % 2 == 0)
    println(evens.collect().toList)
  }

  /**
   * Uses `flatMap` to split each sentence on a single space and flatten the
   * resulting words into one collection, then prints the result.
   *
   * @param sc the SparkContext used to create the RDD
   */
  def myFlatMap(sc: SparkContext): Unit = {
    val sentences: Array[String] =
      Array("Hello World", "Hello scala", "Hello Java")
    val words: RDD[String] = sc.parallelize(sentences).flatMap(_.split(" "))
    println(words.collect().toList)
  }

  /**
   * Uses `groupByKey` to group (class name, score) pairs by their key and
   * prints the grouped result.
   *
   * @param sc the SparkContext used to create the RDD
   */
  def myGroupByKey(sc: SparkContext): Unit = {
    val scores: Array[(String, Int)] = Array(
      ("class1", 80),
      ("class2", 60),
      ("class1", 75),
      ("class2", 92)
    )
    val grouped: RDD[(String, Iterable[Int])] =
      sc.parallelize(scores).groupByKey()
    println(grouped.collect().toList)
  }
}


/**
 * Entry point that runs one of the transformation demos against a Spark
 * context configured with master "local".
 */
object MyTransformation_scala {
  val tfs: MyTransformation_scala = new MyTransformation_scala

  /**
   * Creates the SparkContext, runs the selected demo and stops the context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("MyTransformation_scala")
    val sc: SparkContext = new SparkContext(conf)
    // Stop the context even when the demo throws, so it is always released.
    try {
      //    tfs.myMap(sc)
      //    tfs.myFilter(sc)
      //    tfs.myFlatMap(sc)
      tfs.myGroupByKey(sc)
    } finally {
      sc.stop()
    }
  }
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325934098&siteId=291194637