Simple implementation examples of Spark transformation operators (map, filter, flatMap, groupByKey)

package day05

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

class MyTransformation_scala {

  // Use map to multiply each element in the collection by 2
  def myMap(sc: SparkContext): Unit = {
    val array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9)
    val pRdd: RDD[Int] = sc.parallelize(array)
    val mapRDD: RDD[Int] = pRdd.map(_ * 2)
    println(mapRDD.collect().toList.toString())
  }

  // Filter out the odd numbers in the integer collection, keeping the even numbers
  def myFilter(sc: SparkContext): Unit = {
    val array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9)
    val pRdd: RDD[Int] = sc.parallelize(array)
    val filterRDD: RDD[Int] = pRdd.filter(_ % 2 == 0)
    println(filterRDD.collect().toList.toString())
  }

  // Flatten a multi-dimensional collection into a one-dimensional one
  def myFlatMap(sc: SparkContext): Unit = {
    val array: Array[String] = Array("Hello World", "Hello scala", "Hello Java")
    val parallelizeRDD: RDD[String] = sc.parallelize(array)
    val flatMapRDD: RDD[String] = parallelizeRDD.flatMap(_.split(" "))
    println(flatMapRDD.collect().toList.toString())
  }

  // Group the values by key
  def myGroupByKey(sc: SparkContext): Unit = {
    val array: Array[(String, Int)] = Array(
      ("class1", 80), ("class2", 60), ("class1", 75), ("class2", 92))
    val parallelizeRDD: RDD[(String, Int)] = sc.parallelize(array)
    val groupByKeyRDD: RDD[(String, Iterable[Int])] = parallelizeRDD.groupByKey()
    println(groupByKeyRDD.collect().toList.toString())
  }
}

object MyTransformation_scala {
  val tfs: MyTransformation_scala = new MyTransformation_scala

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("MyTransformation_scala")
    val sc: SparkContext = new SparkContext(conf)

    // tfs.myMap(sc)
    // tfs.myFilter(sc)
    // tfs.myFlatMap(sc)
    tfs.myGroupByKey(sc)

    sc.stop()
  }
}
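Running myGroupByKey with the local master above should print something along the lines of List((class1,CompactBuffer(80, 75)), (class2,CompactBuffer(60, 92))), although the order of the keys is not guaranteed.

One point worth knowing about groupByKey is that it ships every value for a key across the shuffle, which can be expensive. For aggregations such as a per-key average, reduceByKey combines values map-side first and is usually preferred. The following method is not part of the original post; it is a minimal sketch (the hypothetical name myAverageByKey is mine) that assumes it is added to the same MyTransformation_scala class so it can reuse the imports already in the file:

  // Hypothetical extension, not in the original post: compute the average
  // score per class with reduceByKey instead of groupByKey.
  def myAverageByKey(sc: SparkContext): Unit = {
    val array: Array[(String, Int)] = Array(
      ("class1", 80), ("class2", 60), ("class1", 75), ("class2", 92))
    val pairRDD: RDD[(String, Int)] = sc.parallelize(array)
    // Pair each score with a count of 1, sum both per key, then divide.
    val avgRDD: RDD[(String, Double)] = pairRDD
      .mapValues(score => (score, 1))
      .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
      .mapValues { case (sum, count) => sum.toDouble / count }
    println(avgRDD.collect().toList.toString())
  }

Called as tfs.myAverageByKey(sc) from main, this would print List((class1,77.5), (class2,76.0)) (key order, again, not guaranteed), without ever materializing the full list of scores per class.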