A complete runnable example: two text files are read into RDDs of words, re-keyed into pair RDDs, and combined with leftOuterJoin.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object JoinRDD {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("sparkjson").setMaster("local")
    val sc = new SparkContext(conf)

    val file1 = sc.textFile("C:\\Users\\Think\\Desktop\\json.txt")
    val file2 = sc.textFile("C:\\Users\\Think\\Desktop\\json2.txt")

    // (tom,1,2), (jack,1,2)
    val words1: RDD[(String, Int, Int)] = file1.flatMap(_.split(" ")).map((_, 1, 2))
    // (tom,3,4), (jack,3,4)
    val words2: RDD[(String, Int, Int)] = file2.flatMap(_.split(" ")).map((_, 3, 4))

    // Re-key each RDD as (word, (meta1, meta2)) so the two can be joined by word
    val words1Map = words1.map(x => (x._1, (x._2, x._3)))
    val words2Map = words2.map(x => (x._1, (x._2, x._3)))

    // leftOuterJoin keeps every key of the left RDD; the right side is an Option
    val result: RDD[(String, ((Int, Int), Option[(Int, Int)]))] =
      words1Map.leftOuterJoin(words2Map)

    // Flatten the nested tuples. Use getOrElse instead of .get so a key that is
    // missing from words2Map yields defaults instead of NoSuchElementException.
    val finalResult = result.map { case (key, ((meta1, meta2), rightOpt)) =>
      val (r1, r2) = rightOpt.getOrElse((0, 0))
      (key, meta1, meta2, r1, r2)
    }

    // println(words1.collect().toBuffer)
    // println(words2.collect().toBuffer)
    println(finalResult.collect().toBuffer)
    sc.stop()
  }
}
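Assuming both input files simply contain the two words tom and jack (the file contents are not shown in the original; this follows the inline comments above), the program would print something like the following, with element order depending on partitioning:

ArrayBuffer((tom,1,2,3,4), (jack,1,2,3,4))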
The leftOuterJoin operation works like a SQL LEFT OUTER JOIN on pair RDDs: every key of the left RDD appears in the result, and the matching right-side value is wrapped in an Option, Some(value) when the key also exists in the right RDD and None when it does not.
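Here is a minimal, self-contained sketch of that contract (the sample data, object name, and local-mode setup are assumptions for illustration, not part of the original program). A key present only on the left comes back as None, and a pattern match handles both cases without the unsafe .get:

import org.apache.spark.{SparkConf, SparkContext}

object LeftOuterJoinSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("leftOuterJoinSketch").setMaster("local"))

    val left  = sc.parallelize(Seq(("tom", (1, 2)), ("jack", (1, 2)), ("rose", (1, 2))))
    val right = sc.parallelize(Seq(("tom", (3, 4)), ("jack", (3, 4))))

    // "rose" exists only on the left, so its right-hand side is None
    left.leftOuterJoin(right).map {
      case (key, ((m1, m2), Some((r1, r2)))) => (key, m1, m2, r1, r2) // key matched on the right
      case (key, ((m1, m2), None))           => (key, m1, m2, 0, 0)   // left-only key, defaults
    }.collect().foreach(println)
    // prints (order may vary): (tom,1,2,3,4), (jack,1,2,3,4), (rose,1,2,0,0)

    sc.stop()
  }
}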