Spark中各种连接操作以及实用方法

// Two small pair RDDs (key -> value) used as inputs by the examples below.
// Keys "123" and "789" occur in both RDDs; key "456" occurs only in `a`.
val a = sc.parallelize(Array(("123", 4.0), ("456", 9.0), ("789", 9.0)))

// NOTE: 10 is an Int literal while 8.0 is a Double, so the value type of `b`
// is widened to AnyVal; that is why the sample outputs print `10`, not `10.0`.
val b = sc.parallelize(Array(("123", 8.0), ("789", 10)))

    val c = a.join(b)

    c.foreach(println)

    /*

    (123,(4.0,8.0))

    (789,(9.0,10))

     */

    val d = a.cogroup(b)

    d.foreach(println)

    /*

    (456,(CompactBuffer(9.0),CompactBuffer()))

    (123,(CompactBuffer(4.0),CompactBuffer(8.0)))

    (789,(CompactBuffer(9.0),CompactBuffer(10)))

    */

    val e = a.leftOuterJoin(b)

    e.foreach(println)

    /*

      (456,(9.0,None))

      (123,(4.0,Some(8.0)))

      (789,(9.0,Some(10)))

      */

    val f = a.fullOuterJoin(b)

    f.foreach(println)

    /*

      (456,(Some(9.0),None))

      (123,(Some(4.0),Some(8.0)))

      (789,(Some(9.0),Some(10)))

      */

    val g = a.cartesian(b)

    g.foreach(println)

    /*

((123,4.0),(123,8.0))

((123,4.0),(789,10))

((456,9.0),(123,8.0))

((456,9.0),(789,10))

((789,9.0),(123,8.0))

((789,9.0),(789,10))

      */

    /*val h = a.coalesce(6,true)

    h.foreach(println)

    a.dependencies.foreach(println)*/

    val i = a.keyBy{case (k,v)=>("haha",234)}

    i.foreach(println)

    /*

      ((haha,234),(123,4.0))

      ((haha,234),(456,9.0))

      ((haha,234),(789,9.0))

     */

猜你喜欢

转载自blog.csdn.net/qq_36932624/article/details/82828243