Spark SQL 笔记(7)—— DataFrame API操作案例

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u012292754/article/details/83628860

1 测试数据

stu.txt

1|Anaa|111111|[email protected]
2|Bob|22222|[email protected]
3|Candy|333333|[email protected]
4|Dany|44444|[email protected]
5|Elf|55555|[email protected]
6|Frank|6666|[email protected]
7|George|777777|George@[email protected]
8|Hlnk|888888|[email protected]
9||999999|[email protected]
10||101010|[email protected]
11|NULL|121212|[email protected]

2 测试代码

package com.tzb.demo2

import org.apache.spark.sql.SparkSession


/**
 * Demonstrates basic DataFrame API operations (show/take/select/filter/sort/join)
 * over a pipe-delimited student file.
 *
 * Input format (one record per line): id|name|phone|email
 * NOTE(review): assumes every line has exactly 4 non-empty-terminated fields and
 * a numeric id — malformed lines will fail at `toInt`/indexing; acceptable for a demo.
 */
object DataFrameTest {

  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      .appName("DataFrameAPITest")
      .master("local[2]")
      .getOrCreate()

    // Read the raw text file; each element of the RDD is one pipe-delimited line.
    val rdd = spark.sparkContext.textFile("file:///d://stu.txt")

    import spark.implicits._

    // Single shared parser instead of duplicating the split/construct logic
    // for each DataFrame. A local function literal also avoids capturing the
    // enclosing object in the Spark task closure.
    val parseStu: String => Stu = { line =>
      val fields = line.split("\\|")
      Stu(fields(0).toInt, fields(1), fields(2), fields(3))
    }

    val stuDF = rdd.map(parseStu).toDF()

    // show() prints only the first 20 rows by default.
    // stuDF.show()

    // stuDF.take(10).foreach(println)

    stuDF.first()
    stuDF.head(3)

    // show(20, false) disables column truncation so long emails are visible.
    //stuDF.select("email").show(20,false)
    //stuDF.select("name","email").show(20,false)

    // Rows whose name is empty, or literally the string "NULL".
    // stuDF.filter("name=''").show()
    //stuDF.filter("name='' OR name='NULL'").show()

    // Names starting with 'H'. NOTE(review): SUBSTR in Spark SQL is 1-based;
    // an index of 0 is treated like 1 here, so this works but SUBSTR(name,1,1)
    // would be the canonical form.
    //stuDF.filter("SUBSTR(name,0,1)='H'").show()

    //stuDF.sort(stuDF("name")).show()
    //stuDF.sort(stuDF("name").desc).show()

    //stuDF.sort(stuDF("name").asc,stuDF("id").desc).show()

    // Second DataFrame from the same source, reusing the shared parser.
    val stuDF2 = rdd.map(parseStu).toDF()

    // Equi-join on id; disambiguate the column via each DataFrame's col().
    stuDF.join(stuDF2, stuDF.col("id") === stuDF2.col("id")).show()

    spark.stop()

  }

  // One student record: id|name|phone|email.
  case class Stu(id: Int, name: String, phone: String, email: String)

}

猜你喜欢

转载自blog.csdn.net/u012292754/article/details/83628860