SparkSQL declarative style

Simple case

import org.apache.spark.sql.SparkSession
import org.junit.Test

case class Person(id: Int, name: String, sex: String, age: Int)
class DataSetCreate {

  val spark = SparkSession
    .builder()
    .appName("test")
    .master("local[4]")
    .getOrCreate()
  import spark.implicits._

  @Test
  def createData(): Unit = {
    // build a small in-memory dataset of Person rows
    val list = List(Person(1,"zhangsan","man",10),
      Person(2,"zhang2","woman",66),
      Person(3,"zhang3","man",70),
      Person(4,"zhang4","man",22))

    val df = list.toDF()
    // 1. Register the DataFrame/Dataset as a temporary view so it can be queried with SQL
    df.createOrReplaceTempView("person")
    spark.sql(
      """
        |select
        |name,age
        |from person
        |where age >=30
        |""".stripMargin).show()
  }

}
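For comparison, the same query can also be written with the DataFrame API instead of raw SQL. A minimal sketch reusing the df built in the test above:

df.select($"name", $"age")   // same projection as the select clause
  .where($"age" >= 30)       // same predicate as the where clause
  .show()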

A small case: word count

  • Count the occurrences of each word

Data source (src/main/resources/wc.txt)

hello java
spark hadoop flume kafka
hbase kafka flume hadoop

Lateral View explode(split(value, " ")) splits each line on spaces and expands the resulting array into one row per word, i.e. it converts columns to rows.
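To see what the explode step produces on its own, here is a minimal sketch run against the same SparkSession (the literal string is just for illustration):

spark.sql("""select explode(split('hello java', ' ')) as wc""").show()
// +-----+
// |   wc|
// +-----+
// |hello|
// | java|
// +-----+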

import org.apache.spark.sql.SparkSession
import org.junit.Test

class DataSetCreate {

  val spark = SparkSession
    .builder()
    .appName("test")
    .master("local[4]")
    .getOrCreate()
  import spark.implicits._

  @Test
  def createData(): Unit = {
    // read each line of wc.txt as a Dataset[String]; its single column is named "value"
    val ds = spark.read.textFile("src/main/resources/wc.txt")
    ds.createOrReplaceTempView("wordCount")

    // note: spark.sql() will not accept a trailing semicolon inside the query string
    spark.sql(
      """
        |select
        | wc,
        | count(1) as cnt
        |from wordCount lateral view explode(split(value, " ")) as wc
        |group by wc
        |""".stripMargin).show()
  }
}
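For comparison, the same word count can be expressed with the typed Dataset API instead of SQL. A sketch reusing the ds from the test above (spark.implicits._ supplies the needed encoders):

ds.flatMap(_.split(" "))   // one word per row; the column is again named "value"
  .groupBy($"value")
  .count()                 // adds a "count" column per group
  .show()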


Origin: blog.csdn.net/qq_46548855/article/details/134398514