Cómo escribir SparkSql

Muchas personas no saben cómo empezar con Spark SQL. A continuación se muestra un ejemplo completo que sirve de referencia para recordar cómo se escribe una aplicación con Spark SQL.

package com.sparksql

import org.apache.log4j.{
    
    Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{
    
    DataFrame, SparkSession}

/**
  * One employee record, built from a single comma-separated input line.
  *
  * @param id      employee identifier, kept as text
  * @param name    employee name
  * @param sex     employee sex, used as the grouping / partitioning key by the queries in this file
  * @param balance integer amount; the queries in this file treat it as the sales amount
  */
case class Employee(
  id: String,
  name: String,
  sex: String,
  balance: Int
)
/**
  * Spark SQL walkthrough: loads a comma-separated employee file, exposes it as the
  * temporary view `employee`, and prints the results of an aggregation query and
  * several window-function queries.
  */
object SparkSQLCase1 {

  /**
    * Runs the example on a local Spark master.
    *
    * Each non-blank input line must contain at least four comma-separated fields,
    * in the order `id,name,sex,balance`, where `balance` parses as an `Int`.
    * A malformed line makes the job fail rather than being silently dropped.
    *
    * @param args optional; when present, `args(0)` is the path of the input file.
    *             Without arguments the path "C:\\employee.txt" is used.
    */
  def main(args: Array[String]): Unit = {
    // SparkSession: the entry point for DataFrame and SQL functionality.
    val spark: SparkSession = SparkSession
      .builder()
      .master("local")
      .appName("DataFrameFromStuctType")
      .getOrCreate()
    // Only let ERROR (and above) through for loggers under "org" so the query output stays readable.
    Logger.getLogger("org").setLevel(Level.ERROR)

    try {
      val inputPath: String = args.headOption.getOrElse("C:\\employee.txt")
      val lineRDD: RDD[String] = spark.sparkContext.textFile(inputPath)

      // Skip blank lines (e.g. a trailing newline at the end of the file); indexing into
      // the split result of an empty line would otherwise throw.
      val employeeRDD: RDD[Employee] = lineRDD
        .filter(_.trim.nonEmpty)
        .map { line =>
          val fields: Array[String] = line.split(",")
          Employee(fields(0), fields(1), fields(2), fields(3).trim.toInt)
        }

      // Required for `toDF()`: the implicits live on the SparkSession instance named `spark`.
      import spark.implicits._

      val employeeDF: DataFrame = employeeRDD.toDF()

      // Register the DataFrame under a table name so it can be queried with SQL.
      employeeDF.createOrReplaceTempView("employee")

      // 1. Employee(s) with the highest sales amount overall.
      //spark.sql("select * from employee where balance=(select  max(balance) mb from employee)").show()

      // 2. Highest sales amount per sex.
      // The grouping column keeps its own name; it was previously aliased as `balance`,
      // which mislabelled the output column.
      spark.sql("select sex, max(balance) mb from employee group by sex").show()

      // 3. Top 3 employees by sales amount within each sex.
      // Window functions suit Top-N queries: partition by sex, order by sales amount
      // descending, number the rows, then keep the rows numbered 1 to 3.
      spark.sql("select * from (select *,row_number() over (partition by sex order by balance desc) rk from employee) tmp " +
        " where rk<=3 ").show()

      // Other window functions over the same partitioning and ordering:
      //   row_number - consecutive numbers, ties broken arbitrarily
      //   rank       - ties share a number, followed by a gap
      //   dense_rank - ties share a number, no gap
      //   ntile(2)   - splits each partition into 2 buckets
      spark.sql("select *,row_number() over (partition by sex order by balance desc) rk from employee ").show()
      spark.sql("select *,rank() over (partition by sex order by balance desc) rk from employee ").show()
      spark.sql("select *,dense_rank() over (partition by sex order by balance desc) rk from employee ").show()
      spark.sql("select *,ntile(2) over (partition by sex order by balance desc) rk from employee ").show()
    } finally {
      // Release the SparkContext and its resources even when a query or the parsing fails.
      spark.stop()
    }
  }

}

Supongo que te gusta

Origin blog.csdn.net/dudadudadd/article/details/114241380
Recomendado
Clasificación