// 求每门课程第一名学生信息 (Find the first-place student of each course)
package sparkRdd_practice
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
* @Description Finds the first-place student of each course with a SQL window function.
* @Author 黄仁议<[email protected]>
* @Version V1.0
* @Since 1.0
* @Date 2019/6/4 0004 17:46
* @ClassName WindowDemo
*/
// Immutable record pairing a subject with a student name.
// NOTE(review): not referenced by any code visible in this file; kept for callers elsewhere.
case class Student(
  subject: String, // subject the record belongs to
  name: String     // student name
)
/**
 * Grouped top-N demo: prints the highest-scoring row of every course using a
 * Spark SQL window function.
 *
 * Window functions relevant here: `row_number`, `rank`, `dense_rank`, all used
 * with `over (partition by ... order by ...)`.
 * For the ordered values (10, 10, 20, 30) they produce:
 *   - row_number: 1, 2, 3, 4
 *   - rank:       1, 1, 3, 4
 *   - dense_rank: 1, 1, 2, 3
 */
object WindowDemo {

  /** Input file used when no path is passed on the command line. */
  private val DefaultScorePath = "d:\\data\\score.json"

  /**
   * Reads a JSON score file, ranks the rows inside each course by score
   * (descending) and shows the first-ranked row of every course.
   *
   * The queries reference the columns `course`, `name` and `score`, so the
   * input JSON must provide them.
   *
   * @param args optional; `args(0)` is the path of the JSON file to read.
   *             When absent, [[DefaultScorePath]] is used.
   */
  def main(args: Array[String]): Unit = {
    // Tolerate a null or empty argument array and fall back to the default path.
    val scorePath = Option(args).flatMap(_.headOption).getOrElse(DefaultScorePath)

    val sparkSession = SparkSession.builder().appName("windowDemo").master("local[2]").getOrCreate()
    try {
      // read.json already yields a DataFrame, no further conversion is needed.
      val frame: DataFrame = sparkSession.read.json(scorePath)

      // Register the raw rows as a temporary view so they can be queried with SQL.
      frame.createOrReplaceTempView("t_student")

      // Collapse identical (course, name, score) rows; `counts` is how many
      // input rows shared that combination.
      val dataFrame1: DataFrame = sparkSession.sql("SELECT course,name,score,count(*) counts from t_student GROUP BY course,name,score")
      dataFrame1.createOrReplaceTempView("t_student_count")

      // Number the rows of each course by descending score and keep number 1.
      // row_number gives every row a distinct number, so exactly one row per
      // course is shown even when several students tie for the top score.
      sparkSession.sql("SELECT * FROM (SELECT *,row_number() over(partition by course order by score desc)rank FROM " +
        " t_student_count) t_temp where rank <2").show()
    } finally {
      // Always release the session, even if reading or querying failed.
      sparkSession.stop()
    }
  }
}