Writing Spark SQL code in IDEA — fancy queries

package cn.itcast.sql

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Demonstrates basic Spark SQL usage: builds a DataFrame from an RDD of
 * case-class instances, then queries it with both the SQL API and the DSL API.
 */
object QueryDemo {

  // Schema carrier: Spark derives the DataFrame schema from this case class by reflection.
  case class Person(id: Int, name: String, age: Int)

  def main(args: Array[String]): Unit = {
    // 1. Create the SparkSession (the entry point for the SQL API).
    //    local[*] uses as many worker threads as there are local cores.
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("SparkSQL")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("WARN")

    // 2. Read the text file and parse each line into a Person.
    //    Expected line format: "<id> <name> <age>", space-separated.
    val fileRDD: RDD[String] = sc.textFile("D:\\data\\person.txt")
    val linesRDD: RDD[Array[String]] = fileRDD.map(_.split(" "))
    val rowRDD: RDD[Person] =
      linesRDD.map(line => Person(line(0).toInt, line(1), line(2).toInt))

    // 3. Convert the RDD to a DataFrame.
    //    RDD has no toDF method of its own; the implicit conversions below add it.
    import spark.implicits._

    // The element type Person carries the schema, so Spark SQL
    // obtains it automatically by reflection and attaches it to the DataFrame.
    val personDF: DataFrame = rowRDD.toDF
    personDF.show(10)
    personDF.printSchema()

    // ===== SQL-style queries =====
    // 0. Register the DataFrame as a temporary view so it can be queried with SQL.
    personDF.createOrReplaceTempView("t_person")
    // 1. Query all rows.
    spark.sql("select * from t_person").show()
    // 2. Query age and age + 1.
    spark.sql("select age, age + 1 from t_person").show()
    // 3. Query the two oldest people.
    spark.sql("select name, age from t_person order by age desc limit 2").show()
    // 4. Count the number of people of each age.
    spark.sql("select age, count(*) from t_person group by age").show()
    // 5. Query people older than 30.
    spark.sql("select * from t_person where age > 30").show()

    // ===== DSL-style queries =====
    // 1. Select the name and age columns.
    personDF.select("name", "age").show()
    // 2. Select name and age + 1 ($-columns enable arithmetic on a column).
    personDF.select($"name", $"age" + 1).show()
    // 3. The two oldest people.
    personDF.sort($"age".desc).show(2)
    // 4. Count of people per age.
    personDF.groupBy("age").count().show()
    // 5. People older than 30.
    personDF.filter($"age" > 30).show()

    sc.stop()
    spark.stop()
  }
}

Published 238 original articles · praised 429 · 250,000 views

You may also like

Origin blog.csdn.net/qq_45765882/article/details/105561091