Spark examples

1. Parquet

import org.apache.spark.sql.SparkSession

object testSparkReadParquet {
    def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("TestSparkSession").master("local").getOrCreate()

        val df = spark.read.parquet("D:\\tools\\testSparkFile\\users.parquet")

        df.printSchema()

        df.select("name", "favorite_color", "favorite_numbers").show()

        // Write a projection back out as Parquet, then read it again
        df.select("name", "favorite_color").write.mode("overwrite").save("D:\\tools\\testSparkFile\\namesAndFavColors.parquet")

        val df2 = spark.read.parquet("D:\\tools\\testSparkFile\\namesAndFavColors.parquet")
        df2.printSchema()
    }
}
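Here users.parquet is assumed to be the sample file shipped with Spark under examples/src/main/resources/. With that file, df.printSchema() should print roughly:

root
 |-- name: string (nullable = true)
 |-- favorite_color: string (nullable = true)
 |-- favorite_numbers: array (nullable = true)
 |    |-- element: integer (containsNull = true)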
object TestParquet {
    def main(args: Array[String]): Unit = {
        val spark = SparkSession
            .builder()
            .appName("Java Spark SQL basic example")
            .config("spark.some.config.option", "some-value")
            .master("local")
            .getOrCreate()
        import spark.implicits._

        // Write two DataFrames with different columns into two partition directories
        val squaresDF = spark.sparkContext.makeRDD(1 to 5).map(i => (i, i * i)).toDF("value", "square")
        squaresDF.write.mode("append").parquet("D:\\tools\\testSparkFile\\test\\key=1")

        val cubesDF = spark.sparkContext.makeRDD(6 to 10).map(i => (i, i * i * i)).toDF("value", "cube")
        cubesDF.write.mode("append").parquet("D:\\tools\\testSparkFile\\test\\key=2")

        // Read the partitioned table with schema merging enabled
        val mergedDF = spark.read.option("mergeSchema", "true").parquet("D:\\tools\\testSparkFile\\test\\")
        mergedDF.select("value", "square", "key").show()
        mergedDF.printSchema()
    }
}
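Because the two DataFrames are written into the key=1 and key=2 subdirectories, Spark's partition discovery adds key as an extra column, and mergeSchema = true makes the merged DataFrame contain both square and cube (null where a file does not have that column). With the paths used above, mergedDF.printSchema() should print roughly:

root
 |-- value: integer (nullable = true)
 |-- square: integer (nullable = true)
 |-- cube: integer (nullable = true)
 |-- key: integer (nullable = true)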

2. DataFrame

import org.apache.spark.sql.SparkSession

object DFExample {
    case class Student(id: Int, name: String, phone: String, email: String, age: Int)

    def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("DFExample").master("local").getOrCreate()
        import spark.implicits._

        val studentDF = spark.sparkContext.textFile("D:\\tools\\testSparkFile\\dfTestFile.txt")
            .map(_.split("\\|"))
            .map(line => Student(line(0).trim.toInt, line(1).trim, line(2).trim, line(3).trim, line(4).trim.toInt))
            .toDF()

        val studentDF2 = studentDF

        studentDF.show()

        // Filter out rows whose name is empty or the literal string 'NULL'
        // studentDF.filter("name != '' OR name != 'NULL'").show()  // does not work: the OR condition is always true
        studentDF.filter("name != 'NULL'").filter("name != ''").show()

        // Find the students whose name starts with 'L'
        studentDF.filter("substr(name, 0, 1) = 'L'").show()

        // spark.sql("show functions").show(2000)
        import org.apache.spark.sql.functions._

        // Sort by name, ascending and descending
        studentDF.sort("name").show()
        studentDF.sort(studentDF.col("name").desc).show()

        // Rename a column
        studentDF.select(studentDF.col("name").as("student_name")).show()

        // Join operation
        studentDF.join(studentDF2, studentDF.col("id") === studentDF2.col("id")).sort(studentDF.col("id")).show()
    }
}
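The example assumes dfTestFile.txt is a pipe-delimited text file with one student per line, in the order id|name|phone|email|age. A few made-up lines for illustration (the original post does not show the real file contents):

1|Lily|13800000001|lily@example.com|20
2|Lucy|13800000002|lucy@example.com|21
3|Tom|13800000003|tom@example.com|22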

 
