spark--SparkSQL Multi-Data-Source Interaction-★★★

  • SparkSQL supports multiple data sources, including plain text, CSV, JSON, Parquet, and JDBC.
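
Judging from the df.show() output in the demo below, the input file person.txt is assumed to hold one space-separated record per line, in the form id name age:

1 zhangsan 20
2 lisi 29
3 wangwu 25
4 zhaoliu 30
5 tianqi 35
6 kobe 40
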
package cn.hanjiaxiaozhi.sql
​
import java.util.Properties
​
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
 * Author hanjiaxiaozhi
 * Date 2020/7/25 9:43
 * Desc Demonstrates SparkSQL multi-data-source interaction
 */
object DataSourceDemo {
  case class Person(id:Int,name:String,age:Int)
  def main(args: Array[String]): Unit = {
    //0. Create the SparkSQL execution environment - SparkSession
    val spark: SparkSession = SparkSession.builder().appName("sql").master("local[*]").getOrCreate()
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("WARN")

    //0. Prepare the data
    val fileRDD: RDD[String] = sc.textFile("D:\\data\\spark\\person.txt")
    val personRDD: RDD[Person] = fileRDD.map(line => {
      val arr: Array[String] = line.split(" ") // split each line into id, name, age
      Person(arr(0).toInt, arr(1), arr(2).toInt)
    })
    import spark.implicits._
    val df: DataFrame = personRDD.toDF
    df.show(false)
    /*
     * +---+--------+---+
     * |id |name    |age|
     * +---+--------+---+
     * |1  |zhangsan|20 |
     * |2  |lisi    |29 |
     * |3  |wangwu  |25 |
     * |4  |zhaoliu |30 |
     * |5  |tianqi  |35 |
     * |6  |kobe    |40 |
     * +---+--------+---+
     */

    //1. Write the data in the DF/DS out to files using SparkSQL
    val prop = new Properties() // JDBC connection properties for MySQL
    prop.setProperty("user","root")
    prop.setProperty("password","root")
    df.repartition(1).write.mode(SaveMode.Append).json("D:\\data\\spark\\output\\json") // repartition(1) yields a single output file
    df.write.mode(SaveMode.Overwrite).csv("D:\\data\\spark\\output\\csv")
    df.write.mode(SaveMode.Overwrite).parquet("D:\\data\\spark\\output\\parquet")
    df.write.mode(SaveMode.Overwrite).jdbc("jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8","person",prop)
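    // Note (assumptions, not stated in the original): the jdbc() calls require a
    // MySQL JDBC driver such as mysql-connector-java on the classpath, and the
    // `bigdata` database must already exist; the `person` table itself is
    // created by Spark under SaveMode.Overwrite.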
    //df.write.mode(SaveMode.Overwrite).text("D:\\data\\spark\\output\\text")
    //Note: this would throw an error, because text output supports only 1 column, while this DF has 3

    //2. Read the data from the files back into a DF/DS using SparkSQL
    spark.read.json("D:\\data\\spark\\output\\json").show()
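    // The CSV files were written without a header, so they are read back with
    // default column names (_c0, _c1, _c2); toDF renames them to id/name/age.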
    spark.read.csv("D:\\data\\spark\\output\\csv").toDF("id","name","age").show()
    spark.read.parquet("D:\\data\\spark\\output\\parquet").show()
    spark.read.jdbc("jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8","person",prop).show()
  }
}
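
Two short asides, both sketches rather than part of the original demo, reusing the spark and df values (and the spark.implicits._ import) from main.

First, the text() restriction noted in the code can presumably be worked around by collapsing each row into a single string column before writing; the output path here is hypothetical, mirroring the demo's directory layout:

import org.apache.spark.sql.functions.concat_ws

// Collapse id/name/age into one space-separated string column, which
// satisfies the single-column requirement of text()
df.select(concat_ws(" ", $"id", $"name", $"age"))
  .write.mode(SaveMode.Overwrite).text("D:\\data\\spark\\output\\text")

Second, the json/csv/parquet shorthand methods used above are equivalent to SparkSQL's generic format().save()/format().load() API, shown here with the demo's JSON path:

// Generic-API equivalents of the shorthand JSON calls above
df.write.format("json").mode(SaveMode.Append).save("D:\\data\\spark\\output\\json")
spark.read.format("json").load("D:\\data\\spark\\output\\json").show()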


Reprinted from: blog.csdn.net/qq_46893497/article/details/113926575