// SparkSQL supports multiple data sources: plain text, CSV, JSON, Parquet, JDBC, etc.
package cn.hanjiaxiaozhi.sql
import java.util.Properties
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{
DataFrame, SaveMode, SparkSession}
object DataSourceDemo {

  /** Record matching the space-separated person.txt layout: id name age. */
  case class Person(id: Int, name: String, age: Int)

  /**
   * Demonstrates round-tripping a DataFrame through several data sources:
   * JSON, CSV, Parquet and JDBC (MySQL). Reads a local text file, writes it
   * out in each format, then reads each format back and shows it.
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .appName("sql")
      .master("local[*]")
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("WARN")

    try {
      // Parse the raw text file into typed Person records.
      // NOTE(review): split(" ") assumes exactly one single space between
      // fields and throws on malformed lines — confirm the input format.
      val fileRDD: RDD[String] = sc.textFile("D:\\data\\spark\\person.txt")
      val personRDD: RDD[Person] = fileRDD.map { line =>
        val arr: Array[String] = line.split(" ")
        Person(arr(0).toInt, arr(1), arr(2).toInt)
      }

      import spark.implicits._
      val df: DataFrame = personRDD.toDF
      df.show(false)

      // JDBC credentials for the writes/reads below.
      // NOTE(review): hard-coded credentials — acceptable for a local demo
      // only; move to configuration otherwise.
      val prop = new Properties()
      prop.setProperty("user", "root")
      prop.setProperty("password", "root")

      // Write the DataFrame out in several formats.
      // repartition(1) collapses the JSON output to a single part file.
      df.repartition(1).write.mode(SaveMode.Append).json("D:\\data\\spark\\output\\json")
      df.write.mode(SaveMode.Overwrite).csv("D:\\data\\spark\\output\\csv")
      df.write.mode(SaveMode.Overwrite).parquet("D:\\data\\spark\\output\\parquet")
      df.write.mode(SaveMode.Overwrite)
        .jdbc("jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8", "person", prop)

      // Read each format back and display it to verify the round trip.
      // CSV carries no schema, so column names are reassigned via toDF.
      spark.read.json("D:\\data\\spark\\output\\json").show()
      spark.read.csv("D:\\data\\spark\\output\\csv").toDF("id", "name", "age").show()
      spark.read.parquet("D:\\data\\spark\\output\\parquet").show()
      spark.read.jdbc("jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8", "person", prop).show()
    } finally {
      // Fix: the original never stopped the session, leaking the local
      // SparkContext (and its UI port / threads) on every run.
      spark.stop()
    }
  }
}