A simple case
import org.apache.spark.sql.SparkSession
import org.junit.Test

// Case classes used with toDF()/toDS() must be declared outside the test class.
case class Person(id: Int, name: String, sex: String, age: Int)

class DataSetCreate {

  val spark = SparkSession
    .builder()
    .appName("test")
    .master("local[4]")
    .getOrCreate()

  import spark.implicits._

  @Test
  def createData(): Unit = {
    val list = List(
      Person(1, "zhangsan", "man", 10),
      Person(2, "zhang2", "woman", 66),
      Person(3, "zhang3", "man", 70),
      Person(4, "zhang4", "man", 22))
    val df = list.toDF()
    // TODO 1. Register the DataFrame/Dataset as a temporary view
    df.createOrReplaceTempView("person")
    spark.sql(
      """
        |select
        |  name, age
        |from person
        |where age >= 30
        |""".stripMargin).show()
  }
}
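For comparison, the same query can be written with the DataFrame DSL instead of registering a temp view and running SQL. A minimal sketch meant to sit inside createData() after df is built (it relies on the import spark.implicits._ already in the class):

  // Same filter and projection as the SQL above, expressed with the DSL.
  df.filter($"age" >= 30)
    .select("name", "age")
    .show()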
A small word-count case
- Count the number of occurrences of each word
Data source (src/main/resources/wc.txt):
hello java
spark hadoop flume kafka
hbase kafka flume hadoop
Lateral View explode(split(value," ")) is used to convert a column into rows: each line is split on spaces and every word becomes its own row.
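To see what the lateral view produces before aggregation, it helps to select the exploded column next to the original line. A minimal sketch against the wordCount view registered in the code below:

  spark.sql(
    """
      |select value, wc
      |from wordCount lateral view explode(split(value," ")) as wc
      |""".stripMargin).show()
  // For the line "hello java" this yields two rows: ("hello java", "hello") and ("hello java", "java").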
import org.apache.spark.sql.SparkSession
import org.junit.Test

class DataSetCreate {

  val spark = SparkSession
    .builder()
    .appName("test")
    .master("local[4]")
    .getOrCreate()

  import spark.implicits._

  @Test
  def createData(): Unit = {
    // spark.read.textFile gives a Dataset[String]; each line of wc.txt lands in a column named "value".
    val ds = spark.read.textFile("src/main/resources/wc.txt")
    ds.createOrReplaceTempView("wordCount")
    spark.sql(
      """
        |select
        |  wc,
        |  count(1) as cnt
        |from wordCount lateral view explode(split(value," ")) as wc
        |group by wc
        |""".stripMargin).show()
  }
}
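The word count can also be written with the DataFrame functions API instead of SQL. A minimal sketch, assuming the same ds read from wc.txt:

  import org.apache.spark.sql.functions.{explode, split}

  // Split each line into words, explode into one row per word, then count per word.
  ds.select(explode(split($"value", " ")).as("wc"))
    .groupBy("wc")
    .count()
    .show()

With the sample file above, hadoop, flume and kafka each appear twice, and hello, java, spark and hbase once.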