版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_41455420/article/details/84346541
Spark SQL中的DataFrame类似于一张关系型数据表。在关系型数据库中对单表或多表进行的查询操作,在DataFrame中都可以通过调用Scala提供的DataFrame API来实现。
一、数据库数据准备
-- ----------------------------
-- Table structure for persion
-- NOTE: "persion" is a misspelling of "person", but the Scala program below
-- reads this exact table name via spark.read.jdbc, so it must stay as-is.
-- ----------------------------
DROP TABLE IF EXISTS `persion`;
CREATE TABLE `persion` (
`id` int(11) NOT NULL,
`name` text,
`age` int(11) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of persion
-- Numeric literals are used for the int columns (id, age) instead of the
-- original quoted strings, which relied on MySQL's implicit string-to-int
-- coercion. Inserted values are identical.
-- ----------------------------
INSERT INTO `persion` VALUES (2, 'lisi', 29);
INSERT INTO `persion` VALUES (3, 'wangwu', 25);
INSERT INTO `persion` VALUES (1, 'zhangsan', 20);
INSERT INTO `persion` VALUES (6, 'kobe', 40);
INSERT INTO `persion` VALUES (5, 'tianqi', 35);
INSERT INTO `persion` VALUES (4, 'zhaoliu', 30);
-- ----------------------------
-- Table structure for student
-- height is stored as varchar(10); the Spark job joins on name and only
-- displays this column, so the string type is preserved.
-- ----------------------------
DROP TABLE IF EXISTS `student`;
CREATE TABLE `student` (
`id` int(20) NOT NULL,
`name` varchar(50) DEFAULT NULL,
`height` varchar(10) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of student
-- id uses a numeric literal (the column is int); height stays quoted
-- because the column is varchar. Inserted values are identical.
-- ----------------------------
INSERT INTO `student` VALUES (1, 'zhangsan', '20');
INSERT INTO `student` VALUES (2, 'lisi', '12');
INSERT INTO `student` VALUES (3, 'wangwu', '123');
INSERT INTO `student` VALUES (4, 'zhaoliu', '34');
INSERT INTO `student` VALUES (5, 'tianqi', '100');
-- ----------------------------
-- Table structure for student_class_cj
-- class_cj (score) is varchar(10); the Spark job computes avg(class_cj),
-- which works via MySQL/Spark implicit cast to numeric. Kept as-is so the
-- demo output is unchanged, but an int/decimal column would be cleaner.
-- ----------------------------
DROP TABLE IF EXISTS `student_class_cj`;
CREATE TABLE `student_class_cj` (
`id` int(10) NOT NULL,
`name` varchar(20) DEFAULT NULL,
`class_no` varchar(10) DEFAULT NULL,
`class_cj` varchar(10) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of student_class_cj
-- id uses a numeric literal (the column is int); the varchar columns keep
-- their quoted string values. Inserted rows are identical.
-- ----------------------------
INSERT INTO `student_class_cj` VALUES (1, 'tianqi', '01', '120');
INSERT INTO `student_class_cj` VALUES (2, 'tianqi', '02', '99');
INSERT INTO `student_class_cj` VALUES (3, 'zhaoliu', '01', '60');
INSERT INTO `student_class_cj` VALUES (10, 'lisi', '04', '77');
INSERT INTO `student_class_cj` VALUES (5, 'lisi', '02', '100');
INSERT INTO `student_class_cj` VALUES (6, 'lisi', '03', '52');
INSERT INTO `student_class_cj` VALUES (7, 'tianqi', '03', '150');
二、实战
**主要操作:**查询、过滤、join、聚合
package com.little_devil.mysql
import java.util.Properties
import org.apache.spark.SparkContext
import org.apache.spark.sql._
/**
 * Demo: load three MySQL tables into Spark DataFrames and exercise the
 * DataFrame API — select, filter, join, orderBy, and aggregation.
 *
 * NOTE(review): the object name should be UpperCamelCase per Scala
 * convention (LoadDataFromMysql), but renaming it would change the
 * program's entry-point class name, so it is left as-is.
 */
object loadDataFromMysql {
def main(args: Array[String]): Unit = {
// 1. Create the SparkSession (local mode with 2 threads).
val spark: SparkSession = SparkSession.builder().appName("loadDataFromMysql").master("local[2]").getOrCreate()
val sc: SparkContext = spark.sparkContext
sc.setLogLevel("WARN")
// 2. Read the MySQL tables.
// JDBC connection URL for the "spark" database.
// NOTE(review): host and credentials are hard-coded for the demo; in real
// code these should come from configuration, not source.
val url = "jdbc:mysql://192.168.52.140:3306/spark"
// JDBC connection properties (user/password).
val properties = new Properties()
properties.setProperty("user", "root")
properties.setProperty("password", "root")
// "persion" is the literal (misspelled) table name in the database.
val mysqlDF: DataFrame = spark.read.jdbc(url, "persion", properties)
val mysqlDF2: DataFrame = spark.read.jdbc(url, "student", properties)
val mysqlDF3: DataFrame = spark.read.jdbc(url, "student_class_cj", properties)
// Print the raw contents of each table.
mysqlDF.show()
mysqlDF2.show()
mysqlDF3.show()
// Left join persion with student on name; rows with no student match
// (e.g. "kobe") get nulls for the student columns.
val join1: DataFrame = mysqlDF.join(mysqlDF2,mysqlDF("name")===mysqlDF2("name"),"left")
join1.show()
// limit(3) keeps only the first 3 rows of the join result.
join1.limit(3).show()
// Project the joined result using column references from the original
// DataFrames to disambiguate the duplicated id/name columns.
val view1: DataFrame = join1.select(mysqlDF("id"),mysqlDF("name"),mysqlDF("age"),mysqlDF2("height"))
view1.show()
// Left join with the per-class score table and sort by id descending;
// students with multiple classes produce multiple rows.
val view2: DataFrame = view1.join(mysqlDF3,view1("name")===mysqlDF3("name"),"left").select(view1("*"),
mysqlDF3("class_no"),mysqlDF3("class_cj")).orderBy(view1("id").desc)
view2.show()
// Whole-table aggregation: maximum id.
val view3: DataFrame = view2.agg("id"-> "max")
view3.show()
// Filter out the null class rows, then per-name aggregation:
// max class_no (lexicographic — class_no is a string) and avg class_cj.
val view4: DataFrame = view2.where("class_cj is not null and class_no is not null").groupBy("name").agg("class_no"->"max","class_cj"->"avg")
view4.show()
// Same aggregation expressed with a Map of column -> function.
val view5: DataFrame = view2.where("class_cj is not null and class_no is not null").groupBy("name").agg(Map("class_no"->"max","class_cj"->"avg"))
view5.show()
spark.stop()
}
}
执行结果:
18/11/22 18:37:19 INFO SharedState: Warehouse path is 'file:/D:/idea/spark-to-mysql/spark-warehouse'.
+---+--------+---+
| id| name|age|
+---+--------+---+
| 2| lisi| 29|
| 3| wangwu| 25|
| 1|zhangsan| 20|
| 6| kobe| 40|
| 5| tianqi| 35|
| 4| zhaoliu| 30|
+---+--------+---+
+---+--------+------+
| id| name|height|
+---+--------+------+
| 1|zhangsan| 20|
| 2| lisi| 12|
| 3| wangwu| 123|
| 4| zhaoliu| 34|
| 5| tianqi| 100|
+---+--------+------+
+---+-------+--------+--------+
| id| name|class_no|class_cj|
+---+-------+--------+--------+
| 1| tianqi| 01| 120|
| 2| tianqi| 02| 99|
| 3|zhaoliu| 01| 60|
| 10| lisi| 04| 77|
| 5| lisi| 02| 100|
| 6| lisi| 03| 52|
| 7| tianqi| 03| 150|
+---+-------+--------+--------+
+---+--------+---+----+--------+------+
| id| name|age| id| name|height|
+---+--------+---+----+--------+------+
| 3| wangwu| 25| 3| wangwu| 123|
| 4| zhaoliu| 30| 4| zhaoliu| 34|
| 6| kobe| 40|null| null| null|
| 1|zhangsan| 20| 1|zhangsan| 20|
| 2| lisi| 29| 2| lisi| 12|
| 5| tianqi| 35| 5| tianqi| 100|
+---+--------+---+----+--------+------+
+---+--------+---+---+--------+------+
| id| name|age| id| name|height|
+---+--------+---+---+--------+------+
| 3| wangwu| 25| 3| wangwu| 123|
| 1|zhangsan| 20| 1|zhangsan| 20|
| 2| lisi| 29| 2| lisi| 12|
+---+--------+---+---+--------+------+
+---+--------+---+------+
| id| name|age|height|
+---+--------+---+------+
| 3| wangwu| 25| 123|
| 4| zhaoliu| 30| 34|
| 6| kobe| 40| null|
| 1|zhangsan| 20| 20|
| 2| lisi| 29| 12|
| 5| tianqi| 35| 100|
+---+--------+---+------+
+---+--------+---+------+--------+--------+
| id| name|age|height|class_no|class_cj|
+---+--------+---+------+--------+--------+
| 6| kobe| 40| null| null| null|
| 5| tianqi| 35| 100| 02| 99|
| 5| tianqi| 35| 100| 03| 150|
| 5| tianqi| 35| 100| 01| 120|
| 4| zhaoliu| 30| 34| 01| 60|
| 3| wangwu| 25| 123| null| null|
| 2| lisi| 29| 12| 04| 77|
| 2| lisi| 29| 12| 02| 100|
| 2| lisi| 29| 12| 03| 52|
| 1|zhangsan| 20| 20| null| null|
+---+--------+---+------+--------+--------+
+-------+
|max(id)|
+-------+
| 6|
+-------+
+-------+-------------+-----------------+
| name|max(class_no)| avg(class_cj)|
+-------+-------------+-----------------+
|zhaoliu| 01| 60.0|
| lisi| 04|76.33333333333333|
| tianqi| 03| 123.0|
+-------+-------------+-----------------+
+-------+-------------+-----------------+
| name|max(class_no)| avg(class_cj)|
+-------+-------------+-----------------+
|zhaoliu| 01| 60.0|
| lisi| 04|76.33333333333333|
| tianqi| 03| 123.0|
+-------+-------------+-----------------+
Process finished with exit code 0
喜欢就点赞评论+关注吧
感谢阅读,希望能帮助到大家,谢谢大家的支持!