添加依赖
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.27</version>
</dependency>
读取MySQL数据
// Read rows from a MySQL table into an RDD through JdbcRDD and print them.
// Spark runs locally (master "local[*]"); the app name is this class's name.
val conf: SparkConf = new SparkConf().setAppName(this.getClass.getName).setMaster("local[*]")
val sc = new SparkContext(conf)
// The four JDBC connection settings: driver class, URL, user name and password.
val driver = "com.mysql.jdbc.Driver"
val url = "jdbc:mysql://localhost:3306/student"
val userName = "root"
val passWord = "root"
// Create the JdbcRDD.
/*
JdbcRDD constructor arguments, in order:
  sc: SparkContext                 entry point (context object) of the Spark program
  getConnection: () => Connection  factory that opens a database connection
  sql: String                      query to execute; it carries two `?` placeholders
  lowerBound: Long                 value for the first placeholder of the query
  upperBound: Long                 value for the second placeholder of the query
  numPartitions: Int               number of partitions
  mapRow: (ResultSet) => T         turns one result-set row into an RDD element
*/
val rdd: JdbcRDD[(Int, String)] = new JdbcRDD(
  sc,
  () => {
    // Register the driver, then open the connection.
    Class.forName(driver)
    DriverManager.getConnection(url, userName, passWord)
  },
  "select * from stuinfo where id>=? and id<=?",
  1000, // lowerBound
  2000, // upperBound
  1,    // numPartitions
  // Map each row to (column 1 as Int, column 2 as String).
  rs => (rs.getInt(1), rs.getString(2))
)
try {
  // Print the result.
  rdd.foreach(println)
} finally {
  // Always release the SparkContext, even when the job fails
  // (e.g. the database is unreachable), so it is not leaked.
  sc.stop()
}
插入数据到MySQL
// Insert the (name, gender) pairs of an RDD into the MySQL table `stuinfo`.
// Spark runs locally (master "local[*]"); the app name is this class's name.
val conf: SparkConf = new SparkConf().setAppName(this.getClass.getName).setMaster("local[*]")
val sc = new SparkContext(conf)
val rdd: RDD[(String, String)] = sc.makeRDD(List(("Tom", "男"), ("Jerry", "男"),
  ("柯南", "男"), ("毛利兰", "女")
))
// The four JDBC connection settings: driver class, URL, user name and password.
val driver = "com.mysql.jdbc.Driver"
val url = "jdbc:mysql://localhost:3306/student"
val userName = "root"
val passWord = "root"
// Insert the data into MySQL.
// Variant kept for reference only: it opens and closes a connection for every
// single record, which is very expensive.
/*rdd.foreach {
  case (name, gender) => {
    // Register the driver
    Class.forName(driver)
    // Open the connection
    val conn: Connection = DriverManager.getConnection(url, userName, passWord)
    val sql = "insert into stuinfo(sname,gender) values(?,?)"
    val ps: PreparedStatement = conn.prepareStatement(sql)
    // Bind the parameters
    ps.setString(1, name)
    ps.setString(2, gender)
    // Execute the statement
    ps.executeUpdate()
    // Close the statement and the connection
    ps.close()
    conn.close()
  }
}*/
// Working per partition means one connection per partition instead of one per
// record, which cuts down the number of open/close operations.
try {
  rdd.foreachPartition { datas =>
    // Nothing to insert for an empty partition, so do not open a connection at all.
    if (datas.hasNext) {
      // Register the driver, then open the connection.
      Class.forName(driver)
      val conn: Connection = DriverManager.getConnection(url, userName, passWord)
      try {
        val sql = "insert into stuinfo(sname,gender) values(?,?)"
        val ps: PreparedStatement = conn.prepareStatement(sql)
        try {
          // Walk the partition's iterator, reusing the same prepared statement.
          datas.foreach { case (name, gender) =>
            ps.setString(1, name)
            ps.setString(2, gender)
            ps.executeUpdate()
          }
        } finally {
          // Close the statement even if an insert failed.
          ps.close()
        }
      } finally {
        // Close the connection even if preparing or executing the statement failed.
        conn.close()
      }
    }
  }
} finally {
  // Always release the SparkContext, even when the job fails.
  sc.stop()
}