/**
 * JDBC operation of a Spark program.
 *
 * Requirement: find the province corresponding to each user-accessed IP address,
 * and count the number of user accesses per region.
 *
 * Approach:
 *   1. Load the base IP rules and broadcast them.
 *   2. Load the user access logs.
 *   3. Resolve each user's IP address to a province (binary search).
 *   4. Count the accesses per province.
 *   5. Write the result to MySQL.
 */
import java.sql.{Connection, Date, DriverManager, PreparedStatement}

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

import scala.util.control.NonFatal
object IpSearchDemo {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(this.getClass.getName).setMaster("local[2]")
    val sc = new SparkContext(conf)

    // 1. Load the IP rules as (startIp, endIp, province). The rule file is
    //    '|'-delimited; columns 2/3 hold the numeric range, column 6 the province.
    val ipInfo: RDD[(String, String, String)] = sc.textFile("D://xxxxx")
      .map(line => {
        val fields = line.split("\\|")
        (fields(2), fields(3), fields(6))
      })

    // The rule set is small enough to collect and broadcast to every executor,
    // avoiding a shuffle-heavy join against the log data.
    val broadcastIpInfo: Broadcast[Array[(String, String, String)]] =
      sc.broadcast(ipInfo.collect)

    // 2-4. Map each access-log line to (province, 1) by resolving its IP.
    val logs = sc.textFile("D://xxxxxx")
      .map(line => {
        val fields = line.split("\\|")
        val userIpLong = ip2long(fields(1))
        val ipInfoArr = broadcastIpInfo.value
        val index = binarySearch(ipInfoArr, userIpLong)
        // binarySearch returns -1 for an IP outside every rule range; guard so
        // one unmatched IP cannot kill the job with ArrayIndexOutOfBounds.
        val province = if (index >= 0) ipInfoArr(index)._3 else "unknown"
        (province, 1)
      })

    val aggred = logs.reduceByKey(_ + _)
    println(aggred.collect.toBuffer)

    // 5. Persist each partition's results to MySQL (one connection per partition).
    aggred.foreachPartition(data2Mysql)
    sc.stop()
  }

  /**
   * Converts a dotted-quad IPv4 string (e.g. "1.2.3.4") to its Long value.
   * Each octet is shifted into the accumulator from the left.
   */
  def ip2long(ip: String): Long =
    ip.split("[.]").foldLeft(0L)((acc, octet) => (acc << 8L) | octet.toLong)

  /**
   * Binary-searches the (sorted, non-overlapping) rule array for the range that
   * contains `ip`. Returns the matching index, or -1 when no range contains it.
   */
  def binarySearch(arr: Array[(String, String, String)], ip: Long): Int = {
    var start = 0
    var end = arr.length - 1
    while (start <= end) {
      // start + (end - start) / 2 cannot overflow, unlike (start + end) / 2.
      val middle = start + (end - start) / 2
      // Hoist the conversions so each bound is parsed once per probe.
      val lo = arr(middle)._1.toLong
      val hi = arr(middle)._2.toLong
      if (ip >= lo && ip <= hi) {
        return middle
      } else if (ip < lo) {
        end = middle - 1
      } else {
        start = middle + 1
      }
    }
    -1
  }

  /**
   * Writes one partition's (province, count) pairs into MySQL.
   * Invoked by foreachPartition so a single connection serves the whole partition.
   */
  val data2Mysql = (it: Iterator[(String, Int)]) => {
    var conn: Connection = null
    var ps: PreparedStatement = null
    // NOTE(review): column name "localtion" kept as-is — presumably matches the
    // existing table schema; confirm before renaming.
    val sql = "insert into spark.location_info(localtion,counts,access_date) values(?,?,?)"
    // Fixed: the host part previously read "192:168.157.133" (colons for dots),
    // which can never resolve.
    val url = "jdbc:mysql://192.168.157.133:3306"
    val user = "root"
    val password = "root"
    try {
      conn = DriverManager.getConnection(url, user, password)
      // Prepare once and reuse per row; the old code leaked a new
      // PreparedStatement on every iteration and closed only the last one.
      ps = conn.prepareStatement(sql)
      it.foreach { case (province, count) =>
        ps.setString(1, province)
        ps.setInt(2, count)
        ps.setDate(3, new Date(System.currentTimeMillis()))
        ps.executeUpdate()
      }
    } catch {
      // NonFatal lets OutOfMemoryError / InterruptedException propagate;
      // printStackTrace itself returns Unit, so don't wrap it in println.
      case NonFatal(e) => e.printStackTrace()
    } finally {
      if (ps != null) ps.close()
      if (conn != null) conn.close()
    }
  }
}