JDBC operations in a Spark program

JDBC operations in a Spark program

  • Requirement: Look up the province that corresponds to each IP address a user visits from, and compute the number of user visits per region
  • Approach:
  • 1. Load the basic IP reference data and broadcast it
  • 2. Load the user access logs
  • 3. Find the province for each user's IP address (binary search)
  • 4. Count visits per region based on the provinces found
  • 5. Write the result to MySQL
import java.sql.{
    
    Connection, Date, DriverManager, PreparedStatement}

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.{
    
    SparkConf, SparkContext}

/**
 * Requirement: map each user's IP address to its province and count the
 * number of user visits per province.
 *
 * Approach:
 *   1. Load the IP reference data and broadcast it.
 *   2. Load the user access (click-stream) log.
 *   3. Resolve each user IP to a province with a binary search.
 *   4. Aggregate the visit count per province.
 *   5. Write the result to MySQL.
 */
object IpSearchDemo {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(this.getClass.getName).setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      // Load the IP reference data; every line is '|'-separated.
      val ipInfo: RDD[(String, String, String)] = sc.textFile("D://xxxxx")
        .map { line =>
          val fields = line.split("\\|")
          val startIp = fields(2)  // range start, as a decimal number in a string
          val endIp = fields(3)    // range end, as a decimal number in a string
          val province = fields(6) // province the range belongs to
          (startIp, endIp, province)
        }

      // Ship the whole reference table to every executor once.
      // NOTE(review): binarySearch assumes the rows are sorted ascending by
      // start IP and do not overlap - confirm for the reference file.
      val broadcastIpInfo: Broadcast[Array[(String, String, String)]] =
        sc.broadcast(ipInfo.collect)

      // Parse the click-stream log and emit (province, 1) per resolvable record.
      // Records whose IP is not covered by any range are dropped; indexing the
      // table with the -1 "not found" result would crash the task.
      val logs: RDD[(String, Int)] = sc.textFile("D://xxxxxx")
        .flatMap { line =>
          val fields = line.split("\\|")
          val userIpLong = ip2long(fields(1)) // fields(1) is the user's dotted IP
          findProvince(broadcastIpInfo.value, userIpLong).map(province => (province, 1))
        }

      // Visit count per province; cached because it is consumed twice below.
      val aggred = logs.reduceByKey(_ + _).cache()

      println(aggred.collect.toBuffer)
      // Persist the result to MySQL, one connection per partition.
      aggred.foreachPartition(data2Mysql)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }

  /**
   * Converts a dotted IP string to its numeric value.
   *
   * Each '.'-separated fragment is parsed as a number and folded into the
   * result by shifting the accumulated value left 8 bits and OR-ing the
   * fragment in.
   *
   * @param ip dotted IP string, e.g. "1.2.3.4"
   * @return the numeric value of the address
   */
  def ip2long(ip: String): Long =
    ip.split("[.]").foldLeft(0L)((acc, fragment) => (acc << 8) | fragment.toLong)

  /**
   * Binary search for the IP range that contains `ip`.
   *
   * @param arr IP reference rows (start, end, province); start and end are
   *            decimal numbers held in strings. The search is only meaningful
   *            when the rows are ordered ascending by start.
   * @param ip  numeric IP to look up
   * @return index of the row whose [start, end] contains `ip`, or -1 if none
   */
  def binarySearch(arr: Array[(String, String, String)], ip: Long): Int = {
    @scala.annotation.tailrec
    def search(start: Int, end: Int): Int =
      if (start > end) -1
      else {
        // Overflow-safe midpoint.
        val middle = start + (end - start) / 2
        val row = arr(middle)
        if (ip < row._1.toLong) search(start, middle - 1)
        else if (ip > row._2.toLong) search(middle + 1, end)
        else middle
      }

    search(0, arr.length - 1)
  }

  /**
   * Looks up the province for a numeric IP.
   *
   * @param arr IP reference rows, as accepted by [[binarySearch]]
   * @param ip  numeric IP to look up
   * @return the province of the matching row, or None when no row contains `ip`
   */
  def findProvince(arr: Array[(String, String, String)], ip: Long): Option[String] = {
    val index = binarySearch(arr, ip)
    if (index >= 0) Some(arr(index)._3) else None
  }

  /**
   * Inserts the (province, count) pairs of one partition into MySQL.
   *
   * One connection and one prepared statement are opened per partition and
   * both are always closed. Failures are reported on stderr and swallowed
   * (best effort), so a database error does not fail the Spark task.
   */
  val data2Mysql: Iterator[(String, Int)] => Unit = (it: Iterator[(String, Int)]) => {
    val sql = "insert into spark.location_info(localtion,counts,access_date) values(?,?,?)"
    val url = "jdbc:mysql://192.168.157.133:3306"
    val user = "root"
    val password = "root"
    try {
      val conn = DriverManager.getConnection(url, user, password)
      try {
        // Prepare once and reuse for every row instead of leaking one
        // statement per row.
        val ps = conn.prepareStatement(sql)
        try {
          val accessDate = new Date(System.currentTimeMillis())
          it.foreach { case (province, count) =>
            ps.setString(1, province)
            ps.setInt(2, count)
            ps.setDate(3, accessDate)
            ps.executeUpdate()
          }
        } finally {
          ps.close()
        }
      } finally {
        conn.close()
      }
    } catch {
      case scala.util.control.NonFatal(e) => e.printStackTrace()
    }
  }
}

Guess you like

Origin blog.csdn.net/qq_42706464/article/details/108900423