Test code: a simple funnel-analysis exercise in Scala, plus a Spark Streaming ingestion fragment.

/** Funnel analysis: determines which steps of the ordered funnel `rt` were
  * reached by the events in `data` within a sliding time window.
  *
  * Events are processed in timestamp order. The first funnel step always
  * (re)starts a run; step i (i > 0) counts as reached only if step i-1 was
  * reached and the event falls within `wd` milliseconds of the run's start
  * time — otherwise the current run is abandoned. Processing stops early once
  * the final step has been reached.
  *
  * @param data events as (itemName, epochMillis); input order is irrelevant
  * @param rt   funnel step names, in required order
  * @param wd   window width in milliseconds
  * @return one flag per funnel step: true if that step was ever reached
  */
def taskcal(data: Array[(String, Long)], rt: Array[String], wd: Int): Array[Boolean] = {
  val result = Array.fill[Boolean](rt.length)(false)
  if (rt.nonEmpty) {
    val lastStep = rt.length - 1
    // Map each step name to its index in rt. Built in reverse so that, for
    // duplicate names, the earliest index wins — same result as rt.indexOf,
    // but O(n) instead of O(n^2).
    val indexMap = rt.zipWithIndex.reverse.toMap
    // startTimes(i) holds the start time of the current funnel run once step
    // i is reached; 0 means "not reached in the current run".
    var startTimes = Array.fill[Long](rt.length)(0L)
    var searching = true // flips to false once the final step is reached
    for (event <- data.sortBy(_._2) if searching) {
      val (name, ts) = event
      indexMap.get(name) match {
        case Some(0) =>
          // First step always (re)starts a funnel run at this event's time.
          startTimes(0) = ts
          result(0) = true
        case Some(idx) if startTimes(idx - 1) != 0 =>
          if (ts - startTimes(idx - 1) < wd) {
            // Within the window: propagate the run's start time to this step.
            startTimes(idx) = startTimes(idx - 1)
            result(idx) = true
          } else {
            // Window expired: abandon the current run.
            startTimes = Array.fill[Long](rt.length)(0L)
          }
        case _ =>
          // Event is not a funnel step (previously threw NoSuchElementException)
          // or its predecessor has not been reached yet: ignore it.
      }
      if (result(lastStep)) searching = false
    }
  }
  result
}

  def main(args:Array[String]): Unit = {

    val data =Array(("A",1450000000000l),
      ("B",1450000000001l),
      ("C",1430000000002l),
      ("A",1460000000001l)
    )
    val rt  = Array("A","B","C")
    val wd  = 3600000
    println(taskcal(data,rt,wd).mkString(","))
   // bench("r",100000,taskcal(data,rt,wd))
  }


  def  bench(name:String,count:Int,f: => Unit): Unit ={
   val begin = System.currentTimeMillis()
   for(i <-0 to count) f
   val end =  System.currentTimeMillis()
   println(s"name : ${name}  count: $count  count:${end - begin} ")
  }







    // NOTE(review): this span is the body/tail of a method whose `def` header
    // is not visible in this paste (likely a `main` of a separate Spark
    // Streaming program); the closing `}` below ends that missing method.
    // Output directory for the partitioned parquet files (relative path).
    val storageDir = "UserBehaviorDStream"
    val sparkConf = new SparkConf().setAppName("NetworkWordCount")
    // Micro-batch interval of 10 seconds.
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    // Reads newline-delimited text from a local socket on port 9999.
    val lines = ssc.socketTextStream("localhost", 9999, StorageLevel.MEMORY_AND_DISK_SER)
    val sqlContext = new SQLContext(ssc.sparkContext)
    // UDF deriving the partition value from eventTime; presumably formats it
    // as yyyyMMddHH — TODO confirm against FunnelUtil.getDatePartiton.
    val getPartitionDate = udf(FunnelUtil.getDatePartiton _)
    lines.foreachRDD(rdd => {
      // Split each CSV line; keep only rows with exactly 6 fields, then wrap
      // them as Rows matching getStructType. The println is debug output.
      val userBehiviorData =  rdd.map(x => {println("**:"+x);x.split(",")}).filter(_.length == 6)
         .map(r => RowFactory.create(r(0), r(1), r(2), r(3), r(4), r(5)))
      val userBehiviorDataDF = sqlContext.createDataFrame(userBehiviorData, getStructType)
      // Add the partition column and collapse to a single file per batch.
      val userBehiviorDataPartition = userBehiviorDataDF.withColumn("yyyyMMddHH", getPartitionDate(userBehiviorDataDF("eventTime"))).coalesce(1)
      userBehiviorDataPartition.write.format("parquet").mode("append").partitionBy("yyyyMMddHH").save(storageDir)
    })
    ssc.start()
    ssc.awaitTermination()
  }

  val getStructType = {
    val structFields = mutable.ArrayBuffer[StructField]()
    structFields += DataTypes.createStructField("userId", DataTypes.StringType, true)
    structFields += DataTypes.createStructField("eventTime", DataTypes.StringType, true)
    structFields += DataTypes.createStructField("itemId", DataTypes.StringType, true)
    structFields += DataTypes.createStructField("itemName", DataTypes.StringType, true)
    structFields += DataTypes.createStructField("eventAttribute", DataTypes.StringType, true)
    structFields += DataTypes.createStructField("eventDate", DataTypes.StringType, true)
    val structType = DataTypes.createStructType(structFields.toArray)
    structType
  }

[Blog boilerplate: "You may also like"]

Reposted from lingzhi007.iteye.com/blog/2391012