/**
 * Evaluates how far an ordered funnel (`rt`) is completed by a set of timestamped events.
 *
 * Events are processed in ascending timestamp order. An event for step 0 (re)starts the
 * window at its own timestamp. An event for step `i > 0` counts only when step `i - 1` has a
 * recorded window start and the event happens less than `wd` after that start; the window
 * start is then carried forward to step `i`. An event that falls outside the window clears
 * every recorded window start, while flags already set in the result are kept. Processing
 * stops as soon as the last step has been reached.
 *
 * Events whose name does not appear in `rt` are ignored, and an empty `rt` yields an empty
 * result. A window start of 0 means "not recorded", so a step-0 event with timestamp 0 marks
 * step 0 as reached but does not open a window for step 1.
 *
 * @param data events as (step name, timestamp) pairs, in any order
 * @param rt   funnel step names in the required order; when a name occurs more than once,
 *             its first position is used
 * @param wd   window length, in the same unit as the timestamps
 * @return one flag per entry of `rt`; `true` when that step was reached
 */
def taskcal(data: Array[(String, Long)], rt: Array[String], wd: Int): Array[Boolean] = {
  val reached = Array.fill(rt.length)(false)
  if (rt.nonEmpty) {
    val lastStep = rt.length - 1
    // Reversing before toMap makes the first occurrence of a duplicated name win.
    val stepOf: Map[String, Int] = rt.zipWithIndex.reverse.toMap
    // windowStart(i) == 0L means no window start has been recorded for step i.
    val windowStart = Array.fill(rt.length)(0L)
    val events = data.sortBy(_._2).iterator
    while (!reached(lastStep) && events.hasNext) {
      val (name, time) = events.next()
      stepOf.get(name) match {
        case Some(0) =>
          windowStart(0) = time
          reached(0) = true
        case Some(step) if windowStart(step - 1) != 0L =>
          val start = windowStart(step - 1)
          if (time - start < wd) {
            // Still inside the window: carry the original start forward to this step.
            windowStart(step) = start
            reached(step) = true
          } else {
            // Outside the window: forget every recorded start; a new step-0 event is needed.
            java.util.Arrays.fill(windowStart, 0L)
          }
        case _ => // not a funnel step, or the preceding step has no recorded window start
      }
    }
  }
  reached
}
/** Runs `taskcal` on a small hand-written sample and prints the per-step flags, comma-separated. */
def main(args: Array[String]): Unit = {
  // The sample is intentionally not in timestamp order.
  val events: Array[(String, Long)] = Array(
    "A" -> 1450000000000L,
    "B" -> 1450000000001L,
    "C" -> 1430000000002L,
    "A" -> 1460000000001L
  )
  val funnelSteps = Array("A", "B", "C")
  val windowSize = 3600000
  val flags = taskcal(events, funnelSteps, windowSize)
  println(flags.mkString(","))
  // bench("r", 100000, taskcal(events, funnelSteps, windowSize))
}
/**
 * Runs `f` exactly `count` times and prints the total elapsed time in milliseconds.
 *
 * @param name  label printed with the measurement
 * @param count number of times `f` is evaluated; zero or a negative value runs it zero times
 * @param f     by-name expression to measure; it is re-evaluated on every iteration
 */
def bench(name: String, count: Int, f: => Unit): Unit = {
  // nanoTime is monotonic, so the measurement is not affected by wall-clock adjustments.
  val begin = System.nanoTime()
  var i = 0
  while (i < count) {
    f
    i += 1
  }
  val elapsedMs = (System.nanoTime() - begin) / 1000000L
  println(s"name : ${name} count: $count cost(ms):$elapsedMs ")
}
// Streaming job: reads comma-separated text lines from a socket and appends them to
// parquet storage partitioned by a "yyyyMMddHH" column derived from "eventTime".
// Output location passed to save() below.
val storageDir = "UserBehaviorDStream"
// NOTE(review): the app name looks copied from a word-count example; confirm it is intended.
val sparkConf = new SparkConf().setAppName("NetworkWordCount")
// Streaming context created with Seconds(10) as its batch duration.
val ssc = new StreamingContext(sparkConf, Seconds(10))
// Text stream read from a socket on localhost:9999.
val lines = ssc.socketTextStream("localhost", 9999, StorageLevel.MEMORY_AND_DISK_SER)
val sqlContext = new SQLContext(ssc.sparkContext)
// UDF wrapping FunnelUtil.getDatePartiton (defined outside this excerpt); applied to "eventTime" below.
val getPartitionDate = udf(FunnelUtil.getDatePartiton _)
// For every RDD of the stream: parse the lines, build a DataFrame and append it as parquet.
lines.foreachRDD(rdd => {
// Print each raw line (debug output), split it on commas, keep only records with
// exactly 6 fields, and turn each kept record into a Row of those 6 values.
val userBehiviorData = rdd.map(x => {println("**:"+x);x.split(",")}).filter(_.length == 6)
.map(r => RowFactory.create(r(0), r(1), r(2), r(3), r(4), r(5)))
// Attach the schema provided by getStructType.
val userBehiviorDataDF = sqlContext.createDataFrame(userBehiviorData, getStructType)
// Add the "yyyyMMddHH" column computed from "eventTime", then coalesce to a single partition.
val userBehiviorDataPartition = userBehiviorDataDF.withColumn("yyyyMMddHH", getPartitionDate(userBehiviorDataDF("eventTime"))).coalesce(1)
// Append to storageDir in parquet format, partitioned by "yyyyMMddHH".
userBehiviorDataPartition.write.format("parquet").mode("append").partitionBy("yyyyMMddHH").save(storageDir)
})
// Start the streaming computation and block until it terminates.
ssc.start()
ssc.awaitTermination()
// NOTE(review): this brace closes a definition whose opening line is not visible in this excerpt.
}
/**
 * Schema with six nullable string columns, in this order:
 * userId, eventTime, itemId, itemName, eventAttribute, eventDate.
 */
val getStructType = {
  val columnNames = Array("userId", "eventTime", "itemId", "itemName", "eventAttribute", "eventDate")
  // Every column is a nullable StringType field.
  val fields = columnNames.map(column => DataTypes.createStructField(column, DataTypes.StringType, true))
  DataTypes.createStructType(fields)
}
test code
猜你喜欢
转载自lingzhi007.iteye.com/blog/2391012
今日推荐
周排行