// User acquisition channel and behavior analysis
import java.sql.Timestamp
import java.util.UUID
import org.apache.flink.streaming.api.windowing.time.Time
import java.util.concurrent.TimeUnit
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.source.{
RichSourceFunction, SourceFunction}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
import scala.util.Random
/**
 * A single simulated marketing event.
 *
 * @param userId    identifier of the user that produced the event
 * @param behavior  action performed, e.g. "CLICK", "DOWNLOAD", "INSTALL" or "UNINSTALL"
 * @param channel   channel the event is attributed to, e.g. "wechat" or "weibo"
 * @param timestamp event time in epoch milliseconds
 */
case class MarketingUserBehavior(
  userId: String,
  behavior: String,
  channel: String,
  timestamp: Long
)
/**
 * Aggregated event count for one (channel, behavior) pair in one window.
 *
 * @param windowStart textual form of the window's start instant
 * @param windowEnd   textual form of the window's end instant
 * @param channel     channel the counted events belong to
 * @param behavior    behavior the counted events share
 * @param count       number of events counted in the window
 */
case class MarketingViewCount(
  windowStart: String,
  windowEnd: String,
  channel: String,
  behavior: String,
  count: Long
)
/**
 * Flink streaming job that counts simulated marketing events per
 * (channel, behavior) pair over sliding event-time windows and prints
 * the per-window results.
 */
object market_analysis_channel {

  /**
   * Builds the pipeline and runs it.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Each event's own timestamp field is used as its event time.
    val events = env
      .addSource(new SimulatedEventSource())
      .assignAscendingTimestamps(event => event.timestamp)

    // Drop uninstall events, then turn each remaining event into a
    // ((channel, behavior), 1) pair keyed by the (channel, behavior) tuple.
    val keyedOnes = events
      .filter(event => event.behavior != "UNINSTALL")
      .map(event => ((event.channel, event.behavior), 1L))
      .keyBy(pair => pair._1)

    // One-hour windows that slide forward every ten seconds.
    val windowedCounts = keyedOnes
      .timeWindow(Time.hours(1), Time.seconds(10))
      .process(new myProcess())

    windowedCounts.print()
    env.execute()
  }
}
/**
 * Unbounded source that emits randomly generated [[MarketingUserBehavior]]
 * events, roughly one every 10 milliseconds, until it is cancelled.
 *
 * Each event gets a fresh random UUID as its user id, a behavior and a
 * channel picked uniformly at random from the fixed lists below, and the
 * current wall-clock time as its timestamp.
 */
class SimulatedEventSource() extends RichSourceFunction[MarketingUserBehavior] {

  // Flink may call cancel() from a different thread than the one executing
  // run(); the flag must be volatile so the emit loop observes the update.
  @volatile var running = true

  val behaviorType = Seq("CLICK", "DOWNLOAD", "INSTALL", "UNINSTALL")
  val channelSets = Seq("wechat", "weibo", "huaweiStore", "APPStore")
  val rand = new Random()

  /** Requests termination of the emit loop in [[run]]. */
  override def cancel(): Unit = running = false

  /**
   * Emits events until [[cancel]] is called.
   *
   * The previous element counter bounded by `Long.MaxValue` could never
   * stop the loop, so the loop is governed by the `running` flag alone.
   *
   * @param ctx context used to emit the generated events
   */
  override def run(ctx: SourceFunction.SourceContext[MarketingUserBehavior]): Unit = {
    while (running) {
      val id = UUID.randomUUID().toString
      val behavior = behaviorType(rand.nextInt(behaviorType.size))
      val channel = channelSets(rand.nextInt(channelSets.size))
      val timestamp = System.currentTimeMillis()
      ctx.collect(MarketingUserBehavior(id, behavior, channel, timestamp))
      // Throttle the generator to about 100 events per second.
      TimeUnit.MILLISECONDS.sleep(10L)
    }
  }
}
/**
 * Window function that emits one [[MarketingViewCount]] per
 * (channel, behavior) key and window, carrying the number of elements
 * that fell into that window.
 */
class myProcess() extends ProcessWindowFunction[((String, String), Long), MarketingViewCount, (String, String), TimeWindow] {

  /**
   * Counts the elements of one window and emits the summary record.
   *
   * @param key      the (channel, behavior) pair the window is keyed by
   * @param context  window context, used to read the window's start and end
   * @param elements all ((channel, behavior), 1L) pairs assigned to the window
   * @param out      collector receiving the resulting [[MarketingViewCount]]
   */
  override def process(key: (String, String), context: Context, elements: Iterable[((String, String), Long)], out: Collector[MarketingViewCount]): Unit = {
    // The key is built upstream as (channel, behavior), in that order.
    val (channel, behavior) = key
    val start = new Timestamp(context.window.getStart).toString
    val end = new Timestamp(context.window.getEnd).toString
    val total = elements.size.toLong

    // Named arguments: channel and behavior are both Strings, so a positional
    // call can swap them silently without any compiler error.
    out.collect(
      MarketingViewCount(
        windowStart = start,
        windowEnd = end,
        channel = channel,
        behavior = behavior,
        count = total
      )
    )
  }
}