Flink 统计用户来源、行为分析

用户来源、行为分析

import java.sql.Timestamp
import java.util.UUID

import org.apache.flink.streaming.api.windowing.time.Time
import java.util.concurrent.TimeUnit

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.source.{
    
    RichSourceFunction, SourceFunction}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

import scala.util.Random

/**
 * Input record describing a single user action coming from a marketing channel.
 *
 * @param userId    identifier of the user who performed the action
 * @param behavior  kind of action performed (e.g. "CLICK", "INSTALL")
 * @param channel   marketing channel the action originated from (e.g. "wechat")
 * @param timestamp event time of the action, used as the record's event-time timestamp;
 *                  the simulated source fills it with epoch milliseconds
 */
case class MarketingUserBehavior(
  userId: String,
  behavior: String,
  channel: String,
  timestamp: Long
)

/**
 * Output record: number of events seen for one (channel, behavior) pair in one window.
 *
 * @param windowStart window start, rendered as a string
 * @param windowEnd   window end, rendered as a string
 * @param channel     marketing channel the counted events came from
 * @param behavior    kind of action that was counted
 * @param count       number of events in the window for this channel/behavior pair
 */
case class MarketingViewCount(
  windowStart: String,
  windowEnd: String,
  channel: String,
  behavior: String,
  count: Long
)

/**
 * Streaming job that counts user actions per (channel, behavior) pair over a
 * sliding event-time window (1 hour long, advancing every 10 seconds) and prints
 * the resulting [[MarketingViewCount]] records.
 */
object market_analysis_channel {

  /**
   * Builds and runs the pipeline: simulated source -> drop "UNINSTALL" events ->
   * key by (channel, behavior) -> sliding window -> count -> print.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    // Source events carry their own timestamps, which are treated as ascending.
    val events = env
      .addSource(new SimulatedEventSource())
      .assignAscendingTimestamps(event => event.timestamp)

    // Turn every remaining event into ((channel, behavior), 1) and key by that pair.
    val keyedOnes = events
      .filter(event => event.behavior != "UNINSTALL")
      .map(event => ((event.channel, event.behavior), 1L))
      .keyBy(pair => pair._1)

    val counts = keyedOnes
      .timeWindow(Time.hours(1), Time.seconds(10))
      .process(new myProcess())

    counts.print()

    env.execute()
  }
}

/**
 * Source that emits randomly generated [[MarketingUserBehavior]] events,
 * roughly one every 10 milliseconds, until it is cancelled.
 *
 * Each event gets a random UUID as user id, a behavior drawn uniformly from
 * `behaviorType`, a channel drawn uniformly from `channelSets`, and the current
 * wall-clock time in milliseconds as its timestamp.
 */
class SimulatedEventSource() extends RichSourceFunction[MarketingUserBehavior] {

  // cancel() is invoked from a different thread than the one executing run(),
  // so the flag must be volatile for the emit loop to reliably observe the change.
  @volatile var running: Boolean = true

  /** Behaviors an emitted event may carry. */
  val behaviorType: Seq[String] = Seq("CLICK", "DOWNLOAD", "INSTALL", "UNINSTALL")

  /** Channels an emitted event may originate from. */
  val channelSets: Seq[String] = Seq("wechat", "weibo", "huaweiStore", "APPStore")

  val rand: Random = new Random()

  /** Asks the emit loop in `run` to stop after the current iteration. */
  override def cancel(): Unit = running = false

  /**
   * Emits random events through `ctx` until `cancel()` is called.
   *
   * @param ctx source context used to emit the generated events
   */
  override def run(ctx: SourceFunction.SourceContext[MarketingUserBehavior]): Unit = {
    // Upper bound on emitted elements; at Long.MaxValue the source is effectively unbounded.
    val MaxElements: Long = Long.MaxValue
    var count = 0L

    while (running && count <= MaxElements) {
      val id = UUID.randomUUID().toString
      val behavior = behaviorType(rand.nextInt(behaviorType.size))
      val channel = channelSets(rand.nextInt(channelSets.size))
      val timestamp = System.currentTimeMillis()

      ctx.collect(MarketingUserBehavior(id, behavior, channel, timestamp))
      count += 1

      // Throttle generation to roughly 100 events per second.
      TimeUnit.MILLISECONDS.sleep(10L)
    }
  }
}

/**
 * Window function that emits one [[MarketingViewCount]] per key and window.
 *
 * The key is a `(channel, behavior)` pair; the count is the number of elements
 * that fell into the window for that key.
 */
class myProcess() extends ProcessWindowFunction[((String, String), Long), MarketingViewCount, (String, String), TimeWindow] {

  /**
   * Counts the window's elements and emits the result.
   *
   * @param key      `(channel, behavior)` pair the window is keyed by
   * @param context  window context, used to read the window's start and end
   * @param elements all `((channel, behavior), 1L)` records assigned to this window
   * @param out      collector receiving the single result record
   */
  override def process(key: (String, String), context: Context, elements: Iterable[((String, String), Long)], out: Collector[MarketingViewCount]): Unit = {
    val (channel, behavior) = key
    val windowStart = new Timestamp(context.window.getStart).toString
    val windowEnd = new Timestamp(context.window.getEnd).toString
    val count = elements.size.toLong

    // Named arguments keep channel and behavior in their matching fields; the two
    // are both Strings, so a positional mix-up would compile silently.
    out.collect(
      MarketingViewCount(
        windowStart = windowStart,
        windowEnd = windowEnd,
        channel = channel,
        behavior = behavior,
        count = count
      )
    )
  }
}

猜你喜欢

转载自blog.csdn.net/weixin_44429965/article/details/108058358