Custom source operator
import org.apache.flink.streaming.api.functions.source.SourceFunction
import java.util.Calendar
import scala.util.Random
/**
* DATE:2022/10/4 0:03
* AUTHOR: GX
*/
/**
 * A single click record flowing through the stream.
 *
 * @param user      name of the user who produced the click
 * @param url       the URL that was visited
 * @param timestamp time of the click in epoch milliseconds
 */
case class Event(
  user: String,
  url: String,
  timestamp: Long
)
//addSource
/**
 * Custom Flink source that emits one randomly generated click [[Event]] per second
 * until the job cancels it.
 *
 * It implements `SourceFunction`, so the operator always runs with parallelism 1.
 * A source that should run with a higher parallelism has to implement
 * `ParallelSourceFunction[Event]` instead.
 */
class ClickSource extends SourceFunction[Event] {

  // Loop flag polled by run(). Flink calls cancel() from a different thread than the
  // one executing run(), so the field is volatile to make the update visible to the loop.
  @volatile var running: Boolean = true

  /**
   * Emits events in a loop until [[cancel]] clears the flag.
   *
   * @param sourceContext Flink context used to send each generated event downstream
   */
  override def run(sourceContext: SourceFunction.SourceContext[Event]): Unit = {
    val random = new Random()

    // Value pools the random events are drawn from.
    val users = Array("Mary", "Alice", "Bob", "Cary", "Leborn")
    val urls = Array("./home", "./cart", "./fav", "./prod?id=1", "./prod?id=2", "./prod?id=3")

    // Use the flag as the loop condition and keep sending data until cancelled.
    while (running) {
      val event = Event(
        users(random.nextInt(users.length)),
        urls(random.nextInt(urls.length)),
        Calendar.getInstance.getTimeInMillis
      )
      sourceContext.collect(event)

      // Throttle the source to one event per second.
      Thread.sleep(1000)
    }
  }

  /** Asks the emit loop in [[run]] to stop after its current iteration. */
  override def cancel(): Unit = running = false
}
Flink reads data
import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, createTypeInformation}
/**
* DATE:2022/10/4 0:21
* AUTHOR:GX
*/
/** Builds a Flink streaming job that reads from `ClickSource` and prints every event. */
object SourceCustomTest {

  /**
   * Wires the custom source to a print sink and starts the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val environment = StreamExecutionEnvironment.getExecutionEnvironment
    environment.setParallelism(1)

    // ClickSource is a non-parallel source, so its parallelism must stay at 1.
    // To read in parallel, the source has to implement ParallelSourceFunction;
    // only then can the operator be given a higher parallelism.
    val clicks = environment.addSource(new ClickSource)
    clicks.print()

    environment.execute()
  }
}