Flink学习:UDF(map,filter,flatMap)(匿名函数、匿名类、自定义函数、富函数四类实现方式)

1. map算子(匿名函数、匿名类、自定义函数、富函数四类实现方式)

package com.qu.udf

import com.qu.source.SensorReading
import org.apache.flink.api.common.functions.{IterationRuntimeContext, MapFunction, RichMapFunction, RuntimeContext}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration


//udf: 就是数据处理函数
// 在我们对数据做map、filter、flatMap等操作时,可以传入一个
// 1.匿名函数、2.(MapFunction、FilterFunction、FlatMapFunction)匿名类、3.自己创建的一个(MapFunction、FilterFunction、FlatMapFunction)类
// 4.富函数RichMapFunction、RichFilterFunction、RichFlatMapFunction:富函数里面包含生命周期和上下文内容的方法调用,可以在方法执行前后进行一些其他操作
/**
 * Demonstrates four ways of supplying the user-defined function of a `map` operator:
 * a Scala lambda, an anonymous `MapFunction`, a named `MapFunction` class, and a
 * `RichMapFunction`.
 *
 * Every variant converts one text line into a `SensorReading`. A line must contain at
 * least three comma-separated fields: the first is used as-is, the second is parsed as
 * a `Long` and the third as a `Double`.
 */
object MapUDFTest {

  /**
   * Builds the four `map` pipelines over the same text file, prints each result stream
   * and executes the job.
   *
   * @param args optional; when present, `args(0)` is used as the input file path,
   *             otherwise the original local sample file is read
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Input file: first command-line argument if given, else the original default path.
    val inputPath = args.headOption
      .getOrElse("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")

    val inputStream = env.readTextFile(inputPath)

    // map, variant 1: anonymous function (lambda)
    val map1 = inputStream.map { line =>
      // Split once and reuse the fields instead of re-splitting for every column.
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    }
    map1.print("udf map 匿名函数:")

    // map, variant 2: anonymous MapFunction class
    val map2 = inputStream.map {
      new MapFunction[String, SensorReading] {
        override def map(line: String): SensorReading = {
          val fields = line.split(",")
          SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
        }
      }
    }
    map2.print("udf map MapFunction匿名类:")

    // map, variant 3: named MapFunction class
    val map3 = inputStream.map(new MyMapFunction())
    map3.print("udf map 自定义MapFunction:")

    // map, variant 4: RichMapFunction. The mapping logic is the same, but the rich
    // variant additionally offers the open/close lifecycle hooks and getRuntimeContext.
    val map4 = inputStream.map(new MyRichMapFunction())
    map4.print("udf map 自定义富函数RichMapFunction:")

    env.execute("udf test")
  }
}

/**
 * Named `MapFunction` that converts one comma-separated text line into a `SensorReading`.
 */
class MyMapFunction extends MapFunction[String, SensorReading] {

  /**
   * Parses a single input line.
   *
   * @param line text with at least three comma-separated fields; the second must parse
   *             as a `Long` and the third as a `Double`
   * @return the reading built from the first three fields
   */
  override def map(line: String): SensorReading = {
    // Split once per record rather than once per extracted column.
    val fields = line.split(",")
    SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
  }
}

/**
 * `RichMapFunction` variant of the line-to-`SensorReading` conversion.
 *
 * Besides `map`, a rich function exposes lifecycle hooks (`open`, `close`) and the
 * inherited context accessors (`setRuntimeContext`, `getRuntimeContext`,
 * `getIterationRuntimeContext`). The accessors are simply inherited here: overriding
 * them only to call `super` adds nothing.
 */
class MyRichMapFunction extends RichMapFunction[String, SensorReading] {

  /**
   * Lifecycle hook; work that must happen before the mapping logic runs goes here.
   *
   * @param parameters configuration object forwarded to the parent implementation
   */
  override def open(parameters: Configuration): Unit = {
    super.open(parameters)
    // Preparatory work before processing can be added here.
  }

  /**
   * Parses a single input line.
   *
   * @param line text with at least three comma-separated fields; the second must parse
   *             as a `Long` and the third as a `Double`
   * @return the reading built from the first three fields
   */
  override def map(line: String): SensorReading = {
    // Split once per record rather than once per extracted column.
    val fields = line.split(",")
    SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
  }

  /** Lifecycle hook; work that must happen after the mapping logic has run goes here. */
  override def close(): Unit = {
    super.close()
    // Follow-up work after processing can be added here.
  }
}

2. flatMap算子(匿名函数、匿名类、自定义函数、富函数四类实现方式)

package com.qu.udf

import com.qu.source.SensorReading
import org.apache.flink.api.common.functions.{FlatMapFunction, MapFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.util.Collector


//udf: 就是数据处理函数
// 在我们对数据做map、filter、flatMap等操作时,可以传入一个
// 1.匿名函数、2.(MapFunction、FilterFunction、FlatMapFunction)匿名类、3.自己创建的一个(MapFunction、FilterFunction、FlatMapFunction)类
// 4.富函数RichMapFunction、RichFilterFunction、RichFlatMapFunction:富函数里面包含生命周期和上下文内容的方法调用,可以在方法执行前后进行一些其他操作
/**
 * Demonstrates three ways of supplying the user-defined function of a `flatMap`
 * operator: a Scala lambda, an anonymous `FlatMapFunction`, and a named
 * `FlatMapFunction` class. (The rich-function variant is shown for `map` only.)
 *
 * Every variant emits exactly one element per input line: either a tuple of the first
 * and third comma-separated fields, or a `SensorReading` built from the first three.
 */
object FlatMapUDFTest {

  /**
   * Builds the `flatMap` pipelines over the same text file, prints each result stream
   * and executes the job.
   *
   * @param args optional; when present, `args(0)` is used as the input file path,
   *             otherwise the original local sample file is read
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Input file: first command-line argument if given, else the original default path.
    val inputPath = args.headOption
      .getOrElse("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")

    val inputStream = env.readTextFile(inputPath)

    // flatMap, variant 1: anonymous functions (lambdas).
    // Each lambda returns a single-element List, so one element is emitted per line.
    val flatMap11 = inputStream.flatMap { line =>
      val fields = line.split(",")
      List((fields(0), fields(2)))
    }
    val flatMap12 = inputStream.flatMap { line =>
      val fields = line.split(",")
      List(SensorReading(fields(0), fields(1).toLong, fields(2).toDouble))
    }
    flatMap11.print("udf flatMap 匿名函数(转成Touple):")
    flatMap12.print("udf flatMap 匿名函数(转成对象):")

    // flatMap, variant 2: anonymous FlatMapFunction classes
    val flatMap21 = inputStream.flatMap {
      new FlatMapFunction[String, (String, String)] {
        override def flatMap(line: String, out: Collector[(String, String)]): Unit = {
          val fields = line.split(",")
          out.collect((fields(0), fields(2)))
        }
      }
    }
    flatMap21.print("udf flatMap FlatMapFunction匿名类(转成Touple):")
    val flatMap22 = inputStream.flatMap {
      new FlatMapFunction[String, SensorReading] {
        override def flatMap(line: String, out: Collector[SensorReading]): Unit = {
          val fields = line.split(",")
          out.collect(SensorReading(fields(0), fields(1).toLong, fields(2).toDouble))
        }
      }
    }
    flatMap22.print("udf flatMap FlatMapFunction匿名类(转成对象):")

    // flatMap, variant 3: named FlatMapFunction class
    val flatMap3 = inputStream.flatMap(new MyFlatMapFunction())
    flatMap3.print("udf flatMap 自定义FlatMapFunction:")

    // flatMap, variant 4: rich function omitted here (already shown for map)

    env.execute("udf test")

  }
}

/**
 * Named `FlatMapFunction` that turns one comma-separated text line into a single
 * `SensorReading` and hands it to the collector.
 */
class MyFlatMapFunction extends FlatMapFunction[String, SensorReading] {

  /**
   * Parses `line` and emits exactly one reading.
   *
   * @param line text with at least three comma-separated fields; the second must parse
   *             as a `Long` and the third as a `Double`
   * @param out  collector that receives the parsed reading
   */
  override def flatMap(line: String, out: Collector[SensorReading]): Unit = {
    val fields = line.split(",")
    val reading = SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    out.collect(reading)
  }
}

3. filter算子(匿名函数、匿名类、自定义函数、富函数四类实现方式)

package com.qu.udf

import org.apache.flink.api.common.functions.{FilterFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment



//udf: 就是数据处理函数
// 在我们对数据做map、filter、flatMap等操作时,可以传入一个
// 1.匿名函数、2.(MapFunction、FilterFunction、FlatMapFunction)匿名类、3.自己创建的一个(MapFunction、FilterFunction、FlatMapFunction)类
// 4.富函数RichMapFunction、RichFilterFunction、RichFlatMapFunction:富函数里面包含生命周期和上下文内容的方法调用,可以在方法执行前后进行一些其他操作
/**
 * Demonstrates three ways of supplying the user-defined function of a `filter`
 * operator: a Scala lambda, an anonymous `FilterFunction`, and a named
 * `FilterFunction` class. (The rich-function variant is shown for `map` only.)
 *
 * Every variant keeps a line only when its third comma-separated field, parsed as a
 * `Double`, is greater than 30.
 */
object FilterUDFTest {

  /**
   * Builds the `filter` pipelines over the same text file, prints each result stream
   * and executes the job.
   *
   * @param args optional; when present, `args(0)` is used as the input file path,
   *             otherwise the original local sample file is read
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Input file: first command-line argument if given, else the original default path.
    val inputPath = args.headOption
      .getOrElse("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")

    val inputStream = env.readTextFile(inputPath)

    // filter, variant 1: anonymous function (lambda)
    val filter1 = inputStream.filter(_.split(",")(2).toDouble > 30)
    filter1.print("udf filter 匿名函数:")

    // filter, variant 2: anonymous FilterFunction class
    val filter2 = inputStream.filter {
      new FilterFunction[String] {
        override def filter(line: String): Boolean = {
          line.split(",")(2).toDouble > 30
        }
      }
    }
    filter2.print("udf filter FilterFunction匿名类:")

    // filter, variant 3: named FilterFunction class
    val filter3 = inputStream.filter(new MyFilterMapFunction())
    filter3.print("udf filter 自定义FilterMapFunction:")

    // filter, variant 4: rich function omitted here (already shown for map)

    env.execute("udf test")

  }
}

/**
 * Named `FilterFunction` that keeps a line only when its third comma-separated field,
 * parsed as a `Double`, is greater than 30.
 */
class MyFilterMapFunction extends FilterFunction[String] {

  /**
   * @param line text with at least three comma-separated fields; the third must parse
   *             as a `Double`
   * @return `true` when the third field is strictly greater than 30
   */
  override def filter(line: String): Boolean = {
    val thirdField = line.split(",")(2)
    val value = thirdField.toDouble
    value > 30
  }
}

猜你喜欢

转载自blog.csdn.net/q18729096963/article/details/107594415
今日推荐