Aprendizaje de Flink: UDF (map, filter, flatMap) — cuatro formas de implementación: función anónima, clase anónima, función personalizada y función enriquecida (rich function)

1. Operador map (cuatro formas de implementación: función anónima, clase anónima, función personalizada y función enriquecida)

package com.qu.udf

import com.qu.source.SensorReading
import org.apache.flink.api.common.functions.{IterationRuntimeContext, MapFunction, RichMapFunction, RuntimeContext}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration


/**
 * Demonstrates the four ways of passing a user-defined function (UDF) to the `map` operator.
 *
 * A UDF is simply the data-processing function handed to operators such as `map`, `filter`
 * and `flatMap`. It can be supplied as:
 *
 *  1. an anonymous function (lambda);
 *  1. an anonymous class implementing `MapFunction` / `FilterFunction` / `FlatMapFunction`;
 *  1. a named class implementing one of those interfaces;
 *  1. a rich function (`RichMapFunction`, `RichFilterFunction`, `RichFlatMapFunction`), which
 *     additionally exposes lifecycle and runtime-context methods so that extra work can be
 *     done before and after processing.
 */
object MapUDFTest {

  /**
   * Builds one `map` pipeline per UDF style over the same text source, prints each result
   * stream under its own label, and runs the job.
   *
   * Every input line is expected to hold at least three comma-separated fields that parse as
   * a string, a long and a double; a line that does not will make the job fail with the
   * corresponding index or number-format exception.
   *
   * @param args optional program arguments; when present, `args(0)` replaces the default
   *             input file path
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read a local text file. The original tutorial path stays as the default so that running
    // without arguments behaves exactly as before.
    val inputPath = args.headOption.getOrElse(
      "/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
    val inputStream = env.readTextFile(inputPath)

    // map: 1. anonymous function
    val map1 = inputStream.map { line =>
      // Split once and reuse the fields instead of re-splitting the line for every field.
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    }
    map1.print("udf map 匿名函数:")

    // map: 2. anonymous class
    val map2 = inputStream.map {
      new MapFunction[String, SensorReading] {
        override def map(line: String): SensorReading = {
          val fields = line.split(",")
          SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
        }
      }
    }
    map2.print("udf map MapFunction匿名类:")

    // map: 3. named MapFunction implementation
    val map3 = inputStream.map(new MyMapFunction())
    map3.print("udf map 自定义MapFunction:")

    // map: 4. rich function. The mapping logic is identical, but the RichMapFunction parent
    // class also provides lifecycle and context methods (open, close, getRuntimeContext).
    val map4 = inputStream.map(new MyRichMapFunction())
    map4.print("udf map 自定义富函数RichMapFunction:")

    env.execute("udf test")
  }
}

/**
 * Named `MapFunction` implementation that turns one comma-separated text line into a
 * `SensorReading`.
 */
class MyMapFunction extends MapFunction[String, SensorReading] {

  /**
   * Parses a single line.
   *
   * @param line text containing at least three comma-separated fields; the second must parse
   *             as a long and the third as a double
   * @return the reading built from the first three fields
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than three fields
   * @throws NumberFormatException if the second or third field is not numeric
   */
  override def map(line: String): SensorReading = {
    // Split once; the original split the same line three times to read three fields.
    val fields = line.split(",")
    SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
  }
}

/**
 * Rich-function variant of the line-to-`SensorReading` mapper.
 *
 * The mapping logic is the same as a plain `MapFunction`; the difference is that the
 * `RichMapFunction` parent class also provides `open`, `close`, `setRuntimeContext`,
 * `getRuntimeContext` and `getIterationRuntimeContext`. Only `open` and `close` are overridden
 * here as hook points; the context accessors are inherited unchanged, because overrides that
 * merely forward to `super` add nothing.
 */
class MyRichMapFunction extends RichMapFunction[String, SensorReading] {

  /**
   * Parses a single line.
   *
   * @param line text containing at least three comma-separated fields; the second must parse
   *             as a long and the third as a double
   * @return the reading built from the first three fields
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than three fields
   * @throws NumberFormatException if the second or third field is not numeric
   */
  override def map(line: String): SensorReading = {
    // Split once and reuse the fields rather than splitting the line three times.
    val fields = line.split(",")
    SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
  }

  /**
   * Lifecycle hook: place any work that must happen before processing starts after the
   * `super` call.
   *
   * @param parameters configuration passed to the parent implementation
   */
  override def open(parameters: Configuration): Unit = {
    super.open(parameters)
    // Work to perform before the function starts processing goes here.
  }

  /** Lifecycle hook: place any work that must happen after processing ends after the `super` call. */
  override def close(): Unit = {
    super.close()
    // Work to perform after the function has finished processing goes here.
  }
}

2. Operador flatMap (cuatro formas de implementación: función anónima, clase anónima, función personalizada y función enriquecida)

package com.qu.udf

import com.qu.source.SensorReading
import org.apache.flink.api.common.functions.{FlatMapFunction, MapFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.util.Collector


/**
 * Demonstrates the ways of passing a user-defined function (UDF) to the `flatMap` operator.
 *
 * A UDF is simply the data-processing function handed to operators such as `map`, `filter`
 * and `flatMap`. It can be supplied as:
 *
 *  1. an anonymous function (lambda);
 *  1. an anonymous class implementing `MapFunction` / `FilterFunction` / `FlatMapFunction`;
 *  1. a named class implementing one of those interfaces;
 *  1. a rich function (`RichMapFunction`, `RichFilterFunction`, `RichFlatMapFunction`), which
 *     additionally exposes lifecycle and runtime-context methods so that extra work can be
 *     done before and after processing.
 */
object FlatMapUDFTest {

  /**
   * Builds one `flatMap` pipeline per UDF style over the same text source, prints each result
   * stream under its own label, and runs the job.
   *
   * Every input line is expected to hold at least three comma-separated fields; the pipelines
   * that build a `SensorReading` also need the second field to parse as a long and the third
   * as a double.
   *
   * @param args optional program arguments; when present, `args(0)` replaces the default
   *             input file path
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read a local text file. The original tutorial path stays as the default so that running
    // without arguments behaves exactly as before.
    val inputPath = args.headOption.getOrElse(
      "/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
    val inputStream = env.readTextFile(inputPath)

    // flatMap: 1. anonymous function
    // Emits exactly one (first field, third field) pair per line, wrapped in a List because
    // flatMap expects a collection of results.
    val flatMap11 = inputStream.flatMap { line =>
      val fields = line.split(",")
      List((fields(0), fields(2)))
    }
    val flatMap12 = inputStream.flatMap { line =>
      val fields = line.split(",")
      List(SensorReading(fields(0), fields(1).toLong, fields(2).toDouble))
    }
    flatMap11.print("udf flatMap 匿名函数(转成Touple):")
    flatMap12.print("udf flatMap 匿名函数(转成对象):")

    // flatMap: 2. anonymous class
    val flatMap21 = inputStream.flatMap {
      new FlatMapFunction[String, (String, String)] {
        override def flatMap(line: String, out: Collector[(String, String)]): Unit = {
          val fields = line.split(",")
          out.collect((fields(0), fields(2)))
        }
      }
    }
    flatMap21.print("udf flatMap FlatMapFunction匿名类(转成Touple):")
    val flatMap22 = inputStream.flatMap {
      new FlatMapFunction[String, SensorReading] {
        override def flatMap(line: String, out: Collector[SensorReading]): Unit = {
          val fields = line.split(",")
          out.collect(SensorReading(fields(0), fields(1).toLong, fields(2).toDouble))
        }
      }
    }
    flatMap22.print("udf flatMap FlatMapFunction匿名类(转成对象):")

    // flatMap: 3. named FlatMapFunction implementation
    val flatMap3 = inputStream.flatMap(new MyFlatMapFunction())
    flatMap3.print("udf flatMap 自定义FlatMapFunction:")

    // flatMap: 4. rich function — not repeated here; the map example already shows it.

    env.execute("udf test")

  }
}

/**
 * Named `FlatMapFunction` implementation that emits one `SensorReading` for every
 * comma-separated input line.
 */
class MyFlatMapFunction extends FlatMapFunction[String, SensorReading] {

  /**
   * Parses the line and forwards the resulting reading to the collector.
   *
   * @param line text containing at least three comma-separated fields; the second must parse
   *             as a long and the third as a double
   * @param out  collector that receives the single parsed reading
   */
  override def flatMap(line: String, out: Collector[SensorReading]): Unit = {
    val fields = line.split(",")
    val reading = SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    out.collect(reading)
  }
}

3. Operador filter (cuatro formas de implementación: función anónima, clase anónima, función personalizada y función enriquecida)

package com.qu.udf

import org.apache.flink.api.common.functions.{FilterFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment



/**
 * Demonstrates the ways of passing a user-defined function (UDF) to the `filter` operator.
 *
 * A UDF is simply the data-processing function handed to operators such as `map`, `filter`
 * and `flatMap`. It can be supplied as:
 *
 *  1. an anonymous function (lambda);
 *  1. an anonymous class implementing `MapFunction` / `FilterFunction` / `FlatMapFunction`;
 *  1. a named class implementing one of those interfaces;
 *  1. a rich function (`RichMapFunction`, `RichFilterFunction`, `RichFlatMapFunction`), which
 *     additionally exposes lifecycle and runtime-context methods so that extra work can be
 *     done before and after processing.
 */
object FilterUDFTest {

  /**
   * Builds one `filter` pipeline per UDF style over the same text source, prints each result
   * stream under its own label, and runs the job.
   *
   * Each pipeline keeps the lines whose third comma-separated field, parsed as a double, is
   * greater than 30.
   *
   * @param args optional program arguments; when present, `args(0)` replaces the default
   *             input file path
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read a local text file. The original tutorial path stays as the default so that running
    // without arguments behaves exactly as before.
    val inputPath = args.headOption.getOrElse(
      "/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
    val inputStream = env.readTextFile(inputPath)

    // Single definition of the cut-off shared by the inline filters below.
    val threshold = 30.0

    // filter: 1. anonymous function
    val filter1 = inputStream.filter(_.split(",")(2).toDouble > threshold)
    filter1.print("udf filter 匿名函数:")


    // filter: 2. anonymous class
    val filter2 = inputStream.filter {
      new FilterFunction[String] {
        override def filter(line: String): Boolean = {
          line.split(",")(2).toDouble > threshold
        }
      }
    }
    filter2.print("udf filter FilterFunction匿名类:")

    // filter: 3. named FilterFunction implementation
    val filter3 = inputStream.filter(new MyFilterMapFunction())
    filter3.print("udf filter 自定义FilterMapFunction:")

    // filter: 4. rich function — not repeated here; the map example already shows it.

    env.execute("udf test")

  }
}

/**
 * Named `FilterFunction` implementation that keeps the lines whose third comma-separated
 * field is greater than a threshold.
 *
 * @param threshold exclusive lower bound for the third field; defaults to 30.0, the value the
 *                  class previously hard-coded, so `new MyFilterMapFunction()` behaves as before
 */
class MyFilterMapFunction(threshold: Double = 30.0) extends FilterFunction[String] {

  /**
   * Decides whether a line passes the filter.
   *
   * @param line text containing at least three comma-separated fields, the third numeric
   * @return `true` when the third field, parsed as a double, is strictly greater than the
   *         threshold
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than three fields
   * @throws NumberFormatException if the third field is not numeric
   */
  override def filter(line: String): Boolean = line.split(",")(2).toDouble > threshold
}

Supongo que te gusta

Origin blog.csdn.net/q18729096963/article/details/107594415
Recomendado
Clasificación