Flink入门示例——wordCount(一)

说明(流处理)

  • 读取 Kafka 流数据,按时间窗口统计词频(示例代码中窗口为 1 分钟,checkpoint 间隔为 5 秒)
  • 将处理结果写入Redis

代码示例

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Build descriptor for the Flink word-count streaming example:
         Kafka 0.8 source, 1-minute windows, Redis (RPUSH) sink. -->
    <groupId>com.test</groupId>
    <artifactId>learn_flink</artifactId>
    <version>1.0-SNAPSHOT</version>
    <name>Flink learn</name>
    <description>Flink learning project</description>

    <developers>
        <developer>
            <id>lvxw</id>
            <name>吕学文</name>
            <email>[email protected]</email>
        </developer>
    </developers>
    <dependencies>
        <!-- Logging backend used by the example job. -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <!-- Flink core and streaming APIs (Scala 2.11 builds, Flink 1.4.2). -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>1.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.4.2</version>
        </dependency>
        <!-- Connectors: Kafka 0.8 source and filesystem sink. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.8_2.11</artifactId>
            <version>1.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_2.11</artifactId>
            <version>1.4.2</version>
        </dependency>
        <!-- Runtime requirements of the Kafka 0.8 client stack
             (Yammer metrics and the ZooKeeper client wrapper). -->
        <dependency>
            <groupId>com.yammer.metrics</groupId>
            <artifactId>metrics-core</artifactId>
            <version>2.2.0</version>
        </dependency>
        <dependency>
            <groupId>com.101tec</groupId>
            <artifactId>zkclient</artifactId>
            <version>0.10</version>
        </dependency>
        <!-- Hadoop client libraries; presumably needed for the filesystem
             connector / HADOOP_USER_NAME usage in the job — confirm. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.4.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.4.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.4.1</version>
        </dependency>
        <!-- Kafka broker/client jar matching the 0.8 connector above. -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.8.2.1</version>
        </dependency>
        <!-- Scala runtime and compiler, matching the _2.11 artifacts above. -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.12</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-compiler</artifactId>
            <version>2.11.12</version>
        </dependency>
        <!-- Redis sink (Apache Bahir) plus its Jedis / commons-pool2 dependencies. -->
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.0</version>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-pool2</artifactId>
            <version>2.4.2</version>
        </dependency>
    </dependencies>

    <properties>
        <sonar.language>java</sonar.language>
        <java-version>1.7</java-version>
        <!-- Fix: the original declared <src.dir> and <test.src.dir> twice each.
             Maven keeps only the LAST declaration, so sourceDirectory silently
             resolved to src/main/java and the Scala sources were never on the
             build path. Single declarations pointing at the Scala trees now. -->
        <src.dir>src/main/scala</src.dir>
        <src.res.dir>src/main/resource</src.res.dir>
        <test.src.dir>src/test/scala</test.src.dir>
        <test.res.dir>src/test/resource</test.res.dir>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <jarfile.name>Flink</jarfile.name>
        <jar.out.dir>jar</jar.out.dir>
        <maven.build.timestamp.format>yyyyMMdd-HHmmss</maven.build.timestamp.format>
    </properties>
    <build>
        <sourceDirectory>${src.dir}</sourceDirectory>
        <resources>
            <resource>
                <directory>${src.dir}</directory>
            </resource>
            <resource>
                <directory>${src.res.dir}</directory>
                <includes>
                    <!-- **/* already matches files at every depth; the redundant
                         top-level-only <include>*</include> was removed. -->
                    <include>**/*</include>
                </includes>
                <filtering>true</filtering>
            </resource>
        </resources>

        <testSourceDirectory>${test.src.dir}</testSourceDirectory>
        <testResources>
            <testResource>
                <directory>${test.src.dir}</directory>
            </testResource>
        </testResources>
        <plugins>
            <!-- Shades the connector/client jars that a Flink cluster
                 does not provide into the job jar. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <includes>
                                    <include>org.apache.flink:flink-connector-kafka-*</include>
                                    <include>org.apache.flink:flink-connector-filesystem_2.11</include>
                                    <include>org.apache.commons:commons-pool2</include>
                                    <include>redis.clients:jedis</include>
                                    <include>org.apache.kafka:*</include>
                                    <include>com.yammer.metrics:*</include>
                                    <include>com.101tec:*</include>
                                    <include>org.apache.bahir:*</include>
                                </includes>
                            </artifactSet>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- Fix: compiler level was hard-coded to 1.6, contradicting the
                 java-version=1.7 property; both now come from the property.
                 Plugin version pinned for reproducible builds. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>${java-version}</source>
                    <target>${java-version}</target>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-jar-plugin</artifactId>
                <version>2.4</version>
                <configuration>
                    <finalName>${jarfile.name}</finalName>
                    <outputDirectory>${jar.out.dir}</outputDirectory>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <id>Build deploy package</id>
                        <phase>package</phase>
                        <configuration>
                            <appendAssemblyId>false</appendAssemblyId>
                            <tarLongFileMode>gnu</tarLongFileMode>
                            <!-- NOTE(review): ${project.activeProfiles[0].id} is not a
                                 standard Maven property expression and likely appears
                                 literally in the final name — verify and replace with a
                                 profile-set property if a profile id is wanted here. -->
                            <finalName>${project.artifactId}-${project.version}-${project.activeProfiles[0].id}-${maven.build.timestamp}</finalName>
                            <descriptors>
                                <descriptor>assembly.xml</descriptor>
                            </descriptors>
                        </configuration>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.1</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
package com.test

import java.text.SimpleDateFormat
import java.util.Properties
import java.util.Date

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
import org.apache.flink.util.Collector
/**
 * Streaming word count: reads text lines from a Kafka 0.8 topic, counts words
 * over 1-minute processing-time windows, and RPUSHes "word->count" entries into
 * a Redis list keyed by the window's formatted timestamp.
 *
 * NOTE(review): the article claims results every 5 seconds, but the window
 * below is 1 minute — only the checkpoint interval is 5 seconds. Confirm which
 * was intended before changing either.
 */
object WordCountKafka5Seconds {

  // Timestamp (to the minute) prepended to every word key.
  // NOTE(review): SimpleDateFormat is not thread-safe; a shared instance is
  // only safe while the window operator runs with parallelism 1 — confirm.
  private val sdf = new SimpleDateFormat("yyyyMMddHHmm")

  /** Maps a (key, value) pair onto Redis RPUSH: key = list name, value = element. */
  class RedisExampleMapper extends RedisMapper[(String, String)] {

    override def getCommandDescription: RedisCommandDescription =
      new RedisCommandDescription(RedisCommand.RPUSH)

    override def getKeyFromData(data: (String, String)): String = data._1

    override def getValueFromData(data: (String, String)): String = data._2
  }

  val ZOOKEEPER_HOST = "artemis-02:2181,artemis-03:2181,artemis-04:2181/microlens/artemis/kafka"
  // NOTE(review): all three broker entries point at artemis-02 — presumably
  // artemis-02/03/04 was intended; verify against the cluster configuration.
  val KAFKA_BROKER = "artemis-02:9092,artemis-02:9092,artemis-02:9092"
  val TRANSACTION_GROUP = "transaction"
  val TOPIC = "test-flink"

  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "hadoop")

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Checkpoint every 5 seconds with exactly-once semantics.
    env.enableCheckpointing(5000)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    // Kafka 0.8 consumer configuration (ZooKeeper-based offsets plus brokers).
    val kafkaProps = new Properties()
    kafkaProps.setProperty("zookeeper.connect", ZOOKEEPER_HOST)
    kafkaProps.setProperty("bootstrap.servers", KAFKA_BROKER)
    kafkaProps.setProperty("group.id", TRANSACTION_GROUP)

    val kafkaConsumer = new FlinkKafkaConsumer08[String](TOPIC, new SimpleStringSchema(), kafkaProps)
    kafkaConsumer.setStartFromLatest()

    val result = env
      .addSource(kafkaConsumer)
      .flatMap(_.split(" "))          // one element per word
      .map((_, 1))                    // (word, 1)
      .keyBy(0)
      .timeWindow(Time.minutes(1L))
      .apply { (tuple, window, values, out: Collector[Map[String, Int]]) =>
        val time = sdf.format(new Date()) + ":"
        // Sum the per-word counts. The original re-scanned each group with an
        // inner for-loop filtering on keys that groupBy had already made equal;
        // summing the group's values directly is equivalent and linear.
        val counts = values
          .groupBy(_._1)
          .map { case (word, pairs) => (time + word, pairs.map(_._2).sum) }
        out.collect(counts)
      }
      .flatMap(x => x)
      // Split "yyyyMMddHHmm:word" back apart:
      // Redis key = timestamp, list element = "word->count".
      .map { x =>
        val kv = x._1.split(":")
        (kv(0), kv(1) + "->" + x._2)
      }

    val conf = new FlinkJedisPoolConfig.Builder().setHost("artemis-02").setPort(6379).build()
    result.addSink(new RedisSink[(String, String)](conf, new RedisExampleMapper()))

    env.execute()
  }
}

猜你喜欢

转载自blog.csdn.net/believe2017slwx/article/details/79976187