Flink(二)快速上手之wordCount(java)

  • 创建maven功臣
  • pom文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>MyFlink</groupId>
    <artifactId>MyFlink</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!--flink Java相关依赖-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.9.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.9.0</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <createDependencyReducedPom>false</createDependencyReducedPom>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!--如果要打包的话,这里要换成对应的 main class-->
                                    <mainClass>com.haier.cosmodata.source.MyDataStreamSourceDemo</mainClass>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                            </transformers>
                            <filters>
                                <filter>
                                    <artifact>*:*:*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
  • StreamWordCount
package com.sgg.bigdata;

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * 流式处理WordCount
 * Created by huqian on 2020/5/23 22:24
 */
public class StreamWordCount {
    public static void main(String[] args) throws Exception {
         //创建一个流处理的执行环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        //接受socket数据流
        DataStreamSource<String> textDataSteam = env.socketTextStream("localhost",7777);

        //逐一读取数据,打散之后进行WordCount
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordCountDataStream = textDataSteam
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        String[] tokens = s.split(" ");

                        for (String token : tokens) {
                            if (token.length() > 0) {
                                collector.collect(new Tuple2<String, Integer>(token, 1));
                            }
                        }
                    }
                })
                .filter(new FilterFunction<Tuple2<String, Integer>>() {
                    public boolean filter(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
                        if (stringIntegerTuple2.equals(null)) {
                            return false;
                        }
                        return true;
                    }
                })
                .keyBy(0)
                .sum(1);

        //打印输出
        wordCountDataStream.print();

        //执行任务
        env.execute("StreamWordCountJob");
        //测试需要开启端口7777

    }
}

-- 测试

猜你喜欢

转载自www.cnblogs.com/xinjitu-001/p/12945437.html