一、第一个程序wordcount(实时流处理)
1、pom.xml文件
<!-- Maven build fragment for the Flink word-count examples:
     Flink 1.5.1 core/streaming/client dependencies (Scala 2.11 builds),
     Java 8 compilation, a runnable jar whose manifest main class is
     SocketWindowWordCount, and a copy of all dependency jars into
     target/lib so the jar's Class-Path (lib/) resolves at runtime. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<flink.version>1.5.1</flink.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
</dependencies>
<build>
<!-- Uncomment to exclude core.properties from the packaged jar -->
<!-- <resources> <resource> <directory>src/main/resources</directory> <excludes>
<exclude>core.properties</exclude> </excludes> </resource> </resources> -->
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<mainClass>com.tydic.SocketWindowWordCount</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<!-- Copy all dependency jars into target/lib at package time, matching
     the lib/ classpathPrefix configured in the manifest above -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory> ${project.build.directory}/lib
</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
2、代码
package com.tydic;
importorg.apache.flink.api.common.functions.FlatMapFunction;
importorg.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
public class SocketWindowWordCount {
public static void main(String[] args) throws Exception {
final int port;
try {
final ParameterTool params = ParameterTool.fromArgs(args);
port = params.getInt("port");
} catch (Exception e) {
System.err.println("No port specified.Pleas run 'SocketWindowCount --port <port>'");
return;
}
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// local模式
// final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
DataStream<String> text = env.socketTextStream("192.168.128.146", port, "\n");
@SuppressWarnings("serial")
DataStream<WordWithCount> windowCounts = text
.flatMap(new FlatMapFunction<String, WordWithCount>() {
public void flatMap(String value, Collector<WordWithCount> out) throws Exception {
for (String word : value.split("\\s")) {
out.collect(new WordWithCount(word, 1L));
}
}
})
.keyBy("word").timeWindow(Time.seconds(5), Time.seconds(1))
.reduce(new ReduceFunction<WordWithCount>() {
public WordWithCount reduce(WordWithCount a, WordWithCount b) throws Exception {
return new WordWithCount(a.word, a.count + b.count);
}
});
windowCounts.print().setParallelism(1);
env.execute("Socket Window WordCount(zyl_test)");
}
public static class WordWithCount {
public String word;
public long count;
public WordWithCount() {
}
public WordWithCount(String word, long count) {
this.word = word;
this.count = count;
}
@Override
public String toString() {
return word + " : " + count;
}
}
}
3、打jar包,提交jar到flink集群
/opt/flink-1.5.1/bin/flink run FlinkMaven-0.0.1-SNAPSHOT.jar --port 9000
4、测试
启动服务:nc -l 9000
5、日志查看
[root@rhel6-147 log]# tail -f flink-root-taskexecutor-0-rhel6-147.out
hello : 1
tert : 1
...
二、读取本地文件(类似批处理)
package com.tydic;
importorg.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.util.Collector;
public class LocalFileWordCount {
public static void main(String[] args) throws Exception {
final ParameterTool params = ParameterTool.fromArgs(args);
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().setGlobalJobParameters(params);
// get input data
DataSet<String> text = env.readTextFile(params.get("input"));
DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Splitter()) // split up the lines in pairs (2-tuples) containing: (word,1)
.groupBy(0).aggregate(Aggregations.SUM, 1);// group by the tuple field "0" and sum up tuple field "1"
counts.writeAsText(params.get("output"));
env.execute("WordCount Example");
}
}
// The operations are defined by specialized classes, here the Splitter class.
@SuppressWarnings("serial")
class Splitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
@Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
// normalize and split the line into words
String[] tokens = value.split("\\W+");
// emit the pairs
for (String token : tokens) {
if (token.length() > 0) {
out.collect(new Tuple2<String, Integer>(token, 1));
}
}
}
}
运行:
/opt/flink-1.5.1/bin/flink run --class com.tydic.LocalFileWordCount FlinkMaven-0.0.1-SNAPSHOT.jar --input file:///tmp/zyl/input.txt --output file:///tmp/zyl/output.txt
问题:
org.apache.flink.client.program.ProgramInvocationException: java.io.IOException: Error opening the Input Split file:/tmp/zyl/input.txt [0,24]: /tmp/zyl/input.txt (No such file or directory)
at org.apache.flink.client.program.rest.RestClusterClient.submitJob(RestClusterClient.java:264)
at org.apache.flink.client.program.ClusterClient.run(ClusterClient.java:464)
at org.apache.flink.client.program.ClusterClient.run(ClusterClient.java:452)
at org.apache.flink.client.program.ContextEnvironment.execute(ContextEnvironment.java:62)
at com.tydic.LocalFileWordCount.main(LocalFileWordCount.java:25)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.jav
解决方法:本地文件要在taskmanager所对应的机器上!