Flink with MySQL, Kafka, and HBase
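
The job below consumes messages from a Kafka 0.8 topic, logs each one, and writes it to an HBase table through a ProcessFunction used as a sink. A RichSourceFunction for reading rows from MySQL is included as well (wired in via the commented-out addSource call), and the Maven pom at the end ties everything together on Flink 1.4.1 with Scala 2.11.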

Main program:

package com.streaming.flink;


import java.util.Properties;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class WordCount {
	private static final Logger logger = LoggerFactory
			.getLogger(WordCount.class);

	
	public static void main(String[] args) throws Exception {
		
		final ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		StreamExecutionEnvironment tenv = StreamExecutionEnvironment
				.getExecutionEnvironment();

		// Read data from the custom JDBC source (see JdbcRead below)
//		DataStream<String> dataStream = tenv.addSource(new JdbcRead());
//		tenv.execute();

		// get input data
		DataSet<String> text = env.fromElements(
				"To be, or not to be,--that is the question:--",
				"Whether 'tis nobler in the mind to suffer",
				"The slings and arrows of outrageous fortune",
				"Or to take arms against a sea of troubles,");

		// group by the tuple field "0" and sum up tuple field "1"
		DataSet<Tuple2<String, Integer>> counts =
				text.flatMap(new LineSplitter()).groupBy(0).sum(1);

		// the same aggregation written with an explicit ReduceFunction
		DataSet<Tuple2<String, Integer>> sum =
				text.flatMap(new LineSplitter())
						.groupBy(0)
						.reduce(new ReduceFunction<Tuple2<String, Integer>>() {
							@Override
							public Tuple2<String, Integer> reduce(
									Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
								return new Tuple2<String, Integer>(a.f0, a.f1 + b.f1);
							}
						});

		FlinkKafkaConsumer08<String> kafkaSource = createKafka(tenv);
		String hbase_zk="namenode1.xxx.com";
		String hbase_port="2015";
		String hbase_table="ns:table1";
		tenv.addSource(kafkaSource).map(new MapFunction<String, String>() {
			@Override
			public String map(String line) {
				logger.error("Received message: {}", line);
				return line;
			}
		}).process(new HbaseSink(hbase_zk, hbase_port, hbase_table));

		
		// execute the streaming job (Kafka -> map -> HBase sink)
		tenv.execute();

		// to run the batch word count and print the result, uncomment:
		// counts.print();

	}


	/**
	 * Implements the string tokenizer that splits sentences into words as a
	 * user-defined FlatMapFunction. The function takes a line (String) and
	 * splits it into multiple pairs in the form of "(word,1)"
	 * (Tuple2&lt;String, Integer&gt;).
	 */
	public static final class LineSplitter implements
			FlatMapFunction<String, Tuple2<String, Integer>> {

		@Override
		public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
			String[] tokens = value.toLowerCase().split("\\W+");

			for (String token : tokens) {
				if (token.length() > 0) {
					out.collect(new Tuple2<String, Integer>(token, 1));
				}
			}
		}
	}

	public static FlinkKafkaConsumer08<String> createKafka(
			StreamExecutionEnvironment tenv) {
		String zkConnc = "kafkazk01.xxx.com:2181";
		String kafkaServer = "kafkabroker01.xxx.com:9092";

		Properties kafkaProps = new Properties();
		kafkaProps.setProperty("zookeeper.connect", zkConnc);
		kafkaProps.setProperty("bootstrap.servers", kafkaServer);
		kafkaProps.setProperty("group.id", "groupId_1");
		kafkaProps.setProperty("auto.offset.reset", "smallest");
		kafkaProps.setProperty("auto.commit.interval.ms", "30000");
		kafkaProps.setProperty("enable.auto.commit", "true");
		FlinkKafkaConsumer08<String> appSource = new FlinkKafkaConsumer08<String>(
				"topicName", new SimpleStringSchema(), kafkaProps);

		return appSource;
	}

}
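
The Kafka consumer above relies on the auto-commit properties set in createKafka(). If offsets should only be committed when a checkpoint completes, checkpointing can be enabled on the streaming environment inside main(), before tenv.execute(). A minimal sketch (the 30-second interval is an illustrative choice, not part of the original job):

		// Hedged sketch: with checkpointing enabled, FlinkKafkaConsumer08 commits
		// its offsets back to ZooKeeper on completed checkpoints rather than on
		// the auto.commit.interval.ms timer alone.
		tenv.enableCheckpointing(30000L);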

Writing to HBase:

package com.streaming.flink;


import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class HbaseSink extends ProcessFunction<String,String> {
	private static final long serialVersionUID = 1L;

	private static final Logger LOGGER = LoggerFactory.getLogger(HbaseSink.class);

    private String _zookeeper;
    private String _port;
    private String _tableName;
    private HTableInterface _table;

    public HbaseSink(String zookeeper, String port, String tableName) {
        _zookeeper = zookeeper;
        _port = port;
        _tableName = tableName;
    }

    @Override
    public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
        try {
            Configuration conf = HBaseConfiguration.create();
            conf.set(HConstants.ZOOKEEPER_QUORUM, _zookeeper);
            conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, _port);
            conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/hbase");

            User user = User.create(UserGroupInformation.createRemoteUser("bmps"));
            HConnection connection = HConnectionManager.createConnection(conf,user);
            _table = connection.getTable(_tableName);

            LOGGER.error("[HbaseSink] : open HbaseSink finished");
        } catch (Exception e) {
            LOGGER.error("[HbaseSink] : open HbaseSink faild {}", e);
        }
    }

    @Override
    public void close() throws Exception {
        // open() swallows exceptions, so _table may be null here
        if (_table != null) {
            _table.close();
        }
    }

    @Override
    public void processElement(String value, Context ctx, Collector<String> out)
            throws Exception {
        LOGGER.error("process String {}", value);
        // demo row key: every message is written to the same row "1"
        String rowKey = "1";
        Put put = new Put(Bytes.toBytes(rowKey));
        put.setDurability(Durability.ASYNC_WAL);
        put.add(Bytes.toBytes("info"), Bytes.toBytes("flink"), Bytes.toBytes(value));
        _table.put(put);
        LOGGER.error("[HbaseSink] : put rowKey:{}, value:{} to hbase", rowKey, value);
    }
    }



}
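
The row key above is the constant "1", so every message overwrites the same HBase row; that is fine for a smoke test but not for real data. A hedged sketch of a processElement() body that spreads writes across rows instead (the hash-plus-timestamp key scheme is illustrative, not from the original job):

        // Hypothetical row-key scheme: hash of the message plus the current
        // timestamp, so each message lands in its own row.
        String rowKey = Integer.toHexString(value.hashCode()) + "_" + System.currentTimeMillis();
        Put put = new Put(Bytes.toBytes(rowKey));
        put.setDurability(Durability.ASYNC_WAL);
        put.add(Bytes.toBytes("info"), Bytes.toBytes("flink"), Bytes.toBytes(value));
        _table.put(put);
        out.collect(value); // forward the element so a downstream operator could still consume it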

Reading from MySQL:

package com.streaming.flink;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class JdbcRead extends RichSourceFunction<String> {
	private static final long serialVersionUID = 1L;
	
	
	private static final Logger logger = LoggerFactory.getLogger(JdbcRead.class);

	private Connection connection = null;
	private PreparedStatement ps = null;

	/**
	 * 1. Establish the connection in open(), so it does not have to be created
	 * and released on every invocation.
	 */
	@Override
	public void open(Configuration parameters) throws Exception {
		super.open(parameters);
		String driver = "com.mysql.jdbc.Driver";
		String url = "jdbc:mysql://127.0.0.1:3309/zkMonitor?characterEncoding=utf8&useSSL=true";
		String username = "root";
		String password = "123456";
		// 1. Load the JDBC driver
		Class.forName(driver);
		// 2. Create the connection
		connection = DriverManager.getConnection(url, username, password);
		// 3. Prepare the statement
		String sql = "select name from t_stock_pin;";
		ps = connection.prepareStatement(sql);
	}

	/**
	 * 2. The DataStream calls run() once to produce the data.
	 */
	@Override
	public void run(SourceContext<String> sourceContext) {
		try {
			// 4. Execute the query and emit each row
			ResultSet resultSet = ps.executeQuery();
			while (resultSet.next()) {
				String name = resultSet.getString("name");
				logger.error("readJDBC name:{}", name);
				sourceContext.collect(name);
			}
			resultSet.close();
		} catch (Exception e) {
			logger.error("run() failed", e);
		}
	}

	@Override
	public void cancel() {
		// the query in run() executes once and returns, so there is nothing to interrupt
	}

	/**
	 * 3. When the job finishes, close the connection and release resources.
	 */
	@Override
	public void close() throws Exception {
		// 5. Close the statement and the connection (statement first)
		super.close();
		if (ps != null) {
			ps.close();
		}
		if (connection != null) {
			connection.close();
		}
	}

}
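
The commented-out lines in the main program show where this source plugs in; a minimal standalone usage sketch (the JdbcReadJob class name, the print() sink, and the job name are illustrative):

package com.streaming.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class JdbcReadJob {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		// JdbcRead (defined above) runs the SELECT once and emits each name
		DataStream<String> names = env.addSource(new JdbcRead());
		names.print();
		env.execute("read-mysql-names");
	}
}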

Finally, the pom.xml:

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.streaming.flink</groupId>
	<artifactId>flink-training</artifactId>
	<version>1.0-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>Flink Quickstart Job</name>
	<url>http://www.myorganization.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<flink.version>1.4.1</flink.version>
		<slf4j.version>1.7.7</slf4j.version>
		<log4j.version>1.2.17</log4j.version>
		<scala.binary.version>2.11</scala.binary.version>
	</properties>

	<repositories>
		<repository>
			<id>apache.snapshots</id>
			<name>Apache Development Snapshot Repository</name>
			<url>https://repository.apache.org/content/repositories/snapshots/</url>
			<releases>
				<enabled>false</enabled>
			</releases>
			<snapshots>
				<enabled>true</enabled>
			</snapshots>
		</repository>
	</repositories>

	<dependencies>
		
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.6</version>
		</dependency>
			
		<!-- Apache Flink dependencies -->
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-core</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-java</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<!-- This dependency is required to actually execute jobs. It is currently pulled in by
				flink-streaming-java, but we explicitly depend on it to safeguard against future changes. -->
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-clients_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>
		
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-connector-kafka-0.8_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>

		<dependency>
			<groupId>org.apache.hbase</groupId>
			<artifactId>hbase-client</artifactId>
			<version>0.98.8-hadoop2</version>
			<exclusions>
				<exclusion>
					<artifactId>jdk.tools</artifactId>
					<groupId>jdk.tools</groupId>
				</exclusion>
			</exclusions>
		</dependency>

		<!-- explicitly add a standard logging framework, as Flink does not have
			a hard dependency on one specific framework by default -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>${slf4j.version}</version>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>${log4j.version}</version>
		</dependency>
	</dependencies>

	<profiles>
		<profile>
			<!-- Profile for packaging correct JAR files -->
			<id>build-jar</id>

			<activation>
				<activeByDefault>false</activeByDefault>
			</activation>

			<dependencies>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-core</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-java</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-clients_${scala.binary.version}</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
					<version>${slf4j.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>log4j</groupId>
					<artifactId>log4j</artifactId>
					<version>${log4j.version}</version>
					<scope>provided</scope>
				</dependency>
			</dependencies>

			<build>
				<plugins>
					<!-- We use the maven-shade plugin to create a fat jar that contains all dependencies
						except flink and its transitive dependencies. The resulting fat-jar can be executed
						on a cluster. Change the value of Program-Class if your program entry point changes. -->
					<plugin>
						<groupId>org.apache.maven.plugins</groupId>
						<artifactId>maven-shade-plugin</artifactId>
						<version>2.4.1</version>
						<executions>
							<!-- Run shade goal on package phase -->
							<execution>
								<phase>package</phase>
								<goals>
									<goal>shade</goal>
								</goals>
								<configuration>
									<artifactSet>
										<excludes>
											<exclude>org.apache.flink:force-shading</exclude>
											<exclude>com.google.code.findbugs:jsr305</exclude>
											<exclude>org.slf4j:*</exclude>
										</excludes>
									</artifactSet>
									<filters>
										<filter>
											<!-- Do not copy the signatures in the META-INF folder.
                                            Otherwise, this might cause SecurityExceptions when using the JAR. -->
											<artifact>*:*</artifact>
											<excludes>
												<exclude>META-INF/*.SF</exclude>
												<exclude>META-INF/*.DSA</exclude>
												<exclude>META-INF/*.RSA</exclude>
											</excludes>
										</filter>
									</filters>
									<!-- If you want to use ./bin/flink run <quickstart jar> uncomment the following lines.
										This will add a Main-Class entry to the manifest file -->
									<!--
									<transformers>
										<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
										<mainClass>com.streaming.flink.StreamingJob</mainClass>
										</transformer>
									</transformers>
									-->
								</configuration>
							</execution>
						</executions>
					</plugin>
				</plugins>
			</build>
		</profile>
	</profiles>

	<build>
		<plugins>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.1</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>

		
	</build>
</project>
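
With the build-jar profile (it is not active by default), the deployable fat jar is produced by mvn clean package -Pbuild-jar. Since the Main-Class transformer above is left commented out, the entry class has to be given explicitly when submitting, e.g. ./bin/flink run -c com.streaming.flink.WordCount target/flink-training-1.0-SNAPSHOT.jar (the jar name follows from the artifactId and version in this pom).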

Reposted from my.oschina.net/u/778683/blog/1930336