JStorm Study Notes: A Simple Example Based on Kafka, ElasticSearch, and HBase

The relevant Maven dependencies are as follows:

<hadoop.version>2.7.2</hadoop.version>
<hbase.version>1.2.3</hbase.version>
<kafka.version>0.10.0.0</kafka.version>
<jstorm.version>2.2.1</jstorm.version>
<elasticsearch.version>5.3.0</elasticsearch.version>


        <!-- HBase dependencies -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>${hbase.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>io.netty</groupId>
                    <artifactId>netty-all</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>io.netty</groupId>
                    <artifactId>netty-all</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-protocol</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-annotations</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-hadoop2-compat</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <!-- Kafka dependencies -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>${kafka.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>

        <!-- JStorm dependencies -->
        <dependency>
            <groupId>com.alibaba.jstorm</groupId>
            <artifactId>jstorm-core</artifactId>
            <version>${jstorm.version}</version>
        </dependency>

        <!-- ElasticSearch 5.3.0 -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>${elasticsearch.version}</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>${elasticsearch.version}</version>
        </dependency>


Copy the jar files required above into the extlib directory under the JStorm installation directory.


The Java example code is as follows:

import java.util.ArrayList;
import java.util.List;

import org.cisiondata.modules.jstorm.bolt.elastic.ElasticBolt;
import org.cisiondata.modules.jstorm.bolt.hbase.HBaseBolt;
import org.cisiondata.modules.jstorm.spout.kafka.BrokerHosts;
import org.cisiondata.modules.jstorm.spout.kafka.KafkaSpout;
import org.cisiondata.modules.jstorm.spout.kafka.SpoutConfig;
import org.cisiondata.modules.jstorm.spout.kafka.StringMultiScheme;
import org.cisiondata.modules.jstorm.spout.kafka.ZkHosts;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;

public class SimpleJStormTopology {
   
	private static final String TOPOLOGY_NAME = "Kafka2HBaseElastic";
	
	private static final String KAFKA_SPOUT = "KafkaSpout";
	
	private static final String HBASE_BOLT = "HBaseBolt";
	
	private static final String ELASTIC_BOLT = "ElasticBolt";
	
	private static boolean isCluster = true;
	
	public static void main(String[] args) {
		String brokerZks = "192.168.0.11:2181,192.168.0.12:2181,192.168.0.13:2181";
		String topic = "elastic5";
		String zkRoot = "/kafka";
		String id = "jstormdataprocess";
		
		BrokerHosts brokerHosts = new ZkHosts(brokerZks, "/kafka/brokers");
		SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, topic, zkRoot, id);
		spoutConfig.startOffsetTime = -1L; // -2 reads from the earliest Kafka offset, -1 from the latest offset, 0 resumes from the offset stored in ZooKeeper
		spoutConfig.scheme = new StringMultiScheme();
		List<String> zkServers = new ArrayList<String>();
		zkServers.add("192.168.0.11");
		zkServers.add("192.168.0.12");
		zkServers.add("192.168.0.13");
		spoutConfig.zkServers = zkServers;
		spoutConfig.zkPort = 2181;
		
		TopologyBuilder builder = new TopologyBuilder();
		
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
		builder.setSpout(KAFKA_SPOUT, kafkaSpout, 1);
		
		ElasticBolt elasticBolt = new ElasticBolt();
		builder.setBolt(ELASTIC_BOLT, elasticBolt, 3).setNumTasks(6).localOrShuffleGrouping(KAFKA_SPOUT);
		
		HBaseBolt hbaseBolt = new HBaseBolt();
		builder.setBolt(HBASE_BOLT, hbaseBolt, 3).setNumTasks(6).localOrShuffleGrouping(KAFKA_SPOUT);
		
		/**
		BaseHBaseMapper mapper = new BaseHBaseMapper()
			.withRowKeyField("_id").withColumnFamily("i");
		List<String> fields = new ArrayList<String>();
		fields.add("insertTime");
		fields.add("updateTime");
		fields.add("sourceFile");
		mapper.withColumnFields(new Fields(fields));
		BaseHBaseBolt hbaseBolt = new BaseHBaseBolt("logistics", mapper)
			.withConfigKey("hbase.config").withBatchSize(1000);
		builder.setBolt(HBASE_BOLT, hbaseBolt, 2).allGrouping(KAFKA_SPOUT);
		*/
		
		Config config = new Config();
		/**
		Map<String, Object> hbaseConfig = new HashMap<String, Object>();
		hbaseConfig.put("hbase.master", "192.168.0.15:60000");
		hbaseConfig.put("hbase.rootdir", "/hbase");
		hbaseConfig.put("hbase.zookeeper.quorum", "192.168.0.15,192.168.0.16,192.168.0.17");
		hbaseConfig.put("hbase.zookeeper.property.clientPort", "2181");
		config.put("hbase.config", hbaseConfig);
		**/
		config.setDebug(true);
		/**
		config.put(Config.STORM_META_SERIALIZATION_DELEGATE, "org.apache.storm.serialization.SerializationDelegate");
		*/
		
		if (isCluster) {
			try {
				config.setNumWorkers(3);
				StormSubmitter.submitTopologyWithProgressBar(TOPOLOGY_NAME, config, builder.createTopology());
			} catch (Exception e) {
				e.printStackTrace();
			}
		} else {
			try {
				LocalCluster cluster = new LocalCluster();
				cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
				Utils.sleep(10000000);
				cluster.killTopology(TOPOLOGY_NAME);
				cluster.shutdown();
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
	}
}
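
For a quick end-to-end test it helps to push a few JSON records into the elastic5 topic before submitting the topology. Below is a minimal sketch of such a producer built on the kafka-clients dependency declared above; the bootstrap server address and port are placeholders, and the index, type and _id fields in the sample record simply mirror the fields that HBaseHelper and ElasticHelper strip out of each message, so the "logistics" name is only an illustration.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class SimpleKafkaProducerTest {

	public static void main(String[] args) {
		Properties props = new Properties();
		// Placeholder broker list; replace with the brokers registered under /kafka in ZooKeeper
		props.put("bootstrap.servers", "192.168.0.11:9092");
		props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
		props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

		Producer<String, String> producer = new KafkaProducer<String, String>(props);
		// index/type/_id follow the record layout the helpers below expect
		String record = "{\"index\":\"logistics\",\"type\":\"logistics\",\"_id\":\"1001\","
				+ "\"insertTime\":\"2017-05-11 10:00:00\",\"sourceFile\":\"demo.txt\"}";
		producer.send(new ProducerRecord<String, String>("elastic5", record));
		producer.close();
	}
}

Note that the helpers only flush when the buffered tuple count reaches batchSize (1000 by default), so for a quick test either send enough records or lower batchSize in the bolts.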

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

public class ElasticBolt extends BaseRichBolt {

	private static final long serialVersionUID = 1L;
	
	private int batchSize = 1000;
	
	private ElasticHelper elasticHelper = null;
	
	@SuppressWarnings("rawtypes")
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		elasticHelper = new ElasticHelper(batchSize, collector);
	}

	public void execute(Tuple input) {
		try {
			elasticHelper.add(input);
		} catch (Exception e) {
			elasticHelper.fail(e);
		}
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}
	

}


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.cisiondata.modules.elastic5.ESClient;
import org.cisiondata.utils.json.GsonUtils;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;

public class ElasticHelper {
	
	private static Logger LOG = LoggerFactory.getLogger(ElasticHelper.class);

	private List<Tuple> tuples = null;
	
	private List<String> datas = null;
	
	private int batchSize = 1000;
	
	private OutputCollector collector = null;
	
	public ElasticHelper(int batchSize, OutputCollector collector) {
		this.tuples = new ArrayList<Tuple>();
		this.datas = new ArrayList<String>();
		if (batchSize > 0) this.batchSize = batchSize;
		this.collector = collector;
	}
	
	public void add(Tuple tuple) {
		tuples.add(tuple);
		datas.add(tuple.getString(0));
		if (tuples.size() == batchSize) {
			bulkInsert(datas);
			datas.clear();
			ack();
		}
	}
	
	public void ack() {
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.ack(tuples.get(i));
		}
		tuples.clear();
	}
	
	public void fail(Exception e) {
		collector.reportError(e);
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.fail(tuples.get(i));
		}
		tuples.clear();
		datas.clear();
	}
	
	public void bulkInsert(List<String> datas) {
		if (null == datas || datas.size() == 0) return;
		Client client = ESClient.getInstance().getClient();
		BulkRequestBuilder bulkRequestBuilder = client.prepareBulk();
		try {
			for (int i = 0, len = datas.size(); i < len; i++) {
				// Each record is a JSON string; the index, type and _id fields follow
				// the same convention that HBaseHelper relies on below.
				Map<String, Object> source = GsonUtils.fromJsonToMap(datas.get(i));
				String index = String.valueOf(source.remove("index"));
				String type = String.valueOf(source.remove("type"));
				String id = String.valueOf(source.remove("_id"));
				IndexRequestBuilder irb = client.prepareIndex(index, type, id).setSource(source);
				bulkRequestBuilder.add(irb);
			}
		} catch (Exception e) {
			LOG.error(e.getMessage(), e);
		}
		BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
		if (bulkResponse.hasFailures()) {
			LOG.info(bulkResponse.buildFailureMessage());
		}
		System.out.println("elastic5 insert " + datas.size() + " records finish!");
	}
	
}
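
ElasticHelper obtains its Client from an ESClient singleton (org.cisiondata.modules.elastic5.ESClient) that is not shown in the post. A minimal sketch of such a singleton, assuming the Elasticsearch 5.3 TransportClient from the transport dependency above, might look like the following; the cluster name and the node address 192.168.0.18:9300 are placeholders, not values from the original project.

import java.net.InetAddress;

import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

public class ESClient {

	private static ESClient instance = null;

	private TransportClient client = null;

	private ESClient() {
		try {
			// Placeholder cluster name and node address; adjust to the real cluster
			Settings settings = Settings.builder()
					.put("cluster.name", "elasticsearch").build();
			client = new PreBuiltTransportClient(settings)
					.addTransportAddress(new InetSocketTransportAddress(
							InetAddress.getByName("192.168.0.18"), 9300));
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	public static synchronized ESClient getInstance() {
		if (null == instance) instance = new ESClient();
		return instance;
	}

	public Client getClient() {
		return client;
	}
}

In a real deployment the cluster name and node list would come from configuration, and the client should be closed when the worker shuts down.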


import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

public class HBaseBolt extends BaseRichBolt {

	private static final long serialVersionUID = 1L;
	
	private int batchSize = 1000;
	
	private HBaseHelper hbaseHelper = null;
	
	@SuppressWarnings("rawtypes")
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		hbaseHelper = new HBaseHelper(batchSize, collector);
	}

	public void execute(Tuple input) {
		try {
			hbaseHelper.add(input);
		} catch (Exception e) {
			hbaseHelper.fail(e);
		}
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}
	

}

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.cisiondata.utils.bigdata.HBaseUtils;
import org.cisiondata.utils.json.GsonUtils;
import org.cisiondata.utils.serde.SerializerUtils;

import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;

public class HBaseHelper {
	
	private List<Tuple> tuples = null;
	
	private List<String> datas = null;
	
	private int batchSize = 1000;
	
	private OutputCollector collector = null;
	
	public HBaseHelper(int batchSize, OutputCollector collector) {
		this.tuples = new ArrayList<Tuple>();
		this.datas = new ArrayList<String>();
		if (batchSize > 0) this.batchSize = batchSize;
		this.collector = collector;
	}
	
	public void add(Tuple tuple) {
		tuples.add(tuple);
		datas.add(tuple.getString(0));
		if (tuples.size() == batchSize) {
			bulkInsert(datas);
			datas.clear();
			ack();
		}
	}
	
	public void ack() {
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.ack(tuples.get(i));
		}
		tuples.clear();
	}
	
	public void fail(Exception e) {
		collector.reportError(e);
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.fail(tuples.get(i));
		}
		tuples.clear();
		datas.clear();
	}
	
	public void bulkInsert(List<String> datas) {
		if (null == datas || datas.size() == 0) return;
		Map<String, List<Put>> map = new HashMap<String, List<Put>>();
		Map<String, Object> source = null;
		for (int i = 0, len = datas.size(); i < len; i++) {
			source = GsonUtils.fromJsonToMap(datas.get(i));
			source.remove("index");
			String tableName = String.valueOf(source.remove("type"));
			List<Put> puts = map.get(tableName);
			if (null == puts) {
				puts = new ArrayList<Put>();
				map.put(tableName, puts);
			}
			String rowKey = String.valueOf(source.remove("_id"));
			Put put = new Put(Bytes.toBytes(rowKey));
			for (Map.Entry<String, Object> entry : source.entrySet()) {
				String column = entry.getKey();
				String family = column.startsWith("c") ? "i" : "s";
				put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), SerializerUtils.write(entry.getValue()));
			}
			puts.add(put);
		}
		for (Map.Entry<String, List<Put>> entry : map.entrySet()) {
			try {
				HBaseUtils.insertRecords(entry.getKey(), entry.getValue());
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		System.out.println("hbase insert " + datas.size() + " records finish!");
	}
	
}
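
HBaseHelper in turn delegates the actual writes to HBaseUtils.insertRecords, which is also not included in the post. A rough sketch against the HBase 1.2 client API could look like the following; the ZooKeeper quorum and client port are taken from the commented-out hbase.config block in the topology and may differ from the real environment.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;

public class HBaseUtils {

	private static Connection connection = null;

	private static synchronized Connection getConnection() throws IOException {
		if (null == connection) {
			Configuration conf = HBaseConfiguration.create();
			// Placeholder quorum, matching the commented-out hbase.config in the topology
			conf.set("hbase.zookeeper.quorum", "192.168.0.15,192.168.0.16,192.168.0.17");
			conf.set("hbase.zookeeper.property.clientPort", "2181");
			connection = ConnectionFactory.createConnection(conf);
		}
		return connection;
	}

	public static void insertRecords(String tableName, List<Put> puts) throws IOException {
		Table table = getConnection().getTable(TableName.valueOf(tableName));
		try {
			table.put(puts);
		} finally {
			table.close();
		}
	}
}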



