版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wulinshishen/article/details/71732922
相关依赖如下:
<hadoop.version>2.7.2</hadoop.version>
<hbase.version>1.2.3</hbase.version>
<kafka.version>0.10.0.0</kafka.version>
<jstorm.version>2.2.1</jstorm.version>
<elasticsearch.version>5.3.0</elasticsearch.version>
<!-- HBase依赖包 -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-protocol</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-annotations</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop2-compat</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- Kafka依赖包 -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>${kafka.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version}</version>
</dependency>
<!-- JStorm依赖包 -->
<dependency>
<groupId>com.alibaba.jstorm</groupId>
<artifactId>jstorm-core</artifactId>
<version>${jstorm.version}</version>
</dependency>
<!-- ElasticSearch 5.3.0 -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>${elasticsearch.version}</version>
</dependency>
将以上需要的jar包拷贝到JStorm目录下的extlib目录下面
import java.util.ArrayList;
import java.util.List;
import org.cisiondata.modules.jstorm.bolt.elastic.ElasticBolt;
import org.cisiondata.modules.jstorm.bolt.hbase.HBaseBolt;
import org.cisiondata.modules.jstorm.spout.kafka.BrokerHosts;
import org.cisiondata.modules.jstorm.spout.kafka.KafkaSpout;
import org.cisiondata.modules.jstorm.spout.kafka.SpoutConfig;
import org.cisiondata.modules.jstorm.spout.kafka.StringMultiScheme;
import org.cisiondata.modules.jstorm.spout.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;
public class SimpleJStormTopology {

    private static final String TOPOLOGY_NAME = "Kafka2HBaseElastic";
    private static final String KAFKA_SPOUT = "KafkaSpout";
    private static final String HBASE_BOLT = "HBaseBolt";
    private static final String ELASTIC_BOLT = "ElasticBolt";

    /** true: submit to the remote JStorm cluster; false: run in an in-process LocalCluster. */
    private static boolean isCluster = true;

    public static void main(String[] args) {
        TopologyBuilder builder = buildTopology();
        Config config = new Config();
        config.setDebug(true);
        if (isCluster) {
            submitToCluster(builder, config);
        } else {
            runLocally(builder, config);
        }
    }

    /** Wires the Kafka spout to the two sink bolts (ElasticSearch and HBase). */
    private static TopologyBuilder buildTopology() {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(KAFKA_SPOUT, new KafkaSpout(buildSpoutConfig()), 1);
        // Both sinks consume the same stream independently; localOrShuffleGrouping
        // prefers an in-worker consumer task to avoid network hops.
        builder.setBolt(ELASTIC_BOLT, new ElasticBolt(), 3)
                .setNumTasks(6)
                .localOrShuffleGrouping(KAFKA_SPOUT);
        builder.setBolt(HBASE_BOLT, new HBaseBolt(), 3)
                .setNumTasks(6)
                .localOrShuffleGrouping(KAFKA_SPOUT);
        return builder;
    }

    /** Builds the Kafka spout configuration (brokers resolved via ZooKeeper). */
    private static SpoutConfig buildSpoutConfig() {
        String brokerZks = "192.168.0.11:2181,192.168.0.12:2181,192.168.0.13:2181";
        String topic = "elastic5";
        String zkRoot = "/kafka";
        String consumerId = "jstormdataprocess";
        BrokerHosts brokerHosts = new ZkHosts(brokerZks, "/kafka/brokers");
        SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, topic, zkRoot, consumerId);
        // -2 = start from the earliest Kafka offset, -1 = start from the latest,
        // 0 = resume from the offset stored in ZooKeeper.
        spoutConfig.startOffsetTime = -1L;
        spoutConfig.scheme = new StringMultiScheme();
        List<String> zkServers = new ArrayList<String>();
        zkServers.add("192.168.0.11");
        zkServers.add("192.168.0.12");
        zkServers.add("192.168.0.13");
        spoutConfig.zkServers = zkServers;
        spoutConfig.zkPort = 2181;
        return spoutConfig;
    }

    /** Submits the topology to the running cluster with 3 workers. */
    private static void submitToCluster(TopologyBuilder builder, Config config) {
        try {
            config.setNumWorkers(3);
            StormSubmitter.submitTopologyWithProgressBar(TOPOLOGY_NAME, config, builder.createTopology());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Runs the topology in a local cluster for a long fixed interval, then tears it down. */
    private static void runLocally(TopologyBuilder builder, Config config) {
        try {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
            Utils.sleep(10000000);
            cluster.killTopology(TOPOLOGY_NAME);
            cluster.shutdown();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
public class ElasticBolt extends BaseRichBolt {
private static final long serialVersionUID = 1L;
private int batchSize = 1000;
private ElasticHelper elasticHelper = null;
@SuppressWarnings("rawtypes")
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
elasticHelper = new ElasticHelper(batchSize, collector);
}
public void execute(Tuple input) {
try {
elasticHelper.add(input);
} catch (Exception e) {
elasticHelper.fail(e);
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
}
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.cisiondata.modules.elastic5.ESClient;
import org.cisiondata.utils.json.GsonUtils;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;
/**
 * Batches tuples for ElasticSearch bulk indexing on behalf of {@link ElasticBolt}:
 * buffers tuples until {@code batchSize} is reached, issues one bulk request,
 * then acks (or on error fails) the whole buffered batch.
 */
public class ElasticHelper {

    private static Logger LOG = LoggerFactory.getLogger(ElasticHelper.class);

    /** Tuples buffered since the last flush; acked or failed as one batch. */
    private List<Tuple> tuples = null;
    /** Raw JSON payloads (field 0 of each buffered tuple) awaiting bulk insert. */
    private List<String> datas = null;
    /** Flush threshold; one bulk request is sent per this many tuples. */
    private int batchSize = 1000;
    private OutputCollector collector = null;

    public ElasticHelper(int batchSize, OutputCollector collector) {
        this.tuples = new ArrayList<Tuple>();
        this.datas = new ArrayList<String>();
        if (batchSize > 0) this.batchSize = batchSize;
        this.collector = collector;
    }

    /**
     * Buffers one tuple and, once the batch is full, flushes it to
     * ElasticSearch and acks every buffered tuple.
     */
    public void add(Tuple tuple) {
        tuples.add(tuple);
        datas.add(tuple.getString(0));
        if (tuples.size() == batchSize) {
            bulkInsert(datas);
            datas.clear();
            ack();
        }
    }

    /** Acks every buffered tuple and clears the tuple buffer. */
    public void ack() {
        for (int i = 0, len = tuples.size(); i < len; i++) {
            collector.ack(tuples.get(i));
        }
        tuples.clear();
    }

    /** Reports the error, fails every buffered tuple, and drops both buffers. */
    public void fail(Exception e) {
        collector.reportError(e);
        for (int i = 0, len = tuples.size(); i < len; i++) {
            collector.fail(tuples.get(i));
        }
        tuples.clear();
        datas.clear();
    }

    /**
     * Writes the buffered JSON documents to ElasticSearch in one bulk request.
     *
     * BUGFIX: the original version added a never-initialized (null)
     * IndexRequestBuilder to the bulk request for every payload, so no document
     * was ever built from the data. Each payload is now parsed and turned into
     * an index request using the routing metadata it carries.
     */
    public void bulkInsert(List<String> datas) {
        if (null == datas || datas.isEmpty()) return;
        Client client = ESClient.getInstance().getClient();
        BulkRequestBuilder bulkRequestBuilder = client.prepareBulk();
        try {
            for (int i = 0, len = datas.size(); i < len; i++) {
                // Each payload is a JSON document carrying its own routing
                // metadata ("index", "type", "_id") — the same convention
                // HBaseHelper relies on; strip those keys so only the real
                // document fields are indexed.
                Map<String, Object> source = GsonUtils.fromJsonToMap(datas.get(i));
                String index = String.valueOf(source.remove("index"));
                String type = String.valueOf(source.remove("type"));
                String id = String.valueOf(source.remove("_id"));
                IndexRequestBuilder irb = client.prepareIndex(index, type, id).setSource(source);
                bulkRequestBuilder.add(irb);
            }
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
        // Nothing to send (e.g. every payload failed to parse) — avoid
        // executing an empty bulk request.
        if (bulkRequestBuilder.numberOfActions() == 0) return;
        BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
        if (bulkResponse.hasFailures()) {
            LOG.error(bulkResponse.buildFailureMessage());
        }
        LOG.info("elastic5 insert {} records finish!", datas.size());
    }
}
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
public class HBaseBolt extends BaseRichBolt {
private static final long serialVersionUID = 1L;
private int batchSize = 1000;
private HBaseHelper hbaseHelper = null;
@SuppressWarnings("rawtypes")
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
hbaseHelper = new HBaseHelper(batchSize, collector);
}
public void execute(Tuple input) {
try {
hbaseHelper.add(input);
} catch (Exception e) {
hbaseHelper.fail(e);
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
}
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.cisiondata.utils.bigdata.HBaseUtils;
import org.cisiondata.utils.json.GsonUtils;
import org.cisiondata.utils.serde.SerializerUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;
/**
 * Batches tuples for HBase writes on behalf of {@link HBaseBolt}: buffers
 * tuples until {@code batchSize} is reached, groups the documents into Puts
 * per table, writes them, then acks (or on error fails) the buffered batch.
 *
 * Changed vs. original: uses SLF4J (consistent with ElasticHelper) instead of
 * {@code e.printStackTrace()} and {@code System.out.println}.
 */
public class HBaseHelper {

    private static Logger LOG = LoggerFactory.getLogger(HBaseHelper.class);

    /** Tuples buffered since the last flush; acked or failed as one batch. */
    private List<Tuple> tuples = null;
    /** Raw JSON payloads (field 0 of each buffered tuple) awaiting insert. */
    private List<String> datas = null;
    /** Flush threshold; one batched write is issued per this many tuples. */
    private int batchSize = 1000;
    private OutputCollector collector = null;

    public HBaseHelper(int batchSize, OutputCollector collector) {
        this.tuples = new ArrayList<Tuple>();
        this.datas = new ArrayList<String>();
        if (batchSize > 0) this.batchSize = batchSize;
        this.collector = collector;
    }

    /**
     * Buffers one tuple and, once the batch is full, flushes it to HBase and
     * acks every buffered tuple.
     */
    public void add(Tuple tuple) {
        tuples.add(tuple);
        datas.add(tuple.getString(0));
        if (tuples.size() == batchSize) {
            bulkInsert(datas);
            datas.clear();
            ack();
        }
    }

    /** Acks every buffered tuple and clears the tuple buffer. */
    public void ack() {
        for (int i = 0, len = tuples.size(); i < len; i++) {
            collector.ack(tuples.get(i));
        }
        tuples.clear();
    }

    /** Reports the error, fails every buffered tuple, and drops both buffers. */
    public void fail(Exception e) {
        collector.reportError(e);
        for (int i = 0, len = tuples.size(); i < len; i++) {
            collector.fail(tuples.get(i));
        }
        tuples.clear();
        datas.clear();
    }

    /**
     * Converts each JSON payload into a Put and writes them grouped by table.
     * Each payload carries its own routing metadata: "type" is used as the
     * HBase table name and "_id" as the row key; "index" is discarded.
     */
    public void bulkInsert(List<String> datas) {
        if (null == datas || datas.size() == 0) return;
        Map<String, List<Put>> map = new HashMap<String, List<Put>>();
        Map<String, Object> source = null;
        for (int i = 0, len = datas.size(); i < len; i++) {
            source = GsonUtils.fromJsonToMap(datas.get(i));
            source.remove("index");
            String tableName = String.valueOf(source.remove("type"));
            List<Put> puts = map.get(tableName);
            if (null == puts) {
                puts = new ArrayList<Put>();
                map.put(tableName, puts);
            }
            String rowKey = String.valueOf(source.remove("_id"));
            Put put = new Put(Bytes.toBytes(rowKey));
            for (Map.Entry<String, Object> entry : source.entrySet()) {
                String column = entry.getKey();
                // NOTE(review): columns whose name starts with "c" go to
                // family "i", everything else to "s" — presumably a schema
                // convention of the target tables; confirm before reuse.
                String family = column.startsWith("c") ? "i" : "s";
                put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), SerializerUtils.write(entry.getValue()));
            }
            puts.add(put);
        }
        for (Map.Entry<String, List<Put>> entry : map.entrySet()) {
            try {
                HBaseUtils.insertRecords(entry.getKey(), entry.getValue());
            } catch (Exception e) {
                // One failing table must not abort the writes to the others.
                LOG.error(e.getMessage(), e);
            }
        }
        LOG.info("hbase insert {} records finish!", datas.size());
    }
}