Storm-Kafka Integration

[storm-kafka] Processing streaming data with Storm and Kafka

Storm is a free, open-source, distributed, fault-tolerant real-time computation system. Storm makes continuous stream computation easy, covering the real-time requirements that Hadoop's batch processing cannot meet.

Storm is commonly used for real-time analytics, online machine learning, continuous computation, distributed RPC, ETL, and similar workloads.

Storm is also very simple to deploy, and its performance stands out among comparable stream-processing tools.


About Kafka

Kafka is a high-throughput distributed publish-subscribe messaging system.

A recent project required subscribing to streaming data from Kafka and persisting it to Elasticsearch and MongoDB in real time.

The Kafka and Storm integration is set up as follows.

1. Configure Maven

<dependencies>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>1.0.2</version>
        <!-- The Storm cluster already provides this jar, so do not package it into the final task.jar -->
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-kafka</artifactId>
        <version>1.0.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.3.2</version>
    </dependency>
    <!-- A kafka_2.10 client artifact exists, but my jobs failed with it, so kafka_2.9.2 is used here;
         the Kafka broker itself runs the newer 2.10 build -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.9.2</artifactId>
        <version>0.8.2.2</version>
        <!-- Exclude the following jars: the Storm cluster already ships log4j, and duplicates cause conflicts -->
        <exclusions>
            <exclusion>
                <groupId>org.apache.zookeeper</groupId>
                <artifactId>zookeeper</artifactId>
            </exclusion>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>1.4.4</version>
        <exclusions>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
                <encoding>utf-8</encoding>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <archive>
                    <manifest></manifest>
                </archive>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <encoding>utf-8</encoding>
            </configuration>
        </plugin>
    </plugins>
</build>




2. FilterBolt

FilterBolt splits each raw Kafka message into lines and drops blank lines and comment lines (those starting with '#'), emitting the rest downstream.

import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class FilterBolt extends BaseRichBolt {

    private OutputCollector collector;

    /**
     * Initialization.
     */
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Execution logic: filter out useless strings.
     */
    public void execute(Tuple input) {
        String str = input.getString(0);
        if (StringUtils.isNotBlank(str)) {
            String[] lines = str.split("\n");
            for (String line : lines) {
                if (StringUtils.isBlank(line) || line.charAt(0) == '#') {
                    continue;
                }
                // Emit to the next bolt
                collector.emit(new Values(line));
            }
            // Tell Storm this tuple was processed successfully
            collector.ack(input);
        } else {
            // Processing failed
            collector.fail(input);
        }
    }

    /**
     * Declare the names of the fields emitted by this bolt.
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("line"));
    }
}

3. TransferBolt

TransferBolt takes each filtered line, parses it as JSON, and writes it to Elasticsearch through a bulk request.

import java.util.Map;

import com.alibaba.fastjson.JSONObject;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TransferBolt extends BaseRichBolt {

    private Logger LOG = LoggerFactory.getLogger(TransferBolt.class);

    private OutputCollector collector;
    private Client client; // Elasticsearch client; see the initialization sketch below

    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    public void execute(Tuple input) {
        String line = input.getString(0);
        // Parse the incoming line as JSON (a fastjson-style API is assumed here)
        JSONObject json = JSONObject.parseObject(line);

        // Index the document into the "test" index under the "element" type
        BulkRequest bulkRequest = new BulkRequest();
        IndexRequest indexRequest = new IndexRequest("test", "element", json.getString("id"))
                .source(json.getJSONObject("source").toString());
        bulkRequest.add(indexRequest);

        BulkResponse response = client.bulk(bulkRequest).actionGet();
        if (response.hasFailures()) {
            LOG.error("Bulk index failed: {}", response.buildFailureMessage());
        }
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        collector.ack(input);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: nothing is emitted downstream
    }
}
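The original post does not show how the Elasticsearch `client` used above is created. Below is a minimal sketch, assuming an Elasticsearch 1.4.4 TransportClient; the helper class name, cluster name, host, and port are placeholders, not part of the original code.

// Hedged sketch (not from the original post): building the Elasticsearch 1.4.4
// TransportClient that TransferBolt's `client` field relies on.
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;

public class EsClientFactory {

    /**
     * Creates a TransportClient; call this from TransferBolt.prepare()
     * and close the returned client in cleanup().
     */
    public static Client create(String clusterName, String host, int port) {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("cluster.name", clusterName)   // placeholder cluster name
                .build();
        return new TransportClient(settings)
                .addTransportAddress(new InetSocketTransportAddress(host, port));
    }
}

For example, `this.client = EsClientFactory.create("elasticsearch", "127.0.0.1", 9300);` could be added to prepare(), with a matching `client.close();` in cleanup().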


4. KafkaTopology

KafkaTopology wires everything together: a KafkaSpout reads messages from the topic, FilterBolt cleans them up, and TransferBolt writes them to Elasticsearch.

import java.util.Arrays;

import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;

public class KafkaTopology {

    public static void main(String[] args) throws Exception {
        String zks = PropertiesUtil.getString(KafkaProperties.ZK_HOSTS);
        String topic = PropertiesUtil.getString(KafkaProperties.TOPIC);
        String zkRoot = PropertiesUtil.getString(KafkaProperties.ZK_ROOT);
        String id = PropertiesUtil.getString(KafkaProperties.STORM_ID);

        // Kafka spout configuration: where the brokers are registered in ZooKeeper,
        // which topic to read, and where consumer offsets are stored
        BrokerHosts brokerHosts = new ZkHosts(zks);
        SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, topic, zkRoot, id);
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        spoutConfig.zkServers = Arrays.asList(PropertiesUtil.getString(KafkaProperties.ZK_SERVERS).split(","));
        spoutConfig.zkPort = PropertiesUtil.getInt(KafkaProperties.ZK_PORT);

        // Wire the topology: KafkaSpout -> FilterBolt -> TransferBolt
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-reader", new KafkaSpout(spoutConfig), 1);
        builder.setBolt("filter-bolt", new FilterBolt(), 1).shuffleGrouping("kafka-reader");
        builder.setBolt("input-line", new TransferBolt(), 1).shuffleGrouping("filter-bolt");

        Config config = new Config();
        String name = KafkaTopology.class.getSimpleName();
        config.setNumWorkers(PropertiesUtil.getInt(KafkaProperties.NUM_WORKERS));
        StormSubmitter.submitTopologyWithProgressBar(name, config, builder.createTopology());
    }
}
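KafkaTopology relies on a PropertiesUtil helper and a KafkaProperties constants class that the original post does not include. A minimal sketch of what they might look like is given below, assuming the configuration lives in a kafka.properties file on the classpath; the file name and all key names are assumptions.

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

// Hypothetical constants class: the key names are assumptions, not from the original post.
class KafkaProperties {
    static final String ZK_HOSTS    = "kafka.zk.hosts";     // e.g. "zk1:2181,zk2:2181"
    static final String TOPIC       = "kafka.topic";
    static final String ZK_ROOT     = "kafka.zk.root";      // ZooKeeper root for consumer offsets
    static final String STORM_ID    = "kafka.storm.id";     // unique id for this spout's consumer state
    static final String ZK_SERVERS  = "kafka.zk.servers";
    static final String ZK_PORT     = "kafka.zk.port";
    static final String NUM_WORKERS = "storm.num.workers";
}

// Hypothetical helper that loads kafka.properties from the classpath once.
class PropertiesUtil {
    private static final Properties PROPS = new Properties();

    static {
        try (InputStream in = PropertiesUtil.class.getClassLoader()
                .getResourceAsStream("kafka.properties")) {
            PROPS.load(in);
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    static String getString(String key) {
        return PROPS.getProperty(key);
    }

    static int getInt(String key) {
        return Integer.parseInt(PROPS.getProperty(key));
    }
}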





Reference: storm-kafka integration



Reposted from blog.csdn.net/xiamaocheng/article/details/60341285