1. Create the topology and configure the KafkaSpout and bolts
KafkaTopologyBasic.java:
package org.mort.storm.kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.spout.*;
import org.apache.storm.kafka.spout.KafkaSpoutRetryExponentialBackoff.TimeInterval;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.List;
import java.util.concurrent.TimeUnit;

import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST;

/**
 * Consumes data from Kafka with Storm and builds the topology (using TopologyBuilder).
 * Wires together the KafkaSpout, SentenceBolt and PrinterBolt.
 */
public class KafkaTopologyBasic {

    /**
     * JUST_VALUE_FUNC translates a Kafka record into a tuple;
     * here it simply emits the record value.
     */
    private static Func<ConsumerRecord<String, String>, List<Object>> JUST_VALUE_FUNC =
            new Func<ConsumerRecord<String, String>, List<Object>>() {
        @Override
        public List<Object> apply(ConsumerRecord<String, String> record) {
            return new Values(record.value());
        }
    };

    /**
     * KafkaSpout retry policy.
     */
    protected KafkaSpoutRetryService newRetryService() {
        return new KafkaSpoutRetryExponentialBackoff(
                new TimeInterval(500L, TimeUnit.MICROSECONDS),
                TimeInterval.milliSeconds(2),
                Integer.MAX_VALUE,
                TimeInterval.seconds(10));
    }

    /**
     * KafkaSpout configuration.
     * The new KafkaSpout is configured through KafkaSpoutConfig, which defines the Kafka
     * environment, topic, retry policy, initial offset strategy and other consumer parameters.
     */
    protected KafkaSpoutConfig<String, String> newKafkaSpoutConfig() {
        return KafkaSpoutConfig.builder("192.168.1.201:9092", "First")
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "kafkaSpoutTestGroup")
                .setProp(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, 200)
                .setRecordTranslator(JUST_VALUE_FUNC, new Fields("str"))
                .setRetry(newRetryService())
                .setOffsetCommitPeriodMs(10000)
                .setFirstPollOffsetStrategy(LATEST)
                .setMaxUncommittedOffsets(250)
                .build();
    }

    /**
     * Combines the spout and bolts with the configuration above and builds the
     * Storm topology (using TopologyBuilder).
     */
    public StormTopology buildTopology() {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("KafkaSpout", new KafkaSpout<String, String>(newKafkaSpoutConfig()), 1);
        builder.setBolt("SentenceBolt", new SentenceBolt(), 1).globalGrouping("KafkaSpout");
        builder.setBolt("PrinterBolt", new PrinterBolt(), 1).globalGrouping("SentenceBolt");
        return builder.createTopology();
    }

    public final static boolean isCluster = true;

    public static void main(String[] args) {
        // 1. Create the topology
        KafkaTopologyBasic kb = new KafkaTopologyBasic();
        StormTopology topology = kb.buildTopology();
        // 2. Create the configuration object and set the number of workers
        Config conf = new Config();
        conf.setNumWorkers(4);
        // 3. Submit the topology
        if (isCluster) {
            try {
                // distributed submit
                StormSubmitter.submitTopology("SentenceTopology", conf, topology);
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            // local submit
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("KafkaToplogy", conf, topology);
            try {
                // Wait for some time before exiting
                System.out.println("Waiting to consume from kafka");
                Thread.sleep(300000);
            } catch (Exception exception) {
                System.out.println("Thread interrupted exception : " + exception);
            }
            // kill the KafkaTopology
            cluster.killTopology("KafkaToplogy");
            // shut down the storm test cluster
            cluster.shutdown();
        }
    }
}
PrinterBolt.java:
package org.mort.storm.kafka;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bolt that prints the received sentences.
 */
public class PrinterBolt extends BaseBasicBolt {

    private static final long serialVersionUID = 1L;
    private static final Logger logger = LoggerFactory.getLogger(PrinterBolt.class);

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        // get the sentence from the tuple and print it
        String sentence = input.getString(0);
        logger.info("Received Sentence: " + sentence);
        System.out.println("Received Sentence: " + sentence);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // we don't emit anything
    }
}
SentenceBolt.java
package org.mort.storm.kafka;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bolt that assembles the incoming words into a sentence: one word per tuple,
 * and a word ending with "." marks the end of the sentence.
 */
public class SentenceBolt extends BaseBasicBolt {

    private static final long serialVersionUID = 1L;
    private static final Logger logger = LoggerFactory.getLogger(SentenceBolt.class);

    private List<String> words = new ArrayList<String>();

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        // Get the word from the tuple
        String word = input.getString(0);
        if (StringUtils.isBlank(word)) {
            // ignore blank lines
            return;
        }
        logger.info("Received Word:" + word);
        System.out.println("Received Word:" + word);
        // add word to current list of words
        words.add(word);
        if (word.endsWith(".")) {
            // word ends with '.' which means this is the end of the sentence,
            // so the SentenceBolt publishes a sentence tuple
            collector.emit(new Values(StringUtils.join(words, ' ')));
            // and reset the words list
            words.clear();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }
}
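Once the topology is running (see the build and submit steps below), the pipeline can be exercised by publishing single-word messages to the topic the spout subscribes to ("First" on 192.168.1.201:9092 in newKafkaSpoutConfig above). A minimal sketch using Kafka's standard console producer (assuming a Kafka 2.x installation on the broker host; paths may differ):

# one word per message; a word ending with "." closes the current sentence
bin/kafka-console-producer.sh --broker-list 192.168.1.201:9092 --topic First
>Hello
>Storm
>Kafka.

SentenceBolt then emits "Hello Storm Kafka." and PrinterBolt logs "Received Sentence: Hello Storm Kafka.".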
2. pom.xml settings
1) Configure the maven-assembly-plugin to build a jar-with-dependencies
<build>
    <plugins>
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <mainClass>org.mort.storm.kafka.KafkaTopologyBasic</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
    </plugins>
</build>
2) Add dependencies
Note: to avoid conflicts with the storm-core classes already present on the cluster at run time, the storm-core dependency must be declared with <scope>provided</scope>.
<dependencies>
    <!-- Bridge: tells SLF4J to use Log4j2 -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-slf4j-impl</artifactId>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
    </dependency>
    <dependency>
        <groupId>com.google.guava</groupId>
        <artifactId>guava</artifactId>
        <version>26.0-jre</version>
    </dependency>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>1.2.3</version>
        <scope>provided</scope>
    </dependency>
    <!-- storm-kafka client -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-kafka-client</artifactId>
        <version>1.2.3</version>
    </dependency>
    <!-- Kafka client -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>2.0.1</version>
    </dependency>
</dependencies>
3. Run the assembly build to generate the jar package
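For example, with the maven-assembly-plugin configuration above and a standard Maven setup (the exact goal depends on how the plugin is bound in your build), the fat jar can typically be produced with:

mvn clean package assembly:single
# creates target/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar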
4. Copy the jar package to the cluster and run it
bin/storm jar /opt/run/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar org.mort.storm.kafka.KafkaTopologyBasic

General form: bin/storm jar [jar path] [main class name]
Execution output:
[root@hadoop201 apache-storm-1.2.3]# bin/storm jar /opt/run/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar org.mort.storm.kafka.KafkaTopologyBasic
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/module/apache-storm-1.2.3/lib/log4j-slf4j-impl-2.8.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/run/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Running: /opt/module/jdk1.8.0_144/bin/java -client -Ddaemon.name= -Dstorm.options= -Dstorm.home=/opt/module/apache-storm-1.2.3 -Dstorm.log.dir=/opt/module/apache-storm-1.2.3/logs -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -Dstorm.conf.file= -cp /opt/module/apache-storm-1.2.3/*:/opt/module/apache-storm-1.2.3/lib/*:/opt/module/apache-storm-1.2.3/extlib/*:/opt/run/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar:/opt/module/apache-storm-1.2.3/conf:/opt/module/apache-storm-1.2.3/bin -Dstorm.jar=/opt/run/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar -Dstorm.dependency.jars= -Dstorm.dependency.artifacts={} org.mort.storm.kafka.KafkaTopologyBasic
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/module/apache-storm-1.2.3/lib/log4j-slf4j-impl-2.8.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/run/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
5481 [main] INFO o.a.s.k.s.KafkaSpoutConfig - Setting Kafka consumer property 'auto.offset.reset' to 'earliest' to ensure at-least-once processing
5564 [main] INFO o.a.s.k.s.KafkaSpoutConfig - Setting Kafka consumer property 'enable.auto.commit' to 'false', because the spout does not support auto-commit
7441 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old null
7658 [main] INFO o.a.s.StormSubmitter - Generated ZooKeeper secret payload for MD5-digest: -8420262939352556619:-8011743779888436007
8316 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : hadoop201.com:6627
8388 [main] INFO o.a.s.s.a.AuthUtils - Got AutoCreds []
8426 [main] INFO o.a.s.u.NimbusClient - Found leader nimbus : hadoop201.com:6627
8661 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - jars...
8661 [main] INFO o.a.s.StormSubmitter - Uploading dependencies - artifacts...
8662 [main] INFO o.a.s.StormSubmitter - Dependency Blob keys - jars : [] / artifacts : []
8751 [main] INFO o.a.s.StormSubmitter - Uploading topology jar /opt/run/storm-demo-1.0-SNAPSHOT-jar-with-dependencies.jar to assigned location: /opt/module/apache-storm-1.2.3/data/nimbus/inbox/stormjar-c0d5b00a-b07e-48f1-ac4d-871c5b3f635d.jar
9815 [main] INFO o.a.s.StormSubmitter - Successfully uploaded topology jar to assigned location: /opt/module/apache-storm-1.2.3/data/nimbus/inbox/stormjar-c0d5b00a-b07e-48f1-ac4d-871c5b3f635d.jar
9815 [main] INFO o.a.s.StormSubmitter - Submitting topology SentenceTopology in distributed mode with conf {"topology.workers":4,"storm.zookeeper.topology.auth.scheme":"digest","storm.zookeeper.topology.auth.payload":"-8420262939352556619:-8011743779888436007"}
9815 [main] WARN o.a.s.u.Utils - STORM-VERSION new 1.2.3 old 1.2.3
11935 [main] INFO o.a.s.StormSubmitter - Finished submitting topology: SentenceTopology
5. View the execution results in the Storm UI