Storm high concurrency uses WordSum

1. Create a processing class for the total number of statistics and deduplication of all strings that are emitted

public class SumBolt implements IBasicBolt {

	/**
	 * Count the total number and deduplication of all transmitted strings
	 */
	private static final long serialVersionUID = 1L;

	Map<String, Integer> counts = new HashMap<String, Integer>();

	public void execute(Tuple input, BasicOutputCollector collector) {
		try {
			// Variables will be accumulated outside the method, and all data will be placed in the counts Map.
			long word_sum = 0;// total
			long word_count = 0;// number after deduplication

			String word = input.getString(0);
			Integer count = input.getInteger(1);
			counts.put(word, count);

			// Get the total number, traverse the values ​​of counts, and perform sum
			Iterator<Integer> i = counts.values().iterator();
			while (i.hasNext()) {
				word_sum += i.next();
			}
			Iterator<String> i2 = counts.keySet().iterator();
			while (i2.hasNext()) {
				String oneWord = i2.next();
				if (oneWord != null) {
					word_count ++;
				}
			}
			System.err.println("a="+counts.get("a")+"     b="+counts.get("b")+"     c="+counts.get("c")+"     d="+counts.get("d"));
			System.err.println(
					Thread.currentThread().getName() + "word_sum=" + word_sum + ",-------word_count=" + word_count);
		} catch (Exception e) {
			throw new FailedException("split error!");
		}

	}

	public void cleanup() {
		// TODO Auto-generated method stub

	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// TODO Auto-generated method stub

	}

	public Map<String, Object> getComponentConfiguration() {
		// TODO Auto-generated method stub
		return null;
	}

	public void prepare(Map stormConf, TopologyContext context) {
		// TODO Auto-generated method stub

	}

}


2. Topology increases character statistics

public class WordCountTopology {

	/**
	 * Submit the main function of the topology and the character statistics processing class
	 */
	public static class SplitSentence extends ShellBolt implements IRichBolt {
		
		private static final long serialVersionUID = 1L;
		
		
		
		/**
		 * Character statistics processing bolt class
		 */
		public static class WordCount extends BaseBasicBolt {
			private static final long serialVersionUID = 1L;
			// The number of keys of Map and the total number of characters represented by value cannot be counted under multi-threading. Because the multi-line representation is only part of the
			Map<String, Integer> counts = new HashMap<String, Integer>();

			public void execute(Tuple tuple, BasicOutputCollector collector) {
				String word = tuple.getString(0);
				Integer count = counts.get(word);
				if (count == null) {
					count = 0;
				}
				count++;
				counts.put(word, count);
				//System.err.println(Thread.currentThread().getName() + "---word:" + word + "   count:" + count);
				collector.emit(new Values(word, count));
			}

			public void declareOutputFields(OutputFieldsDeclarer declarer) {
				declarer.declare(new Fields("word", "count"));
			}
		}

		//Submit the main function of the topology
		public static void main(String[] args) throws Exception {
			TopologyBuilder builder = new TopologyBuilder();
			//Use 1 thread to read data to prevent repeated reading of data
			builder.setSpout("spout", new RandomSentenceSpout(), 1);
			//Read data from spout source, set 2 threads to process character segmentation
			builder.setBolt("split", new MysplitBolt(" "), 2).shuffleGrouping("spout");
			/**
			 * The last bolt received data, and set 3 threads to process data statistics.
			 * Fields Grouping: Grouping by Field, the same tuple will be distributed to the same thread (Executer or task) for processing. So don't worry about multithreading issues
			 * For example, to group by singleWord, tuples with the same singleWord will be assigned to the same Bolts, and different words will be assigned to different Bolts.
			 */
			//builder.setBolt("count", new WordCount(), 3).fieldsGrouping("split", new Fields("singleWord"));
			/ / Evenly distribute tuple data to each thread for processing, statistics will have thread safety issues
			builder.setBolt("count", new WordCount(), 3).shuffleGrouping("split");
			builder.setBolt("sum", new SumBolt(),1).shuffleGrouping("count");

			Config conf = new Config();
			conf.setDebug(true);

			if (args != null && args.length > 0) {
				conf.setNumWorkers (3);

				StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
			} else {
				conf.setMaxTaskParallelism(3);

				LocalCluster cluster = new LocalCluster();
				cluster.submitTopology("word-count", conf, builder.createTopology());
			}
		}

		public void declareOutputFields(OutputFieldsDeclarer declarer) {
			// TODO Auto-generated method stub
		}

		public Map<String, Object> getComponentConfiguration() {
			// TODO Auto-generated method stub
			return null;
		}
	}

}


-----------------------others------------------------- -------------
3. Character launch spout class
/**
 * Character emission spout class
 */
public class RandomSentenceSpout extends BaseRichSpout {

	private static final long serialVersionUID = 1L;

	SpoutOutputCollector _collector;
	Random _rand;

	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		_collector = collector;
		_rand = new Random();
	}

	public void nextTuple() {
		//a:2 , b:2 , c:1 , d:3
		String[] sentences = new String[] { sentence("a b c d "), sentence("b d"), sentence("a d") };
		for (String sentence : sentences) {// emit three lines of data for bolt processing
			_collector.emit(new Values(sentence));
		}
		Utils.sleep(1000 * 1000);
	}

	protected String sentence(String input) {
		return input;
	}

	@Override
	public void ack(Object id) {
	}

	@Override
	public void fail(Object id) {
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("firstSpout"));
	}

	// Add unique identifier to each tuple, which is helpful for debugging
	public static class TimeStamped extends RandomSentenceSpout {
		private final String prefix;

		public TimeStamped() {
			this("");
		}

		public TimeStamped(String prefix) {
			this.prefix = prefix;
		}

		protected String sentence(String input) {
			return prefix + currentDate() + " " + input;
		}

		private String currentDate() {
			return new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss.SSSSSSSSS").format(new Date());
		}
	}
}

4. Character cutting processing bolt class
/**
 * Character cutting processing bolt class
 */
public class MysplitBolt implements IBasicBolt {

	private static final long serialVersionUID = 1L;

	String patton;

	public MysplitBolt(String patton) {
		this.patton = patton;
	}

	/**
	 * Receive and process each row of data
	 */
	public void execute(Tuple input, BasicOutputCollector collector) {
		try {
			String sen = input.getStringByField("firstSpout");
			if (sen != null) {
				for (String word : sen.split(patton)) {// Emit multiple character data, let the next level bolt process
					collector.emit(new Values(word));
				}

			}

		} catch (Exception e) {
			throw new FailedException("split error!");
		}

	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("singleWord"));
	}

	public Map<String, Object> getComponentConfiguration() {
		// TODO Auto-generated method stub
		return null;
	}

	public void prepare(Map stormConf, TopologyContext context) {
		// TODO Auto-generated method stub

	}

	public void cleanup() {
		// TODO Auto-generated method stub

	}

}


5.pom file
quote
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.test</groupId>
  <artifactId>StormMavenProject</artifactId>
  <packaging>jar</packaging>
  <version>0.0.1-SNAPSHOT</version>
  <name>StormMavenProject</name>
  <url>http://maven.apache.org</url>
  <dependencies>
   
   <dependency>
    <groupId>org.ow2.asm</groupId>
    <artifactId>asm</artifactId>
    <version>5.0.3</version>
   </dependency>
<dependency>
    <groupId>org.clojure</groupId>
    <artifactId>clojure</artifactId>
    <version>1.7.0</version>
</dependency>
<dependency>
    <groupId>com.lmax</groupId>
    <artifactId>disruptor</artifactId>
    <version>3.3.2</version>
</dependency>
<dependency>
    <groupId>com.esotericsoftware</groupId>
    <artifactId>kryo</artifactId>
    <version>3.0.3</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-api</artifactId>
    <version>2.8</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-core</artifactId>
    <version>2.8</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>log4j-over-slf4j</artifactId>
    <version>1.6.6</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-slf4j-impl</artifactId>
    <version>2.8</version>
</dependency>
<dependency>
    <groupId>com.esotericsoftware</groupId>
    <artifactId>minlog</artifactId>
    <version>1.3.0</version>
</dependency>
<dependency>
    <groupId>org.objenesis</groupId>
    <artifactId>objenesis</artifactId>
    <version>2.1</version>
</dependency>
<dependency>
    <groupId>com.esotericsoftware</groupId>
    <artifactId>reflectasm</artifactId>
    <version>1.10.1</version>
</dependency>

<dependency>
    <groupId>javax.servlet</groupId>
    <artifactId>servlet-api</artifactId>
    <version>2.5</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-api</artifactId>
    <version>1.7.21</version>
</dependency>
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>1.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-rename-hack</artifactId>
    <version>1.1.0</version>
</dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

<dependency>
    <groupId>ring-cors</groupId>
    <artifactId>ring-cors</artifactId>
    <version>0.1.5</version>
</dependency>

  </dependencies>
  <build>
    <finalName>StormMavenProject</finalName>
  </build>
</project>

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326460417&siteId=291194637