/**
 * Test spout that cycles endlessly through a fixed list of sentences.
 *
 * <p>Each call to {@link #nextTuple()} waits briefly and then emits one tuple with a
 * single field named {@code "sentences"}. Tuples are emitted without a message id.
 */
public class SentenceSpout extends BaseRichSpout {
    private static final long serialVersionUID = 1L;

    /** Pause before each emitted sentence, in milliseconds. */
    private static final long EMIT_INTERVAL_MS = 100L;

    /** Collector received in {@link #open}; used to emit the sentence tuples. */
    private SpoutOutputCollector collector;

    /** Test data that is emitted round-robin. */
    private final String[] sentences = {
        "my dog has fleas",
        "i like cold beverages",
        "the dog ate my homework",
        "don't have a cow man",
        "i don't think i like fleas"};

    /** Position in {@link #sentences} of the next sentence to emit. */
    private int index = 0;

    /**
     * Declares the single output field {@code "sentences"} on the default stream.
     *
     * @param declarer declarer used to register the output fields
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentences"));
    }

    /**
     * Stores the collector so that {@link #nextTuple()} can emit through it.
     *
     * @param conf      Storm configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used to emit tuples from this spout
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Waits {@value #EMIT_INTERVAL_MS} ms, emits the current sentence and advances to the
     * next one, wrapping around at the end of the list.
     *
     * <p>If the wait is interrupted, the interrupt status is restored and nothing is
     * emitted for this call, so the calling thread can observe the interruption.
     */
    @Override
    public void nextTuple() {
        try {
            Thread.sleep(EMIT_INTERVAL_MS);
        } catch (InterruptedException e) {
            // Catching the exception clears the interrupt flag; set it again instead of
            // hiding the interruption from the caller.
            Thread.currentThread().interrupt();
            return;
        }
        this.collector.emit(new Values(sentences[index]));
        index = (index + 1) % sentences.length;
    }
}
/**
 * Bolt that splits each incoming sentence into words.
 *
 * <p>Reads the {@code "sentences"} field of the input tuple and emits one tuple with a
 * single {@code "words"} field per word.
 */
public class SplitSentenceBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    /** Collector received in {@link #prepare}; used to emit and ack tuples. */
    private OutputCollector collector;

    /**
     * Stores the collector for later use in {@link #execute}.
     *
     * @param stormConf Storm configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used to emit and ack tuples
     */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Splits the sentence on single spaces and emits every non-empty word, anchored to
     * the input tuple; the input tuple is acked afterwards.
     *
     * @param input tuple carrying the sentence in its {@code "sentences"} field
     */
    public void execute(Tuple input) {
        String sentence = input.getStringByField("sentences");
        if (sentence != null) {
            for (String word : sentence.split(" ")) {
                // Consecutive spaces produce empty tokens; those are not words.
                if (word.isEmpty()) {
                    continue;
                }
                // Anchor the word to its source tuple so it belongs to the same tuple tree.
                this.collector.emit(input, new Values(word));
            }
        }
        // This bolt manages acking itself, so every processed input must be acked.
        this.collector.ack(input);
    }

    /**
     * Declares the single output field {@code "words"}.
     *
     * @param declarer declarer used to register the output fields
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("words"));
    }
}
/**
 * Bolt that keeps a running count per word.
 *
 * <p>Reads the {@code "words"} field of the input tuple, increments the count kept for
 * that word in this bolt instance and emits a {@code ("word", "count")} tuple with the
 * updated value.
 */
public class WordCountBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    /** Collector received in {@link #prepare}; used to emit and ack tuples. */
    private OutputCollector collector;

    /** Running count per word; created in {@link #prepare}. */
    private Map<String, Long> counts;

    /**
     * Stores the collector and creates the empty count table.
     *
     * @param stormConf Storm configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used to emit and ack tuples
     */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counts = new HashMap<String, Long>();
    }

    /**
     * Increments the count of the received word, emits the word with its new count
     * anchored to the input tuple, and acks the input tuple.
     *
     * @param input tuple carrying the word in its {@code "words"} field
     */
    public void execute(Tuple input) {
        String word = input.getStringByField("words");
        Long previous = this.counts.get(word);
        long updated = (previous == null) ? 1L : previous + 1L;
        // The stored value is overwritten each time, so the map always holds the latest count.
        this.counts.put(word, updated);
        this.collector.emit(input, new Values(word, updated));
        // This bolt manages acking itself, so every processed input must be acked.
        this.collector.ack(input);
    }

    /**
     * Declares the output fields {@code "word"} and {@code "count"}.
     *
     * @param declarer declarer used to register the output fields
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
/**
 * Terminal bolt that records the latest count per word and prints the whole table,
 * sorted by word, to standard output every time a tuple arrives.
 */
public class ReportBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;

    /** Collector received in {@link #prepare}; used only to ack input tuples. */
    private OutputCollector collector;

    /** Latest count seen per word; created in {@link #prepare}. */
    private Map<String, Long> counts;

    /**
     * Stores the collector and creates the empty count table.
     *
     * @param stormConf Storm configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used to ack tuples
     */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counts = new HashMap<String, Long>();
    }

    /**
     * Stores the count carried by the tuple, prints all known counts in alphabetical
     * order of their words, and acks the tuple.
     *
     * @param input tuple with the fields {@code "word"} and {@code "count"}
     */
    public void execute(Tuple input) {
        String word = input.getStringByField("word");
        Long count = input.getLongByField("count");
        this.counts.put(word, count);

        System.out.println("--------FINAL COUNTS--------");
        List<String> keys = new ArrayList<String>(this.counts.keySet());
        Collections.sort(keys);
        for (String key : keys) {
            System.out.println(key + ":" + this.counts.get(key));
        }
        System.out.println("----------------------------");

        // This bolt manages acking itself, so every processed input must be acked.
        this.collector.ack(input);
    }

    /**
     * Declares nothing: this bolt does not emit tuples.
     *
     * @param declarer declarer for output fields (unused)
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // this bolt does not emit anything
    }
}
/**
 * Wires the word-count components into a topology and submits it to an in-process
 * {@code LocalCluster}.
 *
 * <p>Data flow: sentence spout -> split bolt -> count bolt -> report bolt.
 */
public class WordCountTopology {
    // Component ids; these strings are runtime identifiers and are used verbatim.
    private static final String SENTENCE_SPOUT_ID = "sentence-sput";
    private static final String SPLIT_BOLT_ID = "split-bolt";
    private static final String COUNT_BOLT_ID = "count-bolt";
    private static final String REPORT_BOLT_ID = "report-bolt";
    private static final String TOPOLOGY_NAME = "word-count-topology";

    /**
     * Builds the topology and submits it to a local cluster. The cluster is never shut
     * down here, so the topology keeps running until the process is terminated.
     *
     * @param args command-line arguments (unused)
     * @throws InterruptedException declared for callers; not thrown by the current body
     */
    public static void main(String[] args) throws InterruptedException {
        SentenceSpout sentenceSpout = new SentenceSpout();
        SplitSentenceBolt splitter = new SplitSentenceBolt();
        WordCountBolt counter = new WordCountBolt();
        ReportBolt reporter = new ReportBolt();

        TopologyBuilder topology = new TopologyBuilder();

        // Spout with a parallelism hint of 2.
        topology.setSpout(SENTENCE_SPOUT_ID, sentenceSpout, 2);

        // Split bolt: parallelism hint of 2 and 4 tasks, fed from the spout via shuffle grouping.
        topology.setBolt(SPLIT_BOLT_ID, splitter, 2)
                .setNumTasks(4)
                .shuffleGrouping(SENTENCE_SPOUT_ID);

        // Count bolt: fields grouping on "words" so that tuples carrying the same word
        // are always routed to the same WordCountBolt task.
        topology.setBolt(COUNT_BOLT_ID, counter, 2)
                .fieldsGrouping(SPLIT_BOLT_ID, new Fields("words"));

        // Report bolt: global grouping, so all counts arrive at a single task.
        topology.setBolt(REPORT_BOLT_ID, reporter)
                .globalGrouping(COUNT_BOLT_ID);

        Config topologyConfig = new Config();

        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology(TOPOLOGY_NAME, topologyConfig, topology.createTopology());
    }
}
// Storm demo
// Origin: http://10.200.1.11:23101/article/api/json?id=326789387&siteId=291194637