为什么需要消息完整处理这种机制
storm有一种机制可以保证从spout发出的
每个tuple都会被完全处理
。
在一些特定的业务场景中,丢失一条消息是非常可怕的,也是业务不允许的。
理解消息被完整处理
一个消息(tuple)从spout发送出来,可能会导致成百上千的消息基于此消息被创建
“单词统计”的例子:
1.
storm任务从数据源每次读取一个完整的英文句子;将这个句子分解为独立的单词,最后,实时的输出每个单词以及它出现过的次数。
2.
每个从spout发送出来的消息(每个英文句子)都会触发很多的消息被创建,那些从句子中分隔出来的单词就是被创建出来的新消息。
3.
这些消息构成一个树状结构,我们称之为“tuple tree”。
在什么条件下,Storm才会认为一个从spout发送出来的消息被完整处理呢?
1.
tuple tree不再生长
2.
树中的所有消息都已被标识为“已处理”
使用Storm提供的可靠处理特性:
1.
无论何时在tuple tree中创建了一个新的节点,我们需要明确地通知Storm;
2.
当处理完一个单独的消息时,我们需要告诉Storm这棵tuple tree的变化状态。
通过上面的两步,storm就可以检测到一个tuple tree何时被完全处理了,并且会调用相关的ack或fail方法。
锚定(anchoring)
1. Topology
TopoMain.java
package cn.newbies.storm.topology;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;

import cn.newbies.storm.bolt.FileWriterBolt;
import cn.newbies.storm.bolt.SpliterBolt;
import cn.newbies.storm.spout.MessageSpout;
public class TopoMain {

    /**
     * Builds and submits the "reliability" demo topology on an in-process
     * {@link LocalCluster}: spout -> bolt-1 (splitter) -> bolt-2 (file
     * writer), shuffle-grouped.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new MessageSpout());
        builder.setBolt("bolt-1", new SpliterBolt()).shuffleGrouping("spout");
        builder.setBolt("bolt-2", new FileWriterBolt()).shuffleGrouping("bolt-1");

        Config conf = new Config();
        conf.setDebug(false);

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("reliability", conf, builder.createTopology());

        // The original never shut the cluster down, so the local cluster's
        // threads kept the JVM alive forever. Let the ten test messages (and
        // the demo replay) flow through, then stop cleanly.
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        cluster.killTopology("reliability");
        cluster.shutdown();
    }
}
2. spout
MessageSpout.java
package cn.newbies.storm.spout;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
/**
 * Reliable spout that emits a fixed list of ten "index,word" lines, anchoring
 * each tuple with its array index as the message id so Storm can route
 * ack/fail notifications back here. On fail, the same line is re-emitted with
 * the same message id.
 */
public class MessageSpout implements IRichSpout {

    private static final long serialVersionUID = -4664068313075450186L;

    // Index of the next line to emit; doubles as the tuple's message id.
    private int index = 0;
    private String[] lines;
    private SpoutOutputCollector collector;

    public MessageSpout() {
        lines = new String[]{
            "0,zero",
            "1,one",
            "2,two",
            "3,three",
            "4,four",
            "5,five",
            "6,six",
            "7,seven",
            "8,eight",
            "9,nine"
        };
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("line"));
    }

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Emits one line per invocation. Storm invokes nextTuple() in a tight
     * loop; the original returned immediately once the data was exhausted and
     * therefore busy-spun on one CPU core — sleep briefly instead.
     */
    @Override
    public void nextTuple() {
        if (index < lines.length) {
            String l = lines[index];
            // Second argument is the message id delivered to ack()/fail().
            collector.emit(new Values(l), index);
            index++;
        } else {
            // No more data: back off instead of hot-looping.
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /** Called by Storm when the tuple tree rooted at msgId is fully processed. */
    @Override
    public void ack(Object msgId) {
        System.out.println("message sends successfully (msgId = " + msgId +")");
    }

    /**
     * Called by Storm when a tuple tree fails (a bolt called fail, or the
     * tree timed out); replays the original line under the same message id.
     */
    @Override
    public void fail(Object msgId) {
        System.out.println("error : message sends unsuccessfully (msgId = " + msgId +")");
        System.out.println("resending...");
        collector.emit(new Values(lines[(Integer) msgId]), msgId);
        System.out.println("resend successfully");
    }

    @Override
    public void close() {
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
3. bolt
FileWriterBolt.java
package cn.newbies.storm.bolt;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Terminal bolt that appends each received word to a local file. The sixth
 * tuple it sees (count == 5) is deliberately failed to demonstrate the
 * spout's replay behavior; on replay count has moved past 5, so the retry
 * succeeds.
 */
public class FileWriterBolt implements IRichBolt {

    private static final long serialVersionUID = -8619029556495402143L;

    private FileWriter writer;
    private OutputCollector collector;
    // Tuples seen so far; used only to trigger the one simulated failure.
    private int count = 0;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        try {
            writer = new FileWriter("e://reliability.txt");
        } catch (IOException e) {
            // The original swallowed this, leaving writer == null and
            // guaranteeing an NPE in execute(). Fail fast with the cause.
            throw new RuntimeException("cannot open e://reliability.txt", e);
        }
    }

    @Override
    public void execute(Tuple input) {
        String word = input.getString(0);
        if (count == 5) {
            // Simulated failure: mark the tuple tree failed so the spout's
            // fail() callback fires and the message is replayed.
            collector.fail(input);
        } else {
            try {
                writer.write(word);
                writer.write("\r\n");
                writer.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }
            // Anchor to the input before acking it as processed.
            collector.emit(input, new Values(word));
            collector.ack(input);
        }
        count++;
    }

    @Override
    public void cleanup() {
        // Close the file handle the original leaked.
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // execute() emits a one-field tuple; the original declared no fields,
        // which Storm rejects at runtime when the emit happens.
        declarer.declare(new Fields("word"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
SpliterBolt.java
package cn.newbies.storm.bolt;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Splits each incoming comma-separated line into individual words, emitting
 * every word anchored to the input tuple (growing the tuple tree) and then
 * acking the input once all words are out.
 */
public class SpliterBolt implements IRichBolt {

    private static final long serialVersionUID = 6266473268990329206L;

    private OutputCollector collector;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    @Override
    public void execute(Tuple input) {
        String sentence = input.getString(0);
        String[] tokens = sentence.split(",");
        for (int i = 0; i < tokens.length; i++) {
            // Passing `input` as the anchor links each new tuple into the
            // tuple tree rooted at the spout's original message.
            collector.emit(input, new Values(tokens[i]));
        }
        // Mark this node of the tree processed only after all children exist.
        collector.ack(input);
    }

    @Override
    public void cleanup() {
        // Nothing to release.
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}