Storm - Trident

Trident


1. Storm's guarantees

1. The data is guaranteed to be sent: delivery is confirmed through the
ack()/fail() mechanism, and tuples that fail are re-sent.

2. The data is processed exactly once: each tuple is sent with a unique
identifier, and a tuple whose identifier has already been processed is skipped.

3. The data is processed in order: each tuple is sent with a unique
identifier, and tuples are handled in identifier order.

Note: enforcing strict ordering turns the original parallel processing into
sequential, one-at-a-time processing, which is not advisable.

2. Batch sending and batch processing
If batches are handled strictly one at a time, the next batch of data cannot
be processed until the current batch has been both processed and sent.

3. Split the work into two steps: one step processes data, the other sends it.
Once a batch has been processed, the next batch can be processed immediately;
whether the results are passed downstream is decided by the sending step.
Trident adopts this approach.

3. Trident
1. Spout


















package com.study.storm.trident.wordcount;

import java.util.Map;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

/**
 * @description
 * Data source: simulates sending a fixed batch of (name, sentence) tuples.
 * <br/>
 * @remark
 * Storm's delivery guarantees and how they are implemented:
 * 1. Data must be sent
 *    Confirmed through the ack()/fail() mechanism; tuples that fail are re-sent.
 * 2. Data is processed only once
 *    Each tuple carries a unique id; a tuple whose id was already processed is ignored.
 * 3. Data is processed in order
 *    Tuples carry a unique id and are handled in id order; out-of-order tuples wait.
 * <br/>
 * Trident handles data in batches.
 */
public class SentenceSpout extends BaseRichSpout {

	private static final long serialVersionUID = 2122598284858356171L;

	private SpoutOutputCollector collector = null;

	/**
	 * Fixed test data; each entry maps to the declared output fields:
	 * field "name" (the letter) and field "sentence" (the payload).
	 */
	private Values[] valuesArray = new Values[] {
			new Values("a","111111111111"),
			new Values("b","222222222222"),
			new Values("c","333333333333"),
			new Values("d","444444444444"),
			new Values("e","555555555555"),
			new Values("f","666666666666"),
			new Values("g","777777777777"),
			new Values("h","888888888888")
	};

	@SuppressWarnings("rawtypes")
	@Override
	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		this.collector = collector;
	}

	// Next array index to emit; records how far the data has been sent.
	private int index = 0;

	@Override
	public void nextTuple() {
		// All values emitted: this spout does not cycle, so emit nothing more.
		if (index >= valuesArray.length) {
			return;
		}
		// BUG FIX: the original line `index = index == valuesArray.length ? 0 : index++;`
		// assigned the post-increment's old value back to `index`, leaving it
		// unchanged forever, so element 0 was re-emitted on every call.
		// Emit the current element, then advance.
		this.collector.emit(valuesArray[index]);
		index++;
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("name","sentence"));
	}

}





Simplified implementation, using Trident's built-in FixedBatchSpout instead of the hand-written spout:
package com.study.storm.trident.wordcount;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.testing.FixedBatchSpout;

public class TridentTopologyDemo {

	public static void main(String[] args) {

		// Batch spout playing the role of the hand-written SentenceSpout:
		// emits the fixed (name, sentence) pairs, one tuple per batch.
		@SuppressWarnings("unchecked")
		FixedBatchSpout batchSpout = new FixedBatchSpout(new Fields("name","sentence"),
				1,
				new Values("a","111111111111"),
				new Values("b","222222222222"),
				new Values("c","333333333333"),
				new Values("d","444444444444"),
				new Values("e","555555555555"),
				new Values("f","666666666666"),
				new Values("g","777777777777"),
				new Values("h","888888888888"));
		// false = do not replay the data once everything has been emitted.
		batchSpout.setCycle(false);

		TridentTopology tridentTopology = new TridentTopology();
		/**
		 * 1. Local filter settings
		 */
		// Wire the data source into the topology.
		Stream stream = tridentTopology.newStream("tridentSpout", batchSpout);
		// Drop every tuple whose "name" field is "d".
		stream = stream.each(new Fields("name"), new RemovePartDataFilter());
		// Append an "indexNum" field holding the letter's alphabet position.
		stream = stream.each(new Fields("name"), new NameIndexFunction(), new Fields("indexNum"));

		// Tap the stream and print each tuple's fields as it passes through.
		Stream printedStream = stream.each(new Fields("name","sentence"), new PrintFilter());

		//-- Submit the topology to a local in-process cluster.
		Config config = new Config();
		LocalCluster localCluster = new LocalCluster();
		localCluster.submitTopology("MyTopology", config, tridentTopology.build());

		//-- Let it run for 10 seconds, then kill the topology and shut down.
		Utils.sleep(1000 * 10);
		localCluster.killTopology("MyTopology");
		localCluster.shutdown();
	}

}




package com.study.storm.trident.wordcount;

import java.util.Iterator;

import backtype.storm.tuple.Fields;
import storm.trident.operation.BaseFilter;
import storm.trident.tuple.TridentTuple;

/**
 * @description
 * Debug filter: prints every field/value pair carried by each tuple that
 * passes through, then keeps the tuple (never actually filters anything).
 */
public class PrintFilter extends BaseFilter {

	private static final long serialVersionUID = 4393484291178519442L;

	@Override
	public boolean isKeep(TridentTuple tuple) {
		Fields fields = tuple.getFields();
		// Walk the tuple's field names and print each name with its value.
		for (Iterator<String> it = fields.iterator(); it.hasNext();) {
			String fieldName = it.next();
			System.out.println("fields : "+ fieldName + " values : "+tuple.getValueByField(fieldName));
		}
		// Always keep the tuple so the stream is unchanged.
		return true;
	}

}


package com.study.storm.trident.wordcount;

import storm.trident.operation.BaseFilter;
import storm.trident.tuple.TridentTuple;

/**
 * Filters out tuples whose "name" field equals "d".
 * Returning false drops the tuple; returning true lets it pass through.
 */
public class RemovePartDataFilter extends BaseFilter {

	private static final long serialVersionUID = 8639858690618579558L;

	@Override
	public boolean isKeep(TridentTuple tuple) {
		final String name = tuple.getStringByField("name");
		if (name.equals("d")) {
			// Drop this tuple from the stream.
			return false;
		}
		return true;
	}

}


package com.study.storm.trident.wordcount;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.tuple.Values;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;

/**
 * Trident function that emits the 1-based alphabet position of the tuple's
 * single-letter "name" field, looked up from a static letter-to-index table.
 */
public class NameIndexFunction extends BaseFunction {

	private static final long serialVersionUID = 9085021905838331812L;

	// Lookup table "a" -> 1 ... "i" -> 9, built once when the class loads.
	static Map<String,Integer> indexMap = new HashMap<String,Integer>();
	static {
		for (char letter = 'a'; letter <= 'i'; letter++) {
			indexMap.put(String.valueOf(letter), letter - 'a' + 1);
		}
	}

	@Override
	public void execute(TridentTuple tuple, TridentCollector collector) {
		// NOTE(review): names outside a..i yield a null index value — confirm
		// upstream only ever sends letters covered by the table.
		collector.emit(new Values(indexMap.get(tuple.getStringByField("name"))));
	}

}


Guess you like

Origin http://10.200.1.11:23101/article/api/json?id=327052233&siteId=291194637