Spring Boot 集成 Storm

Spring Boot 集成 Storm,订阅并消费 Kafka 中的数据

1.配置yml文件,配置storm props参数

# 这里可以配置更多的 Storm 调优参数;并行度 parallelismHint 不在此处,而是在 BoltBuilder 中配置
# Storm tuning properties, bound to the StormProps class via
# @ConfigurationProperties(prefix = "storm.props").
# NOTE: "props" and the keys below must be indented under "storm",
# otherwise the relaxed binding for the "storm.props" prefix will not resolve.
storm:
  props:
    topologyName: adLog             # topology name; one per Storm topology
    topologyWorkers: 1              # worker processes spawned across the cluster
    numTasks: 1                     # tasks per executor
    topologyMaxSpoutPending: 5000   # max un-acked tuples pending per spout task
    topologyMessageTimeoutSecs: 30  # tuple-tree ack timeout in seconds

2.配置storm props 类
/**
 * Storm tuning parameters bound from the "storm.props" section of the
 * application YAML. Getters/setters are generated by Lombok.
 */
@Getter
@Setter
@Configuration
@ConfigurationProperties(prefix = "storm.props")
public class StormProps {

    /**
     * Topology name; each Storm topology has exactly one.
     */
    private String topologyName;

    /**
     * How many processes should be spawned around the cluster to execute this
     * topology. Each process will execute some number of tasks as threads within
     * them. This parameter should be used in conjunction with the parallelism hints
     * on each component in the topology to tune the performance of a topology.
     * {@link org.apache.storm.Config#setNumWorkers(Map, int)}
     */
    private Integer topologyWorkers = 1;

    /**
     * Storm runs one task per executor by default, but the count can be set
     * explicitly here.
     * {@link org.apache.storm.topology.ComponentConfigurationDeclarer#setNumTasks(Number)}
     */
    private Integer numTasks = 1;

    /**
     * The maximum number of tuples that can be pending on a spout task at any given time
     * (a tuple is "pending" once emitted until it is acked or failed). Strongly
     * recommended to set this to prevent unbounded growth of in-flight messages.
     *
     * This config applies to individual tasks, not to spouts or topologies as a whole.
     * Note that this config parameter has no effect for unreliable spouts that don't tag
     * their tuples with a message id.
     * {@link org.apache.storm.Config#setMaxSpoutPending(Map, int)}
     */
    private Integer topologyMaxSpoutPending = 5000;

    /**
     * How long the ackers wait before timing a tuple tree out. Defaults to 30
     * seconds, which rarely needs changing.
     *
     * The maximum amount of time given to the topology to fully process a message
     * emitted by a spout. If the message is not acked within this time frame, Storm
     * will fail the message on the spout. Some spout implementations will then replay
     * the message at a later time.
     * {@link org.apache.storm.Config#setMessageTimeoutSecs(Map, int)}
     */
    private Integer topologyMessageTimeoutSecs = 30;

    // Additional Storm tuning parameters may be added here as needed.

}

3.storm bolt通用抽象
/**
 * Common base for bolt builders: carries the component id and parallelism hint
 * that the topology wiring code reads when registering the bolt.
 */
@Getter
@Setter
public abstract class BoltBuilder {
    /**
     * Parallelism hint of the component: the initial number of executors
     * (threads) assigned to run it. Each task will run on a thread in a process
     * somewhere around the cluster.
     * {@link org.apache.storm.topology.TopologyBuilder#setSpout(String, IRichSpout, Number)}
     */
    private Integer parallelismHint = 5;

    /** Component id used when registering this bolt in the topology. */
    private String id;

    /** Builds the Storm component this builder configures. */
    abstract public IComponent buildBolt();
}

4.storm 日志控制BoltBuilder类实现
/**
 * Spring configuration that exposes the {@link LogConsoleBolt} bean and wires
 * its tick-tuple emit interval from the "storm.bolt.logConsoleBolt" properties.
 */
@Getter
@Setter
@Configuration
@ConfigurationProperties(prefix = "storm.bolt.logConsoleBolt")
public class LogConsoleBoltBuilder extends BoltBuilder {

    /** Tick-tuple emit interval in seconds (default: emit every 60 s). */
    private int emitFrequencyInSeconds = 60;

    @Bean("logConsoleBolt")
    public LogConsoleBolt buildBolt() {
        super.setId("logConsoleBolt");
        LogConsoleBolt bolt = new LogConsoleBolt();
        bolt.setEmitFrequencyInSeconds(emitFrequencyInSeconds);
        return bolt;
    }
}

5.storm 日志控制类,获取kafka中的数据
@Slf4j
public class LogConsoleBolt extends BaseRichBolt {
private final static String AD_LIST_SHOW_COUNT = "AD_LIST_SHOW_COUNT";
private OutputCollector collector;
@Setter
private int emitFrequencyInSeconds;

public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector=collector;
}
@Override
public Map<String, Object> getComponentConfiguration() {
Map<String, Object> conf = new HashMap<String, Object>();
/**
* 这里配置TickTuple的发送频率
*/
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, emitFrequencyInSeconds);
return conf;
}

@Override
public void execute(Tuple input) {
try {
log.info(input.toString());
if (input.size()<5){
collector.ack(input);
}else {
String value = input.getStringByField("value").toString();

AdShowLogEntity adShowLogEntity = AdShowLogEntity.logToEntity(value);
if (adShowLogEntity != null){
collector.emit(new Values(Integer.parseInt(msg.getCreativeId()),System.currentTimeMillis(),0.01f));
}else {
// collector.ack(input);
}
collector.ack(input);
// System.out.println("received from kafka : "+ value);
// 必须ack,否则会重复消费kafka中的消息
}


}catch (Exception e){
e.printStackTrace();
collector.fail(input);
}


}

@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("adId","updateTime","price")); //分词定义的field为word
}
}

6.storm启动类,启动本地模式
/**
 * Assembles the Storm {@link Config} from {@link StormProps} and submits the
 * topology. Local (in-process) mode is used by default; remote cluster
 * submission is available via {@link #remoteSubmit}.
 */
@Service
public class AppMain {

    @Autowired
    private StormProps stormProps;

    @Autowired
    private TopologyBuilder topologyBuilder;

    /** Builds a fresh Storm config and launches the topology in local mode. */
    public void Laugher() throws InvalidTopologyException, AuthorizationException, AlreadyAliveException, InterruptedException {
        Config config = new Config();
        localSubmit(stormProps, stormProps.getTopologyName(), topologyBuilder, config);
    }

    /** Submits the topology to a remote Storm cluster. */
    private static void remoteSubmit(StormProps stormProps, TopologyBuilder builder, Config conf)
            throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        conf.setNumWorkers(stormProps.getTopologyWorkers());
        conf.setMaxSpoutPending(stormProps.getTopologyMaxSpoutPending());
        StormSubmitter.submitTopology(stormProps.getTopologyName(), conf, builder.createTopology());
    }

    /**
     * Runs the topology on an in-process {@link LocalCluster} for debugging.
     *
     * @param name    topology name to register under
     * @param builder fully wired topology builder
     * @throws InterruptedException if the submitting thread is interrupted
     */
    private static void localSubmit(StormProps stormProps, String name, TopologyBuilder builder, Config conf)
            throws InterruptedException {
        conf.setDebug(true);
        conf.setMaxTaskParallelism(3);
        conf.setNumWorkers(stormProps.getTopologyWorkers());
        conf.setMaxSpoutPending(stormProps.getTopologyMaxSpoutPending());
        // a single acker tracks the tuple trees
        conf.setNumAckers(1);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology(name, conf, builder.createTopology());
        // NOTE(review): the LocalCluster is never shut down — apparently
        // intentional for debug sessions, but confirm before wider use.
    }
}

猜你喜欢

转载自www.cnblogs.com/flyyu1/p/11447953.html