Spring Boot 集成 Storm

Spring Boot 集成 Storm,订阅并消费 Kafka 中的数据

1.配置yml文件,配置storm props参数

# 这里可以配置更多的 Storm 调优参数;并行度 parallelismHint 不在此处,而是在 BoltBuilder 中配置
# Storm tuning properties, bound to the StormProps class via
# @ConfigurationProperties(prefix = "storm.props").
# NOTE: "props" and the keys below must be indented under "storm",
# otherwise the relaxed binding for the "storm.props" prefix will not resolve.
storm:
  props:
    topologyName: adLog             # topology name; one per Storm topology
    topologyWorkers: 1              # worker processes spawned across the cluster
    numTasks: 1                     # tasks per executor
    topologyMaxSpoutPending: 5000   # max un-acked tuples pending per spout task
    topologyMessageTimeoutSecs: 30  # tuple-tree ack timeout in seconds

2.配置storm props 类
/**
 * Storm tuning parameters bound from the "storm.props" section of the
 * application YAML. Getters/setters are generated by Lombok.
 */
@Getter
@Setter
@Configuration
@ConfigurationProperties(prefix = "storm.props")
public class StormProps {

    /**
     * Topology name; each Storm topology has exactly one.
     */
    private String topologyName;

    /**
     * How many processes should be spawned around the cluster to execute this
     * topology. Each process will execute some number of tasks as threads within
     * them. This parameter should be used in conjunction with the parallelism hints
     * on each component in the topology to tune the performance of a topology.
     * {@link org.apache.storm.Config#setNumWorkers(Map, int)}
     */
    private Integer topologyWorkers = 1;

    /**
     * Storm runs one task per executor by default, but the count can be set
     * explicitly here.
     * {@link org.apache.storm.topology.ComponentConfigurationDeclarer#setNumTasks(Number)}
     */
    private Integer numTasks = 1;

    /**
     * The maximum number of tuples that can be pending on a spout task at any given time
     * (a tuple is "pending" once emitted until it is acked or failed). Strongly
     * recommended to set this to prevent unbounded growth of in-flight messages.
     *
     * This config applies to individual tasks, not to spouts or topologies as a whole.
     * Note that this config parameter has no effect for unreliable spouts that don't tag
     * their tuples with a message id.
     * {@link org.apache.storm.Config#setMaxSpoutPending(Map, int)}
     */
    private Integer topologyMaxSpoutPending = 5000;

    /**
     * How long the ackers wait before timing a tuple tree out. Defaults to 30
     * seconds, which rarely needs changing.
     *
     * The maximum amount of time given to the topology to fully process a message
     * emitted by a spout. If the message is not acked within this time frame, Storm
     * will fail the message on the spout. Some spout implementations will then replay
     * the message at a later time.
     * {@link org.apache.storm.Config#setMessageTimeoutSecs(Map, int)}
     */
    private Integer topologyMessageTimeoutSecs = 30;

    // Additional Storm tuning parameters may be added here as needed.

}

3.storm bolt通用抽象
/**
 * Common base for bolt builders: carries the component id and parallelism hint
 * that the topology wiring code reads when registering the bolt.
 */
@Getter
@Setter
public abstract class BoltBuilder {
    /**
     * Parallelism hint of the component: the initial number of executors
     * (threads) assigned to run it. Each task will run on a thread in a process
     * somewhere around the cluster.
     * {@link org.apache.storm.topology.TopologyBuilder#setSpout(String, IRichSpout, Number)}
     */
    private Integer parallelismHint = 5;

    /** Component id used when registering this bolt in the topology. */
    private String id;

    /** Builds the Storm component this builder configures. */
    abstract public IComponent buildBolt();
}

4.storm 日志控制BoltBuilder类实现
/**
 * Spring configuration that exposes the {@link LogConsoleBolt} bean and wires
 * its tick-tuple emit interval from the "storm.bolt.logConsoleBolt" properties.
 */
@Getter
@Setter
@Configuration
@ConfigurationProperties(prefix = "storm.bolt.logConsoleBolt")
public class LogConsoleBoltBuilder extends BoltBuilder {

    /** Tick-tuple emit interval in seconds (default: emit every 60 s). */
    private int emitFrequencyInSeconds = 60;

    @Bean("logConsoleBolt")
    public LogConsoleBolt buildBolt() {
        super.setId("logConsoleBolt");
        LogConsoleBolt bolt = new LogConsoleBolt();
        bolt.setEmitFrequencyInSeconds(emitFrequencyInSeconds);
        return bolt;
    }
}

5.storm 日志控制类,获取kafka中的数据
@Slf4j
public class LogConsoleBolt extends BaseRichBolt {
private final static String AD_LIST_SHOW_COUNT = "AD_LIST_SHOW_COUNT";
private OutputCollector collector;
@Setter
private int emitFrequencyInSeconds;

public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector=collector;
}
@Override
public Map<String, Object> getComponentConfiguration() {
Map<String, Object> conf = new HashMap<String, Object>();
/**
* 这里配置TickTuple的发送频率
*/
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, emitFrequencyInSeconds);
return conf;
}

@Override
public void execute(Tuple input) {
try {
log.info(input.toString());
if (input.size()<5){
collector.ack(input);
}else {
String value = input.getStringByField("value").toString();

AdShowLogEntity adShowLogEntity = AdShowLogEntity.logToEntity(value);
if (adShowLogEntity != null){
collector.emit(new Values(Integer.parseInt(msg.getCreativeId()),System.currentTimeMillis(),0.01f));
}else {
// collector.ack(input);
}
collector.ack(input);
// System.out.println("received from kafka : "+ value);
// 必须ack,否则会重复消费kafka中的消息
}


}catch (Exception e){
e.printStackTrace();
collector.fail(input);
}


}

@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("adId","updateTime","price")); //分词定义的field为word
}
}

6.storm启动类,启动本地模式
/**
 * Assembles the Storm {@link Config} from {@link StormProps} and submits the
 * topology. Local (in-process) mode is used by default; remote cluster
 * submission is available via {@link #remoteSubmit}.
 */
@Service
public class AppMain {

    @Autowired
    private StormProps stormProps;

    @Autowired
    private TopologyBuilder topologyBuilder;

    /** Builds a fresh Storm config and launches the topology in local mode. */
    public void Laugher() throws InvalidTopologyException, AuthorizationException, AlreadyAliveException, InterruptedException {
        Config config = new Config();
        localSubmit(stormProps, stormProps.getTopologyName(), topologyBuilder, config);
    }

    /** Submits the topology to a remote Storm cluster. */
    private static void remoteSubmit(StormProps stormProps, TopologyBuilder builder, Config conf)
            throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        conf.setNumWorkers(stormProps.getTopologyWorkers());
        conf.setMaxSpoutPending(stormProps.getTopologyMaxSpoutPending());
        StormSubmitter.submitTopology(stormProps.getTopologyName(), conf, builder.createTopology());
    }

    /**
     * Runs the topology on an in-process {@link LocalCluster} for debugging.
     *
     * @param name    topology name to register under
     * @param builder fully wired topology builder
     * @throws InterruptedException if the submitting thread is interrupted
     */
    private static void localSubmit(StormProps stormProps, String name, TopologyBuilder builder, Config conf)
            throws InterruptedException {
        conf.setDebug(true);
        conf.setMaxTaskParallelism(3);
        conf.setNumWorkers(stormProps.getTopologyWorkers());
        conf.setMaxSpoutPending(stormProps.getTopologyMaxSpoutPending());
        // a single acker tracks the tuple trees
        conf.setNumAckers(1);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology(name, conf, builder.createTopology());
        // NOTE(review): the LocalCluster is never shut down — apparently
        // intentional for debug sessions, but confirm before wider use.
    }
}

猜你喜欢

转载自www.cnblogs.com/flyyu1/p/11447953.html