Storm + Kafka Development

1、pom.xml

    <!--storm-->
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>1.1.1</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-kafka-client</artifactId>
      <version>1.1.1</version>
    </dependency>

    <!--kafka: prefer pinning an explicit version that matches your broker
        (e.g. a 0.10.x client for the Storm 1.1.1 setup referenced below);
        the RELEASE meta-version is deprecated in Maven 3 -->
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.11</artifactId>
      <version>RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>RELEASE</version>
    </dependency>
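
Note that storm-core is scoped provided (the cluster supplies it at runtime), while storm-kafka-client and the Kafka clients are not, so they must be bundled into the topology jar or the workers will throw ClassNotFoundException. A minimal maven-shade-plugin sketch for that (the plugin version here is an assumption):

    <build>
      <plugins>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-shade-plugin</artifactId>
          <version>3.1.0</version>
          <executions>
            <execution>
              <!-- build an uber-jar at package time so non-provided deps ship with the topology -->
              <phase>package</phase>
              <goals>
                <goal>shade</goal>
              </goals>
            </execution>
          </executions>
        </plugin>
      </plugins>
    </build>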

2、Spout


import com.fasterxml.jackson.databind.ObjectMapper;
import com.util.PropUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.storm.kafka.spout.Func;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.io.IOException;
import java.util.List;

import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST;


@Slf4j
public class SpoutConfig {

    /**
     * Kafka record translator: turns a ConsumerRecord into a Test object.
     */
    private static Func<ConsumerRecord<String, String>, List<Object>> recordTranslator = new Func<ConsumerRecord<String, String>, List<Object>>() {
        ObjectMapper objectMapper = new ObjectMapper();

        @Override
        public List<Object> apply(ConsumerRecord<String, String> record) {
            String message = record.value();
            Test test = null;
            try {
                test = objectMapper.readValue(message, Test.class);
            } catch (IOException e) {
                log.error("Failed to deserialize Test from message: {}", message, e);
            }
            return new Values(test);
        }
    };

    /**
     * Kafka spout configuration.
     *
     * @return KafkaSpoutConfig
     */
    public static KafkaSpoutConfig<String, String> newKafkaSpoutConfig() {
        return KafkaSpoutConfig
                .builder(PropUtils.getProperty("kafka.servers"), PropUtils.getProperty("kafka.topic"))// bootstrap servers and topic
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "busSpoutGroup")// consumer group
                .setProp(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, 600000)// max interval between poll() calls before the consumer is kicked from the group
                // .setProp(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 200)
                .setRecordTranslator(recordTranslator, new Fields("test"))// record translator defined above
                .setOffsetCommitPeriodMs(10000)// how often offsets are committed
                .setFirstPollOffsetStrategy(LATEST)// where to start reading on the first poll
                // .setMaxUncommittedOffsets(500)// max number of polled offsets (records) pending commit before another poll can take place
                .build();
    }
}
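
The Test class that the translator deserializes into is not shown in the original. A minimal sketch, assuming the Kafka payload is a JSON object; the field names are placeholders:

import java.io.Serializable;

// Hypothetical POJO for the JSON payload; the fields are assumptions.
// Serializable so tuples carrying it can fall back to Java serialization
// when crossing worker boundaries; Jackson needs the no-arg constructor
// and getters/setters.
public class Test implements Serializable {
    private String id;
    private String name;

    public Test() { }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
}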

PropUtils.java

import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

@Slf4j
public class PropUtils {

    // local/development environment
    private static final String devMode = "development";
    // production environment
    // private static final String devMode = "production";
    // test environment
    // private static final String devMode = "test";


    private static Properties prop = new Properties();

    static {
        try {
            Properties kafka = new Properties();

            // getResourceAsStream returns null (not an IOException) when the file
            // is missing, so check explicitly instead of letting load() NPE
            InputStream in = PropUtils.class.getClassLoader().getResourceAsStream("profile/" + devMode + "/kafka.properties");
            if (in == null) {
                throw new IOException("profile/" + devMode + "/kafka.properties not found on classpath");
            }
            kafka.load(in);

            prop.putAll(kafka);

        } catch (IOException e) {
            log.error("Failed to load configuration file!", e);
            System.exit(1);
        }
    }

    public static String getProperty(String p) {
        return prop.getProperty(p);
    }

    public static int getInt(String p) {
        return Integer.parseInt(prop.getProperty(p));
    }

    public static boolean getBoolean(String p) {
        return Boolean.parseBoolean(prop.getProperty(p));
    }

}

kafka.properties

kafka.servers=node1:6667,node2:6667,node3:6667
kafka.topic=test
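
With the PropUtils above, this file is resolved on the classpath as profile/<devMode>/kafka.properties, so in a standard Maven layout it would live under src/main/resources/profile/development/ with one copy per environment.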

3、Bolt

public class TestBolt1 extends BaseWindowedBolt
public class TestBolt2 extends BaseBasicBolt
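
Only the class declarations are given here. A minimal sketch of what the two bolts might look like; the "test" field name matches the spout's output field, and the processing logic is a placeholder:

// TestBolt1.java -- tumbling-window bolt: execute() fires once per window
// with every tuple that arrived during the 5-minute window.
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseWindowedBolt;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.windowing.TupleWindow;

import java.util.Map;

public class TestBolt1 extends BaseWindowedBolt {
    private OutputCollector collector; // kept for emitting downstream if needed

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(TupleWindow inputWindow) {
        // Placeholder aggregation over all tuples collected in the window.
        for (Tuple tuple : inputWindow.get()) {
            Test test = (Test) tuple.getValueByField("test");
            // ... aggregate test objects here ...
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Declare output fields here if this bolt emits to downstream bolts.
    }
}

// TestBolt2.java -- basic (non-windowed) bolt: processes one tuple at a time;
// BaseBasicBolt acks automatically when execute() returns.
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;

public class TestBolt2 extends BaseBasicBolt {
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        Test test = (Test) input.getValueByField("test");
        // ... per-record processing here ...
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // No downstream output in this sketch.
    }
}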

4、Creating the Topology

import lombok.extern.slf4j.Slf4j;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseWindowedBolt;

import java.util.List;
import java.util.concurrent.TimeUnit;



@Slf4j
public class TestTopology { // renamed: a class named TopologyBuilder would clash with the imported org.apache.storm.topology.TopologyBuilder and fail to compile
    static final String TOPOLOGY_NAME = "TEST_TOPOLOGY";

    public static void main(String[] args) throws Exception {
        Config config = new Config();

        config.setMessageTimeoutSecs(3600); // message timeout
        config.setNumAckers(0); // number of acker executors (0 disables acking)
        config.setNumWorkers(2); // number of workers for the whole topology

        StormTopology topology = buildTopology();

        if (args.length == 0) {
            // local mode
            LocalCluster cluster = new LocalCluster();
            // submit the topology
            cluster.submitTopology(TOPOLOGY_NAME, config, topology);
            log.info("topology submit...");
            // let it run for a while, then shut down
            TimeUnit.HOURS.sleep(1);
            cluster.killTopology(TOPOLOGY_NAME);
            cluster.shutdown();
            log.info("topology shutdown...");
            System.exit(0);
        } else {
            // cluster mode
            config.put(Config.STORM_CLUSTER_MODE, "distributed");
            // submit the topology
            StormSubmitter.submitTopology(args[0], config, topology);
        }
    }

    /**
     * Build the topology.
     *
     * @return StormTopology
     */
    private static StormTopology buildTopology() {
        TopologyBuilder builder = new TopologyBuilder();
        // spout
        builder.setSpout("KafkaSpout", new KafkaSpout<>(SpoutConfig.newKafkaSpoutConfig()), 3);
        // bolts
        // tumbling-window bolt
        builder.setBolt("TestBolt1", new TestBolt1().withTumblingWindow(new BaseWindowedBolt.Duration(5, TimeUnit.MINUTES)), 1).localOrShuffleGrouping("KafkaSpout");
        // regular (non-windowed) bolt
        builder.setBolt("TestBolt2", new TestBolt2(), 1).localOrShuffleGrouping("KafkaSpout");

        return builder.createTopology();
    }


}
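
To run on a real cluster, build the shaded jar (see the pom section above) and submit it with the storm jar command; main() passes args[0] through as the topology name, so something like storm jar test-topology.jar TestTopology myTopologyName would do it (the jar and class names here are assumptions).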

Notes on problems encountered:

1、None of the bolts in the topology threw exceptions, yet the spout reported failed tuples.

Solution:

conf.setMaxSpoutPending(100000); // caps the number of un-acked/un-failed tuples pending on a single spout task, preventing the pending queue from growing too large; only takes effect for reliable topologies
conf.setMessageTimeoutSecs(1000); // message timeout; the default is 30s


Or:

conf.setNumAckers(0); // acker parallelism; 0 disables Storm's acking entirely, trading reliability for no more failed/replayed tuples

2、Exception: org.apache.kafka.clients.consumer.CommitFailedException: Commit cannot be completed since the group has already rebalanced and assigned the partitions to another member. This means that the time between subsequent calls to poll() was longer than the configured max.poll.interval.ms, which typically implies that the poll loop is spending too much time message processing. You can address this either by increasing the session timeout or by reducing the maximum size of batches returned in poll() with max.poll.records.

Solution:

a) Increase max.poll.interval.ms (default 300000, i.e. 300s).

b) Decrease max.poll.records (default 500).
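
Both knobs are set on the spout via setProp in newKafkaSpoutConfig() above: MAX_POLL_INTERVAL_MS_CONFIG is raised to 600000 there, and the commented-out MAX_POLL_RECORDS_CONFIG line would lower the batch size to 200.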

References: Kafka consumer exception handling

Storm 1.1.1 committing offsets to Kafka 0.10.x



Reposted from blog.csdn.net/csdnzhang365/article/details/95347258