Kafka和Storm的整合

主要难点在于实现一个KafkaSpout，用于Storm接收从Kafka传来的消息
    //发送第一步，加入需要发送列表中
    protected void setWaitingToEmit(ConsumerRecords<K, V> consumerRecords) {
        for (TopicPartition tp : consumerRecords.partitions()) {
            waitingToEmit.put(tp, new ArrayList<>(consumerRecords.records(tp)));
        }
    }

    protected void emitIfWaitingNotEmitted() {
        //从waitingToEmit列表获取将要发送的事件，开始发送
        Iterator<List<ConsumerRecord<K, V>>> waitingToEmitIter = waitingToEmit.values().iterator();
        LOG.info("real event emit begin");
        outerLoop:
        while (waitingToEmitIter.hasNext()) {
            List<ConsumerRecord<K, V>> waitingToEmitForTp = waitingToEmitIter.next();
            while (!waitingToEmitForTp.isEmpty()) {
                final boolean emittedTuple = emitOrRetryTuple(waitingToEmitForTp.remove(0));
                if (emittedTuple) {
                    LOG.error("event emit has failed");
                    break outerLoop;
                }
            }
            waitingToEmitIter.remove();
        }
    }

    //实际发送过程
    private boolean emitOrRetryTuple(ConsumerRecord<K, V> record) {
        final TopicPartition tp = new TopicPartition(record.topic(), record.partition());
        final KafkaSpoutMessageId msgId = retryService.getMessageId(record);

        if (offsetManagers.containsKey(tp) && offsetManagers.get(tp).contains(msgId)) {   // has been acked
            LOG.trace("Tuple for record [{}] has already been acked. Skipping", record);
        } else if (emitted.contains(msgId)) {   // has been emitted and it is pending ack or fail
            LOG.trace("Tuple for record [{}] has already been emitted. Skipping", record);
        } else {
            final OffsetAndMetadata committedOffset = kafkaConsumer.committed(tp);
            if (isAtLeastOnceProcessing()
                && committedOffset != null 
                && committedOffset.offset() > record.offset()
                && commitMetadataManager.isOffsetCommittedByThisTopology(tp, committedOffset, Collections.unmodifiableMap(offsetManagers))) {
                // Ensures that after a topology with this id is started, the consumer fetch
                // position never falls behind the committed offset (STORM-2844)
                throw new IllegalStateException("Attempting to emit a message that has already been committed."
                    + " This should never occur when using the at-least-once processing guarantee.");
            }

            final List<Object> tuple = kafkaSpoutConfig.getTranslator().apply(record);
            if (isEmitTuple(tuple)) {
                final boolean isScheduled = retryService.isScheduled(msgId);
                // not scheduled <=> never failed (i.e. never emitted), or scheduled and ready to be retried
                if (!isScheduled || retryService.isReady(msgId)) {
                    final String stream = tuple instanceof KafkaTuple ? ((KafkaTuple) tuple).getStream() : Utils.DEFAULT_STREAM_ID;

                    if (!isAtLeastOnceProcessing()) {
                        if (kafkaSpoutConfig.isTupleTrackingEnforced()) {
                            collector.emit(stream, tuple, msgId);
                            LOG.trace("Emitted tuple [{}] for record [{}] with msgId [{}]", tuple, record, msgId);
                        } else {
                            collector.emit(stream, tuple);
                            LOG.trace("Emitted tuple [{}] for record [{}]", tuple, record);
                        }
                    } else {
                        emitted.add(msgId);
                        offsetManagers.get(tp).addToEmitMsgs(msgId.offset());
                        if (isScheduled) {  // Was scheduled for retry and re-emitted, so remove from schedule.
                            retryService.remove(msgId);
                        }
                        collector.emit(stream, tuple, msgId);
                        tupleListener.onEmit(tuple, msgId);
                        LOG.trace("Emitted tuple [{}] for record [{}] with msgId [{}]", tuple, record, msgId);
                    }
                    return true;
                }
            } else {
                /*if a null tuple is not configured to be emitted, it should be marked as emitted and acked immediately
                * to allow its offset to be commited to Kafka*/
                LOG.debug("Not emitting null tuple for record [{}] as defined in configuration.", record);
                if (isAtLeastOnceProcessing()) {
                    msgId.setNullTuple(true);
                    offsetManagers.get(tp).addToEmitMsgs(msgId.offset());
                    ack(msgId);
                }
            }
        }
        return false;
    }
猜你喜欢