kafka sender

步骤一：获取元数据
步骤二：判断哪些partition有消息可以发送
步骤三：标识还没有拉取到元数据的topic
步骤四：检查与要发送数据的主机的网络是否已经建立好。
步骤五：按照broker进行分组，同一个broker的partition为同一组，一个批次就一个请求 -> broker，减少网络传输的次数
Map<Integer, List<RecordBatch>> batches
步骤六：对超时的批次是如何处理的？
步骤七：创建发送消息的请求
步骤八：真正执行网络操作的都是NetworkClient这个组件，包括：发送请求、接收响应（处理响应）
Sender.run
    /**
     * Runs one iteration of the send loop: decides what can be sent, hands the resulting
     * requests to the network client, and finally polls the client.
     *
     * <p>Sequence, as visible in this method:
     * <ol>
     *   <li>Fetch the current cluster view from {@code metadata}.</li>
     *   <li>Ask the accumulator which nodes have data ready to send.</li>
     *   <li>Re-add every topic whose leader is unknown to the metadata and request an update.</li>
     *   <li>Drop each node the client reports as not ready, remembering the smallest
     *       connection delay among the dropped nodes.</li>
     *   <li>Drain the accumulator for the remaining nodes; the result maps an {@code Integer}
     *       key (presumably the broker/node id - confirm in {@code drain}) to a list of
     *       batches.</li>
     *   <li>Abort expired batches and record them as errors.</li>
     *   <li>Build produce requests from the drained batches.</li>
     *   <li>Send the requests and poll the client.</li>
     * </ol>
     *
     * @param now timestamp forwarded unchanged to every collaborator call in this iteration
     *            (presumably the current time in milliseconds - confirm against the caller)
     */
    void run(long now) {
        // Step 1: snapshot of the cluster metadata. Walkthrough note from the original
        // author: on the very first pass the metadata may not have been fetched yet, in
        // which case the later steps have nothing to work with (not verifiable from here).
        Cluster clusterSnapshot = metadata.fetch();

        // Step 2: find out which nodes have partitions with data ready to send.
        RecordAccumulator.ReadyCheckResult readiness = accumulator.ready(clusterSnapshot, now);

        // Step 3: topics with an unknown leader may be mid leader-election or may have been
        // expired from the metadata. There is data waiting for them, so put them back into
        // the metadata and ask for a refresh.
        if (!readiness.unknownLeaderTopics.isEmpty()) {
            for (String topic : readiness.unknownLeaderTopics) {
                metadata.add(topic);
            }
            metadata.requestUpdate();
        }

        // Step 4: keep only the nodes the client is ready to send to. For every node that is
        // dropped, track the smallest connection delay so it can bound the poll timeout.
        long shortestNotReadyDelay = Long.MAX_VALUE;
        for (Iterator<Node> nodes = readiness.readyNodes.iterator(); nodes.hasNext(); ) {
            Node candidate = nodes.next();
            if (client.ready(candidate, now)) {
                continue;
            }
            nodes.remove();
            shortestNotReadyDelay = Math.min(shortestNotReadyDelay, client.connectionDelay(candidate, now));
        }

        // Step 5: drain batches for the surviving nodes, grouped per map key so that the
        // partitions sharing a key travel together (the original notes describe this as
        // "one request per broker" to cut down on network round trips). If every node was
        // filtered out above, drain is still invoked, just with an empty node set.
        Map<Integer, List<RecordBatch>> drained =
                accumulator.drain(clusterSnapshot, readiness.readyNodes, maxRequestSize, now);

        if (guaranteeMessageOrder) {
            // Mute every partition that was just drained; a no-op when nothing was drained.
            for (List<RecordBatch> perKeyBatches : drained.values()) {
                for (RecordBatch drainedBatch : perKeyBatches) {
                    accumulator.mutePartition(drainedBatch.topicPartition);
                }
            }
        }

        // Step 6: abort batches that have expired and report each one to the error sensor.
        List<RecordBatch> timedOut = accumulator.abortExpiredBatches(requestTimeout, now);
        for (RecordBatch stale : timedOut) {
            sensors.recordErrors(stale.topicPartition.topic(), stale.recordCount);
        }

        sensors.updateProduceRequestMetrics(drained);

        // Step 7: turn the drained batches into produce requests. With nothing drained this
        // is expected to yield no requests (depends on createProduceRequests - confirm).
        List<ClientRequest> produceRequests = createProduceRequests(drained, now);

        // If any node is ready to send and has sendable data, poll with a timeout of 0 so the
        // loop can immediately come back and try to send more. Otherwise the timeout is driven
        // by nodes whose data is not sendable yet (e.g. lingering, backing off). Nodes with
        // sendable data that are not ready are deliberately excluded from the zero-timeout
        // case, since they would cause busy looping.
        long idleTimeout = Math.min(readiness.nextReadyCheckDelayMs, shortestNotReadyDelay);
        boolean hasSendableNodes = readiness.readyNodes.size() > 0;
        if (hasSendableNodes) {
            log.trace("Nodes with data ready to send: {}", readiness.readyNodes);
            log.trace("Created {} produce requests: {}", produceRequests.size(), produceRequests);
        }
        long pollTimeout = hasSendableNodes ? 0 : idleTimeout;

        // Hand each request to the client. Walkthrough note from the original author: this
        // is believed to only register write interest (OP_WRITE) for the request, with the
        // actual I/O happening in poll() below - not verifiable from this method.
        for (ClientRequest produceRequest : produceRequests) {
            client.send(produceRequest, now);
        }

        // Step 8: let the client do its work for up to pollTimeout.
        // - If some partitions are already ready to be sent, the select time is 0.
        // - Otherwise, if some partition has accumulated data that is not ready yet, the
        //   select time is the gap between now and its linger expiry.
        // - Otherwise the select time is the gap between now and the metadata expiry.
        // Per the original notes, the metadata fetch itself and (possibly) connection setup
        // are also driven by this call - TODO confirm in the client implementation.
        client.poll(pollTimeout, now);
    }
猜你喜欢