The use of Netty's heartbeat mechanism and source code analysis

1. Concept introduction

1. Heartbeat

In a long-lived TCP connection, a special data packet that the client and the server periodically exchange is called a "heartbeat packet". Each side uses it to notify and confirm that the other party is still online, which verifies that the TCP connection is still valid.

2. The necessity of heartbeat

  • The client program crashed, or the network was disconnected, etc., unilaterally released the TCP connection
  • TCP connection was killed by firewall

There may be a firewall between the two machines, and that firewall can apply a policy that drops the socket connection at any time, possibly without performing the normal four-way close handshake. A server or client that never receives the disconnect notification will assume the connection is still usable, and will only discover that it is not when it next tries to send data.

Generally speaking, it is precisely because these unpredictable factors can break a long-lived TCP connection (client-side causes are the most common) that a "heartbeat packet" is needed to confirm whether both parties are still online. When the server learns that the client has unilaterally released the TCP connection, it can release the corresponding memory resources promptly. When the client learns that the TCP connection is broken, it can take whatever measures it needs, such as deciding whether to reconnect.

2. Annotated demo

import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelPipeline;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import io.netty.handler.codec.string.StringDecoder;
import io.netty.handler.codec.string.StringEncoder;
import io.netty.handler.timeout.IdleStateHandler;

/**
 * Demo server: listens on port 9000, exchanges plain strings with clients and
 * uses an {@link IdleStateHandler} to detect connections that have gone quiet.
 */
public class HeartBeatServer {

    /**
     * Starts the server and blocks until the listening channel is closed.
     * Any exception raised while starting or running is printed, and both
     * event loop groups are shut down on the way out.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        EventLoopGroup acceptGroup = new NioEventLoopGroup();
        EventLoopGroup ioGroup = new NioEventLoopGroup();
        try {
            ServerBootstrap server = new ServerBootstrap();
            server.group(acceptGroup, ioGroup);
            server.channel(NioServerSocketChannel.class);
            server.childHandler(new ChannelInitializer<SocketChannel>() {
                @Override
                protected void initChannel(SocketChannel ch) throws Exception {
                    // The first IdleStateHandler argument is the reader-idle time:
                    // when nothing has been read from the client for 3 seconds an
                    // IdleStateEvent is fired and handed to the next handler, which
                    // must implement userEventTriggered() to deal with it.
                    ch.pipeline()
                            .addLast("decoder", new StringDecoder())
                            .addLast("encoder", new StringEncoder())
                            .addLast(new IdleStateHandler(3, 0, 0))
                            .addLast(new HeartBeatServerHandler());
                }
            });
            System.out.println("netty server start。。");
            // Bind, then wait until the server channel is closed.
            server.bind(9000).sync().channel().closeFuture().sync();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            ioGroup.shutdownGracefully();
            acceptGroup.shutdownGracefully();
        }
    }
}

import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.SimpleChannelInboundHandler;
import io.netty.handler.timeout.IdleStateEvent;

public class HeartBeatServerHandler extends SimpleChannelInboundHandler<String> {
    
    

    // 读超时的次数
    int readIdleTimes;

    @Override
    protected void channelRead0(ChannelHandlerContext ctx, String s) throws Exception {
    
    
        System.out.println(" ====== > [server] message received : " + s);
        if ("Heartbeat Packet".equals(s)) {
    
    
            // 如果是心跳包,给客户端发送一个响应 "ok"
            ctx.channel().writeAndFlush("ok");
        } else {
    
    
            System.out.println("其他信息处理 ... ");
        }
    }

    @Override
    public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exception {
    
    
        IdleStateEvent event = (IdleStateEvent) evt;

        String eventType = null;
        switch (event.state()) {
    
    
            case READER_IDLE:
                eventType = "读空闲";
                readIdleTimes++; // 读空闲的计数加1
                break;
            case WRITER_IDLE:
                eventType = "写空闲";
                // 不处理
                break;
            case ALL_IDLE:
                eventType = "读写空闲";
                // 不处理
                break;
        }


        System.out.println(ctx.channel().remoteAddress() + "超时事件:" + eventType);
        if (readIdleTimes > 3) {
    
    
            // 即时超过3次,也不一定是TCP连接真的挂掉了
            // 也有可能是传输过程中网络拥堵,从而造成服务端在误判为TCP连接断开了
            System.out.println(" [server]读空闲超过3次,关闭连接,释放更多资源");
            // 关闭通道之前,先尝试发送一个关闭信息
            ctx.channel().writeAndFlush("idle close");
            ctx.channel().close();
        }
    }

    @Override
    public void channelActive(ChannelHandlerContext ctx) throws Exception {
    
    
        System.err.println("=== " + ctx.channel().remoteAddress() + " is active ===");
        // 通道就绪之后,初始化读超时的次数为0
        readIdleTimes = 0;
    }
}
import io.netty.bootstrap.Bootstrap;
import io.netty.channel.*;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioSocketChannel;
import io.netty.handler.codec.string.StringDecoder;
import io.netty.handler.codec.string.StringEncoder;

import java.util.Random;

/**
 * Demo client: connects to the heartbeat server on 127.0.0.1:9000 and sends a
 * heartbeat packet at random intervals for as long as the channel is active.
 */
public class HeartBeatClient {

    /**
     * Connects to the server and sends heartbeat packets until the channel
     * becomes inactive. Exceptions are printed and the event loop group is
     * always shut down.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        EventLoopGroup eventLoopGroup = new NioEventLoopGroup();
        try {
            Bootstrap bootstrap = new Bootstrap();
            bootstrap.group(eventLoopGroup).channel(NioSocketChannel.class)
                    .handler(new ChannelInitializer<SocketChannel>() {
                        @Override
                        protected void initChannel(SocketChannel ch) throws Exception {
                            ChannelPipeline pipeline = ch.pipeline();
                            pipeline.addLast("decoder", new StringDecoder());
                            pipeline.addLast("encoder", new StringEncoder());
                            pipeline.addLast(new HeartBeatClientHandler());
                        }
                    });

            System.out.println("netty client start。。");
            Channel channel = bootstrap.connect("127.0.0.1", 9000).sync().channel();
            String text = "Heartbeat Packet";
            Random random = new Random();
            while (channel.isActive()) {
                // Wait 0-9 seconds so that some gaps exceed the server's
                // 3-second reader-idle timeout.
                int num = random.nextInt(10);
                Thread.sleep(num * 1000L);
                channel.writeAndFlush(text); // simulate sending a heartbeat packet
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            eventLoopGroup.shutdownGracefully();
        }
    }

    /**
     * Prints every message from the server and closes the channel when the
     * server announces that it is closing an idle connection.
     */
    static class HeartBeatClientHandler extends SimpleChannelInboundHandler<String> {

        @Override
        protected void channelRead0(ChannelHandlerContext ctx, String msg) throws Exception {
            System.out.println("client received :" + msg);
            if ("idle close".equals(msg)) {
                System.out.println(" 服务端关闭连接,客户端也关闭");
                // closeFuture() only returns a future and closes nothing;
                // close() is what actually shuts the channel down.
                ctx.channel().close();
            }
        }
    }
}

3. Analysis of the key source code of Netty's heartbeat mechanism

1. Introduction to the IdleStateHandler constructor

/**
 * Convenience constructor taking the three idle timeouts in seconds.
 * Delegates to the four-argument constructor with {@code TimeUnit.SECONDS}.
 *
 * @param readerIdleTimeSeconds reader-idle timeout, in seconds
 * @param writerIdleTimeSeconds writer-idle timeout, in seconds
 * @param allIdleTimeSeconds    all-idle (read and write) timeout, in seconds
 */
public IdleStateHandler(int readerIdleTimeSeconds, int writerIdleTimeSeconds, int allIdleTimeSeconds) {
    
    
	this((long)readerIdleTimeSeconds, (long)writerIdleTimeSeconds, (long)allIdleTimeSeconds, TimeUnit.SECONDS);
}
  • readerIdleTimeSeconds : Read timeout. That is, when no data is read from the Channel within the specified time interval, an IdleStateEvent event of READER_IDLE will be triggered
  • writerIdleTimeSeconds : Write timeout. That is, when no data is written to the Channel within the specified time interval, an IdleStateEvent event of WRITER_IDLE will be triggered
  • allIdleTimeSeconds : Read/write timeout. That is, when there is no read or write operation within the specified time interval, an ALL_IDLE IdleStateEvent event will be triggered

2. Bold conjecture

Before that, let's make a bold guess.
In the demo, the read timeout parameter is set to 3 seconds, so Netty has to check every 3 seconds whether any data (a heartbeat packet or business data) has been read from the channel. Obviously, this only needs a timer, and it is natural to assume that the timer is initialized once the channel is ready. So we look at the channelActive() method of IdleStateHandler, and there we find the core method, initialize().

@Override
public void channelActive(ChannelHandlerContext ctx) throws Exception {
    
    
    // This method will be invoked only if this handler was added
    // before channelActive() event is fired.  If a user adds this handler
    // after the channelActive() event, initialize() will be called by beforeAdd().
    initialize(ctx); // Key point: this is where the idle-timeout tasks get scheduled.
    super.channelActive(ctx);
}

3. The initialize() method of IdleStateHandler

As the source code shows, Netty does not use a fixed-rate timer directly. Instead it uses delayed tasks that reschedule themselves each time they run (a kind of "recursive" scheduling), which gives a more flexible timer.

// Records the current time as the last read/write time and schedules one
// delayed task for each idle timeout that is greater than zero.
private void initialize(ChannelHandlerContext ctx) {
    
    
    // Avoid the case where destroy() is called before scheduling timeouts.
    // See: https://github.com/netty/netty/issues/143
    // NOTE(review): presumably 1 = already initialized and 2 = destroyed;
    // confirm against the declaration of the state field.
    switch (state) {
    
    
    case 1:
    case 2:
        return;
    }

    state = 1;
    initOutputChanged(ctx);

    lastReadTime = lastWriteTime = ticksInNanos();
    if (readerIdleTimeNanos > 0) {
    
    
    	// Key point: schedule the first reader-idle check after the reader-idle timeout.
        readerIdleTimeout = schedule(ctx, new ReaderIdleTimeoutTask(ctx),
                readerIdleTimeNanos, TimeUnit.NANOSECONDS);
    }
    if (writerIdleTimeNanos > 0) {
    
    
        writerIdleTimeout = schedule(ctx, new WriterIdleTimeoutTask(ctx),
                writerIdleTimeNanos, TimeUnit.NANOSECONDS);
    }
    if (allIdleTimeNanos > 0) {
    
    
        allIdleTimeout = schedule(ctx, new AllIdleTimeoutTask(ctx),
                allIdleTimeNanos, TimeUnit.NANOSECONDS);
    }
}
/**
  * Schedules {@code task} on the executor of {@code ctx} to run once after the
  * given delay and returns the resulting future.
  *
  * This method is visible for testing!
  */
ScheduledFuture<?> schedule(ChannelHandlerContext ctx, Runnable task, long delay, TimeUnit unit) {
    
    
    return ctx.executor().schedule(task, delay, unit);
}

4. The read-timeout scheduled task: ReaderIdleTimeoutTask

// Delayed task that checks whether the reader has been idle. Every run
// reschedules the task itself, either after the full reader-idle timeout or
// after the time remaining until that timeout would be reached.
private final class ReaderIdleTimeoutTask extends AbstractIdleTask {
    
    

    ReaderIdleTimeoutTask(ChannelHandlerContext ctx) {
    
    
        super(ctx);
    }

    @Override
    protected void run(ChannelHandlerContext ctx) {
    
    
    	// nextDelay: how long to wait before this task should run again (in nanoseconds).
    	// lastReadTime: as the name says, the time data was last read from the channel.
        long nextDelay = readerIdleTimeNanos;
        if (!reading) {
    
    
        	// Equivalent to: nextDelay = nextDelay - (ticksInNanos() - lastReadTime);
        	// Worked example using the demo's 3-second read timeout
        	// (the real values are in nanoseconds):
        	/*
        	If (ticksInNanos() - lastReadTime) is 2 s, the last read happened 2 seconds ago.
        	That is less than the configured 3 s, so the next 3-second mark has not been reached.
        	The next check should run 3 - 2 = 1 second from now, so nextDelay becomes 1 s.
        	*/
        	/*
        	If (ticksInNanos() - lastReadTime) is 4 s, the last read happened 4 seconds ago.
        	That is more than the configured 3 s, so the 3-second mark has already passed.
        	nextDelay then works out to -1 s, and a fresh 3-second delayed task is scheduled below.
        	*/
            nextDelay -= ticksInNanos() - lastReadTime;
        }

        if (nextDelay <= 0) {
    
    
            // Reader is idle - set a new timeout and notify the callback.
            // Schedule this task again after the full reader-idle timeout (3 seconds in the demo).
            readerIdleTimeout = schedule(ctx, this, readerIdleTimeNanos, TimeUnit.NANOSECONDS);

            boolean first = firstReaderIdleEvent;
            firstReaderIdleEvent = false;

            try {
    
    
                IdleStateEvent event = newIdleStateEvent(IdleState.READER_IDLE, first);
                // Hand the reader-idle event to channelIdle(), which passes it on as a
                // user event to the handler after IdleStateHandler in the pipeline.
                channelIdle(ctx, event);
            } catch (Throwable t) {
    
    
                ctx.fireExceptionCaught(t);
            }
        } else {
    
    
            // Read occurred before the timeout - set a new timeout with shorter delay.
            // Run this task again after nextDelay nanoseconds.
            readerIdleTimeout = schedule(ctx, this, nextDelay, TimeUnit.NANOSECONDS);
        }
    }
}

Insert picture description here

/**
  * Is called when an {@link IdleStateEvent} should be fired. This implementation calls
  * {@link ChannelHandlerContext#fireUserEventTriggered(Object)}.
  */
 protected void channelIdle(ChannelHandlerContext ctx, IdleStateEvent evt) throws Exception {
    
    
 	// Every fireXxx() method invokes the Xxx() method of the handler that follows
 	// the current one in the pipeline, so that handler continues the processing.
     ctx.fireUserEventTriggered(evt);
}

Guess you like

Origin blog.csdn.net/qq_43290318/article/details/113100023