In-depth understanding of the TCP protocol and its source code
TCP close analysis
Connection close behind the termination process
TCP is a reliable, connection-oriented, stream-based protocol: on top of the unreliable IP transport layer it provides a "reliable" byte stream to upper-layer protocols.
- Reliable: TCP guarantees the integrity and ordering of user data.
- Connection-oriented: a connection must be established before data transfer begins, and torn down when it ends.
- Stream-based: TCP delivers data as a byte stream, with no message boundaries.
Connection establishment and teardown are among the key differences between TCP and UDP. This article focuses on TCP close,
analyzing its source code and tracing its execution.
First, TCP disconnects with a four-way handshake (the "four waves"):
- Host 1 has sent all the data it wants to send and decides to disconnect. It calls close and transmits a FIN|ACK segment (the ACK acknowledges host 2's earlier data). From this point the disconnection process has started: host 1's send window is closed, but its receive window keeps working.
- Host 2 receives host 1's FIN and replies with an ACK, telling host 1 that its FIN has been received. Host 2 may continue to send data until it, too, has sent everything it has.
- Host 2 then calls close and sends its own FIN, announcing that it has also finished sending data.
- Host 1 receives that FIN and replies with an ACK, telling host 2 that its FIN has been received.
While host 1 is disconnecting, host 2 may still be sending data, so one side can be closing while the other keeps transmitting. Of course, both sides may also finish sending at the same time; in that case host 2's FIN (the third wave) and host 1's second ACK are transmitted together.
Because TCP is a reliable protocol, each side must make sure the ACK for the peer's FIN actually arrives, and the two sides generally do not finish sending their data at the same time — hence it normally takes four waves.
Below we consider only the ordinary case where the two TCP endpoints close one after the other, not a simultaneous close.
TCP close source
- [ ] Reading the Linux kernel source for the TCP close process
In C, close is used to close the file descriptor of the corresponding TCP socket, for example:
/* Create an IPv4 TCP (stream) socket, then close its descriptor. */
int socket_fp = socket(AF_INET, SOCK_STREAM, 0);
close(socket_fp);
The close system call is handled by sys_close, whose code lives in fs/open.c:
/* fs/open.c:1191 */
/*
 * close(2) system call entry point: releases descriptor @fd of the
 * current process via __close_fd().  Abridged excerpt: the code elided
 * at "//..." only remaps restart-related error codes before returning.
 */
SYSCALL_DEFINE1(close, unsigned int, fd)
{
    int retval = __close_fd(current->files, fd);
    //...
    return retval;
}
sys_close then calls __close_fd in fs/file.c to release the file pointer; that function ultimately calls the filp_close method:
/* fs/file.c */
/*
 * __close_fd - detach descriptor @fd from @files and close it.
 * Returns 0 on success, -EBADF if @fd is out of range or unused.
 */
int __close_fd(struct files_struct *files, unsigned fd)
{
    struct file *file;
    struct fdtable *fdt;

    /* Take the per-process file-table lock. */
    spin_lock(&files->file_lock);
    fdt = files_fdtable(files);
    if (fd >= fdt->max_fds)
        goto out_unlock;
    file = fdt->fd[fd];
    if (!file)
        goto out_unlock;
    rcu_assign_pointer(fdt->fd[fd], NULL);
    /* Return the descriptor number to the free pool. */
    __put_unused_fd(files, fd);
    /* Release the lock before doing the (possibly slow) close work. */
    spin_unlock(&files->file_lock);
    /* Call filp_close() on the detached struct file. */
    return filp_close(file, files);

out_unlock:
    spin_unlock(&files->file_lock);
    return -EBADF;
}
filp_close is located in fs/open.c:
/* fs/open.c */
/*
 * filp_close - close a struct file on behalf of owner @id.
 * Flushes pending state, removes POSIX locks, then drops a reference.
 */
int filp_close(struct file *filp, fl_owner_t id)
{
    int retval = 0;

    /* Sanity check: the file must still hold at least one reference. */
    if (!file_count(filp)) {
        printk(KERN_ERR "VFS: Close: file count is 0\n");
        return 0;
    }

    /* Give the filesystem/driver a chance to flush via f_op->flush. */
    if (filp->f_op->flush)
        retval = filp->f_op->flush(filp, id);

    if (likely(!(filp->f_mode & FMODE_PATH))) {
        dnotify_flush(filp, id);
        locks_remove_posix(filp, id);
    }
    /* Drop one reference; the final drop triggers __fput(). */
    fput(filp);
    return retval;
}
In fs/file_table.c, fput calls fput_many, which schedules ____fput as task work; that in turn calls __fput, which finally invokes the function pointer f_op->release:
/* fs/file_table.c */
/*
 * __fput - final teardown once the last reference to @file is dropped.
 * Abridged excerpt: only the f_op->release call is shown.
 */
static void __fput(struct file *file)
{
    // ...
    /* Invoke the file_operations release hook (sock_close for sockets). */
    if (file->f_op->release)
        file->f_op->release(inode, file);
    // ...
}
/*
 * fput_many - drop @refs references to @file.  When the count reaches
 * zero, schedule ____fput (which runs __fput) to do the real teardown.
 */
void fput_many(struct file *file, unsigned int refs)
{
    if (atomic_long_sub_and_test(refs, &file->f_count)) {
        struct task_struct *task = current;

        if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
            /* Queue ____fput as task work, to run when the task
             * returns to user space. */
            init_task_work(&file->f_u.fu_rcuhead, ____fput);
            if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
                return;
        }
        /* Fallback (interrupt context / kernel thread): defer the
         * fput to a delayed workqueue. */
        if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
            schedule_delayed_work(&delayed_fput_work, 1);
    }
}
/* fput - drop a single reference to @file. */
void fput(struct file *file)
{
    /* Delegate to fput_many() with a count of one. */
    fput_many(file, 1);
}
f_op->release
This function pointer is assigned when the socket is initialized; for a socket it points to sock_close:
/* net/socket.c */
/*
 * File operations installed on every socket's struct file.  The
 * .release = sock_close entry is what f_op->release resolves to for a
 * socket, so close(2) on a socket descriptor ends up in sock_close().
 */
static const struct file_operations socket_file_ops = {
    .owner = THIS_MODULE,
    .llseek = no_llseek,
    .read_iter = sock_read_iter,
    .write_iter = sock_write_iter,
    .poll = sock_poll,
    .unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl = compat_sock_ioctl,
#endif
    .mmap = sock_mmap,
    .release = sock_close,
    .fasync = sock_fasync,
    .sendpage = sock_sendpage,
    .splice_write = generic_splice_sendpage,
    .splice_read = sock_splice_read,
};
sock_close calls __sock_release, which invokes the sock->ops->release function:
/* net/socket.c */
/*
 * __sock_release - protocol-level teardown of @sock.
 * Calls the protocol's release hook under the inode lock (when given),
 * then clears sock->ops and drops the owning module's reference.
 */
static void __sock_release(struct socket *sock, struct inode *inode)
{
    if (sock->ops) {
        struct module *owner = sock->ops->owner;

        if (inode)
            inode_lock(inode);
        /* Protocol-specific release; for TCP this reaches tcp_close(). */
        sock->ops->release(sock);
        sock->sk = NULL;
        if (inode)
            inode_unlock(inode);
        sock->ops = NULL;
        module_put(owner);
    }
    if (sock->wq.fasync_list)
        pr_err("%s: fasync list not empty!\n", __func__);

    if (!sock->file) {
        iput(SOCK_INODE(sock));
        return;
    }
    sock->file = NULL;
}
/* f_op->release hook for sockets: unwrap the socket from the inode
 * and hand it to __sock_release(). */
static int sock_close(struct inode *inode, struct file *filp)
{
    __sock_release(SOCKET_I(inode), inode);
    return 0;
}
Here sock->ops->release is a function pointer that differs depending on the transport-layer protocol. Since we are using TCP, the inet_stream_ops release path is taken, which ends up calling tcp_close.
TCP close call process
close(socket_fd)
|
f_op->release
|---sock_close
|---sock->ops->release
|--- inet_stream_ops->release(tcp_close)
tcp_close
/* net/ipv4/tcp.c */
/*
 * tcp_close - close a TCP socket: stop a listener, drain the receive
 * queue, and (for an established connection) send our FIN.
 * NOTE(review): this is an abridged excerpt; the branches elided at
 * "// ..." are why the "} else if" below has no visible opening "if".
 */
void tcp_close(struct sock *sk, long timeout)
{
    struct sk_buff *skb;
    int data_was_unread = 0;
    int state;

    lock_sock(sk);
    sk->sk_shutdown = SHUTDOWN_MASK;

    if (sk->sk_state == TCP_LISTEN) {
        /* Listening socket: move it to TCP_CLOSE and stop listening. */
        tcp_set_state(sk, TCP_CLOSE);
        inet_csk_listen_stop(sk);
        goto adjudge_to_death;
    }

    /* Drain the receive queue, counting bytes never read by the app. */
    while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
        u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

        /* A FIN occupies one sequence number but carries no data. */
        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
            len--;
        data_was_unread += len;
        __kfree_skb(skb);
    }

    sk_mem_reclaim(sk);

    if (sk->sk_state == TCP_CLOSE)
        goto adjudge_to_death;
    // ...
    } else if (tcp_close_state(sk)) { /* advance state, e.g. to FIN_WAIT1 */
        tcp_send_fin(sk); /* first wave: transmit our FIN */
    }

    sk_stream_wait_close(sk, timeout);

adjudge_to_death:
    // ...
}
EXPORT_SYMBOL(tcp_close);
We have now reached the critical section where TCP closes the connection: a listening socket is simply moved to the closed state; otherwise the receive queue is drained, and then tcp_send_fin
transmits the FIN, after which the socket enters the FIN_WAIT1 state.
The first wave
/* net/ipv4/tcp_output.c */
/*
 * tcp_send_fin - queue and transmit the FIN segment (the first wave).
 * Abridged excerpt: "......" marks elided code.
 */
void tcp_send_fin(struct sock *sk)
{
    ......
    /* Mark the last skb with the ACK|FIN flags. */
    TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
    ......
    /* Push pending frames (FIN included) out, bypassing Nagle. */
    __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}
The kernel then waits for the peer's response. Incoming segments are dispatched according to the TCP state by the processing function tcp_rcv_state_process in net/ipv4/tcp_input.c:
/*
 * FIN_WAIT1 handling inside tcp_rcv_state_process(): once our FIN is
 * fully acknowledged, move to FIN_WAIT2 and decide how to wait for the
 * peer's FIN (keepalive timer vs. timewait machinery).
 * FIX: the excerpt had lost the closing brace of the out-of-order-FIN
 * "if" block (after the second "return 1;"), leaving the braces
 * unbalanced; restored to match upstream net/ipv4/tcp_input.c (v5.4).
 */
case TCP_FIN_WAIT1: {
    int tmo;

    /* Fast Open: handle the first skb after the SYN-ACK. */
    if (req)
        tcp_rcv_synrecv_state_fastopen(sk);

    /* Our FIN is not yet fully acknowledged: stay in FIN_WAIT1. */
    if (tp->snd_una != tp->write_seq)
        break;

    /* Second wave received: the peer ACKed our FIN. */
    tcp_set_state(sk, TCP_FIN_WAIT2);
    sk->sk_shutdown |= SEND_SHUTDOWN;

    sk_dst_confirm(sk);

    if (!sock_flag(sk, SOCK_DEAD)) {
        /* Wake up lingering close() */
        sk->sk_state_change(sk);
        break;
    }

    /* TCP_LINGER2 < 0: abort instead of lingering in FIN_WAIT2. */
    if (tp->linger2 < 0) {
        tcp_done(sk);
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
        return 1;
    }
    if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
        after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
        /* Receive out of order FIN after close() */
        if (tp->syn_fastopen && th->fin)
            tcp_fastopen_active_disable(sk);
        tcp_done(sk);
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
        return 1;
    }

    tmo = tcp_fin_time(sk);
    if (tmo > TCP_TIMEWAIT_LEN) {
        /* Long FIN_WAIT2 timeout: arm the keepalive timer first. */
        inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
    } else if (th->fin || sock_owned_by_user(sk)) {
        /* A FIN could otherwise be lost; recheck via the timer. */
        inet_csk_reset_keepalive_timer(sk, tmo);
    } else {
        /* Hand FIN_WAIT2 over to the timewait machinery. */
        tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
        goto discard;
    }
    break;
}
Tracing execution in MenuOS
- [ ] Tracing execution in MenuOS
Start MenuOS with QEMU and enter debugging.
As in the previous lab, the kernel is compiled with debug support and MenuOS includes a TCP server and client.
Open a terminal, enter the LinuxKernel directory, and start MenuOS with debugging enabled:
~$ cd LinuxKernel
~/LinuxKernel$ qemu-system-i386 -kernel linux-5.4.2/arch/x86/boot/bzImage -initrd rootfs.img -append "root=/dev/sda init=/init nokaslr" -s -S
Entering the debugger
At this point the virtual machine stops at a black screen, waiting for further instructions from gdb.
Open a new terminal window and start the gdb debugger.
Then, in order:
- Import Symbol Table
- Debug server connection
- Set a breakpoint
jett@ubuntu:~/LinuxKernel$ gdb
(gdb) file ~/LinuxKernel/linux-5.4.2/vmlinux
Reading symbols from ~/LinuxKernel/linux-5.4.2/vmlinux...done.
(gdb) target remote:1234
Remote debugging using :1234
0x0000fff0 in ?? ()
(gdb) break start_kernel
Breakpoint 1 at 0xc1db5885: file init/main.c, line 576.
Then enter c; if the system continues execution and stops at the breakpoint in start_kernel(),
it indicates success.
(gdb) c
Continuing.
Breakpoint 1, start_kernel () at init/main.c:576
576 {
Add a new breakpoint at sys_close:
(gdb) b sys_close
Breakpoint 3 at 0xc119fe60: file fs/open.c, line 1191.
(gdb) info b # list the configured breakpoints
Num Type Disp Enb Address What
1 breakpoint keep y 0xc1db5885 in start_kernel at init/main.c:576
2 breakpoint keep y 0xc179ce00 in __se_sys_socketcall at net/socket.c:2818
3 breakpoint keep y 0xc119fe60 in __se_sys_close at fs/open.c:1191
Enter c to let the system continue.
Author: SA19225176, gravitation Dian
Reference: USTC Socket network programming course — building and debugging a Linux system