Linux kernel protocol stack accept system call

Table of Contents

1. Overview of accept function

2. Accept kernel implementation

2.1 sys_accept

2.2 Implementation of IPv4 protocol family inet_accept (core)

2.3. TCP protocol implementation inet_csk_accept (core)

2.3.1 Get the TCB of the communication socket (reqsk_queue_get_child, core)


1. Overview of accept function

  1. Take the first request socket from the full-connection (accept) queue. If the queue is empty, whether the system call blocks depends on whether the socket is in blocking or non-blocking mode
  2. Create a new socket, new_sock, for communication with the client
  3. Map new_sock to a new file descriptor and return that descriptor to the user
  4. Copy the client address information held in the request queue to user space

For specific usage of accept system call, please refer to "Socket Programming: Accept Interface Usage"

2. Accept kernel implementation

For a TCP stream socket, accept() takes a request socket from the full-connection (accept) queue. The main call stack is as follows:

sys_accept
--sys_accept4
	--inet_accept // IPv4 protocol family
		--inet_csk_accept // TCP protocol
			--reqsk_queue_get_child // take a request socket from the full-connection (accept) queue

2.1 sys_accept

  1. Look up the listening socket, sock, from the file descriptor passed in
  2. Allocate a new socket, newsock
  3. Get a new file descriptor, newfd
  4. Associate the newly created newsock with newfd
  5. Call the accept interface of the IPv4 protocol family: inet_accept()
  6. Copy the remote address information to user space
  7. Install newfd into the process's file descriptor table (fd_install)
//As the original kernel comment below also shows, accept() has two jobs:
//1. Create a new socket for communication between the server and the client
//2. Create a new fd through which the application later reads/writes that socket
/*
 *	For accept, we attempt to create a new socket, set up the link
 *	with the client, wake up the client, then return the new
 *	connected fd. We collect the address of the connector in kernel
 *	space and move it to user at the very end. This is unclean because
 *	we open the socket then return an error.
 *
 *	1003.1g adds the ability to recvmsg() to query connection pending
 *	status to recvmsg. We need to add that support in a way that's
 *	clean when we restructure accept also.
 *
 *	fd:             file descriptor of the listening socket
 *	upeer_sockaddr: user-space buffer for the peer address; may be NULL,
 *	                in which case no address is copied out
 *	upeer_addrlen:  user-space length argument handed to
 *	                move_addr_to_user() together with upeer_sockaddr
 *
 *	Returns the new file descriptor on success, otherwise the negative
 *	error code accumulated in err.
 */
asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
			   int __user *upeer_addrlen)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len, newfd, fput_needed;
	char address[MAX_SOCK_ADDR];

	//Look up the struct socket of the listening socket from its fd.
	//On failure err is returned as-is; it is expected to have been
	//filled in through &err by the lookup (NOTE(review): confirm).
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	//Allocate a struct socket for the new communication socket
	err = -ENFILE;
	if (!(newsock = sock_alloc()))
		goto out_put;
	//The new socket uses the same type and operation set as the listening socket
	newsock->type = sock->type;
	newsock->ops = sock->ops;

	/*
	 * We don't need try_module_get here, as the listening socket (sock)
	 * has the protocol module (sock->ops->owner) held.
	 */
	__module_get(newsock->ops->owner);
	//Allocate a file descriptor (and struct file) for the new socket
	newfd = sock_alloc_fd(&newfile);
	if (unlikely(newfd < 0)) {
		err = newfd;
		//Only newsock exists at this point, so release it directly
		sock_release(newsock);
		goto out_put;
	}
	//Associate the file with the socket
	err = sock_attach_fd(newsock, newfile);
	if (err < 0)
		goto out_fd_simple;

	//Security-module hook (SELinux related)
	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;

	//Call the accept() provided by the protocol family to complete the
	//accept; for the IPv4 family this is inet_accept(), see below
	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
	if (err < 0)
		goto out_fd;

	//If the caller asked for the client address, fetch it with getname()
	//and copy it to user space. NOTE(review): the last argument (2)
	//presumably selects the peer address - confirm against getname().
	if (upeer_sockaddr) {
		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
					  &len, 2) < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user(address, len, upeer_sockaddr,
					upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	//Publish the new descriptor: install newfile under newfd
	/* File flags are not inherited via accept() unlike another OSes. */
	fd_install(newfd, newfile);
	err = newfd;

	security_socket_post_accept(sock, newsock);

	//Common exit: drop the reference on the listening socket's file
out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
	//sock_attach_fd() failed: newsock, newfile and newfd are each
	//released explicitly
out_fd_simple:
	sock_release(newsock);
	put_filp(newfile);
	put_unused_fd(newfd);
	goto out_put;
	//Failure after the socket was attached to the file: no explicit
	//sock_release() here. NOTE(review): presumably fput(newfile) also
	//releases newsock - confirm.
out_fd:
	fput(newfile);
	put_unused_fd(newfd);
	goto out_put;
}

2.2 Implementation of IPv4 protocol family inet_accept (core)

  1. Call the accept interface of the transport layer, inet_csk_accept, to get the first request sock from the full-connection (accept) queue
  2. "Graft" the obtained sock (struct sock) onto the new socket newsock (struct socket)
/*
 *	Accept a pending connection. The TCP layer now gives BSD semantics.
 */
int inet_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk1 = sock->sk;
	int err = -EINVAL;

	//直接调用传输层的accept()回调,TCP为inet_csk_accept(),该回调需要返回
	//新的通信套接字对应的TCB
	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
	if (!sk2)
		goto do_err;

	lock_sock(sk2);
	BUG_TRAP((1 << sk2->sk_state) &
		 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE));
	//将新的TCB和新的套接字结构sock关联起来
	sock_graft(sk2, newsock);
	//设置套接字结构中的状态为”已连接“
	newsock->state = SS_CONNECTED;
	err = 0;
	release_sock(sk2);
do_err:
	return err;
}

/*
 * Attach the sock sk to the socket structure parent.
 *
 * With sk->sk_callback_lock write-held (bottom halves disabled), this
 * points sk's sleep queue at parent's wait queue, links the two
 * structures to each other in both directions, and calls the
 * security hook for the graft.
 */
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
	write_lock_bh(&sk->sk_callback_lock);
	/* sk uses the wait queue embedded in parent */
	sk->sk_sleep = &parent->wait;
	/* link the two structures in both directions */
	parent->sk = sk;
	sk->sk_socket = parent;
	security_sock_graft(sk, parent);
	write_unlock_bh(&sk->sk_callback_lock);
}

2.3. TCP protocol implementation inet_csk_accept (core)

  1. Check that the incoming socket is a listening socket, that is, one that has a request queue
  2. Check whether the full-connection (accept) queue is empty; if it is, either block or return immediately, depending on whether the socket is non-blocking
  3. Pop the first request sock from the full-connection queue and return it
/*
 * This will accept the next outstanding connection.
 */
//sk为监听套接字传输控制块
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sock *newsk;
	int error;

	lock_sock(sk);
	//传入到的套接字的TCB状态必须是TCP_LISTEN
	/* We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	error = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out_err;

	//如果监听套接字的accept接收队列为空,则需要根据当前套接字是否阻塞进行操作
	/* Find already established connection */
	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
		//根据是否阻塞决定一个超时值,如果为非阻塞模式,那么timeo将为0
		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

		//对于非阻塞模式,直接返回重试错误
		/* If this is a non blocking socket don't sleep */
		error = -EAGAIN;
		if (!timeo)
			goto out_err;
		//休眠等待accept接收队列非空
		error = inet_csk_wait_for_connect(sk, timeo);
		if (error)
			goto out_err;
	}
	//到这里,说明当前accept队列已经有连接可以接收(可能是阻塞后被唤醒的)
	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
out:
	release_sock(sk);
	return newsk;
out_err:
	newsk = NULL;
	*err = error;
	goto out;
}

2.3.1 Get the TCB of the communication socket (reqsk_queue_get_child, core)

In the previous notes on the server side of the three-way handshake, we saw that the sockets waiting for accept() have already been placed in the accept queue of the listening socket. The execution logic of this interface is as follows:

  1. Take the first request socket req (struct request_sock) from the head of the full-connection (accept) queue
  2. Take the sock req->sk out of the request socket; this is the value that is returned
  3. Release the request socket req, and update the full-connection queue length
/*
 * Detach the first request from the accept queue and return the sock
 * attached to it.
 *
 * queue:  accept queue to take the request from
 * parent: the listening sock whose accept-queue count is decremented
 */
static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
						 struct sock *parent)
{
	struct request_sock *req;
	struct sock *child;

	/* Unlink the first request from the accept queue. */
	req = reqsk_queue_remove(queue);

	/* Remember the sock associated with that request. */
	child = req->sk;
	BUG_TRAP(child != NULL);

	/* One fewer entry is waiting on the listening sock. */
	sk_acceptq_removed(parent);

	/* The request block has served its purpose; free it. */
	__reqsk_free(req);

	return child;
}

/*
 * Unlink and return the first request on the accept list of queue
 * (plain singly-linked-list head removal). When the list becomes
 * empty the tail pointer is cleared as well.
 */
static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue)
{
	struct request_sock *req = queue->rskq_accept_head;
	struct request_sock *next;

	BUG_TRAP(req != NULL);

	next = req->dl_next;
	queue->rskq_accept_head = next;
	if (!next)
		queue->rskq_accept_tail = NULL;

	return req;
}

/* Decrement the accept-backlog counter of sk by one. */
static inline void sk_acceptq_removed(struct sock *sk)
{
	sk->sk_ack_backlog -= 1;
}

Guess you like

Origin blog.csdn.net/wangquan1992/article/details/108881151