TCP协议和套接字、IP层之间的接口

1、TCP和套接字层之间的接口

TCP和套接字层之间的接口数据结构是struct proto,这个结构体的成员主要是一系列函数指针,其中从tcp_close到tcp_shutdown是TCP连接管理处理函数,TCP数据接收由tcp_recvmsg和tcp_v4_do_rcv函数实现。struct proto tcp_prot定义如下:

/*
 * TCP's instance of struct proto: the table of handlers and per-protocol
 * parameters that forms the interface between the socket layer and TCP.
 * The entries from .close through .shutdown are the connection-management
 * handlers; .recvmsg and .backlog_rcv are the data-receive entry points.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,			// socket close handler
	.connect		= tcp_v4_connect,		// connect handler
	.disconnect		= tcp_disconnect,		// disconnect handler
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,		// per-socket initialization
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,		// shutdown handler (NOTE(review): the article says "close the socket immediately" - confirm)
	.setsockopt		= tcp_setsockopt,		// set socket options
	.getsockopt		= tcp_getsockopt,	// get socket options
	.recvmsg		= tcp_recvmsg,			// receive function called from the socket layer
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	/* The compat option handlers exist only when CONFIG_COMPAT is defined. */
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

TCP和套接字层的接口数据结构tcp_prot,是在AF_INET协议族的初始化函数inet_init中调用proto_register函数注册的:

/* Excerpt from inet_init(); "..." marks code elided by the article. */
static int __init inet_init(void)
{
...

	/* Register the TCP protocol instance (tcp_prot) with proto_register(). */
	rc = proto_register(&tcp_prot, 1);
...

}

2、TCP和IP层之间的接口

2.1、TCP和IP层之间的接收接口

TCP协议和IP层之间的接收接口数据结构是struct net_protocol,TCP的实例tcp_protocol定义如下:

/*
 * TCP's instance of struct net_protocol: the receive-side interface between
 * the IP layer and TCP. It is stored in the inet_protos array by
 * inet_add_protocol(&tcp_protocol, IPPROTO_TCP).
 */
static const struct net_protocol tcp_protocol = {
	.handler =	tcp_v4_rcv,			// handler for packets delivered up from the IP layer
	.err_handler =	tcp_v4_err,			// ICMP error handler
	.gso_send_check = tcp_v4_gso_send_check,
	.gso_segment =	tcp_tso_segment,
	.gro_receive =	tcp4_gro_receive,
	.gro_complete =	tcp4_gro_complete,
	.no_policy =	1,
	.netns_ok =	1,
};

IP层通过ip_local_deliver_finish函数处理后将数据包上传到传输层,它根据协议号proto在inet_protos全局数组中找到传输层的接收函数。TCP和IP层之间的接口数据结构struct net_protocol tcp_protocol,是在inet_init函数中调用inet_add_protocol注册并保存到全局数组inet_protos中的。

/* Excerpt from inet_init(); "..." marks code elided by the article. */
static int __init inet_init(void)
{
...

	/*
	 * Register TCP's transport-layer handlers in the global inet_protos
	 * array under IPPROTO_TCP; a negative return is logged at KERN_CRIT.
	 */
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
...

}

2.2、TCP和IP层之间的发送接口

TCP和IP层之间的发送接口数据结构是struct inet_connection_sock_af_ops,TCP的struct inet_connection_sock_af_ops实例是ipv4_specific,它包含了一组AF_INET地址族中TCP协议实例的操作函数,其目的是提供一组IPv4和IPv6都可以共用的TCP与网络层之间的接口。

struct inet_connection_sock_af_ops:

/*
 * Address-family specific operations used by a connection-oriented socket:
 * the send-side interface between TCP and the network layer. One instance
 * exists per address family (ipv4_specific is the IPv4 one), so the TCP
 * code can be shared between IPv4 and IPv6.
 */
struct inet_connection_sock_af_ops {
	int	    (*queue_xmit)(struct sk_buff *skb);				// hand the packet to the network layer
	void	    (*send_check)(struct sock *sk, struct sk_buff *skb);	// checksum of the outgoing segment
	int	    (*rebuild_header)(struct sock *sk);				// rebuild the header (NOTE(review): the article says "create the TCP header" - confirm which header)
	/* Handle an incoming connection-request segment. */
	int	    (*conn_request)(struct sock *sk, struct sk_buff *skb);
	/* Create the new child socket for an accepted connection request. */
	struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req,
				      struct dst_entry *dst);
	/* Save the timestamp of the last packet received from the peer. */
	int	    (*remember_stamp)(struct sock *sk);
	/* Size of the network-layer header for this address family. */
	u16	    net_header_len;
	/* Size of this address family's sockaddr structure. */
	u16	    sockaddr_len;
	/* Network-layer socket option setter / getter. */
	int	    (*setsockopt)(struct sock *sk, int level, int optname, 
				  char __user *optval, unsigned int optlen);
	int	    (*getsockopt)(struct sock *sk, int level, int optname, 
				  char __user *optval, int __user *optlen);
	/* The compat variants exist only when CONFIG_COMPAT is defined. */
#ifdef CONFIG_COMPAT
	int	    (*compat_setsockopt)(struct sock *sk,
				int level, int optname,
				char __user *optval, unsigned int optlen);
	int	    (*compat_getsockopt)(struct sock *sk,
				int level, int optname,
				char __user *optval, int __user *optlen);
#endif
	/* Fill a generic struct sockaddr from the socket's address. */
	void	    (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
	int	    (*bind_conflict)(const struct sock *sk,
				     const struct inet_bind_bucket *tb);
};

tcp协议struct inet_connection_sock_af_ops数据结构的实例是ipv4_specific

ipv4_specific:

/*
 * IPv4 instance of struct inet_connection_sock_af_ops: the operations TCP
 * uses to talk to the IPv4 network layer. tcp_v4_init_sock() installs it
 * into icsk->icsk_af_ops.
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
    // Transmit function towards the IPv4 network layer
	.queue_xmit	   = ip_queue_xmit,  
    // Compute the checksum of the outgoing segment
	.send_check	   = tcp_v4_send_check,
    // Rebuild the header (NOTE(review): the article says "create the TCP header" - confirm which header)
	.rebuild_header	   = inet_sk_rebuild_header,
    // Handle a connection-request segment
	.conn_request	   = tcp_v4_conn_request,
    // Create the new child socket (the article says: after the peer's SYN-ACK
    // reply; NOTE(review): confirm which handshake segment triggers this)
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
    // Save the timestamp of the last packet received from a given peer
	.remember_stamp	   = tcp_v4_remember_stamp,
    // Network-layer header size, set to the size of the IPv4 header
	.net_header_len	   = sizeof(struct iphdr),
    // Set IPv4 network-layer socket options
	.setsockopt	   = ip_setsockopt,
    // Get IPv4 network-layer socket options
	.getsockopt	   = ip_getsockopt,
    // Produce a generic sockaddr_in style address for IPv4
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
    // Size of the IPv4 sockaddr_in address structure
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
    // The compat option handlers exist only when CONFIG_COMPAT is defined
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

ipv4_specific的注册是在tcp_v4_init_sock函数中完成的:

/* Excerpt from tcp_v4_init_sock(); "..." marks code elided by the article. */
static int tcp_v4_init_sock(struct sock *sk)
{
...
    /* Install ipv4_specific as this socket's address-family operations. */
    icsk->icsk_af_ops = &ipv4_specific;
...

}

3、TCP、套接字、IP层之间接口函数

TCP、套接字层和IP层之间的接口函数关系如下图,通过这张图就能清晰地了解TCP、套接字层、IP层之间数据包的发送和接收流程。

TCP、套接字、IP层之间接口函数

4、TCP套接字初始化

当应用层打开一个套接字后就会调用tcp_v4_init_sock函数初始化套接字,主要初始化TCP套接字结构,代码如下:

/*
 * Initialize the TCP-specific state of a newly opened socket. This is the
 * .init handler of tcp_prot. It sets up queues and timers, seeds the
 * retransmission and congestion-control fields, installs the IPv4
 * operations table, sets the default buffer sizes and bumps the global
 * socket counter. Always returns 0.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	/* Obtain the inet_connection_sock and tcp_sock views of sk. */
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	/*
	 * Initialize the out_of_order_queue list head. NOTE(review): the
	 * article calls this an "output queue"; the name suggests it holds
	 * out-of-order segments - confirm.
	 */
	skb_queue_head_init(&tp->out_of_order_queue);
	/* Initialize the transmit timers. */
	tcp_init_xmit_timers(sk);
	/* Initialize the prequeue (input queue). */
	tcp_prequeue_init(tp);

	/*
	 * Seed the retransmission timeout icsk_rto and mdev with
	 * TCP_TIMEOUT_INIT. The article states this is 3 seconds; the value
	 * is not shown here. mdev is presumably the RTT mean deviation -
	 * confirm.
	 */
	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	 /*
	  * Start with the slow-start threshold at TCP_INFINITE_SSTHRESH.
	  * NOTE(review): the article says this "disables slow start"; an
	  * infinite threshold more likely means the threshold does not
	  * limit slow start yet - confirm.
	  */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	/*
	 * Upper clamp of the send congestion window: all bits set. The
	 * article says "16 bits"; the field width is not shown here.
	 */
	tp->snd_cwnd_clamp = ~0;
	/* Start mss_cache at TCP_MSS_DEFAULT (the article states 536). */
	tp->mss_cache = TCP_MSS_DEFAULT;

	/* Initialize the reordering field from sysctl_tcp_reordering. */
	tp->reordering = sysctl_tcp_reordering;
	/* Install the initial congestion-control operations. */
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	/* At this point the socket is still in the TCP_CLOSE state. */
	sk->sk_state = TCP_CLOSE;

	/*
	 * Socket callback that, per the article, is invoked when space
	 * becomes available in the socket's write buffer.
	 */
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	/* Install ipv4_specific as the address-family operations table. */
	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		/*
		 * An allocation failure is not treated as an error: the
		 * pointer simply stays NULL and initialization continues.
		 */
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	 /*
	  * Set the send and receive buffer sizes from element [1] of the
	  * sysctl_tcp_wmem / sysctl_tcp_rmem arrays. The article notes that
	  * applications can change them with setsockopt.
	  */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	/*
	 * tcp_sockets_allocated is a global per-cpu counter of sockets;
	 * increment it, with bottom halves disabled around the update.
	 */
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}

猜你喜欢

转载自blog.csdn.net/City_of_skey/article/details/84558568