1、TCP和套接字层之间的接口
TCP和套接字层之间的接口数据结构是struct proto,这个结构体的成员主要是一系列函数指针,其中从tcp_close到tcp_shutdown是TCP连接管理的处理函数;TCP数据的接收由tcp_recvmsg和tcp_v4_do_rcv函数实现。struct proto tcp_prot定义如下:
// Interface between the socket layer and TCP: the handlers, counters and
// tunables that describe the TCP protocol instance.
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close, // socket close handler
.connect = tcp_v4_connect, // connection setup handler
.disconnect = tcp_disconnect, // disconnect handler
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v4_init_sock, // per-socket initialization
.destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown, // socket shutdown handler
.setsockopt = tcp_setsockopt, // set socket options
.getsockopt = tcp_getsockopt, // get socket options
.recvmsg = tcp_recvmsg, // receive entry point called from the socket layer
.backlog_rcv = tcp_v4_do_rcv, // presumably handles segments queued on the socket backlog - confirm
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_DESTROY_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
// 32-bit compatibility variants of the socket option handlers
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
};
TCP和套接字层的接口数据结构tcp_prot,是在AF_INET协议族的初始化函数inet_init中调用proto_register函数注册的:
// Excerpt of inet_init (omitted parts are marked "..."): shows where the TCP
// protocol instance is registered.
static int __init inet_init(void)
{
...
// Register tcp_prot; the result is stored in rc.
rc = proto_register(&tcp_prot, 1);
...
}
2、TCP和IP层之间的接口
2.1、TCP和IP层之间的接收接口
TCP协议和IP层的数据结构接口是struct net_protocol,struct net_protocol tcp_protocol如下:
// Receive-side interface between the IP layer and TCP.
static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv, // handles packets handed up by the IP layer
.err_handler = tcp_v4_err, // ICMP error handler
.gso_send_check = tcp_v4_gso_send_check,
.gso_segment = tcp_tso_segment,
.gro_receive = tcp4_gro_receive,
.gro_complete = tcp4_gro_complete,
.no_policy = 1,
.netns_ok = 1,
};
IP层经ip_local_deliver_finish函数处理后将数据包上传到传输层,具体是根据协议号proto在inet_protos全局数组中找到传输层的接收函数。TCP和IP层之间的接口数据结构struct net_protocol tcp_protocol,是在inet_init函数中调用inet_add_protocol注册并保存到全局数组inet_protos中的。
// Excerpt of inet_init (omitted parts are marked "..."): shows where TCP's
// receive handlers are made known to the IP layer.
static int __init inet_init(void)
{
...
// Register the transport-layer handlers for IPPROTO_TCP in the global
// inet_protos array; a failure is only logged, not propagated here.
if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
...
}
2.2、TCP和IP层之间的发送接口
TCP和IP层之间的发送接口数据结构是struct inet_connection_sock_af_ops,TCP的struct inet_connection_sock_af_ops实例是ipv4_specific,它包含了一组AF_INET地址族中TCP协议实例的操作函数,其目的是提供一组IPv4和IPv6都可以共用的TCP与网络层之间的接口。
struct inet_connection_sock_af_ops:
// Address-family-specific operations used by a connection-oriented socket to
// talk to the network layer.
struct inet_connection_sock_af_ops {
int (*queue_xmit)(struct sk_buff *skb); // hand a packet to the network layer
void (*send_check)(struct sock *sk, struct sk_buff *skb); // checksum an outgoing segment
// NOTE(review): the original note said "builds the TCP header"; which header
// is rebuilt cannot be told from this declaration - confirm.
int (*rebuild_header)(struct sock *sk);
int (*conn_request)(struct sock *sk, struct sk_buff *skb);
struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
int (*remember_stamp)(struct sock *sk);
u16 net_header_len;
u16 sockaddr_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
// 32-bit compatibility variants of the socket option handlers
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
int level, int optname,
char __user *optval, unsigned int optlen);
int (*compat_getsockopt)(struct sock *sk,
int level, int optname,
char __user *optval, int __user *optlen);
#endif
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
int (*bind_conflict)(const struct sock *sk,
const struct inet_bind_bucket *tb);
};
tcp协议struct inet_connection_sock_af_ops数据结构的实例是ipv4_specific
ipv4_specific:
// IPv4 instance of the address-family operations used by TCP sockets.
const struct inet_connection_sock_af_ops ipv4_specific = {
// Transmit function into the IPv4 network layer
.queue_xmit = ip_queue_xmit,
// Computes the checksum of an outgoing segment
.send_check = tcp_v4_send_check,
// Rebuilds the header. NOTE(review): the original note said "TCP header";
// confirm which header inet_sk_rebuild_header actually rebuilds.
.rebuild_header = inet_sk_rebuild_header,
// Handles an incoming connection-request segment
.conn_request = tcp_v4_conn_request,
// Creates the new child socket. NOTE(review): the original note said this
// happens after the peer's SYNACK reply; confirm the exact handshake step.
.syn_recv_sock = tcp_v4_syn_recv_sock,
// Records the timestamp of the last packet received from a given peer
// (per the original note)
.remember_stamp = tcp_v4_remember_stamp,
// Network-layer header size: the size of the IPv4 header structure
.net_header_len = sizeof(struct iphdr),
// Sets IPv4 network-layer socket options
.setsockopt = ip_setsockopt,
// Gets IPv4 network-layer socket options
.getsockopt = ip_getsockopt,
// Produces a sockaddr_in style address for IPv4
.addr2sockaddr = inet_csk_addr2sockaddr,
// Size of the IPv4 sockaddr_in address structure
.sockaddr_len = sizeof(struct sockaddr_in),
.bind_conflict = inet_csk_bind_conflict,
// 32-bit compatibility variants of the socket option handlers
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
};
ipv4_specific是在tcp_v4_init_sock函数中注册的,即把它赋值给icsk->icsk_af_ops:
// Excerpt of tcp_v4_init_sock (omitted parts are marked "..."): shows where
// ipv4_specific is installed on the socket.
static int tcp_v4_init_sock(struct sock *sk)
{
...
// Point the connection socket's address-family operations at the IPv4 table.
icsk->icsk_af_ops = &ipv4_specific;
...
}
3、TCP、套接字、IP层之间接口函数
TCP、套接字层和IP层之间的接口函数关系如下图所示,通过这张图可以清晰地了解数据包在TCP、套接字层、IP层之间的发送和接收流程。
4、TCP套接字初始化
当应用层打开一个套接字后就会调用tcp_v4_init_sock函数初始化套接字,主要初始化TCP套接字结构,代码如下:
// Initializes the TCP-specific state of a newly created socket: queues,
// timers, congestion-control defaults, callbacks, address-family operations
// and default buffer sizes. Always returns 0; a failed cookie allocation
// leaves tp->cookie_values NULL and is not reported to the caller.
static int tcp_v4_init_sock(struct sock *sk)
{
// Connection-socket and TCP-socket pointers for sk
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
// Initialize the out_of_order_queue list head.
// NOTE(review): the original note called this an "output queue"; the name
// suggests it holds out-of-order segments - confirm.
skb_queue_head_init(&tp->out_of_order_queue);
// Initialize the transmit timers
tcp_init_xmit_timers(sk);
// Initialize the prequeue
tcp_prequeue_init(tp);
// Retransmission timeout (icsk_rto) and mdev both start at TCP_TIMEOUT_INIT
// (3 seconds per the original note - confirm against the header).
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
tp->snd_cwnd = 2;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
*/
// snd_ssthresh starts at TCP_INFINITE_SSTHRESH.
// NOTE(review): the original note claimed this disables slow start; an
// "infinite" threshold presumably means no threshold is in effect yet - confirm.
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
// Clamp for the congestion window: all bits set.
// NOTE(review): the original note said "16 bits"; the width depends on the
// field's type, which is not shown here.
tp->snd_cwnd_clamp = ~0;
// Cached MSS starts at TCP_MSS_DEFAULT (536 per the original note - confirm)
tp->mss_cache = TCP_MSS_DEFAULT;
// Reordering value starts from the sysctl_tcp_reordering tunable
tp->reordering = sysctl_tcp_reordering;
// Start with the tcp_init_congestion_ops congestion-control operations
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
// The socket starts in the TCP_CLOSE state
sk->sk_state = TCP_CLOSE;
// Callback invoked when write-buffer space becomes available
// (per the original note)
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
// Install the IPv4 address-family operations
icsk->icsk_af_ops = &ipv4_specific;
icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv4_specific;
#endif
/* TCP Cookie Transactions */
if (sysctl_tcp_cookie_size > 0) {
/* Default, cookies without s_data_payload. */
tp->cookie_values =
kzalloc(sizeof(*tp->cookie_values),
sk->sk_allocation);
if (tp->cookie_values != NULL)
kref_init(&tp->cookie_values->kref);
}
/* Presumed zeroed, in order of appearance:
* cookie_in_always, cookie_out_never,
* s_data_constant, s_data_in, s_data_out
*/
// Default send/receive buffer sizes come from element [1] of the sysctl
// arrays; per the original note, applications can change them via setsockopt.
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
// Increment the global counter of allocated TCP sockets, bracketed by
// local_bh_disable()/local_bh_enable().
percpu_counter_inc(&tcp_sockets_allocated);
local_bh_enable();
return 0;
}