msn: [email protected]
来源:http://yfydz.cublog.cn
1. 前言 在2.6.1*以上的Linux内核中,关于TCP连接跟踪处理有了比较大的修改,增加了TCP可能标志位组合的检查;增加了通过序列号、确认号和窗口值来判断数据包合法性的功能,支持SACK选项;状态转换数组也进行了一些修改和完善,相应程序代码量增加不少。 以下2.6内核代码版本为2.6.17.11。 2. 通过确认号、序列号和窗口判断数据包合法性 该思路提出比较早,最初是在“Real Stateful TCP Packet Filtering in IP Filter”中提出的( http://www.nluug.nl/events/sane2000/papers.html ),用在FreeBSD,OpenBSD,NetBSD等操作系统的防火墙IP Filter中。 原理: TCP连接开始时进行3次握手,交换MSS等信息,同时在window字段中告诉对方本方的数据接收缓冲区大小,另一方发送数据时一次不能发送超过该大小的数据,也就是一方的序列号变化值不能超过对方提供的window大小,确认号的变化值也不能超过己方提供的window大小,这是正常TCP 实现都会遵守的,如果不遵守这条件,说明该数据包非法。 使用该功能要注意两个TCP选项,第一,TCP的SACK(选择性确认)选项(RFC2018,2883),在数据包丢失的情况下,使发送方只重新发送丢失的包而不是全部发送;第二,扩展window选项(RFC1323),该选项可将window值从16位最大扩展到30位。 为描述此功能新增加了一个数据结构: /* include/linux/netfilter/nf_conntrack_tcp.h */ struct ip_ct_tcp_state { u_int32_t td_end; /* max of seq + len */ u_int32_t td_maxend; /* max of ack + max(win, 1) */ u_int32_t td_maxwin; /* max(win) */ u_int8_t td_scale; /* window scale factor */ u_int8_t loose; /* used when connection picked up from the middle */ u_int8_t flags; /* per direction options */ }; 判断一个TCP包序列号和确认号是否在给定window范围内的函数是tcp_in_window: static int tcp_in_window(struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, struct iphdr *iph, struct tcphdr *tcph) { struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; __u32 seq, ack, sack, end, win, swin; int res; // 客户端发的第一个SYN包是到不了这个函数的,直接就接受了, // 是从连接的第2个包以后才进入本函数处理 /* * Get the required data from the packet. */ // 序列号 seq = ntohl(tcph->seq); // 确认号 ack = sack = ntohl(tcph->ack_seq); // 本方窗口 win = ntohs(tcph->window); // 本数据包结束序列号 end = segment_seq_plus_len(seq, skb->len, iph, tcph); // 接收方支持SACK的话检查是否在TCP选项中有SACK if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, iph, tcph, &sack); // 省略号部分是一些调试打印信息,忽略下同 ... if (sender->td_end == 0) { // 连接初始情况 /* * Initialize sender data. */ if (tcph->syn && tcph->ack) { // 服务器端 /* * Outgoing SYN-ACK in reply to a SYN. 
*/ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); // 检查TCP选项,记录发送方是否支持SACK和窗口扩大 tcp_options(skb, iph, tcph, sender); /* * RFC 1323: * Both sides must send the Window Scale option * to enable window scaling in either direction. */ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) // 不支持窗口扩大 sender->td_scale = receiver->td_scale = 0; } else { /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ sender->td_end = end; sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { // 发送方重新发包 /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, iph, tcph, sender); } // 无ACK标志的包,或带ACK标志但确认号为0的RST包,将确认号置为接收方的结束序列号 if (!(tcph->ack)) { /* * If there is no ACK, just pretend it was set and OK. */ ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { /* * Broken TCP stacks, that set ACK in RST packets as well * with zero ack value. */ ack = sack = receiver->td_end; } // 无数据包或起始包 if (seq == end && (!tcph->rst || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) /* * Packets contains no data: we assume it is valid * and check the ack value only. * However RST segments are always validated by their * SEQ number, except when seq == 0 (reset sent answering * SYN. */ seq = end = sender->td_end; ... 
// 检查序列号和确认号是否在合法范围内 if (sender->loose || receiver->loose || (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { // 合法包 /* * Take into account window scaling (RFC 1323). */ // 窗口扩大调整 if (!tcph->syn) win <<= sender->td_scale; /* * Update sender data. */ // 发送方窗口调整 swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) sender->td_end = end; /* * Update receiver data. */ // 接收方的参数调整 if (after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } /* * Check retransmissions. */ // 判断是否是重发包 if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->retrans = 0; } } /* * Close the window of disabled window tracking :-) */ if (sender->loose) sender->loose--; res = 1; } else { ... // 对非法包的缺省策略,0拒绝,非0接受.该参数可通过/proc文件系统设置 res = ip_ct_tcp_be_liberal; } ... return res; } 3. 
TCP状态转换表 这是2.6.1*中的新转换表: static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, /*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { /* REPLY */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ /*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, /*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; 这是以前2.4.26中的转换表 static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ /*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI }, /*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI }, /*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES }, /*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL }, /*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { /* REPLY */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ /*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }, /*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI }, /*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI }, /*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI }, /*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; 该数组的解读方法在以前的文章中介绍过,不再赘述。 从两个数组对比可看到,增加了对SYNACK包的判断,同时数组中的sIV(非法状态)项也增加了很多,使得状态跟踪更加严格,但不足的是对ACK包还是太宽容,ACK扫描还是防不住。 4. 
TCP合法标志位组合 TCP的各个标志位的合法组合方式由下面的数组定义,数组每个元素为一种可能的组合方式,除了专门定义合法组合项为1,其他未定义的都属于非法项,值为0。 static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = { [TH_SYN] = 1, [TH_SYN|TH_ACK] = 1, [TH_SYN|TH_PUSH] = 1, [TH_SYN|TH_ACK|TH_PUSH] = 1, [TH_RST] = 1, [TH_RST|TH_ACK] = 1, [TH_RST|TH_ACK|TH_PUSH] = 1, [TH_FIN|TH_ACK] = 1, [TH_ACK] = 1, [TH_ACK|TH_PUSH] = 1, [TH_ACK|TH_URG] = 1, [TH_ACK|TH_URG|TH_PUSH] = 1, [TH_FIN|TH_ACK|TH_PUSH] = 1, [TH_FIN|TH_ACK|TH_URG] = 1, [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1, }; 5. 与netlink的接口 新的协议跟踪结构struct ip_conntrack_protocol中增加了4个和netlink接口相关函数,用于通过netlink套接口传递跟踪协议相关信息。 /* convert protoinfo to nfnetink attributes */ int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct); /* convert nfnetlink attributes to protoinfo */ int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct); int (*tuple_to_nfattr)(struct sk_buff *skb, const struct ip_conntrack_tuple *t); int (*nfattr_to_tuple)(struct nfattr *tb[], struct ip_conntrack_tuple *t); 在TCP协议中对应函数为: .to_nfattr = tcp_to_nfattr, .from_nfattr = nfattr_to_tcp, .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, 6. 结论 2.6.1*的TCP协议跟踪处理比2.4考虑的因素增加了很多,这些新功能的使用可使系统的安全性进一步有所提高。