2.6.1*Linux内核中TCP的连接跟踪

本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: [email protected]
来源:http://yfydz.cublog.cn

1. 前言
在2.6.1*以上的Linux内核中,关于TCP连接跟踪处理有了比较大的修改,增加了TCP可能标志位组合的检查;增加了通过序列号、确认号和窗口值来判断数据包合法性的功能,支持SACK选项;状态转换数组也进行了一些修改和完善,相应程序代码量增加不少。
以下2.6内核代码版本为2.6.17.11。

2. 通过确认号、序列号和窗口判断数据包合法性

该思路提出比较早,最初是在“Real Stateful TCP Packet Filtering in IP Filter”中提出的( http://www.nluug.nl/events/sane2000/papers.html ),用在FreeBSD,OpenBSD,NetBSD等操作系统的防火墙IP Filter中。
原理:
TCP连接开始时进行3次握手,交换MSS等信息,同时在window字段中告诉对方本方的数据接收缓冲区大小,另一方发送数据时一次不能发送超过该大小的数据,也就是一方的序列号变化值不能超过对方提供的window大小,确认号的变化值是不能超过己方提供的window大小,这是正常TCP 实现都会遵守的,如果不遵守这条件,说明该数据包非法。

使用该功能要注意两个TCP选项,第一,TCP的SACK(选择性确认)选项(RFC 2018、2883),在数据包丢失的情况下,使发送方只重新发送丢失的包而不是全部发送;第二,窗口扩大(window scale)选项(RFC 1323),该选项可将window值从16位最大扩展到30位。
为描述此功能新增加了一个数据结构:
/* include/linux/netfilter/nf_conntrack_tcp.h */
/*
 * Window-tracking state kept for one direction of a TCP connection.
 * tcp_in_window() reads one instance as the "sender" (state->seen[dir])
 * and the other as the "receiver" (state->seen[!dir]).
 */
struct ip_ct_tcp_state {
 u_int32_t td_end;  /* max of seq + len */
 u_int32_t td_maxend; /* max of ack + max(win, 1) */
 u_int32_t td_maxwin; /* max(win) */
 u_int8_t td_scale; /* window scale factor */
 u_int8_t loose;  /* used when connection picked up from the middle;
        while non-zero the window check is bypassed */
 u_int8_t flags;  /* per direction options */
};
判断一个TCP包序列号和确认号是否在给定window范围内的函数是tcp_in_window:
/*
 * Decide whether the sequence number, acknowledgement number and window of
 * a TCP packet fall inside the window tracked for the connection.
 *
 * state: per-connection TCP tracking data; state->seen[dir] is treated as
 *        the sender of this packet and state->seen[!dir] as the receiver.
 * dir:   direction of the packet (original or reply).
 * index: packet classification; retransmission bookkeeping is only done
 *        when it equals TCP_ACK_SET.
 * skb, iph, tcph: the packet and its IP/TCP headers.
 *
 * Returns 1 when the packet is accepted (the tracked sender/receiver data is
 * updated in that case); otherwise returns ip_ct_tcp_be_liberal.
 *
 * NOTE: the "..." lines are places where the article elided code (debug
 * printing, according to the author).
 */
static int tcp_in_window(struct ip_ct_tcp *state,
                         enum ip_conntrack_dir dir,
                         unsigned int index,
                         const struct sk_buff *skb,
                         struct iphdr *iph,
                         struct tcphdr *tcph)
{
 struct ip_ct_tcp_state *sender = &state->seen[dir];
 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 __u32 seq, ack, sack, end, win, swin;
 int res;
 
// Per the article: the client's first SYN never reaches this function, it is
// accepted directly; only the second and later packets of a connection are
// processed here.
 /*
  * Get the required data from the packet.
  */
// Sequence number
 seq = ntohl(tcph->seq);
// Acknowledgement number (sack starts out equal to ack)
 ack = sack = ntohl(tcph->ack_seq);
// Window advertised in this packet
 win = ntohs(tcph->window);
// End sequence number of this packet
 end = segment_seq_plus_len(seq, skb->len, iph, tcph);
// If the receiver has the SACK-permitted flag set, look for a SACK option
// in the TCP options (tcp_sack may update sack)
 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
  tcp_sack(skb, iph, tcph, &sack);
// The elided part is debug printing; omitted here and below
...  
 if (sender->td_end == 0) {
// Nothing recorded yet for this direction
  /*
   * Initialize sender data.
   */
  if (tcph->syn && tcph->ack) {
// Server side
   /*
    * Outgoing SYN-ACK in reply to a SYN.
    */
   sender->td_end =
   sender->td_maxend = end;
   sender->td_maxwin = (win == 0 ? 1 : win);
// Parse the TCP options of this packet into the sender's state
// (the article describes this as detecting SACK and window scaling support)
   tcp_options(skb, iph, tcph, sender);
   /*
    * RFC 1323:
    * Both sides must send the Window Scale option
    * to enable window scaling in either direction.
    */
   if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
         && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
// Window scaling is not in effect: clear both scale factors
    sender->td_scale =
    receiver->td_scale = 0;
  } else {
   /*
    * We are in the middle of a connection,
    * its history is lost for us.
    * Let's try to use the data from the packet.
     */
   sender->td_end = end;
   sender->td_maxwin = (win == 0 ? 1 : win);
   sender->td_maxend = end + sender->td_maxwin;
  }
 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
       && dir == IP_CT_DIR_ORIGINAL)
      || (state->state == TCP_CONNTRACK_SYN_RECV
          && dir == IP_CT_DIR_REPLY))
      && after(end, sender->td_end)) {
// The sender is sending again with a larger sequence number
  /*
   * RFC 793: "if a TCP is reinitialized ... then it need
   * not wait at all; it must only be sure to use sequence
   * numbers larger than those recently used."
   */
  sender->td_end =
  sender->td_maxend = end;
  sender->td_maxwin = (win == 0 ? 1 : win);
  tcp_options(skb, iph, tcph, sender);
 }
 
// Packets without ACK, and RST+ACK packets with a zero ack value:
// use the receiver's end sequence number as the ack value
 if (!(tcph->ack)) {
  /*
   * If there is no ACK, just pretend it was set and OK.
   */
  ack = sack = receiver->td_end;
 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
      (TCP_FLAG_ACK|TCP_FLAG_RST))
     && (ack == 0)) {
  /*
   * Broken TCP stacks, that set ACK in RST packets as well
   * with zero ack value.
   */
  ack = sack = receiver->td_end;
 }
// Packet carrying no data, or an initial packet
 if (seq == end
     && (!tcph->rst
         || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
  /*
   * Packets contains no data: we assume it is valid
   * and check the ack value only.
   * However RST segments are always validated by their
   * SEQ number, except when seq == 0 (reset sent answering
   * SYN.
   */
  seq = end = sender->td_end;
  
... 
// Check that the sequence and acknowledgement numbers are within the valid
// range; the check is skipped while either side is still "loose"
 if (sender->loose || receiver->loose ||
     (before(seq, sender->td_maxend + 1) &&
      after(end, sender->td_end - receiver->td_maxwin - 1) &&
      before(sack, receiver->td_end + 1) &&
      after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
// Valid packet
      /*
   * Take into account window scaling (RFC 1323).
   */
// Apply the window scale factor (not applied to SYN packets)
  if (!tcph->syn)
   win <<= sender->td_scale;
  
  /*
   * Update sender data.
   */
// Adjust the sender's maximum window and end sequence number
  swin = win + (sack - ack);
  if (sender->td_maxwin < swin)
   sender->td_maxwin = swin;
  if (after(end, sender->td_end))
   sender->td_end = end;
  /*
   * Update receiver data.
   */
// Adjust the receiver's parameters
  if (after(end, sender->td_maxend))
   receiver->td_maxwin += end - sender->td_maxend;
  if (after(sack + win, receiver->td_maxend - 1)) {
   receiver->td_maxend = sack + win;
   if (win == 0)
    receiver->td_maxend++;
  }
  /*
   * Check retransmissions.
   */
// Determine whether this is a retransmission: same direction, seq, ack and
// end as the previously recorded packet
  if (index == TCP_ACK_SET) {
   if (state->last_dir == dir
       && state->last_seq == seq
       && state->last_ack == ack
       && state->last_end == end)
    state->retrans++;
   else {
    state->last_dir = dir;
    state->last_seq = seq;
    state->last_ack = ack;
    state->last_end = end;
    state->retrans = 0;
   }
  }
  /*
   * Close the window of disabled window tracking :-)
   */
  if (sender->loose)
   sender->loose--;
  
  res = 1;
 } else {
...
// Default policy for out-of-window packets: 0 rejects, non-zero accepts.
// According to the article this parameter can be set through the /proc
// file system.
  res = ip_ct_tcp_be_liberal;
   }
... 
 return res;
}

3. TCP状态转换表

这是2.6.1*中的新转换表:
/*
 * TCP state transition table of the 2.6.1x kernels.
 * Dimensions: [2 directions][6 packet types][TCP_CONNTRACK_MAX states].
 * As labelled by the in-table comments, the first index is the direction
 * (ORIGINAL, REPLY), the second the packet type (syn, synack, fin, ack,
 * rst, none) and the third the current state (column header); each entry is
 * a state value. sIV marks an invalid combination.
 */
static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 {
/* ORIGINAL */
/*       sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 },
 {
/* REPLY */
/*       sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*syn*/    { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
   }
};

这是以前2.4.26中的转换表
/*
 * Older TCP state transition table, from kernel 2.4.26, shown for comparison.
 * Dimensions: [2 directions][5 packet types][TCP_CONNTRACK_MAX states].
 * Compared with the 2.6 table it has no separate synack row, and the state
 * columns appear in a different order (see the column header comments).
 */
static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = {
 {
/* ORIGINAL */
/*       sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI  */
/*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI },
/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI },
/*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES },
/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL },
/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 },
 {
/* REPLY */
/*       sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI  */
/*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR },
/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI },
/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI },
/*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI },
/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 }
};
该数组的解读方法在以前的文章中介绍过,不再赘述。
从两个数组对比可看到,增加了对SYNACK包的判断,同时数组中的sIV(非法状态)项也增加了很多,使得状态跟踪更加严格,但不足的是对ACK包还是太宽容,ACK扫描还是防不住。

4. TCP合法标志位组合
TCP的各个标志位的合法组合方式由下面的数组定义,数组每个元素为一种可能的组合方式,除了专门定义合法组合项为1,其他未定义的都属于非法项,值为0。
/*
 * Table of acceptable TCP flag combinations, indexed by the OR of the
 * FIN/SYN/RST/PUSH/ACK/URG flag bits. Listed combinations hold 1; every
 * combination not listed is implicitly zero-initialized, i.e. not valid.
 */
static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
{
	/* SYN-based combinations */
	[TH_SYN]                       = 1,
	[TH_SYN|TH_PUSH]               = 1,
	[TH_SYN|TH_ACK]                = 1,
	[TH_SYN|TH_ACK|TH_PUSH]        = 1,

	/* RST-based combinations */
	[TH_RST]                       = 1,
	[TH_RST|TH_ACK]                = 1,
	[TH_RST|TH_ACK|TH_PUSH]        = 1,

	/* Plain ACK combinations */
	[TH_ACK]                       = 1,
	[TH_ACK|TH_PUSH]               = 1,
	[TH_ACK|TH_URG]                = 1,
	[TH_ACK|TH_URG|TH_PUSH]        = 1,

	/* FIN combinations (always together with ACK) */
	[TH_FIN|TH_ACK]                = 1,
	[TH_FIN|TH_ACK|TH_PUSH]        = 1,
	[TH_FIN|TH_ACK|TH_URG]         = 1,
	[TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
};

5. 与netlink的接口
新的协议跟踪结构struct ip_conntrack_protocol中增加了4个和netlink接口相关函数,用于通过netlink套接口传递跟踪协议相关信息。

 /* convert protoinfo to nfnetlink attributes */
 int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
    const struct ip_conntrack *ct);
 /* convert nfnetlink attributes to protoinfo */
 int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct);
 /* NOTE(review): presumably writes the protocol part of a conntrack tuple
  * as nfnetlink attributes into skb (inferred from the name) - confirm */
 int (*tuple_to_nfattr)(struct sk_buff *skb,
          const struct ip_conntrack_tuple *t);
 /* NOTE(review): presumably the reverse, filling the tuple from nfnetlink
  * attributes (inferred from the name) - confirm */
 int (*nfattr_to_tuple)(struct nfattr *tb[],
          struct ip_conntrack_tuple *t);

在TCP协议中对应函数为:

 .to_nfattr  = tcp_to_nfattr,
 .from_nfattr  = nfattr_to_tcp,
 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,

6. 结论

2.6.1*的TCP协议跟踪处理比2.4考虑的因素增加了很多,这些新功能的使用可使系统的安全性进一步有所提高。

猜你喜欢

转载自cxw06023273.iteye.com/blog/867071