TCP/IP详解V2(五)之TCP协议

## TCP处理中的辅助函数

### tcp_template

  • 功能A:为TCP和IP首部创建一个模板
/*
 * Fill in a combined IP/TCP header template for a connection.
 *
 * When tp->t_template is already set, that header is refilled in place.
 * Otherwise a new MT_HEADER mbuf is requested with M_DONTWAIT and its
 * data area is used as the header.  Addresses and ports are copied from
 * the connection's Internet PCB; protocol, length and data offset get
 * fixed values and every other field is cleared.
 *
 * Returns the filled-in header, or 0 when no mbuf could be obtained.
 * This routine does not store the result in tp->t_template itself.
 */
struct tcpiphdr *
tcp_template(tp)
	struct tcpcb *tp;
{
	struct inpcb *pcb = tp->t_inpcb;
	struct tcpiphdr *hdr = tp->t_template;
	struct mbuf *mb;

	if (hdr == 0) {
		/* No template yet: take a header mbuf to hold one. */
		if ((mb = m_get(M_DONTWAIT, MT_HEADER)) == NULL)
			return (0);
		mb->m_len = sizeof (struct tcpiphdr);
		hdr = mtod(mb, struct tcpiphdr *);
	}

	/* IP overlay part of the header. */
	hdr->ti_prev = 0;
	hdr->ti_next = 0;
	hdr->ti_x1 = 0;
	hdr->ti_pr = IPPROTO_TCP;
	hdr->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
	hdr->ti_src = pcb->inp_laddr;
	hdr->ti_dst = pcb->inp_faddr;

	/* TCP part: ports from the PCB, everything else starts cleared. */
	hdr->ti_sport = pcb->inp_lport;
	hdr->ti_dport = pcb->inp_fport;
	hdr->ti_seq = 0;
	hdr->ti_ack = 0;
	hdr->ti_x2 = 0;
	hdr->ti_off = 5;	/* header length in 32-bit words, no options */
	hdr->ti_flags = 0;
	hdr->ti_win = 0;
	hdr->ti_sum = 0;
	hdr->ti_urp = 0;

	return (hdr);
}

### tcp_respond

  • 功能A:tcp_input调用它生成RST报文段,携带或者不携带ACK
  • 功能B:tcp_timers调用它发送保活探测报文
void
tcp_respond(tp, ti, m, ack, seq, flags)
	struct tcpcb *tp;
	register struct tcpiphdr *ti;
	register struct mbuf *m;
	tcp_seq ack, seq;
	int flags;        //在传入参数的时候已经标记RST或者一些其他需要的标记
{
	register int tlen;
	int win = 0;
	struct route *ro = 0;

	if (tp) {        //如果tcp_input收到了一个不属于任何连接的报文段,则有可能生成RST。例如收到的报文段中没有找到任何现存的连接,这种情况下tp为空。
		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
		ro = &tp->t_inpcb->inp_route;
	}
	if (m == 0) {        //如果不带有数据报文段
		m = m_gethdr(M_DONTWAIT, MT_HEADER);    //获取一个mbuf
		if (m == NULL)
			return;
#ifdef TCP_COMPAT_42
		tlen = 1;
#else
		tlen = 0;
#endif
		m->m_data += max_linkhdr;        //留下链路层协议的16字节
		*mtod(m, struct tcpiphdr *) = *ti;        //获取TCPIP Header
		ti = mtod(m, struct tcpiphdr *);
		flags = TH_ACK;        //标记ACK
	} else {
		m_freem(m->m_next);        //如果带有mbuf,释放除了首部之外的所有的mbuf
		m->m_next = 0;
		m->m_data = (caddr_t)ti;
		m->m_len = sizeof (struct tcpiphdr);        //可以直接调整TCPIP Header的位置
		tlen = 0;
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
		xchg(ti->ti_dport, ti->ti_sport, u_short);
#undef xchg
	}
	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));        //填充len
	tlen += sizeof (struct tcpiphdr);
	m->m_len = tlen;        //在mbuf中填充len
	m->m_pkthdr.len = tlen;
	m->m_pkthdr.rcvif = (struct ifnet *) 0;
	ti->ti_next = ti->ti_prev = 0;        //填充TCPIP Header
	ti->ti_x1 = 0;
	ti->ti_seq = htonl(seq);
	ti->ti_ack = htonl(ack);
	ti->ti_x2 = 0;
	ti->ti_off = sizeof (struct tcphdr) >> 2;
	ti->ti_flags = flags;
	if (tp)
		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
	else
		ti->ti_win = htons((u_short)win);
	ti->ti_urp = 0;
	ti->ti_sum = 0;
	ti->ti_sum = in_cksum(m, tlen);
	((struct ip *)ti)->ip_len = tlen;
	((struct ip *)ti)->ip_ttl = ip_defttl;
	(void) ip_output(m, NULL, ro, 0, NULL);        //将数据交由IP层
}

### tcp_drop

  • 功能A:发送RST报文段并丢弃连接,向应用进程返回差错
/*
 * Drop a connection and report an error to the socket.
 *
 * If the connection has already received a SYN, its state is set to
 * TCPS_CLOSED and tcp_output() is called before the control block is
 * released; otherwise only the conndrops counter is bumped.  The error
 * stored in so_error is `errno`, except that an ETIMEDOUT is replaced by
 * the connection's recorded soft error when one exists.
 *
 * Returns whatever tcp_close(tp) returns.
 */
struct tcpcb *
tcp_drop(tp, errno)
	register struct tcpcb *tp;
	int errno;
{
	struct socket *so = tp->t_inpcb->inp_socket;

	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
		tcpstat.tcps_conndrops++;
	} else {
		tp->t_state = TCPS_CLOSED;
		(void) tcp_output(tp);
		tcpstat.tcps_drops++;
	}

	/* Prefer the saved soft error over a bare timeout. */
	so->so_error = (errno == ETIMEDOUT && tp->t_softerror) ?
	    tp->t_softerror : errno;

	return (tcp_close(tp));
}

### tcp_close

  • 功能A:释放连接占用的内存(IP和TCP首部,TCP PCB,Internet PCB,以及在连接队列中重组的乱序报文段),并更新路由特性
/*
 * Release everything a TCP connection holds and detach it from its socket.
 *
 * When RTV_RTT is defined and enough data has been sent, the smoothed RTT,
 * RTT variance and slow-start threshold are first folded into the cached
 * route's metrics.  Then the reassembly queue, the header template and the
 * TCP control block are freed, the socket is marked disconnected and the
 * Internet PCB is detached.
 *
 * Always returns a null tcpcb pointer.
 */
struct tcpcb *
tcp_close(tp)
	register struct tcpcb *tp;
{
	register struct tcpiphdr *t;
	struct inpcb *inp = tp->t_inpcb;	/* Internet PCB of this connection */
	struct socket *so = inp->inp_socket;
	register struct mbuf *m;
#ifdef RTV_RTT
	register struct rtentry *rt;

	/*
	 * Only update the route metrics when snd_max has moved more than
	 * 16 send-buffer high-water marks past the initial send sequence,
	 * a route is cached, and the route key is not INADDR_ANY.
	 */
	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
	    (rt = inp->inp_route.ro_rt) &&
	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
		register u_long i;

		/* RTT: rescale t_srtt; average with the stored value if both are non-zero. */
		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
			i = tp->t_srtt *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
			if (rt->rt_rmx.rmx_rtt && i)
				rt->rt_rmx.rmx_rtt =
				    (rt->rt_rmx.rmx_rtt + i) / 2;
			else
				rt->rt_rmx.rmx_rtt = i;
		}
		/* RTT variance: same treatment as the RTT above. */
		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
			i = tp->t_rttvar *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
			if (rt->rt_rmx.rmx_rttvar && i)
				rt->rt_rmx.rmx_rttvar =
				    (rt->rt_rmx.rmx_rttvar + i) / 2;
			else
				rt->rt_rmx.rmx_rttvar = i;
		}
		/*
		 * Slow-start threshold.
		 * NOTE(review): && binds tighter than ||, so the
		 * "i < rmx_sendpipe / 2" test is evaluated whenever the
		 * three-way conjunction is false -- including when the
		 * RTV_SSTHRESH lock bit is set, in which case i still holds
		 * whatever was last stored in it above (or nothing, if both
		 * earlier branches were skipped).  Confirm this is intended.
		 */
		if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
		    (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh ||
		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
			/* Round to a whole number of segments, at least two. */
			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
			if (i < 2)
				i = 2;
			/* Convert back to bytes, counting the TCP/IP header per segment. */
			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
			if (rt->rt_rmx.rmx_ssthresh)
				rt->rt_rmx.rmx_ssthresh =
				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
			else
				rt->rt_rmx.rmx_ssthresh = i;
		}
	}
#endif /* RTV_RTT */
	/* free the reassembly queue, if any */
	t = tp->seg_next;
	while (t != (struct tcpiphdr *)tp) {	/* the queue is circular through tp itself */
		/* Step forward first, then unlink and free the entry just left behind. */
		t = (struct tcpiphdr *)t->ti_next;
		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
		remque(t->ti_prev);
		m_freem(m);
	}
	if (tp->t_template)	/* free the mbuf holding the header template */
		(void) m_free(dtom(tp->t_template));
	free(tp, M_PCB);	/* free the TCP control block itself */
	inp->inp_ppcb = 0;
	soisdisconnected(so);	/* mark the socket disconnected */
	/* clobber input pcb cache if we're closing the cached connection */
	if (inp == tcp_last_inpcb)	/* reset the one-entry cache to the list head */
		tcp_last_inpcb = &tcb;
	in_pcbdetach(inp);	/* detach and release the Internet PCB */
	tcpstat.tcps_closed++;	/* count the close */
	return ((struct tcpcb *)0);
}

### tcp_mss

  • 功能A:tcp_output准备发送SYN时调用,添加MSS选项
  • 功能B:tcp_input收到SYN包含MSS选项时调用
  • 功能C:检查到达目的地的缓存路由,计算用于该连接的MSS
/*
 * Work out the maximum segment size for a connection from its route.
 *
 * tp    - connection control block.
 * offer - MSS value offered by the peer, or 0 when there is none.
 *
 * Side effects visible here: may allocate a route, may seed t_srtt,
 * t_rttvar, t_rttmin and t_rxtcur from route metrics, may resize the
 * socket send/receive buffers, may lower tp->t_maxseg, and always sets
 * tp->snd_cwnd (and possibly tp->snd_ssthresh).
 *
 * Returns the computed MSS, or tcp_mssdflt when no route can be found.
 */
int
tcp_mss(tp, offer)
	register struct tcpcb *tp;
	u_int offer;
{
	struct route *ro;
	register struct rtentry *rt;
	struct ifnet *ifp;
	register int rtt, mss;
	u_long bufsize;
	struct inpcb *inp;
	struct socket *so;
	extern int tcp_mssdflt;

	inp = tp->t_inpcb;	/* Internet PCB of this connection */
	ro = &inp->inp_route;	/* route cached in the PCB */

	if ((rt = ro->ro_rt) == (struct rtentry *)0) {	/* no cached route */
		/* No route yet, so try to acquire one */
		if (inp->inp_faddr.s_addr != INADDR_ANY) {
			ro->ro_dst.sa_family = AF_INET;
			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
				inp->inp_faddr;
			rtalloc(ro);
		}
		/* Still nothing: fall back to the default MSS. */
		if ((rt = ro->ro_rt) == (struct rtentry *)0)
			return (tcp_mssdflt);
	}
	ifp = rt->rt_ifp;	/* outgoing interface of the route */
	so = inp->inp_socket;

#ifdef RTV_MTU	
	/*
	 * If the connection has no smoothed RTT yet and the route stores
	 * one, seed the RTT estimators from the route metrics.
	 */
	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
		/* A locked route RTT is also used as the minimum RTT. */
		if (rt->rt_rmx.rmx_locks & RTV_RTT)
			tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
		tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
		if (rt->rt_rmx.rmx_rttvar)
			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
		else
			/* no stored variance: derive one from the seeded srtt */
			tp->t_rttvar =
			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
		/* Set t_rxtcur; presumably clamped to [t_rttmin, TCPTV_REXMTMAX] -- macro not shown here. */
		TCPT_RANGESET(tp->t_rxtcur,
		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
		    tp->t_rttmin, TCPTV_REXMTMAX);
	}

	if (rt->rt_rmx.rmx_mtu)	/* the route carries an MTU: MSS = MTU - TCP/IP header */
		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
	else
#endif /* RTV_MTU */
	{
		/* Otherwise start from the interface MTU. */
		mss = ifp->if_mtu - sizeof(struct tcpiphdr);
#if	(MCLBYTES & (MCLBYTES - 1)) == 0
		if (mss > MCLBYTES)	/* round down to a multiple of MCLBYTES (power-of-two case) */
			mss &= ~(MCLBYTES-1);
#else
		if (mss > MCLBYTES)	/* round down to a multiple of MCLBYTES (general case) */
			mss = mss / MCLBYTES * MCLBYTES;
#endif
		if (!in_localaddr(inp->inp_faddr))	/* non-local peer: cap at tcp_mssdflt */
			mss = min(mss, tcp_mssdflt);
	}
	
	if (offer)	/* peer offered an MSS: never exceed it */
		mss = min(mss, offer);
	mss = max(mss, 32);		/* floor of 32 bytes */
	/* Adopt the value if it is smaller than the current t_maxseg or the peer made an offer. */
	if (mss < tp->t_maxseg || offer != 0) {
#ifdef RTV_SPIPE
		if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)	/* no send pipe size on the route */
#endif
			bufsize = so->so_snd.sb_hiwat;
		if (bufsize < mss)
			mss = bufsize;	/* send buffer smaller than one segment: shrink the MSS */
		else {
			/* Make the send buffer a whole number of segments, capped at sb_max. */
			bufsize = roundup(bufsize, mss);
			if (bufsize > sb_max)
				bufsize = sb_max;
			(void)sbreserve(&so->so_snd, bufsize);
		}
		tp->t_maxseg = mss;	/* record the MSS used for sending */

#ifdef RTV_RPIPE
		if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)	/* no receive pipe size on the route */
#endif
			bufsize = so->so_rcv.sb_hiwat;
		if (bufsize > mss) {
			/* Make the receive buffer a whole number of segments, capped at sb_max. */
			bufsize = roundup(bufsize, mss);
			if (bufsize > sb_max)
				bufsize = sb_max;
			(void)sbreserve(&so->so_rcv, bufsize);
		}
	}
	tp->snd_cwnd = mss;	/* congestion window starts at one segment */

#ifdef RTV_SSTHRESH
	if (rt->rt_rmx.rmx_ssthresh) {
		/* Use the route's slow-start threshold, but never less than 2 * mss. */
		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
	}
#endif /* RTV_SSTHRESH */
	return (mss);
}
#endif /* TUBA_INCLUDE */

### tcp_reass和TCP_REASS

  • 功能A:处理乱序到达的报文段。TCP Header中存在两个指针,做双向链表,用来存放乱序到达的报文段
/*
 * TCP_REASS(tp, ti, m, so, flags)
 *
 * Fast path for an arriving data segment.  When all three hold:
 *   - the segment's sequence number equals the next expected one,
 *   - the reassembly queue is empty (seg_next points back at tp),
 *   - the connection is in TCPS_ESTABLISHED,
 * the segment is delivered directly: TF_DELACK is set, rcv_nxt advances
 * by the segment length, `flags` receives the segment's FIN bit, the
 * receive counters are updated, and the mbuf is appended to the socket's
 * receive buffer followed by sorwakeup().
 *
 * Otherwise the segment is handed to tcp_reass(), whose return value is
 * stored in `flags`, and TF_ACKNOW is set.
 *
 * `flags` must be a modifiable lvalue.  Arguments are evaluated more than
 * once, so they must be free of side effects.  Every use of an argument
 * is parenthesized so pointer expressions such as `base + 1` expand
 * correctly.  Explanatory notes live in this comment, not after the
 * backslashes: a line continuation must be the last character on its line.
 */
#define	TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		(tp)->t_flags |= TF_DELACK; \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		tcpstat.tcps_rcvpack++; \
		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
		sbappend(&(so)->so_rcv, (m)); \
		sorwakeup(so); \
	} else { \
		(flags) = tcp_reass((tp), (ti), (m)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}

### 重组报文段图解:

/*
 * Insert a segment into the connection's reassembly queue and pass any
 * data that has become contiguous up to the socket.
 *
 * tp - connection control block; it doubles as the head of the circular
 *      queue (an empty queue has seg_next pointing back at tp).
 * ti - header of the arriving segment, or 0 to skip insertion and only
 *      attempt delivery of what is already queued.
 * m  - mbuf chain holding the segment.
 *
 * Overlap with the preceding queued segment is trimmed from the front of
 * the new segment (a fully duplicate segment is freed and dropped);
 * overlap with following queued segments is trimmed from, or removes,
 * those segments.
 *
 * Returns the TH_FIN bit of the last segment delivered, or 0 when nothing
 * was delivered.
 */
int
tcp_reass(tp, ti, m)
	register struct tcpcb *tp;
	register struct tcpiphdr *ti;
	struct mbuf *m;
{
	register struct tcpiphdr *q;
	struct socket *so = tp->t_inpcb->inp_socket;
	int flags;

	if (ti == 0)	/* no new segment: just try to deliver what is queued */
		goto present;

	/* Find the first queued segment that starts after the new one. */
	for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
	    q = (struct tcpiphdr *)q->ti_next)
		if (SEQ_GT(q->ti_seq, ti->ti_seq))
			break;

	/* If a segment precedes that position, it may already hold some of our data. */
	if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
		register int i;
		q = (struct tcpiphdr *)q->ti_prev;	/* step back to the preceding segment */
		/* conversion to int (in i) handles seq wraparound */
		i = q->ti_seq + q->ti_len - ti->ti_seq;	/* bytes of overlap with the new segment */
		if (i > 0) {
			if (i >= ti->ti_len) {	/* new segment is entirely duplicate data */
				tcpstat.tcps_rcvduppack++;
				tcpstat.tcps_rcvdupbyte += ti->ti_len;
				m_freem(m);
				return (0);
			}
			m_adj(m, i);	/* trim the overlap off the front of the new segment */
			ti->ti_len -= i;
			ti->ti_seq += i;
		}
		q = (struct tcpiphdr *)(q->ti_next);	/* back to the segment that follows */
	}
	tcpstat.tcps_rcvoopack++;
	tcpstat.tcps_rcvoobyte += ti->ti_len;
	REASS_MBUF(ti) = m;	/* stash the mbuf pointer in the header for later retrieval */

	/* Trim or remove following segments that the new one overlaps. */
	while (q != (struct tcpiphdr *)tp) {
		register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;	/* overlap with q */
		if (i <= 0)	/* no overlap: done */
			break;
		if (i < q->ti_len) {	/* partial overlap: trim the front of q and stop */
			q->ti_seq += i;
			q->ti_len -= i;
			m_adj(REASS_MBUF(q), i);
			break;
		}
		/* q is completely covered: advance, then unlink and free the one left behind. */
		q = (struct tcpiphdr *)q->ti_next;
		m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
		remque(q->ti_prev);
		m_freem(m);
	}

	/*
	 * Stick new segment in its place.
	 */
	insque(ti, q->ti_prev);

present:
	/* Deliver queued data that is now in sequence. */
	if (TCPS_HAVERCVDSYN(tp->t_state) == 0)	/* no SYN received yet: nothing may be delivered */
		return (0);
	ti = tp->seg_next;	/* first queued segment */
	if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)	/* queue empty, or a gap before its head */
		return (0);
	if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)	/* in SYN_RECEIVED, data stays queued */
		return (0);
	do {
		tp->rcv_nxt += ti->ti_len;	/* advance the next expected sequence number */
		flags = ti->ti_flags & TH_FIN;	/* remember whether this segment carried FIN */
		remque(ti);	/* unlink it from the queue */
		m = REASS_MBUF(ti);
		ti = (struct tcpiphdr *)ti->ti_next;	/* move on to the following segment */
		if (so->so_state & SS_CANTRCVMORE)	/* socket can no longer receive: discard */
			m_freem(m);
		else
			sbappend(&so->so_rcv, m);
	} while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);	/* continue while contiguous */
	sorwakeup(so);	/* wake up readers of the receive buffer */
	return (flags);
}

### tcp_usrreq

  • 功能A:请求分用函数,根据请求类型(PRU_xxx)处理插口层发给TCP的各种请求
int
tcp_usrreq(so, req, m, nam, control)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
{
	register struct inpcb *inp;
	register struct tcpcb *tp;
	int s;
	int error = 0;
	int ostate;

	if (req == PRU_CONTROL)        //如果是控制请求,调用in_control转去控制
		return (in_control(so, (int)m, (caddr_t)nam,
			(struct ifnet *)control));
	if (control && control->m_len) {        //如果存在控制信息,丢弃控制信息
		m_freem(control);
		if (m)
			m_freem(m);
		return (EINVAL);
	}

	s = splnet();
	inp = sotoinpcb(so);        //获取Internet PCB
	if (inp == 0 && req != PRU_ATTACH) {
		splx(s);
		return (EINVAL);		/* XXX */
	}
	if (inp) {
		tp = intotcpcb(inp);        //获取TCP PCB
		/* WHAT IF TP IS 0? */
#ifdef KPROF
		tcp_acounts[tp->t_state][req]++;
#endif
		ostate = tp->t_state;
	} else
		ostate = 0;
	switch (req) {        //分用
	case PRU_ATTACH:         //调用sonewconn时会调用   
		if (inp) {                //如果inp已经存在,带着错误返回
			error = EISCONN;
			break;
		}
		error = tcp_attach(so);        //由tcp_attach为e连接分配Internet PCB和TCP PCB
		if (error)
			break;
		if ((so->so_options & SO_LINGER) && so->so_linger == 0)        //如果设置了Linger
			so->so_linger = TCP_LINGERTIME;        //为Linger设置定时条件
		tp = sototcpcb(so);        //此时,tp指向了TCP PCB
		break;

	case PRU_DETACH:        //close系统调用
		if (tp->t_state > TCPS_LISTEN)        //如果连接尚未建立
			tp = tcp_disconnect(tp);        
		else        //如果连接已经建立
			tp = tcp_close(tp);        
		break;

	case PRU_BIND:        //bind系统调用
		error = in_pcbbind(inp, nam);        //关联本地地址
		if (error)
			break;
		break;

	case PRU_LISTEN:        //listen系统调用
		if (inp->inp_lport == 0)        //如果本地端口尚未关联,关联本地端口
			error = in_pcbbind(inp, (struct mbuf *)0);
		if (error == 0)
			tp->t_state = TCPS_LISTEN;        //然后调整状态
		break;

	case PRU_CONNECT:        //connect系统调用
		if (inp->inp_lport == 0) {        //如果本地端口尚未关联
			error = in_pcbbind(inp, (struct mbuf *)0);        //关联本地端口与地址
			if (error)
				break;
		}
		error = in_pcbconnect(inp, nam);        //关联远程端口与地址
		if (error)
			break;
		tp->t_template = tcp_template(tp);        //填充模板
		if (tp->t_template == 0) {        //如果模板填充失败,直接返回
			in_pcbdisconnect(inp);
			error = ENOBUFS;
			break;
		}
	
		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)        //计算窗口的缩放因子
			tp->request_r_scale++;
		soisconnecting(so);        //设置socket的状态
		tcpstat.tcps_connattempt++;        
		tp->t_state = TCPS_SYN_SENT;        //更新连接的状态
		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;        //启动keep-alive定时器
		tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;        //更新ISS
		tcp_sendseqinit(tp);        //初始化Send seq
		error = tcp_output(tp);        //发送SYN
		break;

	case PRU_CONNECT2:        //在TCP中不考虑这个协议
		error = EOPNOTSUPP;
		break;

	case PRU_DISCONNECT:        //处理断开连接
		tp = tcp_disconnect(tp);
		break;

	case PRU_ACCEPT:        //accept系统调用
		in_setpeeraddr(inp, nam);
		break;

	case PRU_SHUTDOWN:        //shutdown系统调用
		socantsendmore(so);        //调整socket状态,禁止继续发送报文段
		tp = tcp_usrclosed(tp);        //处理设置正确的连接状态
		if (tp)
			error = tcp_output(tp);        //发送FIN标志
		break;

	case PRU_RCVD:    //从接收缓存中接收数据之后会进行这个调用
		(void) tcp_output(tp);
		break;

	case PRU_SEND:        //send系统调用
		sbappend(&so->so_snd, m);        //向插口的接收缓存中添加数据
		error = tcp_output(tp);        //饭后发送新的报文段
		break;

	case PRU_ABORT:        //异常就是丢弃数据,设置错误信息并发送RST
		tp = tcp_drop(tp, ECONNABORTED);
		break;

	case PRU_SENSE:        //返回发送缓存的大小
		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
		(void) splx(s);
		return (0);

	case PRU_RCVOOB:        //读取带外数据
		if ((so->so_oobmark == 0 &&
		    (so->so_state & SS_RCVATMARK) == 0) ||
		    so->so_options & SO_OOBINLINE ||
		    tp->t_oobflags & TCPOOB_HADDATA) {
			error = EINVAL;
			break;
		}
		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
			error = EWOULDBLOCK;
			break;
		}
		m->m_len = 1;
		*mtod(m, caddr_t) = tp->t_iobc;
		if (((int)nam & MSG_PEEK) == 0)
			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
		break;

	case PRU_SENDOOB:        //发送带外数据
		if (sbspace(&so->so_snd) < -512) {
			m_freem(m);
			error = ENOBUFS;
			break;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		sbappend(&so->so_snd, m);
		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
		tp->t_force = 1;
		error = tcp_output(tp);
		tp->t_force = 0;
		break;

	case PRU_SOCKADDR:        //设置本地地址与端口
		in_setsockaddr(inp, nam);
		break;

	case PRU_PEERADDR:        //设置远端地址与端口
		in_setpeeraddr(inp, nam);
		break;

	case PRU_SLOWTIMO:        //处理定时器到期事件
		tp = tcp_timers(tp, (int)nam);
		req |= (int)nam << 8;		/* for debug's sake */
		break;

	default:
		panic("tcp_usrreq");
	}
	if (tp && (so->so_options & SO_DEBUG))        //如果设置了DEBUG选项,记录发送的TCP IP Header
		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
	splx(s);
	return (error);
}

### tcp_attach

  • 功能A:为连接分配资源
int
tcp_attach(so)
	struct socket *so;
{
	register struct tcpcb *tp;
	struct inpcb *inp;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {        //如果还没有为插口分配发送缓存与接收缓存,将两者都设置为8192
		error = soreserve(so, tcp_sendspace, tcp_recvspace);
		if (error)
			return (error);
	}
	error = in_pcballoc(so, &tcb);        //为Internet PCB分配空间
	if (error)
		return (error);
	inp = sotoinpcb(so);        
	tp = tcp_newtcpcb(inp);        //为TCP PCB提供分配空间
	if (tp == 0) {        //处理分配失败的情况,相当于一次回滚操作
		int nofd = so->so_state & SS_NOFDREF;	/* XXX */

		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
		in_pcbdetach(inp);
		so->so_state |= nofd;
		return (ENOBUFS);
	}
	tp->t_state = TCPS_CLOSED;        //将状态设置为CLOSED
	return (0);
}

### tcp_disconnect

  • 功能A:断开TCP连接
/*
 * Start disconnecting a TCP connection.
 *
 *  - Before ESTABLISHED: the control block is simply closed.
 *  - SO_LINGER set with a zero linger time: the connection is dropped
 *    via tcp_drop() with error 0.
 *  - Otherwise: the socket is marked disconnecting, pending receive data
 *    is flushed, the state is advanced by tcp_usrclosed() and, if the
 *    control block survives, tcp_output() is called.
 *
 * Returns the control block, or whatever the close/drop routine returned.
 */
struct tcpcb *
tcp_disconnect(tp)
	register struct tcpcb *tp;
{
	struct socket *so = tp->t_inpcb->inp_socket;

	if (tp->t_state < TCPS_ESTABLISHED)
		return (tcp_close(tp));

	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
		return (tcp_drop(tp, 0));

	soisdisconnecting(so);
	sbflush(&so->so_rcv);
	tp = tcp_usrclosed(tp);
	if (tp)
		(void) tcp_output(tp);
	return (tp);
}

### tcp_usrclosed

  • 功能A:处理连接状态的转换
/*
 * Advance the connection state after the user has closed the send side.
 *
 *   CLOSED, LISTEN, SYN_SENT   -> CLOSED, and the control block is closed
 *   SYN_RECEIVED, ESTABLISHED  -> FIN_WAIT_1
 *   CLOSE_WAIT                 -> LAST_ACK
 *   any other state            -> unchanged
 *
 * If the control block still exists and its state is FIN_WAIT_2 or later,
 * the socket is marked disconnected.
 *
 * Returns the control block, or the result of tcp_close() if it was closed.
 */
struct tcpcb *
tcp_usrclosed(tp)
	register struct tcpcb *tp;
{
	int state = tp->t_state;

	if (state == TCPS_CLOSED || state == TCPS_LISTEN ||
	    state == TCPS_SYN_SENT) {
		tp->t_state = TCPS_CLOSED;
		tp = tcp_close(tp);
	} else if (state == TCPS_SYN_RECEIVED || state == TCPS_ESTABLISHED) {
		tp->t_state = TCPS_FIN_WAIT_1;
	} else if (state == TCPS_CLOSE_WAIT) {
		tp->t_state = TCPS_LAST_ACK;
	}

	if (tp != 0 && tp->t_state >= TCPS_FIN_WAIT_2)
		soisdisconnected(tp->t_inpcb->inp_socket);
	return (tp);
}

猜你喜欢

转载自www.cnblogs.com/ukernel/p/9191037.html