TCP/IP Detailed Explanation V2 (1) Protocol Control Block

protocol control block

Protocol control blocks (PCBs) at the protocol layer come in two kinds: the Internet PCB and the TCP PCB. Because UDP is a connectionless protocol, it has no dedicated connection control block of its own.
The Internet PCB contains the information common to all UDP and TCP endpoints: foreign and local IP addresses, foreign and local port numbers, an IP header prototype, the IP options to use for the endpoint, and routing table information for the endpoint's destination address.
The TCP PCB contains the state that TCP maintains for each connection: the sequence numbers in both directions, the window sizes, the number of retransmissions, etc.
Among them, Internet PCB is a data structure of the transport layer, TCP, UDP and raw IP use it, but network layer protocols such as IP, ICMP, etc. do not use it.
The overall description is as follows:

data structure

/*
 * Internet protocol control block (PCB).
 *
 * in_pcballoc() allocates one of these for a socket and links it onto a
 * doubly linked list; it records the local/foreign address and port pair
 * together with the IP-level state attached to that socket.
 */
struct inpcb {
    struct  inpcb *inp_next,*inp_prev;    // links of the doubly linked list of PCBs
    struct  inpcb *inp_head;        // every PCB points back at the head of its list
    struct  in_addr inp_faddr;      /* foreign host table entry */    // note: the address/port 4-tuple is kept in network byte order
    u_short inp_fport;          /* foreign port */
    struct  in_addr inp_laddr;  /* local host table entry */
    u_short inp_lport;      /* local port */
    struct  socket *inp_socket;     // back pointer to the owning socket structure
    caddr_t inp_ppcb;       // per-protocol PCB (e.g. the TCP PCB) if one exists, otherwise null
    struct  route inp_route;    // cached route (an embedded structure, not a pointer)
    int inp_flags;      // miscellaneous flags; rarely used
    struct  ip inp_ip;      // IP header prototype; only two members, TOS and TTL, are used
    struct  mbuf *inp_options;  // IP options
    struct  ip_moptions *inp_moptions;     // IP multicast options
};

Basic operation

create and destroy

int
in_pcballoc(so, head)
    struct socket *so;
    struct inpcb *head;
{
    register struct inpcb *inp;

    MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, M_WAITOK);    //从内核中分配一个inpcb结构
    if (inp == NULL)    //如果分配失败,返回错误
        return (ENOBUFS);
    bzero((caddr_t)inp, sizeof(*inp));    //将新分配的结构初始化为0,其中IP地址与端口号必须要初始化为0
    inp->inp_head = head;    //指向整个PCB‘s的头部
    inp->inp_socket = so;    //指向VFS中的socket结构
    insque(inp, head);    //将这个结构加入到队列中
    so->so_pcb = (caddr_t)inp;    //同时设置socket中的信息指向该PCB
    return (0);
}
/*
 * Tear down an Internet PCB: detach it from its socket, hand the socket
 * to sofree(), release the IP options, cached route and multicast options
 * hanging off the PCB, unlink the PCB from its list and free it.
 *
 * The function is declared int, so it now returns 0 explicitly instead of
 * falling off the end with an indeterminate value.
 */
int
in_pcbdetach(inp)
    struct inpcb *inp;
{
    struct socket *so = inp->inp_socket;    // socket that owns this PCB

    so->so_pcb = 0;    // the socket no longer references the PCB
    sofree(so);    // let the socket layer release the socket
    if (inp->inp_options)    // free the IP options, if any
        (void)m_free(inp->inp_options);
    if (inp->inp_route.ro_rt)    // drop the reference to the cached route
        rtfree(inp->inp_route.ro_rt);
    ip_freemoptions(inp->inp_moptions);    // free the IP multicast options
    remque(inp);    // unlink the PCB from the doubly linked PCB list
    FREE(inp, M_PCB);    // finally release the PCB itself
    return (0);
}

in_pcblookup:

  • Function A: Delivery of IP datagrams to the appropriate transport layer Internet PCB
  • Function B: When executing bind, check whether the requested local IP address and port are already bound

    /*
     * Search the PCB list headed by `head` for the entry that best matches
     * the given foreign/local address and port.
     *
     * Entries whose local port differs from lport_arg are skipped.  For the
     * remaining entries a wildcard count (0..2) is computed: one point for
     * the local address and one for the foreign address whenever exactly
     * one side (the PCB or the argument) is INADDR_ANY.  If both sides are
     * specific they must be equal (and, for the foreign side, the foreign
     * ports must match as well) or the entry is skipped.  Entries with a
     * non-zero count are only considered when INPLOOKUP_WILDCARD is set in
     * `flags`.
     *
     * Returns the entry with the fewest wildcards (an exact match ends the
     * search at once), or 0 when nothing matches.
     */
    struct inpcb *
    in_pcblookup(head, faddr, fport_arg, laddr, lport_arg, flags)
    struct inpcb *head;
    struct in_addr faddr, laddr;
    u_int fport_arg, lport_arg;
    int flags;
    {
    register struct inpcb *inp, *match = 0;
    int matchwild = 3, wildcard;    // 3 is larger than any possible wildcard count
    u_short fport = fport_arg, lport = lport_arg;    // narrow the u_int arguments to port width
    
    for (inp = head->inp_next; inp != head; inp = inp->inp_next) {    // walk the circular list, starting after the head
            if (inp->inp_lport != lport)    // local port differs: this PCB cannot match, try the next one
            continue;
        wildcard = 0;    // wildcard count for this PCB
        if (inp->inp_laddr.s_addr != INADDR_ANY) {    // PCB has a specific local address
            if (laddr.s_addr == INADDR_ANY)    // ...but the requested local address is the wildcard
                wildcard++;
            else if (inp->inp_laddr.s_addr != laddr.s_addr)    // both specific but different: no match
                continue;
        } else {
            if (laddr.s_addr != INADDR_ANY)    // PCB local address is the wildcard, requested one is specific
                wildcard++;
        }
        if (inp->inp_faddr.s_addr != INADDR_ANY) {    // same test for the foreign address (plus foreign port)
            if (faddr.s_addr == INADDR_ANY)
                wildcard++;
            else if (inp->inp_faddr.s_addr != faddr.s_addr ||
                inp->inp_fport != fport)
                continue;
        } else {
            if (faddr.s_addr != INADDR_ANY)
                wildcard++;
        }
        if (wildcard && (flags & INPLOOKUP_WILDCARD) == 0)    // wildcard match, but the caller did not allow wildcards
            continue;
        if (wildcard < matchwild) {    // remember the best match so far; an exact match (0 wildcards) ends the search
            match = inp;
            matchwild = wildcard;
            if (matchwild == 0)
                break;
        }
    }
    return (match);
    }

    in_pcbbind:

  • Function A: Bind local address and port for TCP/UDP, implicit binding will be performed when there is no explicit binding

    /*
     * Bind a local address and/or port to the PCB `inp`.
     *
     * `nam`, when non-null, is an mbuf holding the sockaddr_in to bind.
     * When `nam` is null, or when it carries port 0, a port is chosen
     * automatically from the counter kept in the list head.
     *
     * Returns 0 on success, otherwise:
     *   EADDRNOTAVAIL  in_ifaddr is 0, or the requested non-wildcard,
     *                  non-multicast address is not found by ifa_ifwithaddr()
     *   EINVAL         the PCB already has a local port or address, or the
     *                  mbuf length is not sizeof(struct sockaddr_in)
     *   EADDRINUSE     another PCB already uses the address/port and the
     *                  reuse options do not permit sharing
     *   (suser error)  a port below IPPORT_RESERVED was requested and
     *                  suser() returned an error
     */
    int
    in_pcbbind(inp, nam)
    register struct inpcb *inp;
    struct mbuf *nam;
    {
    register struct socket *so = inp->inp_socket;    // socket that owns this PCB
    register struct inpcb *head = inp->inp_head;    // head of the PCB list this PCB is on
    register struct sockaddr_in *sin;
    struct proc *p = curproc;       
    u_short lport = 0;
    int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
    int error;
    
    if (in_ifaddr == 0)    // global in_ifaddr is null: presumably no interface address is configured
        return (EADDRNOTAVAIL);
    if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)    // the PCB already has a local port or address: binding twice is an error
        return (EINVAL);
    if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&    // no reuse option set, and either the protocol is not
        ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||       // connection-oriented or the socket is not accepting
         (so->so_options & SO_ACCEPTCONN) == 0))                  // connections: let the conflict lookup match wildcards too
        wild = INPLOOKUP_WILDCARD;
    if (nam) {
        sin = mtod(nam, struct sockaddr_in *);    // the mbuf holds the address to bind; reject it if the length is wrong
        if (nam->m_len != sizeof (*sin))
            return (EINVAL);
    #ifdef notdef
        /*
         * We should check the family, but old programs
         * incorrectly fail to initialize it.
         */
        if (sin->sin_family != AF_INET)    // address family check (compiled out)
            return (EAFNOSUPPORT);
    #endif
        lport = sin->sin_port;
        if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {    // is the address being bound a multicast address?
            /*
             * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
             * allow complete duplication of binding if
             * SO_REUSEPORT is set, or if SO_REUSEADDR is set
             * and a multicast address is bound on both
             * new and duplicated sockets.
             */
            if (so->so_options & SO_REUSEADDR)    // multicast with SO_REUSEADDR: accept either reuse option on the other socket
                reuseport = SO_REUSEADDR|SO_REUSEPORT;
        } else if (sin->sin_addr.s_addr != INADDR_ANY) {    // a specific (non-wildcard) address must correspond to a local interface
            sin->sin_port = 0;      /* yech... */
            if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
                return (EADDRNOTAVAIL);
        }
        if (lport) {    // the caller asked for a specific local port
            struct inpcb *t;
    
            /* GROSS */
            if (ntohs(lport) < IPPORT_RESERVED &&
                (error = suser(p->p_ucred, &p->p_acflag)))    // ports below IPPORT_RESERVED need the suser() check to pass
                return (error);
            t = in_pcblookup(head, zeroin_addr, 0,
                sin->sin_addr, lport, wild);    // look for an existing PCB already using this local address/port
            if (t && (reuseport & t->inp_socket->so_options) == 0)    // found one whose socket has none of the required reuse options
                return (EADDRINUSE);
        }
        inp->inp_laddr = sin->sin_addr;    // no conflict: record the local IP address in the PCB
    }
    /*
     * No port requested: pick one.  head->inp_lport is a counter of the
     * last port handed out, kept in HOST byte order.  Advance it, wrap it
     * back to IPPORT_RESERVED when it leaves the range
     * [IPPORT_RESERVED, IPPORT_USERRESERVED], convert the candidate to
     * network byte order with htons(), and repeat while in_pcblookup()
     * reports that the candidate is already in use.
     */
    if (lport == 0)
        do {
            if (head->inp_lport++ < IPPORT_RESERVED ||
                head->inp_lport > IPPORT_USERRESERVED)
                head->inp_lport = IPPORT_RESERVED;
            lport = htons(head->inp_lport);
        } while (in_pcblookup(head,
                zeroin_addr, 0, inp->inp_laddr, lport, wild));
    inp->inp_lport = lport;    // store the chosen local port (network byte order) in the PCB
    return (0);
    }

    in_pcbconnect:

  • Function A: Bind remote address and port for TCP/UDP

    /*
     * Record the foreign address and port from `nam` in the PCB `inp`,
     * choosing a local address (and, through in_pcbbind(), a local port)
     * when the PCB does not have them yet.
     *
     * `nam` is an mbuf holding a sockaddr_in.  Its sin_addr may be rewritten
     * in place when it is INADDR_ANY or INADDR_BROADCAST.
     *
     * Returns 0 on success, otherwise:
     *   EINVAL         the mbuf length is not sizeof(struct sockaddr_in)
     *   EAFNOSUPPORT   sin_family is not AF_INET
     *   EADDRNOTAVAIL  the foreign port is 0, or no local interface address
     *                  could be selected as the source address
     *   EADDRINUSE     another PCB already has the same 4-tuple
     *   (bind error)   the implicit in_pcbbind() failed; previously this
     *                  error was discarded and the PCB was left connected
     *                  with local port 0
     */
    int
    in_pcbconnect(inp, nam)
    register struct inpcb *inp;
    struct mbuf *nam;
    {
    struct in_ifaddr *ia;
    struct sockaddr_in *ifaddr;
    register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);    // foreign address supplied by the caller
    int error;
    
    if (nam->m_len != sizeof (*sin))    // validate length, address family and port
        return (EINVAL);
    if (sin->sin_family != AF_INET)
        return (EAFNOSUPPORT);
    if (sin->sin_port == 0)
        return (EADDRNOTAVAIL);
    if (in_ifaddr) {
        /*
         * If the destination address is INADDR_ANY,
         * use the primary local address.
         * If the supplied address is INADDR_BROADCAST,
         * and the primary interface supports broadcast,
         * choose the broadcast address for that interface.
         */
    #define satosin(sa) ((struct sockaddr_in *)(sa))
    #define sintosa(sin)    ((struct sockaddr *)(sin))
    #define ifatoia(ifa)    ((struct in_ifaddr *)(ifa))
        if (sin->sin_addr.s_addr == INADDR_ANY)    // INADDR_ANY as destination: connect to this host itself
            sin->sin_addr = IA_SIN(in_ifaddr)->sin_addr;
        else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
          (in_ifaddr->ia_ifp->if_flags & IFF_BROADCAST))    // limited broadcast: use the primary interface's broadcast address
            sin->sin_addr = satosin(&in_ifaddr->ia_broadaddr)->sin_addr;
    }
    if (inp->inp_laddr.s_addr == INADDR_ANY) {    // no local address bound yet: pick a source address
        register struct route *ro;
    
        ia = (struct in_ifaddr *)0;
        /* 
         * If route is known or can be allocated now,
         * our src addr is taken from the i/f, else punt.
         */
        ro = &inp->inp_route;
        if (ro->ro_rt &&
            (satosin(&ro->ro_dst)->sin_addr.s_addr !=
            sin->sin_addr.s_addr || 
            inp->inp_socket->so_options & SO_DONTROUTE)) {    // cached route is for another destination, or routing is bypassed: drop it
            RTFREE(ro->ro_rt);
            ro->ro_rt = (struct rtentry *)0;
        }
        if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/        // routing allowed and no usable cached route: look one up
            (ro->ro_rt == (struct rtentry *)0 ||
            ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
            /* No route yet, so try to acquire one */
            ro->ro_dst.sa_family = AF_INET;
            ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
            ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
                sin->sin_addr;
            rtalloc(ro);
        }
        /*
         * If we found a route, use the address
         * corresponding to the outgoing interface
         * unless it is the loopback (in case a route
         * to our address on another net goes to loopback).
         */
        if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
            ia = ifatoia(ro->ro_rt->rt_ifa);
        if (ia == 0) {
            /*
             * No address from the route: search the interfaces with the
             * port temporarily cleared, then fall back to the first
             * entry of in_ifaddr.
             */
            u_short fport = sin->sin_port;
    
            sin->sin_port = 0;
            ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
            if (ia == 0)
                ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
            sin->sin_port = fport;
            if (ia == 0)
                ia = in_ifaddr;
            if (ia == 0)
                return (EADDRNOTAVAIL);
        }
        /*
         * If the destination address is multicast and an outgoing
         * interface has been set as a multicast option, use the
         * address of that interface as our source address.
         */
        if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
            inp->inp_moptions != NULL) {
            struct ip_moptions *imo;
            struct ifnet *ifp;
    
            imo = inp->inp_moptions;
            if (imo->imo_multicast_ifp != NULL) {
                ifp = imo->imo_multicast_ifp;
                for (ia = in_ifaddr; ia; ia = ia->ia_next)
                    if (ia->ia_ifp == ifp)
                        break;
                if (ia == 0)
                    return (EADDRNOTAVAIL);
            }
        }
        ifaddr = (struct sockaddr_in *)&ia->ia_addr;
    }
    /* ifaddr is only read below when inp_laddr is still INADDR_ANY, i.e. when it was set above. */
    if (in_pcblookup(inp->inp_head,
        sin->sin_addr,
        sin->sin_port,
        inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
        inp->inp_lport,
        0))    // an exact match on the 4-tuple already exists: refuse
        return (EADDRINUSE);
    if (inp->inp_laddr.s_addr == INADDR_ANY) {
        if (inp->inp_lport == 0) {
            /*
             * Never bound (typical for UDP, or TCP without bind):
             * bind implicitly to get a local port, and give up if that
             * fails rather than connecting with local port 0.
             */
            error = in_pcbbind(inp, (struct mbuf *)0);
            if (error)
                return (error);
        }
        inp->inp_laddr = ifaddr->sin_addr;
    }
    inp->inp_faddr = sin->sin_addr;    // finally record the foreign address and port
    inp->inp_fport = sin->sin_port;
    return (0);
    }

    in_pcbdisconnect

  • Function A: Disconnect a UDP socket by setting the foreign port to 0 and the foreign address to INADDR_ANY. Note: This function is only called when a PCB needs to be reused, such as when an implicit UDP connection is torn down.

    /*
     * Disconnect the PCB `inp`: reset the foreign address to INADDR_ANY and
     * the foreign port to 0.  If the socket has SS_NOFDREF set in so_state
     * (presumably: no file descriptor references it any more), the PCB is
     * released with in_pcbdetach().
     *
     * The function is declared int, so it now returns 0 explicitly instead
     * of falling off the end with an indeterminate value.
     */
    int
    in_pcbdisconnect(inp)
    struct inpcb *inp;
    {
    
    inp->inp_faddr.s_addr = INADDR_ANY;
    inp->inp_fport = 0;
    if (inp->inp_socket->so_state & SS_NOFDREF)    // socket flagged SS_NOFDREF: release the PCB as well
        in_pcbdetach(inp);
    return (0);
    }

    in_setsockaddr and in_setpeeraddr

  • Function A: Get local address (getsockname) and remote address (getpeername)

    /*
     * Fill the mbuf `nam` with a sockaddr_in describing the LOCAL address
     * and port of the PCB `inp` (the getsockname case).
     *
     * nam->m_len is set to sizeof(struct sockaddr_in); the mbuf's data area
     * is assumed to be large enough to hold one -- verify against callers.
     *
     * The function is declared int, so it now returns 0 explicitly instead
     * of falling off the end with an indeterminate value.
     */
    int
    in_setsockaddr(inp, nam)
    register struct inpcb *inp;
    struct mbuf *nam;
    {
    register struct sockaddr_in *sin;
    
    nam->m_len = sizeof (*sin);
    sin = mtod(nam, struct sockaddr_in *);
    bzero((caddr_t)sin, sizeof (*sin));    // clear everything, including padding, before filling in
    sin->sin_family = AF_INET;
    sin->sin_len = sizeof(*sin);
    sin->sin_port = inp->inp_lport;
    sin->sin_addr = inp->inp_laddr;
    return (0);
    }
    /*
     * Fill the mbuf `nam` with a sockaddr_in describing the FOREIGN address
     * and port of the PCB `inp` (the getpeername case).
     *
     * nam->m_len is set to sizeof(struct sockaddr_in); the mbuf's data area
     * is assumed to be large enough to hold one -- verify against callers.
     *
     * The function is declared int, so it now returns 0 explicitly instead
     * of falling off the end with an indeterminate value.
     */
    int
    in_setpeeraddr(inp, nam)
    struct inpcb *inp;
    struct mbuf *nam;
    {
    register struct sockaddr_in *sin;
    
    nam->m_len = sizeof (*sin);
    sin = mtod(nam, struct sockaddr_in *);
    bzero((caddr_t)sin, sizeof (*sin));    // clear everything, including padding, before filling in
    sin->sin_family = AF_INET;
    sin->sin_len = sizeof(*sin);
    sin->sin_port = inp->inp_fport;
    sin->sin_addr = inp->inp_faddr;
    return (0);
    }

    Summary

  • Question 1: How to judge a port is being used?
    A port is in use as long as some PCB has it as its local port. The concept of "in use" is relative to the protocol being bound; that is, TCP's ports have nothing to do with UDP's ports.

  • Question 2: BSD allows processes to modify the default behavior using the following two options:
    • SO_REUSEADDR: Allows a process to bind a port number that is already in use, provided the IP address being bound (including the wildcard address) is not already bound to that same port. In other words, with this option set the port may be the same as one already bound, but the IP address (including the wildcard address) bound together with it must differ.
    • SO_REUSEPORT: Allows the same local IP address and port to be bound more than once; used to support multicast.
  • Question 3: How does UDP determine the PCB at the protocol layer when an external packet arrives?
    First make sure the local port matches. Secondly, in order to determine the number of matches, only the number of wildcard matches between the local IP address and the external IP address is considered. The lower the value, the higher the matching degree. Broadcast and multicast packets are not discussed.

  • Question 4: What types of ICMP messages are there, and how are they processed?
    ICMP messages can be roughly divided into:
    A: Destination unreachable
    B: Parameter problem
    C: Redirect (a special case, which operates on the routing structure in the PCB)
    D: Source quench
    E: Time exceeded
    In summary, ICMP messages received for UDP are not delivered to the application unless connect has been called on the UDP socket.

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325267056&siteId=291194637