UDT 最新源码分析(五) -- 网络数据收发

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/yongkai0214/article/details/86690875

从接口实现看 UDT 网络收发

从对外的接口实现方法来看,网络收发过程实际上是对 m_pSndBuffer 和 m_pRcvBuffer 进行操作,而实际的网络收发涉及到系统调度,算法实现等问题。简单来看看代码。

UDT 发送 send / sendmsg / sendfile

以 send 为例,外部接口调用send 其实并不是直接发送到网络,而是将数据加入发送的 buffer 中,后续再通过调度将数据发送到网络中去。send 仅仅针对流传输模式而言,其他模式不可调用此函数。对于数据包模式,应该调用 sendmsg。

CUDT::send(UDTSOCKET u, const char* buf, int len, int)
-> CUDT::send(const char* data, int len)

// Stream-mode send path (excerpt: the leading "..." stands for code the article elided).
// Hands up to `len` bytes of `data` to m_pSndBuffer and returns the number of bytes
// handed over. Nothing is written to the network in this function; the socket is only
// registered on the send queue's m_pSndUList.
int CUDT::send(const char* data, int len)
{
    ...
   if (m_iSndBufSize <= m_pSndBuffer->getCurrBufSize()) // the send buffer is full
   {
       // Check the buffer state and wait until the condition is signalled.
       // Also check the connection state and whether this UDT socket has been closed.
   }

   int size = (m_iSndBufSize - m_pSndBuffer->getCurrBufSize()) * m_iPayloadSize; // maximum amount available
   if (size > len)
      size = len; // size = bytes to enqueue in this call, capped by the available capacity

   // record total time used for sending
   if (0 == m_pSndBuffer->getCurrBufSize())
      m_llSndDurationCounter = CTimer::getTime();

   // insert the user buffer into the sending list
   m_pSndBuffer->addBuffer(data, size); // key step: "sending" only places the data into the buffer

   // insert this socket to snd list if it is not on the list yet
   m_pSndQueue->m_pSndUList->update(this, false);

   if (m_iSndBufSize <= m_pSndBuffer->getCurrBufSize())
   {
      // write is not available any more
      s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_OUT, false);
   }

   return size;
}

sendmsg 与 send 函数有非常多代码一致,核心代码基本上没有变化。sendfile 中 addbuffer 变成 addBufferFromFile,其余基本没变化。

UDT 接收 recv /recvmsg /recvfile

调用接口 recv 实际上只是从接收缓冲中取出数据。取数据之前会检查当前是否为流模式;如果缓冲中没有数据,则会进入条件变量等待(可以带超时),同时也会检查网络连接是否正常。

// Stream-mode receive path (excerpt: "..." marks code the article elided).
// Reads up to `len` bytes from m_pRcvBuffer into `data` and returns the value reported by
// readBuffer(). Throws CUDTException(6, 3, 0) when nothing was read and m_iRcvTimeOut >= 0.
int CUDT::recv(char* data, int len)
{
    ... 
   if (0 == m_pRcvBuffer->getRcvDataSize()) // the receive buffer is empty
   {
       ... // wait until the condition is satisfied or the wait times out
   }

   int res = m_pRcvBuffer->readBuffer(data, len);

   if (m_pRcvBuffer->getRcvDataSize() <= 0)
   {
      // read is not available any more
      s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_IN, false); // clear the readable event
   }

   // Nothing was read although a receive timeout is configured: report it as an exception.
   if ((res <= 0) && (m_iRcvTimeOut >= 0))
      throw CUDTException(6, 3, 0);

   return res;
}

从内部实现看 UDT 网络收发

从接口上可以看到,发送接收仅仅是将数据与buffer进行交互,看不到数据真正进行发送接收的地方。那么在内部究竟如何实现的呢?在以前的文章分析中已经提到过发送接收工作线程的概念,在这里再次看看,代码参考 queue.cpp。

初始化的地方如下,通过调用 m_pSndQueue 和 m_pRcvQueue 调用 init 实现 worker 线程创建:

// Builds a new multiplexer for socket `s` (excerpt: "..." marks code the article elided).
// Creates the UDP channel, opens it either on the caller-supplied UDP socket `udpsock`
// or on `addr`, then creates the timer and the send/receive queues; each queue's init()
// is where its worker thread is started. The finished multiplexer is stored in
// m_mMultiplexer under the socket ID.
// If opening the channel throws a CUDTException, the channel is closed and deleted and
// the same exception is propagated to the caller.
void CUDTUnited::updateMux(CUDTSocket* s, const sockaddr* addr, const UDPSOCKET* udpsock)
{
    ...
   CMultiplexer m;
   m.m_iID = s->m_SocketID;
   m.m_pChannel = new CChannel(s->m_pUDT->m_iIPversion);
   m.m_pChannel->setSndBufSize(s->m_pUDT->m_iUDPSndBufSize);
   m.m_pChannel->setRcvBufSize(s->m_pUDT->m_iUDPRcvBufSize);

   try
   {
      if (NULL != udpsock)
         m.m_pChannel->open(*udpsock);
      else
         m.m_pChannel->open(addr);
   }
   catch (CUDTException&)
   {
      m.m_pChannel->close();
      delete m.m_pChannel;
      // Rethrow the exception currently being handled instead of throwing a copy of it.
      throw;
   }

   m.m_pTimer = new CTimer;

   m.m_pSndQueue = new CSndQueue;
   m.m_pSndQueue->init(m.m_pChannel, m.m_pTimer);
   m.m_pRcvQueue = new CRcvQueue;
   // NOTE(review): m.m_iIPversion is read here but not assigned in this excerpt;
   // presumably it is set in the elided part above - confirm.
   m.m_pRcvQueue->init(32, s->m_pUDT->m_iPayloadSize, m.m_iIPversion, 1024, m.m_pChannel, m.m_pTimer);

   m_mMultiplexer[m.m_iID] = m;
}

UDT 发送工作线程

发送线程中主要的变量有 m_pSndUList, m_pChannel, m_pTimer。线程的工作就是不停的检查 m_pSndUList 中的UDT 实例,取出包,通过 m_pChannel 发送出去。如果取出的包时发现未到发送时间,则通过 m_pTimer sleep 剩余的时间再发送。

创建线程如下所示:

// Binds this send queue to channel `c` and timer `t`, creates the send list that shares
// this queue's window lock/condition and timer, and starts the worker thread.
// Throws CUDTException(3, 1) when the worker thread cannot be created.
void CSndQueue::init(CChannel* c, CTimer* t)
{
   m_pChannel = c;
   m_pTimer = t;

   // Configure the new list completely before publishing it in m_pSndUList.
   CSndUList* const sendList = new CSndUList;
   sendList->m_pWindowLock = &m_WindowLock;
   sendList->m_pWindowCond = &m_WindowCond;
   sendList->m_pTimer = t;
   m_pSndUList = sendList;

   #ifdef WIN32
      DWORD workerId;
      m_WorkerThread = CreateThread(NULL, 0, CSndQueue::worker, this, 0, &workerId);
      if (NULL == m_WorkerThread)
         throw CUDTException(3, 1);
   #else
      const int rc = pthread_create(&m_WorkerThread, NULL, CSndQueue::worker, this);
      if (rc != 0)
      {
         // Leave the handle zeroed so it does not look like a started thread.
         m_WorkerThread = 0;
         throw CUDTException(3, 1);
      }
   #endif
}

根据前面的描述,接下来理解发送工作线程的运行过程。getNextProcTime 实际上就是获取 m_pHeap[0] 的 m_llTimeStamp,这个时间就是下一个待发送数据包的计划发送时间。sleepto 等待该时间到达。pop 则是填充 CPacket,然后再发送。如果 ts 为 0(ts 是无符号数,不会小于 0),代表当前并无数据需要发送,需要继续等待。

// Worker thread of the send queue. `param` is the owning CSndQueue.
// Loops until m_bClosing is set: asks the send list for the next processing time,
// sleeps until that time if it has not arrived yet, pops one packet together with its
// peer address and sends it through the channel. When the list reports no pending time
// (ts == 0) the thread blocks on the window condition until it is signalled.
#ifndef WIN32
   void* CSndQueue::worker(void* param)
#else
   DWORD WINAPI CSndQueue::worker(LPVOID param)
#endif
{
   CSndQueue* self = (CSndQueue*)param;

   while (!self->m_bClosing)
   {
      uint64_t ts = self->m_pSndUList->getNextProcTime(); // next scheduled send time

      if (ts > 0)
      {
         // wait until next processing time of the first socket on the list
         uint64_t currtime;
         CTimer::rdtsc(currtime);
         if (currtime < ts) // not yet time to send
            self->m_pTimer->sleepto(ts); // sleep; this is what paces the gap between packets

         // it is time to send the next pkt
         sockaddr* addr;
         CPacket pkt;
         // pop() returns a negative value when no packet was produced; addr and pkt are
         // only used after a successful pop.
         if (self->m_pSndUList->pop(addr, pkt) < 0)
            continue;

         self->m_pChannel->sendto(addr, pkt);
      }
      else
      {
         // wait here if there is no sockets with data to be sent
         #ifndef WIN32
            pthread_mutex_lock(&self->m_WindowLock);
            // Re-check under the lock so that closing or a newly inserted entry is not missed.
            if (!self->m_bClosing && (self->m_pSndUList->m_iLastEntry < 0))
               pthread_cond_wait(&self->m_WindowCond, &self->m_WindowLock);
            pthread_mutex_unlock(&self->m_WindowLock);
         #else
            WaitForSingleObject(self->m_WindowCond, INFINITE);
         #endif
      }
   }

   #ifndef WIN32
      return NULL;
   #else
      // Signal the exit event before the thread returns.
      SetEvent(self->m_ExitCond);
      return 0;
   #endif
}

Retrieve the next packet and peer address from the first entry, and reschedule it in the queue.
在线程循环块内,出现了 pop 方法。这个方法取出 m_pHeap 中的根节点,检查时间戳,若时间已到,在堆中删除该节点,进入 packData。

// Takes the root entry off the send heap once its scheduled time has arrived, lets that
// socket pack one packet into `pkt`, reports the peer address through `addr`, and puts
// the socket back on the heap at the next processing time returned by packData().
// Returns 1 when a packet was produced and -1 in every other case.
int CSndUList::pop(sockaddr*& addr, CPacket& pkt)
{
   CGuard listguard(m_ListLock);

   // Empty heap: nothing to pop.
   if (m_iLastEntry == -1)
      return -1;

   // The root must not be popped before its scheduled time.
   uint64_t when;
   CTimer::rdtsc(when);
   CSNode* const root = m_pHeap[0];
   if (root->m_llTimeStamp > when)
      return -1;

   CUDT* const sock = root->m_pUDT;
   remove_(sock);

   // The socket stays off the heap when it is not connected or is already broken.
   const bool usable = sock->m_bConnected && !sock->m_bBroken;
   if (!usable)
      return -1;

   // packData() fills the packet and overwrites `when` with the next processing time.
   const int packed = sock->packData(pkt, when);
   if (packed <= 0)
      return -1;

   addr = sock->m_pPeerAddr;

   // A zero time means there is nothing further to schedule for this socket.
   if (when > 0)
      insert_(when, sock);

   return 1;
}

m_pHeap 是一个以节点时间为参考建立的最小堆。所有的插入与删除操作均为堆的操作,需要注意的是,孩子节点与根节点的对应关系。对于根节点 q 来说,左孩子序号为 2 * q + 1, 右孩子为 2 * q + 2,这也是代码中的 p 节点值。

首先看删除某节点的操作:

void CSndUList::remove_(const CUDT* u)
{
   CSNode* n = u->m_pSNode;

   if (n->m_iHeapLoc >= 0)
   {
      // remove the node from heap 最后节点与被删节点交换
      m_pHeap[n->m_iHeapLoc] = m_pHeap[m_iLastEntry];
      m_iLastEntry --;
      m_pHeap[n->m_iHeapLoc]->m_iHeapLoc = n->m_iHeapLoc;

      int q = n->m_iHeapLoc; //被删位置上新节点
      int p = q * 2 + 1; //左孩子序号
      while (p <= m_iLastEntry) // 存在左孩子节点
      {
         // 存在右孩子,且左孩子时间戳大于右孩子时间戳,则修改当前孩子为右孩子
         if ((p + 1 <= m_iLastEntry) && (m_pHeap[p]->m_llTimeStamp > m_pHeap[p + 1]->m_llTimeStamp))
            p ++;
         // 如果根节点时间戳大于孩子中最小时间戳节点,则交换,并置当前节点为新的根节点的左孩子
         if (m_pHeap[q]->m_llTimeStamp > m_pHeap[p]->m_llTimeStamp)
         {
            CSNode* t = m_pHeap[p];
            m_pHeap[p] = m_pHeap[q];
            m_pHeap[p]->m_iHeapLoc = p;
            m_pHeap[q] = t;
            m_pHeap[q]->m_iHeapLoc = q;

            q = p;
            p = q * 2 + 1;
         }
         else
            break;
      }

      n->m_iHeapLoc = -1;
   }

   // the only event has been deleted, wake up immediately
   if (0 == m_iLastEntry)
      m_pTimer->interrupt();
}

对于插入操作,只要记住节点序号关系,就很容易看明白了:孩子节点 q 的父节点 p 为 (q-1)/2。如果还不明白,只能去复习一下堆的数据结构相关知识。

void CSndUList::insert_(int64_t ts, const CUDT* u)
{
   CSNode* n = u->m_pSNode;

   // do not insert repeated node
   if (n->m_iHeapLoc >= 0) 
      return;
   //插入增加到最后节点
   m_iLastEntry ++; 
   m_pHeap[m_iLastEntry] = n;
   n->m_llTimeStamp = ts;
   //开始调整
   int q = m_iLastEntry;
   int p = q;
   while (p != 0)
   {
      p = (q - 1) >> 1; //父节点
      if (m_pHeap[p]->m_llTimeStamp > m_pHeap[q]->m_llTimeStamp)
      {
         CSNode* t = m_pHeap[p];
         m_pHeap[p] = m_pHeap[q];
         m_pHeap[q] = t;
         t->m_iHeapLoc = q;
         q = p;
      }
      else
         break;
   }

   n->m_iHeapLoc = q;

   // an earlier event has been inserted, wake up sending worker
   if (n->m_iHeapLoc == 0)
      m_pTimer->interrupt();

   // first entry, activate the sending queue
   if (0 == m_iLastEntry)
   {
      #ifndef WIN32
         pthread_mutex_lock(m_pWindowLock);
         pthread_cond_signal(m_pWindowCond); //唤醒线程
         pthread_mutex_unlock(m_pWindowLock);
      #else
         SetEvent(*m_pWindowCond);
      #endif
   }
}

在发送线程中还有一个 packData 方法,处理了两类 packet 的读取,一是丢失的 packet,二是正常的顺序传输的包。处理过程:

  1. 获取 entertime, 更新 m_ullTimeDiff, 即记录当前发包对应目标时间的差值,会影响到下一次发包的目标时间。UDT 以此使得发包的时间间隔始终控制在算法之中。

    在 UDT 中,在开始的时候会初始化一个发包时间间隔 m_ullInterval ,这个值表示期望的发送时间间隔。初始化如下所示:

    m_ullInterval = (uint64_t)(m_pCC->m_dPktSndPeriod * m_ullCPUFrequency);

    m_ullInterval 并不是一个固定的值,而是根据网络状态进行调整。比如在 processCtrl 中 收到包类型为 4 时,就会改变。但是查找代码可以发现,当前udt 版本不再执行 sendCtrl(4),代码详见包类型为6 时,代码已经被注释。但是无用代码并未删除,如下所示。

    // One way packet delay is increasing, so decrease the sending rate
    m_ullInterval = (uint64_t)ceil(m_ullInterval * 1.125);
    

    在拥塞控制中 CCUpdate 改变 m_ullInterval 值:

    m_ullInterval = (uint64_t) (m_pCC->m_dPktSndPeriod * m_ullCPUFrequency);
    m_dCongestionWindow = m_pCC->m_dCWndSize;
    if (m_llMaxBW <= 0)
      return;
    const double minSP = 1000000.0 / (double(m_llMaxBW) / m_iMSS) * m_ullCPUFrequency;
    if (m_ullInterval < minSP)
      m_ullInterval = minSP;

    在UDT中,包的发送有一个随网络状况调整的发送周期,也就是 m_ullInterval 值。每一次发送包时,都会根据 m_ullInterval 值计算下一次包发送的理想时间,并修改 m_ullTargetTime 值。

  2. 检查是否丢包。

    • 如果丢包,就将 packet.m_iSeqNo 赋值为丢包的序号值。然后计算 offset。m_iSndLastDataAck 是在接收到最后一个 ack 时更新的序号,之前的所有包都被确认。如果 offset < 0, 表示上次确认序号大于丢包序号,即有包未收到但是被确认,可能出现错误。读取数据如果失败,就会发送丢弃请求,并更新 m_iSndCurrSeqNo。

    • 如果没有丢包,则发送一个新包。根据流窗口与拥塞窗口更新 cwnd 值。若发送包序号在窗口范围内,则 readData 并且更新本地和 ccc中 m_iSndCurrSeqNo,更新 m_iSeqNo,检查是否需要发送包对探测。

    • 更新 packet 与 cc,更新 ts, m_ullTargetTime。包将在 worker 中被发送

// Packs the next packet to send into `packet` and writes the next processing time to `ts`.
// A sequence number from the sender loss list (retransmission) takes priority; otherwise a
// new packet is read from the send buffer if the flow/congestion window allows it.
// Returns the payload length reported by readData(), or 0 when nothing was packed.
// On the two "nothing new to send" paths `ts` is set to 0 and the pacing state
// (m_ullTargetTime, m_ullTimeDiff) is cleared.
int CUDT::packData(CPacket& packet, uint64_t& ts)
{
   int payload = 0;
   bool probe = false;

   uint64_t entertime;
   CTimer::rdtsc(entertime);

   // Accumulate how far past the previous target time this call happened.
   if ((0 != m_ullTargetTime) && (entertime > m_ullTargetTime))
      m_ullTimeDiff += entertime - m_ullTargetTime;

   // Loss retransmission always has higher priority.
   if ((packet.m_iSeqNo = m_pSndLossList->getLostSeq()) >= 0) // a lost packet is pending (from a timeout or a NAK report)
   {
      // protect m_iSndLastDataAck from updating by ACK processing
      CGuard ackguard(m_AckLock);

      // A negative offset means the lost sequence number is before the last acknowledged one.
      int offset = CSeqNo::seqoff(m_iSndLastDataAck, packet.m_iSeqNo); 
      if (offset < 0)
         return 0;

      int msglen;

      // read the data for this sequence number again
      payload = m_pSndBuffer->readData(&(packet.m_pcData), offset, packet.m_iMsgNo, msglen);
      if (-1 == payload)
      {
         // readData() reported -1: send a message drop request (control type 7) for the
         // sequence range [seqpair[0], seqpair[1]].
         int32_t seqpair[2];
         seqpair[0] = packet.m_iSeqNo;
         seqpair[1] = CSeqNo::incseq(seqpair[0], msglen);
         sendCtrl(7, &packet.m_iMsgNo, seqpair, 8);

         // only one msg drop request is necessary
         m_pSndLossList->remove(seqpair[1]);

         // skip all dropped packets
         if (CSeqNo::seqcmp(m_iSndCurrSeqNo, CSeqNo::incseq(seqpair[1])) < 0)
             m_iSndCurrSeqNo = CSeqNo::incseq(seqpair[1]);

         return 0;
      }
      else if (0 == payload)
         return 0;

      ++ m_iTraceRetrans;
      ++ m_iRetransTotal;
   }
   else
   {
      // If no loss, pack a new packet.

      // check congestion/flow window limit
      int cwnd = (m_iFlowWindowSize < (int)m_dCongestionWindow) ? m_iFlowWindowSize : (int)m_dCongestionWindow;
      if (cwnd >= CSeqNo::seqlen(m_iSndLastAck, CSeqNo::incseq(m_iSndCurrSeqNo)))
      {
         if (0 != (payload = m_pSndBuffer->readData(&(packet.m_pcData), packet.m_iMsgNo)))
         {
            m_iSndCurrSeqNo = CSeqNo::incseq(m_iSndCurrSeqNo);
            m_pCC->setSndCurrSeqNo(m_iSndCurrSeqNo);

            packet.m_iSeqNo = m_iSndCurrSeqNo;

            // every 16 (0xF) packets, a packet pair is sent
            if (0 == (packet.m_iSeqNo & 0xF))
               probe = true;
         }
         else
         {
            // Nothing to read from the send buffer: clear the pacing state.
            m_ullTargetTime = 0;
            m_ullTimeDiff = 0;
            ts = 0;
            return 0;
         }
      }
      else
      {
         // The window is exhausted: clear the pacing state.
         m_ullTargetTime = 0;
         m_ullTimeDiff = 0;
         ts = 0;
         return 0;
      }
   }

   packet.m_iTimeStamp = int(CTimer::getTime() - m_StartTime);
   packet.m_iID = m_PeerID;
   packet.setLength(payload);

   m_pCC->onPktSent(&packet);
   //m_pSndTimeWindow->onPktSent(packet.m_iTimeStamp);

   ++ m_llTraceSent;
   ++ m_llSentTotal;

   if (probe)
   {
      // sends out probing packet pair
      // The next processing time is "now", so the following packet is not delayed by m_ullInterval.
      ts = entertime;
      probe = false;
   }
   else
   {
      #ifndef NO_BUSY_WAITING
         ts = entertime + m_ullInterval;
      #else
         // Use the accumulated lateness to shorten (or skip) the wait before the next packet.
         if (m_ullTimeDiff >= m_ullInterval)
         {
            ts = entertime;
            m_ullTimeDiff -= m_ullInterval;
         }
         else
         {
            ts = entertime + m_ullInterval - m_ullTimeDiff;
            m_ullTimeDiff = 0;
         }
      #endif
   }

   m_ullTargetTime = ts;

   return payload;
}

UDT 接收工作线程

接收工作线程的主要工作同样在 while 循环中完成。首先检查是否有新的 socket 到来,如果有,则不断加入 m_pRcvUList,同时添加到 m_pHash 中。然后在 m_UnitQueue 中查找是否存在可用的存储块,在此过程中如果发现已用数量太多会自动扩容。之后不断地通过 recvfrom 接收包。

如果是连接请求, 将被送给 listening socket 或者 rendezvous sockets,对应将进入 listen 或者 connect 操作。否则, 根据 getFlag 判断,进入 processData 或者 processCtrl。这也是接收数据被处理的核心函数。最后将这个 UDT实例 放入 m_pRcvUList 最后。

// Worker thread of the receive queue. `param` is the owning CRcvQueue.
// Each loop iteration: moves newly registered sockets onto the receive list and the hash
// table, obtains a free unit, receives one packet from the channel, and dispatches it by
// destination ID (0 = connection request, > 0 = an existing or rendezvous socket).
// It then runs the timer check for sockets at the head of the receive list and updates
// the connection status of the rendezvous queue.
#ifndef WIN32
   void* CRcvQueue::worker(void* param)
#else
   DWORD WINAPI CRcvQueue::worker(LPVOID param)
#endif
{
   CRcvQueue* self = (CRcvQueue*)param;

   // Address buffer reused for every recvfrom(); sized by IP version and freed after the loop.
   sockaddr* addr = (AF_INET == self->m_UnitQueue.m_iIPversion) ? (sockaddr*) new sockaddr_in : (sockaddr*) new sockaddr_in6;
   CUDT* u = NULL;
   int32_t id;

   while (!self->m_bClosing)
   {
      #ifdef NO_BUSY_WAITING
         self->m_pTimer->tick();
      #endif

      // check waiting list, if new socket, insert it to the list
      while (self->ifNewEntry())
      {
         CUDT* ne = self->getNewEntry();
         if (NULL != ne)
         {
            self->m_pRcvUList->insert(ne);
            self->m_pHash->insert(ne->m_SocketID, ne);
         }
      }

      // find next available slot for incoming packet
      CUnit* unit = self->m_UnitQueue.getNextAvailUnit();
      if (NULL == unit)
      {
         // no space, skip this packet
         // The packet is still read from the channel into a temporary buffer and then discarded.
         CPacket temp;
         temp.m_pcData = new char[self->m_iPayloadSize];
         temp.setLength(self->m_iPayloadSize);
         self->m_pChannel->recvfrom(addr, temp);
         delete [] temp.m_pcData;
         goto TIMER_CHECK;
      }

      unit->m_Packet.setLength(self->m_iPayloadSize);

      // reading next incoming packet, recvfrom returns -1 is nothing has been received
      if (self->m_pChannel->recvfrom(addr, unit->m_Packet) < 0)
         goto TIMER_CHECK;

      id = unit->m_Packet.m_iID;

      // ID 0 is for connection request, which should be passed to the listening socket or rendezvous sockets
      if (0 == id)
      {
         if (NULL != self->m_pListener)
            self->m_pListener->listen(addr, unit->m_Packet);
         else if (NULL != (u = self->m_pRendezvousQueue->retrieve(addr, id)))
         {
            // asynchronous connect: call connect here
            // otherwise wait for the UDT socket to retrieve this packet
            if (!u->m_bSynRecving)
               u->connect(unit->m_Packet);
            else
               self->storePkt(id, unit->m_Packet.clone());
         }
      }
      else if (id > 0)
      {
         if (NULL != (u = self->m_pHash->lookup(id)))
         {
            // Only accept the packet when the source address matches the socket's peer address.
            if (CIPAddress::ipcmp(addr, u->m_pPeerAddr, u->m_iIPversion))
            {
               if (u->m_bConnected && !u->m_bBroken && !u->m_bClosing)
               {
                  // getFlag() == 0 selects the data path; any other value is a control packet.
                  if (0 == unit->m_Packet.getFlag())
                     u->processData(unit);
                  else
                     u->processCtrl(unit->m_Packet);

                  u->checkTimers();
                  self->m_pRcvUList->update(u);
               }
            }
         }
         else if (NULL != (u = self->m_pRendezvousQueue->retrieve(addr, id)))
         {
            if (!u->m_bSynRecving)
               u->connect(unit->m_Packet);
            else
               self->storePkt(id, unit->m_Packet.clone());
         }
      }

TIMER_CHECK:
      // take care of the timing event for all UDT sockets

      uint64_t currtime;
      CTimer::rdtsc(currtime);

      CRNode* ul = self->m_pRcvUList->m_pUList;
      // Entries whose timestamp is older than this cutoff are processed below.
      // NOTE(review): presumably 100 ms, if getCPUFrequency() is ticks per microsecond - confirm.
      uint64_t ctime = currtime - 100000 * CTimer::getCPUFrequency();
      while ((NULL != ul) && (ul->m_llTimeStamp < ctime))
      {
         // Note: this local `u` shadows the function-level `u` declared above.
         CUDT* u = ul->m_pUDT;

         if (u->m_bConnected && !u->m_bBroken && !u->m_bClosing)
         {
            u->checkTimers();
            self->m_pRcvUList->update(u);
         }
         else
         {
            // the socket must be removed from Hash table first, then RcvUList
            self->m_pHash->remove(u->m_SocketID);
            self->m_pRcvUList->remove(u);
            u->m_pRNode->m_bOnList = false;
         }

         // Always restart from the list head; update()/remove() above changed the list.
         ul = self->m_pRcvUList->m_pUList;
      }

      // Check connection requests status for all sockets in the RendezvousQueue.
      self->m_pRendezvousQueue->updateConnStatus();
   }

   // Free the address buffer with the same concrete type it was allocated as.
   if (AF_INET == self->m_UnitQueue.m_iIPversion)
      delete (sockaddr_in*)addr;
   else
      delete (sockaddr_in6*)addr;

   #ifndef WIN32
      return NULL;
   #else
      SetEvent(self->m_ExitCond);
      return 0;
   #endif
}

checkTimers 会更新 cc 参数,并发送 ack 包,检查连接是否中断。在代码中,NAK 定时器不再生效,仅仅依靠发送方的超时机制。只有当 EXP 超时计数 m_iEXPCount 超过 16,且距离上次收到对端响应的时间超过阈值(代码中为 5000000 * m_ullCPUFrequency 个时钟周期)时,才会认为连接已经断开。超时也会导致拥塞控制算法进行调整。

// Periodic timer processing for one socket:
//   1. refreshes the congestion-control derived parameters (CCUpdate);
//   2. sends a full ACK when the ACK timer expired or the ACK interval was reached,
//      otherwise a "light" ACK when enough packets have arrived;
//   3. handles the EXP (expiration) timer: marks the connection broken after repeated
//      expirations, otherwise schedules retransmission of unacknowledged data or sends a
//      keep-alive packet.
void CUDT::checkTimers()
{
   // update CC parameters
   CCUpdate(); // refresh the packet sending interval and the congestion window

   uint64_t currtime;
   CTimer::rdtsc(currtime);

   if ((currtime > m_ullNextACKTime) || ((m_pCC->m_iACKInterval > 0) && (m_pCC->m_iACKInterval <= m_iPktCount)))
   {
      // ACK timer expired or ACK interval is reached
      sendCtrl(2); // full ACK
      CTimer::rdtsc(currtime);
      if (m_pCC->m_iACKPeriod > 0) // schedule the next ACK time (m_ullNextACKTime)
         m_ullNextACKTime = currtime + m_pCC->m_iACKPeriod * m_ullCPUFrequency;
      else
         m_ullNextACKTime = currtime + m_ullACKInt;

      m_iPktCount = 0;
      m_iLightACKCount = 1;
   }
   else if (m_iSelfClockInterval * m_iLightACKCount <= m_iPktCount)
   {
      //send a "light" ACK
      sendCtrl(2, NULL, NULL, 4);
      ++ m_iLightACKCount;
   }

   // we are not sending back repeated NAK anymore and rely on the sender's EXP for retransmission
   //if ((m_pRcvLossList->getLossLength() > 0) && (currtime > m_ullNextNAKTime))
   //{ 
   //   // NAK timer expired, and there is loss to be reported.
   //   sendCtrl(3);
   //
   //   CTimer::rdtsc(currtime);
   //   m_ullNextNAKTime = currtime + m_ullNAKInt;
   //} // The NAK timer is no longer triggered; only the sender's retransmission timeout is used (presumably to reduce falsely detected losses).

   // Compute the time at which the peer is considered to have been silent for too long.
   uint64_t next_exp_time;
   if (m_pCC->m_bUserDefinedRTO)
      next_exp_time = m_ullLastRspTime + m_pCC->m_iRTO * m_ullCPUFrequency;
   else
   {
      // The interval grows with m_iEXPCount and is never shorter than m_iEXPCount * m_ullMinExpInt.
      uint64_t exp_int = (m_iEXPCount * (m_iRTT + 4 * m_iRTTVar) + m_iSYNInterval) * m_ullCPUFrequency;
      if (exp_int < m_iEXPCount * m_ullMinExpInt)
         exp_int = m_iEXPCount * m_ullMinExpInt;
      next_exp_time = m_ullLastRspTime + exp_int;
   }

   if (currtime > next_exp_time)
   {
      // Haven't receive any information from the peer, is it dead?!
      // timeout: more than 16 expirations and more than 5000000 * m_ullCPUFrequency ticks since the last response
      // NOTE(review): that is 5 seconds if m_ullCPUFrequency is ticks per microsecond; an older wording of this comment said 10 seconds - confirm the intended value.
      if ((m_iEXPCount > 16) && (currtime - m_ullLastRspTime > 5000000 * m_ullCPUFrequency))
      {
         // Connection is broken. 
         // UDT does not signal any information about this instead of to stop quietly.
         // Application will detect this when it calls any UDT methods next time.
         m_bClosing = true;
         m_bBroken = true;
         m_iBrokenCounter = 30;

         // update snd U list to remove this socket
         m_pSndQueue->m_pSndUList->update(this);

         releaseSynch();

         // app can call any UDT API to learn the connection_broken error
         s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_IN | UDT_EPOLL_OUT | UDT_EPOLL_ERR, true);

         CTimer::triggerEvent();

         return;
      }

      // sender: Insert all the packets sent after last received acknowledgement into the sender loss list.
      // recver: Send out a keep-alive packet
      if (m_pSndBuffer->getCurrBufSize() > 0)
      {
         if ((CSeqNo::incseq(m_iSndCurrSeqNo) != m_iSndLastAck) && (m_pSndLossList->getLossLength() == 0))
         {
            // resend all unacknowledged packets on timeout, but only if there is no packet in the loss list
            int32_t csn = m_iSndCurrSeqNo;
            int num = m_pSndLossList->insert(m_iSndLastAck, csn); 
            m_iTraceSndLoss += num;
            m_iSndLossTotal += num;
         }

         m_pCC->onTimeout();
         CCUpdate();

         // immediately restart transmission
         m_pSndQueue->m_pSndUList->update(this);
      }
      else
      {
         sendCtrl(1); // keep-alive packet
      }

      ++ m_iEXPCount; // counts consecutive expirations; once it exceeds 16 the broken-connection check above can fire
      // Reset last response time since we just sent a heart-beat.
      m_ullLastRspTime = currtime;
   }
}

再回头看数据处理部分 processData 。

// Handles one received data packet held in `unit`.
// Resets the expiration state, updates arrival statistics and the packet-pair probe
// timing, stores the packet in the receive buffer, reports a detected sequence gap with
// an immediate NAK, and updates the largest received sequence number.
// Returns 0 on success, or -1 when the packet falls outside the receive buffer window or
// addData() rejects it.
int CUDT::processData(CUnit* unit)
{
   CPacket& packet = unit->m_Packet;

   // Just heard from the peer, reset the expiration count.
   m_iEXPCount = 1; // data was received, so the EXP count goes back to its initial value of 1
   uint64_t currtime;
   CTimer::rdtsc(currtime);
   m_ullLastRspTime = currtime; // remember when the peer was last heard from

   m_pCC->onPktReceived(&packet); // congestion-control hook (the article's author could not find its implementation)
   ++ m_iPktCount;
   // update time information: record this packet's arrival time and the previous one
   m_pRcvTimeWindow->onPktArrival(); // recorded to compute the packet arrival rate, which is fed back to the sender in ACKs

   // check if it is probing packet pair; used to estimate link capacity, which is fed back in ACKs
   if (0 == (packet.m_iSeqNo & 0xF)) // first packet of a probing pair
      m_pRcvTimeWindow->probe1Arrival(); // record the arrival time of the first packet of the pair
   else if (1 == (packet.m_iSeqNo & 0xF))
      m_pRcvTimeWindow->probe2Arrival(); // record the interval between the two packets of the pair

   ++ m_llTraceRecv;
   ++ m_llRecvTotal;

   // Drop packets that are before the last ACK or beyond the available buffer space.
   int32_t offset = CSeqNo::seqoff(m_iRcvLastAck, packet.m_iSeqNo);
   if ((offset < 0) || (offset >= m_pRcvBuffer->getAvailBufSize()))
      return -1;

   if (m_pRcvBuffer->addData(unit, offset) < 0) // store the packet in m_pRcvBuffer
      return -1;

   // Loss detection.
   if (CSeqNo::seqcmp(packet.m_iSeqNo, CSeqNo::incseq(m_iRcvCurrSeqNo)) > 0)
   {
      // If loss found, insert them to the receiver loss list
      m_pRcvLossList->insert(CSeqNo::incseq(m_iRcvCurrSeqNo), CSeqNo::decseq(packet.m_iSeqNo));

      // pack loss list for NAK
      // The high bit on the first word marks it as the start of a range.
      int32_t lossdata[2];
      lossdata[0] = CSeqNo::incseq(m_iRcvCurrSeqNo) | 0x80000000;
      lossdata[1] = CSeqNo::decseq(packet.m_iSeqNo);

      // Generate loss report immediately.
      sendCtrl(3, NULL, lossdata, (CSeqNo::incseq(m_iRcvCurrSeqNo) == CSeqNo::decseq(packet.m_iSeqNo)) ? 1 : 2);

      int loss = CSeqNo::seqlen(m_iRcvCurrSeqNo, packet.m_iSeqNo) - 2;
      m_iTraceRcvLoss += loss;
      m_iRcvLossTotal += loss;
   }

   // This is not a regular fixed size packet...   
   //an irregular sized packet usually indicates the end of a message, so send an ACK immediately   
   // Setting m_ullNextACKTime to the current tick count makes the ACK timer due.
   if (packet.getLength() != m_iPayloadSize)
      CTimer::rdtsc(m_ullNextACKTime); 

   // Update the current largest sequence number that has been received.
   // Or it is a retransmitted packet, remove it from receiver loss list.
   if (CSeqNo::seqcmp(packet.m_iSeqNo, m_iRcvCurrSeqNo) > 0)
      m_iRcvCurrSeqNo = packet.m_iSeqNo;
   else
      m_pRcvLossList->remove(packet.m_iSeqNo);

   return 0;
}

接下来看控制消息的处理。这部分的内容可以参考 UDT 最新协议分析.

  1. ACK 处理
  • 如果是一个轻量级 ACK,更新 m_iFlowWindowSize 和 m_iSndLastAck, 终止处理。
  • 否则:
    • 使用相同的 ACK 序号返回一个 ACK2 作为确认的确认。更新 m_ullSndLastAck2Time, m_iFlowWindowSize, m_iSndLastDataAck 和 m_iSndLastAck。
    • 更新发送丢失链表,移除已经被确认的所有包序号。
    • 更新RTT与RTTVar。更新ACK和NAK周期为 4 * RTT + RTTVar + SYN。
    • 更新发送端缓冲,释放已经被确认的缓冲。
    • 更新包到达速率为:A = (A * 7 + a) / 8,其中a为ACK中携带的相应值。更新链路容量估计值:B = (B * 7 + b) / 8,其中b为ACK中携带的相应值。
    • 更新发包间隔 m_ullInterval。
// Control packet handling, ACK case (type 2). Excerpt: "..." marks code the article
// elided, including the enclosing switch and the declaration of `currtime`.
//
// A 4-byte payload is a "light" ACK: only the flow window and m_iSndLastAck are advanced.
// A full ACK additionally:
//   - answers with an ACK2 (type 6), rate-limited to one per m_iSYNInterval unless the
//     same ACK sequence number is repeated;
//   - marks the connection broken if it acknowledges data that was never sent;
//   - releases acknowledged data from the send buffer and the sender loss list;
//   - wakes a blocked sender, raises the epoll write event, and re-queues this socket
//     on the send list;
//   - folds the reported RTT, delivery rate and bandwidth into the running estimates
//     and notifies the congestion control module.
void CUDT::processCtrl(CPacket& ctrlpkt)
{
   ...
   case 2: //010 - Acknowledgement
      {
      int32_t ack;

      // process a lite ACK
      if (4 == ctrlpkt.getLength())
      {
         ack = *(int32_t *)ctrlpkt.m_pcData;
         if (CSeqNo::seqcmp(ack, m_iSndLastAck) >= 0)
         {
            m_iFlowWindowSize -= CSeqNo::seqoff(m_iSndLastAck, ack); // shrink the flow window by the newly acknowledged amount
            m_iSndLastAck = ack;
         }

         break;
      }

       // read ACK seq. no.
      ack = ctrlpkt.getAckSeqNo();

      // send ACK acknowledgement
      // number of ACK2 can be much less than number of ACK
      // m_ullSndLastAck2Time is stored from `now` (CTimer::getTime()), so the elapsed time
      // is measured from `now` as well. The previous code subtracted it from `currtime`.
      // NOTE(review): `currtime` is declared outside this excerpt; presumably it is a
      // CTimer::rdtsc tick count, i.e. a different clock/unit - confirm.
      uint64_t now = CTimer::getTime();
      if ((now - m_ullSndLastAck2Time > (uint64_t)m_iSYNInterval) || (ack == m_iSndLastAck2))
      {
         sendCtrl(6, &ack); // ACK of ACK: acknowledges the acknowledgement
         m_iSndLastAck2 = ack;
         m_ullSndLastAck2Time = now;
      }

      // Got data ACK
      ack = *(int32_t *)ctrlpkt.m_pcData;

      // check the validation of the ack
      if (CSeqNo::seqcmp(ack, CSeqNo::incseq(m_iSndCurrSeqNo)) > 0)
      {
         //this should not happen: attack or bug. The ACK must not be beyond the largest sent sequence number.
         m_bBroken = true;
         m_iBrokenCounter = 0;
         break;
      }

      if (CSeqNo::seqcmp(ack, m_iSndLastAck) >= 0) // this ACK acknowledges new data
      {
         // Update Flow Window Size, must update before and together with m_iSndLastAck
         m_iFlowWindowSize = *((int32_t *)ctrlpkt.m_pcData + 3);
         m_iSndLastAck = ack;
      }

      // protect packet retransmission
      CGuard::enterCS(m_AckLock);

      int offset = CSeqNo::seqoff(m_iSndLastDataAck, ack);
      if (offset <= 0)
      {
         // discard it if it is a repeated ACK
         CGuard::leaveCS(m_AckLock);
         break;
      }

      // acknowledge the sending buffer
      m_pSndBuffer->ackData(offset); // per the article: only moves m_pFirstBlock and adjusts m_iCount

      // record total time used for sending
      // NOTE(review): m_llSndDurationCounter is set from CTimer::getTime() in send();
      // confirm that `currtime` uses the same clock.
      m_llSndDuration += currtime - m_llSndDurationCounter;
      m_llSndDurationTotal += currtime - m_llSndDurationCounter;
      m_llSndDurationCounter = currtime;

      // update sending variables
      m_iSndLastDataAck = ack;
      m_pSndLossList->remove(CSeqNo::decseq(m_iSndLastDataAck)); // drop everything up to the acknowledged sequence number from the loss list

      CGuard::leaveCS(m_AckLock);

      // Wake a sender that is blocked waiting for buffer space.
      #ifndef WIN32
         pthread_mutex_lock(&m_SendBlockLock);
         if (m_bSynSending)
            pthread_cond_signal(&m_SendBlockCond);
         pthread_mutex_unlock(&m_SendBlockLock);
      #else
         if (m_bSynSending)
            SetEvent(m_SendBlockCond);
      #endif

      // acknowledde any waiting epolls to write
      s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_OUT, true);

      // insert this socket to snd list if it is not on the list yet
      m_pSndQueue->m_pSndUList->update(this, false);

      // Update RTT
      //m_iRTT = *((int32_t *)ctrlpkt.m_pcData + 1);
      //m_iRTTVar = *((int32_t *)ctrlpkt.m_pcData + 2);
      // RTTVar is updated first so that it uses the previous RTT estimate.
      int rtt = *((int32_t *)ctrlpkt.m_pcData + 1);
      m_iRTTVar = (m_iRTTVar * 3 + abs(rtt - m_iRTT)) >> 2;
      m_iRTT = (m_iRTT * 7 + rtt) >> 3;

      m_pCC->setRTT(m_iRTT); // pass the new RTT to congestion control

      if (ctrlpkt.getLength() > 16)
      {
         // Update Estimated Bandwidth and packet delivery rate
         if (*((int32_t *)ctrlpkt.m_pcData + 4) > 0)
            m_iDeliveryRate = (m_iDeliveryRate * 7 + *((int32_t *)ctrlpkt.m_pcData + 4)) >> 3;

         if (*((int32_t *)ctrlpkt.m_pcData + 5) > 0)
            m_iBandwidth = (m_iBandwidth * 7 + *((int32_t *)ctrlpkt.m_pcData + 5)) >> 3;

         m_pCC->setRcvRate(m_iDeliveryRate); // pass the receive rate to congestion control
         m_pCC->setBandwidth(m_iBandwidth);  // pass the bandwidth estimate to congestion control
      }

      m_pCC->onACK(ack); // lets congestion control adjust its sending period, which feeds m_ullInterval
      CCUpdate(); // recompute m_ullInterval

      ++ m_iRecvACK;
      ++ m_iRecvACKTotal;

      break;
      }
      ...
}
  1. ACK2 处理
  • acknowledge 根据ACK2中的ACK序号,在ACK历史窗口中找到关联的ACK,根据ACK2到达时间和ACK离开时间,计算rtt。
  • 先用旧的 RTT 更新 RTTVar = (RTTVar * 3 + abs(rtt - RTT)) / 4,再计算新的 RTT = (RTT * 7 + rtt) / 8,最后更新 cc 中的 rtt。
  • 更新被确认的最大ACK序号。
// Control packet handling, ACK2 case (type 6). Excerpt: the enclosing switch is not shown.
// Looks up the acknowledged ACK in the ACK window to obtain an RTT sample, folds it into
// the RTT / RTT variance estimates, and advances the last ACK known to have been
// acknowledged by the peer.
void CUDT::processCtrl(CPacket& ctrlpkt)
{
   case 6: //110 - Acknowledgement of Acknowledgement
      {
      int32_t ack;
      int rtt = -1;

      // update RTT
      // acknowledge() returns the RTT sample and writes the matching ACK sequence number to `ack`.
      rtt = m_pACKWindow->acknowledge(ctrlpkt.getAckSeqNo(), ack);
      if (rtt <= 0)
         break;

      //if increasing delay detected...
      //   sendCtrl(4);

      // RTT EWMA
      // RTTVar is updated first so that it uses the previous RTT estimate.
      m_iRTTVar = (m_iRTTVar * 3 + abs(rtt - m_iRTT)) >> 2;
      m_iRTT = (m_iRTT * 7 + rtt) >> 3;

      m_pCC->setRTT(m_iRTT); 

      // update last ACK that has been received by the sender
      if (CSeqNo::seqcmp(ack, m_iRcvLastAckAck) > 0)
         m_iRcvLastAckAck = ack;

      break;
      }
}
  1. NAK 处理
  • 将 NAK 中携带的所有序号添加到发送丢失链表中。通过码率控制更新 SND 周期。重置 EXP 时间变量。更新 m_pSndUList,等待重传。
// Control packet handling, NAK / loss report case (type 3). Excerpt: the enclosing
// switch is not shown.
// The payload is an array of 32-bit words. A word with the high bit set starts a range
// and is followed by the range end; any other word is a single lost sequence number.
// Valid entries are inserted into the sender loss list and the socket is re-queued on
// the send list so that retransmission starts immediately. A malformed report marks the
// connection as broken.
void CUDT::processCtrl(CPacket& ctrlpkt)
{
   case 3: //011 - Loss Report
      {
      int32_t* losslist = (int32_t *)(ctrlpkt.m_pcData);

      m_pCC->onLoss(losslist, ctrlpkt.getLength() / 4); // congestion-control loss handling (e.g. leaving slow start, updating parameters)
      CCUpdate();

      bool secure = true;

      // decode loss list message and insert loss into the sender loss list
      for (int i = 0, n = (int)(ctrlpkt.getLength() / 4); i < n; ++ i)
      {
         if (0 != (losslist[i] & 0x80000000))
         {
            // A range start must be followed by its end. The payload comes from the
            // network, so reject a truncated report instead of reading past its end.
            if (i + 1 >= n)
            {
               secure = false;
               break;
            }

            if ((CSeqNo::seqcmp(losslist[i] & 0x7FFFFFFF, losslist[i + 1]) > 0) || (CSeqNo::seqcmp(losslist[i + 1], m_iSndCurrSeqNo) > 0))
            {
               // seq_a must not be greater than seq_b; seq_b must not be greater than the most recent sent seq
               secure = false;
               break;
            }
            int num = 0;
            // Insert only the part of the range that has not been acknowledged yet.
            if (CSeqNo::seqcmp(losslist[i] & 0x7FFFFFFF, m_iSndLastAck) >= 0)
               num = m_pSndLossList->insert(losslist[i] & 0x7FFFFFFF, losslist[i + 1]);
            else if (CSeqNo::seqcmp(losslist[i + 1], m_iSndLastAck) >= 0)
               num = m_pSndLossList->insert(m_iSndLastAck, losslist[i + 1]);

            m_iTraceSndLoss += num;
            m_iSndLossTotal += num;

            // Skip the range end, which was consumed together with the range start.
            ++ i;
         }
         else if (CSeqNo::seqcmp(losslist[i], m_iSndLastAck) >= 0)
         {
            if (CSeqNo::seqcmp(losslist[i], m_iSndCurrSeqNo) > 0)
            {
               //seq_a must not be greater than the most recent sent seq
               secure = false;
               break;
            }

            int num = m_pSndLossList->insert(losslist[i], losslist[i]);

            m_iTraceSndLoss += num;
            m_iSndLossTotal += num;
         }
      }

      if (!secure)
      {
         //this should not happen: attack or bug
         m_bBroken = true;
         m_iBrokenCounter = 0;
         break;
      }

      // the lost packet (retransmission) should be sent out immediately
      m_pSndQueue->m_pSndUList->update(this);

      ++ m_iRecvNAK;
      ++ m_iRecvNAKTotal;

      break;
      }
}
  1. Handshake 处理
   // Control packet handling, handshake case (type 0). Excerpt: the function header and
   // the enclosing switch are not shown.
   // When the peer is still sending handshake requests, the handshake response is built
   // from this socket's parameters and sent again.
   case 0: //000 - Handshake
      {
      CHandShake req;
      req.deserialize(ctrlpkt.m_pcData, ctrlpkt.getLength());
      if ((req.m_iReqType > 0) || (m_bRendezvous && (req.m_iReqType != -2)))
      {
         // The peer side has not received the handshake message, so it keeps querying
         // resend the handshake packet

         CHandShake initdata;
         initdata.m_iISN = m_iISN;
         initdata.m_iMSS = m_iMSS;
         initdata.m_iFlightFlagSize = m_iFlightFlagSize;
         // Response type: -1 for a regular connection, -2 in rendezvous mode.
         initdata.m_iReqType = (!m_bRendezvous) ? -1 : -2;
         initdata.m_iID = m_SocketID;

         // Temporary buffer for the serialized handshake; freed right after it is sent.
         char* hs = new char [m_iPayloadSize];
         int hs_size = m_iPayloadSize;
         initdata.serialize(hs, hs_size);
         sendCtrl(0, NULL, hs, hs_size);
         delete [] hs;
      }

      break;
      }
}
  1. Msg drop request 处理
  • 在接收缓冲中标记所有属于同一个消息的包,使得不再可读。 在接收丢失链表中移除所有对应的包。
   // Control packet handling, message drop request case (type 7). Excerpt: the function
   // header and the enclosing switch are not shown.
   // The payload holds two 32-bit sequence numbers: the first at byte offset 0 and the
   // last at byte offset 4 of the dropped message.
   case 7: //111 - Msg drop request
      // Drop the message from the receive buffer and remove its sequence range from the receiver loss list.
      m_pRcvBuffer->dropMsg(ctrlpkt.getMsgSeq());
      m_pRcvLossList->remove(*(int32_t*)ctrlpkt.m_pcData, *(int32_t*)(ctrlpkt.m_pcData + 4));

      // move forward with current recv seq no.
      // Only when the dropped range starts at or before the next expected sequence number
      // and ends after the current one.
      if ((CSeqNo::seqcmp(*(int32_t*)ctrlpkt.m_pcData, CSeqNo::incseq(m_iRcvCurrSeqNo)) <= 0)
         && (CSeqNo::seqcmp(*(int32_t*)(ctrlpkt.m_pcData + 4), m_iRcvCurrSeqNo) > 0))
      {
         m_iRcvCurrSeqNo = *(int32_t*)(ctrlpkt.m_pcData + 4);
      }

      break;
}

猜你喜欢

转载自blog.csdn.net/yongkai0214/article/details/86690875