H.266/VVC代码学习:xCheckRDCostIntra函数

xCheckRDCostIntra函数主要是选出最佳帧内模式和变换类型。

其中通过调用estIntraPredLumaQT函数遍历所有的亮度帧内预测模式选出最佳亮度帧内预测模式,通过调用estIntraPredChromaQT函数选出最佳色度帧内预测模式。

xCheckRDCostIntra函数会遍历所有的变换模式(DCT-2、LFNST以及MTS),并计算相应的RD Cost。

主要分为两个阶段

阶段1:mts_flag标志被设置为0,遍历预测模式,并根据RD Cost选择出最佳预测模式。对于每个预测模式,分别进行DCT-2和DCT-2+LFNST变换。

  • 对于DCT-2变换,通过比较DCT-2和TransformSkip的SAD决定使用DCT-2还是TransformSkip
  • 对于LFNST变换,通过遍历lfnstIdx为1或者2的情况选出最佳LFNST变换矩阵

阶段2:mts_flag标志被设置为1,遍历MTS变换集并计算相应的RD Cost。

  • 快速算法:通过比较当前MTS变换核的RD Cost与第一阶段存储的DCT-2的RD Cost,决定是否继续遍历下一个MTS变换核。

代码和注释如下:

// Tests intra coding of the current CU and updates bestCS when an intra candidate wins.
//
// The search is organised as three nested loops:
//   - trGrpIdx : transform group (up to 4 when LFNST is enabled in the SPS, otherwise 1);
//                group 0 starts at mtsFlag 0, later groups start at mtsFlag 1
//   - lfnstIdx : LFNST index, from startLfnstIdx to endLfnstIdx
//   - mtsFlag  : from startMtsFlag to endMtsFlag
// For every combination a fresh CU is built in tempCS, the luma and/or chroma intra search is
// run, the bits are estimated with m_CABACEstimator, the RD cost is computed and
// xCheckBestMode() compares tempCS against bestCS. Several early-termination rules shrink
// the loops on the fly (they modify endLfnstIdx, skipSecondMtsPass and trGrpCheck).
//
// tempCS       working coding structure, re-initialised for every tested combination
// bestCS       best coding structure found so far
// partitioner  describes the current area and channel type
// encTestMode  test mode providing qp, lossless flag and maxCostAllowed
void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
  double          bestInterCost             = m_modeCtrl->getBestInterCost();// best inter cost reported by the mode controller
  double          costSize2Nx2NmtsFirstPass = m_modeCtrl->getMtsSize2Nx2NFirstPassCost();
  // whether the second (MTS) pass is to be skipped
  bool            skipSecondMtsPass         = m_modeCtrl->getSkipSecondMTSPass();
  const SPS&      sps                       = *tempCS->sps;
  const int       maxSizeMTS                = MTS_INTRA_MAX_CU_SIZE;// 32 according to the original annotation; the macro is defined elsewhere
  // The MTS second pass is considered only when intra MTS is enabled in the SPS, the channel is luma,
  // and both width and height are <= maxSizeMTS
  uint8_t         considerMtsSecondPass     = ( sps.getUseIntraMTS() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeMTS && partitioner.currArea().lheight() <= maxSizeMTS ) ? 1 : 0;
  const PPS &pps      = *tempCS->pps;

  bool   useIntraSubPartitions   = false;// set once the luma search has selected an ISP mode
  double maxCostAllowedForChroma = MAX_DOUBLE;// cost budget handed to the chroma search
  const  CodingUnit *bestCU      = bestCS->getCU( partitioner.chType );
  Distortion interHad = m_modeCtrl->getInterHad();// inter HAD value reported by the mode controller


  double dct2Cost                =   MAX_DOUBLE;
  double trGrpBestCost     [ 4 ] = { MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE };
  double globalBestCost          =   MAX_DOUBLE;
  bool   bestSelFlag       [ 4 ] = { false, false, false, false };
  bool   trGrpCheck        [ 4 ] = { true, true, true, true };
  int    startMTSIdx       [ 4 ] = { 0, 1, 2, 3 };// per-group MTS index range passed to the luma search; presumably the four DST7/DCT8 combinations - TODO confirm
  int    endMTSIdx         [ 4 ] = { 0, 1, 2, 3 };
  double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 };// the next group is tested only while dct2Cost / (best cost of this group) stays below this ratio
  int    bestMtsFlag             =   0;
  int    bestLfnstIdx            =   0;

#if JVET_O0213_RESTRICT_LFNST_TO_MAX_TB_SIZE
#if JVET_O0050_LOCAL_DUAL_TREE
  // maxLfnstIdx is 0 for a separate-tree chroma block whose width or height is below 8, or when the
  // block is wider/taller than the maximum transform block size; otherwise it is 2.
  // Per the original annotation: 0 means LFNST is not used, 1 and 2 select the LFNST kernel.
  const int  maxLfnstIdx         = ( partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
#else
  const int  maxLfnstIdx         = ( CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
#endif
#if JVET_O0545_MAX_TB_SIGNALLING
                                   || ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 2;
#else
                                   || ( partitioner.currArea().lwidth() > MAX_TB_SIZEY || partitioner.currArea().lheight() > MAX_TB_SIZEY ) ? 0 : 2;
#endif
#else
#if JVET_O0050_LOCAL_DUAL_TREE
  const int  maxLfnstIdx         = partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2;
#else
  const int  maxLfnstIdx         = CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2;
#endif
#endif
  bool       skipOtherLfnst      = false;
  int        startLfnstIdx       = 0;
  int        endLfnstIdx         = sps.getUseLFNST() ? maxLfnstIdx : 0;// maxLfnstIdx (0 or 2) when LFNST is enabled, otherwise 0

  int grpNumMax = sps.getUseLFNST() ? 4 : 1; 
  m_pcIntraSearch->invalidateBestModeCost();
  for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ )
  {
    const uint8_t startMtsFlag = trGrpIdx > 0;
    const uint8_t endMtsFlag   = sps.getUseLFNST() ? considerMtsSecondPass : 0;// 0 or 1
    // Group 0 always runs; later groups run only when the MTS pass is considered and not skipped.
    // In both cases the group must still be enabled in trGrpCheck.
    if( ( trGrpIdx == 0 || ( !skipSecondMtsPass && considerMtsSecondPass ) ) && trGrpCheck[ trGrpIdx ] )
    {
      // Loop over the LFNST index; endLfnstIdx may be lowered inside the loop by the fast decisions below
      for( int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++ )
      {
        // Loop over the MTS flag (endMtsFlag is 0 or 1).
        // Per the original annotation: mtsFlag 0 uses DCT-2 only, mtsFlag 1 uses MTS.
        for( uint8_t mtsFlag = startMtsFlag; mtsFlag <= endMtsFlag; mtsFlag++ )
        {
#if JVET_O0368_LFNST_WITH_DCT2_ONLY // LFNST is only combined with DCT-2, so skip MTS + LFNST
          if (mtsFlag > 0 && lfnstIdx > 0) 
          {
            continue;
          }
#endif
          //3) if interHad is 0, only try further modes if some intra mode was already better than inter
          if( sps.getUseLFNST() && m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 )
          {
            continue;
          }

          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );

          CodingUnit &cu      = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );

          partitioner.setCUData( cu );
          cu.slice            = tempCS->slice;
          cu.tileIdx          = tempCS->picture->brickMap->getBrickIdxRsMap( tempCS->area.lumaPos() );
          cu.skip             = false;
          cu.mmvdSkip = false;
          cu.predMode         = MODE_INTRA;
          cu.transQuantBypass = encTestMode.lossless;
          cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
          cu.qp               = encTestMode.qp;
          //cu.ipcm             = false;
          cu.lfnstIdx         = lfnstIdx;
          cu.mtsFlag          = mtsFlag;
          cu.ispMode          = NOT_INTRA_SUBPARTITIONS;

          CU::addPUs( cu );

          tempCS->interHad    = interHad;

          m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;

          bool validCandRet = false;
          // Luma channel
          if( isLuma( partitioner.chType ) )
          {
#if JVET_O0502_ISP_CLEANUP
            //ISP uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary subpartitions
#if JVET_O0050_LOCAL_DUAL_TREE
            double bestCostSoFar = partitioner.isSepTree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
            if (partitioner.isSepTree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#else
            double bestCostSoFar = CS::isDualITree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
            if (CS::isDualITree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#endif
            {
              bestCostSoFar = encTestMode.maxCostAllowed;// cap by the maximum cost allowed for this test mode
            }
#else
            //the Intra SubPartitions mode uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary lines
#if JVET_O0050_LOCAL_DUAL_TREE
            const double bestCostSoFar = partitioner.isSepTree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
#else
            const double bestCostSoFar = CS::isDualITree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
#endif
#endif  
            // Run the intra luma mode search; the return value is stored as "a valid candidate was found"
            validCandRet = m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar, mtsFlag, startMTSIdx[ trGrpIdx ], endMTSIdx[ trGrpIdx ], ( trGrpIdx > 0 ) );
            
            // With LFNST enabled: move on to the next combination when no valid candidate was found,
            // or when an ISP mode was chosen and the first TU has a zero luma CBF
            if( sps.getUseLFNST() && ( !validCandRet || ( cu.ispMode && cu.firstTU->cbf[ COMPONENT_Y ] == 0 ) ) )
            {
              continue;
            }

            useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS; // remember whether ISP was selected
#if JVET_O0050_LOCAL_DUAL_TREE
            if( !partitioner.isSepTree( *tempCS ) )
#else
            if( !CS::isDualITree( *tempCS ) )
#endif
            {
              // Luma-only RD cost, computed before the chroma search adds its bits and distortion
              tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
              if( useIntraSubPartitions )
              {
                //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost
                maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE;
              }
            }

            if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max()
                && tempCS->interHad == 0)
            {
              interHad = 0;
              // JEM assumes only perfect reconstructions can from now on beat the inter mode
              m_modeCtrl->enforceInterHad( 0 );
              continue;
            }

#if JVET_O0050_LOCAL_DUAL_TREE
            if( !partitioner.isSepTree( *tempCS ) )
#else
            if( !CS::isDualITree( *tempCS ) )
#endif
            {
              cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) );// copy the luma reconstruction into the picture buffer
              cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y));// copy the luma prediction into the picture buffer
            }
          }//if(isLuma( partitioner.chType))

          // Chroma: handled when the format has chroma and this is either a chroma pass or a non-separate tree
#if JVET_O0050_LOCAL_DUAL_TREE
          if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !cu.isSepTree() ) )
#else
          if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) )
#endif
          {
            TUIntraSubPartitioner subTuPartitioner( partitioner );
#if JVET_O0050_LOCAL_DUAL_TREE
            m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( cu.isSepTree() && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
#else
            m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( CS::isDualITree( *cu.cs ) && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
#endif
            if( useIntraSubPartitions && !cu.ispMode )
            {
              //At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations
              continue;
            }
          }// end of chroma handling


          // rootCbf is set when any valid transform block of the first TU has a non-zero CBF
          cu.rootCbf = false;
          for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ )
          {
            cu.rootCbf |= cu.firstTU->cbf[t] != 0;
          }

          // -------------- bit estimation --------------
          // Get total bits for current mode: encode CU
          m_CABACEstimator->resetBits();

          if( pps.getTransquantBypassEnabledFlag() ) // flag is coded only when enabled in the PPS
          {
            m_CABACEstimator->cu_transquant_bypass_flag( cu );
          }

          if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
            && cu.Y().valid()
            )
          {
            m_CABACEstimator->cu_skip_flag ( cu );
          }
          m_CABACEstimator->pred_mode      ( cu ); // prediction mode
          m_CABACEstimator->pcm_data       ( cu, partitioner );
          m_CABACEstimator->cu_pred_data   ( cu );
          m_CABACEstimator->bdpcm_mode     ( cu, ComponentID(partitioner.chType) );

          // Encode Coefficients
          CUCtx cuCtx;
          cuCtx.isDQPCoded = true;
          cuCtx.isChromaQpAdjCoded = true;
          m_CABACEstimator->cu_residual( cu, partitioner, cuCtx ); 

          tempCS->fracBits = m_CABACEstimator->getEstFracBits(); // estimated rate
          tempCS->cost     = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);// RD cost from rate and distortion

#if JVET_O0050_LOCAL_DUAL_TREE
          double bestIspCost = cu.ispMode ? cu.isSepTree() ? tempCS->cost : tempCS->lumaCost : MAX_DOUBLE; // ISP cost used by the ISP-based early termination below
#else
          double bestIspCost = cu.ispMode ? CS::isDualITree( *tempCS ) ? tempCS->cost : tempCS->lumaCost : MAX_DOUBLE;
#endif

          // Snapshot of the cost taken before xEncodeDontSplit() and xCheckDQP() are applied
          const double tmpCostWithoutSplitFlags = tempCS->cost;
          xEncodeDontSplit( *tempCS, partitioner );

          xCheckDQP( *tempCS, partitioner );

          // Check if low frequency non-separable transform (LFNST) is too expensive
#if JVET_O0472_LFNST_SIGNALLING_LAST_SCAN_POS
          if( lfnstIdx && !cuCtx.lfnstLastScanPos )
          {
#if JVET_O0050_LOCAL_DUAL_TREE
            bool cbfAtZeroDepth = cu.isSepTree() ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf;
#else
            bool cbfAtZeroDepth = CS::isDualITree( *tempCS ) ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf;
#endif
            if( cbfAtZeroDepth )
            {
              // invalidate this candidate
              tempCS->cost = MAX_DOUBLE;
            }
          }
#else
#if JVET_O0050_LOCAL_DUAL_TREE
          const int nonZeroCoeffThr = cu.isSepTree() ? ( isLuma( partitioner.chType ) ? LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA;
#else
          const int nonZeroCoeffThr = CS::isDualITree( *tempCS ) ? ( isLuma( partitioner.chType ) ? LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA;
#endif
          if( lfnstIdx && cuCtx.numNonZeroCoeffNonTs <= nonZeroCoeffThr )
          {
            if (cuCtx.numNonZeroCoeffNonTs > 0)
            {
              // invalidate this candidate
              tempCS->cost = MAX_DOUBLE;
            }
          }
#endif

          if( mtsFlag == 0 && lfnstIdx == 0 )
          {
            dct2Cost = tempCS->cost;// remember the cost of the plain DCT-2 test (no MTS, no LFNST)
          }

          if( tempCS->cost < bestCS->cost ) // the current candidate beats the best so far
          {
            m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags );
          }

          // the cost of the latest mtsFlag == 0 test is kept as the first-pass cost
          if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost;

          if( sps.getUseLFNST() && !tempCS->cus.empty() )
          {
            skipOtherLfnst = m_modeCtrl->checkSkipOtherLfnst( encTestMode, tempCS, partitioner );
          }

          xCalDebCost( *tempCS, partitioner );
          tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();


#if WCG_EXT
          DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
          DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
          if( !sps.getUseLFNST() ) // LFNST disabled in the SPS
          {
            xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); // compare tempCS against bestCS
          }
          else // LFNST enabled in the SPS
          {
            if( xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ) )
            {
              // a new best was found: record it for this transform group
              trGrpBestCost[ trGrpIdx ] = globalBestCost = bestCS->cost;
              bestSelFlag  [ trGrpIdx ] = true;
              bestMtsFlag               = mtsFlag;
              bestLfnstIdx              = lfnstIdx;
              if( bestCS->cus.size() == 1 )
              {
                CodingUnit &cu = *bestCS->cus.front();
                if( cu.firstTU->mtsIdx == MTS_SKIP )
                {
                  // best is transform skip on a block with log2(width) + log2(height) >= 6: stop testing further LFNST indices
                  if( ( g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].width ] + g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].height ] ) >= 6 )
                  {
                    endLfnstIdx = 0;
                  }
                }
              }
            }

#if JVET_O0502_ISP_CLEANUP
            //we decide to skip the non-DCT-II transforms and LFNST according to the ISP results
            if ((endMtsFlag > 0 || endLfnstIdx > 0) && cu.ispMode && !mtsFlag && !lfnstIdx && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP())
#else
            //we decide to skip the second emt pass or not according to the ISP results
            if( considerMtsSecondPass && cu.ispMode && !mtsFlag && tempCS->slice->isIntra() )
#endif
            {
              double bestCostDct2NoIsp = m_modeCtrl->getMtsFirstPassNoIspCost();
              CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" );
#if JVET_O0502_ISP_CLEANUP
              double threshold = 1.4;
#else
              double nSamples  = ( double ) ( cu.lwidth() << g_aucLog2[ cu.lheight() ] );
              double threshold = 1 + 1.4 / sqrt( nSamples );
#endif

              double lfnstThreshold = 1.01 * threshold;
              if( bestCostDct2NoIsp > bestIspCost*lfnstThreshold ) // skip the remaining LFNST indices
              {
                endLfnstIdx = lfnstIdx;
              }

              if( bestCostDct2NoIsp > bestIspCost*threshold ) // skip the MTS pass
              {
                skipSecondMtsPass = true;
                m_modeCtrl->setSkipSecondMTSPass( true );
                break;
              }
            }
            //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not
            // Applies to the mtsFlag == 0 test in non-intra slices when the best CU so far is not intra:
            // the MTS pass is skipped if the first-pass cost is clearly worse than the best inter cost
            if( !mtsFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA )
            {
              const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode
              if( costSize2Nx2NmtsFirstPass > thEmtInterFastSkipIntra * bestInterCost )
              {
                skipSecondMtsPass = true; 
                m_modeCtrl->setSkipSecondMTSPass( true );
                break;
              }
            }
          } //if(sps.getUseLFNST())

        } // end of mtsFlag loop
        if( skipOtherLfnst ) // skip the remaining LFNST indices
        {
          startLfnstIdx = lfnstIdx;
          endLfnstIdx   = lfnstIdx;
          break;
        }
      } //for lfnstIdx
    } //if (!skipSecondMtsPass && considerMtsSecondPass && trGrpCheck[iGrpIdx])

    // Decide whether the next transform group is tested: it is disabled by default and re-enabled only
    // when this group produced a new best, the MTS pass is considered, the overall best uses MTS or LFNST,
    // and dct2Cost / (best cost of this group) is below the stop threshold
    if( sps.getUseLFNST() && trGrpIdx < 3 )
    {
      trGrpCheck[ trGrpIdx + 1 ] = false;

      if( bestSelFlag[ trGrpIdx ] && considerMtsSecondPass )
      {
        double dCostRatio = dct2Cost / trGrpBestCost[ trGrpIdx ];
        
        trGrpCheck[ trGrpIdx + 1 ] = ( bestMtsFlag != 0 || bestLfnstIdx != 0 ) && dCostRatio < trGrpStopThreshold[ trGrpIdx ];
      }
    }
  } // end of trGrpIdx loop
}
发布了80 篇原创文章 · 获赞 101 · 访问量 5万+

猜你喜欢

转载自blog.csdn.net/BigDream123/article/details/103112353