xCheckRDCostIntra函数主要是选出最佳帧内模式和变换类型。
其中通过调用estIntraPredLumaQT函数遍历所有的亮度帧内预测模式选出最佳亮度帧内预测模式,通过调用estIntraPredChromaQT函数选出最佳色度帧内预测模式。
xCheckRDCostIntra函数会遍历所有的变换模式,并计算每种变换模式对应的RD Cost。
主要分为两个阶段
阶段1:mts_flag标志被设置为0,并遍历预测模式根据RD Cost选择出最佳预测模式。对于每个预测模式,分别进行DCT-2和DCT-2+LFNST变换。
- 对于DCT-2变换,通过比较DCT-2和TransformSkip的SAD决定使用DCT-2还是TransformSkip
- 对于LFNST变换,通过遍历lfnstIdx为1或者2的情况选出最佳LFNST变换矩阵
阶段2:mts_flag标志被设置为1,遍历MTS变换集并计算相应的RD Cost。
- 快速算法:将当前MTS变换核的RD Cost与第一阶段存储的最佳DCT-2的RD Cost进行比较,以决定是否继续遍历下一个MTS变换核。
代码和注释如下:
/**
 * Tests intra coding of the current CU over combinations of transform group
 * (trGrpIdx), LFNST index (lfnstIdx) and MTS flag (mtsFlag), and hands every
 * completed candidate to xCheckBestMode.
 *
 * For each combination the function:
 *  - re-initialises tempCS and adds a MODE_INTRA CU to it,
 *  - runs the luma search (estIntraPredLumaQT) when partitioner.chType is luma,
 *  - runs the chroma search (estIntraPredChromaQT) when the chroma format is not
 *    CHROMA_400 and the channel is chroma or the tree is not separate,
 *  - estimates the bits with m_CABACEstimator and computes the RD cost,
 *  - applies several early-termination rules (continue / break) that prune the
 *    remaining LFNST indices, the second MTS pass, or the next transform group.
 *
 * @param tempCS       working coding structure; re-initialised for every candidate
 * @param bestCS       best coding structure so far; compared against tempCS by xCheckBestMode
 * @param partitioner  current partitioning state (supplies the area and channel type)
 * @param encTestMode  test mode supplying qp, the lossless flag and maxCostAllowed
 */
void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
double bestInterCost = m_modeCtrl->getBestInterCost();//best inter cost reported by the mode controller
double costSize2Nx2NmtsFirstPass = m_modeCtrl->getMtsSize2Nx2NFirstPassCost();
//whether the second MTS pass has to be skipped
bool skipSecondMtsPass = m_modeCtrl->getSkipSecondMTSPass();
const SPS& sps = *tempCS->sps;
const int maxSizeMTS = MTS_INTRA_MAX_CU_SIZE;//presumably 32 - confirm against the definition of MTS_INTRA_MAX_CU_SIZE
//the MTS pass is considered only when intra MTS is enabled in the SPS, the channel is luma,
//and both width and height are <= maxSizeMTS
uint8_t considerMtsSecondPass = ( sps.getUseIntraMTS() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeMTS && partitioner.currArea().lheight() <= maxSizeMTS ) ? 1 : 0;
const PPS &pps = *tempCS->pps;
bool useIntraSubPartitions = false;//set after the luma search when an ISP mode was selected
double maxCostAllowedForChroma = MAX_DOUBLE;//cost budget passed to the chroma search
const CodingUnit *bestCU = bestCS->getCU( partitioner.chType );
Distortion interHad = m_modeCtrl->getInterHad();//inter HAD value from the mode controller
double dct2Cost = MAX_DOUBLE;
double trGrpBestCost [ 4 ] = { MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE };
double globalBestCost = MAX_DOUBLE;
bool bestSelFlag [ 4 ] = { false, false, false, false };
bool trGrpCheck [ 4 ] = { true, true, true, true };
int startMTSIdx [ 4 ] = { 0, 1, 2, 3 };//first MTS index of each transform group, passed to estIntraPredLumaQT (presumably the four DST7/DCT8 combinations - confirm)
int endMTSIdx [ 4 ] = { 0, 1, 2, 3 };
double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 };//upper bound on dct2Cost / group best cost for the next transform group to be tested
int bestMtsFlag = 0;
int bestLfnstIdx = 0;
#if JVET_O0213_RESTRICT_LFNST_TO_MAX_TB_SIZE
#if JVET_O0050_LOCAL_DUAL_TREE
//maxLfnstIdx is 0 when (separate tree && chroma channel && (width < 8 || height < 8)),
//or when width or height exceeds the maximum transform block size; otherwise it is 2
//NOTE(review): presumably 0 means LFNST off and 1/2 select the LFNST kernel - confirm
const int maxLfnstIdx = ( partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
#else
const int maxLfnstIdx = ( CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
#endif
#if JVET_O0545_MAX_TB_SIGNALLING
|| ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 2;
#else
|| ( partitioner.currArea().lwidth() > MAX_TB_SIZEY || partitioner.currArea().lheight() > MAX_TB_SIZEY ) ? 0 : 2;
#endif
#else
#if JVET_O0050_LOCAL_DUAL_TREE
const int maxLfnstIdx = partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2;
#else
const int maxLfnstIdx = CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2;
#endif
#endif
bool skipOtherLfnst = false;
int startLfnstIdx = 0;
int endLfnstIdx = sps.getUseLFNST() ? maxLfnstIdx : 0;// maxLfnstIdx when LFNST is enabled in the SPS, otherwise 0
int grpNumMax = sps.getUseLFNST() ? 4 : 1;
m_pcIntraSearch->invalidateBestModeCost();
for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ )
{
const uint8_t startMtsFlag = trGrpIdx > 0;
const uint8_t endMtsFlag = sps.getUseLFNST() ? considerMtsSecondPass : 0;//0 or 1
//group 0 is always eligible; later groups only when the second MTS pass is considered and not skipped.
//In both cases the group must still be enabled in trGrpCheck
if( ( trGrpIdx == 0 || ( !skipSecondMtsPass && considerMtsSecondPass ) ) && trGrpCheck[ trGrpIdx ] )
{
//loop over the LFNST indices from startLfnstIdx to endLfnstIdx (both may be narrowed inside the loop)
for( int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++ )
{
//loop over the MTS flag from startMtsFlag to endMtsFlag
//NOTE(review): presumably mtsFlag 0 means DCT-2 only and mtsFlag 1 means MTS - confirm
for( uint8_t mtsFlag = startMtsFlag; mtsFlag <= endMtsFlag; mtsFlag++ )
{
#if JVET_O0368_LFNST_WITH_DCT2_ONLY // LFNST is never combined with mtsFlag > 0
if (mtsFlag > 0 && lfnstIdx > 0)
{
continue;
}
#endif
//3) if interHad is 0, only try further modes if some intra mode was already better than inter
if( sps.getUseLFNST() && m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 )
{
continue;
}
tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
partitioner.setCUData( cu );
cu.slice = tempCS->slice;
cu.tileIdx = tempCS->picture->brickMap->getBrickIdxRsMap( tempCS->area.lumaPos() );
cu.skip = false;
cu.mmvdSkip = false;
cu.predMode = MODE_INTRA;
cu.transQuantBypass = encTestMode.lossless;
cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
cu.qp = encTestMode.qp;
//cu.ipcm = false;
cu.lfnstIdx = lfnstIdx;
cu.mtsFlag = mtsFlag;
cu.ispMode = NOT_INTRA_SUBPARTITIONS;
CU::addPUs( cu );
tempCS->interHad = interHad;
m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
bool validCandRet = false;
//luma channel
if( isLuma( partitioner.chType ) )//luma
{
#if JVET_O0502_ISP_CLEANUP
//ISP uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary subpartitions
#if JVET_O0050_LOCAL_DUAL_TREE
double bestCostSoFar = partitioner.isSepTree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
if (partitioner.isSepTree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#else
double bestCostSoFar = CS::isDualITree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
if (CS::isDualITree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#endif
{
bestCostSoFar = encTestMode.maxCostAllowed;//cap the best cost by the maximum cost allowed for this test mode
}
#else
//the Intra SubPartitions mode uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary lines
#if JVET_O0050_LOCAL_DUAL_TREE
const double bestCostSoFar = partitioner.isSepTree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
#else
const double bestCostSoFar = CS::isDualITree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
#endif
#endif
//luma intra mode search; the return value presumably tells whether a valid candidate was found
validCandRet = m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar, mtsFlag, startMTSIdx[ trGrpIdx ], endMTSIdx[ trGrpIdx ], ( trGrpIdx > 0 ) );
//with LFNST enabled, move on to the next combination when no valid candidate was returned,
//or when ISP is in use and the luma cbf of the first TU is 0
if( sps.getUseLFNST() && ( !validCandRet || ( cu.ispMode && cu.firstTU->cbf[ COMPONENT_Y ] == 0 ) ) )
{
continue;
}
useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS; //remember whether an ISP mode was selected
#if JVET_O0050_LOCAL_DUAL_TREE
if( !partitioner.isSepTree( *tempCS ) )
#else
if( !CS::isDualITree( *tempCS ) )
#endif
{
//cost of the luma part computed so far
tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); //luma cost
if( useIntraSubPartitions )
{
//the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost
maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE;
}
}
if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max()
&& tempCS->interHad == 0)
{
interHad = 0;
// JEM assumes only perfect reconstructions can from now on beat the inter mode
m_modeCtrl->enforceInterHad( 0 );
continue;
}
#if JVET_O0050_LOCAL_DUAL_TREE
if( !partitioner.isSepTree( *tempCS ) )
#else
if( !CS::isDualITree( *tempCS ) )
#endif
{
cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) );//copy the luma reconstruction into the picture buffer
cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y));//copy the luma prediction into the picture buffer
}
}//if(isLuma( partitioner.chType))
//chroma channel
#if JVET_O0050_LOCAL_DUAL_TREE
if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !cu.isSepTree() ) )//chroma
#else
if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) )
#endif
{
TUIntraSubPartitioner subTuPartitioner( partitioner );
#if JVET_O0050_LOCAL_DUAL_TREE
m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( cu.isSepTree() && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
#else
m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( CS::isDualITree( *cu.cs ) && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
#endif
if( useIntraSubPartitions && !cu.ispMode )
{
//At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations
continue;
}
}//if(partitioner.chType == CHANNEL_TYPE_CHROMA)
//rootCbf is set when any valid transform block of the first TU has a non-zero cbf
cu.rootCbf = false;
for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ )
{
cu.rootCbf |= cu.firstTU->cbf[t] != 0;
}
//-------------- bit estimation ----------------
// Get total bits for current mode: encode CU
m_CABACEstimator->resetBits();
if( pps.getTransquantBypassEnabledFlag() ) //transquant bypass is enabled in the PPS
{
m_CABACEstimator->cu_transquant_bypass_flag( cu );
}
if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
&& cu.Y().valid()
)
{
m_CABACEstimator->cu_skip_flag ( cu );
}
m_CABACEstimator->pred_mode ( cu ); //prediction mode
m_CABACEstimator->pcm_data ( cu, partitioner );
m_CABACEstimator->cu_pred_data ( cu );
m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) );
// Encode Coefficients
CUCtx cuCtx;
cuCtx.isDQPCoded = true;
cuCtx.isChromaQpAdjCoded = true;
m_CABACEstimator->cu_residual( cu, partitioner, cuCtx );
tempCS->fracBits = m_CABACEstimator->getEstFracBits(); //estimated rate
tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);//RD cost
#if JVET_O0050_LOCAL_DUAL_TREE
double bestIspCost = cu.ispMode ? cu.isSepTree() ? tempCS->cost : tempCS->lumaCost : MAX_DOUBLE; //ISP cost; MAX_DOUBLE when ISP is not used
#else
double bestIspCost = cu.ispMode ? CS::isDualITree( *tempCS ) ? tempCS->cost : tempCS->lumaCost : MAX_DOUBLE;
#endif
//keep the cost as it is before xEncodeDontSplit / xCheckDQP run
const double tmpCostWithoutSplitFlags = tempCS->cost;
xEncodeDontSplit( *tempCS, partitioner );
xCheckDQP( *tempCS, partitioner );
// Check if low frequency non-separable transform (LFNST) is too expensive
#if JVET_O0472_LFNST_SIGNALLING_LAST_SCAN_POS
if( lfnstIdx && !cuCtx.lfnstLastScanPos )
{
#if JVET_O0050_LOCAL_DUAL_TREE
bool cbfAtZeroDepth = cu.isSepTree() ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf;
#else
bool cbfAtZeroDepth = CS::isDualITree( *tempCS ) ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf;
#endif
//invalidate this LFNST candidate by giving it the maximum cost
if( cbfAtZeroDepth )
{
tempCS->cost = MAX_DOUBLE;
}
}
#else
#if JVET_O0050_LOCAL_DUAL_TREE
const int nonZeroCoeffThr = cu.isSepTree() ? ( isLuma( partitioner.chType ) ? LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA;
#else
const int nonZeroCoeffThr = CS::isDualITree( *tempCS ) ? ( isLuma( partitioner.chType ) ? LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA;
#endif
//invalidate the LFNST candidate when the non-zero coefficient count is positive but not above the threshold
if( lfnstIdx && cuCtx.numNonZeroCoeffNonTs <= nonZeroCoeffThr )
{
if (cuCtx.numNonZeroCoeffNonTs > 0)
{
tempCS->cost = MAX_DOUBLE;
}
}
#endif
if( mtsFlag == 0 && lfnstIdx == 0 )
{
dct2Cost = tempCS->cost;//cost of the candidate with mtsFlag == 0 and lfnstIdx == 0
}
if( tempCS->cost < bestCS->cost ) //the current cost beats the best cost
{
m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags );
}
if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost;
if( sps.getUseLFNST() && !tempCS->cus.empty() )
{
skipOtherLfnst = m_modeCtrl->checkSkipOtherLfnst( encTestMode, tempCS, partitioner );
}
xCalDebCost( *tempCS, partitioner );
tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
#if WCG_EXT
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
if( !sps.getUseLFNST() ) //LFNST disabled in the SPS
{
xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); //compare against the best mode
}
else //LFNST enabled in the SPS
{
//xCheckBestMode returning true is treated as "this candidate became the new best"
if( xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ) )
{
trGrpBestCost[ trGrpIdx ] = globalBestCost = bestCS->cost;
bestSelFlag [ trGrpIdx ] = true;
bestMtsFlag = mtsFlag;
bestLfnstIdx = lfnstIdx;
if( bestCS->cus.size() == 1 )
{
//NOTE: this local cu shadows the outer cu within this inner scope only
CodingUnit &cu = *bestCS->cus.front();
if( cu.firstTU->mtsIdx == MTS_SKIP )
{
//stop testing further LFNST indices when g_aucLog2[width] + g_aucLog2[height] of the luma block is >= 6
if( ( g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].width ] + g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].height ] ) >= 6 )
{
endLfnstIdx = 0;
}
}
}
}
#if JVET_O0502_ISP_CLEANUP
//we decide to skip the non-DCT-II transforms and LFNST according to the ISP results
if ((endMtsFlag > 0 || endLfnstIdx > 0) && cu.ispMode && !mtsFlag && !lfnstIdx && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP())
#else
//we decide to skip the second emt pass or not according to the ISP results
if( considerMtsSecondPass && cu.ispMode && !mtsFlag && tempCS->slice->isIntra() )
#endif
{
double bestCostDct2NoIsp = m_modeCtrl->getMtsFirstPassNoIspCost();
CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" );
#if JVET_O0502_ISP_CLEANUP
double threshold = 1.4;
#else
double nSamples = ( double ) ( cu.lwidth() << g_aucLog2[ cu.lheight() ] );
double threshold = 1 + 1.4 / sqrt( nSamples );
#endif
double lfnstThreshold = 1.01 * threshold;
if( bestCostDct2NoIsp > bestIspCost*lfnstThreshold ) //skip the remaining LFNST indices
{
endLfnstIdx = lfnstIdx;
}
if( bestCostDct2NoIsp > bestIspCost*threshold ) //skip the second MTS pass
{
skipSecondMtsPass = true;
m_modeCtrl->setSkipSecondMTSPass( true );
break;
}
}
//now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not
//applies when mtsFlag is 0, the slice is not intra and the best CU so far is not an intra CU
if( !mtsFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA )
{
const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode
if( costSize2Nx2NmtsFirstPass > thEmtInterFastSkipIntra * bestInterCost )
{
skipSecondMtsPass = true;
m_modeCtrl->setSkipSecondMTSPass( true );
break;
}
}
} //if(sps.getUseLFNST())
} //for emtCuFlag for MTSflag
if( skipOtherLfnst ) //skip the remaining LFNST indices
{
startLfnstIdx = lfnstIdx;
endLfnstIdx = lfnstIdx;
break;
}
} //for lfnstIdx
} //if (!skipSecondMtsPass && considerMtsSecondPass && trGrpCheck[iGrpIdx])
//the next transform group is disabled by default; it is re-enabled only when this group produced a new best
//with a non-zero MTS flag or LFNST index and dct2Cost / group best cost is below the stop threshold
if( sps.getUseLFNST() && trGrpIdx < 3 )
{
trGrpCheck[ trGrpIdx + 1 ] = false;
if( bestSelFlag[ trGrpIdx ] && considerMtsSecondPass )
{
double dCostRatio = dct2Cost / trGrpBestCost[ trGrpIdx ];
trGrpCheck[ trGrpIdx + 1 ] = ( bestMtsFlag != 0 || bestLfnstIdx != 0 ) && dCostRatio < trGrpStopThreshold[ trGrpIdx ];
}
}
} //end of the trGrpIdx loop
}