xEncodeInterResidual関数、関数関数は関数名と同じで、フレーム間予測残差をエンコードする関数エントリです。この関数は、主に、スキップ残差コーディング、DCT-2、MTS、およびSBTモードから最適な残差コーディングモードを選択します。
主に次のステップに分かれています。
1.分析前の残差
(1)CU全体の現在のSSEを計算し、いくつかのSBTモードの最小歪みを計算して導出し、SBT歪みが最小の最大4つのモードを選択します(SBTモードの選択にはSBTの高速アルゴリズムが含まれます)(calcMinDistSbt関数)
(2)履歴内の現在のCUと同じSSEの最適な変換モードを見つける
2、コーディングモードの選択
(1)スキップモードまたはMTS / DCT-2の状況:現在のモードがスキップ残差コーディングモードであるか、履歴に現在のCUと同じSSEを持つ変換モードがない場合、または過去に最適な変換モードがMTSまたはDCT2である場合次に、残りのコーディングのためにencodeResAndCalcRdInterCU関数を呼び出し、RDコストを計算します
(2)SBTの状況:高速アルゴリズムを介してnumSbtRdo SBTモードを選択し、numSbtRdo SBTモードをトラバースし、残余コーディングのためにencodeResAndCalcRdInterCU関数を呼び出し、RDコストを計算します。
3つ目は、最適な変換モードを保存する
注:SBTには最大8つのモードがあるため、複雑さを軽減するために、SBTの多くの高速アルゴリズムが関係しています。
機能コードと関連コメントは次のとおりです。
void EncCu::xEncodeInterResidual( CodingStructure *&tempCS
, CodingStructure *&bestCS
, Partitioner &partitioner
, const EncTestMode& encTestMode
, int residualPass
, bool* bestHasNonResi
, double* equBcwCost
)
{
CodingUnit* cu = tempCS->getCU( partitioner.chType );
double bestCostInternal = MAX_DOUBLE;
double bestCost = bestCS->cost;
double bestCostBegin = bestCS->cost;
CodingUnit* prevBestCU = bestCS->getCU( partitioner.chType );
uint8_t prevBestSbt = ( prevBestCU == nullptr ) ? 0 : prevBestCU->sbtInfo;
bool swapped = false; // avoid unwanted data copy
bool reloadCU = false;
const PredictionUnit& pu = *cu->firstPU;
// clang-format off
const int affineShiftTab[3] =
{
MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH,
MV_PRECISION_INTERNAL - MV_PRECISION_INT
};
const int normalShiftTab[NUM_IMV_MODES] =
{
MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
MV_PRECISION_INTERNAL - MV_PRECISION_INT,
MV_PRECISION_INTERNAL - MV_PRECISION_4PEL,
MV_PRECISION_INTERNAL - MV_PRECISION_HALF,
};
// clang-format on
int mvShift;
for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
{
if (pu.refIdx[refList] >= 0)
{
if (!cu->affine)
{
mvShift = normalShiftTab[cu->imv];
Mv signaledmvd(pu.mvd[refList].getHor() >> mvShift, pu.mvd[refList].getVer() >> mvShift);
if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
return;
}
else
{
for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--)
{
mvShift = affineShiftTab[cu->imv];
Mv signaledmvd(pu.mvdAffi[refList][ctrlP].getHor() >> mvShift, pu.mvdAffi[refList][ctrlP].getVer() >> mvShift);
if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
return;
}
}
}
}
// avoid MV exceeding 18-bit dynamic range
// 避免MV超过18位动态范围
const int maxMv = 1 << 17;
if (!cu->affine && !pu.mergeFlag)
{
if ( (pu.refIdx[0] >= 0 && (pu.mv[0].getAbsHor() >= maxMv || pu.mv[0].getAbsVer() >= maxMv))
|| (pu.refIdx[1] >= 0 && (pu.mv[1].getAbsHor() >= maxMv || pu.mv[1].getAbsVer() >= maxMv)))
{
return;
}
}
if (cu->affine && !pu.mergeFlag)
{
for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
{
if (pu.refIdx[refList] >= 0)
{
for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--)
{
if (pu.mvAffi[refList][ctrlP].getAbsHor() >= maxMv || pu.mvAffi[refList][ctrlP].getAbsVer() >= maxMv)
{
return;
}
}
}
}
}
const bool mtsAllowed = tempCS->sps->getUseInterMTS() && CU::isInter( *cu ) && partitioner.currArea().lwidth() <= MTS_INTER_MAX_CU_SIZE && partitioner.currArea().lheight() <= MTS_INTER_MAX_CU_SIZE;
uint8_t sbtAllowed = cu->checkAllowedSbt();//是否允许使用SBT,判断当前预测模式、CU尺寸是否符合要求
// SBT resolution-dependent fast algorithm: not try size-64 SBT in RDO for low-resolution sequences (now resolution below HD)
// SBT分辨率相关快速算法:对于低分辨率序列(现在分辨率低于HD),不要尝试RDO中的size-64sbt
if( tempCS->pps->getPicWidthInLumaSamples() < (uint32_t)m_pcEncCfg->getSBTFast64WidthTh() )
{
sbtAllowed = ((cu->lwidth() > 32 || cu->lheight() > 32)) ? 0 : sbtAllowed;
}
uint8_t numRDOTried = 0;
Distortion sbtOffDist = 0;
bool sbtOffRootCbf = 0;
double sbtOffCost = MAX_DOUBLE;
double currBestCost = MAX_DOUBLE;
bool doPreAnalyzeResi = ( sbtAllowed || mtsAllowed ) && residualPass == 0;
m_pcInterSearch->initTuAnalyzer();
if( doPreAnalyzeResi )
{
m_pcInterSearch->calcMinDistSbt( *tempCS, *cu, sbtAllowed );
}
auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
int slShift = 4 + std::min( (int)gp_sizeIdxInfo->idxFrom( cu->lwidth() ) + (int)gp_sizeIdxInfo->idxFrom( cu->lheight() ), 9 );
Distortion curPuSse = m_pcInterSearch->getEstDistSbt( NUMBER_SBT_MODE );//当前PU的SSE
uint8_t currBestSbt = 0;
uint8_t currBestTrs = MAX_UCHAR;
uint8_t histBestSbt = MAX_UCHAR;
uint8_t histBestTrs = MAX_UCHAR;
m_pcInterSearch->setHistBestTrs( MAX_UCHAR, MAX_UCHAR );
// 设置历史最佳SBT模式和MTS模式
if( doPreAnalyzeResi )
{
if( m_pcInterSearch->getSkipSbtAll() && !mtsAllowed ) //emt is off MTS关闭
{
histBestSbt = 0; //try DCT2
m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs );
}
else
{
assert( curPuSse != std::numeric_limits<uint64_t>::max() );
//寻找历史上与当前PU相同SSE的最优SBT模式
uint16_t compositeSbtTrs = slsSbt->findBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ) );
histBestSbt = ( compositeSbtTrs >> 0 ) & 0xff;
histBestTrs = ( compositeSbtTrs >> 8 ) & 0xff;
// 特殊情况,加载SBT时跳过SBT
if( m_pcInterSearch->getSkipSbtAll() && CU::isSbtMode( histBestSbt ) ) //special case, skip SBT when loading SBT
{
histBestSbt = 0; //try DCT2
}
m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs );
}
}
{// EMT loop
if( reloadCU )
{
if( bestCost == bestCS->cost ) //The first EMT pass didn't become the bestCS, so we clear the TUs generated
{
tempCS->clearTUs();
}
else if( false == swapped )
{
tempCS->initStructData( encTestMode.qp );
tempCS->copyStructure( *bestCS, partitioner.chType );
tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
bestCost = bestCS->cost;
cu = tempCS->getCU( partitioner.chType );
swapped = true;
}
else
{
tempCS->clearTUs();
bestCost = bestCS->cost;
cu = tempCS->getCU( partitioner.chType );
}
//we need to restart the distortion for the new tempCS, the bit count and the cost
//我们需要为新的tempCS重新开始失真,比特数和成本
tempCS->dist = 0;
tempCS->fracBits = 0;
tempCS->cost = MAX_DOUBLE;
tempCS->costDbOffset = 0;
}
reloadCU = true; // enable cu reloading
cu->skip = false;
cu->sbtInfo = 0;
const bool skipResidual = residualPass == 1;
//跳过残差编码(Skip模式)或历史上没有和当前相同SSE的SBT模式或者历史上最佳模式是MTS或DCT-2
if( skipResidual || histBestSbt == MAX_UCHAR || !CU::isSbtMode( histBestSbt ) )
{
//编码残差并计算CU的RD Cost
m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
if (tempCS->slice->getSPS()->getUseColorTrans())
{
bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
}
numRDOTried += mtsAllowed ? 2 : 1;
xEncodeDontSplit( *tempCS, partitioner );
xCheckDQP( *tempCS, partitioner );
#if JVET_Q0267_RESET_CHROMA_QP_OFFSET
xCheckChromaQPOffset( *tempCS, partitioner );
#endif
if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) )
{
bestCostInternal = tempCS->cost;
if (!(tempCS->getPU(partitioner.chType)->ciipFlag))
*bestHasNonResi = !cu->rootCbf;
}
if (cu->rootCbf == false)
{
if (tempCS->getPU(partitioner.chType)->ciipFlag)
{
tempCS->cost = MAX_DOUBLE;
tempCS->costDbOffset = 0;
return;
}
}
currBestCost = tempCS->cost;
sbtOffCost = tempCS->cost;
sbtOffDist = tempCS->dist;
sbtOffRootCbf = cu->rootCbf;
//设置当前最佳变换模式(MTS或者DCT-2)和变换类型
currBestSbt = CU::getSbtInfo(cu->firstTU->mtsIdx[COMPONENT_Y] > MTS_SKIP ? SBT_OFF_MTS : SBT_OFF_DCT, 0);
currBestTrs = cu->firstTU->mtsIdx[COMPONENT_Y];
#if WCG_EXT
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
}
uint8_t numSbtRdo = CU::numSbtModeRdo( sbtAllowed );
//early termination if all SBT modes are not allowed 如果不允许所有SBT模式,则提前终止
//normative
if( !sbtAllowed || skipResidual )
{
numSbtRdo = 0;
}
//fast algorithm
//快速算法:存在历史最佳SBT模式但是最终选择的模式不是SBT模式
if( ( histBestSbt != MAX_UCHAR && !CU::isSbtMode( histBestSbt ) ) || m_pcInterSearch->getSkipSbtAll() )
{
numSbtRdo = 0;
}
//如果当前PU关闭SBT的Cost大于最佳Cost*th,则跳过SBT的RDO过程,将numSbtRdo设置为0
if( bestCost != MAX_DOUBLE && sbtOffCost != MAX_DOUBLE )
{
double th = 1.07;
if( !( prevBestSbt == 0 || m_sbtCostSave[0] == MAX_DOUBLE ) )
{
//m_sbtCostSave[0] = sbtOffCost;上一次关闭SBT的Cost
//m_sbtCostSave[1] = currBestCost;上次最佳的Cost
assert( m_sbtCostSave[1] <= m_sbtCostSave[0] );
th *= ( m_sbtCostSave[0] / m_sbtCostSave[1] );
}
if( sbtOffCost > bestCost * th )
{
numSbtRdo = 0;
}
}
//当SbtOffCost小于某一阈值的时候,跳过SBT的RDO过程
if( !sbtOffRootCbf && sbtOffCost != MAX_DOUBLE )
{
double th = Clip3( 0.05, 0.55, ( 27 - cu->qp ) * 0.02 + 0.35 );
if( sbtOffCost < m_pcRdCost->calcRdCost( ( cu->lwidth() * cu->lheight() ) << SCALE_BITS, 0 ) * th )
{
numSbtRdo = 0;
}
}
//当历史最佳SBT模式存在且可用时,将numSbtRdo置1,仅对这一种SBT模式进行RD Cost的计算
if( histBestSbt != MAX_UCHAR && numSbtRdo != 0 )
{
numSbtRdo = 1;
m_pcInterSearch->initSbtRdoOrder( CU::getSbtMode( CU::getSbtIdx( histBestSbt ), CU::getSbtPos( histBestSbt ) ) );
}
//遍历numSbtRdo种SBT模式
for( int sbtModeIdx = 0; sbtModeIdx < numSbtRdo; sbtModeIdx++ )
{
uint8_t sbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx );
uint8_t sbtIdx = CU::getSbtIdxFromSbtMode( sbtMode );
uint8_t sbtPos = CU::getSbtPosFromSbtMode( sbtMode );
//fast algorithm (early skip, save & load)
//快速算法(提前跳过,保存和加载)
if( histBestSbt == MAX_UCHAR )
{
//快速算法,提前跳过SBT(返回0/1/2/3,表示跳过当前SBT模式)
uint8_t skipCode = m_pcInterSearch->skipSbtByRDCost( cu->lwidth(), cu->lheight(), cu->mtDepth, sbtIdx, sbtPos, bestCS->cost, sbtOffDist, sbtOffCost, sbtOffRootCbf );
if( skipCode != MAX_UCHAR )
{
continue;
}
//比较当前SBT模式与前一个相同划分方式的SBT模式的Est失真
if( sbtModeIdx > 0 )
{
uint8_t prevSbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx - 1 );//获得前一个SBT模式
//make sure the prevSbtMode is the same size as the current SBT mode (otherwise the estimated dist may not be comparable)
//确保prevSbtMode与当前SBT模式划分后的尺寸相同(否则估计的dist可能无法比较)
if( CU::isSameSbtSize( prevSbtMode, sbtMode ) )
{
Distortion currEstDist = m_pcInterSearch->getEstDistSbt( sbtMode );
Distortion prevEstDist = m_pcInterSearch->getEstDistSbt( prevSbtMode );
if( currEstDist > prevEstDist * 1.15 )
{
continue;
}
}
}
}
//init tempCS and TU
//初始化tempCS和TU
if( bestCost == bestCS->cost ) //The first EMT pass didn't become the bestCS, so we clear the TUs generated
{ //第一次EMT过程并没有成为bestCS,所以我们清除了生成的TUs
tempCS->clearTUs();
}
else if( false == swapped )
{
tempCS->initStructData( encTestMode.qp );
tempCS->copyStructure( *bestCS, partitioner.chType );
tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
bestCost = bestCS->cost;
cu = tempCS->getCU( partitioner.chType );
swapped = true;
}
else
{
tempCS->clearTUs();
bestCost = bestCS->cost;
cu = tempCS->getCU( partitioner.chType );
}
//we need to restart the distortion for the new tempCS, the bit count and the cost
//我们需要为新的tempCS重新开始失真,比特数和成本
tempCS->dist = 0;
tempCS->fracBits = 0;
tempCS->cost = MAX_DOUBLE;
cu->skip = false;
//set SBT info
//设置SBT信息
cu->setSbtIdx( sbtIdx );
cu->setSbtPos( sbtPos );
//try residual coding
//尝试残差编码
m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
if (tempCS->slice->getSPS()->getUseColorTrans())
{
bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
}
numRDOTried++;
xEncodeDontSplit( *tempCS, partitioner );
xCheckDQP( *tempCS, partitioner );
#if JVET_Q0267_RESET_CHROMA_QP_OFFSET
xCheckChromaQPOffset( *tempCS, partitioner );
#endif
if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) )
{
bestCostInternal = tempCS->cost;
if( !( tempCS->getPU( partitioner.chType )->ciipFlag ) )
*bestHasNonResi = !cu->rootCbf;
}
//保存当前最佳Cost和SBT信息
if( tempCS->cost < currBestCost )
{
currBestSbt = cu->sbtInfo;
currBestTrs = tempCS->tus[cu->sbtInfo ? cu->getSbtPos() : 0]->mtsIdx[COMPONENT_Y];
assert( currBestTrs == 0 || currBestTrs == 1 );
currBestCost = tempCS->cost;
}
#if WCG_EXT
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
}//end numSbtRdo loop
if( bestCostBegin != bestCS->cost )
{
m_sbtCostSave[0] = sbtOffCost;
m_sbtCostSave[1] = currBestCost;
}
} //end emt loop
if( histBestSbt == MAX_UCHAR && doPreAnalyzeResi && numRDOTried > 1 )
{
//保存最佳的变换模式
slsSbt->saveBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ), currBestSbt, currBestTrs );
}
tempCS->cost = currBestCost;
if( ETM_INTER_ME == encTestMode.type )
{
if( equBcwCost != NULL )
{
if( tempCS->cost < ( *equBcwCost ) && cu->BcwIdx == BCW_DEFAULT )
{
( *equBcwCost ) = tempCS->cost;
}
}
else
{
CHECK( equBcwCost == NULL, "equBcwCost == NULL" );
}
if( tempCS->slice->getCheckLDC() && !cu->imv && cu->BcwIdx != BCW_DEFAULT && tempCS->cost < m_bestBcwCost[1] )
{
if( tempCS->cost < m_bestBcwCost[0] )
{
m_bestBcwCost[1] = m_bestBcwCost[0];
m_bestBcwCost[0] = tempCS->cost;
m_bestBcwIdx[1] = m_bestBcwIdx[0];
m_bestBcwIdx[0] = cu->BcwIdx;
}
else
{
m_bestBcwCost[1] = tempCS->cost;
m_bestBcwIdx[1] = cu->BcwIdx;
}
}
}
}
CheckAllowSbt関数は主に、現在のCUがSBTテクノロジーを使用できるかどうかを確認するために使用されます。
const uint8_t CodingUnit::checkAllowedSbt() const
{
if( !slice->getSPS()->getUseSBT() )
{
// SPS层没有开启SBT
return 0;
}
//check on prediction mode
// 检查预测模式:帧内预测、IBC、PLT模式不使用SBT
if (predMode == MODE_INTRA || predMode == MODE_IBC || predMode == MODE_PLT ) //intra, palette or IBC
{
return 0;
}
if( firstPU->ciipFlag )
{
return 0;
}
#if !JVET_Q0806
if( triangle )
{
return 0;
}
#endif
uint8_t sbtAllowed = 0;
int cuWidth = lwidth();//Cu宽度
int cuHeight = lheight();//Cu高度
bool allow_type[NUMBER_SBT_IDX];
memset( allow_type, false, NUMBER_SBT_IDX * sizeof( bool ) );
//parameter
// 参数:最大和最小支持SBT的变换块尺寸
int maxSbtCUSize = cs->sps->getMaxTbSize();
int minSbtCUSize = 1 << ( MIN_CU_LOG2 + 1 );//8
//check on size
// 检查尺寸
if( cuWidth > maxSbtCUSize || cuHeight > maxSbtCUSize )
{
return 0;
}
allow_type[SBT_VER_HALF] = cuWidth >= minSbtCUSize;//是否允许2:2垂直划分
allow_type[SBT_HOR_HALF] = cuHeight >= minSbtCUSize;//是否允许2:2水平划分
allow_type[SBT_VER_QUAD] = cuWidth >= ( minSbtCUSize << 1 );//是否允许1:3/3:1垂直划分
allow_type[SBT_HOR_QUAD] = cuHeight >= ( minSbtCUSize << 1 );//是否允许1:3/3:1水平划分
for( int i = 0; i < NUMBER_SBT_IDX; i++ )
{
sbtAllowed += (uint8_t)allow_type[i] << i;
}
return sbtAllowed;
}
calcMinDistSbtの機能は次のとおりです。
(1)ブロック全体の予測値と元の値の間のSSE歪みを計算します
(2)コード化する必要のある8つのSBTモードとコード化する必要のないサブブロックの予測値と元の値の間のSSEの加重歪みを計算します。SSE_SBT= SSE(コード)/ 16 + SSE(コード解除)、および最小から最大への並べ替えに従います
void InterSearch::calcMinDistSbt( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed )
{
if( !sbtAllowed ) //不允许使用SBT时
{
m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
for( int comp = 0; comp < getNumberValidTBlocks( *cs.pcv ); comp++ )
{
const ComponentID compID = ComponentID( comp );
CPelBuf pred = cs.getPredBuf( compID );
CPelBuf org = cs.getOrgBuf( compID );
m_estMinDistSbt[NUMBER_SBT_MODE] += m_pcRdCost->getDistPart( org, pred, cs.sps->getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
}
return;
}
//SBT fast algorithm 2.1 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16);
// if this cost is larger than the best cost, no need to try a specific SBT mode
//SBT快速算法2.1:根据未编码部分和编码部分的luma失真估计SBT模式的最小RD Cost(假设失真可以减少到1/16);
// 如果该成本大于最佳成本,则无需尝试特定的SBT模式
int cuWidth = cu.lwidth();
int cuHeight = cu.lheight();
int numPartX = cuWidth >= 16 ? 4 : ( cuWidth == 4 ? 1 : 2 );
int numPartY = cuHeight >= 16 ? 4 : ( cuHeight == 4 ? 1 : 2 );
Distortion dist[4][4];
memset( dist, 0, sizeof( Distortion ) * 16 );
for( uint32_t c = 0; c < getNumberValidTBlocks( *cs.pcv ); c++ )
{
const ComponentID compID = ComponentID( c );
const CompArea& compArea = cu.blocks[compID];
const CPelBuf orgPel = cs.getOrgBuf( compArea );
const CPelBuf predPel = cs.getPredBuf( compArea );
int lengthX = compArea.width / numPartX;
int lengthY = compArea.height / numPartY;
int strideOrg = orgPel.stride;
int stridePred = predPel.stride;
uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT( ( *cs.sps.getBitDepth( toChannelType( compID ) ) - 8 ) << 1 );
Intermediate_Int iTemp;
//calc distY of 16 sub parts
//计算16个子部分的距离
for( int j = 0; j < numPartY; j++ )
{
for( int i = 0; i < numPartX; i++ )
{
int posX = i * lengthX;
int posY = j * lengthY;
const Pel* ptrOrg = orgPel.bufAt( posX, posY );
const Pel* ptrPred = predPel.bufAt( posX, posY );
Distortion uiSum = 0;
for( int n = 0; n < lengthY; n++ )
{
for( int m = 0; m < lengthX; m++ )
{
iTemp = ptrOrg[m] - ptrPred[m];
uiSum += Distortion( ( iTemp * iTemp ) >> uiShift );
}
ptrOrg += strideOrg;//加个CU的长度
ptrPred += stridePred;
}
if( isChroma( compID ) )
{
uiSum = (Distortion)( uiSum * m_pcRdCost->getChromaWeight() );
}
dist[j][i] += uiSum;
}
}
}
// SSE of a CU
// 计算出整个CU的SSE
m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
for( int j = 0; j < numPartY; j++ )
{
for( int i = 0; i < numPartX; i++ )
{
m_estMinDistSbt[NUMBER_SBT_MODE] += dist[j][i];
}
}
//init per-mode dist
// 初始化每个模式的失真
for( int i = SBT_VER_H0; i < NUMBER_SBT_MODE; i++ )
{
m_estMinDistSbt[i] = std::numeric_limits<uint64_t>::max();
}
//SBT fast algorithm 1: not try SBT if the residual is too small to compensate bits for encoding residual info
//SBT快速算法1:如果残差太小,无法补偿编码残差信息的比特数,则不要尝试SBT
uint64_t minNonZeroResiFracBits = 12 << SCALE_BITS;
if( m_pcRdCost->calcRdCost( 0, m_estMinDistSbt[NUMBER_SBT_MODE] ) < m_pcRdCost->calcRdCost( minNonZeroResiFracBits, 0 ) )
{
m_skipSbtAll = true;
return;
}
//derive estimated minDist of SBT = zero-residual part distortion + non-zero residual part distortion / 16
//推导出SBT的估计值minDist=零残差部分失真+非零残差部分失真/16
int shift = 5;
Distortion distResiPart = 0, distNoResiPart = 0;
//如果允许垂直2:2划分
if( CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) )
{
int offsetResiPart = 0;
int offsetNoResiPart = numPartX / 2;
distResiPart = distNoResiPart = 0;
assert( numPartX >= 2 );
for( int j = 0; j < numPartY; j++ )
{
for( int i = 0; i < numPartX / 2; i++ )
{
distResiPart += dist[j][i + offsetResiPart];
distNoResiPart += dist[j][i + offsetNoResiPart];
}
}
m_estMinDistSbt[SBT_VER_H0] = ( distResiPart >> shift ) + distNoResiPart;
m_estMinDistSbt[SBT_VER_H1] = ( distNoResiPart >> shift ) + distResiPart;
}
if( CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ) )
{
int offsetResiPart = 0;
int offsetNoResiPart = numPartY / 2;
assert( numPartY >= 2 );
distResiPart = distNoResiPart = 0;
for( int j = 0; j < numPartY / 2; j++ )
{
for( int i = 0; i < numPartX; i++ )
{
distResiPart += dist[j + offsetResiPart][i];
distNoResiPart += dist[j + offsetNoResiPart][i];
}
}
m_estMinDistSbt[SBT_HOR_H0] = ( distResiPart >> shift ) + distNoResiPart;
m_estMinDistSbt[SBT_HOR_H1] = ( distNoResiPart >> shift ) + distResiPart;
}
if( CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) )
{
assert( numPartX == 4 );
m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q1] = 0;
for( int j = 0; j < numPartY; j++ )
{
m_estMinDistSbt[SBT_VER_Q0] += dist[j][0] + ( ( dist[j][1] + dist[j][2] + dist[j][3] ) << shift );
m_estMinDistSbt[SBT_VER_Q1] += dist[j][3] + ( ( dist[j][0] + dist[j][1] + dist[j][2] ) << shift );
}
m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q0] >> shift;
m_estMinDistSbt[SBT_VER_Q1] = m_estMinDistSbt[SBT_VER_Q1] >> shift;
}
if( CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ) )
{
assert( numPartY == 4 );
m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q1] = 0;
for( int i = 0; i < numPartX; i++ )
{
m_estMinDistSbt[SBT_HOR_Q0] += dist[0][i] + ( ( dist[1][i] + dist[2][i] + dist[3][i] ) << shift );
m_estMinDistSbt[SBT_HOR_Q1] += dist[3][i] + ( ( dist[0][i] + dist[1][i] + dist[2][i] ) << shift );
}
m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q0] >> shift;
m_estMinDistSbt[SBT_HOR_Q1] = m_estMinDistSbt[SBT_HOR_Q1] >> shift;
}
//SBT fast algorithm 5: try N SBT modes with the lowest distortion
//SBT快速算法5:尝试N个失真最低的SBT模式
Distortion temp[NUMBER_SBT_MODE];
memcpy( temp, m_estMinDistSbt, sizeof( Distortion ) * NUMBER_SBT_MODE );
memset( m_sbtRdoOrder, 255, NUMBER_SBT_MODE );
//从二分划分的4种模式中找出两种SSE最小的模式
int startIdx = 0, numRDO;
numRDO = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed );
numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
for( int i = startIdx; i < startIdx + numRDO; i++ )
{
Distortion minDist = std::numeric_limits<uint64_t>::max();
for( int n = SBT_VER_H0; n <= SBT_HOR_H1; n++ )
{ //寻找四种二分划分模式中最小失真的模式
if( temp[n] < minDist )
{
minDist = temp[n];
m_sbtRdoOrder[i] = n;
}
}
//将找到的最小失真的模式的临时失真值修改为max值
temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max();
}
//从四分划分的四种模式中选择两种SSE最小的模式
startIdx += numRDO;
numRDO = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed );
numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
for( int i = startIdx; i < startIdx + numRDO; i++ )
{
Distortion minDist = std::numeric_limits<uint64_t>::max();
for( int n = SBT_VER_Q0; n <= SBT_HOR_Q1; n++ )
{
if( temp[n] < minDist )
{
minDist = temp[n];
m_sbtRdoOrder[i] = n;
}
}
temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max();
}
}
skipSbtByRDCost関数には、SBTの高速アルゴリズムが含まれます。
(1)コード化されていない部分とコード化された部分の輝度歪みに基づいてSBTモードの最小RDコストを推定します(歪みを1/16に減らすことができると仮定します)。コストが最適なコストよりも大きい場合は、特定のSBTモードを試す必要はありません
(2)残差が小さすぎる場合はSBTをスキップします
(3)推定RDコストが最良コストよりも大きい場合はSBTをスキップします
uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff )
{
int sbtMode = CU::getSbtMode( sbtIdx, sbtPos );
//SBT fast algorithm 2.2 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16);
// if this cost is larger than the best cost, no need to try a specific SBT mode
//SBT快速算法2.2:根据未编码部分和编码部分的luma失真估计SBT模式的最小RD成本(假设失真可以减少到1/16);
// 如果该Cost大于最佳Cost,则无需尝试特定的SBT模式
if( m_pcRdCost->calcRdCost( 11 << SCALE_BITS, m_estMinDistSbt[sbtMode] ) > bestCost )
{
return 0; //early skip type 0 提前退出
}
if( costSbtOff != MAX_DOUBLE )
{
if( !rootCbfSbtOff )
{
//SBT fast algorithm 3: skip SBT when the residual is too small (estCost is more accurate than fast algorithm 1, counting PU mode bits)
//SBT快速算法3:当残差太小时跳过SBT(estCost比快速算法1更精确,计算PU模式位)
uint64_t minNonZeroResiFracBits = 10 << SCALE_BITS;//最小非零残差编码的比特数
Distortion distResiPart;
if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_HOR_HALF )
{
distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 9 ) >> 4 );
}
else
{
distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 3 ) >> 3 );
}
double estCost = ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) + m_pcRdCost->calcRdCost( minNonZeroResiFracBits, m_estMinDistSbt[sbtMode] + distResiPart );
if( estCost > costSbtOff )
{
return 1;
}
if( estCost > bestCost )
{
return 2;
}
}
else
{
//SBT fast algorithm 4: skip SBT when an estimated RD cost is larger than the bestCost
//SBT快速算法4:当估计的RD成本大于最佳成本时跳过SBT
double weight = sbtMode > SBT_HOR_H1 ? 0.4 : 0.6;
double estCost = ( ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) * weight ) + m_pcRdCost->calcRdCost( 0 << SCALE_BITS, m_estMinDistSbt[sbtMode] );
if( estCost > bestCost )
{
return 3;
}
}
}
return MAX_UCHAR;
}