xFwdLfnst函数主要是对经过主变换后的一次变换系数进行二次低频不可分变换(LFNST)
基本流程如下:
- 判断是否同时满足:lfnstIdx不为0、mtsIdx != MTS_SKIP、宽度和高度都大于等于4;若满足则继续执行后续步骤,否则直接结束
- 获取IntraMode(根据intraMode选择变换集)
- 获取一维主变换向量(16x1或者48x1)
- 调用fwdLfnstNxN()函数进行LFNST变换
- 用经过LFNST变换后的系数替换对应位置的主变换系数
注意这里的intramode和LFNST变换集的选择:
对于帧内预测模式,其预测角度为2~66(只针对方形块)
对于width>height的块,将其预测角度扩展到67~80;对于width<height的块,将其预测角度扩展到-1~-14,即将原本的角度范围2~66扩展为wideAngPredMode = -14~80(getWideAngIntraMode函数)。当wideAngPredMode = 35~66或wideAngPredMode = 67~80时,主变换系数需要先转置再进行二次变换。
int PU::getWideAngIntraMode( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID )
{
if( dirMode < 2 )
{
return ( int ) dirMode;
}
CodingStructure& cs = *tu.cs;
const CompArea& area = tu.blocks[ compID ];
PelBuf pred = cs.getPredBuf( area );
int width = int( pred.width );
int height = int( pred.height );
int modeShift[ ] = { 0, 6, 10, 12, 14, 15 };
int deltaSize = abs( floorLog2( width ) - floorLog2( height ) );
int predMode = dirMode;
if( width > height && dirMode < 2 + modeShift[ deltaSize ] )
{
predMode += ( VDIA_IDX - 1 );
}
else if( height > width && predMode > VDIA_IDX - modeShift[ deltaSize ] )
{
predMode -= ( VDIA_IDX + 1 );
}
return predMode;
}
wideAngPredMode和LFNST变换集对应关系如图所示:
为了方便存储模式对应索引,又将wideAngPredMode = -14~-1的模式依次映射为LFNSTIntraMode = 67~80(即-14对应67,-1对应80);将wideAngPredMode = 67~80的模式依次映射为LFNSTIntraMode = 81~94;即将wideAngPredMode = -14~80修改为LFNSTIntraMode = 0~94(getLFNSTIntraMode函数)。
/**
 * Converts a wide-angle prediction mode (which may be negative) into a
 * non-negative index usable for LFNST table look-ups.
 *
 *  - negative modes are offset by ( NUM_EXT_LUMA_MODE >> 1 ) + NUM_LUMA_MODE;
 *  - modes >= NUM_LUMA_MODE are offset by ( NUM_EXT_LUMA_MODE >> 1 );
 *  - all other modes are returned unchanged.
 *
 * \param wideAngPredMode  wide-angle intra prediction mode
 * \return                 the remapped, non-negative LFNST intra mode
 */
uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode )
{
  // Negative wide-angle modes are placed right after the regular modes.
  if( wideAngPredMode < 0 )
  {
    return static_cast<uint32_t>( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) + NUM_LUMA_MODE );
  }

  // Wide-angle modes beyond the regular range are moved past the block above.
  if( wideAngPredMode >= NUM_LUMA_MODE )
  {
    return static_cast<uint32_t>( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) );
  }

  // Regular modes keep their value.
  return static_cast<uint32_t>( wideAngPredMode );
}
代码如下:
/**
 * Forward LFNST (low-frequency non-separable transform): applies the secondary
 * transform to the top-left primary-transform coefficients of one TU component.
 *
 * Nothing is done unless tu.cu->lfnstIdx is non-zero, tu.mtsIdx != MTS_SKIP and
 * both block dimensions are at least 4.
 *
 * Outline:
 *   1. derive the intra mode that selects the transform set;
 *   2. gather the top-left coefficients into m_tempInMatrix (16 values for a
 *      4x4 sub-block, 48 values for an 8x8 sub-block), reading them transposed
 *      when getTransposeFlag() returns true;
 *   3. run fwdLfnstNxN() from m_tempInMatrix into m_tempOutMatrix;
 *   4. write the results back into the coefficient buffer at the positions
 *      given by the scan table.
 *
 * \param tu      transform unit being processed
 * \param compID  component of the TU to transform
 * \param loadTr  true: operate on m_mtsCoeffs[ tu.mtsIdx ]; false: operate on m_plTempCoeff
 */
void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr )
{
const CompArea& area = tu.blocks[ compID ];
const uint32_t width = area.width;
const uint32_t height = area.height;
const uint32_t lfnstIdx = tu.cu->lfnstIdx;
if( lfnstIdx && tu.mtsIdx != MTS_SKIP && width >= 4 && height >= 4 )
{
const bool whge3 = width >= 8 && height >= 8;
// Diagonal scan order.
// Each scan entry provides the buffer index (idx) to write to, i.e. it converts a scan position (x,y) into an index.
// When both width and height are >= 8 the top-left 8x8 scan table is used; otherwise the grouped-4x4 diagonal scan table is used.
const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ];
// Intra prediction mode of the PU located at this block's position.
uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) );
if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) )
{// The PU uses an LMC mode: substitute the mode used for transform-set selection.
#if JVET_O0219_LFNST_TRANSFORM_SET_FOR_LMCMODE
intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) );
#else
intraMode = PLANAR_IDX;
#endif
}
#if JVET_O0925_MIP_SIMPLIFICATIONS
if (PU::isMIP(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
{ // The PU uses MIP: planar is used for transform-set selection.
//When intra_mip_flag[ xTbComp ][ yTbComp ] is equal to 1 and cIdx is equal to 0, predModeIntra is set equal to INTRA_PLANAR.
intraMode = PLANAR_IDX;
}
#endif
CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
if( lfnstIdx < 3 )
{
// Derive the LFNST intra mode; the transform set is selected from it (via g_lfnstLut below).
//The wide angle intra prediction mode mapping process as specified in clause 8.4.5.2.6 is invoked with predModeIntra, nTbW, nTbH and cIdx as inputs, and the modified predModeIntra as output.
intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) );
bool transposeFlag = getTransposeFlag( intraMode );// transpose flag: read the primary coefficients transposed when true
// Use an 8x8 sub-block when both width and height are >= 8; otherwise use a 4x4 sub-block.
// i.e. whge3 == true -> sbSize = 8, else sbSize = 4
const int sbSize = whge3 ? 8 : 4;
#if !JVET_O0094_LFNST_ZERO_PRIM_COEFFS
const int subGrpXMax = ( height == 4 && width > 8 ) ? 2 : 1;
const int subGrpYMax = ( width == 4 && height > 8 ) ? 2 : 1;
#endif
bool tu4x4Flag = ( width == 4 && height == 4 );
bool tu8x8Flag = ( width == 8 && height == 8 );
TCoeff* lfnstTemp;
TCoeff* coeffTemp;
TCoeff* tempCoeff = loadTr ? m_mtsCoeffs[ tu.mtsIdx ] : m_plTempCoeff;
#if !JVET_O0094_LFNST_ZERO_PRIM_COEFFS
for( int subGroupX = 0; subGroupX < subGrpXMax; subGroupX++ )
{
for( int subGroupY = 0; subGroupY < subGrpYMax; subGroupY++ )
{
const int offsetX = sbSize * subGroupX;
const int offsetY = sbSize * subGroupY * width;
int y;
lfnstTemp = m_tempInMatrix; // forward low frequency non-separable transform
coeffTemp = tempCoeff + offsetX + offsetY;
#else
int y;
// Original note: m_tempInMatrix is declared as "TCoeff m_tempInMatrix [ 48 ];" (declaration not shown here),
// so lfnstTemp points at a 48-element TCoeff array.
// m_tempInMatrix receives the primary-transform coefficients as a 1-D vector:
// sbSize == 4 -> 16x1 vector; sbSize == 8 -> 48x1 vector.
lfnstTemp = m_tempInMatrix; // forward low frequency non-separable transform
coeffTemp = tempCoeff;// primary-transform coefficients
#endif
if( transposeFlag )// read the primary coefficient matrix transposed?
{
if( sbSize == 4 )
{
for( y = 0; y < 4; y++ )
{
// Row y of the 4x4 block is stored with stride 4 starting at offset y, producing the transposed 16x1 vector.
lfnstTemp[ 0 ] = coeffTemp[ 0 ]; lfnstTemp[ 4 ] = coeffTemp[ 1 ];
lfnstTemp[ 8 ] = coeffTemp[ 2 ]; lfnstTemp[ 12 ] = coeffTemp[ 3 ];
lfnstTemp++;
coeffTemp += width;
}
}
else // ( sbSize == 8 )
{
// Read the 8x8 block transposed into a 48x1 vector; columns 4..7 are only taken from the first four rows.
for( y = 0; y < 8; y++ )
{
lfnstTemp[ 0 ] = coeffTemp[ 0 ]; lfnstTemp[ 8 ] = coeffTemp[ 1 ];
lfnstTemp[ 16 ] = coeffTemp[ 2 ]; lfnstTemp[ 24 ] = coeffTemp[ 3 ];
if( y < 4 )
{
lfnstTemp[ 32 ] = coeffTemp[ 4 ]; lfnstTemp[ 36 ] = coeffTemp[ 5 ];
lfnstTemp[ 40 ] = coeffTemp[ 6 ]; lfnstTemp[ 44 ] = coeffTemp[ 7 ];
}
lfnstTemp++;
coeffTemp += width;
}
}
}
else
{
for( y = 0; y < sbSize; y++ )
{
uint32_t uiStride = ( y < 4 ) ? sbSize : 4;
// Copy uiStride coefficients ( uiStride * sizeof( TCoeff ) bytes ) from the current row of coeffTemp into lfnstTemp.
// sbSize == 4: four copies of 4 values, 16 values in total.
// sbSize == 8: four copies of 8 values followed by four copies of 4 values, 48 values in total.
::memcpy( lfnstTemp, coeffTemp, uiStride * sizeof( TCoeff ) );
lfnstTemp += uiStride;
coeffTemp += width;
}
}
// m_tempInMatrix now holds the 1-D vector extracted from the primary-transform coefficients.
// g_lfnstLut[ intraMode ] maps the mode to the transform set that is passed on.
// For a 4x4 or 8x8 block only 8 output coefficients are computed; otherwise 16 are computed.
// m_tempOutMatrix receives the coefficients after the secondary transform (original note: "TCoeff m_tempOutMatrix[48]", declaration not shown here).
fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
// m_tempOutMatrix holds the coefficients produced by the LFNST.
lfnstTemp = m_tempOutMatrix; // forward spectral rearrangement
#if !JVET_O0094_LFNST_ZERO_PRIM_COEFFS
coeffTemp = tempCoeff + offsetX + offsetY;
#else
coeffTemp = tempCoeff;
#endif
const ScanElement * scanPtr = scan;
int lfnstCoeffNum = ( sbSize == 4 ) ? sbSize * sbSize : 48;
// Replace the primary-transform coefficients with the secondary-transform outputs, following the scan order.
for( y = 0; y < lfnstCoeffNum; y++ )
{
coeffTemp[ scanPtr->idx ] = *lfnstTemp++;
scanPtr++;
}
#if !JVET_O0094_LFNST_ZERO_PRIM_COEFFS
}
} // subGroupX
#endif
}
}
}
注意:进行完二次变换之后,是通过对角扫描的顺序将二次变换系数填充到系数矩阵中的。
其中fwdLfnstNxN()函数是将输入的一维主变换向量与LFNST变换核相乘,获得二次变换系数向量
参数:
src 是输入的一维主变换系数
dst 是经过二次变换后输出的系数,是一个含有48个元素的TCoeff数组
mode 为预测模式对应的映射集合
index 为0或者1,表示lfnst变换集中的变换矩阵索引
size 为4或者8,8代表宽度和高度同时大于等于8,取主变换系数的 8x8 子块(48x1);4代表取 4x4 子块(16x1)
zeroOutSize 表示输出向量中实际计算的系数个数,取值为8或者16:当输入残差块为 4x4 或者 8x8 时,输出 8x1 向量;其余情况输出 16x1 向量;输出数组中剩余的位置填充为0
流程:
- 选择LFNST变换矩阵核
- 将输入主变换向量和LFNST变换矩阵相乘并填充0,获得输出向量
代码如下:
/**
 * Forward LFNST kernel: multiplies the 1-D vector of primary-transform
 * coefficients by the selected LFNST matrix.
 *
 * \param src          input vector of primary-transform coefficients
 *                     (16 values are read when size <= 4, 48 values otherwise)
 * \param dst          output vector; 16 entries (size <= 4) or 48 entries
 *                     (size > 4) are written: zeroOutSize computed coefficients
 *                     followed by zeros
 * \param mode         first index into g_lfnst8x8 / g_lfnst4x4 (the transform set)
 * \param index        second index into the tables (which matrix of the set);
 *                     the assertion requires index < 3
 * \param size         sub-block size; values > 4 select the 8x8 tables with
 *                     48 inputs, otherwise the 4x4 tables with 16 inputs
 * \param zeroOutSize  number of output coefficients actually computed
 */
void TrQuant::fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize )
{
  // Pick the kernel table and the number of inputs per output coefficient.
  const bool    use8x8    = ( size > 4 );
  const int8_t* kernelRow = use8x8 ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ];
  const int     numInputs = use8x8 ? 48 : 16;
  int*          writePos  = dst;

  assert( index < 3 );

  // One dot product per computed output; each output uses numInputs consecutive kernel entries.
  for( int outIdx = 0; outIdx < zeroOutSize; outIdx++ )
  {
    int acc = 0;
    for( int inIdx = 0; inIdx < numInputs; inIdx++ )
    {
      acc += src[ inIdx ] * kernelRow[ inIdx ];
    }
    // Round and scale down by 7 bits.
    *writePos++ = ( acc + 64 ) >> 7;
    kernelRow += numInputs;
  }

  // Zero the remaining output entries so that numInputs entries are defined in total.
  ::memset( writePos, 0, ( numInputs - zeroOutSize ) * sizeof( int ) );
}