在上一篇文章扩展的merge模式中,当前CU在生成的merge list中选择一个率失真代价最小的候选项的运动信息直接作为自己的运动信息。而MMVD是将候选项的运动信息作为自己MV的预测值,需要编码传输MVD(自己最优MV与预测MV的差值)。
MMVD起源于之前提案中的ultimate motion vector expression(UMVE)技术,UMVE是一种新的运动向量表示方法,在skip和merge模式中使用起始点、运动步长、运动方向三个量来表示运动向量。
在VTM5中,MMVD首先从merge list中取出前2个MRG_TYPE_DEFAULT_N类型的候选项作为初始向量。在VTM5中定义如下:
static const int MMVD_BASE_MV_NUM = 2; ///< max number of MMVD base-MV candidates taken from the merge list
//!< Take the first 2 base (starting-point) MVs from the merge list
//!< Collect up to MMVD_BASE_MV_NUM (2) base ("starting point") MVs for MMVD
//!< from an already-built merge candidate list. Only regular merge candidates
//!< (MRG_TYPE_DEFAULT_N) are eligible; the first two found become the bases.
//!< @param pu         current prediction unit (only used by the legacy fill path below)
//!< @param mrgCtx     in: merge candidates; out: mrgCtx.mmvdBaseMv[0..1] filled
//!< @param mrgCandIdx not read in this function -- presumably kept for interface symmetry; verify against callers
void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const int& mrgCandIdx)
{
int refIdxList0, refIdxList1;
int k;
int currBaseNum = 0; //!< number of base MVs collected so far
const uint16_t maxNumMergeCand = mrgCtx.numValidMergeCand;
for (k = 0; k < maxNumMergeCand; k++)
{//!< only regular (MRG_TYPE_DEFAULT_N) candidates may serve as MMVD bases
if (mrgCtx.mrgTypeNeighbours[k] == MRG_TYPE_DEFAULT_N)
{
//!< mvFieldNeighbours stores list0/list1 fields interleaved: index 2k / 2k+1
refIdxList0 = mrgCtx.mvFieldNeighbours[(k << 1)].refIdx;
refIdxList1 = mrgCtx.mvFieldNeighbours[(k << 1) + 1].refIdx;
//!< bi-predicted candidate: copy both MV fields as the base
if ((refIdxList0 >= 0) && (refIdxList1 >= 0))
{
mrgCtx.mmvdBaseMv[currBaseNum][0] = mrgCtx.mvFieldNeighbours[(k << 1)];
mrgCtx.mmvdBaseMv[currBaseNum][1] = mrgCtx.mvFieldNeighbours[(k << 1) + 1];
}
//!< uni-prediction from list0: mark the list1 field invalid (refIdx = -1)
else if (refIdxList0 >= 0)
{
mrgCtx.mmvdBaseMv[currBaseNum][0] = mrgCtx.mvFieldNeighbours[(k << 1)];
mrgCtx.mmvdBaseMv[currBaseNum][1] = MvField(Mv(0, 0), -1);
}
//!< uni-prediction from list1: mark the list0 field invalid (refIdx = -1)
else if (refIdxList1 >= 0)
{
mrgCtx.mmvdBaseMv[currBaseNum][0] = MvField(Mv(0, 0), -1);
mrgCtx.mmvdBaseMv[currBaseNum][1] = mrgCtx.mvFieldNeighbours[(k << 1) + 1];
}
//!< NOTE(review): if both refIdx were < 0 the slot is still counted below but
//!< left unwritten -- presumably a DEFAULT_N candidate always has at least one
//!< valid refIdx; confirm against merge-list construction
currBaseNum++;
//!< stop once MMVD_BASE_MV_NUM (2) base MVs have been taken
if (currBaseNum == MMVD_BASE_MV_NUM)
break;
}
}
#if !JVET_N0448_N0380
//!< legacy path: pad any remaining base slots with zero MVs
//!< (list1 is made valid only for B slices)
if (currBaseNum < MMVD_BASE_MV_NUM)
{
for (k = currBaseNum; k < MMVD_BASE_MV_NUM; k++)
{
mrgCtx.mmvdBaseMv[k][0] = MvField(Mv(0, 0), 0);
const Slice &slice = *pu.cs->slice;
mrgCtx.mmvdBaseMv[k][1] = MvField(Mv(0, 0), (slice.isInterB() ? 0 : -1));
mrgCtx.GBiIdx[k] = GBI_DEFAULT;
//!< interDir bitmask: bit0 = list0 valid, bit1 = list1 valid
mrgCtx.interDirNeighbours[k] = (mrgCtx.mmvdBaseMv[k][0].refIdx >= 0) + (mrgCtx.mmvdBaseMv[k][1].refIdx >= 0) * 2;
}
}
#endif
}
提取出初始候选项后以候选项在参考帧中指向的位置为起始点,以4个方向、8个步长分别形成不同的运动向量(MV)。一共2个候选MV,每个候选MV生成4x8=32个新的MV,所以2个候选MV一共生成64个新的MV。关于步长和方向定义如下:
Distance IDX | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
---|---|---|---|---|---|---|---|---|
Offset (in unit of luma sample) | 1/4 | 1/2 | 1 | 2 | 4 | 8 | 16 | 32 |
Direction IDX | 00 | 01 | 10 | 11 |
---|---|---|---|---|
x-axis | + | – | N/A | N/A |
y-axis | N/A | N/A | + | – |
然后遍历这64个候选MV选择率失真代价最小的MV作为MMVD最终的MV,但是传输时只需要传输(1)该MV的初始MV在merge list中的索引。(2)运动方向。(3)运动步长。
当初始MV是单向预测(uni-prediction)MV时,offset直接加在该MV上;当初始MV是双向预测MV且两个参考帧在当前帧的同一侧时,双向预测的2个MV都等于起始MV直接加上offset(若两个参考帧距离不等,较近一侧的offset还需按POC距离缩放)。如果起始MV是双向预测MV且两个参考帧在当前帧不同侧,则list0的MV等于起始MV加上offset,而list1的MV等于起始MV加上offset的相反数(同样按POC距离缩放)。
生成新的候选MV的代码如下:
//!< Decode an MMVD candidate index into final motion information for the PU.
//!< candIdx (0..63) jointly encodes three quantities:
//!<   base-MV index = (candIdx / MMVD_MAX_REFINE_NUM)       in {0, 1}
//!<   step index    = (candIdx % MMVD_MAX_REFINE_NUM) / 4   in {0..7}
//!<   direction     = candIdx % 4                            (+x, -x, +y, -y)
//!< The resulting offset is added to the base MV; for a bi-predicted base the
//!< offset is reused, mirrored, or POC-distance-scaled for the second list.
void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
{
const Slice &slice = *pu.cs->slice;
const int mvShift = MV_FRACTIONAL_BITS_DIFF;
//!< offsets {4, 8, 16, 32, 64, 128, 256, 512} in 1/16-luma-sample units,
//!< i.e. 1/4, 1/2, 1, 2, 4, 8, 16, 32 luma samples
const int refMvdCands[8] = { 1 << mvShift , 2 << mvShift , 4 << mvShift , 8 << mvShift , 16 << mvShift , 32 << mvShift, 64 << mvShift , 128 << mvShift };
int fPosGroup = 0;
int fPosBaseIdx = 0;
int fPosStep = 0;
int tempIdx = 0;
int fPosPosition = 0;
Mv tempMv[2];
tempIdx = candIdx; //!< 0..63
fPosGroup = tempIdx / (MMVD_BASE_MV_NUM * MMVD_MAX_REFINE_NUM); //!< always 0 for candIdx in 0..63
tempIdx = tempIdx - fPosGroup * (MMVD_BASE_MV_NUM * MMVD_MAX_REFINE_NUM); //!< these two steps are a modulo by 64 -> 0..63
fPosBaseIdx = tempIdx / MMVD_MAX_REFINE_NUM; //!< 0 or 1: index of the base MV
tempIdx = tempIdx - fPosBaseIdx * (MMVD_MAX_REFINE_NUM); //!< these two steps are a modulo by 32 -> 0..31
fPosStep = tempIdx / 4; //!< 0..7: one of the 8 step sizes
fPosPosition = tempIdx - fPosStep * (4); //!< these two steps are a modulo by 4 -> 0..3: direction index
int offset = refMvdCands[fPosStep];
//!< fractional MMVD offsets disabled for this slice: multiply by 4,
//!< promoting the table from quarter-sample to full-sample units
if ( pu.cu->slice->getDisFracMMVD() )
{
offset <<= 2;
}
const int refList0 = mmvdBaseMv[fPosBaseIdx][0].refIdx;
const int refList1 = mmvdBaseMv[fPosBaseIdx][1].refIdx;
//!< case 1: bi-predicted base MV
if ((refList0 != -1) && (refList1 != -1))
{
const int poc0 = slice.getRefPOC(REF_PIC_LIST_0, refList0);
const int poc1 = slice.getRefPOC(REF_PIC_LIST_1, refList1);
const int currPoc = slice.getPOC();
//!< map direction index to an offset vector: 0:+x 1:-x 2:+y 3:-y
if (fPosPosition == 0)
{
tempMv[0] = Mv(offset, 0);
}
else if (fPosPosition == 1)
{
tempMv[0] = Mv(-offset, 0);
}
else if (fPosPosition == 2)
{
tempMv[0] = Mv(0, offset);
}
else
{
tempMv[0] = Mv(0, -offset);
}//!< identical signed POC distance (same side, equally far): same offset on both lists
if ((poc0 - currPoc) == (poc1 - currPoc))
{
tempMv[1] = tempMv[0];
}//!< list1 reference is farther away: full offset goes to list1, scaled offset to list0
else if (abs(poc1 - currPoc) > abs(poc0 - currPoc))
{//!< scale is negative when the two references lie on opposite sides of the current picture, positive when on the same side
const int scale = PU::getDistScaleFactor(currPoc, poc0, currPoc, poc1);
tempMv[1] = tempMv[0];
#if JVET_N0332_LTRP_MMVD_FIX
const bool isL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm;
const bool isL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm;
//!< long-term reference involved: POC-based scaling is invalid, so only
//!< copy (same side) or negate (opposite sides) the offset
if (isL0RefLongTerm || isL1RefLongTerm)
{
if ((poc1 - currPoc)*(poc0 - currPoc) > 0)
{
tempMv[0] = tempMv[1];
}
else
{
tempMv[0].set(-1 * tempMv[1].getHor(), -1 * tempMv[1].getVer());
}
}
else
#endif
tempMv[0] = tempMv[1].scaleMv(scale);
}
//!< list0 reference is farther (or equally far on the opposite side):
//!< full offset goes to list0, scaled offset to list1
else
{
const int scale = PU::getDistScaleFactor(currPoc, poc1, currPoc, poc0);
#if JVET_N0332_LTRP_MMVD_FIX
const bool isL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm;
const bool isL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm;
if (isL0RefLongTerm || isL1RefLongTerm) //!< long-term reference involved: no POC-based scaling
{ //!< both references on the same side of the current picture
if ((poc1 - currPoc)*(poc0 - currPoc) > 0)
{
tempMv[1] = tempMv[0];
}
else
{//!< references on opposite sides: list1 offset is the negation of list0's
tempMv[1].set(-1 * tempMv[0].getHor(), -1 * tempMv[0].getVer());
}
}
else
#endif
tempMv[1] = tempMv[0].scaleMv(scale);
}
pu.interDir = 3; //!< bi-prediction
pu.mv[REF_PIC_LIST_0] = mmvdBaseMv[fPosBaseIdx][0].mv + tempMv[0];
pu.refIdx[REF_PIC_LIST_0] = refList0;
pu.mv[REF_PIC_LIST_1] = mmvdBaseMv[fPosBaseIdx][1].mv + tempMv[1];
pu.refIdx[REF_PIC_LIST_1] = refList1;
}
//!< case 2: uni-predicted base MV from list0
else if (refList0 != -1)
{
if (fPosPosition == 0)
{
tempMv[0] = Mv(offset, 0);
}
else if (fPosPosition == 1)
{
tempMv[0] = Mv(-offset, 0);
}
else if (fPosPosition == 2)
{
tempMv[0] = Mv(0, offset);
}
else
{
tempMv[0] = Mv(0, -offset);
}
pu.interDir = 1; //!< list0 only
pu.mv[REF_PIC_LIST_0] = mmvdBaseMv[fPosBaseIdx][0].mv + tempMv[0];
pu.refIdx[REF_PIC_LIST_0] = refList0;
pu.mv[REF_PIC_LIST_1] = Mv(0, 0);
pu.refIdx[REF_PIC_LIST_1] = -1;
}
//!< case 3: uni-predicted base MV from list1
else if (refList1 != -1)
{
if (fPosPosition == 0)
{
tempMv[1] = Mv(offset, 0);
}
else if (fPosPosition == 1)
{
tempMv[1] = Mv(-offset, 0);
}
else if (fPosPosition == 2)
{
tempMv[1] = Mv(0, offset);
}
else
{
tempMv[1] = Mv(0, -offset);
}
pu.interDir = 2; //!< list1 only
pu.mv[REF_PIC_LIST_0] = Mv(0, 0);
pu.refIdx[REF_PIC_LIST_0] = -1;
pu.mv[REF_PIC_LIST_1] = mmvdBaseMv[fPosBaseIdx][1].mv + tempMv[1];
pu.refIdx[REF_PIC_LIST_1] = refList1;
}
//!< bookkeeping: mark the PU as an MMVD merge; no explicit MVD is signalled
//!< (the offset is carried by the candidate index), so mvd/mvp fields are cleared
pu.mmvdMergeFlag = true;
pu.mmvdMergeIdx = candIdx;
pu.mergeFlag = true;
#if JVET_N0324_REGULAR_MRG_FLAG
pu.regularMergeFlag = false;
#endif
pu.mergeIdx = candIdx;
pu.mergeType = MRG_TYPE_DEFAULT_N;
pu.mvd[REF_PIC_LIST_0] = Mv();
pu.mvd[REF_PIC_LIST_1] = Mv();
pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
//!< inherit the base candidate's GBi (BCW) weight only when it is bi-predicted
pu.cu->GBiIdx = (interDirNeighbours[fPosBaseIdx] == 3) ? GBiIdx[fPosBaseIdx] : GBI_DEFAULT;
#if JVET_N0334_MVCLIPPING
//!< clip the final MVs to the storage bit depth for each valid reference list
for (int refList = 0; refList < 2; refList++)
{
if (pu.refIdx[refList] >= 0)
{
pu.mv[refList].clipToStorageBitDepth();
}
}
#endif
//!< NOTE(review): presumably demotes bi-prediction to uni-prediction for small
//!< blocks per the VVC restriction -- confirm in PU::restrictBiPredMergeCandsOne
PU::restrictBiPredMergeCandsOne(pu);
}
参考
JVET-K0115
JVET-L0054
JVET-N1002
感兴趣的请关注微信公众号Video Coding