版权声明:本文为博主[email protected](阿King)原创文章,不得以任何理由任何形式进行转载 https://blog.csdn.net/lj501886285/article/details/84345012
在Intra-prediction的最优模式选择中,我们需要计算各种模式的开销,而开销也有三种计算模式,分别为:
- SAD:Sum of Absolute Difference
绝对误差和,有 $\mathrm{SAD} = \sum_{i,j} \left| s(i,j) - p(i,j) \right|$,其中 $s$ 为原始像素块,$p$ 为预测像素块
- SSE(SSD):Sum of Squared Error(Sum of Squared Difference)
差值平方和,有 $\mathrm{SSE} = \sum_{i,j} \left( s(i,j) - p(i,j) \right)^2$,其中 $s$ 为原始像素块,$p$ 为预测像素块
- SATD:Sum of Absolute Transformed Difference
对差值块做4阶Hadamard变换后,再对变换系数求绝对值和
/*
 * Select the distortion-cost functions used by the mode decision.
 *
 * The metric is read from p_Inp->ModeDecisionMetric and the matching
 * functions are stored in three function pointers of currSlice:
 * compute_cost4x4, compute_cost8x8 and distI16x16.
 *
 *   ERROR_SAD   -> SAD variants for all three
 *   ERROR_SSE   -> SSE variants for all three
 *   ERROR_SATD  -> SATD variants for all three
 *   any other   -> "comp" variants for 4x4 and 8x8, SATD for 16x16
 */
void setupDistCost(Slice *currSlice, InputParameters *p_Inp)
{
  if (p_Inp->ModeDecisionMetric == ERROR_SAD) {
    /* Sum of absolute differences. */
    currSlice->compute_cost4x4 = compute_sad4x4_cost;
    currSlice->compute_cost8x8 = compute_sad8x8_cost;
    currSlice->distI16x16      = distI16x16_sad;
  } else if (p_Inp->ModeDecisionMetric == ERROR_SSE) {
    /* Sum of squared errors. */
    currSlice->compute_cost4x4 = compute_sse4x4_cost;
    currSlice->compute_cost8x8 = compute_sse8x8_cost;
    currSlice->distI16x16      = distI16x16_sse;
  } else if (p_Inp->ModeDecisionMetric == ERROR_SATD) {
    /* Sum of absolute transformed differences. */
    currSlice->compute_cost4x4 = compute_satd4x4_cost;
    currSlice->compute_cost8x8 = compute_satd8x8_cost;
    currSlice->distI16x16      = distI16x16_satd;
  } else {
    /* Fallback: "comp" cost for 4x4 / 8x8, SATD for 16x16. */
    currSlice->compute_cost4x4 = compute_comp4x4_cost;
    currSlice->compute_cost8x8 = compute_comp8x8_cost;
    currSlice->distI16x16      = distI16x16_satd;
  }
}
针对I_16x16
在intra16x16.c中
/*
 * SAD cost of a 16x16 intra prediction.
 *
 * Accumulates |original - prediction| over MB_BLOCK_SIZE x MB_BLOCK_SIZE
 * samples. Original samples are read from img_org starting at row
 * currMB->opix_y and column currMB->pix_x; prediction samples are read
 * from pred_img starting at [0][0].
 *
 * After every completed row the running sum is compared with
 * dist_down(min_cost); as soon as it is larger, the function gives up and
 * returns min_cost unchanged. Otherwise it returns dist_scale() of the sum.
 * NOTE(review): dist_down()/dist_scale() are defined elsewhere; presumably
 * they convert between the distblk scale and the plain sample scale - confirm.
 */
distblk distI16x16_sad(Macroblock *currMB, imgpel **img_org, imgpel **pred_img, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sad = 0;
  int row, col;

  for (row = 0; row < MB_BLOCK_SIZE; row++) {
    const imgpel *src = &img_org[currMB->opix_y + row][currMB->pix_x];
    const imgpel *prd = pred_img[row];

    for (col = 0; col < MB_BLOCK_SIZE; col++) {
      sad += iabs(src[col] - prd[col]);
    }

    /* Early termination: this mode already costs more than the limit. */
    if (sad > cost_limit) {
      return min_cost;
    }
  }

  return dist_scale((distblk) sad);
}
其中iabs()为绝对值函数。为什么写得这么复杂?应该是因为位运算不需要条件分支,效率更高
/*
 * Absolute value of x computed without a conditional.
 *
 * mask is x shifted right by (number of bits in an int - 1). With an
 * arithmetic shift this is 0 for non-negative x and -1 (all ones) for
 * negative x, so (x ^ mask) - mask yields x or -x respectively.
 *
 * Right-shifting a negative signed int is implementation-defined in C;
 * this code relies on the shift being arithmetic. The result for INT_MIN
 * is not representable.
 */
static inline int iabs(int x)
{
  /* Shift distance: bit width of int minus one (31 for a 32-bit int). */
  const int sign_shift = (int) (sizeof(int) * CHAR_BIT) - 1;
  const int mask = x >> sign_shift;

  return (x ^ mask) - mask;
}
/*
 * SSE cost of a 16x16 intra prediction.
 *
 * Accumulates (original - prediction)^2 over MB_BLOCK_SIZE x MB_BLOCK_SIZE
 * samples. Original samples are read from img_org starting at row
 * currMB->opix_y and column currMB->pix_x; prediction samples are read
 * from pred_img starting at [0][0].
 *
 * After every completed row the running sum is compared with
 * dist_down(min_cost); as soon as it is larger, the function returns
 * min_cost unchanged. Otherwise it returns dist_scale() of the sum.
 */
distblk distI16x16_sse(Macroblock *currMB, imgpel **img_org, imgpel **pred_img, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sse = 0;
  int row, col;

  for (row = 0; row < MB_BLOCK_SIZE; row++) {
    const imgpel *src = &img_org[currMB->opix_y + row][currMB->pix_x];
    const imgpel *prd = pred_img[row];

    for (col = 0; col < MB_BLOCK_SIZE; col++) {
      sse += iabs2(src[col] - prd[col]); /* squared difference */
    }

    /* Early termination: this mode already costs more than the limit. */
    if (sse > cost_limit) {
      return min_cost;
    }
  }

  return dist_scale((distblk) sse);
}
其中iabs2()为平方运算
/*
 * Square of x (x * x), used for squared-error accumulation.
 */
static inline int iabs2(int x)
{
  const int squared = x * x;

  return squared;
}
/*
 * SATD cost of a 16x16 intra prediction.
 *
 * Steps, in order:
 *   1. The residual (original - prediction) of the whole macroblock is
 *      written into currSlice->i16blk4x4, indexed as
 *      [block row][block column][row in block][column in block].
 *   2. Each of the 16 sub-blocks is transformed in place with
 *      hadamard4x4() and the absolute values of all its coefficients
 *      except [0][0] are added to the cost.
 *   3. The [0][0] coefficients of the 16 sub-blocks, each shifted right by
 *      one, are gathered into currSlice->tblk4x4, transformed with
 *      hadamard4x4(), and their absolute values are added to the cost.
 *
 * The running cost is compared with dist_down(min_cost) after every
 * coefficient row in steps 2 and 3; once it is larger, the function
 * returns min_cost immediately (later sub-blocks are then left
 * untransformed). Otherwise it returns dist_scale() of the total.
 */
distblk distI16x16_satd(Macroblock *currMB, imgpel **img_org, imgpel **pred_img, distblk min_cost)
{
  Slice *slice = currMB->p_Slice;
  int ****resid = slice->i16blk4x4;
  int **dc = slice->tblk4x4;
  const int cost_limit = dist_down(min_cost);
  int satd = 0;
  int row, col, blk_y, blk_x;

  /* Step 1: residual, scattered into a 4x4 grid of 4x4 sub-blocks. */
  for (row = 0; row < MB_BLOCK_SIZE; row++) {
    const imgpel *src = &img_org[currMB->opix_y + row][currMB->pix_x];
    const imgpel *prd = pred_img[row];

    for (col = 0; col < MB_BLOCK_SIZE; col++) {
      resid[row >> 2][col >> 2][row & 0x03][col & 0x03] = src[col] - prd[col];
    }
  }

  /* Step 2: per-sub-block Hadamard transform, summing every coefficient
   * except [0][0]. */
  for (blk_y = 0; blk_y < 4; blk_y++) {
    for (blk_x = 0; blk_x < 4; blk_x++) {
      int **blk = resid[blk_y][blk_x];

      hadamard4x4(blk, blk);

      for (row = 0; row < 4; row++) {
        /* The first row starts at column 1 so that [0][0] is skipped. */
        for (col = (row == 0) ? 1 : 0; col < 4; col++) {
          satd += iabs(blk[row][col]);
        }
        if (satd > cost_limit) {
          return min_cost;
        }
      }
    }
  }

  /* Step 3: gather the halved [0][0] coefficients of all 16 sub-blocks. */
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      dc[row][col] = resid[row][col][0][0] >> 1;
    }
  }

  /* Hadamard of DC coeff */
  hadamard4x4(dc, dc);

  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      satd += iabs(dc[row][col]);
    }
    if (satd > cost_limit) {
      return min_cost;
    }
  }

  return dist_scale((distblk) satd);
}
其中hadamard4x4为4阶Hadamard变换函数
/*
 * 4x4 Hadamard transform of block, written to tblock.
 *
 * A horizontal butterfly pass over each row stores its results in a local
 * 16-entry buffer; a vertical butterfly pass over each column of that
 * buffer then writes the output, with every output value shifted right
 * by one. Because block is fully read before tblock is written, block and
 * tblock may be the same array.
 *
 * block  : input, accessed as block[0..BLOCK_SIZE-1][0..3]
 * tblock : output, written as tblock[0..3][0..BLOCK_SIZE-1]
 */
void hadamard4x4(int **block, int **tblock)
{
  int tmp[16];
  int *out = tmp;
  int k;

  /* Horizontal pass: one row of block -> four consecutive tmp entries. */
  for (k = 0; k < BLOCK_SIZE; k++) {
    const int *in = block[k];
    const int a = in[0];
    const int b = in[1];
    const int c = in[2];
    const int d = in[3];
    const int sum_outer  = a + d;
    const int sum_inner  = b + c;
    const int diff_inner = b - c;
    const int diff_outer = a - d;

    out[0] = sum_outer  + sum_inner;
    out[1] = diff_outer + diff_inner;
    out[2] = sum_outer  - sum_inner;
    out[3] = diff_outer - diff_inner;
    out += 4;
  }

  /* Vertical pass: one column of tmp -> one column of tblock, halved. */
  for (k = 0; k < BLOCK_SIZE; k++) {
    const int *colp = tmp + k;
    const int a = colp[0];
    const int b = colp[BLOCK_SIZE];
    const int c = colp[2 * BLOCK_SIZE];
    const int d = colp[3 * BLOCK_SIZE];
    const int sum_outer  = a + d;
    const int sum_inner  = b + c;
    const int diff_inner = b - c;
    const int diff_outer = a - d;

    tblock[0][k] = (sum_outer  + sum_inner)  >> 1;
    tblock[1][k] = (diff_inner + diff_outer) >> 1;
    tblock[2][k] = (sum_outer  - sum_inner)  >> 1;
    tblock[3][k] = (diff_outer - diff_inner) >> 1;
  }
}
针对I_4x4
在rdopt.c中
/*
 * SAD cost of a 4x4 intra prediction.
 *
 * For each of BLOCK_SIZE rows, adds |original - prediction| for four
 * samples. Original samples are read from cur_img[row] starting at column
 * pic_opix_x; prediction samples are read from prd_img[row] starting at
 * column 0. p_Vid is not used by this function.
 *
 * After every row the running sum is compared with dist_down(min_cost);
 * once it is larger, min_cost is returned unchanged. Otherwise the result
 * is dist_scale() of the sum.
 */
distblk compute_sad4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sad = 0;
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++) {
    const imgpel *src = &cur_img[row][pic_opix_x];
    const imgpel *prd = prd_img[row];

    /* Exactly four samples per row. */
    for (col = 0; col < 4; col++) {
      sad += iabs(src[col] - prd[col]);
    }

    /* Early termination: already more expensive than the limit. */
    if (sad > cost_limit) {
      return min_cost;
    }
  }

  return dist_scale(sad);
}
/*
 * SSE cost of a 4x4 intra prediction.
 *
 * Adds (original - prediction)^2 over BLOCK_SIZE x BLOCK_SIZE samples.
 * Original samples are read from cur_img[row] starting at column
 * pic_opix_x; prediction samples are read from prd_img[row] starting at
 * column 0. p_Vid is not used by this function.
 *
 * After every row the running sum is compared with dist_down(min_cost);
 * once it is larger, min_cost is returned unchanged. Otherwise the result
 * is dist_scale() of the sum.
 */
distblk compute_sse4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sse = 0;
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++) {
    const imgpel *src = &cur_img[row][pic_opix_x];
    const imgpel *prd = prd_img[row];

    for (col = 0; col < BLOCK_SIZE; col++) {
      sse += iabs2(src[col] - prd[col]);
    }

    /* Early termination: already more expensive than the limit. */
    if (sse > cost_limit) {
      return min_cost;
    }
  }

  return dist_scale(sse);
}
/*
 * SATD cost of a 4x4 intra prediction.
 *
 * Builds the BLOCK_SIZE x BLOCK_SIZE residual (original - prediction) in
 * row-major order in a local 16-entry buffer, passes it to
 * HadamardSAD4x4() and returns dist_scale() of that result.
 *
 * Original samples are read from cur_img[row] starting at column
 * pic_opix_x; prediction samples are read from prd_img[row] starting at
 * column 0. p_Vid and min_cost are not used by this function, so there is
 * no early termination here.
 */
distblk compute_satd4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  short diff[16];
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++) {
    const imgpel *src = &cur_img[row][pic_opix_x];
    const imgpel *prd = prd_img[row];

    for (col = 0; col < BLOCK_SIZE; col++) {
      diff[row * BLOCK_SIZE + col] = (short) (src[col] - prd[col]);
    }
  }

  return dist_scale(HadamardSAD4x4(diff));
}
/*
 * 4x4 intra prediction cost using the metric installed in p_Vid.
 *
 * Builds the BLOCK_SIZE x BLOCK_SIZE residual (original - prediction) in
 * row-major order in a local 16-entry buffer and returns whatever
 * p_Vid->distortion4x4(diff, min_cost) returns.
 *
 * Original samples are read from cur_img[row] starting at column
 * pic_opix_x; prediction samples are read from prd_img[row] starting at
 * column 0.
 */
static distblk compute_comp4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  short diff[16];
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++) {
    const imgpel *src = &cur_img[row][pic_opix_x];
    const imgpel *prd = prd_img[row];

    for (col = 0; col < BLOCK_SIZE; col++) {
      diff[row * BLOCK_SIZE + col] = (short) (src[col] - prd[col]);
    }
  }

  return p_Vid->distortion4x4(diff, min_cost);
}