Intra-prediction(4)之三种开销的计算模式

版权声明:本文为博主[email protected](阿King)原创文章,不得以任何理由任何形式进行转载 https://blog.csdn.net/lj501886285/article/details/84345012

在Intra-prediction的最优模式选择中,我们需要计算各种模式的开销,而开销也有三种计算模式,分别为:

  • SAD:Sum of Absolute Difference
    绝对误差和,有
    $SAD=\sum_{i=0,j=0}^{Blocksize}|p_{ij}-p'_{ij}|$
  • SSE(SSD):Sum of Squared Error(Sum of Squared Difference)
    差值平方和,有
    $SSE=\sum_{i=0,j=0}^{Blocksize}(p_{ij}-p'_{ij})^2$
  • SATD:Sum of Absolute Transformed Difference
    hadamard4阶变换后再求绝对值和
    $SATD=\sum_{i=0,j=0}^{Blocksize}|H_{ij}-H'_{ij}|$
/*
 * Install the distortion-cost callbacks on the slice according to
 * p_Inp->ModeDecisionMetric.
 *
 * ERROR_SAD, ERROR_SSE and ERROR_SATD each select the matching 4x4, 8x8
 * and 16x16 routines.  Any other value selects the "comp" routines for
 * 4x4 and 8x8 and the SATD routine for 16x16.
 */
void setupDistCost(Slice *currSlice, InputParameters *p_Inp)
{
  if (p_Inp->ModeDecisionMetric == ERROR_SAD)
  {
    /* Sum of absolute differences. */
    currSlice->compute_cost4x4 = compute_sad4x4_cost;
    currSlice->compute_cost8x8 = compute_sad8x8_cost;
    currSlice->distI16x16      = distI16x16_sad;
  }
  else if (p_Inp->ModeDecisionMetric == ERROR_SSE)
  {
    /* Sum of squared errors. */
    currSlice->compute_cost4x4 = compute_sse4x4_cost;
    currSlice->compute_cost8x8 = compute_sse8x8_cost;
    currSlice->distI16x16      = distI16x16_sse;
  }
  else if (p_Inp->ModeDecisionMetric == ERROR_SATD)
  {
    /* Sum of absolute transformed differences. */
    currSlice->compute_cost4x4 = compute_satd4x4_cost;
    currSlice->compute_cost8x8 = compute_satd8x8_cost;
    currSlice->distI16x16      = distI16x16_satd;
  }
  else
  {
    /* Fallback: "comp" for 4x4 and 8x8, SATD for 16x16. */
    currSlice->compute_cost4x4 = compute_comp4x4_cost;
    currSlice->compute_cost8x8 = compute_comp8x8_cost;
    currSlice->distI16x16      = distI16x16_satd;
  }
}

针对I_16x16

在intra16x16.c中

  • SAD开销计算的源码

/*
 * SAD cost of a 16x16 intra prediction.
 *
 * Accumulates |original - prediction| over MB_BLOCK_SIZE x MB_BLOCK_SIZE
 * samples.  The original samples are read from img_org starting at row
 * currMB->opix_y and column currMB->pix_x; the prediction is read from
 * pred_img starting at [0][0].
 *
 * After each row the running sum is compared with dist_down(min_cost);
 * once it is larger, the function gives up and returns min_cost
 * unchanged.  Otherwise it returns dist_scale() of the full sum.
 */
distblk distI16x16_sad(Macroblock *currMB, imgpel **img_org, imgpel **pred_img, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sad = 0;
  int row, col;

  for (row = 0; row < MB_BLOCK_SIZE; row++)
  {
    const imgpel *org_row = &img_org[currMB->opix_y + row][currMB->pix_x];
    const imgpel *prd_row = pred_img[row];

    for (col = 0; col < MB_BLOCK_SIZE; col++)
    {
      sad += iabs(org_row[col] - prd_row[col]);
    }

    /* Early out: this mode already exceeds the cost limit. */
    if (sad > cost_limit)
    {
      return min_cost;
    }
  }

  return dist_scale((distblk) sad);
}

其中iabs()为绝对值函数。为什么写得这么复杂?因为位运算效率更高。

/*
 * Branch-free absolute value of an int.
 *
 * x is shifted right by (bit width of int - 1), i.e. by 31 when int is
 * 32 bits wide.  For non-negative x this yields 0, so the result is x.
 * For negative x the trick relies on >> being an arithmetic shift
 * (implementation-defined in C), which yields -1: XOR with -1 inverts
 * every bit and subtracting -1 adds one, giving the two's-complement
 * negation.
 */
static inline int iabs(int x)
{
  const int sign_mask = x >> ((int) (sizeof(int) * CHAR_BIT) - 1);

  return (x ^ sign_mask) - sign_mask;
}
  • SSE开销计算的源码

/*
 * SSE cost of a 16x16 intra prediction.
 *
 * Accumulates (original - prediction)^2 over MB_BLOCK_SIZE x
 * MB_BLOCK_SIZE samples.  The original samples are read from img_org
 * starting at row currMB->opix_y and column currMB->pix_x; the
 * prediction is read from pred_img starting at [0][0].
 *
 * After each row the running sum is compared with dist_down(min_cost);
 * once it is larger, the function returns min_cost unchanged.
 * Otherwise it returns dist_scale() of the full sum.
 */
distblk distI16x16_sse(Macroblock *currMB, imgpel **img_org, imgpel **pred_img, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sse = 0;
  int row, col;

  for (row = 0; row < MB_BLOCK_SIZE; row++)
  {
    const imgpel *org_row = &img_org[currMB->opix_y + row][currMB->pix_x];
    const imgpel *prd_row = pred_img[row];

    for (col = 0; col < MB_BLOCK_SIZE; col++)
    {
      /* iabs2() squares the sample difference. */
      sse += iabs2(org_row[col] - prd_row[col]);
    }

    /* Early out: this mode already exceeds the cost limit. */
    if (sse > cost_limit)
    {
      return min_cost;
    }
  }

  return dist_scale((distblk) sse);
}

其中iabs2()为平方运算

/* Square of x (x multiplied by itself). */
static inline int iabs2(int x)
{
  const int squared = x * x;

  return squared;
}
  • SATD开销计算的源码

/*
 * SATD cost of a 16x16 intra prediction.
 *
 * 1. The residual (original - prediction) is stored in the slice's
 *    i16blk4x4 buffer, arranged as a 4x4 grid of 4x4 sub-blocks.
 * 2. Each sub-block is transformed in place with hadamard4x4() and the
 *    absolute values of all its coefficients except [0][0] are summed.
 * 3. The sixteen [0][0] coefficients, each shifted right by one, are
 *    gathered into the slice's tblk4x4 buffer, transformed again with
 *    hadamard4x4(), and their absolute values are added to the sum.
 *
 * The running sum is compared with dist_down(min_cost) after every row
 * of coefficients; once it is larger, the function returns min_cost
 * unchanged (leaving the slice buffers partially updated).  Otherwise
 * it returns dist_scale() of the full sum.
 */
distblk distI16x16_satd(Macroblock *currMB, imgpel **img_org, imgpel **pred_img, distblk min_cost)
{
  Slice *slice = currMB->p_Slice;
  int **dc_blk = slice->tblk4x4;
  int ****resid = slice->i16blk4x4;
  const int cost_limit = dist_down(min_cost);
  int satd = 0;
  int x, y, bx, by, row;

  /* Step 1: residual, split into 4x4 sub-blocks. */
  for (y = 0; y < MB_BLOCK_SIZE; y++)
  {
    const imgpel *org_row = &img_org[currMB->opix_y + y][currMB->pix_x];
    const imgpel *prd_row = pred_img[y];

    for (x = 0; x < MB_BLOCK_SIZE; x++)
    {
      resid[y >> 2][x >> 2][y & 0x03][x & 0x03] = org_row[x] - prd_row[x];
    }
  }

  /* Step 2: per-sub-block Hadamard, summing everything but [0][0]. */
  for (by = 0; by < 4; by++)
  {
    for (bx = 0; bx < 4; bx++)
    {
      int **coef = resid[by][bx];

      hadamard4x4(coef, coef);

      /* First row: skip coef[0][0], it is handled in step 3. */
      satd += iabs(coef[0][1]) + iabs(coef[0][2]) + iabs(coef[0][3]);
      if (satd > cost_limit)
      {
        return min_cost;
      }

      for (row = 1; row < 4; row++)
      {
        const int *line = coef[row];

        satd += iabs(line[0]) + iabs(line[1]) + iabs(line[2]) + iabs(line[3]);
        if (satd > cost_limit)
        {
          return min_cost;
        }
      }
    }
  }

  /* Step 3: gather the halved [0][0] coefficients and transform them. */
  for (by = 0; by < 4; by++)
  {
    for (bx = 0; bx < 4; bx++)
    {
      dc_blk[by][bx] = resid[by][bx][0][0] >> 1;
    }
  }

  hadamard4x4(dc_blk, dc_blk);

  for (row = 0; row < 4; row++)
  {
    const int *line = dc_blk[row];

    satd += iabs(line[0]) + iabs(line[1]) + iabs(line[2]) + iabs(line[3]);
    if (satd > cost_limit)
    {
      return min_cost;
    }
  }

  return dist_scale((distblk) satd);
}

其中hadamard4x4为4阶hadamard变换函数

/*
 * 4x4 Hadamard transform of block, written to tblock.
 *
 * The rows are transformed first into a local 16-entry buffer, then the
 * columns of that buffer are transformed and each result is shifted
 * right by one before being stored.  Because the input is fully read
 * before any output is written, block and tblock may be the same array.
 */
void hadamard4x4(int **block, int **tblock)
{
  int stage[16];
  int row, col;

  /* Horizontal pass: one butterfly per input row. */
  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const int *in = block[row];
    int *out = stage + 4 * row;
    const int outer_sum  = in[0] + in[3];
    const int inner_sum  = in[1] + in[2];
    const int inner_diff = in[1] - in[2];
    const int outer_diff = in[0] - in[3];

    out[0] = outer_sum + inner_sum;
    out[1] = outer_diff + inner_diff;
    out[2] = outer_sum - inner_sum;
    out[3] = outer_diff - inner_diff;
  }

  /* Vertical pass: one butterfly per column, results halved. */
  for (col = 0; col < BLOCK_SIZE; col++)
  {
    const int v0 = stage[col];
    const int v1 = stage[col + BLOCK_SIZE];
    const int v2 = stage[col + 2 * BLOCK_SIZE];
    const int v3 = stage[col + 3 * BLOCK_SIZE];
    const int outer_sum  = v0 + v3;
    const int inner_sum  = v1 + v2;
    const int inner_diff = v1 - v2;
    const int outer_diff = v0 - v3;

    tblock[0][col] = (outer_sum + inner_sum) >> 1;
    tblock[1][col] = (inner_diff + outer_diff) >> 1;
    tblock[2][col] = (outer_sum - inner_sum) >> 1;
    tblock[3][col] = (outer_diff - inner_diff) >> 1;
  }
}

针对I_4x4

在rdopt.c中

  • SAD的计算

/*
 * SAD cost of a 4x4 block.
 *
 * Accumulates |cur - prd| over BLOCK_SIZE rows of four samples each.
 * The current samples are read from cur_img starting at column
 * pic_opix_x; the prediction is read from prd_img starting at column 0.
 * p_Vid is not used here.
 *
 * After each row the running sum is compared with dist_down(min_cost);
 * once it is larger, the function returns min_cost unchanged.
 * Otherwise it returns dist_scale() of the full sum.
 */
distblk compute_sad4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sad = 0;
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const imgpel *cur_row = &cur_img[row][pic_opix_x];
    const imgpel *prd_row = prd_img[row];

    /* Four samples per row, matching the 4x4 block width. */
    for (col = 0; col < 4; col++)
    {
      sad += iabs(cur_row[col] - prd_row[col]);
    }

    if (sad > cost_limit)
    {
      return min_cost;
    }
  }

  return dist_scale(sad);
}
  • SSE的计算

/*
 * SSE cost of a 4x4 block.
 *
 * Accumulates (cur - prd)^2 over BLOCK_SIZE x BLOCK_SIZE samples.  The
 * current samples are read from cur_img starting at column pic_opix_x;
 * the prediction is read from prd_img starting at column 0.  p_Vid is
 * not used here.
 *
 * After each row the running sum is compared with dist_down(min_cost);
 * once it is larger, the function returns min_cost unchanged.
 * Otherwise it returns dist_scale() of the full sum.
 */
distblk compute_sse4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  const int cost_limit = dist_down(min_cost);
  int sse = 0;
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const imgpel *cur_row = &cur_img[row][pic_opix_x];
    const imgpel *prd_row = prd_img[row];

    for (col = 0; col < BLOCK_SIZE; col++)
    {
      sse += iabs2(cur_row[col] - prd_row[col]);
    }

    if (sse > cost_limit)
    {
      return min_cost;
    }
  }

  return dist_scale(sse);
}
  • satd的计算

/*
 * SATD cost of a 4x4 block.
 *
 * Builds the residual (cur - prd) row by row into a 16-entry short
 * array and returns dist_scale() of HadamardSAD4x4() applied to it.
 * The current samples are read from cur_img starting at column
 * pic_opix_x; the prediction is read from prd_img starting at column 0.
 * p_Vid and min_cost are not used here, so there is no early exit.
 */
distblk compute_satd4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  short diff[16];
  int filled = 0;
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const imgpel *cur_row = &cur_img[row][pic_opix_x];
    const imgpel *prd_row = prd_img[row];

    for (col = 0; col < BLOCK_SIZE; col++)
    {
      diff[filled++] = (short) (cur_row[col] - prd_row[col]);
    }
  }

  return dist_scale(HadamardSAD4x4 (diff));
}
  • comp的计算

/*
 * 4x4 cost through the callback configured on p_Vid.
 *
 * Builds the residual (cur - prd) row by row into a 16-entry short
 * array and returns whatever p_Vid->distortion4x4() yields for that
 * residual and min_cost.  The current samples are read from cur_img
 * starting at column pic_opix_x; the prediction is read from prd_img
 * starting at column 0.
 */
static distblk compute_comp4x4_cost(VideoParameters *p_Vid, imgpel **cur_img, imgpel **prd_img, int pic_opix_x, distblk min_cost)
{
  short diff[16];
  int filled = 0;
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const imgpel *cur_row = &cur_img[row][pic_opix_x];
    const imgpel *prd_row = prd_img[row];

    for (col = 0; col < BLOCK_SIZE; col++)
    {
      diff[filled++] = (short) (cur_row[col] - prd_row[col]);
    }
  }

  return p_Vid->distortion4x4 (diff, min_cost);
}

猜你喜欢

转载自blog.csdn.net/lj501886285/article/details/84345012