[AV1] AV1中帧内编码预测器与原始数据相减的运算过程

/* Prototype for the per-row helper: it is defined further down in this file,
 * and C99 and later do not permit calling a function that has not been
 * declared yet. */
static INLINE void subtract32_avx2(int16_t *diff_ptr, const uint8_t *src_ptr, const uint8_t *pred_ptr);

/*
 * Computes the residual (source minus predictor) for a block that is 32
 * samples wide and `rows` samples tall, one row per iteration.
 *
 * rows:        number of rows to process; nothing is written when rows <= 0.
 * diff_ptr:    output, 32 int16_t values per row.
 * diff_stride: distance between consecutive output rows, in int16_t elements.
 * src_ptr:     source samples, 32 bytes read per row.
 * src_stride:  distance between consecutive source rows, in bytes.
 * pred_ptr:    predictor samples, 32 bytes read per row.
 * pred_stride: distance between consecutive predictor rows, in bytes.
 *
 * The helper writes each output row with aligned 256-bit stores, so diff_ptr
 * and diff_stride must keep the start of every output row 32-byte aligned.
 */
static INLINE void aom_subtract_block_32xn_avx2(int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) 
{
  for (int row = 0; row < rows; ++row) 
  {
    subtract32_avx2(diff_ptr, src_ptr, pred_ptr);

    /* Step all three buffers to the next row using their own strides. */
    src_ptr += src_stride;
    pred_ptr += pred_stride;
    diff_ptr += diff_stride;
  }
}
/*
 * Computes the 16-bit residual for one row of 32 samples:
 *   diff_ptr[i] = src_ptr[i] - pred_ptr[i],  for i in [0, 32).
 *
 * diff_ptr: output, 32 int16_t values. Written with aligned 256-bit stores,
 *           so it must be 32-byte aligned.
 * src_ptr:  32 source bytes; read with an unaligned load.
 * pred_ptr: 32 predictor bytes; read with an unaligned load.
 */
static INLINE void subtract32_avx2(int16_t *diff_ptr, const uint8_t *src_ptr, const uint8_t *pred_ptr) 
{
  /* Load 32 bytes from each input; the casts keep the const qualifier. */
  const __m256i src = _mm256_lddqu_si256((const __m256i *)src_ptr);
  const __m256i pred = _mm256_lddqu_si256((const __m256i *)pred_ptr);

  /* Zero-extend each 16-byte half to sixteen 16-bit lanes so that the
   * subtraction below can produce negative values without wrapping. */
  const __m256i src_lo = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(src));
  const __m256i src_hi = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(src, 1));
  const __m256i pred_lo = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(pred));
  const __m256i pred_hi = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(pred, 1));

  /* Each difference lies in [-255, 255], which fits in int16_t. */
  const __m256i diff_lo = _mm256_sub_epi16(src_lo, pred_lo);
  const __m256i diff_hi = _mm256_sub_epi16(src_hi, pred_hi);

  /* Samples 0..15 go to diff_ptr[0..15], samples 16..31 to diff_ptr[16..31]. */
  _mm256_store_si256((__m256i *)(diff_ptr), diff_lo);
  _mm256_store_si256((__m256i *)(diff_ptr + 16), diff_hi);
}

猜你喜欢

转载自blog.csdn.net/starperfection/article/details/107684986