legacy

    // Fragment of a kernel body: each thread parks its three uchar2 values in
    // shared memory, then a single thread copies the whole block's tile out
    // to the destination buffers.
    // NOTE(review): y0y1, y2y3, uv, ix, iy, pDst, pDstUv, nPitch and nWidth
    // are defined outside the visible portion; their exact meaning must be
    // confirmed against the kernel header.

    // Block dimensions; also used below as the tile extent for the write-back.
    int bw = blockDim.x;
    int bh = blockDim.y;

    // threadIdx.x/y are always below blockDim.x/y, so these modulo operations
    // leave the indices unchanged.
    int tx = threadIdx.x%bw;
    int ty = threadIdx.y%bh;

    // Per-block staging tiles, indexed [ty][tx]. The fixed 16x16 size means
    // the indexing is only in bounds when blockDim.x <= 16 and
    // blockDim.y <= 16.
    __shared__ uchar2 ys0[16][16];
    __shared__ uchar2 ys1[16][16];
    __shared__ uchar2 uvs[16][16];

    // Every thread stores its own values into its slot of each tile.
    ys0[ty][tx] = y0y1;
    ys1[ty][tx] = y2y3;
    uvs[ty][tx] = uv;

    // Barrier: all tile writes above must complete before thread (0,0)
    // reads other threads' slots below.
    __syncthreads();
    // Only thread (0,0) performs the write-back, walking the bh x bw tile
    // serially; all other threads in the block do nothing from here on.
    if (threadIdx.x == 0 && threadIdx.y == 0) {
        for (int j = 0; j != bh; ++j) {
            // Tile row j feeds two consecutive destination rows:
            // (iy + j) * 2 receives ys0 and (iy + j) * 2 + 1 receives ys1.
            // Rows are nPitch apart and the start offset within a row is
            // ix * 2 (bytes, assuming pDst is a byte pointer - TODO confirm).
            // NOTE(review): presumably ix/iy address the block's first
            // element as seen by thread (0,0) - confirm with the definition.
            uchar2* py0 = (uchar2*)(pDst + (iy + j) * 2 * nPitch + ix * 2);
            uchar2* py1 = (uchar2*)(pDst + ((iy + j) * 2+1) * nPitch + ix * 2);
            // NOTE(review): puv uses nWidth as the row stride while the two
            // pointers above use nPitch; verify which one is intended. puv is
            // currently unused because the store through it is commented out.
            uchar2* puv = (uchar2*)(pDstUv + (iy + j)*nWidth + ix * 2);
            for (int i = 0; i != bw; ++i) {
                // One uchar2 per tile column goes to each destination row.
                *py0++ = ys0[j][i];
                *py1++ = ys1[j][i];
                // Disabled store of the uvs tile; while it stays commented
                // out, the uvs tile is never read in the visible code.
                //*puv++ = uvs[j][i];
            }

        }
    }

猜你喜欢

转载自www.cnblogs.com/luoyinjie/p/10882099.html