diff options
author | Hendrik Leppkes <h.leppkes@gmail.com> | 2012-03-03 15:37:30 +0400 |
---|---|---|
committer | Hendrik Leppkes <h.leppkes@gmail.com> | 2012-03-03 15:42:39 +0400 |
commit | b164d700ab81996854770d97329514fd221ef7db (patch) | |
tree | 4b9daf7c7f5b451429e7bb2958030c0e1ce0080c /decoder/LAVVideo/pixconv | |
parent | ebe13209f48872a47b5f340e08cd637694fbbb12 (diff) |
Only use SSE2 copy when memory is aligned, avoid a second memcpy.
Diffstat (limited to 'decoder/LAVVideo/pixconv')
-rw-r--r-- | decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp | 66 |
1 files changed, 48 insertions, 18 deletions
```diff
diff --git a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
index 63eed135..13cb3d83 100644
--- a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
@@ -229,19 +229,35 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
   _mm_sfence();
 
   // Y
-  for(line = 0; line < height; ++line) {
-    PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width);
-    y += inLumaStride;
+  if ((outLumaStride % 16) == 0) {
+    for(line = 0; line < height; ++line) {
+      PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width);
+      y += inLumaStride;
+    }
+  } else {
+    for(line = 0; line < height; ++line) {
+      memcpy(dstY + outLumaStride * line, y, width);
+      y += inLumaStride;
+    }
   }
 
   // U/V
-  for(line = 0; line < chromaHeight; ++line) {
-    PIXCONV_MEMCPY_ALIGNED_TWO(
-      dstU + outChromaStride * line, u,
-      dstV + outChromaStride * line, v,
-      chromaWidth);
-    u += inChromaStride;
-    v += inChromaStride;
+  if ((outChromaStride % 16) == 0) {
+    for(line = 0; line < chromaHeight; ++line) {
+      PIXCONV_MEMCPY_ALIGNED_TWO(
+        dstU + outChromaStride * line, u,
+        dstV + outChromaStride * line, v,
+        chromaWidth);
+      u += inChromaStride;
+      v += inChromaStride;
+    }
+  } else {
+    for(line = 0; line < chromaHeight; ++line) {
+      memcpy(dstU + outChromaStride * line, u, chromaWidth);
+      memcpy(dstV + outChromaStride * line, v, chromaWidth);
+      u += inChromaStride;
+      v += inChromaStride;
+    }
   }
 
   return S_OK;
@@ -520,15 +536,29 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_nv12)
 
   _mm_sfence();
 
-  // Copy the data
-  for (line = 0; line < height; line++) {
-    PIXCONV_MEMCPY_ALIGNED(dstY + outStride * line, y, width);
-    y += inStride;
-  }
+  // Use SSE2 copy when the stride is aligned
+  if ((outStride % 16) == 0) {
+    // Copy the data
+    for (line = 0; line < height; line++) {
+      PIXCONV_MEMCPY_ALIGNED(dstY + outStride * line, y, width);
+      y += inStride;
+    }
 
-  for (line = 0; line < chromaHeight; line++) {
-    PIXCONV_MEMCPY_ALIGNED(dstUV + outStride * line, uv, width);
-    uv += inStride;
+    for (line = 0; line < chromaHeight; line++) {
+      PIXCONV_MEMCPY_ALIGNED(dstUV + outStride * line, uv, width);
+      uv += inStride;
+    }
+  } else {
+    // Copy the data
+    for (line = 0; line < height; line++) {
+      memcpy(dstY + outStride * line, y, width);
+      y += inStride;
+    }
+
+    for (line = 0; line < chromaHeight; line++) {
+      memcpy(dstUV + outStride * line, uv, width);
+      uv += inStride;
+    }
   }
 
   return S_OK;
```