diff options
author | Vic Lee <llyzs@163.com> | 2011-06-20 09:03:36 +0400 |
---|---|---|
committer | Vic Lee <llyzs@163.com> | 2011-06-20 09:03:36 +0400 |
commit | 8b8194f515c7bf713fbb1855a9caaffbb1b71729 (patch) | |
tree | 78119fc75bbfc6923b7e493cc27b660aa12d6586 | |
parent | c5f1de23c25ed8df0d61c376787482a59d040ad8 (diff) |
libfreerdp-rfx: add SSE2 optimization for quantization encoding.
-rw-r--r-- | include/freerdp/rfx.h | 2 | ||||
-rw-r--r-- | libfreerdp-rfx/rfx_encode.c | 2 | ||||
-rw-r--r-- | libfreerdp-rfx/sse/rfx_sse.c | 2 | ||||
-rw-r--r-- | libfreerdp-rfx/sse/rfx_sse2.c | 57 | ||||
-rw-r--r-- | libfreerdp-rfx/sse/rfx_sse2.h | 1 |
5 files changed, 52 insertions, 12 deletions
diff --git a/include/freerdp/rfx.h b/include/freerdp/rfx.h
index d36e884..3628e8d 100644
--- a/include/freerdp/rfx.h
+++ b/include/freerdp/rfx.h
@@ -160,7 +160,6 @@ struct _RFX_CONTEXT
 	sint16 dwt_mem_8[8*8*2*2 + 8]; /* sub-band width 8 */
 	sint16 dwt_mem_16[16*16*2*2 + 8]; /* sub-band width 16 */
 	sint16 dwt_mem_32[32*32*2*2 + 8]; /* sub-band width 32 */
-	//sint16* dwt_buffers[5]; /* sub-band buffer array */
 
 	sint16 * dwt_buffer_8;
 	sint16 * dwt_buffer_16;
@@ -170,6 +169,7 @@ struct _RFX_CONTEXT
 	void (* decode_YCbCr_to_RGB)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
 	void (* encode_RGB_to_YCbCr)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
 	void (* quantization_decode)(sint16 * buffer, const uint32 * quantization_values);
+	void (* quantization_encode)(sint16 * buffer, const uint32 * quantization_values);
 	void (* dwt_2d_decode)(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32);
 
 	/* profiler definitions */
diff --git a/libfreerdp-rfx/rfx_encode.c b/libfreerdp-rfx/rfx_encode.c
index 62265c7..e458dd1 100644
--- a/libfreerdp-rfx/rfx_encode.c
+++ b/libfreerdp-rfx/rfx_encode.c
@@ -136,7 +136,7 @@ rfx_encode_component(RFX_CONTEXT * context, const uint32 * quantization_values,
 	PROFILER_EXIT(context->prof_rfx_dwt_2d_encode);
 
 	PROFILER_ENTER(context->prof_rfx_quantization_encode);
-	rfx_quantization_encode(data, quantization_values);
+	context->quantization_encode(data, quantization_values);
 	PROFILER_EXIT(context->prof_rfx_quantization_encode);
 
 	PROFILER_ENTER(context->prof_rfx_differential_encode);
diff --git a/libfreerdp-rfx/sse/rfx_sse.c b/libfreerdp-rfx/sse/rfx_sse.c
index 4c86fb8..f639b13 100644
--- a/libfreerdp-rfx/sse/rfx_sse.c
+++ b/libfreerdp-rfx/sse/rfx_sse.c
@@ -39,11 +39,13 @@ void rfx_init_sse(RFX_CONTEXT * context)
 		IF_PROFILER(context->prof_rfx_decode_YCbCr_to_RGB->name = "rfx_decode_YCbCr_to_RGB_SSE2");
 		IF_PROFILER(context->prof_rfx_encode_RGB_to_YCbCr->name = "rfx_encode_RGB_to_YCbCr_SSE2");
 		IF_PROFILER(context->prof_rfx_quantization_decode->name = "rfx_quantization_decode_SSE2");
+		IF_PROFILER(context->prof_rfx_quantization_encode->name = "rfx_quantization_encode_SSE2");
 		IF_PROFILER(context->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_SSE2");
 
 		context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_SSE2;
 		context->encode_RGB_to_YCbCr = rfx_encode_RGB_to_YCbCr_SSE2;
 		context->quantization_decode = rfx_quantization_decode_SSE2;
+		context->quantization_encode = rfx_quantization_encode_SSE2;
 		context->dwt_2d_decode = rfx_dwt_2d_decode_SSE2;
 	}
 }
diff --git a/libfreerdp-rfx/sse/rfx_sse2.c b/libfreerdp-rfx/sse/rfx_sse2.c
index 577f9b1..63dfdcf 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.c
+++ b/libfreerdp-rfx/sse/rfx_sse2.c
@@ -188,16 +188,53 @@ rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values
 {
 	_mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
 
-	rfx_quantization_decode_block_SSE2(buffer, 1024, quantization_values[8]); // HL1
-	rfx_quantization_decode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); // LH1
-	rfx_quantization_decode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); // HH1
-	rfx_quantization_decode_block_SSE2(buffer + 3072, 256, quantization_values[5]); // HL2
-	rfx_quantization_decode_block_SSE2(buffer + 3328, 256, quantization_values[4]); // LH2
-	rfx_quantization_decode_block_SSE2(buffer + 3584, 256, quantization_values[6]); // HH2
-	rfx_quantization_decode_block_SSE2(buffer + 3840, 64, quantization_values[2]); // HL3
-	rfx_quantization_decode_block_SSE2(buffer + 3904, 64, quantization_values[1]); // LH3
-	rfx_quantization_decode_block_SSE2(buffer + 3868, 64, quantization_values[3]); // HH3
-	rfx_quantization_decode_block_SSE2(buffer + 4032, 64, quantization_values[0]); // LL3
+	rfx_quantization_decode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+	rfx_quantization_decode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+	rfx_quantization_decode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+	rfx_quantization_decode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+	rfx_quantization_decode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+	rfx_quantization_decode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+	rfx_quantization_decode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+	rfx_quantization_decode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+	rfx_quantization_decode_block_SSE2(buffer + 3968, 64, quantization_values[3]); /* HH3 (= 3904 + 64) */
+	rfx_quantization_decode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_quantization_encode_block_SSE2(sint16 * buffer, const int buffer_size, const uint32 factor)
+{
+	int shift = factor-6;
+	if (shift <= 0)
+		return;
+
+	__m128i a;
+	__m128i * ptr = (__m128i*) buffer;
+	__m128i * buf_end = (__m128i*) (buffer + buffer_size);
+	do
+	{
+		a = _mm_load_si128(ptr);
+		a = _mm_srai_epi16(a, shift);
+		_mm_store_si128(ptr, a);
+
+		ptr++;
+	} while(ptr < buf_end);
+}
+
+void
+rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values)
+{
+	_mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
+
+	rfx_quantization_encode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+	rfx_quantization_encode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+	rfx_quantization_encode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+	rfx_quantization_encode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+	rfx_quantization_encode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+	rfx_quantization_encode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+	rfx_quantization_encode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+	rfx_quantization_encode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+	rfx_quantization_encode_block_SSE2(buffer + 3968, 64, quantization_values[3]); /* HH3 (= 3904 + 64) */
+	rfx_quantization_encode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
 }
 
 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/libfreerdp-rfx/sse/rfx_sse2.h b/libfreerdp-rfx/sse/rfx_sse2.h
index ea87347..8f35f7c 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.h
+++ b/libfreerdp-rfx/sse/rfx_sse2.h
@@ -25,6 +25,7 @@
 void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
 void rfx_encode_RGB_to_YCbCr_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
 void rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values);
+void rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values);
 void rfx_dwt_2d_decode_SSE2(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32);
 
 #endif /* __RFX_SSE2_H */