Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FreeRDP/FreeRDP-old.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vic Lee <llyzs@163.com> 2011-06-20 09:03:36 +0400
committer: Vic Lee <llyzs@163.com> 2011-06-20 09:03:36 +0400
commit: 8b8194f515c7bf713fbb1855a9caaffbb1b71729 (patch)
tree: 78119fc75bbfc6923b7e493cc27b660aa12d6586
parent: c5f1de23c25ed8df0d61c376787482a59d040ad8 (diff)
libfreerdp-rfx: add SSE2 optimization for quantization encoding.
-rw-r--r-- include/freerdp/rfx.h 2
-rw-r--r-- libfreerdp-rfx/rfx_encode.c 2
-rw-r--r-- libfreerdp-rfx/sse/rfx_sse.c 2
-rw-r--r-- libfreerdp-rfx/sse/rfx_sse2.c 57
-rw-r--r-- libfreerdp-rfx/sse/rfx_sse2.h 1
5 files changed, 52 insertions, 12 deletions
diff --git a/include/freerdp/rfx.h b/include/freerdp/rfx.h
index d36e884..3628e8d 100644
--- a/include/freerdp/rfx.h
+++ b/include/freerdp/rfx.h
@@ -160,7 +160,6 @@ struct _RFX_CONTEXT
sint16 dwt_mem_8[8*8*2*2 + 8]; /* sub-band width 8 */
sint16 dwt_mem_16[16*16*2*2 + 8]; /* sub-band width 16 */
sint16 dwt_mem_32[32*32*2*2 + 8]; /* sub-band width 32 */
- //sint16* dwt_buffers[5]; /* sub-band buffer array */
sint16 * dwt_buffer_8;
sint16 * dwt_buffer_16;
@@ -170,6 +169,7 @@ struct _RFX_CONTEXT
void (* decode_YCbCr_to_RGB)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
void (* encode_RGB_to_YCbCr)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
void (* quantization_decode)(sint16 * buffer, const uint32 * quantization_values);
+ void (* quantization_encode)(sint16 * buffer, const uint32 * quantization_values);
void (* dwt_2d_decode)(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32);
/* profiler definitions */
diff --git a/libfreerdp-rfx/rfx_encode.c b/libfreerdp-rfx/rfx_encode.c
index 62265c7..e458dd1 100644
--- a/libfreerdp-rfx/rfx_encode.c
+++ b/libfreerdp-rfx/rfx_encode.c
@@ -136,7 +136,7 @@ rfx_encode_component(RFX_CONTEXT * context, const uint32 * quantization_values,
PROFILER_EXIT(context->prof_rfx_dwt_2d_encode);
PROFILER_ENTER(context->prof_rfx_quantization_encode);
- rfx_quantization_encode(data, quantization_values);
+ context->quantization_encode(data, quantization_values);
PROFILER_EXIT(context->prof_rfx_quantization_encode);
PROFILER_ENTER(context->prof_rfx_differential_encode);
diff --git a/libfreerdp-rfx/sse/rfx_sse.c b/libfreerdp-rfx/sse/rfx_sse.c
index 4c86fb8..f639b13 100644
--- a/libfreerdp-rfx/sse/rfx_sse.c
+++ b/libfreerdp-rfx/sse/rfx_sse.c
@@ -39,11 +39,13 @@ void rfx_init_sse(RFX_CONTEXT * context)
IF_PROFILER(context->prof_rfx_decode_YCbCr_to_RGB->name = "rfx_decode_YCbCr_to_RGB_SSE2");
IF_PROFILER(context->prof_rfx_encode_RGB_to_YCbCr->name = "rfx_encode_RGB_to_YCbCr_SSE2");
IF_PROFILER(context->prof_rfx_quantization_decode->name = "rfx_quantization_decode_SSE2");
+ IF_PROFILER(context->prof_rfx_quantization_encode->name = "rfx_quantization_encode_SSE2");
IF_PROFILER(context->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_SSE2");
context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_SSE2;
context->encode_RGB_to_YCbCr = rfx_encode_RGB_to_YCbCr_SSE2;
context->quantization_decode = rfx_quantization_decode_SSE2;
+ context->quantization_encode = rfx_quantization_encode_SSE2;
context->dwt_2d_decode = rfx_dwt_2d_decode_SSE2;
}
}
diff --git a/libfreerdp-rfx/sse/rfx_sse2.c b/libfreerdp-rfx/sse/rfx_sse2.c
index 577f9b1..63dfdcf 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.c
+++ b/libfreerdp-rfx/sse/rfx_sse2.c
@@ -188,16 +188,53 @@ rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values
{
_mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
- rfx_quantization_decode_block_SSE2(buffer, 1024, quantization_values[8]); // HL1
- rfx_quantization_decode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); // LH1
- rfx_quantization_decode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); // HH1
- rfx_quantization_decode_block_SSE2(buffer + 3072, 256, quantization_values[5]); // HL2
- rfx_quantization_decode_block_SSE2(buffer + 3328, 256, quantization_values[4]); // LH2
- rfx_quantization_decode_block_SSE2(buffer + 3584, 256, quantization_values[6]); // HH2
- rfx_quantization_decode_block_SSE2(buffer + 3840, 64, quantization_values[2]); // HL3
- rfx_quantization_decode_block_SSE2(buffer + 3904, 64, quantization_values[1]); // LH3
- rfx_quantization_decode_block_SSE2(buffer + 3868, 64, quantization_values[3]); // HH3
- rfx_quantization_decode_block_SSE2(buffer + 4032, 64, quantization_values[0]); // LL3
+ rfx_quantization_decode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_decode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_decode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_decode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_decode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_decode_block_SSE2(buffer + 3868, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_decode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+}
+
+/*
+ * Quantize one sub-band in place: every 16-bit coefficient is shifted
+ * right arithmetically by (factor - 6) bits, one 128-bit vector at a time.
+ * When (factor - 6) is zero or negative the buffer is left untouched.
+ * Aligned SSE2 loads/stores are used, so buffer is expected to be 16-byte
+ * aligned; once the shift is positive, at least one vector is processed.
+ */
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_quantization_encode_block_SSE2(sint16 * buffer, const int buffer_size, const uint32 factor)
+{
+	const int bits = factor-6;
+	__m128i * cur = (__m128i*) buffer;
+	__m128i * const stop = (__m128i*) (buffer + buffer_size);
+
+	if (bits <= 0)
+		return;
+
+	for (;;)
+	{
+		/* load, shift and write back the same vector */
+		_mm_store_si128(cur, _mm_srai_epi16(_mm_load_si128(cur), bits));
+
+		if (++cur >= stop)
+			break;
+	}
+}
+
+/*
+ * SSE2 quantization of one component buffer, performed in place.
+ * buffer holds 4096 coefficients stored as ten consecutive sub-bands
+ * (3 x 1024, 3 x 256, 4 x 64 entries); each sub-band is quantized with
+ * its own entry of quantization_values (indices 0..9 are read).
+ */
+void
+rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values)
+{
+	_mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
+
+	/* Each offset is the running sum of the preceding sub-band sizes. */
+	rfx_quantization_encode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+	rfx_quantization_encode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+	rfx_quantization_encode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+	rfx_quantization_encode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+	rfx_quantization_encode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+	rfx_quantization_encode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+	rfx_quantization_encode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+	rfx_quantization_encode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+	/* HH3 follows LH3 at 3904 + 64 = 3968 (not 3868, which overlaps HL3/LH3 and is not 16-byte aligned). */
+	rfx_quantization_encode_block_SSE2(buffer + 3968, 64, quantization_values[3]); /* HH3 */
+	rfx_quantization_encode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
}
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/libfreerdp-rfx/sse/rfx_sse2.h b/libfreerdp-rfx/sse/rfx_sse2.h
index ea87347..8f35f7c 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.h
+++ b/libfreerdp-rfx/sse/rfx_sse2.h
@@ -25,6 +25,7 @@
void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
void rfx_encode_RGB_to_YCbCr_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
void rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values);
+void rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values);
void rfx_dwt_2d_decode_SSE2(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32);
#endif /* __RFX_SSE2_H */