Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FreeRDP/FreeRDP-old.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc-André Moreau <marcandre.moreau@gmail.com>2011-06-28 18:19:40 +0400
committerMarc-André Moreau <marcandre.moreau@gmail.com>2011-06-28 18:19:40 +0400
commit46c1d75a72521b548e8f0f677b86f3aab01027e8 (patch)
tree0a5f57437e5c7cfcfee3db4a71564bc4211a4b06
parentb2e7d3410410a6e79e95ed6c3ba354fdd4e7ed05 (diff)
parentc78f823e681a9e53cb8f89d78ab9bdf65405656c (diff)
Merge pull request #54 from FreeRDP/remotefx
Remotefx
-rw-r--r--Makefile.am3
-rw-r--r--X11/xf_win.c12
-rw-r--r--configure.ac57
-rw-r--r--cunit/test_librfx.c208
-rw-r--r--cunit/test_librfx.h6
-rw-r--r--dfb/Makefile.am1
-rw-r--r--dfb/dfb_win.c14
-rw-r--r--dfb/dfb_win.h2
-rw-r--r--dfb/dfbfreerdp.c4
-rw-r--r--include/freerdp/constants/ui.h2
-rw-r--r--include/freerdp/rfx.h68
-rw-r--r--include/freerdp/utils/Makefile.am2
-rw-r--r--include/freerdp/utils/profiler.h71
-rw-r--r--include/freerdp/utils/stopwatch.h44
-rw-r--r--libfreerdp-gdi/Makefile.am15
-rw-r--r--libfreerdp-gdi/color.c4
-rw-r--r--libfreerdp-gdi/color.h4
-rw-r--r--libfreerdp-gdi/decode.c145
-rw-r--r--libfreerdp-gdi/decode.h4
-rw-r--r--libfreerdp-gdi/gdi.c21
-rw-r--r--libfreerdp-gdi/gdi.h17
-rw-r--r--libfreerdp-gdi/gdi_bitmap.c4
-rw-r--r--libfreerdp-gdi/gdi_bitmap.h3
-rw-r--r--libfreerdp-gdi/libgdi.h35
-rw-r--r--libfreerdp-gdi/neon/Makefile.am25
-rw-r--r--libfreerdp-gdi/neon/gdi_neon.c32
-rw-r--r--libfreerdp-gdi/neon/gdi_neon.h31
-rw-r--r--libfreerdp-gdi/sse/Makefile.am25
-rw-r--r--libfreerdp-gdi/sse/gdi_sse.c32
-rw-r--r--libfreerdp-gdi/sse/gdi_sse.h31
-rw-r--r--libfreerdp-rfx/Makefile.am8
-rw-r--r--libfreerdp-rfx/librfx.c666
-rw-r--r--libfreerdp-rfx/librfx.h4
-rw-r--r--libfreerdp-rfx/neon/Makefile.am25
-rw-r--r--libfreerdp-rfx/neon/rfx_neon.c372
-rw-r--r--libfreerdp-rfx/neon/rfx_neon.h32
-rw-r--r--libfreerdp-rfx/rfx_bitstream.c34
-rw-r--r--libfreerdp-rfx/rfx_bitstream.h6
-rw-r--r--libfreerdp-rfx/rfx_decode.c155
-rw-r--r--libfreerdp-rfx/rfx_decode.h2
-rw-r--r--libfreerdp-rfx/rfx_differential.c20
-rw-r--r--libfreerdp-rfx/rfx_differential.h4
-rw-r--r--libfreerdp-rfx/rfx_dwt.c121
-rw-r--r--libfreerdp-rfx/rfx_dwt.h4
-rw-r--r--libfreerdp-rfx/rfx_encode.c182
-rw-r--r--libfreerdp-rfx/rfx_encode.h34
-rw-r--r--libfreerdp-rfx/rfx_quantization.c52
-rw-r--r--libfreerdp-rfx/rfx_quantization.h4
-rw-r--r--libfreerdp-rfx/rfx_rlgr.c213
-rw-r--r--libfreerdp-rfx/rfx_rlgr.h4
-rw-r--r--libfreerdp-rfx/sse/Makefile.am6
-rw-r--r--libfreerdp-rfx/sse/rfx_sse.c18
-rw-r--r--libfreerdp-rfx/sse/rfx_sse.h18
-rw-r--r--libfreerdp-rfx/sse/rfx_sse2.c608
-rw-r--r--libfreerdp-rfx/sse/rfx_sse2.h7
-rw-r--r--libfreerdp-utils/Makefile.am4
-rw-r--r--libfreerdp-utils/profiler.c72
-rw-r--r--libfreerdp-utils/stopwatch.c60
58 files changed, 3264 insertions, 393 deletions
diff --git a/Makefile.am b/Makefile.am
index 5d63bc9..daa2a61 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,9 +1,8 @@
## Process this file with automake to produce Makefile.in
REQUIRED_SUBDIRS = \
libfreerdp-asn1 \
- libfreerdp-gdi \
- libfreerdp-rfx/sse \
libfreerdp-rfx \
+ libfreerdp-gdi \
libfreerdp-utils \
libfreerdp-core \
docs \
diff --git a/X11/xf_win.c b/X11/xf_win.c
index ce1cc7f..3f6accf 100644
--- a/X11/xf_win.c
+++ b/X11/xf_win.c
@@ -1073,6 +1073,8 @@ RD_BOOL
l_ui_check_certificate(rdpInst * inst, const char * fingerprint,
const char * subject, const char * issuer, RD_BOOL verified)
{
+ //char answer;
+
printf("certificate details:\n");
printf(" Subject:\n %s\n", subject);
printf(" Issued by:\n %s\n", issuer);
@@ -1081,7 +1083,17 @@ l_ui_check_certificate(rdpInst * inst, const char * fingerprint,
if (!verified)
printf("The server could not be authenticated. Connection security may be compromised!\n");
+#if 0
+ printf("Accept this certificate? (Y/N): ");
+ answer = fgetc(stdin);
+
+ if (answer == 'y' || answer == 'Y')
+ return True;
+ else
+ return False;
+#else
return True;
+#endif
}
static int
diff --git a/configure.ac b/configure.ac
index 39e3863..6cf6e02 100644
--- a/configure.ac
+++ b/configure.ac
@@ -19,7 +19,9 @@ AH_TEMPLATE(IPv6, [IPv6])
AH_TEMPLATE(NEED_ALIGN, [Alignment])
AH_TEMPLATE(DISABLE_TLS, [Disable TLS encryption])
AH_TEMPLATE(WITH_SSE, [Enable SSE Optimizations])
+AH_TEMPLATE(WITH_NEON, [Enable NEON Optimizations])
AH_TEMPLATE(WITH_XKBFILE, [Use xkbfile for keyboard handling])
+AH_TEMPLATE(WITH_PROFILER, [Turn on the code profiler])
AH_TEMPLATE(WITH_DEBUG, [Turn on debugging messages])
AH_TEMPLATE(WITH_DEBUG_RDP, [Turn on debugging messages])
AH_TEMPLATE(WITH_DEBUG_GDI, [Turn on debugging messages])
@@ -1026,24 +1028,52 @@ AC_ARG_WITH([cunit],
)
#
-# SSE
+# Profiler
#
-sse="yes"
-AC_ARG_WITH([sse],
- [AS_HELP_STRING([--with-sse], [Enable SSE Optimizations [default=yes]])])
-AS_IF([test "x$with_sse" == xno],
+profiler="no"
+AC_ARG_WITH([profiler],
+ [AS_HELP_STRING([--with-profiler], [enable the code profiler])])
+AS_IF([test "x$with_profiler" == xyes],
[
- sse="no"
- AM_CONDITIONAL(WITH_SSE, false)
+ profiler="yes"
+ AM_CONDITIONAL(WITH_PROFILER, true)
+ AC_DEFINE(WITH_PROFILER,1)
],
[
- sse="yes"
+ profiler="no"
+ AM_CONDITIONAL(WITH_PROFILER, false)
+ ]
+)
+
+#
+# SSE
+#
+AM_CONDITIONAL(WITH_SSE, false)
+AC_ARG_WITH(sse,
+ [ --with-sse enable SSE optimizations],
+ [
+ if test $withval != "no";
+ then
AM_CONDITIONAL(WITH_SSE, true)
AC_DEFINE(WITH_SSE,1)
CFLAGS="$CFLAGS -msse2"
- REQUIRED_SUBDIRS="libfreerdp-rfx/sse $REQUIRED_SUBDIRS"
- ]
-)
+ fi
+ ])
+
+#
+# NEON
+#
+AM_CONDITIONAL(WITH_NEON, false)
+AC_ARG_WITH(neon,
+ [ --with-neon enable NEON optimizations],
+ [
+ if test $withval != "no";
+ then
+ AM_CONDITIONAL(WITH_NEON, true)
+ AC_DEFINE(WITH_NEON,1)
+ CFLAGS="$CFLAGS -mfpu=neon"
+ fi
+ ])
#
# X11
@@ -1139,9 +1169,12 @@ AC_CONFIG_FILES([
Makefile
freerdp.pc
libfreerdp-asn1/Makefile
-libfreerdp-gdi/Makefile
libfreerdp-rfx/Makefile
libfreerdp-rfx/sse/Makefile
+libfreerdp-rfx/neon/Makefile
+libfreerdp-gdi/Makefile
+libfreerdp-gdi/sse/Makefile
+libfreerdp-gdi/neon/Makefile
libfreerdp-utils/Makefile
libfreerdp-core/Makefile
docs/Makefile
diff --git a/cunit/test_librfx.c b/cunit/test_librfx.c
index 42913d4..78c453d 100644
--- a/cunit/test_librfx.c
+++ b/cunit/test_librfx.c
@@ -32,6 +32,7 @@
#include "rfx_quantization.h"
#include "rfx_dwt.h"
#include "rfx_decode.h"
+#include "rfx_encode.h"
#include "test_librfx.h"
@@ -60,7 +61,7 @@ static const uint8 y_data[] =
static const uint8 cb_data[] =
{
- 0x1b, 0x04, 0x7f, 0x04, 0x31, 0x5f, 0xc2,
+ 0x1b, 0x04, 0x7f, 0x04, 0x31, 0x5f, 0xc2,
0x94, 0xaf, 0x05, 0x29, 0x5e, 0x0a, 0x52, 0xbc, 0x14, 0xa5, 0x78, 0x29, 0x25, 0x78, 0x29, 0x25,
0x78, 0x29, 0x25, 0x68, 0x52, 0x4a, 0xf0, 0x52, 0x4a, 0xf0, 0x52, 0x4a, 0xd0, 0xa4, 0x95, 0xe0,
0xa4, 0x95, 0xe0, 0xa4, 0x95, 0xa1, 0x49, 0x2b, 0xc1, 0x49, 0x2b, 0xc1, 0x49, 0x2b, 0x42, 0x92,
@@ -85,7 +86,7 @@ static const uint8 cb_data[] =
static const uint8 cr_data[] =
{
- 0x1b, 0xfc, 0x11, 0xc1, 0x0f, 0x4a, 0xc1, 0x4f, 0x4a, 0xc1,
+ 0x1b, 0xfc, 0x11, 0xc1, 0x0f, 0x4a, 0xc1, 0x4f, 0x4a, 0xc1,
0x4f, 0x4a, 0xa1, 0x4d, 0x95, 0x42, 0x9e, 0x95, 0x42, 0x9e, 0x95, 0x42, 0x9b, 0x2a, 0x85, 0x3d,
0x2a, 0x85, 0x3d, 0x2a, 0x85, 0x36, 0x55, 0x0a, 0x7a, 0x55, 0x0a, 0x7a, 0x55, 0x0a, 0x6c, 0xaa,
0x14, 0xf4, 0xaa, 0x14, 0xf4, 0xaa, 0x14, 0xd9, 0x54, 0x29, 0xe9, 0x54, 0x29, 0xe9, 0x54, 0x29,
@@ -114,6 +115,44 @@ static const unsigned int test_quantization_values[] =
6, 6, 6, 6, 7, 7, 8, 8, 8, 9
};
+static const uint8 rgb_scanline_data[] =
+{
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF
+};
+
+static uint8 * rgb_data;
int init_librfx_suite(void)
{
@@ -130,11 +169,14 @@ int add_librfx_suite(void)
add_test_suite(librfx);
add_test_function(bitstream);
+ add_test_function(bitstream_enc);
add_test_function(rlgr);
add_test_function(differential);
add_test_function(quantization);
add_test_function(dwt);
add_test_function(decode);
+ add_test_function(encode);
+ add_test_function(message);
return 0;
}
@@ -142,7 +184,7 @@ int add_librfx_suite(void)
void
test_bitstream(void)
{
- unsigned int b;
+ uint16 b;
RFX_BITSTREAM * bs;
bs = rfx_bitstream_new();
@@ -157,10 +199,33 @@ test_bitstream(void)
//printf("\n");
}
-static unsigned int buffer[4096];
+void
+test_bitstream_enc(void)
+{
+ uint8 buffer[10];
+ RFX_BITSTREAM * bs;
+ int i;
+
+ bs = rfx_bitstream_new();
+ memset(buffer, 0, sizeof(buffer));
+ rfx_bitstream_put_buffer(bs, buffer, sizeof(buffer));
+ for (i = 0; i < 16; i++)
+ {
+ rfx_bitstream_put_bits(bs, i, 5);
+ }
+ /*for (i = 0; i < sizeof(buffer); i++)
+ {
+ printf("%X ", buffer[i]);
+ }*/
+ rfx_bitstream_free(bs);
+
+ //printf("\n");
+}
+
+static sint16 buffer[4096];
void
-dump_buffer(int * buf, int n)
+dump_buffer(sint16 * buf, int n)
{
int i;
@@ -168,7 +233,7 @@ dump_buffer(int * buf, int n)
{
if (i % 16 == 0)
printf("\n%04d ", i);
- printf("% 3d ", buf[i]);
+ printf("% 4d ", buf[i]);
}
printf("\n");
}
@@ -194,16 +259,7 @@ test_differential(void)
void
test_quantization(void)
{
- rfx_quantization_decode(buffer, 1024, test_quantization_values[0]); /* HL1 */
- rfx_quantization_decode(buffer + 1024, 1024, test_quantization_values[1]); /* LH1 */
- rfx_quantization_decode(buffer + 2048, 1024, test_quantization_values[2]); /* HH1 */
- rfx_quantization_decode(buffer + 3072, 256, test_quantization_values[3]); /* HL2 */
- rfx_quantization_decode(buffer + 3328, 256, test_quantization_values[4]); /* LH2 */
- rfx_quantization_decode(buffer + 3584, 256, test_quantization_values[5]); /* HH2 */
- rfx_quantization_decode(buffer + 3840, 64, test_quantization_values[6]); /* HL3 */
- rfx_quantization_decode(buffer + 3904, 64, test_quantization_values[7]); /* LH3 */
- rfx_quantization_decode(buffer + 3868, 64, test_quantization_values[8]); /* HH3 */
- rfx_quantization_decode(buffer + 4032, 64, test_quantization_values[9]); /* LL3 */
+ rfx_quantization_decode(buffer, test_quantization_values);
//dump_buffer(buffer, 4096);
}
@@ -213,13 +269,30 @@ test_dwt(void)
RFX_CONTEXT * context;
context = rfx_context_new();
- rfx_dwt_2d_decode(context, (int*) buffer + 3840, 8);
- rfx_dwt_2d_decode(context, (int*) buffer + 3072, 16);
- rfx_dwt_2d_decode(context, (int*) buffer, 32);
+ rfx_dwt_2d_decode(buffer, context->dwt_buffer);
//dump_buffer(buffer, 4096);
rfx_context_free(context);
}
+static void
+dump_ppm_image(uint8 * image_buf)
+{
+ /* Dump a .ppm image. */
+ static int frame_id = 0;
+ char buf[100];
+ FILE * fp;
+
+ snprintf(buf, sizeof(buf), "/tmp/FreeRDP_Frame_%d.ppm", frame_id);
+ fp = fopen(buf, "wb");
+ fwrite("P6\n", 1, 3, fp);
+ fwrite("64 64\n", 1, 6, fp);
+ fwrite("255\n", 1, 4, fp);
+ fwrite(image_buf, 1, 4096 * 3, fp);
+ fflush(fp);
+ fclose(fp);
+ frame_id++;
+}
+
void
test_decode(void)
{
@@ -236,20 +309,91 @@ test_decode(void)
decode_buffer);
rfx_context_free(context);
- /* Dump a .ppm image. */
- static int frame_id = 0;
- char buf[100];
- FILE * fp;
+ dump_ppm_image(decode_buffer);
+}
- snprintf(buf, sizeof(buf), "/tmp/FreeRDP_Frame_%d.ppm", frame_id);
- fp = fopen(buf, "wb");
- fwrite("P6\n", 1, 3, fp);
- fwrite("64 64\n", 1, 6, fp);
- fwrite("255\n", 1, 4, fp);
- fwrite(decode_buffer, 1, 4096 * 3, fp);
- fflush(fp);
- fclose(fp);
- frame_id++;
+void
+test_encode(void)
+{
+ RFX_CONTEXT * context;
+ uint8 ycbcr_buffer[1024000];
+ int y_size, cb_size, cr_size;
+ int i;
+ uint8 decode_buffer[4096 * 3];
+
+ rgb_data = (uint8 *) malloc(64 * 64 * 3);
+ for (i = 0; i < 64; i++)
+ memcpy(rgb_data + i * 64 * 3, rgb_scanline_data, 64 * 3);
+ //hexdump(rgb_data, 64 * 64 * 3);
+
+ context = rfx_context_new();
+ context->mode = RLGR3;
+ rfx_context_set_pixel_format(context, RFX_PIXEL_FORMAT_RGB);
+
+ rfx_encode_rgb(context, rgb_data, 64, 64, 64 * 3,
+ test_quantization_values, test_quantization_values, test_quantization_values,
+ ycbcr_buffer, sizeof(ycbcr_buffer), &y_size, &cb_size, &cr_size);
+ //dump_buffer(context->cb_g_buffer, 4096);
+
+ /*printf("*** Y ***\n");
+ hexdump(ycbcr_buffer, y_size);
+ printf("*** Cb ***\n");
+ hexdump(ycbcr_buffer + y_size, cb_size);
+ printf("*** Cr ***\n");
+ hexdump(ycbcr_buffer + y_size + cb_size, cr_size);*/
+
+ rfx_decode_rgb(context,
+ ycbcr_buffer, y_size, test_quantization_values,
+ ycbcr_buffer + y_size, cb_size, test_quantization_values,
+ ycbcr_buffer + y_size + cb_size, cr_size, test_quantization_values,
+ decode_buffer);
+ dump_ppm_image(decode_buffer);
+
+ rfx_context_free(context);
+ free(rgb_data);
}
+void
+test_message(void)
+{
+ RFX_CONTEXT * context;
+ uint8 buffer[1024000];
+ int size;
+ int i, j;
+ RFX_RECT rect = {0, 0, 100, 80};
+ RFX_MESSAGE * message;
+ rgb_data = (uint8 *) malloc(100 * 80 * 3);
+ for (i = 0; i < 80; i++)
+ memcpy(rgb_data + i * 100 * 3, rgb_scanline_data, 100 * 3);
+
+ context = rfx_context_new();
+ context->mode = RLGR3;
+ context->width = 800;
+ context->height = 600;
+ rfx_context_set_pixel_format(context, RFX_PIXEL_FORMAT_RGB);
+
+ size = rfx_compose_message_header(context, buffer, sizeof(buffer));
+ /*hexdump(buffer, size);*/
+ message = rfx_process_message(context, buffer, size);
+ rfx_message_free(context, message);
+
+ for (i = 0; i < 1000; i++)
+ {
+ size = rfx_compose_message_data(context, buffer, sizeof(buffer),
+ &rect, 1, rgb_data, 100, 80, 100 * 3);
+ /*hexdump(buffer, size);*/
+ message = rfx_process_message(context, buffer, size);
+ if (i == 0)
+ {
+ for (j = 0; j < message->num_tiles; j++)
+ {
+ dump_ppm_image(message->tiles[j]->data);
+ }
+ }
+ rfx_message_free(context, message);
+ }
+
+ rfx_context_free(context);
+ free(rgb_data);
+}
diff --git a/cunit/test_librfx.h b/cunit/test_librfx.h
index 726146a..51d9bcf 100644
--- a/cunit/test_librfx.h
+++ b/cunit/test_librfx.h
@@ -26,6 +26,8 @@ int add_librfx_suite(void);
void
test_bitstream(void);
void
+test_bitstream_enc(void);
+void
test_rlgr(void);
void
test_differential(void);
@@ -35,4 +37,8 @@ void
test_dwt(void);
void
test_decode(void);
+void
+test_encode(void);
+void
+test_message(void);
diff --git a/dfb/Makefile.am b/dfb/Makefile.am
index c65c5bc..42079ce 100644
--- a/dfb/Makefile.am
+++ b/dfb/Makefile.am
@@ -23,6 +23,7 @@ dfbfreerdp_LDADD = \
../libfreerdp-kbd/libfreerdp-kbd.la \
../libfreerdp-chanman/libfreerdp-chanman.la \
../libfreerdp-core/libfreerdp-core.la \
+ ../libfreerdp-utils/libfreerdp-utils.la \
@DFB_LIBS@ -lfusion -ldirect -lz
diff --git a/dfb/dfb_win.c b/dfb/dfb_win.c
index 5dd7c89..ac92c57 100644
--- a/dfb/dfb_win.c
+++ b/dfb/dfb_win.c
@@ -568,12 +568,16 @@ dfb_post_connect(rdpInst * inst)
}
void
-dfb_uninit(void * dfb_info)
+dfb_uninit(rdpInst * inst)
{
- dfbInfo * dfbi;
- dfbi = (dfbInfo *) dfb_info;
- dfbi->primary->Release(dfbi->primary);
- dfbi->dfb->Release(dfbi->dfb);
+ dfbInfo *dfbi = GET_DFBI(inst);
+
+ if (inst->settings->software_gdi == 1)
+ {
+ gdi_free(inst);
+ dfbi->primary->Release(dfbi->primary);
+ dfbi->dfb->Release(dfbi->dfb);
+ }
}
int
diff --git a/dfb/dfb_win.h b/dfb/dfb_win.h
index b33ca39..7dfcb05 100644
--- a/dfb/dfb_win.h
+++ b/dfb/dfb_win.h
@@ -29,7 +29,7 @@ dfb_pre_connect(rdpInst * inst);
int
dfb_post_connect(rdpInst * inst);
void
-dfb_uninit(void * dfb_info);
+dfb_uninit(rdpInst * inst);
int
dfb_get_fds(rdpInst * inst, void ** read_fds, int * read_count,
void ** write_fds, int * write_count);
diff --git a/dfb/dfbfreerdp.c b/dfb/dfbfreerdp.c
index 275fe81..c207523 100644
--- a/dfb/dfbfreerdp.c
+++ b/dfb/dfbfreerdp.c
@@ -503,7 +503,6 @@ static int
run_dfbfreerdp(rdpSet * settings, rdpChanMan * chan_man)
{
rdpInst * inst;
- void * dfb_info;
void * read_fds[32];
void * write_fds[32];
int read_count;
@@ -643,10 +642,9 @@ run_dfbfreerdp(rdpSet * settings, rdpChanMan * chan_man)
}
}
/* cleanup */
- dfb_info = inst->param1;
inst->rdp_disconnect(inst);
+ dfb_uninit(inst);
freerdp_free(inst);
- dfb_uninit(dfb_info);
return 0;
}
diff --git a/include/freerdp/constants/ui.h b/include/freerdp/constants/ui.h
index 02c17e9..61db60c 100644
--- a/include/freerdp/constants/ui.h
+++ b/include/freerdp/constants/ui.h
@@ -54,7 +54,7 @@
#define PERF_ENABLE_DESKTOP_COMPOSITION 0x00000100
/* Surface Command */
-#define CMDTYPE_SET_SURFACE_BITS 0x0001
+#define CMDTYPE_SET_SURFACE_BITS 0x0001
#define CMDTYPE_FRAME_MARKER 0x0004
#define CMDTYPE_STREAM_SURFACE_BITS 0x0006
diff --git a/include/freerdp/rfx.h b/include/freerdp/rfx.h
index 346a26f..932f5f5 100644
--- a/include/freerdp/rfx.h
+++ b/include/freerdp/rfx.h
@@ -22,6 +22,8 @@
#include "types/base.h"
+#include <freerdp/utils/profiler.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,9 +41,13 @@ extern "C" {
#define WBT_FRAME_END 0xCCC5
#define WBT_REGION 0xCCC6
#define WBT_EXTENSION 0xCCC7
+#define CBT_REGION 0xCAC1
#define CBT_TILESET 0xCAC2
#define CBT_TILE 0xCAC3
+/* tileSize */
+#define CT_TILE_64x64 0x0040
+
/* properties.flags */
#define CODEC_MODE 0x02
@@ -120,7 +126,8 @@ typedef struct _RFX_MESSAGE RFX_MESSAGE;
struct _RFX_CONTEXT
{
- int flags;
+ uint16 flags;
+ uint16 properties;
uint16 width;
uint16 height;
RLGR_MODE mode;
@@ -128,29 +135,58 @@ struct _RFX_CONTEXT
uint32 codec_id;
uint32 codec_version;
RFX_PIXEL_FORMAT pixel_format;
+ uint8 bytes_per_pixel;
/* temporary data within a frame */
+ uint32 frame_idx;
uint8 num_quants;
uint32 * quants;
+ uint8 quant_idx_y;
+ uint8 quant_idx_cb;
+ uint8 quant_idx_cr;
/* pre-allocated buffers */
RFX_POOL* pool; /* memory pool */
- uint32 y_r_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
- uint32 cb_g_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
- uint32 cr_b_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
+ sint16 y_r_mem[4096+8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */
+ sint16 cb_g_mem[4096+8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */
+ sint16 cr_b_mem[4096+8]; /* 4096 = 64x64 (+ 8x2 = 16 for mem align) */
- uint32* y_r_buffer;
- uint32* cb_g_buffer;
- uint32* cr_b_buffer;
+ sint16 * y_r_buffer;
+ sint16 * cb_g_buffer;
+ sint16 * cr_b_buffer;
- uint32 idwt_buffer_8[256]; /* sub-band width 8 */
- uint32 idwt_buffer_16[1024]; /* sub-band width 16 */
- uint32 idwt_buffer_32[4096]; /* sub-band width 32 */
- uint32* idwt_buffers[5]; /* sub-band buffer array */
-
- void (* decode_YCbCr_to_RGB)(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
+ sint16 dwt_mem[32*32*2*2 + 8]; /* maximum sub-band width is 32 */
+
+ sint16 * dwt_buffer;
+
+ /* routines */
+ void (* decode_YCbCr_to_RGB)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
+ void (* encode_RGB_to_YCbCr)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
+ void (* quantization_decode)(sint16 * buffer, const uint32 * quantization_values);
+ void (* quantization_encode)(sint16 * buffer, const uint32 * quantization_values);
+ void (* dwt_2d_decode)(sint16 * buffer, sint16 * dwt_buffer);
+ void (* dwt_2d_encode)(sint16 * buffer, sint16 * dwt_buffer);
+
+ /* profiler definitions */
+ PROFILER_DEFINE(prof_rfx_decode_rgb);
+ PROFILER_DEFINE(prof_rfx_decode_component);
+ PROFILER_DEFINE(prof_rfx_rlgr_decode);
+ PROFILER_DEFINE(prof_rfx_differential_decode);
+ PROFILER_DEFINE(prof_rfx_quantization_decode);
+ PROFILER_DEFINE(prof_rfx_dwt_2d_decode);
+ PROFILER_DEFINE(prof_rfx_decode_YCbCr_to_RGB);
+ PROFILER_DEFINE(prof_rfx_decode_format_RGB);
+
+ PROFILER_DEFINE(prof_rfx_encode_rgb);
+ PROFILER_DEFINE(prof_rfx_encode_component);
+ PROFILER_DEFINE(prof_rfx_rlgr_encode);
+ PROFILER_DEFINE(prof_rfx_differential_encode);
+ PROFILER_DEFINE(prof_rfx_quantization_encode);
+ PROFILER_DEFINE(prof_rfx_dwt_2d_encode);
+ PROFILER_DEFINE(prof_rfx_encode_RGB_to_YCbCr);
+ PROFILER_DEFINE(prof_rfx_encode_format_RGB);
};
typedef struct _RFX_CONTEXT RFX_CONTEXT;
@@ -158,9 +194,13 @@ RFX_CONTEXT* rfx_context_new(void);
void rfx_context_free(RFX_CONTEXT * context);
void rfx_context_set_pixel_format(RFX_CONTEXT * context, RFX_PIXEL_FORMAT pixel_format);
-RFX_MESSAGE* rfx_process_message(RFX_CONTEXT * context, uint8 * data, int data_size);
+RFX_MESSAGE* rfx_process_message(RFX_CONTEXT * context, uint8 * data, int size);
void rfx_message_free(RFX_CONTEXT * context, RFX_MESSAGE * message);
+int rfx_compose_message_header(RFX_CONTEXT * context, uint8 * buffer, int buffer_size);
+int rfx_compose_message_data(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
+ const RFX_RECT * rects, int num_rects, uint8 * image_data, int width, int height, int rowstride);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/freerdp/utils/Makefile.am b/include/freerdp/utils/Makefile.am
index eec4969..410babf 100644
--- a/include/freerdp/utils/Makefile.am
+++ b/include/freerdp/utils/Makefile.am
@@ -6,7 +6,9 @@ include_HEADERS = \
chan_plugin.h \
datablob.h \
memory.h \
+ profiler.h \
semaphore.h \
+ stopwatch.h \
stream.h \
unicode.h \
wait_obj.h
diff --git a/include/freerdp/utils/profiler.h b/include/freerdp/utils/profiler.h
new file mode 100644
index 0000000..1a149c1
--- /dev/null
+++ b/include/freerdp/utils/profiler.h
@@ -0,0 +1,71 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ Profiler Utils
+
+ Copyright 2011 Stephen Erisman
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __UTILS_PROFILER_H
+#define __UTILS_PROFILER_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include <freerdp/utils/memory.h>
+#include <freerdp/utils/stopwatch.h>
+
+struct _PROFILER
+{
+ char *name;
+ STOPWATCH *stopwatch;
+};
+typedef struct _PROFILER PROFILER;
+
+PROFILER * profiler_create(char * name);
+void profiler_free(PROFILER * profiler);
+
+void profiler_enter(PROFILER * profiler);
+void profiler_exit(PROFILER * profiler);
+
+void profiler_print_header();
+void profiler_print(PROFILER * profiler);
+void profiler_print_footer();
+
+#ifdef WITH_PROFILER
+#define IF_PROFILER(then) then
+#define PROFILER_DEFINE(prof) PROFILER * prof
+#define PROFILER_CREATE(prof,name) prof = profiler_create(name)
+#define PROFILER_FREE(prof) profiler_free(prof)
+#define PROFILER_ENTER(prof) profiler_enter(prof)
+#define PROFILER_EXIT(prof) profiler_exit(prof)
+#define PROFILER_PRINT_HEADER profiler_print_header()
+#define PROFILER_PRINT(prof) profiler_print(prof)
+#define PROFILER_PRINT_FOOTER profiler_print_footer()
+#else
+#define IF_PROFILER(then) ;
+#define PROFILER_DEFINE(prof) ;
+#define PROFILER_CREATE(prof,name) ;
+#define PROFILER_FREE(prof) ;
+#define PROFILER_ENTER(prof) ;
+#define PROFILER_EXIT(prof) ;
+#define PROFILER_PRINT_HEADER ;
+#define PROFILER_PRINT(prof) ;
+#define PROFILER_PRINT_FOOTER ;
+#endif
+
+#endif /* __UTILS_PROFILER_H */
diff --git a/include/freerdp/utils/stopwatch.h b/include/freerdp/utils/stopwatch.h
new file mode 100644
index 0000000..02db7ea
--- /dev/null
+++ b/include/freerdp/utils/stopwatch.h
@@ -0,0 +1,44 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ Stopwatch Utils
+
+ Copyright 2011 Stephen Erisman
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __UTILS_STOPWATCH_H
+#define __UTILS_STOPWATCH_H
+
+#include <freerdp/utils/memory.h>
+#include <time.h>
+
+struct _STOPWATCH
+{
+ clock_t start;
+ clock_t end;
+ double elapsed;
+ clock_t count;
+};
+typedef struct _STOPWATCH STOPWATCH;
+
+STOPWATCH * stopwatch_create();
+void stopwatch_free(STOPWATCH * stopwatch);
+
+void stopwatch_start(STOPWATCH * stopwatch);
+void stopwatch_stop(STOPWATCH * stopwatch);
+void stopwatch_reset(STOPWATCH * stopwatch);
+
+double stopwatch_get_elapsed_time_in_seconds(STOPWATCH * stopwatch);
+
+#endif /* __UTILS_STOPWATCH_H */
diff --git a/libfreerdp-gdi/Makefile.am b/libfreerdp-gdi/Makefile.am
index bda50bc..2210079 100644
--- a/libfreerdp-gdi/Makefile.am
+++ b/libfreerdp-gdi/Makefile.am
@@ -21,6 +21,7 @@ libfreerdp_gdi_la_SOURCES = \
gdi_8bpp.c gdi_8bpp.h \
color.c color.h \
decode.c decode.h \
+ libgdi.h \
gdi.c gdi.h
libfreerdp_gdi_la_CFLAGS = \
@@ -30,9 +31,21 @@ libfreerdp_gdi_la_CFLAGS = \
libfreerdp_gdi_la_LDFLAGS =
-libfreerdp_gdi_la_LIBDADD = \
+libfreerdp_gdi_la_LIBADD = \
../libfreerdp-rfx/libfreerdp-rfx.la
+if WITH_SSE
+SUBDIRS = sse
+libfreerdp_gdi_la_CFLAGS += -I./sse
+libfreerdp_gdi_la_LIBADD += ./sse/libfreerdp-gdi-sse.la
+endif
+
+if WITH_NEON
+SUBDIRS = neon
+libfreerdp_gdi_la_CFLAGS += -I./neon
+libfreerdp_gdi_la_LIBADD += ./neon/libfreerdp-gdi-neon.la
+endif
+
# extra
EXTRA_DIST =
diff --git a/libfreerdp-gdi/color.c b/libfreerdp-gdi/color.c
index 371f69a..76f72e9 100644
--- a/libfreerdp-gdi/color.c
+++ b/libfreerdp-gdi/color.c
@@ -618,7 +618,7 @@ uint8* gdi_image_convert_32bpp(uint8* srcData, uint8* dstData, int width, int he
return srcData;
}
-p_gdi_image_convert gdi_image_convert_[5] =
+p_gdi_image_convert_bpp gdi_image_convert_[5] =
{
NULL,
gdi_image_convert_8bpp,
@@ -629,7 +629,7 @@ p_gdi_image_convert gdi_image_convert_[5] =
uint8* gdi_image_convert(uint8* srcData, uint8* dstData, int width, int height, int srcBpp, int dstBpp, HCLRCONV clrconv)
{
- p_gdi_image_convert _p_gdi_image_convert = gdi_image_convert_[IBPP(srcBpp)];
+ p_gdi_image_convert_bpp _p_gdi_image_convert = gdi_image_convert_[IBPP(srcBpp)];
if (_p_gdi_image_convert != NULL)
return _p_gdi_image_convert(srcData, dstData, width, height, srcBpp, dstBpp, clrconv);
diff --git a/libfreerdp-gdi/color.h b/libfreerdp-gdi/color.h
index 2e116c1..14e4f1e 100644
--- a/libfreerdp-gdi/color.h
+++ b/libfreerdp-gdi/color.h
@@ -233,7 +233,7 @@ typedef CLRCONV* HCLRCONV;
#define IBPP(_bpp) (((_bpp + 1)/ 8) % 5)
-typedef uint8* (*p_gdi_image_convert)(uint8* srcData, uint8* dstData, int width, int height, int srcBpp, int dstBpp, HCLRCONV clrconv);
+typedef uint8* (*p_gdi_image_convert_bpp)(uint8* srcData, uint8* dstData, int width, int height, int srcBpp, int dstBpp, HCLRCONV clrconv);
int gdi_get_pixel(uint8 * data, int x, int y, int width, int height, int bpp);
void gdi_set_pixel(uint8* data, int x, int y, int width, int height, int bpp, int pixel);
@@ -244,6 +244,8 @@ uint8* gdi_mono_image_convert(uint8* srcData, int width, int height, int srcBpp,
int gdi_mono_cursor_convert(uint8* srcData, uint8* maskData, uint8* xorMask, uint8* andMask, int width, int height, int bpp, HCLRCONV clrconv);
int gdi_alpha_cursor_convert(uint8* alphaData, uint8* xorMask, uint8* andMask, int width, int height, int bpp, HCLRCONV clrconv);
+typedef uint8* (*p_gdi_image_convert)(uint8* srcData, uint8 *dstData, int width, int height, int srcBpp, int dstBpp, HCLRCONV clrconv);
+
#ifdef __cplusplus
}
#endif
diff --git a/libfreerdp-gdi/decode.c b/libfreerdp-gdi/decode.c
index 133d706..9f3f4c0 100644
--- a/libfreerdp-gdi/decode.c
+++ b/libfreerdp-gdi/decode.c
@@ -30,67 +30,150 @@
#include "decode.h"
-void gdi_decode_frame(GDI *gdi, int x, int y, uint8 * data, uint32 length)
+int gdi_decode_bitmap_data_ex(GDI *gdi, uint16 x, uint16 y, uint8 * data, int size)
{
- int i, tx, ty;
+ int i, j;
+ int tx, ty;
+ uint8* bitmapData;
+ uint32 bitmapDataLength;
RFX_MESSAGE * message;
- message = rfx_process_message((RFX_CONTEXT *) gdi->rfx_context, data, length);
+ /* BITMAP_DATA_EX */
+ /* bpp (1 byte) */
+ /* reserved1 (1 byte) */
+ /* reserved2 (1 byte) */
+ /* codecID (1 byte) */
+ /* width (2 bytes) */
+ /* height (2 bytes) */
+ bitmapDataLength = GET_UINT32(data, 8); /* bitmapDataLength (4 bytes) */
+ bitmapData = data + 12; /* bitmapData */
- for (i = 0; i < message->num_rects; i++)
+ /* decode bitmap data */
+ message = rfx_process_message((RFX_CONTEXT *) gdi->rfx_context, bitmapData, bitmapDataLength);
+
+ if (message->num_rects > 1) /* RDVH */
{
- tx = message->rects[i].x + x;
- ty = message->rects[i].y + y;
- gdi_SetClipRgn(gdi->primary->hdc, tx, ty, message->rects[i].width, message->rects[i].height);
- }
+ /* blit each tile */
+ for (i = 0; i < message->num_tiles; i++)
+ {
+ tx = message->tiles[i]->x + x;
+ ty = message->tiles[i]->y + y;
+ data = message->tiles[i]->data;
+
+ gdi_image_convert(data, gdi->tile->bitmap->data, 64, 64, 32, 32, gdi->clrconv);
+
+ for (j = 0; j < message->num_rects; j++)
+ {
+ gdi_SetClipRgn(gdi->primary->hdc,
+ message->rects[j].x, message->rects[j].y,
+ message->rects[j].width, message->rects[j].height);
+
+ gdi_BitBlt(gdi->primary->hdc, tx, ty, 64, 64, gdi->tile->hdc, 0, 0, GDI_SRCCOPY);
+ }
+ }
- for (i = 0; i < message->num_tiles; i++)
+ for (i = 0; i < message->num_rects; i++)
+ {
+ gdi_InvalidateRegion(gdi->primary->hdc,
+ message->rects[i].x, message->rects[i].y,
+ message->rects[i].width, message->rects[i].height);
+ }
+ }
+ else /* RDSH */
{
- tx = message->tiles[i]->x + x;
- ty = message->tiles[i]->y + y;
- data = message->tiles[i]->data;
+ /* blit each tile */
+ for (i = 0; i < message->num_tiles; i++)
+ {
+ tx = message->tiles[i]->x + x;
+ ty = message->tiles[i]->y + y;
+ data = message->tiles[i]->data;
- gdi_image_convert(data, gdi->tile->bitmap->data, 64, 64, 32, 32, gdi->clrconv);
- gdi_BitBlt(gdi->primary->hdc, tx, ty, 64, 64, gdi->tile->hdc, 0, 0, GDI_SRCCOPY);
+ gdi_image_convert(data, gdi->tile->bitmap->data, 64, 64, 32, 32, gdi->clrconv);
- gdi_InvalidateRegion(gdi->primary->hdc, tx, ty, 64, 64);
+ gdi_BitBlt(gdi->primary->hdc, tx, ty, 64, 64, gdi->tile->hdc, 0, 0, GDI_SRCCOPY);
+
+ gdi_InvalidateRegion(gdi->primary->hdc, tx, ty, 64, 64);
+ }
}
rfx_message_free(gdi->rfx_context, message);
+
+ return bitmapDataLength + 12;
}
-void gdi_decode_data(GDI *gdi, uint8 * data, int data_size)
+int gdi_decode_surface_bits(GDI *gdi, uint8 * data, int size)
{
- int size;
- int destLeft;
- int destTop;
+ int length;
+ uint16 destLeft;
+ uint16 destTop;
+ uint16 destRight;
+ uint16 destBottom;
+
+ /* SURFCMD_STREAM_SURF_BITS */
+ /* cmdType (2 bytes) */
+ destLeft = GET_UINT16(data, 2); /* destLeft (2 bytes) */
+ destTop = GET_UINT16(data, 4); /* destTop (2 bytes) */
+ destRight = GET_UINT16(data, 6); /* destRight (2 bytes) */
+ destBottom = GET_UINT16(data, 8); /* destBottom (2 bytes) */
+
+ /* set clipping region */
+ gdi_SetClipRgn(gdi->primary->hdc, destLeft, destTop, destRight - destLeft, destBottom - destTop);
+
+ /* decode extended bitmap data */
+ length = gdi_decode_bitmap_data_ex(gdi, destLeft, destTop, data + 10, size - 10) + 10;
+
+ return length;
+}
+
+int gdi_decode_frame_marker(GDI *gdi, uint8 * data, int size)
+{
+ uint16 frameAction;
+ uint32 frameId;
+
+ frameAction = GET_UINT16(data, 0); /* frameAction */
+ frameId = GET_UINT32(data, 2); /* frameId */
+
+ switch (frameAction)
+ {
+ case SURFACECMD_FRAMEACTION_BEGIN:
+ break;
+
+ case SURFACECMD_FRAMEACTION_END:
+ break;
+
+ default:
+ break;
+ }
+
+ return 8;
+}
+
+void gdi_decode_data(GDI *gdi, uint8 * data, int size)
+{
+ int cmdLength;
uint16 cmdType;
- uint32 length;
- while (data_size > 0)
+ while (size > 0)
{
- cmdType = GET_UINT16(data, 0);
+ cmdType = GET_UINT16(data, 0); /* cmdType */
switch (cmdType)
{
case CMDTYPE_SET_SURFACE_BITS:
case CMDTYPE_STREAM_SURFACE_BITS:
- destLeft = GET_UINT16(data, 2);
- destTop = GET_UINT16(data, 4);
- length = GET_UINT32(data, 18);
- gdi_decode_frame(gdi, destLeft, destTop, data + 22, length);
- size = 22 + length;
+ cmdLength = gdi_decode_surface_bits(gdi, data, size);
break;
case CMDTYPE_FRAME_MARKER:
- size = 8;
+ cmdLength = gdi_decode_frame_marker(gdi, data, size);
break;
default:
- size = 2;
+ cmdLength = 2;
break;
}
- data_size -= size;
- data += size;
+
+ size -= cmdLength;
+ data += cmdLength;
}
}
diff --git a/libfreerdp-gdi/decode.h b/libfreerdp-gdi/decode.h
index ccadb29..408241a 100644
--- a/libfreerdp-gdi/decode.h
+++ b/libfreerdp-gdi/decode.h
@@ -25,7 +25,7 @@
#include "gdi.h"
-void gdi_decode_frame(GDI *gdi, int x, int y, uint8 * data, uint32 length);
-void gdi_decode_data(GDI *gdi, uint8 * data, int data_size);
+void gdi_decode_bitmap_data(GDI *gdi, int x, int y, uint8 * data, uint32 length);
+void gdi_decode_data(GDI *gdi, uint8 * data, int size);
#endif /* __DECODE_H */
diff --git a/libfreerdp-gdi/gdi.c b/libfreerdp-gdi/gdi.c
index 8e86736..ab25825 100644
--- a/libfreerdp-gdi/gdi.c
+++ b/libfreerdp-gdi/gdi.c
@@ -23,19 +23,7 @@
#include <freerdp/rfx.h>
#include <freerdp/freerdp.h>
-#include "color.h"
-#include "decode.h"
-
-#include "gdi_dc.h"
-#include "gdi_pen.h"
-#include "gdi_line.h"
-#include "gdi_shape.h"
-#include "gdi_brush.h"
-#include "gdi_region.h"
-#include "gdi_bitmap.h"
-#include "gdi_palette.h"
-#include "gdi_drawing.h"
-#include "gdi_clipping.h"
+#include "libgdi.h"
#include "gdi.h"
@@ -1188,6 +1176,11 @@ gdi_init(rdpInst * inst, uint32 flags)
gdi_register_callbacks(inst);
+ gdi->BitBlt = gdi_BitBlt;
+ gdi->gdi_image_convert = gdi_image_convert;
+
+ GDI_INIT_SIMD(gdi);
+
return 0;
}
@@ -1197,6 +1190,8 @@ void gdi_free(rdpInst* inst)
if (gdi)
{
+ gdi_bitmap_free(gdi->tile);
+ rfx_context_free(gdi->rfx_context);
gdi_bitmap_free(gdi->primary);
gdi_DeleteObject((HGDIOBJECT) gdi->hdc);
free(gdi->clrconv);
diff --git a/libfreerdp-gdi/gdi.h b/libfreerdp-gdi/gdi.h
index 9398d36..38f2194 100644
--- a/libfreerdp-gdi/gdi.h
+++ b/libfreerdp-gdi/gdi.h
@@ -229,6 +229,17 @@ struct _GDI_IMAGE
typedef struct _GDI_IMAGE GDI_IMAGE;
typedef GDI_IMAGE* HGDI_IMAGE;
+#include "gdi_dc.h"
+#include "gdi_pen.h"
+#include "gdi_line.h"
+#include "gdi_shape.h"
+#include "gdi_brush.h"
+#include "gdi_region.h"
+#include "gdi_bitmap.h"
+#include "gdi_palette.h"
+#include "gdi_drawing.h"
+#include "gdi_clipping.h"
+
struct _GDI
{
int width;
@@ -247,9 +258,15 @@ struct _GDI
GDI_COLOR textColor;
void * rfx_context;
GDI_IMAGE *tile;
+
+ /* callbacks */
+ p_gdi_BitBlt BitBlt;
+ p_gdi_image_convert gdi_image_convert;
};
typedef struct _GDI GDI;
+#include "decode.h"
+
uint32 gdi_rop3_code(uint8 code);
void gdi_copy_mem(uint8 *d, uint8 *s, int n);
void gdi_copy_memb(uint8 *d, uint8 *s, int n);
diff --git a/libfreerdp-gdi/gdi_bitmap.c b/libfreerdp-gdi/gdi_bitmap.c
index 973a4ea..c5af510 100644
--- a/libfreerdp-gdi/gdi_bitmap.c
+++ b/libfreerdp-gdi/gdi_bitmap.c
@@ -31,7 +31,7 @@
#include "gdi_bitmap.h"
-pBitBlt BitBlt_[5] =
+p_gdi_BitBlt_bpp BitBlt_[5] =
{
NULL,
BitBlt_8bpp,
@@ -180,7 +180,7 @@ HGDI_BITMAP gdi_CreateCompatibleBitmap(HGDI_DC hdc, int nWidth, int nHeight)
int gdi_BitBlt(HGDI_DC hdcDest, int nXDest, int nYDest, int nWidth, int nHeight, HGDI_DC hdcSrc, int nXSrc, int nYSrc, int rop)
{
- pBitBlt _BitBlt = BitBlt_[IBPP(hdcDest->bitsPerPixel)];
+ p_gdi_BitBlt_bpp _BitBlt = BitBlt_[IBPP(hdcDest->bitsPerPixel)];
if (_BitBlt != NULL)
return _BitBlt(hdcDest, nXDest, nYDest, nWidth, nHeight, hdcSrc, nXSrc, nYSrc, rop);
diff --git a/libfreerdp-gdi/gdi_bitmap.h b/libfreerdp-gdi/gdi_bitmap.h
index f6943a0..e4f7604 100644
--- a/libfreerdp-gdi/gdi_bitmap.h
+++ b/libfreerdp-gdi/gdi_bitmap.h
@@ -37,6 +37,7 @@ HGDI_BITMAP gdi_CreateBitmap(int nWidth, int nHeight, int cBitsPerPixel, uint8*
HGDI_BITMAP gdi_CreateCompatibleBitmap(HGDI_DC hdc, int nWidth, int nHeight);
int gdi_BitBlt(HGDI_DC hdcDest, int nXDest, int nYDest, int nWidth, int nHeight, HGDI_DC hdcSrc, int nXSrc, int nYSrc, int rop);
-typedef int (*pBitBlt)(HGDI_DC hdcDest, int nXDest, int nYDest, int nWidth, int nHeight, HGDI_DC hdcSrc, int nXSrc, int nYSrc, int rop);
+typedef int (*p_gdi_BitBlt_bpp)(HGDI_DC hdcDest, int nXDest, int nYDest, int nWidth, int nHeight, HGDI_DC hdcSrc, int nXSrc, int nYSrc, int rop);
+typedef int (*p_gdi_BitBlt)(HGDI_DC hdcDest, int nXDest, int nYDest, int nWidth, int nHeight, HGDI_DC hdcSrc, int nXSrc, int nYSrc, int rop);
#endif /* __GDI_BITMAP_H */
diff --git a/libfreerdp-gdi/libgdi.h b/libfreerdp-gdi/libgdi.h
new file mode 100644
index 0000000..22f48f1
--- /dev/null
+++ b/libfreerdp-gdi/libgdi.h
@@ -0,0 +1,35 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ GDI Library
+
+ Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __LIBGDI_H
+#define __LIBGDI_H
+
+#ifdef WITH_SSE
+#include "gdi_sse.h"
+#endif
+
+#ifdef WITH_NEON
+#include "gdi_neon.h"
+#endif
+
+#ifndef GDI_INIT_SIMD
+#define GDI_INIT_SIMD(_gdi) do { } while (0)
+#endif
+
+#endif /* __LIBGDI_H */
diff --git a/libfreerdp-gdi/neon/Makefile.am b/libfreerdp-gdi/neon/Makefile.am
new file mode 100644
index 0000000..27aee48
--- /dev/null
+++ b/libfreerdp-gdi/neon/Makefile.am
@@ -0,0 +1,25 @@
+## Process this file with automake to produce Makefile.in
+
+# libfreerdp-gdi-neon
+noinst_LTLIBRARIES = libfreerdp-gdi-neon.la
+
+libfreerdp_gdi_neon_la_SOURCES =
+
+if WITH_NEON
+libfreerdp_gdi_neon_la_SOURCES += \
+ gdi_neon.c gdi_neon.h
+endif
+
+libfreerdp_gdi_neon_la_CFLAGS = \
+ -I$(top_srcdir) \
+ -I$(top_srcdir)/include \
+ -I..
+
+libfreerdp_gdi_neon_la_LDFLAGS =
+
+libfreerdp_gdi_neon_la_LIBDADD =
+
+# extra
+EXTRA_DIST =
+
+DISTCLEANFILES =
diff --git a/libfreerdp-gdi/neon/gdi_neon.c b/libfreerdp-gdi/neon/gdi_neon.c
new file mode 100644
index 0000000..a0a92b7
--- /dev/null
+++ b/libfreerdp-gdi/neon/gdi_neon.c
@@ -0,0 +1,32 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ GDI NEON Optimizations
+
+ Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <freerdp/freerdp.h>
+#include "gdi.h"
+
+#include "gdi_neon.h"
+
+void gdi_init_neon(GDI* gdi)
+{
+
+}
diff --git a/libfreerdp-gdi/neon/gdi_neon.h b/libfreerdp-gdi/neon/gdi_neon.h
new file mode 100644
index 0000000..ef94d46
--- /dev/null
+++ b/libfreerdp-gdi/neon/gdi_neon.h
@@ -0,0 +1,31 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ GDI NEON Optimizations
+
+ Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __GDI_NEON_H
+#define __GDI_NEON_H
+
+#include "gdi.h"
+
+void gdi_init_neon(GDI* gdi);
+
+#ifndef GDI_INIT_SIMD
+#define GDI_INIT_SIMD(_gdi) gdi_init_neon(_gdi)
+#endif
+
+#endif /* __GDI_NEON_H */
diff --git a/libfreerdp-gdi/sse/Makefile.am b/libfreerdp-gdi/sse/Makefile.am
new file mode 100644
index 0000000..23f6e27
--- /dev/null
+++ b/libfreerdp-gdi/sse/Makefile.am
@@ -0,0 +1,25 @@
+## Process this file with automake to produce Makefile.in
+
+# libfreerdp-gdi-sse
+noinst_LTLIBRARIES = libfreerdp-gdi-sse.la
+
+libfreerdp_gdi_sse_la_SOURCES =
+
+if WITH_SSE
+libfreerdp_gdi_sse_la_SOURCES += \
+ gdi_sse.c gdi_sse.h
+endif
+
+libfreerdp_gdi_sse_la_CFLAGS = \
+ -I$(top_srcdir) \
+ -I$(top_srcdir)/include \
+ -I..
+
+libfreerdp_gdi_sse_la_LDFLAGS =
+
+libfreerdp_gdi_sse_la_LIBDADD =
+
+# extra
+EXTRA_DIST =
+
+DISTCLEANFILES =
diff --git a/libfreerdp-gdi/sse/gdi_sse.c b/libfreerdp-gdi/sse/gdi_sse.c
new file mode 100644
index 0000000..fb0cc8a
--- /dev/null
+++ b/libfreerdp-gdi/sse/gdi_sse.c
@@ -0,0 +1,32 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ GDI SSE Optimizations
+
+ Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <freerdp/freerdp.h>
+#include "gdi.h"
+
+#include "gdi_sse.h"
+
+void gdi_init_sse(GDI* gdi)
+{
+
+}
diff --git a/libfreerdp-gdi/sse/gdi_sse.h b/libfreerdp-gdi/sse/gdi_sse.h
new file mode 100644
index 0000000..e326503
--- /dev/null
+++ b/libfreerdp-gdi/sse/gdi_sse.h
@@ -0,0 +1,31 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ GDI SSE Optimizations
+
+ Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __GDI_SSE_H
+#define __GDI_SSE_H
+
+#include "gdi.h"
+
+void gdi_init_sse(GDI* gdi);
+
+#ifndef GDI_INIT_SIMD
+#define GDI_INIT_SIMD(_gdi) gdi_init_sse(_gdi)
+#endif
+
+#endif /* __GDI_SSE_H */
diff --git a/libfreerdp-rfx/Makefile.am b/libfreerdp-rfx/Makefile.am
index bc60460..908dd06 100644
--- a/libfreerdp-rfx/Makefile.am
+++ b/libfreerdp-rfx/Makefile.am
@@ -12,6 +12,7 @@ libfreerdp_rfx_la_SOURCES = \
rfx_quantization.c rfx_quantization.h \
rfx_dwt.c rfx_dwt.h \
rfx_decode.c rfx_decode.h \
+ rfx_encode.c rfx_encode.h \
rfx_pool.c rfx_pool.h \
librfx.c librfx.h
@@ -24,10 +25,17 @@ libfreerdp_rfx_la_LDFLAGS =
libfreerdp_rfx_la_LIBADD =
if WITH_SSE
+SUBDIRS = sse
libfreerdp_rfx_la_CFLAGS += -I$(top_srcdir)/libfreerdp-rfx/sse
libfreerdp_rfx_la_LIBADD += sse/libfreerdp-rfx-sse.la
endif
+if WITH_NEON
+SUBDIRS = neon
+libfreerdp_rfx_la_CFLAGS += -I$(top_srcdir)/libfreerdp-rfx/neon
+libfreerdp_rfx_la_LIBADD += neon/libfreerdp-rfx-neon.la
+endif
+
# extra
EXTRA_DIST =
diff --git a/libfreerdp-rfx/librfx.c b/libfreerdp-rfx/librfx.c
index 6bc9f66..5d9b5a6 100644
--- a/libfreerdp-rfx/librfx.c
+++ b/libfreerdp-rfx/librfx.c
@@ -27,9 +27,96 @@
#include "rfx_pool.h"
#include "rfx_decode.h"
+#include "rfx_encode.h"
+#include "rfx_quantization.h"
+#include "rfx_dwt.h"
#include "librfx.h"
+/*
+ The quantization values control the compression rate and quality. The value
+ range is between 6 and 15. The higher value, the higher compression rate
+ and lower quality.
+
+ This is the default values being use by the MS RDP server, and we will also
+ use it as our default values for the encoder. It can be overrided by setting
+ the context->num_quants and context->quants member.
+
+ The order of the values are:
+ LL3, LH3, HL3, HH3, LH2, HL2, HH2, LH1, HL1, HH1
+*/
+static const uint32 rfx_default_quantization_values[] =
+{
+ 6, 6, 6, 6, 7, 7, 8, 8, 8, 9
+};
+
+void rfx_profiler_create(RFX_CONTEXT * context)
+{
+ PROFILER_CREATE(context->prof_rfx_decode_rgb, "rfx_decode_rgb");
+ PROFILER_CREATE(context->prof_rfx_decode_component, "rfx_decode_component");
+ PROFILER_CREATE(context->prof_rfx_rlgr_decode, "rfx_rlgr_decode");
+ PROFILER_CREATE(context->prof_rfx_differential_decode, "rfx_differential_decode");
+ PROFILER_CREATE(context->prof_rfx_quantization_decode, "rfx_quantization_decode");
+ PROFILER_CREATE(context->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode");
+ PROFILER_CREATE(context->prof_rfx_decode_YCbCr_to_RGB, "rfx_decode_YCbCr_to_RGB");
+ PROFILER_CREATE(context->prof_rfx_decode_format_RGB, "rfx_decode_format_RGB");
+
+ PROFILER_CREATE(context->prof_rfx_encode_rgb, "rfx_encode_rgb");
+ PROFILER_CREATE(context->prof_rfx_encode_component, "rfx_encode_component");
+ PROFILER_CREATE(context->prof_rfx_rlgr_encode, "rfx_rlgr_encode");
+ PROFILER_CREATE(context->prof_rfx_differential_encode, "rfx_differential_encode");
+ PROFILER_CREATE(context->prof_rfx_quantization_encode, "rfx_quantization_encode");
+ PROFILER_CREATE(context->prof_rfx_dwt_2d_encode, "rfx_dwt_2d_encode");
+ PROFILER_CREATE(context->prof_rfx_encode_RGB_to_YCbCr, "rfx_encode_RGB_to_YCbCr");
+ PROFILER_CREATE(context->prof_rfx_encode_format_RGB, "rfx_encode_format_RGB");
+}
+
+void rfx_profiler_free(RFX_CONTEXT * context)
+{
+ PROFILER_FREE(context->prof_rfx_decode_rgb);
+ PROFILER_FREE(context->prof_rfx_decode_component);
+ PROFILER_FREE(context->prof_rfx_rlgr_decode);
+ PROFILER_FREE(context->prof_rfx_differential_decode);
+ PROFILER_FREE(context->prof_rfx_quantization_decode);
+ PROFILER_FREE(context->prof_rfx_dwt_2d_decode);
+ PROFILER_FREE(context->prof_rfx_decode_YCbCr_to_RGB);
+ PROFILER_FREE(context->prof_rfx_decode_format_RGB);
+
+ PROFILER_FREE(context->prof_rfx_encode_rgb);
+ PROFILER_FREE(context->prof_rfx_encode_component);
+ PROFILER_FREE(context->prof_rfx_rlgr_encode);
+ PROFILER_FREE(context->prof_rfx_differential_encode);
+ PROFILER_FREE(context->prof_rfx_quantization_encode);
+ PROFILER_FREE(context->prof_rfx_dwt_2d_encode);
+ PROFILER_FREE(context->prof_rfx_encode_RGB_to_YCbCr);
+ PROFILER_FREE(context->prof_rfx_encode_format_RGB);
+}
+
+void rfx_profiler_print(RFX_CONTEXT * context)
+{
+ PROFILER_PRINT_HEADER;
+
+ PROFILER_PRINT(context->prof_rfx_decode_rgb);
+ PROFILER_PRINT(context->prof_rfx_decode_component);
+ PROFILER_PRINT(context->prof_rfx_rlgr_decode);
+ PROFILER_PRINT(context->prof_rfx_differential_decode);
+ PROFILER_PRINT(context->prof_rfx_quantization_decode);
+ PROFILER_PRINT(context->prof_rfx_dwt_2d_decode);
+ PROFILER_PRINT(context->prof_rfx_decode_YCbCr_to_RGB);
+ PROFILER_PRINT(context->prof_rfx_decode_format_RGB);
+
+ PROFILER_PRINT(context->prof_rfx_encode_rgb);
+ PROFILER_PRINT(context->prof_rfx_encode_component);
+ PROFILER_PRINT(context->prof_rfx_rlgr_encode);
+ PROFILER_PRINT(context->prof_rfx_differential_encode);
+ PROFILER_PRINT(context->prof_rfx_quantization_encode);
+ PROFILER_PRINT(context->prof_rfx_dwt_2d_encode);
+ PROFILER_PRINT(context->prof_rfx_encode_RGB_to_YCbCr);
+ PROFILER_PRINT(context->prof_rfx_encode_format_RGB);
+
+ PROFILER_PRINT_FOOTER;
+}
+
RFX_CONTEXT *
rfx_context_new(void)
{
@@ -40,17 +127,26 @@ rfx_context_new(void)
context->pool = rfx_pool_new();
+ /* initialize the default pixel format */
+ rfx_context_set_pixel_format(context, RFX_PIXEL_FORMAT_BGRA);
+
/* align buffers to 16 byte boundary (needed for SSE/SSE2 instructions) */
- context->y_r_buffer = (uint32 *)(((uintptr_t)context->y_r_mem + 16) & ~ 0x0F);
- context->cb_g_buffer = (uint32 *)(((uintptr_t)context->cb_g_mem + 16) & ~ 0x0F);
- context->cr_b_buffer = (uint32 *)(((uintptr_t)context->cr_b_mem + 16) & ~ 0x0F);
+ context->y_r_buffer = (sint16 *)(((uintptr_t)context->y_r_mem + 16) & ~ 0x0F);
+ context->cb_g_buffer = (sint16 *)(((uintptr_t)context->cb_g_mem + 16) & ~ 0x0F);
+ context->cr_b_buffer = (sint16 *)(((uintptr_t)context->cr_b_mem + 16) & ~ 0x0F);
+
+ context->dwt_buffer = (sint16 *)(((uintptr_t)context->dwt_mem + 16) & ~ 0x0F);
- context->idwt_buffers[1] = (uint32*) context->idwt_buffer_8;
- context->idwt_buffers[2] = (uint32*) context->idwt_buffer_16;
- context->idwt_buffers[4] = (uint32*) context->idwt_buffer_32;
+ /* create profilers for default decoding routines */
+ rfx_profiler_create(context);
- /* set up default decoding routines */
+ /* set up default routines */
context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB;
+ context->encode_RGB_to_YCbCr = rfx_encode_RGB_to_YCbCr;
+ context->quantization_decode = rfx_quantization_decode;
+ context->quantization_encode = rfx_quantization_encode;
+ context->dwt_2d_decode = rfx_dwt_2d_decode;
+ context->dwt_2d_encode = rfx_dwt_2d_encode;
/* detect and enable SIMD CPU acceleration */
RFX_INIT_SIMD(context);
@@ -66,6 +162,9 @@ rfx_context_free(RFX_CONTEXT * context)
rfx_pool_free(context->pool);
+ rfx_profiler_print(context);
+ rfx_profiler_free(context);
+
if (context != NULL)
free(context);
}
@@ -74,14 +173,29 @@ void
rfx_context_set_pixel_format(RFX_CONTEXT * context, RFX_PIXEL_FORMAT pixel_format)
{
context->pixel_format = pixel_format;
+ switch (pixel_format)
+ {
+ case RFX_PIXEL_FORMAT_BGRA:
+ case RFX_PIXEL_FORMAT_RGBA:
+ context->bytes_per_pixel = 4;
+ break;
+ case RFX_PIXEL_FORMAT_BGR:
+ case RFX_PIXEL_FORMAT_RGB:
+ context->bytes_per_pixel = 3;
+ break;
+ default:
+ context->bytes_per_pixel = 0;
+ break;
+ }
}
static void
-rfx_process_message_sync(RFX_CONTEXT * context, uint8 * data, int data_size)
+rfx_process_message_sync(RFX_CONTEXT * context, uint8 * data, int size)
{
uint32 magic;
- magic = GET_UINT32(data, 0);
+ /* RFX_SYNC */
+ magic = GET_UINT32(data, 0); /* magic (4 bytes), 0xCACCACCA */
if (magic != WF_MAGIC)
{
@@ -89,7 +203,7 @@ rfx_process_message_sync(RFX_CONTEXT * context, uint8 * data, int data_size)
return;
}
- context->version = GET_UINT16(data, 4);
+ context->version = GET_UINT16(data, 4); /* version (2 bytes), WF_VERSION_1_0 (0x0100) */
if (context->version != WF_VERSION_1_0)
{
@@ -101,70 +215,68 @@ rfx_process_message_sync(RFX_CONTEXT * context, uint8 * data, int data_size)
}
static void
-rfx_process_message_codec_versions(RFX_CONTEXT * context, uint8 * data, int data_size)
+rfx_process_message_codec_versions(RFX_CONTEXT * context, uint8 * data, int size)
{
int numCodecs;
- numCodecs = GET_UINT8(data, 0);
+ numCodecs = GET_UINT8(data, 0); /* numCodecs (1 byte), must be set to 0x01 */
- if (numCodecs < 1)
+ if (numCodecs != 1)
{
- DEBUG_RFX("no version.");
+ DEBUG_RFX("numCodecs: %d, expected:1", numCodecs);
return;
}
- context->codec_id = GET_UINT8(data, 1);
- context->codec_version = GET_UINT16(data, 2);
+ /* RFX_CODEC_VERSIONT */
+ context->codec_id = GET_UINT8(data, 1); /* codecId (1 byte) */
+ context->codec_version = GET_UINT16(data, 2); /* version (2 bytes) */
DEBUG_RFX("id %d version 0x%X.", context->codec_id, context->codec_version);
}
static void
-rfx_process_message_channels(RFX_CONTEXT * context, uint8 * data, int data_size)
+rfx_process_message_channels(RFX_CONTEXT * context, uint8 * data, int size)
{
int channelId;
- int numChannels;
+ uint8 numChannels;
- numChannels = GET_UINT8(data, 0);
+ numChannels = GET_UINT8(data, 0); /* numChannels (1 byte), must bet set to 0x01 */
- if (numChannels < 1)
+ if (numChannels != 1)
{
- DEBUG_RFX("no channel.");
+ DEBUG_RFX("numChannels:%d, expected:1", numChannels);
return;
}
- channelId = GET_UINT8(data, 1);
- context->width = GET_UINT16(data, 2);
- context->height = GET_UINT16(data, 4);
+ /* RFX_CHANNELT */
+ channelId = GET_UINT8(data, 1); /* channelId (1 byte) */
+ context->width = GET_UINT16(data, 2); /* width (2 bytes) */
+ context->height = GET_UINT16(data, 4); /* height (2 bytes) */
DEBUG_RFX("numChannels %d id %d, %dx%d.",
numChannels, channelId, context->width, context->height);
}
static void
-rfx_process_message_context(RFX_CONTEXT * context, uint8 * data, int data_size)
+rfx_process_message_context(RFX_CONTEXT * context, uint8 * data, int size)
{
uint8 ctxId;
- uint8 codecId;
- uint8 channelId;
uint16 tileSize;
uint16 properties;
- codecId = GET_UINT8(data, 0);
- channelId = GET_UINT8(data, 1);
- ctxId = GET_UINT8(data, 2);
- tileSize = GET_UINT16(data, 3);
- properties = GET_UINT16(data, 5);
+ ctxId = GET_UINT8(data, 0); /* ctxId (1 byte), must be set to 0x00 */
+ tileSize = GET_UINT16(data, 1); /* tileSize (2 bytes), must be set to CT_TILE_64x64 (0x0040) */
+ properties = GET_UINT16(data, 3); /* properties (2 bytes) */
- DEBUG_RFX("codec %d channel %d ctx %d tileSize %d properties 0x%X.",
- codecId, channelId, ctxId, tileSize, properties);
+ DEBUG_RFX("ctxId %d tileSize %d properties 0x%X.", ctxId, tileSize, properties);
+ context->properties = properties;
context->flags = (properties & 0x0007);
if (context->flags == CODEC_MODE)
- DEBUG_RFX("codec in image mode.");
+ DEBUG_RFX("codec is in image mode.");
else
- DEBUG_RFX("codec in video mode.");
+ DEBUG_RFX("codec is in video mode.");
switch ((properties & 0x1E00) >> 9)
{
@@ -185,11 +297,30 @@ rfx_process_message_context(RFX_CONTEXT * context, uint8 * data, int data_size)
}
static void
-rfx_process_message_region(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8 * data, int data_size)
+rfx_process_message_frame_begin(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8 * data, int size)
+{
+ uint32 frameIdx;
+ uint16 numRegions;
+
+ frameIdx = GET_UINT32(data, 0); /* frameIdx (4 bytes), if codec is in video mode, must be ignored */
+ numRegions = GET_UINT16(data, 4); /* numRegions (2 bytes) */
+
+ DEBUG_RFX("RFX_FRAME_BEGIN: frameIdx:%d numRegions:%d", frameIdx, numRegions);
+}
+
+static void
+rfx_process_message_frame_end(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8 * data, int size)
+{
+ DEBUG_RFX("RFX_FRAME_END");
+}
+
+static void
+rfx_process_message_region(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8 * data, int size)
{
int i;
- message->num_rects = GET_UINT16(data, 3);
+ /* regionFlags (1 byte) */
+ message->num_rects = GET_UINT16(data, 1); /* numRects (2 bytes) */
if (message->num_rects < 1)
{
@@ -202,26 +333,28 @@ rfx_process_message_region(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8 *
else
message->rects = (RFX_RECT*) malloc(message->num_rects * sizeof(RFX_RECT));
- data += 5;
- data_size -= 5;
+ data += 3;
+ size -= 3;
- for (i = 0; i < message->num_rects && data_size > 0; i++)
+ /* rects */
+ for (i = 0; i < message->num_rects && size > 0; i++)
{
- message->rects[i].x = GET_UINT16(data, 0);
- message->rects[i].y = GET_UINT16(data, 2);
- message->rects[i].width = GET_UINT16(data, 4);
- message->rects[i].height = GET_UINT16(data, 6);
+ /* RFX_RECT */
+ message->rects[i].x = GET_UINT16(data, 0); /* x (2 bytes) */
+ message->rects[i].y = GET_UINT16(data, 2); /* y (2 bytes) */
+ message->rects[i].width = GET_UINT16(data, 4); /* width (2 bytes) */
+ message->rects[i].height = GET_UINT16(data, 6); /* height (2 bytes) */
DEBUG_RFX("rect %d (%d %d %d %d).",
i, message->rects[i].x, message->rects[i].y, message->rects[i].width, message->rects[i].height);
data += 8;
- data_size -= 8;
+ size -= 8;
}
}
static void
-rfx_process_message_tile(RFX_CONTEXT * context, RFX_TILE * tile, uint8 * data, int data_size)
+rfx_process_message_tile(RFX_CONTEXT * context, RFX_TILE * tile, uint8 * data, int size)
{
uint8 quantIdxY;
uint8 quantIdxCb;
@@ -229,14 +362,15 @@ rfx_process_message_tile(RFX_CONTEXT * context, RFX_TILE * tile, uint8 * data, i
uint16 xIdx, yIdx;
uint16 YLen, CbLen, CrLen;
- quantIdxY = GET_UINT8(data, 0);
- quantIdxCb = GET_UINT8(data, 1);
- quantIdxCr = GET_UINT8(data, 2);
- xIdx = GET_UINT16(data, 3);
- yIdx = GET_UINT16(data, 5);
- YLen = GET_UINT16(data, 7);
- CbLen = GET_UINT16(data, 9);
- CrLen = GET_UINT16(data, 11);
+ /* RFX_TILE */
+ quantIdxY = GET_UINT8(data, 0); /* quantIdxY (1 byte) */
+ quantIdxCb = GET_UINT8(data, 1); /* quantIdxCb (1 byte) */
+ quantIdxCr = GET_UINT8(data, 2); /* quantIdxCr (1 byte) */
+ xIdx = GET_UINT16(data, 3); /* xIdx (2 bytes) */
+ yIdx = GET_UINT16(data, 5); /* yIdx (2 bytes) */
+ YLen = GET_UINT16(data, 7); /* YLen (2 bytes) */
+ CbLen = GET_UINT16(data, 9); /* CbLen (2 bytes) */
+ CrLen = GET_UINT16(data, 11); /* CrLen (2 bytes) */
DEBUG_RFX("quantIdxY:%d quantIdxCb:%d quantIdxCr:%d xIdx:%d yIdx:%d YLen:%d CbLen:%d CrLen:%d",
quantIdxY, quantIdxCb, quantIdxCr, xIdx, yIdx, YLen, CbLen, CrLen);
@@ -253,14 +387,27 @@ rfx_process_message_tile(RFX_CONTEXT * context, RFX_TILE * tile, uint8 * data, i
}
static void
-rfx_process_message_tileset(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8 * data, int data_size)
+rfx_process_message_tileset(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8 * data, int size)
{
int i;
+ uint16 subtype;
uint32 blockLen;
uint32 blockType;
- uint32 tileDataSize;
+ uint32 tilesDataSize;
+
+ subtype = GET_UINT16(data, 0); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */
+
+ if (subtype != CBT_TILESET)
+ {
+ DEBUG_RFX("invalid subtype, expected CBT_TILESET.");
+ return;
+ }
- context->num_quants = GET_UINT8(data, 4);
+ /* idx (2 bytes), must be set to 0x0000 */
+ /* properties (2 bytes) */
+
+ context->num_quants = GET_UINT8(data, 6); /* numQuant (1 byte) */
+ /* tileSize (1 byte), must be set to 0x40 */
if (context->num_quants < 1)
{
@@ -268,7 +415,7 @@ rfx_process_message_tileset(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8
return;
}
- message->num_tiles = GET_UINT16(data, 6);
+ message->num_tiles = GET_UINT16(data, 8); /* numTiles (2 bytes) */
if (message->num_tiles < 1)
{
@@ -276,18 +423,20 @@ rfx_process_message_tileset(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8
return;
}
- tileDataSize = GET_UINT32(data, 8);
+ tilesDataSize = GET_UINT32(data, 10); /* tilesDataSize (4 bytes) */
- data += 12;
- data_size -= 12;
+ data += 14;
+ size -= 14;
if (context->quants != NULL)
context->quants = (uint32*) realloc((void*) context->quants, context->num_quants * 10 * sizeof(uint32));
else
context->quants = (uint32*) malloc(context->num_quants * 10 * sizeof(uint32));
- for (i = 0; i < context->num_quants && data_size > 0; i++)
+ /* quantVals */
+ for (i = 0; i < context->num_quants && size > 0; i++)
{
+ /* RFX_CODEC_QUANT */
context->quants[i * 10] = (data[0] & 0x0F);
context->quants[i * 10 + 1] = (data[0] >> 4);
context->quants[i * 10 + 2] = (data[1] & 0x0F);
@@ -307,36 +456,35 @@ rfx_process_message_tileset(RFX_CONTEXT * context, RFX_MESSAGE * message, uint8
context->quants[i * 10 + 8], context->quants[i * 10 + 9]);
data += 5;
- data_size -= 5;
+ size -= 5;
}
message->tiles = rfx_pool_get_tiles(context->pool, message->num_tiles);
- for (i = 0; i < message->num_tiles && data_size > 0; i++)
+ /* tiles */
+ for (i = 0; i < message->num_tiles && size > 0; i++)
{
- blockType = GET_UINT16(data, 0);
- blockLen = GET_UINT32(data, 2);
+ /* RFX_TILE */
+ blockType = GET_UINT16(data, 0); /* blockType (2 bytes), must be set to CBT_TILE (0xCAC3) */
+ blockLen = GET_UINT32(data, 2); /* blockLen (4 bytes) */
- switch (blockType)
+ if (blockType != CBT_TILE)
{
- case CBT_TILE:
- rfx_process_message_tile(context, message->tiles[i], data + 6, blockLen - 6);
- break;
-
- default:
- DEBUG_RFX("unknown block type 0x%X", blockType);
- break;
+ DEBUG_RFX("unknown block type 0x%X, expected CBT_TILE (0xCAC3).", blockType);
+ break;
}
- data_size -= blockLen;
+ rfx_process_message_tile(context, message->tiles[i], data + 6, blockLen - 6);
+
+ size -= blockLen;
data += blockLen;
}
}
RFX_MESSAGE *
-rfx_process_message(RFX_CONTEXT * context, uint8 * data, int data_size)
+rfx_process_message(RFX_CONTEXT * context, uint8 * data, int size)
{
- uint32 subtype;
+ uint32 offset;
uint32 blockLen;
uint32 blockType;
RFX_MESSAGE * message;
@@ -344,50 +492,55 @@ rfx_process_message(RFX_CONTEXT * context, uint8 * data, int data_size)
message = (RFX_MESSAGE *) malloc(sizeof(RFX_MESSAGE));
memset(message, 0, sizeof(RFX_MESSAGE));
- while (data_size > 0)
+ while (size > 0)
{
- blockType = GET_UINT16(data, 0);
- blockLen = GET_UINT32(data, 2);
+ /* RFX_BLOCKT */
+ blockType = GET_UINT16(data, 0); /* blockType (2 bytes) */
+ blockLen = GET_UINT32(data, 2); /* blockLen (4 bytes) */
+ offset = 6;
+
DEBUG_RFX("blockType 0x%X blockLen %d", blockType, blockLen);
+ if (blockType >= WBT_CONTEXT && blockType <= WBT_EXTENSION)
+ {
+ /* RFX_CODEC_CHANNELT */
+ /* codecId (1 byte) must be set to 0x01 */
+ /* channelId (1 byte) must be set to 0x00 */
+ offset = 8;
+ }
+
switch (blockType)
{
case WBT_SYNC:
- rfx_process_message_sync(context, data + 6, blockLen - 6);
+ rfx_process_message_sync(context, data + offset, blockLen - offset);
break;
case WBT_CODEC_VERSIONS:
- rfx_process_message_codec_versions(context, data + 6, blockLen - 6);
+ rfx_process_message_codec_versions(context, data + offset, blockLen - offset);
break;
case WBT_CHANNELS:
- rfx_process_message_channels(context, data + 6, blockLen - 6);
+ rfx_process_message_channels(context, data + offset, blockLen - offset);
break;
case WBT_CONTEXT:
- rfx_process_message_context(context, data + 6, blockLen - 6);
+ rfx_process_message_context(context, data + offset, blockLen - offset);
break;
case WBT_FRAME_BEGIN:
+ rfx_process_message_frame_begin(context, message, data + offset, blockLen - offset);
+ break;
+
case WBT_FRAME_END:
- /* Can be ignored. */
+ rfx_process_message_frame_end(context, message, data + offset, blockLen - offset);
break;
case WBT_REGION:
- rfx_process_message_region(context, message, data + 6, blockLen - 6);
+ rfx_process_message_region(context, message, data + offset, blockLen - offset);
break;
case WBT_EXTENSION:
- subtype = GET_UINT16(data, 8);
- switch (subtype)
- {
- case CBT_TILESET:
- rfx_process_message_tileset(context, message, data + 10, blockLen - 10);
- break;
- default:
- DEBUG_RFX("unknown subtype 0x%X", subtype);
- break;
- }
+ rfx_process_message_tileset(context, message, data + offset, blockLen - offset);
break;
default:
@@ -395,7 +548,7 @@ rfx_process_message(RFX_CONTEXT * context, uint8 * data, int data_size)
break;
}
- data_size -= blockLen;
+ size -= blockLen;
data += blockLen;
}
@@ -419,3 +572,320 @@ rfx_message_free(RFX_CONTEXT * context, RFX_MESSAGE * message)
free(message);
}
}
+
+static int
+rfx_compose_message_sync(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
+{
+ if (buffer_size < 12)
+ {
+ printf("rfx_compose_message_sync: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_SYNC); /* BlockT.blockType */
+ SET_UINT32(buffer, 2, 12); /* BlockT.blockLen */
+ SET_UINT32(buffer, 6, WF_MAGIC); /* magic */
+ SET_UINT16(buffer, 10, WF_VERSION_1_0); /* version */
+
+ return 12;
+}
+
+static int
+rfx_compose_message_codec_versions(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
+{
+ if (buffer_size < 10)
+ {
+ printf("rfx_compose_message_codec_versions: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_CODEC_VERSIONS); /* BlockT.blockType */
+ SET_UINT32(buffer, 2, 10); /* BlockT.blockLen */
+ SET_UINT8(buffer, 6, 1); /* numCodecs */
+ SET_UINT8(buffer, 7, 1); /* codecs.codecId */
+ SET_UINT16(buffer, 8, WF_VERSION_1_0); /* codecs.version */
+
+ return 10;
+}
+
+static int
+rfx_compose_message_channels(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
+{
+ if (buffer_size < 12)
+ {
+ printf("rfx_compose_message_channels: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_CHANNELS); /* BlockT.blockType */
+ SET_UINT32(buffer, 2, 12); /* BlockT.blockLen */
+ SET_UINT8(buffer, 6, 1); /* numChannels */
+ SET_UINT8(buffer, 7, 0); /* Channel.channelId */
+ SET_UINT16(buffer, 8, context->width); /* Channel.width */
+ SET_UINT16(buffer, 10, context->height); /* Channel.height */
+
+ return 12;
+}
+
+static int
+rfx_compose_message_context(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
+{
+ uint16 properties;
+
+ if (buffer_size < 13)
+ {
+ printf("rfx_compose_message_context: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_CONTEXT); /* CodecChannelT.blockType */
+ SET_UINT32(buffer, 2, 13); /* CodecChannelT.blockLen */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+ SET_UINT8(buffer, 8, 0); /* ctxId */
+ SET_UINT16(buffer, 9, CT_TILE_64x64); /* tileSize */
+
+ /* properties */
+ properties = context->flags; /* flags */
+ properties |= (COL_CONV_ICT << 3); /* cct */
+ properties |= (CLW_XFORM_DWT_53_A << 5); /* xft */
+ properties |= ((context->mode == RLGR1 ? CLW_ENTROPY_RLGR1 : CLW_ENTROPY_RLGR3) << 9); /* et */
+ properties |= (SCALAR_QUANTIZATION << 13); /* qt */
+ SET_UINT16(buffer, 11, properties);
+ context->properties = properties;
+
+ return 13;
+}
+
+int
+rfx_compose_message_header(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
+{
+ int composed_size;
+
+ composed_size = rfx_compose_message_sync(context, buffer, buffer_size);
+ composed_size += rfx_compose_message_codec_versions(context, buffer + composed_size, buffer_size - composed_size);
+ composed_size += rfx_compose_message_channels(context, buffer + composed_size, buffer_size - composed_size);
+ composed_size += rfx_compose_message_context(context, buffer + composed_size, buffer_size - composed_size);
+
+ return composed_size;
+}
+
+static int
+rfx_compose_message_frame_begin(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
+{
+ if (buffer_size < 14)
+ {
+ printf("rfx_compose_message_frame_begin: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_FRAME_BEGIN); /* CodecChannelT.blockType */
+ SET_UINT32(buffer, 2, 14); /* CodecChannelT.blockLen */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+ SET_UINT32(buffer, 8, context->frame_idx); /* frameIdx */
+ SET_UINT16(buffer, 12, 1); /* numRegions */
+
+ return 14;
+}
+
+static int
+rfx_compose_message_region(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
+ const RFX_RECT * rects, int num_rects)
+{
+ int size;
+ int i;
+
+ if (buffer_size < 15 + num_rects * 8)
+ {
+ printf("rfx_compose_message_region: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_REGION); /* CodecChannelT.blockType */
+ /* set CodecChannelT.blockLen later */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+ SET_UINT8(buffer, 8, 1); /* regionFlags */
+ SET_UINT16(buffer, 9, num_rects); /* numRects */
+ size = 11;
+
+ for (i = 0; i < num_rects; i++)
+ {
+ SET_UINT16(buffer, size, rects[i].x);
+ SET_UINT16(buffer, size + 2, rects[i].y);
+ SET_UINT16(buffer, size + 4, rects[i].width);
+ SET_UINT16(buffer, size + 6, rects[i].height);
+ size += 8;
+ }
+
+ SET_UINT16(buffer, size, CBT_REGION); /* regionType */
+ SET_UINT16(buffer, size + 2, 1); /* numTilesets */
+ size += 4;
+
+ SET_UINT32(buffer, 2, size); /* CodecChannelT.blockLen */
+ return size;
+}
+
+static int
+rfx_compose_message_tile(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
+ uint8 * tile_data, int tile_width, int tile_height, int rowstride,
+ const uint32 * quantVals, int quantIdxY, int quantIdxCb, int quantIdxCr, int xIdx, int yIdx)
+{
+ int YLen = 0;
+ int CbLen = 0;
+ int CrLen = 0;
+ int size;
+
+ if (buffer_size < 19)
+ {
+ printf("rfx_compose_message_tile: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, CBT_TILE); /* BlockT.blockType */
+ /* set BlockT.blockLen later */
+ SET_UINT8(buffer, 6, quantIdxY); /* quantIdxY */
+ SET_UINT8(buffer, 7, quantIdxCb); /* quantIdxCb */
+ SET_UINT8(buffer, 8, quantIdxCr); /* quantIdxCr */
+ SET_UINT16(buffer, 9, xIdx); /* xIdx */
+ SET_UINT16(buffer, 11, yIdx); /* yIdx */
+
+ rfx_encode_rgb(context, tile_data, tile_width, tile_height, rowstride,
+ quantVals + quantIdxY * 10, quantVals + quantIdxCb * 10, quantVals + quantIdxCr * 10,
+ buffer + 19, buffer_size - 19, &YLen, &CbLen, &CrLen);
+
+ DEBUG_RFX("xIdx=%d yIdx=%d width=%d height=%d YLen=%d CbLen=%d CrLen=%d",
+ xIdx, yIdx, tile_width, tile_height, YLen, CbLen, CrLen);
+
+ SET_UINT16(buffer, 13, YLen); /* YLen */
+ SET_UINT16(buffer, 15, CbLen); /* CbLen */
+ SET_UINT16(buffer, 17, CrLen); /* CrLen */
+ size = 19 + YLen + CbLen + CrLen;
+ SET_UINT32(buffer, 2, size); /* BlockT.blockLen */
+
+ return size;
+}
+
+static int
+rfx_compose_message_tileset(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
+ uint8 * image_data, int width, int height, int rowstride)
+{
+ int size;
+ int i;
+ int numQuants;
+ const uint32 * quantVals;
+ const uint32 * quantValsPtr;
+ int quantIdxY;
+ int quantIdxCb;
+ int quantIdxCr;
+ int numTiles;
+ int numTilesX;
+ int numTilesY;
+ int xIdx;
+ int yIdx;
+ int tilesDataSize;
+
+ if (context->num_quants == 0)
+ {
+ numQuants = 1;
+ quantVals = rfx_default_quantization_values;
+ quantIdxY = 0;
+ quantIdxCb = 0;
+ quantIdxCr = 0;
+ }
+ else
+ {
+ numQuants = context->num_quants;
+ quantVals = context->quants;
+ quantIdxY = context->quant_idx_y;
+ quantIdxCb = context->quant_idx_cb;
+ quantIdxCr = context->quant_idx_cr;
+ }
+
+ numTilesX = (width + 63) / 64;
+ numTilesY = (height + 63) / 64;
+ numTiles = numTilesX * numTilesY;
+
+ if (buffer_size < 22 + numQuants * 5)
+ {
+ printf("rfx_compose_message_tileset: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_EXTENSION); /* CodecChannelT.blockType */
+ /* set CodecChannelT.blockLen later */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+ SET_UINT16(buffer, 8, CBT_TILESET); /* subtype */
+ SET_UINT16(buffer, 10, 0); /* idx */
+ SET_UINT16(buffer, 12, context->properties); /* properties */
+ SET_UINT8(buffer, 14, numQuants); /* numQuants */
+ SET_UINT8(buffer, 15, 0x40); /* tileSize */
+ SET_UINT16(buffer, 16, numTiles); /* numTiles */
+ /* set tilesDataSize later */
+ size = 22;
+
+ quantValsPtr = quantVals;
+ for (i = 0; i < numQuants * 5; i++)
+ {
+ SET_UINT8(buffer, size, quantValsPtr[0] + (quantValsPtr[1] << 4));
+ quantValsPtr += 2;
+ size++;
+ }
+
+ DEBUG_RFX("width:%d height:%d rowstride:%d", width, height, rowstride);
+
+ tilesDataSize = 0;
+ for (yIdx = 0; yIdx < numTilesY; yIdx++)
+ {
+ for (xIdx = 0; xIdx < numTilesX; xIdx++)
+ {
+ tilesDataSize += rfx_compose_message_tile(context,
+ buffer + size + tilesDataSize, buffer_size - size - tilesDataSize,
+ image_data + yIdx * 64 * rowstride + xIdx * 64 * context->bytes_per_pixel,
+ xIdx < numTilesX - 1 ? 64 : width - xIdx * 64,
+ yIdx < numTilesY - 1 ? 64 : height - yIdx * 64,
+ rowstride, quantVals, quantIdxY, quantIdxCb, quantIdxCr, xIdx, yIdx);
+ }
+ }
+
+ size += tilesDataSize;
+ SET_UINT32(buffer, 2, size); /* CodecChannelT.blockLen */
+ SET_UINT32(buffer, 18, tilesDataSize); /* tilesDataSize */
+
+ return size;
+}
+
+static int
+rfx_compose_message_frame_end(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
+{
+ if (buffer_size < 8)
+ {
+ printf("rfx_compose_message_frame_end: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_FRAME_END); /* CodecChannelT.blockType */
+ SET_UINT32(buffer, 2, 8); /* CodecChannelT.blockLen */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+
+ return 8;
+}
+
+int
+rfx_compose_message_data(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
+ const RFX_RECT * rects, int num_rects, uint8 * image_data, int width, int height, int rowstride)
+{
+ int composed_size;
+
+ composed_size = rfx_compose_message_frame_begin(context, buffer, buffer_size);
+ composed_size += rfx_compose_message_region(context, buffer + composed_size, buffer_size - composed_size,
+ rects, num_rects);
+ composed_size += rfx_compose_message_tileset(context, buffer + composed_size, buffer_size - composed_size,
+ image_data, width, height, rowstride);
+ composed_size += rfx_compose_message_frame_end(context, buffer + composed_size, buffer_size - composed_size);
+
+ return composed_size;
+}
diff --git a/libfreerdp-rfx/librfx.h b/libfreerdp-rfx/librfx.h
index 3c18474..87ea742 100644
--- a/libfreerdp-rfx/librfx.h
+++ b/libfreerdp-rfx/librfx.h
@@ -32,6 +32,10 @@
#include "rfx_sse.h"
#endif
+#ifdef WITH_NEON
+#include "rfx_neon.h"
+#endif
+
#ifndef RFX_INIT_SIMD
#define RFX_INIT_SIMD(_rfx_context) do { } while (0)
#endif
diff --git a/libfreerdp-rfx/neon/Makefile.am b/libfreerdp-rfx/neon/Makefile.am
new file mode 100644
index 0000000..12d4921
--- /dev/null
+++ b/libfreerdp-rfx/neon/Makefile.am
@@ -0,0 +1,25 @@
+## Process this file with automake to produce Makefile.in
+
+# libfreerdp-rfx-neon
+noinst_LTLIBRARIES = libfreerdp-rfx-neon.la
+
+libfreerdp_rfx_neon_la_SOURCES =
+
+if WITH_NEON
+libfreerdp_rfx_neon_la_SOURCES += \
+ rfx_neon.c rfx_neon.h
+endif
+
+libfreerdp_rfx_neon_la_CFLAGS = \
+ -I$(top_srcdir) \
+ -I$(top_srcdir)/include \
+ -I..
+
+libfreerdp_rfx_neon_la_LDFLAGS =
+
+libfreerdp_rfx_neon_la_LIBDADD =
+
+# extra
+EXTRA_DIST =
+
+DISTCLEANFILES =
diff --git a/libfreerdp-rfx/neon/rfx_neon.c b/libfreerdp-rfx/neon/rfx_neon.c
new file mode 100644
index 0000000..745f9ca
--- /dev/null
+++ b/libfreerdp-rfx/neon/rfx_neon.c
@@ -0,0 +1,372 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ RemoteFX Codec Library - NEON Optimizations
+
+ Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <arm_neon.h>
+
+#include "rfx_neon.h"
+
+#if defined(ANDROID)
+#include <cpu-features.h>
+#endif
+
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+prefetch_data(void * buffer1)
+{
+ asm(" pld [%0, #64] \t\n"
+ : // no output
+ : "r" (buffer1)
+ );
+}
+
+void rfx_decode_YCbCr_to_RGB_NEON(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer)
+{
+ int16x8_t zero = vdupq_n_s16(0);
+ int16x8_t max = vdupq_n_s16(255);
+ int16x8_t y_add = vdupq_n_s16(128);
+
+ int16x8_t* y_r_buf = (int16x8_t*)y_r_buffer;
+ int16x8_t* cb_g_buf = (int16x8_t*)cb_g_buffer;
+ int16x8_t* cr_b_buf = (int16x8_t*)cr_b_buffer;
+
+ int i;
+ for (i = 0; i < 4096 / 8; i++)
+ {
+ prefetch_data(&y_r_buf[i]);
+ prefetch_data(&cr_b_buf[i]);
+ prefetch_data(&cb_g_buf[i]);
+
+ int16x8_t y = vld1q_s16((sint16*)&y_r_buf[i]);
+ y = vaddq_s16(y, y_add);
+
+ int16x8_t cr = vld1q_s16((sint16*)&cr_b_buf[i]);
+
+ // r = between((y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5)), 0, 255);
+ int16x8_t r = vaddq_s16(y, cr);
+ r = vaddq_s16(r, vshrq_n_s16(cr, 2));
+ r = vaddq_s16(r, vshrq_n_s16(cr, 3));
+ r = vaddq_s16(r, vshrq_n_s16(cr, 5));
+ r = vminq_s16(vmaxq_s16(r, zero), max);
+ vst1q_s16((sint16*)&y_r_buf[i], r);
+
+ // cb = cb_g_buf[i];
+ int16x8_t cb = vld1q_s16((sint16*)&cb_g_buf[i]);
+
+ // g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1) - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255);
+ int16x8_t g = vsubq_s16(y, vshrq_n_s16(cb, 2));
+ g = vsubq_s16(g, vshrq_n_s16(cb, 4));
+ g = vsubq_s16(g, vshrq_n_s16(cb, 5));
+ g = vsubq_s16(g, vshrq_n_s16(cr, 1));
+ g = vsubq_s16(g, vshrq_n_s16(cr, 3));
+ g = vsubq_s16(g, vshrq_n_s16(cr, 4));
+ g = vsubq_s16(g, vshrq_n_s16(cr, 5));
+ g = vminq_s16(vmaxq_s16(g, zero), max);
+ vst1q_s16((sint16*)&cb_g_buf[i], g);
+
+ // b = between((y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6)), 0, 255);
+ int16x8_t b = vaddq_s16(y, cb);
+ b = vaddq_s16(b, vshrq_n_s16(cb, 1));
+ b = vaddq_s16(b, vshrq_n_s16(cb, 2));
+ b = vaddq_s16(b, vshrq_n_s16(cb, 6));
+ b = vminq_s16(vmaxq_s16(b, zero), max);
+ vst1q_s16((sint16*)&cr_b_buf[i], b);
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_quantization_decode_block_NEON(sint16 * buffer, const int buffer_size, const uint32 factor)
+{
+ if (factor <= 6)
+ return;
+ int16x8_t quantFactors = vdupq_n_s16(factor - 6);
+ int16x8_t* buf = (int16x8_t*)buffer;
+ int16x8_t* buf_end = (int16x8_t*)(buffer + buffer_size);
+
+ do
+ {
+ prefetch_data(buf);
+ int16x8_t val = vld1q_s16((sint16*)buf);
+ val = vshlq_s16(val, quantFactors);
+ vst1q_s16((sint16*)buf, val);
+ buf++;
+ }
+ while(buf < buf_end);
+}
+
+void
+rfx_quantization_decode_NEON(sint16 * buffer, const uint32 * quantization_values)
+{
+ rfx_quantization_decode_block_NEON(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_decode_block_NEON(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_decode_block_NEON(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_decode_block_NEON(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_decode_block_NEON(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_decode_block_NEON(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_decode_block_NEON(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_decode_block_NEON(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_decode_block_NEON(buffer + 3868, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_decode_block_NEON(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+}
+
+
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_decode_block_horiz_NEON(sint16 * l, sint16 * h, sint16 * dst, int subband_width)
+{
+ int y, n;
+ sint16 * l_ptr = l;
+ sint16 * h_ptr = h;
+ sint16 * dst_ptr = dst;
+
+ for (y = 0; y < subband_width; y++)
+ {
+ /* Even coefficients */
+ for (n = 0; n < subband_width; n+=8)
+ {
+ // dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1);
+ int16x8_t l_n = vld1q_s16(l_ptr);
+ prefetch_data(l_ptr);
+
+ int16x8_t h_n = vld1q_s16(h_ptr);
+ int16x8_t h_n_m = vld1q_s16(h_ptr - 1);
+ prefetch_data(h_ptr);
+
+ if (n == 0)
+ {
+ int16_t first = vgetq_lane_s16(h_n_m, 1);
+ h_n_m = vsetq_lane_s16(first, h_n_m, 0);
+ }
+
+ int16x8_t tmp_n = vaddq_s16(h_n, h_n_m);
+ tmp_n = vaddq_s16(tmp_n, vdupq_n_s16(1));
+ tmp_n = vshrq_n_s16(tmp_n, 1);
+
+ int16x8_t dst_n = vsubq_s16(l_n, tmp_n);
+
+ vst1q_s16(l_ptr, dst_n);
+
+ l_ptr+=8;
+ h_ptr+=8;
+ }
+ l_ptr -= subband_width;
+ h_ptr -= subband_width;
+
+ /* Odd coefficients */
+ for (n = 0; n < subband_width; n+=8)
+ {
+ // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1);
+
+ int16x8_t h_n = vld1q_s16(h_ptr);
+ prefetch_data(h_ptr);
+
+ h_n = vshlq_n_s16(h_n, 1);
+
+ int16x8x2_t dst_n;
+ dst_n.val[0] = vld1q_s16(l_ptr);
+ prefetch_data(l_ptr);
+ int16x8_t dst_n_p = vld1q_s16(l_ptr + 1);
+ if (n == subband_width - 8)
+ {
+ int16_t last = vgetq_lane_s16(dst_n_p, 6);
+ dst_n_p = vsetq_lane_s16(last, dst_n_p, 7);
+ }
+
+ dst_n.val[1] = vaddq_s16(dst_n_p, dst_n.val[0]);
+ dst_n.val[1] = vshrq_n_s16(dst_n.val[1], 1);
+
+ dst_n.val[1] = vaddq_s16(dst_n.val[1], h_n);
+
+ vst2q_s16(dst_ptr, dst_n);
+ prefetch_data(dst_ptr);
+
+ l_ptr+=8;
+ h_ptr+=8;
+ dst_ptr+=16;
+ }
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_decode_block_vert_NEON(sint16 * l, sint16 * h, sint16 * dst, int subband_width)
+{
+ int x, n;
+ sint16 * l_ptr = l;
+ sint16 * h_ptr = h;
+ sint16 * dst_ptr = dst;
+
+ int total_width = subband_width + subband_width;
+
+ /* Even coefficients */
+ for (n = 0; n < subband_width; n++)
+ {
+ for (x = 0; x < total_width; x+=8)
+ {
+ prefetch_data(l_ptr);
+ prefetch_data(h_ptr);
+ // dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1);
+
+ int16x8_t l_n = vld1q_s16(l_ptr);
+ int16x8_t h_n = vld1q_s16(h_ptr);
+
+ int16x8_t tmp_n = vaddq_s16(h_n, vdupq_n_s16(1));;
+ if (n == 0)
+ tmp_n = vaddq_s16(tmp_n, h_n);
+ else
+ {
+ int16x8_t h_n_m = vld1q_s16((h_ptr - total_width));
+ tmp_n = vaddq_s16(tmp_n, h_n_m);
+ }
+ tmp_n = vshrq_n_s16(tmp_n, 1);
+
+ prefetch_data(dst_ptr);
+ int16x8_t dst_n = vsubq_s16(l_n, tmp_n);
+ vst1q_s16(dst_ptr, dst_n);
+
+ l_ptr+=8;
+ h_ptr+=8;
+ dst_ptr+=8;
+ }
+ dst_ptr+=total_width;
+ }
+
+ h_ptr = h;
+ dst_ptr = dst + total_width;
+
+ /* Odd coefficients */
+ for (n = 0; n < subband_width; n++)
+ {
+ for (x = 0; x < total_width; x+=8)
+ {
+ // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1);
+ int16x8_t h_n = vld1q_s16(h_ptr);
+ int16x8_t dst_n_m = vld1q_s16(dst_ptr - total_width);
+
+ prefetch_data(h_ptr);
+ prefetch_data(dst_ptr - total_width);
+
+ h_n = vshlq_n_s16(h_n, 1);
+
+ int16x8_t tmp_n = dst_n_m;
+ if (n == subband_width - 1)
+ tmp_n = vaddq_s16(tmp_n, dst_n_m);
+ else
+ {
+ int16x8_t dst_n_p = vld1q_s16((dst_ptr + total_width));
+ tmp_n = vaddq_s16(tmp_n, dst_n_p);
+ }
+ tmp_n = vshrq_n_s16(tmp_n, 1);
+
+ int16x8_t dst_n = vaddq_s16(tmp_n, h_n);
+ vst1q_s16(dst_ptr, dst_n);
+ prefetch_data(dst_ptr);
+
+ h_ptr+=8;
+ dst_ptr+=8;
+ }
+ dst_ptr+=total_width;
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_decode_block_NEON(sint16 * buffer, sint16 * idwt, int subband_width)
+{
+ sint16 * hl, * lh, * hh, * ll;
+ sint16 * l_dst, * h_dst;
+
+ /* Inverse DWT in horizontal direction, results in 2 sub-bands in L, H order in tmp buffer idwt. */
+ /* The 4 sub-bands are stored in HL(0), LH(1), HH(2), LL(3) order. */
+ /* The lower part L uses LL(3) and HL(0). */
+ /* The higher part H uses LH(1) and HH(2). */
+
+ ll = buffer + subband_width * subband_width * 3;
+ hl = buffer;
+ l_dst = idwt;
+
+ rfx_dwt_2d_decode_block_horiz_NEON(ll, hl, l_dst, subband_width);
+
+ lh = buffer + subband_width * subband_width;
+ hh = buffer + subband_width * subband_width * 2;
+ h_dst = idwt + subband_width * subband_width * 2;
+
+ rfx_dwt_2d_decode_block_horiz_NEON(lh, hh, h_dst, subband_width);
+
+ /* Inverse DWT in vertical direction, results are stored in original buffer. */
+ rfx_dwt_2d_decode_block_vert_NEON(l_dst, h_dst, buffer, subband_width);
+}
+
+void
+rfx_dwt_2d_decode_NEON(sint16 * buffer, sint16 * dwt_buffer)
+{
+ rfx_dwt_2d_decode_block_NEON(buffer + 3840, dwt_buffer, 8);
+ rfx_dwt_2d_decode_block_NEON(buffer + 3072, dwt_buffer, 16);
+ rfx_dwt_2d_decode_block_NEON(buffer, dwt_buffer, 32);
+}
+
+
+
+int isNeonSupported()
+{
+#if defined(ANDROID)
+ if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM)
+ {
+ DEBUG_RFX("NEON optimization disabled - No ARM CPU found");
+ return 0;
+ }
+
+ uint64_t features = android_getCpuFeatures();
+ if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7))
+ {
+ if (features & ANDROID_CPU_ARM_FEATURE_NEON)
+ {
+ DEBUG_RFX("NEON optimization enabled!");
+ return 1;
+ }
+ DEBUG_RFX("NEON optimization disabled - CPU not NEON capable");
+ }
+ else
+ DEBUG_RFX("NEON optimization disabled - No ARMv7 CPU found");
+
+ return 0;
+#else
+ return 1; // TODO: set to 1 for development
+// return 0;
+#endif
+}
+
+
+void rfx_init_neon(RFX_CONTEXT * context)
+{
+
+
+ if(isNeonSupported())
+ {
+ DEBUG_RFX("Using NEON optimizations");
+
+ IF_PROFILER(context->prof_rfx_decode_YCbCr_to_RGB->name = "rfx_decode_YCbCr_to_RGB_NEON");
+ IF_PROFILER(context->prof_rfx_quantization_decode->name = "rfx_quantization_decode_NEON");
+ IF_PROFILER(context->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_NEON");
+
+ context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_NEON;
+ context->quantization_decode = rfx_quantization_decode_NEON;
+ context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
+ }
+}
diff --git a/libfreerdp-rfx/neon/rfx_neon.h b/libfreerdp-rfx/neon/rfx_neon.h
new file mode 100644
index 0000000..64b702f
--- /dev/null
+++ b/libfreerdp-rfx/neon/rfx_neon.h
@@ -0,0 +1,32 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ RemoteFX Codec Library - NEON Optimizations
+
+ Copyright 2011 Marc-Andre Moreau <marcandre.moreau@gmail.com>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __RFX_NEON_H
+#define __RFX_NEON_H
+
+#include "librfx.h"
+#include <freerdp/rfx.h>
+
+void rfx_init_neon(RFX_CONTEXT * context);
+
+#ifndef RFX_INIT_SIMD
+#define RFX_INIT_SIMD(_rfx_context) rfx_init_neon(_rfx_context)
+#endif
+
+#endif /* __RFX_NEON_H */
diff --git a/libfreerdp-rfx/rfx_bitstream.c b/libfreerdp-rfx/rfx_bitstream.c
index ce741fc..a62ec17 100644
--- a/libfreerdp-rfx/rfx_bitstream.c
+++ b/libfreerdp-rfx/rfx_bitstream.c
@@ -43,11 +43,11 @@ rfx_bitstream_put_buffer(RFX_BITSTREAM * bs, uint8 * buffer, int nbytes)
bs->bits_left = 8;
}
-uint32
+uint16
rfx_bitstream_get_bits(RFX_BITSTREAM * bs, int nbits)
{
int b;
- uint32 n = 0;
+ uint16 n = 0;
while (bs->byte_pos < bs->nbytes && nbits > 0)
{
@@ -73,6 +73,30 @@ rfx_bitstream_get_bits(RFX_BITSTREAM * bs, int nbits)
return n;
}
+void
+rfx_bitstream_put_bits(RFX_BITSTREAM * bs, uint16 bits, int nbits)
+{
+ int b;
+
+ while (bs->byte_pos < bs->nbytes && nbits > 0)
+ {
+ b = nbits;
+
+ if (b > bs->bits_left)
+ b = bs->bits_left;
+
+ bs->buffer[bs->byte_pos] |= ((bits >> (nbits - b)) & ((1 << b) - 1)) << (bs->bits_left - b);
+ bs->bits_left -= b;
+ nbits -= b;
+
+ if (bs->bits_left == 0)
+ {
+ bs->bits_left = 8;
+ bs->byte_pos++;
+ }
+ }
+}
+
int
rfx_bitstream_eos(RFX_BITSTREAM * bs)
{
@@ -88,6 +112,12 @@ rfx_bitstream_left(RFX_BITSTREAM * bs)
return (bs->nbytes - bs->byte_pos - 1) * 8 + bs->bits_left;
}
+int
+rfx_bitstream_get_processed_bytes(RFX_BITSTREAM * bs)
+{
+ return (bs->bits_left < 8 ? bs->byte_pos + 1 : bs->byte_pos);
+}
+
void
rfx_bitstream_free(RFX_BITSTREAM * bs)
{
diff --git a/libfreerdp-rfx/rfx_bitstream.h b/libfreerdp-rfx/rfx_bitstream.h
index 93d12b7..191c191 100644
--- a/libfreerdp-rfx/rfx_bitstream.h
+++ b/libfreerdp-rfx/rfx_bitstream.h
@@ -35,12 +35,16 @@ RFX_BITSTREAM *
rfx_bitstream_new(void);
void
rfx_bitstream_put_buffer(RFX_BITSTREAM * bs, uint8 * buffer, int nbytes);
-unsigned int
+uint16
rfx_bitstream_get_bits(RFX_BITSTREAM * bs, int nbits);
+void
+rfx_bitstream_put_bits(RFX_BITSTREAM * bs, uint16 bits, int nbits);
int
rfx_bitstream_eos(RFX_BITSTREAM * bs);
int
rfx_bitstream_left(RFX_BITSTREAM * bs);
+int
+rfx_bitstream_get_processed_bytes(RFX_BITSTREAM * bs);
void
rfx_bitstream_free(RFX_BITSTREAM * bs);
diff --git a/libfreerdp-rfx/rfx_decode.c b/libfreerdp-rfx/rfx_decode.c
index 35dc306..8dc8853 100644
--- a/libfreerdp-rfx/rfx_decode.c
+++ b/libfreerdp-rfx/rfx_decode.c
@@ -27,13 +27,64 @@
#include "rfx_decode.h"
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_decode_format_RGB(sint16 * r_buf, sint16 * g_buf, sint16 * b_buf,
+ RFX_PIXEL_FORMAT pixel_format, uint8 * dst_buf)
+{
+ sint16 * r = r_buf;
+ sint16 * g = g_buf;
+ sint16 * b = b_buf;
+ uint8 * dst = dst_buf;
+ int i;
+
+ switch (pixel_format)
+ {
+ case RFX_PIXEL_FORMAT_BGRA:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*b++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*r++);
+ *dst++ = 0xFF;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGBA:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*r++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*b++);
+ *dst++ = 0xFF;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_BGR:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*b++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*r++);
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGB:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*r++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*b++);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
#define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v)))
void
-rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf)
+rfx_decode_YCbCr_to_RGB(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf)
{
- int y, cb, cr;
- int r, g, b;
+ sint16 y, cb, cr;
+ sint16 r, g, b;
int i;
for (i = 0; i < 4096; i++)
@@ -51,29 +102,28 @@ rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf)
}
static void
-rfx_decode_component(RFX_CONTEXT * context, const uint32 * quantization_values, int half,
- const uint8 * data, int size, uint32 * buffer)
+rfx_decode_component(RFX_CONTEXT * context, const uint32 * quantization_values,
+ const uint8 * data, int size, sint16 * buffer)
{
- rfx_rlgr_decode(context->mode, data, size, buffer, 4096);
-
- rfx_differential_decode(buffer + 4032, 64);
-
- rfx_quantization_decode(buffer, 1024, quantization_values[8]); /* HL1 */
- rfx_quantization_decode(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
- rfx_quantization_decode(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
- rfx_quantization_decode(buffer + 3072, 256, quantization_values[5]); /* HL2 */
- rfx_quantization_decode(buffer + 3328, 256, quantization_values[4]); /* LH2 */
- rfx_quantization_decode(buffer + 3584, 256, quantization_values[6]); /* HH2 */
- rfx_quantization_decode(buffer + 3840, 64, quantization_values[2]); /* HL3 */
- rfx_quantization_decode(buffer + 3904, 64, quantization_values[1]); /* LH3 */
- rfx_quantization_decode(buffer + 3868, 64, quantization_values[3]); /* HH3 */
- rfx_quantization_decode(buffer + 4032, 64, quantization_values[0]); /* LL3 */
-
- rfx_dwt_2d_decode(context, (int*) buffer + 3840, 8);
- rfx_dwt_2d_decode(context, (int*) buffer + 3072, 16);
-
- if (!half)
- rfx_dwt_2d_decode(context, (int*) buffer, 32);
+ PROFILER_ENTER(context->prof_rfx_decode_component);
+
+ PROFILER_ENTER(context->prof_rfx_rlgr_decode);
+ rfx_rlgr_decode(context->mode, data, size, buffer, 4096);
+ PROFILER_EXIT(context->prof_rfx_rlgr_decode);
+
+ PROFILER_ENTER(context->prof_rfx_differential_decode);
+ rfx_differential_decode(buffer + 4032, 64);
+ PROFILER_EXIT(context->prof_rfx_differential_decode);
+
+ PROFILER_ENTER(context->prof_rfx_quantization_decode);
+ context->quantization_decode(buffer, quantization_values);
+ PROFILER_EXIT(context->prof_rfx_quantization_decode);
+
+ PROFILER_ENTER(context->prof_rfx_dwt_2d_decode);
+ context->dwt_2d_decode(buffer, context->dwt_buffer);
+ PROFILER_EXIT(context->prof_rfx_dwt_2d_decode);
+
+ PROFILER_EXIT(context->prof_rfx_decode_component);
}
uint8*
@@ -82,49 +132,22 @@ rfx_decode_rgb(RFX_CONTEXT * context,
const uint8 * cb_data, int cb_size, const uint32 * cb_quants,
const uint8 * cr_data, int cr_size, const uint32 * cr_quants, uint8* rgb_buffer)
{
- int i;
- uint8 * dst;
- int r, g, b;
+ PROFILER_ENTER(context->prof_rfx_decode_rgb);
- dst = rgb_buffer;
- rfx_decode_component(context, y_quants, 0, y_data, y_size, context->y_r_buffer);
- rfx_decode_component(context, cb_quants, 0, cb_data, cb_size, context->cb_g_buffer);
- rfx_decode_component(context, cr_quants, 0, cr_data, cr_size, context->cr_b_buffer);
+ rfx_decode_component(context, y_quants, y_data, y_size, context->y_r_buffer); /* YData */
+ rfx_decode_component(context, cb_quants, cb_data, cb_size, context->cb_g_buffer); /* CbData */
+ rfx_decode_component(context, cr_quants, cr_data, cr_size, context->cr_b_buffer); /* CrData */
- context->decode_YCbCr_to_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
+ PROFILER_ENTER(context->prof_rfx_decode_YCbCr_to_RGB);
+ context->decode_YCbCr_to_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
+ PROFILER_EXIT(context->prof_rfx_decode_YCbCr_to_RGB);
+
+ PROFILER_ENTER(context->prof_rfx_decode_format_RGB);
+ rfx_decode_format_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer,
+ context->pixel_format, rgb_buffer);
+ PROFILER_EXIT(context->prof_rfx_decode_format_RGB);
+
+ PROFILER_EXIT(context->prof_rfx_decode_rgb);
- for (i = 0; i < 4096; i++)
- {
- r = context->y_r_buffer[i];
- g = context->cb_g_buffer[i];
- b = context->cr_b_buffer[i];
- switch (context->pixel_format)
- {
- case RFX_PIXEL_FORMAT_BGRA:
- *dst++ = (uint8) (b);
- *dst++ = (uint8) (g);
- *dst++ = (uint8) (r);
- *dst++ = 0xFF;
- break;
- case RFX_PIXEL_FORMAT_RGBA:
- *dst++ = (uint8) (r);
- *dst++ = (uint8) (g);
- *dst++ = (uint8) (b);
- *dst++ = 0xFF;
- break;
- case RFX_PIXEL_FORMAT_BGR:
- *dst++ = (uint8) (b);
- *dst++ = (uint8) (g);
- *dst++ = (uint8) (r);
- break;
- case RFX_PIXEL_FORMAT_RGB:
- *dst++ = (uint8) (r);
- *dst++ = (uint8) (g);
- *dst++ = (uint8) (b);
- break;
- default:
- break;
- }
- }
return rgb_buffer;
}
diff --git a/libfreerdp-rfx/rfx_decode.h b/libfreerdp-rfx/rfx_decode.h
index 356e232..3b7fd9b 100644
--- a/libfreerdp-rfx/rfx_decode.h
+++ b/libfreerdp-rfx/rfx_decode.h
@@ -23,7 +23,7 @@
#include <freerdp/rfx.h>
void
-rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
+rfx_decode_YCbCr_to_RGB(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
unsigned char *
rfx_decode_rgb(RFX_CONTEXT * context,
diff --git a/libfreerdp-rfx/rfx_differential.c b/libfreerdp-rfx/rfx_differential.c
index 10b9dd6..d1b3eb1 100644
--- a/libfreerdp-rfx/rfx_differential.c
+++ b/libfreerdp-rfx/rfx_differential.c
@@ -23,10 +23,10 @@
#include "rfx_differential.h"
void
-rfx_differential_decode(uint32 * buffer, int buffer_size)
+rfx_differential_decode(sint16 * buffer, int buffer_size)
{
- uint32 * src;
- uint32 * dst;
+ sint16 * src;
+ sint16 * dst;
for (src = buffer, dst = buffer + 1; buffer_size > 1; src++, dst++, buffer_size--)
{
@@ -34,3 +34,17 @@ rfx_differential_decode(uint32 * buffer, int buffer_size)
}
}
+void
+rfx_differential_encode(sint16 * buffer, int buffer_size)
+{
+ sint16 n1, n2;
+ sint16 * dst;
+
+ for (n1 = *buffer, dst = buffer + 1; buffer_size > 1; dst++, buffer_size--)
+ {
+ n2 = *dst;
+ *dst -= n1;
+ n1 = n2;
+ }
+}
+
diff --git a/libfreerdp-rfx/rfx_differential.h b/libfreerdp-rfx/rfx_differential.h
index fc5deb5..141e8c9 100644
--- a/libfreerdp-rfx/rfx_differential.h
+++ b/libfreerdp-rfx/rfx_differential.h
@@ -23,7 +23,9 @@
#include <freerdp/rfx.h>
void
-rfx_differential_decode(uint32 * buffer, int buffer_size);
+rfx_differential_decode(sint16 * buffer, int buffer_size);
+void
+rfx_differential_encode(sint16 * buffer, int buffer_size);
#endif
diff --git a/libfreerdp-rfx/rfx_dwt.c b/libfreerdp-rfx/rfx_dwt.c
index 3ed4e89..7f80975 100644
--- a/libfreerdp-rfx/rfx_dwt.c
+++ b/libfreerdp-rfx/rfx_dwt.c
@@ -24,32 +24,15 @@
#include "rfx_dwt.h"
void
-rfx_dwt_2d_decode(RFX_CONTEXT * context, int * buffer, int subband_width)
+rfx_dwt_2d_decode_block(sint16 * buffer, sint16 * idwt, int subband_width)
{
- int idwt_alloc;
- int * idwt;
- int * dst, * l, * h;
- int * l_dst, * h_dst;
- int * hl, * lh, * hh, * ll;
+ sint16 * dst, * l, * h;
+ sint16 * l_dst, * h_dst;
+ sint16 * hl, * lh, * hh, * ll;
int total_width;
int x, y;
int n;
- switch (subband_width)
- {
- case 8:
- case 16:
- case 32:
- idwt = (int*) context->idwt_buffers[subband_width >> 3];
- idwt_alloc = 0;
- break;
-
- default:
- idwt = (int*) malloc(subband_width * subband_width * 4 * sizeof(int));
- idwt_alloc = 1;
- break;
- }
-
total_width = subband_width << 1;
/* Inverse DWT in horizontal direction, results in 2 sub-bands in L, H order in tmp buffer idwt. */
@@ -85,8 +68,8 @@ rfx_dwt_2d_decode(RFX_CONTEXT * context, int * buffer, int subband_width)
h_dst[x + 1] = (hh[n] << 1) + ((h_dst[x] + h_dst[x + 2]) >> 1);
}
x = n << 1;
- l_dst[x + 1] = (hl[n] << 1) + ((l_dst[x] + l_dst[x]) >> 1);
- h_dst[x + 1] = (hh[n] << 1) + ((h_dst[x] + h_dst[x]) >> 1);
+ l_dst[x + 1] = (hl[n] << 1) + (l_dst[x]);
+ h_dst[x + 1] = (hh[n] << 1) + (h_dst[x]);
ll += subband_width;
hl += subband_width;
@@ -120,8 +103,96 @@ rfx_dwt_2d_decode(RFX_CONTEXT * context, int * buffer, int subband_width)
dst[total_width] = (*h << 1) + ((dst[0] + dst[n < subband_width - 1 ? 2 * total_width : 0]) >> 1);
}
}
+}
- if (idwt_alloc)
- free(idwt);
+void
+rfx_dwt_2d_decode(sint16 * buffer, sint16 * dwt_buffer)
+{
+ rfx_dwt_2d_decode_block(buffer + 3840, dwt_buffer, 8);
+ rfx_dwt_2d_decode_block(buffer + 3072, dwt_buffer, 16);
+ rfx_dwt_2d_decode_block(buffer, dwt_buffer, 32);
}
+void
+rfx_dwt_2d_encode_block(sint16 * buffer, sint16 * dwt, int subband_width)
+{
+ sint16 * src, * l, * h;
+ sint16 * l_src, * h_src;
+ sint16 * hl, * lh, * hh, * ll;
+ int total_width;
+ int x, y;
+ int n;
+
+ total_width = subband_width << 1;
+
+ /* DWT in vertical direction, results in 2 sub-bands in L, H order in tmp buffer dwt. */
+ for (x = 0; x < total_width; x++)
+ {
+ for (n = 0; n < subband_width; n++)
+ {
+ y = n << 1;
+ l = dwt + n * total_width + x;
+ h = l + subband_width * total_width;
+ src = buffer + y * total_width + x;
+
+ /* H */
+ *h = (src[total_width] - ((src[0] + src[n < subband_width - 1 ? 2 * total_width : total_width]) >> 1)) >> 1;
+
+ /* L */
+ *l = src[0] + (n == 0 ? *h : (*(h - total_width) + *h) >> 1);
+ }
+ }
+
+ /* DWT in horizontal direction, results in 4 sub-bands in HL(0), LH(1), HH(2), LL(3) order, stored in original buffer. */
+ /* The lower part L generates LL(3) and HL(0). */
+ /* The higher part H generates LH(1) and HH(2). */
+
+ ll = buffer + subband_width * subband_width * 3;
+ hl = buffer;
+ l_src = dwt;
+
+ lh = buffer + subband_width * subband_width;
+ hh = buffer + subband_width * subband_width * 2;
+ h_src = dwt + subband_width * subband_width * 2;
+
+ for (y = 0; y < subband_width; y++)
+ {
+ /* L */
+ for (n = 0; n < subband_width; n++)
+ {
+ x = n << 1;
+
+ /* HL */
+ hl[n] = (l_src[x + 1] - ((l_src[x] + l_src[n < subband_width - 1 ? x + 2 : x]) >> 1)) >> 1;
+ /* LL */
+ ll[n] = l_src[x] + (n == 0 ? hl[n] : (hl[n - 1] + hl[n]) >> 1);
+ }
+
+ /* H */
+ for (n = 0; n < subband_width; n++)
+ {
+ x = n << 1;
+
+ /* HH */
+ hh[n] = (h_src[x + 1] - ((h_src[x] + h_src[n < subband_width - 1 ? x + 2 : x]) >> 1)) >> 1;
+ /* LH */
+ lh[n] = h_src[x] + (n == 0 ? hh[n] : (hh[n - 1] + hh[n]) >> 1);
+ }
+
+ ll += subband_width;
+ hl += subband_width;
+ l_src += total_width;
+
+ lh += subband_width;
+ hh += subband_width;
+ h_src += total_width;
+ }
+}
+
+void
+rfx_dwt_2d_encode(sint16 * buffer, sint16 * dwt_buffer)
+{
+ rfx_dwt_2d_encode_block(buffer, dwt_buffer, 32);
+ rfx_dwt_2d_encode_block(buffer + 3072, dwt_buffer, 16);
+ rfx_dwt_2d_encode_block(buffer + 3840, dwt_buffer, 8);
+}
diff --git a/libfreerdp-rfx/rfx_dwt.h b/libfreerdp-rfx/rfx_dwt.h
index 21140bb..449d61c 100644
--- a/libfreerdp-rfx/rfx_dwt.h
+++ b/libfreerdp-rfx/rfx_dwt.h
@@ -23,7 +23,9 @@
#include <freerdp/rfx.h>
void
-rfx_dwt_2d_decode(RFX_CONTEXT * context, int * buffer, int subband_width);
+rfx_dwt_2d_decode(sint16 * buffer, sint16 * dwt_buffer);
+void
+rfx_dwt_2d_encode(sint16 * buffer, sint16 * dwt_buffer);
#endif
diff --git a/libfreerdp-rfx/rfx_encode.c b/libfreerdp-rfx/rfx_encode.c
new file mode 100644
index 0000000..cd20200
--- /dev/null
+++ b/libfreerdp-rfx/rfx_encode.c
@@ -0,0 +1,182 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ RemoteFX Codec Library - Encode
+
+ Copyright 2011 Vic Lee
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "rfx_rlgr.h"
+#include "rfx_differential.h"
+#include "rfx_quantization.h"
+#include "rfx_dwt.h"
+
+#include "rfx_encode.h"
+
+#define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v)))
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_encode_format_RGB(const uint8 * rgb_data, int width, int height, int rowstride,
+ RFX_PIXEL_FORMAT pixel_format, sint16 * r_buf, sint16 * g_buf, sint16 * b_buf)
+{
+ int x, y;
+ int x_exceed;
+ int y_exceed;
+ const uint8 * src;
+
+ x_exceed = 64 - width;
+ y_exceed = 64 - height;
+ for (y = 0; y < height; y++)
+ {
+ src = rgb_data + y * rowstride;
+
+ switch (pixel_format)
+ {
+ case RFX_PIXEL_FORMAT_BGRA:
+ for (x = 0; x < width; x++)
+ {
+ *b_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *r_buf++ = (sint16) (*src++);
+ src++;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGBA:
+ for (x = 0; x < width; x++)
+ {
+ *r_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *b_buf++ = (sint16) (*src++);
+ src++;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_BGR:
+ for (x = 0; x < width; x++)
+ {
+ *b_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *r_buf++ = (sint16) (*src++);
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGB:
+ for (x = 0; x < width; x++)
+ {
+ *r_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *b_buf++ = (sint16) (*src++);
+ }
+ break;
+ default:
+ break;
+ }
+ /* Fill the horizontal region outside of 64x64 tile size to 0 in order to be better compressed. */
+ if (x_exceed > 0)
+ {
+ memset(r_buf, 0, x_exceed * sizeof(sint16));
+ memset(g_buf, 0, x_exceed * sizeof(sint16));
+ memset(b_buf, 0, x_exceed * sizeof(sint16));
+ r_buf += x_exceed;
+ g_buf += x_exceed;
+ b_buf += x_exceed;
+ }
+ }
+
+ /* Fill the vertical region outside of 64x64 tile size to 0 in order to be better compressed. */
+ if (y_exceed > 0)
+ {
+ memset(r_buf, 0, y_exceed * 64 * sizeof(sint16));
+ memset(g_buf, 0, y_exceed * 64 * sizeof(sint16));
+ memset(b_buf, 0, y_exceed * 64 * sizeof(sint16));
+ }
+}
+
+void
+rfx_encode_RGB_to_YCbCr(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf)
+{
+ sint16 y, cb, cr;
+ sint16 r, g, b;
+
+ int i;
+ for (i = 0; i < 4096; i++)
+ {
+ r = y_r_buf[i];
+ g = cb_g_buf[i];
+ b = cr_b_buf[i];
+ y = ((r >> 2) + (r >> 5) + (r >> 6)) + ((g >> 1) + (g >> 4) + (g >> 6) + (g >> 7)) + ((b >> 4) + (b >> 5) + (b >> 6));
+ y_r_buf[i] = MINMAX(y, 0, 255) - 128;
+ cb = 0 - ((r >> 3) + (r >> 5) + (r >> 7)) - ((g >> 2) + (g >> 4) + (g >> 6)) + (b >> 1);
+ cb_g_buf[i] = MINMAX(cb, -128, 127);
+ cr = (r >> 1) - ((g >> 2) + (g >> 3) + (g >> 5) + (g >> 7)) - ((b >> 4) + (b >> 6));
+ cr_b_buf[i] = MINMAX(cr, -128, 127);
+ }
+}
+
+static void
+rfx_encode_component(RFX_CONTEXT * context, const uint32 * quantization_values,
+ sint16 * data, uint8 * buffer, int buffer_size, int * size)
+{
+ PROFILER_ENTER(context->prof_rfx_encode_component);
+
+ PROFILER_ENTER(context->prof_rfx_dwt_2d_encode);
+ context->dwt_2d_encode(data, context->dwt_buffer);
+ PROFILER_EXIT(context->prof_rfx_dwt_2d_encode);
+
+ PROFILER_ENTER(context->prof_rfx_quantization_encode);
+ context->quantization_encode(data, quantization_values);
+ PROFILER_EXIT(context->prof_rfx_quantization_encode);
+
+ PROFILER_ENTER(context->prof_rfx_differential_encode);
+ rfx_differential_encode(data + 4032, 64);
+ PROFILER_EXIT(context->prof_rfx_differential_encode);
+
+ PROFILER_ENTER(context->prof_rfx_rlgr_encode);
+ *size = rfx_rlgr_encode(context->mode, data, 4096, buffer, buffer_size);
+ PROFILER_EXIT(context->prof_rfx_rlgr_encode);
+
+ PROFILER_EXIT(context->prof_rfx_encode_component);
+}
+
+void
+rfx_encode_rgb(RFX_CONTEXT * context, const uint8 * rgb_data, int width, int height, int rowstride,
+ const uint32 * y_quants, const uint32 * cb_quants, const uint32 * cr_quants,
+ uint8 * ycbcr_buffer, int buffer_size, int * y_size, int * cb_size, int * cr_size)
+{
+ sint16 * y_r_buffer = context->y_r_buffer;
+ sint16 * cb_g_buffer = context->cb_g_buffer;
+ sint16 * cr_b_buffer = context->cr_b_buffer;
+
+ PROFILER_ENTER(context->prof_rfx_encode_rgb);
+
+ PROFILER_ENTER(context->prof_rfx_encode_format_RGB);
+ rfx_encode_format_RGB(rgb_data, width, height, rowstride,
+ context->pixel_format, y_r_buffer, cb_g_buffer, cr_b_buffer);
+ PROFILER_EXIT(context->prof_rfx_encode_format_RGB);
+
+ PROFILER_ENTER(context->prof_rfx_encode_RGB_to_YCbCr);
+ context->encode_RGB_to_YCbCr(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
+ PROFILER_EXIT(context->prof_rfx_encode_RGB_to_YCbCr);
+
+ rfx_encode_component(context, y_quants, context->y_r_buffer, ycbcr_buffer, buffer_size, y_size);
+ ycbcr_buffer += (*y_size);
+ buffer_size -= (*y_size);
+ rfx_encode_component(context, cb_quants, context->cb_g_buffer, ycbcr_buffer, buffer_size, cb_size);
+ ycbcr_buffer += (*cb_size);
+ buffer_size -= (*cb_size);
+ rfx_encode_component(context, cr_quants, context->cr_b_buffer, ycbcr_buffer, buffer_size, cr_size);
+
+ PROFILER_EXIT(context->prof_rfx_encode_rgb);
+}
diff --git a/libfreerdp-rfx/rfx_encode.h b/libfreerdp-rfx/rfx_encode.h
new file mode 100644
index 0000000..2fac0be
--- /dev/null
+++ b/libfreerdp-rfx/rfx_encode.h
@@ -0,0 +1,34 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ RemoteFX Codec Library - Decode
+
+ Copyright 2011 Vic Lee
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __RFX_ENCODE_H
+#define __RFX_ENCODE_H
+
+#include <freerdp/rfx.h>
+
+void
+rfx_encode_RGB_to_YCbCr(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
+
+void
+rfx_encode_rgb(RFX_CONTEXT * context, const uint8 * rgb_data, int width, int height, int rowstride,
+ const uint32 * y_quants, const uint32 * cb_quants, const uint32 * cr_quants,
+ uint8 * ycbcr_buffer, int buffer_size, int * y_size, int * cb_size, int * cr_size);
+
+#endif
+
diff --git a/libfreerdp-rfx/rfx_quantization.c b/libfreerdp-rfx/rfx_quantization.c
index 9d67c0c..67755c0 100644
--- a/libfreerdp-rfx/rfx_quantization.c
+++ b/libfreerdp-rfx/rfx_quantization.c
@@ -19,17 +19,63 @@
#include "rfx_quantization.h"
+static void
+rfx_quantization_decode_block(sint16 * buffer, int buffer_size, uint32 factor)
+{
+ sint16 * dst;
+
+ if (factor <= 6)
+ return;
+
+ factor -= 6;
+ for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
+ {
+ *dst <<= factor;
+ }
+}
+
void
-rfx_quantization_decode(uint32 * buffer, int buffer_size, uint32 factor)
+rfx_quantization_decode(sint16 * buffer, const uint32 * quantization_values)
+{
+ rfx_quantization_decode_block(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_decode_block(buffer + 3868, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+}
+
+static void
+rfx_quantization_encode_block(sint16 * buffer, int buffer_size, uint32 factor)
{
- uint32 * dst;
+ sint16 * dst;
if (factor <= 6)
return;
+ factor -= 6;
for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
{
- *dst <<= (factor - 6);
+ *dst >>= factor;
}
}
+void
+rfx_quantization_encode(sint16 * buffer, const uint32 * quantization_values)
+{
+ rfx_quantization_encode_block(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_encode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_encode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_encode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_encode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_encode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_encode_block(buffer + 3868, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_encode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+}
+
diff --git a/libfreerdp-rfx/rfx_quantization.h b/libfreerdp-rfx/rfx_quantization.h
index 53d35b8..6a37f28 100644
--- a/libfreerdp-rfx/rfx_quantization.h
+++ b/libfreerdp-rfx/rfx_quantization.h
@@ -23,7 +23,9 @@
#include <freerdp/rfx.h>
void
-rfx_quantization_decode(uint32 * buffer, int buffer_size, uint32 factor);
+rfx_quantization_decode(sint16 * buffer, const uint32 * quantization_values);
+void
+rfx_quantization_encode(sint16 * buffer, const uint32 * quantization_values);
#endif
diff --git a/libfreerdp-rfx/rfx_rlgr.c b/libfreerdp-rfx/rfx_rlgr.c
index c727e7c..2d27907 100644
--- a/libfreerdp-rfx/rfx_rlgr.c
+++ b/libfreerdp-rfx/rfx_rlgr.c
@@ -56,7 +56,7 @@
nZeroesWritten = buffer_size; \
if (nZeroesWritten > 0) \
{ \
- memset(dst, 0, nZeroesWritten * sizeof(int)); \
+ memset(dst, 0, nZeroesWritten * sizeof(sint16)); \
dst += nZeroesWritten; \
} \
buffer_size -= (nZeroes); \
@@ -72,10 +72,10 @@
_v >>= 1; \
_nbits++; \
} \
-} \
+}
/* Converts from (2 * magnitude - sign) to integer */
-#define GetIntFrom2MagSign(twoMs) (((twoMs) & 1) ? -1 * (int)(((twoMs) + 1) >> 1) : (int)((twoMs) >> 1))
+#define GetIntFrom2MagSign(twoMs) (((twoMs) & 1) ? -1 * (sint16)(((twoMs) + 1) >> 1) : (sint16)((twoMs) >> 1))
/*
* Update the passed parameter and clamp it to the range [0, KPMAX]
@@ -94,11 +94,11 @@
/* Outputs the Golomb/Rice encoding of a non-negative integer */
#define GetGRCode(krp, kr) rfx_rlgr_get_gr_code(bs, krp, kr)
-static uint32
+static uint16
rfx_rlgr_get_gr_code(RFX_BITSTREAM * bs, int * krp, int * kr)
{
int vk;
- uint32 mag;
+ uint16 mag;
/* chew up/count leading 1s and escape 0 */
for (vk = 0; GetBits(1) == 1;)
@@ -122,13 +122,13 @@ rfx_rlgr_get_gr_code(RFX_BITSTREAM * bs, int * krp, int * kr)
}
int
-rfx_rlgr_decode(RLGR_MODE mode, const uint8 * data, int data_size, uint32 * buffer, int buffer_size)
+rfx_rlgr_decode(RLGR_MODE mode, const uint8 * data, int data_size, sint16 * buffer, int buffer_size)
{
int k;
int kp;
int kr;
int krp;
- uint32 * dst;
+ sint16 * dst;
RFX_BITSTREAM * bs;
bs = rfx_bitstream_new();
@@ -230,3 +230,202 @@ rfx_rlgr_decode(RLGR_MODE mode, const uint8 * data, int data_size, uint32 * buff
return (dst - buffer);
}
+
+/* Returns the next coefficient (a signed int) to encode, from the input stream */
+#define GetNextInput(_n) \
+{ \
+ if (data_size > 0) \
+ { \
+ _n = *data++; \
+ data_size--; \
+ } \
+ else \
+ { \
+ _n = 0; \
+ } \
+}
+
+/* Emit bitPattern to the output bitstream */
+#define OutputBits(numBits, bitPattern) rfx_bitstream_put_bits(bs, bitPattern, numBits);
+
+/* Emit a bit (0 or 1), count number of times, to the output bitstream */
+#define OutputBit(count, bit) \
+{ \
+ uint16 _b = (bit ? 0xFFFF : 0); \
+ int _c = (count); \
+ for (; _c > 0; _c -= 16) \
+ rfx_bitstream_put_bits(bs, _b, (_c > 16 ? 16 : _c)); \
+}
+
+/* Converts the input value to (2 * abs(input) - sign(input)), where sign(input) = (input < 0 ? 1 : 0) and returns it */
+#define Get2MagSign(input) ((input) >= 0 ? 2 * (input) : -2 * (input) - 1)
+
+/* Outputs the Golomb/Rice encoding of a non-negative integer */
+#define CodeGR(krp, val) rfx_rlgr_code_gr(bs, krp, val)
+
+static void
+rfx_rlgr_code_gr(RFX_BITSTREAM * bs, int * krp, uint16 val)
+{
+ int kr = *krp >> LSGR;
+
+ /* unary part of GR code */
+
+ uint16 vk = (val) >> kr;
+ OutputBit(vk, 1);
+ OutputBit(1, 0);
+
+ /* remainder part of GR code, if needed */
+ if (kr)
+ {
+ OutputBits(kr, val & ((1 << kr) - 1));
+ }
+
+ /* update krp, only if it is not equal to 1 */
+ if (vk == 0)
+ {
+ UpdateParam(*krp, -2, kr);
+ }
+ else if (vk > 1)
+ {
+ UpdateParam(*krp, vk, kr);
+ }
+}
+
+int
+rfx_rlgr_encode(RLGR_MODE mode, const sint16 * data, int data_size, uint8 * buffer, int buffer_size)
+{
+ int k;
+ int kp;
+ int kr;
+ int krp;
+ RFX_BITSTREAM * bs;
+ int processed_size;
+
+ bs = rfx_bitstream_new();
+ rfx_bitstream_put_buffer(bs, buffer, buffer_size);
+
+ /* initialize the parameters */
+ k = 1;
+ kp = 1 << LSGR;
+ kr = 1;
+ krp = 1 << LSGR;
+
+ /* process all the input coefficients */
+ while (data_size > 0)
+ {
+ int input;
+
+ if (k)
+ {
+ int numZeros;
+ int runmax;
+ int mag;
+ int sign;
+
+ /* RUN-LENGTH MODE */
+
+ /* collect the run of zeros in the input stream */
+ numZeros = 0;
+ GetNextInput(input);
+ while (input == 0 && data_size > 0)
+ {
+ numZeros++;
+ GetNextInput(input);
+ }
+
+ // emit output zeros
+ runmax = 1 << k;
+ while (numZeros >= runmax)
+ {
+ OutputBit(1, 0); /* output a zero bit */
+ numZeros -= runmax;
+ UpdateParam(kp, UP_GR, k); /* update kp, k */
+ runmax = 1 << k;
+ }
+
+ /* output a 1 to terminate runs */
+ OutputBit(1, 1);
+
+ /* output the remaining run length using k bits */
+ OutputBits(k, numZeros);
+
+ if (input != 0)
+ {
+ /* encode the nonzero value using GR coding */
+ mag = (input < 0 ? -input : input); /* absolute value of input coefficient */
+ sign = (input < 0 ? 1 : 0); /* sign of input coefficient */
+
+ OutputBit(1, sign); /* output the sign bit */
+ CodeGR(&krp, mag - 1); /* output GR code for (mag - 1) */
+
+ UpdateParam(kp, -DN_GR, k);
+ }
+ }
+ else
+ {
+ /* GOLOMB-RICE MODE */
+
+ if (mode == RLGR1)
+ {
+ uint32 twoMs;
+
+ /* RLGR1 variant */
+
+ /* convert input to (2*magnitude - sign), encode using GR code */
+ GetNextInput(input);
+ twoMs = Get2MagSign(input);
+ CodeGR(&krp, twoMs);
+
+ /* update k, kp */
+ if (twoMs)
+ {
+ UpdateParam(kp, UQ_GR, k);
+ }
+ else
+ {
+ UpdateParam(kp, -DQ_GR, k);
+ }
+ }
+ else /* mode == RLGR3 */
+ {
+ uint32 twoMs1;
+ uint32 twoMs2;
+ uint32 sum2Ms;
+ uint32 nIdx;
+
+ /* RLGR3 variant */
+
+ /* convert the next two input values to (2*magnitude - sign) and */
+ /* encode their sum using GR code */
+
+ GetNextInput(input);
+ twoMs1 = Get2MagSign(input);
+ GetNextInput(input);
+ twoMs2 = Get2MagSign(input);
+ sum2Ms = twoMs1 + twoMs2;
+
+ CodeGR(&krp, sum2Ms);
+
+ /* encode binary representation of the first input (twoMs1). */
+ GetMinBits(sum2Ms, nIdx);
+ OutputBits(nIdx, twoMs1);
+
+ /* update k,kp for the two input values */
+
+ if (twoMs1 && twoMs2)
+ {
+ UpdateParam(kp, -2 * DQ_GR, k);
+ }
+ else if (!twoMs1 && !twoMs2)
+ {
+ UpdateParam(kp, 2 * UQ_GR, k);
+ }
+ }
+ }
+ }
+
+ processed_size = rfx_bitstream_get_processed_bytes(bs);
+ rfx_bitstream_free(bs);
+
+ return processed_size;
+}
diff --git a/libfreerdp-rfx/rfx_rlgr.h b/libfreerdp-rfx/rfx_rlgr.h
index 3f988c4..75aa7e4 100644
--- a/libfreerdp-rfx/rfx_rlgr.h
+++ b/libfreerdp-rfx/rfx_rlgr.h
@@ -23,7 +23,9 @@
#include <freerdp/rfx.h>
int
-rfx_rlgr_decode(RLGR_MODE mode, const uint8 * data, int data_size, uint32 * buffer, int buffer_size);
+rfx_rlgr_decode(RLGR_MODE mode, const uint8 * data, int data_size, sint16 * buffer, int buffer_size);
+int
+rfx_rlgr_encode(RLGR_MODE mode, const sint16 * data, int data_size, uint8 * buffer, int buffer_size);
#endif
diff --git a/libfreerdp-rfx/sse/Makefile.am b/libfreerdp-rfx/sse/Makefile.am
index a97edae..faa9796 100644
--- a/libfreerdp-rfx/sse/Makefile.am
+++ b/libfreerdp-rfx/sse/Makefile.am
@@ -3,9 +3,13 @@
# libfreerdp-rfx-sse
noinst_LTLIBRARIES = libfreerdp-rfx-sse.la
-libfreerdp_rfx_sse_la_SOURCES = \
+libfreerdp_rfx_sse_la_SOURCES =
+
+if WITH_SSE
+libfreerdp_rfx_sse_la_SOURCES += \
rfx_sse.c rfx_sse.h \
rfx_sse2.c rfx_sse2.h
+endif
libfreerdp_rfx_sse_la_CFLAGS = \
-I$(top_srcdir) \
diff --git a/libfreerdp-rfx/sse/rfx_sse.c b/libfreerdp-rfx/sse/rfx_sse.c
index 7353e37..76a632d 100644
--- a/libfreerdp-rfx/sse/rfx_sse.c
+++ b/libfreerdp-rfx/sse/rfx_sse.c
@@ -22,11 +22,23 @@
#include <string.h>
#include "rfx_sse2.h"
-
#include "rfx_sse.h"
void rfx_init_sse(RFX_CONTEXT * context)
{
- DEBUG_RFX("Using SSE2 optimizations");
- context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_SSE2;
+ DEBUG_RFX("Using SSE2 optimizations");
+
+ IF_PROFILER(context->prof_rfx_decode_YCbCr_to_RGB->name = "rfx_decode_YCbCr_to_RGB_SSE2");
+ IF_PROFILER(context->prof_rfx_encode_RGB_to_YCbCr->name = "rfx_encode_RGB_to_YCbCr_SSE2");
+ IF_PROFILER(context->prof_rfx_quantization_decode->name = "rfx_quantization_decode_SSE2");
+ IF_PROFILER(context->prof_rfx_quantization_encode->name = "rfx_quantization_encode_SSE2");
+ IF_PROFILER(context->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_SSE2");
+ IF_PROFILER(context->prof_rfx_dwt_2d_encode->name = "rfx_dwt_2d_encode_SSE2");
+
+ context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_SSE2;
+ context->encode_RGB_to_YCbCr = rfx_encode_RGB_to_YCbCr_SSE2;
+ context->quantization_decode = rfx_quantization_decode_SSE2;
+ context->quantization_encode = rfx_quantization_encode_SSE2;
+ context->dwt_2d_decode = rfx_dwt_2d_decode_SSE2;
+ context->dwt_2d_encode = rfx_dwt_2d_encode_SSE2;
}
diff --git a/libfreerdp-rfx/sse/rfx_sse.h b/libfreerdp-rfx/sse/rfx_sse.h
index 48b8f29..e5be61c 100644
--- a/libfreerdp-rfx/sse/rfx_sse.h
+++ b/libfreerdp-rfx/sse/rfx_sse.h
@@ -31,20 +31,12 @@ void rfx_init_sse(RFX_CONTEXT * context);
#define RFX_INIT_SIMD(_rfx_context) rfx_init_sse(_rfx_context)
#endif
-static __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_between_ps (__m128 val, __m128 min, __m128 max)
+static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_between_epi16 (__m128i val, __m128i min, __m128i max)
{
- __m128 ret;
- ret = _mm_max_ps(val, min);
- return _mm_min_ps(ret, max);
-}
-
-static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_epi32_and_store (__m128i * loc, __m128 val)
-{
- __m128i tmp;
- tmp = _mm_cvtps_epi32(val);
- _mm_stream_si128(loc, tmp);
+ __m128i ret;
+ ret = _mm_max_epi16(val, min);
+ return _mm_min_epi16(ret, max);
}
#endif /* __RFX_SSE_H */
diff --git a/libfreerdp-rfx/sse/rfx_sse2.c b/libfreerdp-rfx/sse/rfx_sse2.c
index c796c92..3434ec5 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.c
+++ b/libfreerdp-rfx/sse/rfx_sse2.c
@@ -25,57 +25,583 @@
#include "rfx_sse2.h"
-void rfx_decode_YCbCr_to_RGB_SSE2(uint32 * y_r_buffer, uint32 * cb_g_buffer, uint32 * cr_b_buffer)
+#define CACHE_LINE_BYTES 64
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch_buffer(char * buffer, int num_bytes)
{
- __m128 y_add = _mm_set_ps1(128.0f);
- __m128 r_cr_t = _mm_set_ps1(1.403f);
- __m128 g_cb_t = _mm_set_ps1(-0.344f);
- __m128 g_cr_t = _mm_set_ps1(-0.714f);
- __m128 b_cb_t = _mm_set_ps1(1.77f);
+ __m128i * buf = (__m128i*) buffer;
+ int i;
+ for (i = 0; i < (num_bytes / sizeof(__m128i)); i+=(CACHE_LINE_BYTES / sizeof(__m128i)))
+ {
+ _mm_prefetch((char*)(&buf[i]), _MM_HINT_NTA);
+ }
+}
+
+void
+rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer)
+{
+ __m128i zero = _mm_setzero_si128();
+ __m128i max = _mm_set1_epi16(255);
+
+ __m128i * y_r_buf = (__m128i*) y_r_buffer;
+ __m128i * cb_g_buf = (__m128i*) cb_g_buffer;
+ __m128i * cr_b_buf = (__m128i*) cr_b_buffer;
+
+ __m128i y;
+ __m128i cr;
+ __m128i cb;
+ __m128i r;
+ __m128i g;
+ __m128i b;
+
+ int i;
- __m128 min = _mm_set_ps1(0.0f);
- __m128 max = _mm_set_ps1(255.0f);
+ for (i = 0; i < (4096 * sizeof(sint16) / sizeof(__m128i)); i += (CACHE_LINE_BYTES / sizeof(__m128i)))
+ {
+ _mm_prefetch((char*)(&y_r_buf[i]), _MM_HINT_NTA);
+ _mm_prefetch((char*)(&cb_g_buf[i]), _MM_HINT_NTA);
+ _mm_prefetch((char*)(&cr_b_buf[i]), _MM_HINT_NTA);
+ }
+ for (i = 0; i < (4096 * sizeof(sint16) / sizeof(__m128i)); i++)
+ {
+ /* y = y_r_buf[i] + 128; */
+ y = _mm_load_si128(&y_r_buf[i]);
+ y = _mm_add_epi16(y, _mm_set1_epi16(128));
- __m128 y, cb, cr;
- __m128 r, g, b, tmp;
+ /* cr = cr_b_buf[i]; */
+ cr = _mm_load_si128(&cr_b_buf[i]);
+
+ /* r = between(y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5), 0, 255); */
+ r = _mm_add_epi16(y, cr);
+ r = _mm_add_epi16(r, _mm_srai_epi16(cr, 2));
+ r = _mm_add_epi16(r, _mm_srai_epi16(cr, 3));
+ r = _mm_add_epi16(r, _mm_srai_epi16(cr, 5));
+ r = _mm_between_epi16(r, zero, max);
+ _mm_store_si128(&y_r_buf[i], r);
+
+ /* cb = cb_g_buf[i]; */
+ cb = _mm_load_si128(&cb_g_buf[i]);
+
+ /* g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1) - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255); */
+ g = _mm_sub_epi16(y, _mm_srai_epi16(cb, 2));
+ g = _mm_sub_epi16(g, _mm_srai_epi16(cb, 4));
+ g = _mm_sub_epi16(g, _mm_srai_epi16(cb, 5));
+ g = _mm_sub_epi16(g, _mm_srai_epi16(cr, 1));
+ g = _mm_sub_epi16(g, _mm_srai_epi16(cr, 3));
+ g = _mm_sub_epi16(g, _mm_srai_epi16(cr, 4));
+ g = _mm_sub_epi16(g, _mm_srai_epi16(cr, 5));
+ g = _mm_between_epi16(g, zero, max);
+ _mm_store_si128(&cb_g_buf[i], g);
+
+ /* b = between(y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6), 0, 255); */
+ b = _mm_add_epi16(y, cb);
+ b = _mm_add_epi16(b, _mm_srai_epi16(cb, 1));
+ b = _mm_add_epi16(b, _mm_srai_epi16(cb, 2));
+ b = _mm_add_epi16(b, _mm_srai_epi16(cb, 6));
+ b = _mm_between_epi16(b, zero, max);
+ _mm_store_si128(&cr_b_buf[i], b);
+ }
+}
+
+void
+rfx_encode_RGB_to_YCbCr_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer)
+{
+ __m128i min = _mm_set1_epi16(-128);
+ __m128i max = _mm_set1_epi16(127);
__m128i * y_r_buf = (__m128i*) y_r_buffer;
__m128i * cb_g_buf = (__m128i*) cb_g_buffer;
__m128i * cr_b_buf = (__m128i*) cr_b_buffer;
+ __m128i y;
+ __m128i cr;
+ __m128i cb;
+ __m128i r;
+ __m128i g;
+ __m128i b;
+
int i;
- for (i = 0; i < (4096 / 4); i++)
+
+ for (i = 0; i < (4096 * sizeof(sint16) / sizeof(__m128i)); i += (CACHE_LINE_BYTES / sizeof(__m128i)))
{
- y = _mm_cvtepi32_ps(*y_r_buf);
- cb = _mm_cvtepi32_ps(*cb_g_buf);
- cr = _mm_cvtepi32_ps(*cr_b_buf);
-
- /* y = y + 128 */
- y = _mm_add_ps(y, y_add);
-
- /* r = between(y + (cr * 1.403), 0, 255) */
- r = _mm_mul_ps(cr, r_cr_t);
- r = _mm_add_ps(r, y);
- r = _mm_between_ps(r, min, max);
- _mm_cvtps_epi32_and_store(y_r_buf, r);
-
- /* g = between(y + (cb * -0.344) + (cr * -0.714), 0, 255) */
- g = _mm_mul_ps(cb, g_cb_t);
- tmp = _mm_mul_ps(cr, g_cr_t);
- g = _mm_add_ps(g, tmp);
- g = _mm_add_ps(g, y);
- g = _mm_between_ps(g, min, max);
- _mm_cvtps_epi32_and_store(cb_g_buf, g);
-
- /* b = between(y + (cb * 1.77), 0, 255) */
- b = _mm_mul_ps(cb, b_cb_t);
- b = _mm_add_ps(b, y);
- b = _mm_between_ps(b, min, max);
- _mm_cvtps_epi32_and_store(cr_b_buf, b);
-
- y_r_buf++;
- cb_g_buf++;
- cr_b_buf++;
+ _mm_prefetch((char*)(&y_r_buf[i]), _MM_HINT_NTA);
+ _mm_prefetch((char*)(&cb_g_buf[i]), _MM_HINT_NTA);
+ _mm_prefetch((char*)(&cr_b_buf[i]), _MM_HINT_NTA);
}
+ for (i = 0; i < (4096 * sizeof(sint16) / sizeof(__m128i)); i++)
+ {
+ /* r = y_r_buf[i]; */
+ r = _mm_load_si128(&y_r_buf[i]);
+
+ /* g = cb_g_buf[i]; */
+ g = _mm_load_si128(&cb_g_buf[i]);
+
+ /* b = cr_b_buf[i]; */
+ b = _mm_load_si128(&cr_b_buf[i]);
+
+ /* y = ((r >> 2) + (r >> 5) + (r >> 6)) + ((g >> 1) + (g >> 4) + (g >> 6) + (g >> 7)) + ((b >> 4) + (b >> 5) + (b >> 6)); */
+ /* y_r_buf[i] = MINMAX(y, 0, 255) - 128; */
+ y = _mm_add_epi16(_mm_srai_epi16(r, 2), _mm_srai_epi16(r, 5));
+ y = _mm_add_epi16(y, _mm_srai_epi16(r, 6));
+ y = _mm_add_epi16(y, _mm_srai_epi16(g, 1));
+ y = _mm_add_epi16(y, _mm_srai_epi16(g, 4));
+ y = _mm_add_epi16(y, _mm_srai_epi16(g, 6));
+ y = _mm_add_epi16(y, _mm_srai_epi16(g, 7));
+ y = _mm_add_epi16(y, _mm_srai_epi16(b, 4));
+ y = _mm_add_epi16(y, _mm_srai_epi16(b, 5));
+ y = _mm_add_epi16(y, _mm_srai_epi16(b, 6));
+ y = _mm_add_epi16(y, min);
+ y = _mm_between_epi16(y, min, max);
+ _mm_store_si128(&y_r_buf[i], y);
+
+ /* cb = 0 - ((r >> 3) + (r >> 5) + (r >> 7)) - ((g >> 2) + (g >> 4) + (g >> 6)) + (b >> 1); */
+ /* cb_g_buf[i] = MINMAX(cb, -128, 127); */
+ cb = _mm_sub_epi16(_mm_srai_epi16(b, 1), _mm_srai_epi16(r, 3));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(r, 5));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(r, 7));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 2));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 4));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 6));
+ cb = _mm_between_epi16(cb, min, max);
+ _mm_store_si128(&cb_g_buf[i], cb);
+
+ /* cr = (r >> 1) - ((g >> 2) + (g >> 3) + (g >> 5) + (g >> 7)) - ((b >> 4) + (b >> 6)); */
+ /* cr_b_buf[i] = MINMAX(cr, -128, 127); */
+ cr = _mm_sub_epi16(_mm_srai_epi16(r, 1), _mm_srai_epi16(g, 2));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(g, 3));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(g, 5));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(g, 7));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(b, 4));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(b, 6));
+ cr = _mm_between_epi16(cr, min, max);
+ _mm_store_si128(&cr_b_buf[i], cr);
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_quantization_decode_block_SSE2(sint16 * buffer, const int buffer_size, const uint32 factor)
+{
+ int shift = factor-6;
+ if (shift <= 0)
+ return;
+
+ __m128i a;
+ __m128i * ptr = (__m128i*) buffer;
+ __m128i * buf_end = (__m128i*) (buffer + buffer_size);
+ do
+ {
+ a = _mm_load_si128(ptr);
+ a = _mm_slli_epi16(a, shift);
+ _mm_store_si128(ptr, a);
+
+ ptr++;
+ } while(ptr < buf_end);
+}
+
+void
+rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values)
+{
+ _mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
+
+ rfx_quantization_decode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_decode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_decode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_decode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_decode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_decode_block_SSE2(buffer + 3868, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_decode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_quantization_encode_block_SSE2(sint16 * buffer, const int buffer_size, const uint32 factor)
+{
+ int shift = factor-6;
+ if (shift <= 0)
+ return;
+
+ __m128i a;
+ __m128i * ptr = (__m128i*) buffer;
+ __m128i * buf_end = (__m128i*) (buffer + buffer_size);
+ do
+ {
+ a = _mm_load_si128(ptr);
+ a = _mm_srai_epi16(a, shift);
+ _mm_store_si128(ptr, a);
+
+ ptr++;
+ } while(ptr < buf_end);
+}
+
+void
+rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values)
+{
+ _mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
+
+ rfx_quantization_encode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_encode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_encode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_encode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_encode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_encode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_encode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_encode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_encode_block_SSE2(buffer + 3868, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_encode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
}
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_decode_block_horiz_SSE2(sint16 * l, sint16 * h, sint16 * dst, int subband_width)
+{
+ int y, n;
+ sint16 * l_ptr = l;
+ sint16 * h_ptr = h;
+ sint16 * dst_ptr = dst;
+ int first;
+ int last;
+ __m128i l_n;
+ __m128i h_n;
+ __m128i h_n_m;
+ __m128i tmp_n;
+ __m128i dst_n;
+ __m128i dst_n_p;
+ __m128i dst1;
+ __m128i dst2;
+
+ for (y = 0; y < subband_width; y++)
+ {
+ /* Even coefficients */
+ for (n = 0; n < subband_width; n+=8)
+ {
+ /* dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1); */
+
+ l_n = _mm_load_si128((__m128i*) l_ptr);
+
+ h_n = _mm_load_si128((__m128i*) h_ptr);
+ h_n_m = _mm_loadu_si128((__m128i*) (h_ptr - 1));
+ if (n == 0)
+ {
+ first = _mm_extract_epi16(h_n_m, 1);
+ h_n_m = _mm_insert_epi16(h_n_m, first, 0);
+ }
+
+ tmp_n = _mm_add_epi16(h_n, h_n_m);
+ tmp_n = _mm_add_epi16(tmp_n, _mm_set1_epi16(1));
+ tmp_n = _mm_srai_epi16(tmp_n, 1);
+
+ dst_n = _mm_sub_epi16(l_n, tmp_n);
+
+ _mm_store_si128((__m128i*) l_ptr, dst_n);
+
+ l_ptr+=8;
+ h_ptr+=8;
+ }
+ l_ptr -= subband_width;
+ h_ptr -= subband_width;
+
+ /* Odd coefficients */
+ for (n = 0; n < subband_width; n+=8)
+ {
+ /* dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1); */
+
+ h_n = _mm_load_si128((__m128i*) h_ptr);
+
+ h_n = _mm_slli_epi16(h_n, 1);
+
+ dst_n = _mm_load_si128((__m128i*) (l_ptr));
+ dst_n_p = _mm_loadu_si128((__m128i*) (l_ptr + 1));
+ if (n == subband_width - 8)
+ {
+ last = _mm_extract_epi16(dst_n_p, 6);
+ dst_n_p = _mm_insert_epi16(dst_n_p, last, 7);
+ }
+
+ tmp_n = _mm_add_epi16(dst_n_p, dst_n);
+ tmp_n = _mm_srai_epi16(tmp_n, 1);
+
+ tmp_n = _mm_add_epi16(tmp_n, h_n);
+
+ dst1 = _mm_unpacklo_epi16(dst_n, tmp_n);
+ dst2 = _mm_unpackhi_epi16(dst_n, tmp_n);
+
+ _mm_store_si128((__m128i*) dst_ptr, dst1);
+ _mm_store_si128((__m128i*) (dst_ptr + 8), dst2);
+
+ l_ptr+=8;
+ h_ptr+=8;
+ dst_ptr+=16;
+ }
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_decode_block_vert_SSE2(sint16 * l, sint16 * h, sint16 * dst, int subband_width)
+{
+ int x, n;
+ sint16 * l_ptr = l;
+ sint16 * h_ptr = h;
+ sint16 * dst_ptr = dst;
+ __m128i l_n;
+ __m128i h_n;
+ __m128i tmp_n;
+ __m128i h_n_m;
+ __m128i dst_n;
+ __m128i dst_n_m;
+ __m128i dst_n_p;
+
+ int total_width = subband_width + subband_width;
+
+ /* Even coefficients */
+ for (n = 0; n < subband_width; n++)
+ {
+ for (x = 0; x < total_width; x+=8)
+ {
+ /* dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1); */
+
+ l_n = _mm_load_si128((__m128i*) l_ptr);
+ h_n = _mm_load_si128((__m128i*) h_ptr);
+
+ tmp_n = _mm_add_epi16(h_n, _mm_set1_epi16(1));;
+ if (n == 0)
+ tmp_n = _mm_add_epi16(tmp_n, h_n);
+ else
+ {
+ h_n_m = _mm_loadu_si128((__m128i*) (h_ptr - total_width));
+ tmp_n = _mm_add_epi16(tmp_n, h_n_m);
+ }
+ tmp_n = _mm_srai_epi16(tmp_n, 1);
+
+ dst_n = _mm_sub_epi16(l_n, tmp_n);
+ _mm_store_si128((__m128i*) dst_ptr, dst_n);
+
+ l_ptr+=8;
+ h_ptr+=8;
+ dst_ptr+=8;
+ }
+ dst_ptr+=total_width;
+ }
+
+ h_ptr = h;
+ dst_ptr = dst + total_width;
+
+ /* Odd coefficients */
+ for (n = 0; n < subband_width; n++)
+ {
+ for (x = 0; x < total_width; x+=8)
+ {
+ /* dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1); */
+
+ h_n = _mm_load_si128((__m128i*) h_ptr);
+ dst_n_m = _mm_load_si128((__m128i*) (dst_ptr - total_width));
+ h_n = _mm_slli_epi16(h_n, 1);
+
+ tmp_n = dst_n_m;
+ if (n == subband_width - 1)
+ tmp_n = _mm_add_epi16(tmp_n, dst_n_m);
+ else
+ {
+ dst_n_p = _mm_loadu_si128((__m128i*) (dst_ptr + total_width));
+ tmp_n = _mm_add_epi16(tmp_n, dst_n_p);
+ }
+ tmp_n = _mm_srai_epi16(tmp_n, 1);
+
+ dst_n = _mm_add_epi16(tmp_n, h_n);
+ _mm_store_si128((__m128i*) dst_ptr, dst_n);
+
+ h_ptr+=8;
+ dst_ptr+=8;
+ }
+ dst_ptr+=total_width;
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_decode_block_SSE2(sint16 * buffer, sint16 * idwt, int subband_width)
+{
+ sint16 * hl, * lh, * hh, * ll;
+ sint16 * l_dst, * h_dst;
+
+ _mm_prefetch_buffer((char *) idwt, subband_width * 4 * sizeof(sint16));
+
+ /* Inverse DWT in horizontal direction, results in 2 sub-bands in L, H order in tmp buffer idwt. */
+ /* The 4 sub-bands are stored in HL(0), LH(1), HH(2), LL(3) order. */
+ /* The lower part L uses LL(3) and HL(0). */
+ /* The higher part H uses LH(1) and HH(2). */
+
+ ll = buffer + subband_width * subband_width * 3;
+ hl = buffer;
+ l_dst = idwt;
+
+ rfx_dwt_2d_decode_block_horiz_SSE2(ll, hl, l_dst, subband_width);
+
+ lh = buffer + subband_width * subband_width;
+ hh = buffer + subband_width * subband_width * 2;
+ h_dst = idwt + subband_width * subband_width * 2;
+
+ rfx_dwt_2d_decode_block_horiz_SSE2(lh, hh, h_dst, subband_width);
+
+ /* Inverse DWT in vertical direction, results are stored in original buffer. */
+ rfx_dwt_2d_decode_block_vert_SSE2(l_dst, h_dst, buffer, subband_width);
+}
+
+void
+rfx_dwt_2d_decode_SSE2(sint16 * buffer, sint16 * dwt_buffer)
+{
+ _mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
+
+ rfx_dwt_2d_decode_block_SSE2(buffer + 3840, dwt_buffer, 8);
+ rfx_dwt_2d_decode_block_SSE2(buffer + 3072, dwt_buffer, 16);
+ rfx_dwt_2d_decode_block_SSE2(buffer, dwt_buffer, 32);
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_encode_block_vert_SSE2(sint16 * src, sint16 * l, sint16 * h, int subband_width)
+{
+ int total_width;
+ int x;
+ int n;
+ __m128i src_2n;
+ __m128i src_2n_1;
+ __m128i src_2n_2;
+ __m128i h_n;
+ __m128i h_n_m;
+ __m128i l_n;
+
+ total_width = subband_width << 1;
+
+ for (n = 0; n < subband_width; n++)
+ {
+ for (x = 0; x < total_width; x += 8)
+ {
+ src_2n = _mm_load_si128((__m128i*) src);
+ src_2n_1 = _mm_load_si128((__m128i*) (src + total_width));
+ if (n < subband_width - 1)
+ src_2n_2 = _mm_load_si128((__m128i*) (src + 2 * total_width));
+ else
+ src_2n_2 = src_2n_1;
+
+ /* h[n] = (src[2n + 1] - ((src[2n] + src[2n + 2]) >> 1)) >> 1 */
+
+ h_n = _mm_add_epi16(src_2n, src_2n_2);
+ h_n = _mm_srai_epi16(h_n, 1);
+ h_n = _mm_sub_epi16(src_2n_1, h_n);
+ h_n = _mm_srai_epi16(h_n, 1);
+
+ _mm_store_si128((__m128i*) h, h_n);
+
+ if (n == 0)
+ h_n_m = h_n;
+ else
+ h_n_m = _mm_load_si128((__m128i*) (h - total_width));
+
+ /* l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1) */
+
+ l_n = _mm_add_epi16(h_n_m, h_n);
+ l_n = _mm_srai_epi16(l_n, 1);
+ l_n = _mm_add_epi16(l_n, src_2n);
+
+ _mm_store_si128((__m128i*) l, l_n);
+
+ src += 8;
+ l += 8;
+ h += 8;
+ }
+ src += total_width;
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_encode_block_horiz_SSE2(sint16 * src, sint16 * l, sint16 * h, int subband_width)
+{
+ int y;
+ int n;
+ int first;
+ __m128i src_2n;
+ __m128i src_2n_1;
+ __m128i src_2n_2;
+ __m128i h_n;
+ __m128i h_n_m;
+ __m128i l_n;
+
+ for (y = 0; y < subband_width; y++)
+ {
+ for (n = 0; n < subband_width; n += 8)
+ {
+ /* The following 3 Set operations consumes more than half of the total DWT processing time! */
+ src_2n = _mm_set_epi16(src[14], src[12], src[10], src[8], src[6], src[4], src[2], src[0]);
+ src_2n_1 = _mm_set_epi16(src[15], src[13], src[11], src[9], src[7], src[5], src[3], src[1]);
+ src_2n_2 = _mm_set_epi16(n == subband_width - 8 ? src[15] : src[16],
+ src[14], src[12], src[10], src[8], src[6], src[4], src[2]);
+
+ /* h[n] = (src[2n + 1] - ((src[2n] + src[2n + 2]) >> 1)) >> 1 */
+
+ h_n = _mm_add_epi16(src_2n, src_2n_2);
+ h_n = _mm_srai_epi16(h_n, 1);
+ h_n = _mm_sub_epi16(src_2n_1, h_n);
+ h_n = _mm_srai_epi16(h_n, 1);
+
+ _mm_store_si128((__m128i*) h, h_n);
+
+ h_n_m = _mm_loadu_si128((__m128i*) (h - 1));
+ if (n == 0)
+ {
+ first = _mm_extract_epi16(h_n_m, 1);
+ h_n_m = _mm_insert_epi16(h_n_m, first, 0);
+ }
+
+ /* l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1) */
+
+ l_n = _mm_add_epi16(h_n_m, h_n);
+ l_n = _mm_srai_epi16(l_n, 1);
+ l_n = _mm_add_epi16(l_n, src_2n);
+
+ _mm_store_si128((__m128i*) l, l_n);
+
+ src += 16;
+ l += 8;
+ h += 8;
+ }
+ }
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_encode_block_SSE2(sint16 * buffer, sint16 * dwt, int subband_width)
+{
+ sint16 * hl, * lh, * hh, * ll;
+ sint16 * l_src, * h_src;
+
+ _mm_prefetch_buffer((char *) dwt, subband_width * 4 * sizeof(sint16));
+
+ /* DWT in vertical direction, results in 2 sub-bands in L, H order in tmp buffer dwt. */
+
+ l_src = dwt;
+ h_src = dwt + subband_width * subband_width * 2;
+
+ rfx_dwt_2d_encode_block_vert_SSE2(buffer, l_src, h_src, subband_width);
+
+ /* DWT in horizontal direction, results in 4 sub-bands in HL(0), LH(1), HH(2), LL(3) order, stored in original buffer. */
+ /* The lower part L generates LL(3) and HL(0). */
+ /* The higher part H generates LH(1) and HH(2). */
+
+ ll = buffer + subband_width * subband_width * 3;
+ hl = buffer;
+
+ lh = buffer + subband_width * subband_width;
+ hh = buffer + subband_width * subband_width * 2;
+
+ rfx_dwt_2d_encode_block_horiz_SSE2(l_src, ll, hl, subband_width);
+ rfx_dwt_2d_encode_block_horiz_SSE2(h_src, lh, hh, subband_width);
+}
+
+void
+rfx_dwt_2d_encode_SSE2(sint16 * buffer, sint16 * dwt_buffer)
+{
+ _mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
+
+ rfx_dwt_2d_encode_block_SSE2(buffer, dwt_buffer, 32);
+ rfx_dwt_2d_encode_block_SSE2(buffer + 3072, dwt_buffer, 16);
+ rfx_dwt_2d_encode_block_SSE2(buffer + 3840, dwt_buffer, 8);
+}
diff --git a/libfreerdp-rfx/sse/rfx_sse2.h b/libfreerdp-rfx/sse/rfx_sse2.h
index 0c16180..85921da 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.h
+++ b/libfreerdp-rfx/sse/rfx_sse2.h
@@ -22,6 +22,11 @@
#include <freerdp/rfx.h>
-void rfx_decode_YCbCr_to_RGB_SSE2(uint32 * y_r_buffer, uint32 * cb_g_buffer, uint32 * cr_b_buffer);
+void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
+void rfx_encode_RGB_to_YCbCr_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
+void rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values);
+void rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values);
+void rfx_dwt_2d_decode_SSE2(sint16 * buffer, sint16 * dwt_buffer);
+void rfx_dwt_2d_encode_SSE2(sint16 * buffer, sint16 * dwt_buffer);
#endif /* __RFX_SSE2_H */
diff --git a/libfreerdp-utils/Makefile.am b/libfreerdp-utils/Makefile.am
index 6db6df1..d6912fd 100644
--- a/libfreerdp-utils/Makefile.am
+++ b/libfreerdp-utils/Makefile.am
@@ -11,7 +11,9 @@ libfreerdp_utils_la_SOURCES = \
semaphore.c \
unicode.c \
wait_obj.c \
- chan_plugin.c
+ chan_plugin.c \
+ stopwatch.c \
+ profiler.c
libfreerdp_utils_la_CFLAGS = \
-I$(top_srcdir) \
diff --git a/libfreerdp-utils/profiler.c b/libfreerdp-utils/profiler.c
new file mode 100644
index 0000000..308c78c
--- /dev/null
+++ b/libfreerdp-utils/profiler.c
@@ -0,0 +1,72 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ Profiler Utils
+
+ Copyright 2011 Stephen Erisman
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <freerdp/utils/profiler.h>
+
+PROFILER * profiler_create(char * name)
+{
+ PROFILER * profiler;
+
+ profiler = (PROFILER *) xmalloc(sizeof(PROFILER));
+
+ profiler->name = name;
+ profiler->stopwatch = stopwatch_create();
+
+ return profiler;
+}
+
+void profiler_free(PROFILER * profiler)
+{
+ stopwatch_free(profiler->stopwatch);
+
+ xfree(profiler);
+}
+
+void profiler_enter(PROFILER * profiler)
+{
+ stopwatch_start(profiler->stopwatch);
+}
+
+void profiler_exit(PROFILER * profiler)
+{
+ stopwatch_stop(profiler->stopwatch);
+}
+
+void profiler_print_header()
+{
+ printf("\n");
+ printf(" |-----------------------|\n" );
+ printf(" PROFILER | elapsed seconds |\n" );
+ printf("|--------------------------------------------|-----------------------|\n" );
+ printf("| code section | iterations | total | avg. |\n" );
+ printf("|-------------------------------|------------|-----------|-----------|\n" );
+}
+
+void profiler_print(PROFILER * profiler)
+{
+ double elapsed_sec = stopwatch_get_elapsed_time_in_seconds(profiler->stopwatch);
+ double avg_sec = elapsed_sec / (double) profiler->stopwatch->count;
+
+ printf("| %-30.30s| %'10lu | %'9f | %'9f |\n", profiler->name, profiler->stopwatch->count, elapsed_sec, avg_sec);
+}
+
+void profiler_print_footer()
+{
+ printf("|--------------------------------------------------------------------|\n" );
+}
diff --git a/libfreerdp-utils/stopwatch.c b/libfreerdp-utils/stopwatch.c
new file mode 100644
index 0000000..c13365c
--- /dev/null
+++ b/libfreerdp-utils/stopwatch.c
@@ -0,0 +1,60 @@
+/*
+ FreeRDP: A Remote Desktop Protocol client.
+ Stopwatch Utils
+
+ Copyright 2011 Stephen Erisman
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <freerdp/utils/stopwatch.h>
+
+STOPWATCH * stopwatch_create()
+{
+ STOPWATCH * sw;
+
+ sw = (STOPWATCH *) xmalloc(sizeof(STOPWATCH));
+ stopwatch_reset(sw);
+
+ return sw;
+}
+
+void stopwatch_free(STOPWATCH * stopwatch)
+{
+ xfree(stopwatch);
+}
+
+void stopwatch_start(STOPWATCH * stopwatch)
+{
+ stopwatch->start = clock();
+ stopwatch->count++;
+}
+
+void stopwatch_stop(STOPWATCH * stopwatch)
+{
+ stopwatch->end = clock();
+ stopwatch->elapsed += (stopwatch->end - stopwatch->start);
+}
+
+void stopwatch_reset(STOPWATCH * stopwatch)
+{
+ stopwatch->start = 0;
+ stopwatch->end = 0;
+ stopwatch->elapsed = 0;
+ stopwatch->count = 0;
+}
+
+double stopwatch_get_elapsed_time_in_seconds(STOPWATCH * stopwatch)
+{
+ return ((double)stopwatch->elapsed) / CLOCKS_PER_SEC;
+}