Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FreeRDP/FreeRDP-old.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marc-André Moreau <marcandre.moreau@gmail.com> 2011-06-22 08:23:58 +0400
committer: Marc-André Moreau <marcandre.moreau@gmail.com> 2011-06-22 08:23:58 +0400
commit: 605ec47335f1b3b2d61a2d1c253a1579dc852d05 (patch)
tree: b693d696b93b86af4a2e2d3c6badf19dd210c17d
parent: 3ccc01ff6a03fdb238cde2e414722013f5f81097 (diff)
parent: 8b8194f515c7bf713fbb1855a9caaffbb1b71729 (diff)
libfreerdp-rfx: merging latest refactoring with remotefx branch
-rw-r--r-- cunit/test_librfx.c 56
-rw-r--r-- include/freerdp/rfx.h 23
-rw-r--r-- include/freerdp/utils/Makefile.am 2
-rw-r--r-- libfreerdp-rfx/librfx.c 284
-rw-r--r-- libfreerdp-rfx/rfx_decode.c 112
-rw-r--r-- libfreerdp-rfx/rfx_encode.c 146
-rw-r--r-- libfreerdp-rfx/rfx_encode.h 2
-rw-r--r-- libfreerdp-rfx/rfx_rlgr.c 15
-rw-r--r-- libfreerdp-rfx/sse/rfx_sse.c 4
-rw-r--r-- libfreerdp-rfx/sse/rfx_sse2.c 262
-rw-r--r-- libfreerdp-rfx/sse/rfx_sse2.h 2
11 files changed, 731 insertions, 177 deletions
diff --git a/cunit/test_librfx.c b/cunit/test_librfx.c
index 16b8cbf..584ac7a 100644
--- a/cunit/test_librfx.c
+++ b/cunit/test_librfx.c
@@ -61,7 +61,7 @@ static const uint8 y_data[] =
static const uint8 cb_data[] =
{
- 0x1b, 0x04, 0x7f, 0x04, 0x31, 0x5f, 0xc2,
+ 0x1b, 0x04, 0x7f, 0x04, 0x31, 0x5f, 0xc2,
0x94, 0xaf, 0x05, 0x29, 0x5e, 0x0a, 0x52, 0xbc, 0x14, 0xa5, 0x78, 0x29, 0x25, 0x78, 0x29, 0x25,
0x78, 0x29, 0x25, 0x68, 0x52, 0x4a, 0xf0, 0x52, 0x4a, 0xf0, 0x52, 0x4a, 0xd0, 0xa4, 0x95, 0xe0,
0xa4, 0x95, 0xe0, 0xa4, 0x95, 0xa1, 0x49, 0x2b, 0xc1, 0x49, 0x2b, 0xc1, 0x49, 0x2b, 0x42, 0x92,
@@ -86,7 +86,7 @@ static const uint8 cb_data[] =
static const uint8 cr_data[] =
{
- 0x1b, 0xfc, 0x11, 0xc1, 0x0f, 0x4a, 0xc1, 0x4f, 0x4a, 0xc1,
+ 0x1b, 0xfc, 0x11, 0xc1, 0x0f, 0x4a, 0xc1, 0x4f, 0x4a, 0xc1,
0x4f, 0x4a, 0xa1, 0x4d, 0x95, 0x42, 0x9e, 0x95, 0x42, 0x9e, 0x95, 0x42, 0x9b, 0x2a, 0x85, 0x3d,
0x2a, 0x85, 0x3d, 0x2a, 0x85, 0x36, 0x55, 0x0a, 0x7a, 0x55, 0x0a, 0x7a, 0x55, 0x0a, 0x6c, 0xaa,
0x14, 0xf4, 0xaa, 0x14, 0xf4, 0xaa, 0x14, 0xd9, 0x54, 0x29, 0xe9, 0x54, 0x29, 0xe9, 0x54, 0x29,
@@ -132,6 +132,23 @@ static const uint8 rgb_scanline_data[] =
0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
+ 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF,
0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0xFF
};
@@ -299,7 +316,7 @@ void
test_encode(void)
{
RFX_CONTEXT * context;
- uint8 ycbcr_buffer[16384];
+ uint8 ycbcr_buffer[1024000];
int y_size, cb_size, cr_size;
int i;
uint8 decode_buffer[4096 * 3];
@@ -313,7 +330,7 @@ test_encode(void)
context->mode = RLGR3;
rfx_context_set_pixel_format(context, RFX_PIXEL_FORMAT_RGB);
- rfx_encode_rgb(context, rgb_data, 64 * 3,
+ rfx_encode_rgb(context, rgb_data, 64, 64, 64 * 3,
test_quantization_values, test_quantization_values, test_quantization_values,
ycbcr_buffer, sizeof(ycbcr_buffer), &y_size, &cb_size, &cr_size);
//dump_buffer(context->cb_g_buffer, 4096);
@@ -340,8 +357,15 @@ void
test_message(void)
{
RFX_CONTEXT * context;
- uint8 buffer[16384];
+ uint8 buffer[1024000];
int size;
+ int i, j;
+ RFX_RECT rect = {0, 0, 100, 80};
+ RFX_MESSAGE * message;
+
+ rgb_data = (uint8 *) malloc(100 * 80 * 3);
+ for (i = 0; i < 80; i++)
+ memcpy(rgb_data + i * 100 * 3, rgb_scanline_data, 100 * 3);
context = rfx_context_new();
context->mode = RLGR3;
@@ -349,9 +373,27 @@ test_message(void)
context->height = 600;
rfx_context_set_pixel_format(context, RFX_PIXEL_FORMAT_RGB);
- size = rfx_compose_message_header(context, buffer, sizeof(buffer));
+ size = rfx_compose_message_header(context, buffer, sizeof(buffer));
/*hexdump(buffer, size);*/
- rfx_process_message(context, buffer, size);
+ message = rfx_process_message(context, buffer, size);
+ rfx_message_free(context, message);
+
+ for (i = 0; i < 1000; i++)
+ {
+ size = rfx_compose_message_data(context, buffer, sizeof(buffer),
+ &rect, 1, rgb_data, 100, 80, 100 * 3);
+ /*hexdump(buffer, size);*/
+ message = rfx_process_message(context, buffer, size);
+ if (i == 0)
+ {
+ for (j = 0; j < message->num_tiles; j++)
+ {
+ dump_ppm_image(message->tiles[j]->data);
+ }
+ }
+ rfx_message_free(context, message);
+ }
rfx_context_free(context);
+ free(rgb_data);
}
diff --git a/include/freerdp/rfx.h b/include/freerdp/rfx.h
index 6e2c42f..7cf309c 100644
--- a/include/freerdp/rfx.h
+++ b/include/freerdp/rfx.h
@@ -41,6 +41,7 @@ extern "C" {
#define WBT_FRAME_END 0xCCC5
#define WBT_REGION 0xCCC6
#define WBT_EXTENSION 0xCCC7
+#define CBT_REGION 0xCAC1
#define CBT_TILESET 0xCAC2
#define CBT_TILE 0xCAC3
@@ -125,7 +126,8 @@ typedef struct _RFX_MESSAGE RFX_MESSAGE;
struct _RFX_CONTEXT
{
- int flags;
+ uint16 flags;
+ uint16 properties;
uint16 width;
uint16 height;
RLGR_MODE mode;
@@ -133,10 +135,15 @@ struct _RFX_CONTEXT
uint32 codec_id;
uint32 codec_version;
RFX_PIXEL_FORMAT pixel_format;
+ uint8 bytes_per_pixel;
/* temporary data within a frame */
+ uint32 frame_idx;
uint8 num_quants;
uint32 * quants;
+ uint8 quant_idx_y;
+ uint8 quant_idx_cb;
+ uint8 quant_idx_cr;
/* pre-allocated buffers */
@@ -153,7 +160,6 @@ struct _RFX_CONTEXT
sint16 dwt_mem_8[8*8*2*2 + 8]; /* sub-band width 8 */
sint16 dwt_mem_16[16*16*2*2 + 8]; /* sub-band width 16 */
sint16 dwt_mem_32[32*32*2*2 + 8]; /* sub-band width 32 */
- //sint16* dwt_buffers[5]; /* sub-band buffer array */
sint16 * dwt_buffer_8;
sint16 * dwt_buffer_16;
@@ -163,6 +169,7 @@ struct _RFX_CONTEXT
void (* decode_YCbCr_to_RGB)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
void (* encode_RGB_to_YCbCr)(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
void (* quantization_decode)(sint16 * buffer, const uint32 * quantization_values);
+ void (* quantization_encode)(sint16 * buffer, const uint32 * quantization_values);
void (* dwt_2d_decode)(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32);
/* profiler definitions */
@@ -173,6 +180,16 @@ struct _RFX_CONTEXT
PROFILER_DEFINE(prof_rfx_quantization_decode);
PROFILER_DEFINE(prof_rfx_dwt_2d_decode);
PROFILER_DEFINE(prof_rfx_decode_YCbCr_to_RGB);
+ PROFILER_DEFINE(prof_rfx_decode_format_RGB);
+
+ PROFILER_DEFINE(prof_rfx_encode_rgb);
+ PROFILER_DEFINE(prof_rfx_encode_component);
+ PROFILER_DEFINE(prof_rfx_rlgr_encode);
+ PROFILER_DEFINE(prof_rfx_differential_encode);
+ PROFILER_DEFINE(prof_rfx_quantization_encode);
+ PROFILER_DEFINE(prof_rfx_dwt_2d_encode);
+ PROFILER_DEFINE(prof_rfx_encode_RGB_to_YCbCr);
+ PROFILER_DEFINE(prof_rfx_encode_format_RGB);
};
typedef struct _RFX_CONTEXT RFX_CONTEXT;
@@ -185,7 +202,7 @@ void rfx_message_free(RFX_CONTEXT * context, RFX_MESSAGE * message);
int rfx_compose_message_header(RFX_CONTEXT * context, uint8 * buffer, int buffer_size);
int rfx_compose_message_data(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
- const RFX_RECT * rects, int num_rects, uint8 * image_buffer, int width, int height);
+ const RFX_RECT * rects, int num_rects, uint8 * image_data, int width, int height, int rowstride);
#ifdef __cplusplus
}
diff --git a/include/freerdp/utils/Makefile.am b/include/freerdp/utils/Makefile.am
index eec4969..410babf 100644
--- a/include/freerdp/utils/Makefile.am
+++ b/include/freerdp/utils/Makefile.am
@@ -6,7 +6,9 @@ include_HEADERS = \
chan_plugin.h \
datablob.h \
memory.h \
+ profiler.h \
semaphore.h \
+ stopwatch.h \
stream.h \
unicode.h \
wait_obj.h
diff --git a/libfreerdp-rfx/librfx.c b/libfreerdp-rfx/librfx.c
index 160ea57..b5b046f 100644
--- a/libfreerdp-rfx/librfx.c
+++ b/libfreerdp-rfx/librfx.c
@@ -33,6 +33,23 @@
#include "librfx.h"
+/*
+ The quantization values control the compression rate and quality. The value
+ range is between 6 and 15. The higher the value, the higher the compression
+ rate and the lower the quality.
+
+ These are the default values used by the MS RDP server, and we also use them
+ as the default values for the encoder. They can be overridden by setting the
+ context->num_quants and context->quants members.
+
+ The order of the values is:
+ LL3, LH3, HL3, HH3, LH2, HL2, HH2, LH1, HL1, HH1
+*/
+static const uint32 rfx_default_quantization_values[] =
+{
+ 6, 6, 6, 6, 7, 7, 8, 8, 8, 9
+};
+
void rfx_profiler_create(RFX_CONTEXT * context)
{
PROFILER_CREATE(context->prof_rfx_decode_rgb, "rfx_decode_rgb");
@@ -42,6 +59,16 @@ void rfx_profiler_create(RFX_CONTEXT * context)
PROFILER_CREATE(context->prof_rfx_quantization_decode, "rfx_quantization_decode");
PROFILER_CREATE(context->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode");
PROFILER_CREATE(context->prof_rfx_decode_YCbCr_to_RGB, "rfx_decode_YCbCr_to_RGB");
+ PROFILER_CREATE(context->prof_rfx_decode_format_RGB, "rfx_decode_format_RGB");
+
+ PROFILER_CREATE(context->prof_rfx_encode_rgb, "rfx_encode_rgb");
+ PROFILER_CREATE(context->prof_rfx_encode_component, "rfx_encode_component");
+ PROFILER_CREATE(context->prof_rfx_rlgr_encode, "rfx_rlgr_encode");
+ PROFILER_CREATE(context->prof_rfx_differential_encode, "rfx_differential_encode");
+ PROFILER_CREATE(context->prof_rfx_quantization_encode, "rfx_quantization_encode");
+ PROFILER_CREATE(context->prof_rfx_dwt_2d_encode, "rfx_dwt_2d_encode");
+ PROFILER_CREATE(context->prof_rfx_encode_RGB_to_YCbCr, "rfx_encode_RGB_to_YCbCr");
+ PROFILER_CREATE(context->prof_rfx_encode_format_RGB, "rfx_encode_format_RGB");
}
void rfx_profiler_free(RFX_CONTEXT * context)
@@ -53,11 +80,22 @@ void rfx_profiler_free(RFX_CONTEXT * context)
PROFILER_FREE(context->prof_rfx_quantization_decode);
PROFILER_FREE(context->prof_rfx_dwt_2d_decode);
PROFILER_FREE(context->prof_rfx_decode_YCbCr_to_RGB);
+ PROFILER_FREE(context->prof_rfx_decode_format_RGB);
+
+ PROFILER_FREE(context->prof_rfx_encode_rgb);
+ PROFILER_FREE(context->prof_rfx_encode_component);
+ PROFILER_FREE(context->prof_rfx_rlgr_encode);
+ PROFILER_FREE(context->prof_rfx_differential_encode);
+ PROFILER_FREE(context->prof_rfx_quantization_encode);
+ PROFILER_FREE(context->prof_rfx_dwt_2d_encode);
+ PROFILER_FREE(context->prof_rfx_encode_RGB_to_YCbCr);
+ PROFILER_FREE(context->prof_rfx_encode_format_RGB);
}
void rfx_profiler_print(RFX_CONTEXT * context)
{
PROFILER_PRINT_HEADER;
+
PROFILER_PRINT(context->prof_rfx_decode_rgb);
PROFILER_PRINT(context->prof_rfx_decode_component);
PROFILER_PRINT(context->prof_rfx_rlgr_decode);
@@ -65,6 +103,17 @@ void rfx_profiler_print(RFX_CONTEXT * context)
PROFILER_PRINT(context->prof_rfx_quantization_decode);
PROFILER_PRINT(context->prof_rfx_dwt_2d_decode);
PROFILER_PRINT(context->prof_rfx_decode_YCbCr_to_RGB);
+ PROFILER_PRINT(context->prof_rfx_decode_format_RGB);
+
+ PROFILER_PRINT(context->prof_rfx_encode_rgb);
+ PROFILER_PRINT(context->prof_rfx_encode_component);
+ PROFILER_PRINT(context->prof_rfx_rlgr_encode);
+ PROFILER_PRINT(context->prof_rfx_differential_encode);
+ PROFILER_PRINT(context->prof_rfx_quantization_encode);
+ PROFILER_PRINT(context->prof_rfx_dwt_2d_encode);
+ PROFILER_PRINT(context->prof_rfx_encode_RGB_to_YCbCr);
+ PROFILER_PRINT(context->prof_rfx_encode_format_RGB);
+
PROFILER_PRINT_FOOTER;
}
@@ -78,6 +127,9 @@ rfx_context_new(void)
context->pool = rfx_pool_new();
+ /* initialize the default pixel format */
+ rfx_context_set_pixel_format(context, RFX_PIXEL_FORMAT_BGRA);
+
/* align buffers to 16 byte boundary (needed for SSE/SSE2 instructions) */
context->y_r_buffer = (sint16 *)(((uintptr_t)context->y_r_mem + 16) & ~ 0x0F);
context->cb_g_buffer = (sint16 *)(((uintptr_t)context->cb_g_mem + 16) & ~ 0x0F);
@@ -121,6 +173,20 @@ void
rfx_context_set_pixel_format(RFX_CONTEXT * context, RFX_PIXEL_FORMAT pixel_format)
{
context->pixel_format = pixel_format;
+ switch (pixel_format)
+ {
+ case RFX_PIXEL_FORMAT_BGRA:
+ case RFX_PIXEL_FORMAT_RGBA:
+ context->bytes_per_pixel = 4;
+ break;
+ case RFX_PIXEL_FORMAT_BGR:
+ case RFX_PIXEL_FORMAT_RGB:
+ context->bytes_per_pixel = 3;
+ break;
+ default:
+ context->bytes_per_pixel = 0;
+ break;
+ }
}
static void
@@ -204,6 +270,7 @@ rfx_process_message_context(RFX_CONTEXT * context, uint8 * data, int size)
DEBUG_RFX("ctxId %d tileSize %d properties 0x%X.", ctxId, tileSize, properties);
+ context->properties = properties;
context->flags = (properties & 0x0007);
if (context->flags == CODEC_MODE)
@@ -509,6 +576,12 @@ rfx_message_free(RFX_CONTEXT * context, RFX_MESSAGE * message)
static int
rfx_compose_message_sync(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
{
+ if (buffer_size < 12)
+ {
+ printf("rfx_compose_message_sync: buffer size too small.\n");
+ return 0;
+ }
+
SET_UINT16(buffer, 0, WBT_SYNC); /* BlockT.blockType */
SET_UINT32(buffer, 2, 12); /* BlockT.blockLen */
SET_UINT32(buffer, 6, WF_MAGIC); /* magic */
@@ -520,6 +593,12 @@ rfx_compose_message_sync(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
static int
rfx_compose_message_codec_versions(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
{
+ if (buffer_size < 10)
+ {
+ printf("rfx_compose_message_codec_versions: buffer size too small.\n");
+ return 0;
+ }
+
SET_UINT16(buffer, 0, WBT_CODEC_VERSIONS); /* BlockT.blockType */
SET_UINT32(buffer, 2, 10); /* BlockT.blockLen */
SET_UINT8(buffer, 6, 1); /* numCodecs */
@@ -532,6 +611,12 @@ rfx_compose_message_codec_versions(RFX_CONTEXT * context, uint8 * buffer, int bu
static int
rfx_compose_message_channels(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
{
+ if (buffer_size < 12)
+ {
+ printf("rfx_compose_message_channels: buffer size too small.\n");
+ return 0;
+ }
+
SET_UINT16(buffer, 0, WBT_CHANNELS); /* BlockT.blockType */
SET_UINT32(buffer, 2, 12); /* BlockT.blockLen */
SET_UINT8(buffer, 6, 1); /* numChannels */
@@ -547,6 +632,12 @@ rfx_compose_message_context(RFX_CONTEXT * context, uint8 * buffer, int buffer_si
{
uint16 properties;
+ if (buffer_size < 13)
+ {
+ printf("rfx_compose_message_context: buffer size too small.\n");
+ return 0;
+ }
+
SET_UINT16(buffer, 0, WBT_CONTEXT); /* CodecChannelT.blockType */
SET_UINT32(buffer, 2, 13); /* CodecChannelT.blockLen */
SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
@@ -561,6 +652,7 @@ rfx_compose_message_context(RFX_CONTEXT * context, uint8 * buffer, int buffer_si
properties |= ((context->mode == RLGR1 ? CLW_ENTROPY_RLGR1 : CLW_ENTROPY_RLGR3) << 9); /* et */
properties |= (SCALAR_QUANTIZATION << 13); /* qt */
SET_UINT16(buffer, 11, properties);
+ context->properties = properties;
return 13;
}
@@ -581,32 +673,210 @@ rfx_compose_message_header(RFX_CONTEXT * context, uint8 * buffer, int buffer_siz
static int
rfx_compose_message_frame_begin(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
{
- return 0;
+ if (buffer_size < 14)
+ {
+ printf("rfx_compose_message_frame_begin: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_FRAME_BEGIN); /* CodecChannelT.blockType */
+ SET_UINT32(buffer, 2, 14); /* CodecChannelT.blockLen */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+ SET_UINT32(buffer, 8, context->frame_idx); /* frameIdx */
+ SET_UINT16(buffer, 12, 1); /* numRegions */
+
+ return 14;
}
static int
rfx_compose_message_region(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
const RFX_RECT * rects, int num_rects)
{
- return 0;
+ int size;
+ int i;
+
+ if (buffer_size < 15 + num_rects * 8)
+ {
+ printf("rfx_compose_message_region: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_REGION); /* CodecChannelT.blockType */
+ /* set CodecChannelT.blockLen later */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+ SET_UINT8(buffer, 8, 1); /* regionFlags */
+ SET_UINT16(buffer, 9, num_rects); /* numRects */
+ size = 11;
+
+ for (i = 0; i < num_rects; i++)
+ {
+ SET_UINT16(buffer, size, rects[i].x);
+ SET_UINT16(buffer, size + 2, rects[i].y);
+ SET_UINT16(buffer, size + 4, rects[i].width);
+ SET_UINT16(buffer, size + 6, rects[i].height);
+ size += 8;
+ }
+
+ SET_UINT16(buffer, size, CBT_REGION); /* regionType */
+ SET_UINT16(buffer, size + 2, 1); /* numTilesets */
+ size += 4;
+
+ SET_UINT32(buffer, 2, size); /* CodecChannelT.blockLen */
+ return size;
+}
+
+static int
+rfx_compose_message_tile(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
+ uint8 * tile_data, int tile_width, int tile_height, int rowstride,
+ const uint32 * quantVals, int quantIdxY, int quantIdxCb, int quantIdxCr, int xIdx, int yIdx)
+{
+ int YLen = 0;
+ int CbLen = 0;
+ int CrLen = 0;
+ int size;
+
+ if (buffer_size < 19)
+ {
+ printf("rfx_compose_message_tile: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, CBT_TILE); /* BlockT.blockType */
+ /* set BlockT.blockLen later */
+ SET_UINT8(buffer, 6, quantIdxY); /* quantIdxY */
+ SET_UINT8(buffer, 7, quantIdxCb); /* quantIdxCb */
+ SET_UINT8(buffer, 8, quantIdxCr); /* quantIdxCr */
+ SET_UINT16(buffer, 9, xIdx); /* xIdx */
+ SET_UINT16(buffer, 11, yIdx); /* yIdx */
+
+ rfx_encode_rgb(context, tile_data, tile_width, tile_height, rowstride,
+ quantVals + quantIdxY * 10, quantVals + quantIdxCb * 10, quantVals + quantIdxCr * 10,
+ buffer + 19, buffer_size - 19, &YLen, &CbLen, &CrLen);
+
+ DEBUG_RFX("xIdx=%d yIdx=%d width=%d height=%d YLen=%d CbLen=%d CrLen=%d",
+ xIdx, yIdx, tile_width, tile_height, YLen, CbLen, CrLen);
+
+ SET_UINT16(buffer, 13, YLen); /* YLen */
+ SET_UINT16(buffer, 15, CbLen); /* CbLen */
+ SET_UINT16(buffer, 17, CrLen); /* CrLen */
+ size = 19 + YLen + CbLen + CrLen;
+ SET_UINT32(buffer, 2, size); /* BlockT.blockLen */
+
+ return size;
}
static int
rfx_compose_message_tileset(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
- uint8 * image_buffer, int width, int height)
+ uint8 * image_data, int width, int height, int rowstride)
{
- return 0;
+ int size;
+ int i;
+ int numQuants;
+ const uint32 * quantVals;
+ const uint32 * quantValsPtr;
+ int quantIdxY;
+ int quantIdxCb;
+ int quantIdxCr;
+ int numTiles;
+ int numTilesX;
+ int numTilesY;
+ int xIdx;
+ int yIdx;
+ int tilesDataSize;
+
+ if (context->num_quants == 0)
+ {
+ numQuants = 1;
+ quantVals = rfx_default_quantization_values;
+ quantIdxY = 0;
+ quantIdxCb = 0;
+ quantIdxCr = 0;
+ }
+ else
+ {
+ numQuants = context->num_quants;
+ quantVals = context->quants;
+ quantIdxY = context->quant_idx_y;
+ quantIdxCb = context->quant_idx_cb;
+ quantIdxCr = context->quant_idx_cr;
+ }
+
+ numTilesX = (width + 63) / 64;
+ numTilesY = (height + 63) / 64;
+ numTiles = numTilesX * numTilesY;
+
+ if (buffer_size < 22 + numQuants * 5)
+ {
+ printf("rfx_compose_message_tileset: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_EXTENSION); /* CodecChannelT.blockType */
+ /* set CodecChannelT.blockLen later */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+ SET_UINT16(buffer, 8, CBT_TILESET); /* subtype */
+ SET_UINT16(buffer, 10, 0); /* idx */
+ SET_UINT16(buffer, 12, context->properties); /* properties */
+ SET_UINT8(buffer, 14, numQuants); /* numQuants */
+ SET_UINT8(buffer, 15, 0x40); /* tileSize */
+ SET_UINT16(buffer, 16, numTiles); /* numTiles */
+ /* set tilesDataSize later */
+ size = 22;
+
+ quantValsPtr = quantVals;
+ for (i = 0; i < numQuants * 5; i++)
+ {
+ SET_UINT8(buffer, size, quantValsPtr[0] + (quantValsPtr[1] << 4));
+ quantValsPtr += 2;
+ size++;
+ }
+
+ DEBUG_RFX("width:%d height:%d rowstride:%d", width, height, rowstride);
+
+ tilesDataSize = 0;
+ for (yIdx = 0; yIdx < numTilesY; yIdx++)
+ {
+ for (xIdx = 0; xIdx < numTilesX; xIdx++)
+ {
+ tilesDataSize += rfx_compose_message_tile(context,
+ buffer + size + tilesDataSize, buffer_size - size - tilesDataSize,
+ image_data + yIdx * 64 * rowstride + xIdx * 64 * context->bytes_per_pixel,
+ xIdx < numTilesX - 1 ? 64 : width - xIdx * 64,
+ yIdx < numTilesY - 1 ? 64 : height - yIdx * 64,
+ rowstride, quantVals, quantIdxY, quantIdxCb, quantIdxCr, xIdx, yIdx);
+ }
+ }
+
+ size += tilesDataSize;
+ SET_UINT32(buffer, 2, size); /* CodecChannelT.blockLen */
+ SET_UINT32(buffer, 18, tilesDataSize); /* tilesDataSize */
+
+ return size;
}
static int
rfx_compose_message_frame_end(RFX_CONTEXT * context, uint8 * buffer, int buffer_size)
{
- return 0;
+ if (buffer_size < 8)
+ {
+ printf("rfx_compose_message_frame_end: buffer size too small.\n");
+ return 0;
+ }
+
+ SET_UINT16(buffer, 0, WBT_FRAME_END); /* CodecChannelT.blockType */
+ SET_UINT32(buffer, 2, 8); /* CodecChannelT.blockLen */
+ SET_UINT8(buffer, 6, 1); /* CodecChannelT.codecId */
+ SET_UINT8(buffer, 7, 0); /* CodecChannelT.channelId */
+
+ return 8;
}
int
rfx_compose_message_data(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
- const RFX_RECT * rects, int num_rects, uint8 * image_buffer, int width, int height)
+ const RFX_RECT * rects, int num_rects, uint8 * image_data, int width, int height, int rowstride)
{
int composed_size;
@@ -614,7 +884,7 @@ rfx_compose_message_data(RFX_CONTEXT * context, uint8 * buffer, int buffer_size,
composed_size += rfx_compose_message_region(context, buffer + composed_size, buffer_size - composed_size,
rects, num_rects);
composed_size += rfx_compose_message_tileset(context, buffer + composed_size, buffer_size - composed_size,
- image_buffer, width, height);
+ image_data, width, height, rowstride);
composed_size += rfx_compose_message_frame_end(context, buffer + composed_size, buffer_size - composed_size);
return composed_size;
diff --git a/libfreerdp-rfx/rfx_decode.c b/libfreerdp-rfx/rfx_decode.c
index 75eab83..2e3180c 100644
--- a/libfreerdp-rfx/rfx_decode.c
+++ b/libfreerdp-rfx/rfx_decode.c
@@ -27,6 +27,57 @@
#include "rfx_decode.h"
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_decode_format_RGB(sint16 * r_buf, sint16 * g_buf, sint16 * b_buf,
+ RFX_PIXEL_FORMAT pixel_format, uint8 * dst_buf)
+{
+ sint16 * r = r_buf;
+ sint16 * g = g_buf;
+ sint16 * b = b_buf;
+ uint8 * dst = dst_buf;
+ int i;
+
+ switch (pixel_format)
+ {
+ case RFX_PIXEL_FORMAT_BGRA:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*b++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*r++);
+ *dst++ = 0xFF;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGBA:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*r++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*b++);
+ *dst++ = 0xFF;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_BGR:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*b++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*r++);
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGB:
+ for (i = 0; i < 4096; i++)
+ {
+ *dst++ = (uint8) (*r++);
+ *dst++ = (uint8) (*g++);
+ *dst++ = (uint8) (*b++);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
#define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v)))
void
@@ -81,10 +132,6 @@ rfx_decode_rgb(RFX_CONTEXT * context,
const uint8 * cb_data, int cb_size, const uint32 * cb_quants,
const uint8 * cr_data, int cr_size, const uint32 * cr_quants, uint8* rgb_buffer)
{
- int i;
- uint8 * dst;
- sint16 * r, * g, * b;
-
PROFILER_ENTER(context->prof_rfx_decode_rgb);
dst = rgb_buffer;
@@ -96,58 +143,11 @@ rfx_decode_rgb(RFX_CONTEXT * context,
context->decode_YCbCr_to_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
PROFILER_EXIT(context->prof_rfx_decode_YCbCr_to_RGB);
- switch (context->pixel_format)
- {
- case RFX_PIXEL_FORMAT_BGRA:
- r = context->y_r_buffer;
- g = context->cb_g_buffer;
- b = context->cr_b_buffer;
- for (i = 0; i < 4096; i++)
- {
- *dst++ = (uint8) (*b++);
- *dst++ = (uint8) (*g++);
- *dst++ = (uint8) (*r++);
- *dst++ = 0xFF;
- }
- break;
- case RFX_PIXEL_FORMAT_RGBA:
- r = context->y_r_buffer;
- g = context->cb_g_buffer;
- b = context->cr_b_buffer;
- for (i = 0; i < 4096; i++)
- {
- *dst++ = (uint8) (*r++);
- *dst++ = (uint8) (*g++);
- *dst++ = (uint8) (*b++);
- *dst++ = 0xFF;
- }
- break;
- case RFX_PIXEL_FORMAT_BGR:
- r = context->y_r_buffer;
- g = context->cb_g_buffer;
- b = context->cr_b_buffer;
- for (i = 0; i < 4096; i++)
- {
- *dst++ = (uint8) (*b++);
- *dst++ = (uint8) (*g++);
- *dst++ = (uint8) (*r++);
- }
- break;
- case RFX_PIXEL_FORMAT_RGB:
- r = context->y_r_buffer;
- g = context->cb_g_buffer;
- b = context->cr_b_buffer;
- for (i = 0; i < 4096; i++)
- {
- *dst++ = (uint8) (*r++);
- *dst++ = (uint8) (*g++);
- *dst++ = (uint8) (*b++);
- }
- break;
- default:
- break;
- }
-
+ PROFILER_ENTER(context->prof_rfx_decode_format_RGB);
+ rfx_decode_format_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer,
+ context->pixel_format, rgb_buffer);
+ PROFILER_EXIT(context->prof_rfx_decode_format_RGB);
+
PROFILER_EXIT(context->prof_rfx_decode_rgb);
return rgb_buffer;
}
diff --git a/libfreerdp-rfx/rfx_encode.c b/libfreerdp-rfx/rfx_encode.c
index b138120..e458dd1 100644
--- a/libfreerdp-rfx/rfx_encode.c
+++ b/libfreerdp-rfx/rfx_encode.c
@@ -1,6 +1,6 @@
/*
FreeRDP: A Remote Desktop Protocol client.
- RemoteFX Codec Library - Decode
+ RemoteFX Codec Library - Encode
Copyright 2011 Vic Lee
@@ -29,6 +29,81 @@
#define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v)))
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_encode_format_RGB(const uint8 * rgb_data, int width, int height, int rowstride,
+ RFX_PIXEL_FORMAT pixel_format, sint16 * r_buf, sint16 * g_buf, sint16 * b_buf)
+{
+ int x, y;
+ int x_exceed;
+ int y_exceed;
+ const uint8 * src;
+
+ x_exceed = 64 - width;
+ y_exceed = 64 - height;
+ for (y = 0; y < height; y++)
+ {
+ src = rgb_data + y * rowstride;
+
+ switch (pixel_format)
+ {
+ case RFX_PIXEL_FORMAT_BGRA:
+ for (x = 0; x < width; x++)
+ {
+ *b_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *r_buf++ = (sint16) (*src++);
+ src++;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGBA:
+ for (x = 0; x < width; x++)
+ {
+ *r_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *b_buf++ = (sint16) (*src++);
+ src++;
+ }
+ break;
+ case RFX_PIXEL_FORMAT_BGR:
+ for (x = 0; x < width; x++)
+ {
+ *b_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *r_buf++ = (sint16) (*src++);
+ }
+ break;
+ case RFX_PIXEL_FORMAT_RGB:
+ for (x = 0; x < width; x++)
+ {
+ *r_buf++ = (sint16) (*src++);
+ *g_buf++ = (sint16) (*src++);
+ *b_buf++ = (sint16) (*src++);
+ }
+ break;
+ default:
+ break;
+ }
+ /* Zero-fill the columns between the tile's actual width and the full 64-pixel tile width so the padding compresses better. */
+ if (x_exceed > 0)
+ {
+ memset(r_buf, 0, x_exceed * sizeof(sint16));
+ memset(g_buf, 0, x_exceed * sizeof(sint16));
+ memset(b_buf, 0, x_exceed * sizeof(sint16));
+ r_buf += x_exceed;
+ g_buf += x_exceed;
+ b_buf += x_exceed;
+ }
+ }
+
+ /* Zero-fill the rows between the tile's actual height and the full 64-pixel tile height so the padding compresses better. */
+ if (y_exceed > 0)
+ {
+ memset(r_buf, 0, y_exceed * 64 * sizeof(sint16));
+ memset(g_buf, 0, y_exceed * 64 * sizeof(sint16));
+ memset(b_buf, 0, y_exceed * 64 * sizeof(sint16));
+ }
+}
+
void
rfx_encode_RGB_to_YCbCr(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf)
{
@@ -54,63 +129,46 @@ static void
rfx_encode_component(RFX_CONTEXT * context, const uint32 * quantization_values,
sint16 * data, uint8 * buffer, int buffer_size, int * size)
{
- rfx_dwt_2d_encode(data, context->dwt_buffer_8, context->dwt_buffer_16, context->dwt_buffer_32);
+ PROFILER_ENTER(context->prof_rfx_encode_component);
+
+ PROFILER_ENTER(context->prof_rfx_dwt_2d_encode);
+ rfx_dwt_2d_encode(data, context->dwt_buffer_8, context->dwt_buffer_16, context->dwt_buffer_32);
+ PROFILER_EXIT(context->prof_rfx_dwt_2d_encode);
- rfx_quantization_encode(data, quantization_values);
+ PROFILER_ENTER(context->prof_rfx_quantization_encode);
+ context->quantization_encode(data, quantization_values);
+ PROFILER_EXIT(context->prof_rfx_quantization_encode);
- rfx_differential_encode(data + 4032, 64);
+ PROFILER_ENTER(context->prof_rfx_differential_encode);
+ rfx_differential_encode(data + 4032, 64);
+ PROFILER_EXIT(context->prof_rfx_differential_encode);
- *size = rfx_rlgr_encode(context->mode, data, 4096, buffer, buffer_size);
+ PROFILER_ENTER(context->prof_rfx_rlgr_encode);
+ *size = rfx_rlgr_encode(context->mode, data, 4096, buffer, buffer_size);
+ PROFILER_EXIT(context->prof_rfx_rlgr_encode);
+
+ PROFILER_EXIT(context->prof_rfx_encode_component);
}
void
-rfx_encode_rgb(RFX_CONTEXT * context, const uint8 * rgb_buffer, int rowstride,
+rfx_encode_rgb(RFX_CONTEXT * context, const uint8 * rgb_data, int width, int height, int rowstride,
const uint32 * y_quants, const uint32 * cb_quants, const uint32 * cr_quants,
uint8 * ycbcr_buffer, int buffer_size, int * y_size, int * cb_size, int * cr_size)
{
- int x, y;
- const uint8 * src;
sint16 * y_r_buffer = context->y_r_buffer;
sint16 * cb_g_buffer = context->cb_g_buffer;
sint16 * cr_b_buffer = context->cr_b_buffer;
- for (y = 0; y < 64; y++)
- {
- src = rgb_buffer + y * rowstride;
+ PROFILER_ENTER(context->prof_rfx_encode_rgb);
- for (x = 0; x < 64; x++)
- {
- switch (context->pixel_format)
- {
- case RFX_PIXEL_FORMAT_BGRA:
- *cr_b_buffer++ = (sint16) (*src++);
- *cb_g_buffer++ = (sint16) (*src++);
- *y_r_buffer++ = (sint16) (*src++);
- src++;
- break;
- case RFX_PIXEL_FORMAT_RGBA:
- *y_r_buffer++ = (sint16) (*src++);
- *cb_g_buffer++ = (sint16) (*src++);
- *cr_b_buffer++ = (sint16) (*src++);
- src++;
- break;
- case RFX_PIXEL_FORMAT_BGR:
- *cr_b_buffer++ = (sint16) (*src++);
- *cb_g_buffer++ = (sint16) (*src++);
- *y_r_buffer++ = (sint16) (*src++);
- break;
- case RFX_PIXEL_FORMAT_RGB:
- *y_r_buffer++ = (sint16) (*src++);
- *cb_g_buffer++ = (sint16) (*src++);
- *cr_b_buffer++ = (sint16) (*src++);
- break;
- default:
- break;
- }
- }
- }
+ PROFILER_ENTER(context->prof_rfx_encode_format_RGB);
+ rfx_encode_format_RGB(rgb_data, width, height, rowstride,
+ context->pixel_format, y_r_buffer, cb_g_buffer, cr_b_buffer);
+ PROFILER_EXIT(context->prof_rfx_encode_format_RGB);
- context->encode_RGB_to_YCbCr(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
+ PROFILER_ENTER(context->prof_rfx_encode_RGB_to_YCbCr);
+ context->encode_RGB_to_YCbCr(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
+ PROFILER_EXIT(context->prof_rfx_encode_RGB_to_YCbCr);
rfx_encode_component(context, y_quants, context->y_r_buffer, ycbcr_buffer, buffer_size, y_size);
ycbcr_buffer += (*y_size);
@@ -119,4 +177,6 @@ rfx_encode_rgb(RFX_CONTEXT * context, const uint8 * rgb_buffer, int rowstride,
ycbcr_buffer += (*cb_size);
buffer_size -= (*cb_size);
rfx_encode_component(context, cr_quants, context->cr_b_buffer, ycbcr_buffer, buffer_size, cr_size);
+
+ PROFILER_EXIT(context->prof_rfx_encode_rgb);
}
diff --git a/libfreerdp-rfx/rfx_encode.h b/libfreerdp-rfx/rfx_encode.h
index 664458c..2fac0be 100644
--- a/libfreerdp-rfx/rfx_encode.h
+++ b/libfreerdp-rfx/rfx_encode.h
@@ -26,7 +26,7 @@ void
rfx_encode_RGB_to_YCbCr(sint16 * y_r_buf, sint16 * cb_g_buf, sint16 * cr_b_buf);
void
-rfx_encode_rgb(RFX_CONTEXT * context, const uint8 * rgb_buffer, int rowstride,
+rfx_encode_rgb(RFX_CONTEXT * context, const uint8 * rgb_data, int width, int height, int rowstride,
const uint32 * y_quants, const uint32 * cb_quants, const uint32 * cr_quants,
uint8 * ycbcr_buffer, int buffer_size, int * y_size, int * cb_size, int * cr_size);
diff --git a/libfreerdp-rfx/rfx_rlgr.c b/libfreerdp-rfx/rfx_rlgr.c
index c1cb54d..2d27907 100644
--- a/libfreerdp-rfx/rfx_rlgr.c
+++ b/libfreerdp-rfx/rfx_rlgr.c
@@ -349,14 +349,17 @@ rfx_rlgr_encode(RLGR_MODE mode, const sint16 * data, int data_size, uint8 * buff
/* output the remaining run length using k bits */
OutputBits(k, numZeros);
- /* encode the nonzero value using GR coding */
- mag = (input < 0 ? -input : input); /* absolute value of input coefficient */
- sign = (input < 0 ? 1 : 0); /* sign of input coefficient */
+ if (input != 0)
+ {
+ /* encode the nonzero value using GR coding */
+ mag = (input < 0 ? -input : input); /* absolute value of input coefficient */
+ sign = (input < 0 ? 1 : 0); /* sign of input coefficient */
- OutputBit(1, sign); /* output the sign bit */
- CodeGR(&krp, mag - 1); /* output GR code for (mag - 1) */
+ OutputBit(1, sign); /* output the sign bit */
+ CodeGR(&krp, mag - 1); /* output GR code for (mag - 1) */
- UpdateParam(kp, -DN_GR, k);
+ UpdateParam(kp, -DN_GR, k);
+ }
}
else
{
diff --git a/libfreerdp-rfx/sse/rfx_sse.c b/libfreerdp-rfx/sse/rfx_sse.c
index 3ccc9bf..0407323 100644
--- a/libfreerdp-rfx/sse/rfx_sse.c
+++ b/libfreerdp-rfx/sse/rfx_sse.c
@@ -29,10 +29,14 @@ void rfx_init_sse(RFX_CONTEXT * context)
DEBUG_RFX("Using SSE2 optimizations");
IF_PROFILER(context->prof_rfx_decode_YCbCr_to_RGB->name = "rfx_decode_YCbCr_to_RGB_SSE2");
+ IF_PROFILER(context->prof_rfx_encode_RGB_to_YCbCr->name = "rfx_encode_RGB_to_YCbCr_SSE2");
IF_PROFILER(context->prof_rfx_quantization_decode->name = "rfx_quantization_decode_SSE2");
+ IF_PROFILER(context->prof_rfx_quantization_encode->name = "rfx_quantization_encode_SSE2");
IF_PROFILER(context->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_SSE2");
context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_SSE2;
+ context->encode_RGB_to_YCbCr = rfx_encode_RGB_to_YCbCr_SSE2;
context->quantization_decode = rfx_quantization_decode_SSE2;
+ context->quantization_encode = rfx_quantization_encode_SSE2;
context->dwt_2d_decode = rfx_dwt_2d_decode_SSE2;
}
diff --git a/libfreerdp-rfx/sse/rfx_sse2.c b/libfreerdp-rfx/sse/rfx_sse2.c
index 642d725..63dfdcf 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.c
+++ b/libfreerdp-rfx/sse/rfx_sse2.c
@@ -38,7 +38,8 @@ _mm_prefetch_buffer(char * buffer, int num_bytes)
}
}
-void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer)
+void
+rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer)
{
__m128i zero = _mm_setzero_si128();
__m128i max = _mm_set1_epi16(255);
@@ -48,7 +49,7 @@ void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sin
__m128i * cr_b_buf = (__m128i*) cr_b_buffer;
int i;
- for (i = 0; i < (4096 * sizeof(sint16) / sizeof(__m128i)); i+=(CACHE_LINE_BYTES / sizeof(__m128i)))
+ for (i = 0; i < (4096 * sizeof(sint16) / sizeof(__m128i)); i += (CACHE_LINE_BYTES / sizeof(__m128i)))
{
_mm_prefetch((char*)(&y_r_buf[i]), _MM_HINT_NTA);
_mm_prefetch((char*)(&cb_g_buf[i]), _MM_HINT_NTA);
@@ -56,14 +57,14 @@ void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sin
}
for (i = 0; i < (4096 * sizeof(sint16) / sizeof(__m128i)); i++)
{
- // y = y_r_buf[i] + 128;
+ /* y = y_r_buf[i] + 128; */
__m128i y = _mm_load_si128(&y_r_buf[i]);
y = _mm_add_epi16(y, _mm_set1_epi16(128));
-
- // cr = cr_b_buf[i];
+
+ /* cr = cr_b_buf[i]; */
__m128i cr = _mm_load_si128(&cr_b_buf[i]);
-
- // r = between(y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5), 0, 255);
+
+ /* r = between(y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5), 0, 255); */
__m128i r = _mm_add_epi16(y, cr);
r = _mm_add_epi16(r, _mm_srai_epi16(cr, 2));
r = _mm_add_epi16(r, _mm_srai_epi16(cr, 3));
@@ -71,10 +72,10 @@ void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sin
r = _mm_between_epi16(r, zero, max);
_mm_store_si128(&y_r_buf[i], r);
- // cb = cb_g_buf[i];
+ /* cb = cb_g_buf[i]; */
__m128i cb = _mm_load_si128(&cb_g_buf[i]);
- // g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1) - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255);
+ /* g = between(y - (cb >> 2) - (cb >> 4) - (cb >> 5) - (cr >> 1) - (cr >> 3) - (cr >> 4) - (cr >> 5), 0, 255); */
__m128i g = _mm_sub_epi16(y, _mm_srai_epi16(cb, 2));
g = _mm_sub_epi16(g, _mm_srai_epi16(cb, 4));
g = _mm_sub_epi16(g, _mm_srai_epi16(cb, 5));
@@ -84,8 +85,8 @@ void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sin
g = _mm_sub_epi16(g, _mm_srai_epi16(cr, 5));
g = _mm_between_epi16(g, zero, max);
_mm_store_si128(&cb_g_buf[i], g);
-
- // b = between(y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6), 0, 255);
+
+ /* b = between(y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6), 0, 255); */
__m128i b = _mm_add_epi16(y, cb);
b = _mm_add_epi16(b, _mm_srai_epi16(cb, 1));
b = _mm_add_epi16(b, _mm_srai_epi16(cb, 2));
@@ -95,6 +96,73 @@ void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sin
}
}
+/*
+ * SSE2 colour conversion for the encoder: converts 4096 samples per plane
+ * from R/G/B to Y/Cb/Cr in place (R plane becomes Y, G becomes Cb, B becomes Cr).
+ *
+ * The planes are read with _mm_load_si128 and written with _mm_store_si128,
+ * so each buffer has to be 16-byte aligned.
+ * Results are limited to [-128, 127] through _mm_between_epi16.
+ */
+void
+rfx_encode_RGB_to_YCbCr_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer)
+{
+	const __m128i lower = _mm_set1_epi16(-128);
+	const __m128i upper = _mm_set1_epi16(127);
+
+	__m128i * const r_to_y = (__m128i*) y_r_buffer;
+	__m128i * const g_to_cb = (__m128i*) cb_g_buffer;
+	__m128i * const b_to_cr = (__m128i*) cr_b_buffer;
+
+	int idx;
+
+	/* Issue one prefetch hint per cache line for each of the three planes. */
+	for (idx = 0; idx < (4096 * sizeof(sint16) / sizeof(__m128i)); idx += (CACHE_LINE_BYTES / sizeof(__m128i)))
+	{
+		_mm_prefetch((char*)(r_to_y + idx), _MM_HINT_NTA);
+		_mm_prefetch((char*)(g_to_cb + idx), _MM_HINT_NTA);
+		_mm_prefetch((char*)(b_to_cr + idx), _MM_HINT_NTA);
+	}
+
+	/* Each iteration converts eight pixels (eight 16-bit lanes per vector). */
+	for (idx = 0; idx < (4096 * sizeof(sint16) / sizeof(__m128i)); idx++)
+	{
+		const __m128i r = _mm_load_si128(r_to_y + idx);
+		const __m128i g = _mm_load_si128(g_to_cb + idx);
+		const __m128i b = _mm_load_si128(b_to_cr + idx);
+		__m128i acc;
+
+		/*
+		 * y = ((r >> 2) + (r >> 5) + (r >> 6))
+		 *   + ((g >> 1) + (g >> 4) + (g >> 6) + (g >> 7))
+		 *   + ((b >> 4) + (b >> 5) + (b >> 6));
+		 * Adding -128 before limiting to [-128, 127] is the same as
+		 * MINMAX(y, 0, 255) - 128.
+		 */
+		acc = _mm_add_epi16(_mm_srai_epi16(r, 2), _mm_srai_epi16(r, 5));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(r, 6));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(g, 1));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(g, 4));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(g, 6));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(g, 7));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(b, 4));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(b, 5));
+		acc = _mm_add_epi16(acc, _mm_srai_epi16(b, 6));
+		acc = _mm_add_epi16(acc, lower);
+		_mm_store_si128(r_to_y + idx, _mm_between_epi16(acc, lower, upper));
+
+		/*
+		 * cb = (b >> 1) - ((r >> 3) + (r >> 5) + (r >> 7))
+		 *               - ((g >> 2) + (g >> 4) + (g >> 6)), limited to [-128, 127]
+		 */
+		acc = _mm_sub_epi16(_mm_srai_epi16(b, 1), _mm_srai_epi16(r, 3));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(r, 5));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(r, 7));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(g, 2));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(g, 4));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(g, 6));
+		_mm_store_si128(g_to_cb + idx, _mm_between_epi16(acc, lower, upper));
+
+		/*
+		 * cr = (r >> 1) - ((g >> 2) + (g >> 3) + (g >> 5) + (g >> 7))
+		 *               - ((b >> 4) + (b >> 6)), limited to [-128, 127]
+		 */
+		acc = _mm_sub_epi16(_mm_srai_epi16(r, 1), _mm_srai_epi16(g, 2));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(g, 3));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(g, 5));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(g, 7));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(b, 4));
+		acc = _mm_sub_epi16(acc, _mm_srai_epi16(b, 6));
+		_mm_store_si128(b_to_cr + idx, _mm_between_epi16(acc, lower, upper));
+	}
+}
+
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
rfx_quantization_decode_block_SSE2(sint16 * buffer, const int buffer_size, const uint32 factor)
{
@@ -115,26 +183,64 @@ rfx_quantization_decode_block_SSE2(sint16 * buffer, const int buffer_size, const
} while(ptr < buf_end);
}
-void rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values)
+/*
+ * Apply the per-sub-band quantization factors to one 4096-coefficient tile.
+ *
+ * The sub-bands are laid out back to back: three 1024-coefficient level-1
+ * bands at 0/1024/2048, three 256-coefficient level-2 bands at 3072/3328/3584
+ * and four 64-coefficient level-3 bands at 3840/3904/3968/4032.
+ * HH3 therefore starts at 3904 + 64 = 3968; the previous offset 3868 made the
+ * call overlap HL3 and LH3 and left the real HH3 band unprocessed.
+ */
+void
+rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values)
{
_mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
- rfx_quantization_decode_block_SSE2(buffer, 1024, quantization_values[8]); // HL1
- rfx_quantization_decode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); // LH1
- rfx_quantization_decode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); // HH1
- rfx_quantization_decode_block_SSE2(buffer + 3072, 256, quantization_values[5]); // HL2
- rfx_quantization_decode_block_SSE2(buffer + 3328, 256, quantization_values[4]); // LH2
- rfx_quantization_decode_block_SSE2(buffer + 3584, 256, quantization_values[6]); // HH2
- rfx_quantization_decode_block_SSE2(buffer + 3840, 64, quantization_values[2]); // HL3
- rfx_quantization_decode_block_SSE2(buffer + 3904, 64, quantization_values[1]); // LH3
- rfx_quantization_decode_block_SSE2(buffer + 3868, 64, quantization_values[3]); // HH3
- rfx_quantization_decode_block_SSE2(buffer + 4032, 64, quantization_values[0]); // LL3
+ rfx_quantization_decode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_decode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_decode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_decode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_decode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_decode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_decode_block_SSE2(buffer + 3968, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_decode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+}
+
+/*
+ * Quantize one sub-band in place: every 16-bit coefficient is shifted right
+ * arithmetically by (factor - 6) bits. Nothing is done when that shift is
+ * zero or negative.
+ *
+ * buffer is accessed with aligned 128-bit loads/stores, eight coefficients
+ * at a time. The do/while form means one vector is always processed once
+ * the shift is positive, even if buffer_size is smaller than eight.
+ */
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_quantization_encode_block_SSE2(sint16 * buffer, const int buffer_size, const uint32 factor)
+{
+	const int shift = factor - 6;
+	__m128i * cursor;
+	__m128i * limit;
+
+	if (shift <= 0)
+		return;
+
+	cursor = (__m128i*) buffer;
+	limit = (__m128i*) (buffer + buffer_size);
+	do
+	{
+		_mm_store_si128(cursor, _mm_srai_epi16(_mm_load_si128(cursor), shift));
+		cursor++;
+	} while (cursor < limit);
+}
+
+/*
+ * Quantize one 4096-coefficient tile, sub-band by sub-band.
+ *
+ * quantization_values is indexed per sub-band as shown on each call below.
+ * The sub-bands are contiguous: 3 x 1024 coefficients (level 1) starting at 0,
+ * 3 x 256 (level 2) starting at 3072 and 4 x 64 (level 3) starting at 3840.
+ * HH3 therefore begins at 3904 + 64 = 3968. The former offset 3868 overlapped
+ * HL3/LH3, skipped HH3, and is not a multiple of eight coefficients, so the
+ * aligned loads in rfx_quantization_encode_block_SSE2 would be handed a
+ * misaligned pointer (assuming buffer itself is 16-byte aligned).
+ */
+void
+rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values)
+{
+ _mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
+
+ rfx_quantization_encode_block_SSE2(buffer, 1024, quantization_values[8]); /* HL1 */
+ rfx_quantization_encode_block_SSE2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
+ rfx_quantization_encode_block_SSE2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
+ rfx_quantization_encode_block_SSE2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
+ rfx_quantization_encode_block_SSE2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
+ rfx_quantization_encode_block_SSE2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
+ rfx_quantization_encode_block_SSE2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
+ rfx_quantization_encode_block_SSE2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
+ rfx_quantization_encode_block_SSE2(buffer + 3968, 64, quantization_values[3]); /* HH3 */
+ rfx_quantization_encode_block_SSE2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
}
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
rfx_dwt_2d_decode_block_horiz_SSE2(sint16 * l, sint16 * h, sint16 * dst, int subband_width)
{
- int y, n;
+ int y, n;
sint16 * l_ptr = l;
sint16 * h_ptr = h;
sint16 * dst_ptr = dst;
@@ -166,7 +272,7 @@ rfx_dwt_2d_decode_block_horiz_SSE2(sint16 * l, sint16 * h, sint16 * dst, int sub
l_ptr+=8;
h_ptr+=8;
- }
+ }
l_ptr -= subband_width;
h_ptr -= subband_width;
@@ -206,19 +312,87 @@ rfx_dwt_2d_decode_block_horiz_SSE2(sint16 * l, sint16 * h, sint16 * dst, int sub
}
+/*
+ * Vertical pass of the inverse DWT, eight columns at a time.
+ *
+ * l and h are read as consecutive rows of (2 * subband_width) coefficients;
+ * dst receives the interleaved result: row 2n comes from l[n] and the
+ * neighbouring h rows, row 2n + 1 from h[n] and the two even rows around it.
+ * Rows of l, h and dst are accessed with aligned 128-bit loads/stores; only
+ * the neighbouring-row reads use unaligned loads.
+ */
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+rfx_dwt_2d_decode_block_vert_SSE2(sint16 * l, sint16 * h, sint16 * dst, int subband_width)
+{
+	const int stride = subband_width + subband_width;
+	const __m128i one = _mm_set1_epi16(1);
+	sint16 * low = l;
+	sint16 * high = h;
+	sint16 * out = dst;
+	int row, col;
+
+	/*
+	 * Even rows: dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1)
+	 * The first row has no h[n-1], so h[n] is used twice.
+	 * After each row "out" skips one row, leaving room for the odd row.
+	 */
+	for (row = 0; row < subband_width; row++, out += stride)
+	{
+		for (col = 0; col < stride; col += 8, low += 8, high += 8, out += 8)
+		{
+			const __m128i low_vec = _mm_load_si128((__m128i*) low);
+			const __m128i high_vec = _mm_load_si128((__m128i*) high);
+			__m128i high_prev = high_vec;
+			__m128i half_sum;
+
+			if (row != 0)
+				high_prev = _mm_loadu_si128((__m128i*) (high - stride));
+
+			half_sum = _mm_add_epi16(_mm_add_epi16(high_vec, one), high_prev);
+			half_sum = _mm_srai_epi16(half_sum, 1);
+
+			_mm_store_si128((__m128i*) out, _mm_sub_epi16(low_vec, half_sum));
+		}
+	}
+
+	high = h;
+	out = dst + stride;
+
+	/*
+	 * Odd rows: dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1)
+	 * The last row has no dst[2n + 2], so dst[2n] is used twice.
+	 */
+	for (row = 0; row < subband_width; row++, out += stride)
+	{
+		const int last_row = (row == subband_width - 1);
+
+		for (col = 0; col < stride; col += 8, high += 8, out += 8)
+		{
+			const __m128i high_x2 = _mm_slli_epi16(_mm_load_si128((__m128i*) high), 1);
+			const __m128i above = _mm_load_si128((__m128i*) (out - stride));
+			__m128i below = above;
+			__m128i half_sum;
+
+			if (!last_row)
+				below = _mm_loadu_si128((__m128i*) (out + stride));
+
+			half_sum = _mm_srai_epi16(_mm_add_epi16(above, below), 1);
+
+			_mm_store_si128((__m128i*) out, _mm_add_epi16(half_sum, high_x2));
+		}
+	}
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
rfx_dwt_2d_decode_block_SSE2(sint16 * buffer, sint16 * idwt, int subband_width)
{
- sint16 * dst, * l, * h;
- sint16 * l_dst, * h_dst;
sint16 * hl, * lh, * hh, * ll;
- int total_width;
- int x, y;
- int n;
+ sint16 * l_dst, * h_dst;
_mm_prefetch_buffer((char *) idwt, subband_width * 4 * sizeof(sint16));
- total_width = subband_width + subband_width;
-
/* Inverse DWT in horizontal direction, results in 2 sub-bands in L, H order in tmp buffer idwt. */
/* The 4 sub-bands are stored in HL(0), LH(1), HH(2), LL(3) order. */
/* The lower part L uses LL(3) and HL(0). */
@@ -237,31 +411,11 @@ rfx_dwt_2d_decode_block_SSE2(sint16 * buffer, sint16 * idwt, int subband_width)
rfx_dwt_2d_decode_block_horiz_SSE2(lh, hh, h_dst, subband_width);
/* Inverse DWT in vertical direction, results are stored in original buffer. */
- for (x = 0; x < total_width; x++)
- {
- /* Even coefficients */
- for (n = 0; n < subband_width; n++)
- {
- y = n << 1;
- dst = buffer + y * total_width + x;
- l = idwt + n * total_width + x;
- h = l + subband_width * total_width;
- dst[0] = *l - (((n > 0 ? *(h - total_width) : *h) + (*h) + 1) >> 1);
- }
-
- /* Odd coefficients */
- for (n = 0; n < subband_width; n++)
- {
- y = n << 1;
- dst = buffer + y * total_width + x;
- l = idwt + n * total_width + x;
- h = l + subband_width * total_width;
- dst[total_width] = (*h << 1) + ((dst[0] + dst[n < subband_width - 1 ? 2 * total_width : 0]) >> 1);
- }
- }
+ rfx_dwt_2d_decode_block_vert_SSE2(l_dst, h_dst, buffer, subband_width);
}
-void rfx_dwt_2d_decode_SSE2(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32)
+void
+rfx_dwt_2d_decode_SSE2(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32)
{
_mm_prefetch_buffer((char *) buffer, 4096 * sizeof(sint16));
diff --git a/libfreerdp-rfx/sse/rfx_sse2.h b/libfreerdp-rfx/sse/rfx_sse2.h
index d1df7db..8f35f7c 100644
--- a/libfreerdp-rfx/sse/rfx_sse2.h
+++ b/libfreerdp-rfx/sse/rfx_sse2.h
@@ -23,7 +23,9 @@
#include <freerdp/rfx.h>
void rfx_decode_YCbCr_to_RGB_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
+void rfx_encode_RGB_to_YCbCr_SSE2(sint16 * y_r_buffer, sint16 * cb_g_buffer, sint16 * cr_b_buffer);
void rfx_quantization_decode_SSE2(sint16 * buffer, const uint32 * quantization_values);
+void rfx_quantization_encode_SSE2(sint16 * buffer, const uint32 * quantization_values);
void rfx_dwt_2d_decode_SSE2(sint16 * buffer, sint16 * dwt_buffer_8, sint16 * dwt_buffer_16, sint16 * dwt_buffer_32);
#endif /* __RFX_SSE2_H */