diff options
author | Jay Sorg <jay.sorg@gmail.com> | 2020-12-07 11:29:46 +0300 |
---|---|---|
committer | Nexarian <cmp@pitstick.net> | 2022-05-09 01:54:06 +0300 |
commit | 34d051363112b16b7df511f08524f69a925939a5 (patch) | |
tree | e4e9a53f1cee17735cc237101a9bacc74792085a | |
parent | d8f126abc48a1b949a0be27b334099161c06f0fc (diff) |
Progressive RFX for EGFX protocol
Largely a consolidation of the work by jsorg71, with a few minor bug
fixes.
Verified that RFX Progressive works.
-rw-r--r-- | include/rfxcodec_common.h | 1 | ||||
-rw-r--r-- | include/rfxcodec_encode.h | 4 | ||||
-rw-r--r-- | src/Makefile.am | 8 | ||||
-rw-r--r-- | src/amd64/rfxencode_tile_amd64.c | 8 | ||||
-rw-r--r-- | src/rfxcommon.h | 3 | ||||
-rw-r--r-- | src/rfxconstants.h | 16 | ||||
-rw-r--r-- | src/rfxencode.c | 86 | ||||
-rw-r--r-- | src/rfxencode.h | 28 | ||||
-rw-r--r-- | src/rfxencode_compose.c | 352 | ||||
-rw-r--r-- | src/rfxencode_compose.h | 10 | ||||
-rw-r--r-- | src/rfxencode_diff_rlgr1.c | 30 | ||||
-rw-r--r-- | src/rfxencode_diff_rlgr1.h | 3 | ||||
-rw-r--r-- | src/rfxencode_diff_rlgr3.c | 29 | ||||
-rw-r--r-- | src/rfxencode_diff_rlgr3.h | 3 | ||||
-rw-r--r-- | src/rfxencode_dwt.c | 59 | ||||
-rw-r--r-- | src/rfxencode_dwt.h | 7 | ||||
-rw-r--r-- | src/rfxencode_dwt_rem.c | 542 | ||||
-rw-r--r-- | src/rfxencode_dwt_rem.h | 26 | ||||
-rw-r--r-- | src/rfxencode_dwt_shift_rem.c | 568 | ||||
-rw-r--r-- | src/rfxencode_dwt_shift_rem.h | 26 | ||||
-rw-r--r-- | src/rfxencode_quantization.c | 65 | ||||
-rw-r--r-- | src/rfxencode_quantization.h | 2 | ||||
-rw-r--r-- | src/rfxencode_rlgr1.c | 14 | ||||
-rw-r--r-- | src/rfxencode_rlgr3.c | 14 | ||||
-rw-r--r-- | src/rfxencode_tile.c | 3 | ||||
-rw-r--r-- | src/x86/rfxencode_tile_x86.c | 8 |
26 files changed, 1850 insertions, 65 deletions
diff --git a/include/rfxcodec_common.h b/include/rfxcodec_common.h index 0411c73..616d016 100644 --- a/include/rfxcodec_common.h +++ b/include/rfxcodec_common.h @@ -31,6 +31,7 @@ #define RFX_FLAGS_OPT1 (1 << 3) #define RFX_FLAGS_OPT2 (1 << 4) #define RFX_FLAGS_NOACCEL (1 << 6) +#define RFX_FLAGS_PRO1 (1 << 7) #define RFX_FLAGS_RLGR3 0 /* default */ #define RFX_FLAGS_RLGR1 1 diff --git a/include/rfxcodec_encode.h b/include/rfxcodec_encode.h index 2c5876f..76e5e6a 100644 --- a/include/rfxcodec_encode.h +++ b/include/rfxcodec_encode.h @@ -80,8 +80,8 @@ typedef int (*rfxencode_differential_proc)(short *buffer, int buffer_size); typedef int (*rfxencode_quantization_proc)(short *buffer, const char *quantization_values); typedef int (*rfxencode_dwt_2d_proc)(const unsigned char *in_buffer, short *buffer, short *dwt_buffer); -typedef int (*rfxencode_diff_rlgr1_proc)(short *coef, unsigned char *cdata, int cdata_size); -typedef int (*rfxencode_diff_rlgr3_proc)(short *coef, unsigned char *cdata, int cdata_size); +typedef int (*rfxencode_diff_rlgr1_proc)(short *coef, unsigned char *cdata, int cdata_size, int diff_bytes); +typedef int (*rfxencode_diff_rlgr3_proc)(short *coef, unsigned char *cdata, int cdata_size, int diff_bytes); typedef int (*rfxencode_dwt_shift_x86_sse2_proc)(const char *qtable, const unsigned char *data, short *dwt_buffer1, short *dwt_buffer); typedef int (*rfxencode_dwt_shift_x86_sse41_proc)(const char *qtable, const unsigned char *data, short *dwt_buffer1, short *dwt_buffer); diff --git a/src/Makefile.am b/src/Makefile.am index 5b7983d..83b5171 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -32,7 +32,9 @@ noinst_HEADERS = \ rfxencode_tile.h \ rfxencode_diff_rlgr1.h \ rfxencode_diff_rlgr3.h \ - rfxencode_rgb_to_yuv.h + rfxencode_rgb_to_yuv.h \ + rfxencode_dwt_rem.h \ + rfxencode_dwt_shift_rem.h lib_LTLIBRARIES = librfxencode.la @@ -41,4 +43,6 @@ librfxencode_la_SOURCES = $(noinst_HEADERS) rfxencode.c \ rfxencode_quantization.c rfxencode_differential.c \ rfxencode_rlgr1.c rfxencode_rlgr3.c rfxencode_alpha.c \ rfxencode_diff_rlgr1.c rfxencode_diff_rlgr3.c \ - rfxencode_rgb_to_yuv.c + rfxencode_rgb_to_yuv.c \ + rfxencode_dwt_rem.c \ + rfxencode_dwt_shift_rem.c diff --git a/src/amd64/rfxencode_tile_amd64.c b/src/amd64/rfxencode_tile_amd64.c index 1619752..2ce5037 100644 --- a/src/amd64/rfxencode_tile_amd64.c +++ b/src/amd64/rfxencode_tile_amd64.c @@ -53,7 +53,7 @@ rfx_encode_component_rlgr1_amd64_sse2(struct rfxencode *enc, const char *qtable, { return 1; } - *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } @@ -69,7 +69,7 @@ rfx_encode_component_rlgr3_amd64_sse2(struct rfxencode *enc, const char *qtable, { return 1; } - *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } @@ -85,7 +85,7 @@ rfx_encode_component_rlgr1_amd64_sse41(struct rfxencode *enc, const char *qtable { return 1; } - *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } @@ -101,6 +101,6 @@ rfx_encode_component_rlgr3_amd64_sse41(struct rfxencode *enc, const char *qtable { return 1; } - *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } diff --git a/src/rfxcommon.h b/src/rfxcommon.h index dad376c..2d208af 100644 --- a/src/rfxcommon.h +++ b/src/rfxcommon.h @@ -84,6 +84,9 @@ typedef struct _STREAM STREAM; } while (0) #endif +#define stream_read(_s, _b, _n) do { memcpy(_b, (_s)->p, _n); (_s)->p += _n; } while (0) +#define stream_write(_s, _b, _n) do { memcpy((_s)->p, _b, _n); (_s)->p += _n; } while (0) + #define stream_seek(_s, _n) (_s)->p += _n #define stream_seek_uint8(_s) (_s)->p += 1 #define stream_seek_uint16(_s) (_s)->p += 2 diff --git a/src/rfxconstants.h b/src/rfxconstants.h index 770fccb..77f487c 100644 --- a/src/rfxconstants.h +++ b/src/rfxconstants.h @@ -44,6 +44,22 @@ enum _RLGR_MODE #define CBT_TILESET 0xCAC2 #define CBT_TILE 0xCAC3 +/* progressive blockType */ +#define PRO_WBT_SYNC 0xCCC0 +#define PRO_WBT_FRAME_BEGIN 0xCCC1 +#define PRO_WBT_FRAME_END 0xCCC2 +#define PRO_WBT_CONTEXT 0xCCC3 +#define PRO_WBT_REGION 0xCCC4 +#define PRO_WBT_TILE_SIMPLE 0xCCC5 +#define PRO_WBT_TILE_PROGRESSIVE_FIRST 0xCCC6 +#define PRO_WBT_TILE_PROGRESSIVE_UPGRADE 0xCCC7 + +#define RFX_SUBBAND_DIFFING 0x01 + +#define RFX_DWT_REDUCE_EXTRAPOLATE 0x01 + +#define RFX_TILE_DIFFERENCE 0x01 + /* tileSize */ #define CT_TILE_64x64 0x0040 diff --git a/src/rfxencode.c b/src/rfxencode.c index 7822f14..9b7826a 100644 --- a/src/rfxencode.c +++ b/src/rfxencode.c @@ -68,6 +68,10 @@ rfxcodec_encode_create_ex(int width, int height, int format, int flags, enc->dwt_buffer = (sint16 *) (((size_t) (enc->dwt_buffer_a)) & ~15); enc->dwt_buffer1 = (sint16 *) (((size_t) (enc->dwt_buffer1_a)) & ~15); enc->dwt_buffer2 = (sint16 *) (((size_t) (enc->dwt_buffer2_a)) & ~15); + enc->dwt_buffer3 = (sint16 *) (((size_t) (enc->dwt_buffer3_a)) & ~15); + enc->dwt_buffer4 = (sint16 *) (((size_t) (enc->dwt_buffer4_a)) & ~15); + enc->dwt_buffer5 = (sint16 *) (((size_t) (enc->dwt_buffer5_a)) & ~15); + enc->dwt_buffer6 = (sint16 *) (((size_t) (enc->dwt_buffer6_a)) & ~15); #if defined(RFX_USE_ACCEL_X86) cpuid_x86(1, 0, &ax, &bx, &cx, &dx); @@ -157,7 +161,11 @@ rfxcodec_encode_create_ex(int width, int height, int format, int flags, enc->rfx_encode_rgb_to_yuv = rfx_encode_rgb_to_yuv; enc->rfx_encode_argb_to_yuva = rfx_encode_argb_to_yuva; /* assign encoding functions */ - if (flags & RFX_FLAGS_NOACCEL) + if (flags & RFX_FLAGS_PRO1) + { + enc->pro_ver = 1; + } + else if (flags & RFX_FLAGS_NOACCEL) { if (enc->mode == RLGR3) { @@ -295,12 +303,21 @@ int rfxcodec_encode_destroy(void *handle) { struct rfxencode *enc; + int index; + int jndex; enc = (struct rfxencode *) handle; if (enc == NULL) { return 0; } + for (index = 0; index < 64; index++) + { + for (jndex = 0; jndex < 64; jndex++) + { + free(enc->rbs[index][jndex]); + } + } free(enc); return 0; } @@ -323,6 +340,28 @@ rfxcodec_encode_ex(void *handle, char *cdata, int *cdata_bytes, s.p = s.data; s.size = *cdata_bytes; + if (enc->pro_ver > 0) + { + /* Only the first frame should send the RemoteFX header */ + if ((enc->frame_idx == 0) && (enc->header_processed == 0)) + { + if (rfx_pro_compose_message_header(enc, &s) != 0) + { + return -1; + } + } + tiles_written = rfx_pro_compose_message_data(enc, &s, regions, num_regions, + buf, width, height, stride_bytes, + tiles, num_tiles, quants, num_quants, + flags); + if (tiles_written <= 0) + { + return -1; + } + *cdata_bytes = (int) (s.p - s.data); + return tiles_written; + } + /* Only the first frame should send the RemoteFX header */ if ((enc->frame_idx == 0) && (enc->header_processed == 0)) { @@ -374,3 +413,48 @@ rfxcodec_encode_get_internals(struct rfxcodec_encode_internals *internals) #endif return 0; } + +/*****************************************************************************/ +/* produce a hex dump */ +void +rfxcodec_hexdump(const void *p, int len) +{ + unsigned char *line; + int i; + int thisline; + int offset; + + line = (unsigned char *)p; + offset = 0; + + while (offset < len) + { + printf("%04x ", offset); + thisline = len - offset; + + if (thisline > 16) + { + thisline = 16; + } + + for (i = 0; i < thisline; i++) + { + printf("%02x ", line[i]); + } + + for (; i < 16; i++) + { + printf(" "); + } + + for (i = 0; i < thisline; i++) + { + printf("%c", (line[i] >= 0x20 && line[i] < 0x7f) ? line[i] : '.'); + } + + printf("%s", "\n"); + offset += thisline; + line += thisline; + } +} + diff --git a/src/rfxencode.h b/src/rfxencode.h index 8e185dc..28b1479 100644 --- a/src/rfxencode.h +++ b/src/rfxencode.h @@ -33,6 +33,17 @@ typedef int (*rfx_encode_proc)(struct rfxencode *enc, const char *qtable, const uint8 *data, uint8 *buffer, int buffer_size, int *size); +struct rfx_rb +{ + sint16 y[4096]; + sint16 u[4096]; + sint16 v[4096]; +}; + + +#define RFX_MAX_RB_X 64 +#define RFX_MAX_RB_Y 64 + struct rfxencode { int width; @@ -44,7 +55,8 @@ struct rfxencode int flags; int bits_per_pixel; int format; - int pad0[7]; + int pro_ver; + int pad0[6]; uint8 a_buffer[4096]; uint8 y_r_buffer[4096]; @@ -54,13 +66,24 @@ struct rfxencode sint16 dwt_buffer_a[4096]; sint16 dwt_buffer1_a[4096]; sint16 dwt_buffer2_a[4096]; + sint16 dwt_buffer3_a[4096]; + sint16 dwt_buffer4_a[4096]; + sint16 dwt_buffer5_a[4096]; + sint16 dwt_buffer6_a[4096]; uint8 pad2[16]; sint16 *dwt_buffer; sint16 *dwt_buffer1; sint16 *dwt_buffer2; + sint16 *dwt_buffer3; + sint16 *dwt_buffer4; + sint16 *dwt_buffer5; + sint16 *dwt_buffer6; rfx_encode_proc rfx_encode; rfx_encode_rgb_to_yuv_proc rfx_encode_rgb_to_yuv; rfx_encode_argb_to_yuva_proc rfx_encode_argb_to_yuva; + rfx_encode_proc rfx_rem_encode; + + struct rfx_rb * rbs[RFX_MAX_RB_X][RFX_MAX_RB_Y]; int got_sse2; int got_sse3; @@ -72,4 +95,7 @@ struct rfxencode int got_neon; }; +void +rfxcodec_hexdump(const void *p, int len); + #endif diff --git a/src/rfxencode_compose.c b/src/rfxencode_compose.c index 0279c90..b07f0c0 100644 --- a/src/rfxencode_compose.c +++ b/src/rfxencode_compose.c @@ -33,6 +33,13 @@ #include "rfxconstants.h" #include "rfxencode_tile.h" +#include "rfxencode_quantization.h" +#include "rfxencode_dwt_rem.h" +#include "rfxencode_dwt_shift_rem.h" +#include "rfxencode_diff_rlgr1.h" +#include "rfxencode_rlgr1.h" +#include "rfxencode_differential.h" + #define LLOG_LEVEL 1 #define LLOGLN(_level, _args) \ do { if (_level < LLOG_LEVEL) { printf _args ; printf("\n"); } } while (0) @@ -238,6 +245,8 @@ rfx_compose_message_tile_yuv(struct rfxencode *enc, STREAM *s, { return 1; } + LLOGLN(10, ("rfx_compose_message_tile_yuv: YLen %d CbLen %d CrLen %d", + YLen, CbLen, CrLen)); end_pos = stream_get_pos(s); stream_set_pos(s, start_pos + 2); stream_write_uint32(s, 19 + YLen + CbLen + CrLen); /* BlockT.blockLen */ @@ -610,3 +619,346 @@ rfx_compose_message_data(struct rfxencode *enc, STREAM *s, } return tiles_written; } + +/******************************************************************************/ +static int +rfx_pro_compose_message_context(struct rfxencode *enc, STREAM *s) +{ + if (stream_get_left(s) < 10) + { + return 1; + } + stream_write_uint16(s, PRO_WBT_CONTEXT); + stream_write_uint32(s, 10); + stream_write_uint8(s, 0); /* ctxId */ + stream_write_uint16(s, CT_TILE_64x64); /* tileSize */ + stream_write_uint8(s, RFX_SUBBAND_DIFFING); /* flags */ + return 0; +} + +/******************************************************************************/ +int +rfx_pro_compose_message_header(struct rfxencode *enc, STREAM *s) +{ + if (rfx_compose_message_sync(enc, s) != 0) + { + return 1; + } + if (rfx_pro_compose_message_context(enc, s) != 0) + { + return 1; + } + enc->header_processed = 1; + return 0; +} + +/******************************************************************************/ +static int +rfx_pro_compose_message_frame_begin(struct rfxencode *enc, STREAM *s) +{ + if (stream_get_left(s) < 12) + { + return 1; + } + stream_write_uint16(s, PRO_WBT_FRAME_BEGIN); + stream_write_uint32(s, 12); + stream_write_uint32(s, enc->frame_idx); + stream_write_uint16(s, 1); + enc->frame_idx++; + return 0; +} + +/******************************************************************************/ +/* coef1 = coef2 - coef3 (QCdt = QCot - QCrb) + count zeros in coef1, coef2 + coef3 = coef2 */ +#define COEF_DIFF_COUNT_COPY(_coef1, _coef2, _coef3, _loop, _count1, _count2) \ +do { _count1 = 0; _count2 = 0; \ + for (_loop = 0; _loop < 4096 - 81; _loop++) { \ + _coef1[_loop] = _coef2[_loop] - _coef3[_loop]; \ + if (_coef1[_loop] == 0) { _count1++; } \ + if (_coef2[_loop] == 0) { _count2++; } \ + _coef3[_loop] = _coef2[_loop]; } \ + while (_loop < 4096) { \ + _coef1[_loop] = _coef2[_loop] - _coef3[_loop]; \ + _coef3[_loop] = _coef2[_loop]; _loop++; } \ +} while (0) + +/******************************************************************************/ +/* coef1 = coef2 - coef3 (QCdt = QCot - QCrb) + count zeros in coef1, coef2 */ +#define COEF_DIFF_COUNT(_coef1, _coef2, _coef3, _loop, _count1, _count2) \ +do { _count1 = 0; _count2 = 0; \ + for (_loop = 0; _loop < 4096 - 81; _loop++) { \ + _coef1[_loop] = _coef2[_loop] - _coef3[_loop]; \ + if (_coef1[_loop] == 0) { _count1++; } \ + if (_coef2[_loop] == 0) { _count2++; } } \ + while (_loop < 4096) { \ + _coef1[_loop] = _coef2[_loop] - _coef3[_loop]; _loop++; } \ +} while (0) + +/******************************************************************************/ +/* coef1 = coef2 - coef3 (QCdt = QCot - QCrb) + coef3 = coef2 */ +#define COEF_DIFF_COPY(_coef1, _coef2, _coef3, _loop) \ +do { \ + for (_loop = 0; _loop < 4096; _loop++) { \ + _coef1[_loop] = _coef2[_loop] - _coef3[_loop]; \ + _coef3[_loop] = _coef2[_loop]; } \ +} while (0) + +/******************************************************************************/ +/* coef1 = coef2 - coef3 (QCdt = QCot - QCrb) */ +#define COEF_DIFF(_coef1, _coef2, _coef3, _loop) \ +do { \ + for (_loop = 0; _loop < 4096; _loop++) { \ + _coef1[_loop] = _coef2[_loop] - _coef3[_loop]; } \ +} while (0) + +/******************************************************************************/ +static int +rfx_pro_compose_message_region(struct rfxencode *enc, STREAM *s, + const struct rfx_rect *regions, int num_regions, + const char *buf, int width, int height, + int stride_bytes, + const struct rfx_tile *tiles, int num_tiles, + const char *quants, int num_quants, + int flags) +{ + int index; + int jndex; + int start_pos; + int tiles_start_pos; + int end_pos; + int tiles_written; + int x; + int y; + uint8 quantIdxY; + uint8 quantIdxCb; + uint8 quantIdxCr; + const char *tile_data; + + int y_bytes; + int u_bytes; + int v_bytes; + int tile_start_pos; + int tile_end_pos; + uint16 xIdx; + uint16 yIdx; + + const uint8 *y_buffer; + const uint8 *u_buffer; + const uint8 *v_buffer; + const char *y_quants; + const char *u_quants; + const char *v_quants; + + struct rfx_rb *rb; + int dt_y_zeros; + int dt_u_zeros; + int dt_v_zeros; + int ot_y_zeros; + int ot_u_zeros; + int ot_v_zeros; + int tile_flags; + sint16 *dwt_buffer_y; + sint16 *dwt_buffer_u; + sint16 *dwt_buffer_v; + + if (stream_get_left(s) < 18 + num_regions * 8 + num_quants * 5) + { + return 1; + } + if (quants == NULL) + { + num_quants = 1; + quants = (const char *) g_rfx_default_quantization_values; + } + start_pos = stream_get_pos(s); + stream_write_uint16(s, PRO_WBT_REGION); + stream_seek_uint32(s); /* blockLen, set later */ + stream_write_uint8(s, CT_TILE_64x64); + stream_write_uint16(s, num_regions); + stream_write_uint8(s, num_quants); + stream_write_uint8(s, 0); /* numProgQuant */ + stream_write_uint8(s, RFX_DWT_REDUCE_EXTRAPOLATE); /* flags */ + stream_write_uint16(s, num_tiles); + stream_seek_uint32(s); /* tileDataSize, set later */ + for (index = 0; index < num_regions; index++) + { + stream_write_uint16(s, regions[index].x); + stream_write_uint16(s, regions[index].y); + stream_write_uint16(s, regions[index].cx); + stream_write_uint16(s, regions[index].cy); + } + stream_write(s, quants, num_quants * 5); + tiles_start_pos = stream_get_pos(s); + tiles_written = 0; + for (index = 0; index < num_tiles; index++) + { + if (stream_get_left(s) < 22) + { + return 1; + } + x = tiles[index].x; + y = tiles[index].y; + quantIdxY = tiles[index].quant_y; + quantIdxCb = tiles[index].quant_cb; + quantIdxCr = tiles[index].quant_cr; + if ((quantIdxY >= num_quants) || (quantIdxCb >= num_quants) || + (quantIdxCr >= num_quants)) + { + return 1; + } + tile_data = buf + (y << 8) * (stride_bytes >> 8) + (x << 8); + xIdx = x / 64; + yIdx = y / 64; + if ((xIdx >= RFX_MAX_RB_X) || (yIdx >= RFX_MAX_RB_Y)) + { + return 1; + } + tile_start_pos = stream_get_pos(s); + stream_write_uint16(s, PRO_WBT_TILE_SIMPLE); + stream_seek_uint32(s); /* set later */ + stream_write_uint8(s, quantIdxY); + stream_write_uint8(s, quantIdxCb); + stream_write_uint8(s, quantIdxCr); + stream_write_uint16(s, xIdx); + stream_write_uint16(s, yIdx); + stream_seek(s, 1); /* flags, set later */ + stream_seek(s, 8); /* yLen, cbLen, crLen, tailLen, set later */ + y_buffer = (const uint8 *) tile_data; + u_buffer = (const uint8 *) (tile_data + RFX_YUV_BTES); + v_buffer = (const uint8 *) (tile_data + RFX_YUV_BTES * 2); + y_quants = quants + quantIdxY * 5; + u_quants = quants + quantIdxCb * 5; + v_quants = quants + quantIdxCr * 5; + rb = enc->rbs[xIdx][yIdx]; + if (rb == NULL) + { + rb = xnew(struct rfx_rb); + if (rb == NULL) + { + return 1; + } + enc->rbs[xIdx][yIdx] = rb; + } + rfx_rem_dwt_shift_encode(y_buffer, enc->dwt_buffer1, + enc->dwt_buffer, y_quants); + rfx_rem_dwt_shift_encode(u_buffer, enc->dwt_buffer2, + enc->dwt_buffer, u_quants); + rfx_rem_dwt_shift_encode(v_buffer, enc->dwt_buffer3, + enc->dwt_buffer, v_quants); + COEF_DIFF_COUNT_COPY(enc->dwt_buffer4, enc->dwt_buffer1, rb->y, + jndex, dt_y_zeros, ot_y_zeros); + COEF_DIFF_COUNT_COPY(enc->dwt_buffer5, enc->dwt_buffer2, rb->u, + jndex, dt_u_zeros, ot_u_zeros); + COEF_DIFF_COUNT_COPY(enc->dwt_buffer6, enc->dwt_buffer3, rb->v, + jndex, dt_v_zeros, ot_v_zeros); + if (ot_y_zeros + ot_u_zeros + ot_v_zeros < + dt_y_zeros + dt_u_zeros + dt_v_zeros) + { + LLOGLN(10, ("rfx_pro_compose_message_region: diff")); + tile_flags = RFX_TILE_DIFFERENCE; + dwt_buffer_y = enc->dwt_buffer4; + dwt_buffer_u = enc->dwt_buffer5; + dwt_buffer_v = enc->dwt_buffer6; + } + else + { + LLOGLN(10, ("rfx_pro_compose_message_region: orig")); + tile_flags = 0; + dwt_buffer_y = enc->dwt_buffer1; + dwt_buffer_u = enc->dwt_buffer2; + dwt_buffer_v = enc->dwt_buffer3; + } + y_bytes = rfx_encode_diff_rlgr1(dwt_buffer_y, + stream_get_tail(s), + stream_get_left(s), 81); + if (y_bytes < 0) + { + return 1; + } + stream_seek(s, y_bytes); + u_bytes = rfx_encode_diff_rlgr1(dwt_buffer_u, + stream_get_tail(s), + stream_get_left(s), 81); + if (u_bytes < 0) + { + return 1; + } + stream_seek(s, u_bytes); + v_bytes = rfx_encode_diff_rlgr1(dwt_buffer_v, + stream_get_tail(s), + stream_get_left(s), 81); + if (v_bytes < 0) + { + return 1; + } + stream_seek(s, v_bytes); + LLOGLN(10, ("rfx_pro_compose_message_region: y_bytes %d " + "u_bytes %d v_bytes %d", y_bytes, u_bytes, v_bytes)); + tile_end_pos = stream_get_pos(s); + stream_set_pos(s, tile_start_pos + 2); + stream_write_uint32(s, tile_end_pos - tile_start_pos); /* blockLen */ + stream_set_pos(s, tile_start_pos + 13); + stream_write_uint8(s, tile_flags); /* flags */ + stream_write_uint16(s, y_bytes); /* yLen */ + stream_write_uint16(s, u_bytes); /* cbLen */ + stream_write_uint16(s, v_bytes); /* crLen */ + stream_write_uint16(s, 0); /* tailLen */ + stream_set_pos(s, tile_end_pos); + ++tiles_written; + } + end_pos = stream_get_pos(s); + stream_set_pos(s, start_pos + 2); + stream_write_uint32(s, end_pos - start_pos); /* blockLen */ + stream_set_pos(s, start_pos + 14); + stream_write_uint32(s, end_pos - tiles_start_pos); /* tileDataSize */ + stream_set_pos(s, end_pos); + return tiles_written; +} + +/******************************************************************************/ +static int +rfx_pro_compose_message_frame_end(struct rfxencode *enc, STREAM *s) +{ + if (stream_get_left(s) < 6) + { + return 1; + } + stream_write_uint16(s, PRO_WBT_FRAME_END); + stream_write_uint32(s, 6); + return 0; +} + +/******************************************************************************/ +int +rfx_pro_compose_message_data(struct rfxencode *enc, STREAM *s, + const struct rfx_rect *regions, int num_regions, + const char *buf, int width, int height, + int stride_bytes, + const struct rfx_tile *tiles, int num_tiles, + const char *quants, int num_quants, + int flags) +{ + int tiles_written; + LLOGLN(10, ("rfx_pro_compose_message_data:")); + if (rfx_pro_compose_message_frame_begin(enc, s) != 0) + { + return -1; + } + tiles_written = rfx_pro_compose_message_region(enc, s, regions, num_regions, + buf, width, height, stride_bytes, + tiles, num_tiles, quants, num_quants, + flags); + if (tiles_written <= 0) + { + return -1; + } + if (rfx_pro_compose_message_frame_end(enc, s) != 0) + { + return -1; + } + return tiles_written; +} diff --git a/src/rfxencode_compose.h b/src/rfxencode_compose.h index 6fde0f3..1bafb22 100644 --- a/src/rfxencode_compose.h +++ b/src/rfxencode_compose.h @@ -31,4 +31,14 @@ rfx_compose_message_data(struct rfxencode *enc, STREAM *s, const struct rfx_tile *tiles, int num_tiles, const char *quants, int num_quants, int flags); +int +rfx_pro_compose_message_header(struct rfxencode *enc, STREAM *s); +int +rfx_pro_compose_message_data(struct rfxencode *enc, STREAM *s, + const struct rfx_rect *regions, int num_regions, + const char *buf, int width, int height, + int stride_bytes, + const struct rfx_tile *tiles, int num_tiles, + const char *quants, int num_quants, int flags); + #endif diff --git a/src/rfxencode_diff_rlgr1.c b/src/rfxencode_diff_rlgr1.c index cd09f18..345108f 100644 --- a/src/rfxencode_diff_rlgr1.c +++ b/src/rfxencode_diff_rlgr1.c @@ -52,6 +52,11 @@ #define CheckWrite do { \ while (bit_count >= 8) \ { \ + if (cdata_size < 1) \ + { \ + return -1; \ + } \ + cdata_size--; \ bit_count -= 8; \ *cdata = bits >> bit_count; \ cdata++; \ @@ -97,7 +102,8 @@ } while (0) int -rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size) +rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size, + int diff_bytes) { int k; int kp; @@ -119,8 +125,8 @@ rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size) uint32 twoMs; - /* the last 64 bytes are diff */ - for (k = PIXELS_IN_TILE - 1; k > PIXELS_IN_TILE - 64; k--) + /* the last x bytes are diff */ + for (k = PIXELS_IN_TILE - 1; k > PIXELS_IN_TILE - diff_bytes; k--) { coef[k] -= coef[k - 1]; } @@ -147,9 +153,13 @@ rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size) numZeros = 0; GetNextInput; - while (input == 0 && coef_size > 0) + while (input == 0) { numZeros++; + if (coef_size < 1) + { + break; + } GetNextInput; } @@ -183,6 +193,11 @@ rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size) CheckWrite; + if (input == 0) + { + continue; + } + /* encode the nonzero value using GR coding */ if (input < 0) { @@ -199,7 +214,7 @@ rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size) bits |= sign; bit_count++; - lmag = mag ? mag - 1 : 0; + lmag = mag - 1; CodeGR(krp, lmag); /* output GR code for (mag - 1) */ CheckWrite; @@ -239,6 +254,10 @@ rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size) if (bit_count > 0) { + if (cdata_size < 1) + { + return -1; + } bits <<= 8 - bit_count; *cdata = bits; cdata++; @@ -249,4 +268,3 @@ rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size) return processed_size; } - diff --git a/src/rfxencode_diff_rlgr1.h b/src/rfxencode_diff_rlgr1.h index 796f8f2..b123766 100644 --- a/src/rfxencode_diff_rlgr1.h +++ b/src/rfxencode_diff_rlgr1.h @@ -23,7 +23,8 @@ #include "rfxcommon.h" int -rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size); +rfx_encode_diff_rlgr1(sint16 *coef, uint8 *cdata, int cdata_size, + int diff_bytes); #endif /* __RFX_DIFF_RLGR1_H */ diff --git a/src/rfxencode_diff_rlgr3.c b/src/rfxencode_diff_rlgr3.c index 0b68db3..24bb2e5 100644 --- a/src/rfxencode_diff_rlgr3.c +++ b/src/rfxencode_diff_rlgr3.c @@ -52,6 +52,11 @@ #define CheckWrite do { \ while (bit_count >= 8) \ { \ + if (cdata_size < 1) \ + { \ + return -1; \ + } \ + cdata_size--; \ bit_count -= 8; \ *cdata = bits >> bit_count; \ cdata++; \ @@ -97,7 +102,8 @@ } while (0) int -rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size) +rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size, + int diff_bytes) { int k; int kp; @@ -122,8 +128,8 @@ rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size) uint32 sum2Ms; uint32 nIdx; - /* the last 64 bytes are diff */ - for (k = PIXELS_IN_TILE - 1; k > PIXELS_IN_TILE - 64; k--) + /* the last x bytes are diff */ + for (k = PIXELS_IN_TILE - 1; k > PIXELS_IN_TILE - diff_bytes; k--) { coef[k] -= coef[k - 1]; } @@ -150,9 +156,13 @@ rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size) numZeros = 0; GetNextInput; - while (input == 0 && coef_size > 0) + while (input == 0) { numZeros++; + if (coef_size < 1) + { + break; + } GetNextInput; } @@ -186,6 +196,11 @@ rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size) CheckWrite; + if (input == 0) + { + continue; + } + /* encode the nonzero value using GR coding */ if (input < 0) { @@ -202,7 +217,7 @@ rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size) bits |= sign; bit_count++; - lmag = mag ? mag - 1 : 0; + lmag = mag - 1; CodeGR(krp, lmag); /* output GR code for (mag - 1) */ CheckWrite; @@ -276,6 +291,10 @@ rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size) if (bit_count > 0) { + if (cdata_size < 1) + { + return -1; + } bits <<= 8 - bit_count; *cdata = bits; cdata++; diff --git a/src/rfxencode_diff_rlgr3.h b/src/rfxencode_diff_rlgr3.h index dd61309..41b8f6a 100644 --- a/src/rfxencode_diff_rlgr3.h +++ b/src/rfxencode_diff_rlgr3.h @@ -23,7 +23,8 @@ #include "rfxcommon.h" int -rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size); +rfx_encode_diff_rlgr3(sint16 *coef, uint8 *cdata, int cdata_size, + int diff_bytes); #endif /* __RFX_DIFF_RLGR3_H */ diff --git a/src/rfxencode_dwt.c b/src/rfxencode_dwt.c index 5d82044..4984858 100644 --- a/src/rfxencode_dwt.c +++ b/src/rfxencode_dwt.c @@ -27,12 +27,14 @@ #include <string.h> #include "rfxcommon.h" +#include "rfxencode_dwt.h" /******************************************************************************/ static int -rfx_dwt_2d_encode_horz(sint16 *buffer, sint16 *dwt, int subband_width) +rfx_dwt_2d_encode_horz(const sint16 *in_buffer, sint16 *out_buffer, + int subband_width) { - sint16 *l_src, *h_src; + const sint16 *l_src, *h_src; sint16 *hl, *lh, *hh, *ll; int x, y; int n; @@ -42,13 +44,13 @@ rfx_dwt_2d_encode_horz(sint16 *buffer, sint16 *dwt, int subband_width) /* The lower part L generates LL(3) and HL(0). */ /* The higher part H generates LH(1) and HH(2). */ - ll = buffer + subband_width * subband_width * 3; - hl = buffer; - l_src = dwt; + ll = out_buffer + subband_width * subband_width * 3; + hl = out_buffer; + l_src = in_buffer; - lh = buffer + subband_width * subband_width; - hh = buffer + subband_width * subband_width * 2; - h_src = dwt + subband_width * subband_width * 2; + lh = out_buffer + subband_width * subband_width; + hh = out_buffer + subband_width * subband_width * 2; + h_src = in_buffer + subband_width * subband_width * 2; for (y = 0; y < subband_width; y++) { @@ -102,7 +104,8 @@ rfx_dwt_2d_encode_horz(sint16 *buffer, sint16 *dwt, int subband_width) /******************************************************************************/ static int -rfx_dwt_2d_encode_block(sint16 *buffer, sint16 *dwt, int subband_width) +rfx_dwt_2d_encode_block(sint16 *in_out_buffer, sint16 *tmp_buffer, + int subband_width) { sint16 *src, *l, *h; int total_width; @@ -112,14 +115,14 @@ rfx_dwt_2d_encode_block(sint16 *buffer, sint16 *dwt, int subband_width) total_width = subband_width << 1; /* DWT in vertical direction, results in 2 sub-bands in L, H order in - * tmp buffer dwt. */ + * tmp buffer. */ for (x = 0; x < total_width; x++) { /* pre */ - l = dwt + x; + l = tmp_buffer + x; h = l + subband_width * total_width; - src = buffer + x; + src = in_out_buffer + x; *h = (src[total_width] - ((src[0] + src[2 * total_width]) >> 1)) >> 1; *l = src[0] + (*h); @@ -127,9 +130,9 @@ rfx_dwt_2d_encode_block(sint16 *buffer, sint16 *dwt, int subband_width) for (n = 1; n < subband_width - 1; n++) { y = n << 1; - l = dwt + n * total_width + x; + l = tmp_buffer + n * total_width + x; h = l + subband_width * total_width; - src = buffer + y * total_width + x; + src = in_out_buffer + y * total_width + x; *h = (src[total_width] - ((src[0] + src[2 * total_width]) >> 1)) >> 1; *l = src[0] + ((*(h - total_width) + *h) >> 1); } @@ -137,21 +140,22 @@ rfx_dwt_2d_encode_block(sint16 *buffer, sint16 *dwt, int subband_width) /* post */ n = subband_width - 1; y = n << 1; - l = dwt + n * total_width + x; + l = tmp_buffer + n * total_width + x; h = l + subband_width * total_width; - src = buffer + y * total_width + x; + src = in_out_buffer + y * total_width + x; *h = (src[total_width] - ((src[0] + src[0]) >> 1)) >> 1; *l = src[0] + ((*(h - total_width) + *h) >> 1); } - return rfx_dwt_2d_encode_horz(buffer, dwt, subband_width); + return rfx_dwt_2d_encode_horz(tmp_buffer, in_out_buffer, subband_width); } /******************************************************************************/ static int rfx_dwt_2d_encode_block8(const uint8 *in_buffer, - sint16 *buffer, sint16 *dwt, int subband_width) + sint16 *out_buffer, sint16 *tmp_buffer, + int subband_width) { const uint8 *src; sint16 *l, *h; @@ -163,12 +167,12 @@ rfx_dwt_2d_encode_block8(const uint8 *in_buffer, total_width = subband_width << 1; /* DWT in vertical direction, results in 2 sub-bands in L, H order in - * tmp buffer dwt. */ + * tmp buffer. */ for (x = 0; x < total_width; x++) { /* pre */ - l = dwt + x; + l = tmp_buffer + x; h = l + subband_width * total_width; src = in_buffer + x; s1 = (src[total_width] - 128) << DWT_FACTOR; @@ -182,7 +186,7 @@ rfx_dwt_2d_encode_block8(const uint8 *in_buffer, for (n = 1; n < subband_width - 1; n++) { y = n << 1; - l = dwt + n * total_width + x; + l = tmp_buffer + n * total_width + x; h = l + subband_width * total_width; src = in_buffer + y * total_width + x; s1 = (src[total_width] - 128) << DWT_FACTOR; @@ -196,7 +200,7 @@ rfx_dwt_2d_encode_block8(const uint8 *in_buffer, /* post */ n = subband_width - 1; y = n << 1; - l = dwt + n * total_width + x; + l = tmp_buffer + n * total_width + x; h = l + subband_width * total_width; src = in_buffer + y * total_width + x; s1 = (src[total_width] - 128) << DWT_FACTOR; @@ -208,15 +212,16 @@ rfx_dwt_2d_encode_block8(const uint8 *in_buffer, } - return rfx_dwt_2d_encode_horz(buffer, dwt, subband_width); + return rfx_dwt_2d_encode_horz(tmp_buffer, out_buffer, subband_width); } /******************************************************************************/ int -rfx_dwt_2d_encode(const uint8 *in_buffer, sint16 *buffer, sint16 *dwt_buffer) +rfx_dwt_2d_encode(const uint8 *in_buffer, sint16 *out_buffer, + sint16 *tmp_buffer) { - rfx_dwt_2d_encode_block8(in_buffer, buffer, dwt_buffer, 32); - rfx_dwt_2d_encode_block(buffer + 3072, dwt_buffer, 16); - rfx_dwt_2d_encode_block(buffer + 3840, dwt_buffer, 8); + rfx_dwt_2d_encode_block8(in_buffer, out_buffer, tmp_buffer, 32); + rfx_dwt_2d_encode_block(out_buffer + 3072, tmp_buffer, 16); + rfx_dwt_2d_encode_block(out_buffer + 3840, tmp_buffer, 8); return 0; } diff --git a/src/rfxencode_dwt.h b/src/rfxencode_dwt.h index 2d91176..0a82650 100644 --- a/src/rfxencode_dwt.h +++ b/src/rfxencode_dwt.h @@ -16,10 +16,11 @@ * limitations under the License. */ -#ifndef __RFXENCODE_RFX_H -#define __RFXENCODE_RFX_H +#ifndef __RFXENCODE_RFX_DWT_H +#define __RFXENCODE_RFX_DWT_H int -rfx_dwt_2d_encode(const uint8 *in_buffer, sint16 *buffer, sint16 *dwt_buffer); +rfx_dwt_2d_encode(const uint8 *in_buffer, sint16 *out_buffer, + sint16 *tmp_buffer); #endif diff --git a/src/rfxencode_dwt_rem.c b/src/rfxencode_dwt_rem.c new file mode 100644 index 0000000..7510512 --- /dev/null +++ b/src/rfxencode_dwt_rem.c @@ -0,0 +1,542 @@ +/** + * RemoteFX Codec Library + * + * Copyright 2020 Jay Sorg <jay.sorg@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * DWT Reduce-Extrapolate Method MS-RDPEGFX 3.2.8.1.2.2 + * also does Quantization and Linearization 3.2.8.1.3 + */ + +#if defined(HAVE_CONFIG_H) +#include <config_ac.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "rfxcommon.h" +#include "rfxencode_dwt_rem.h" + +#define ICL1(_offset) (ic[(_offset) * 64] - 128) << DWT_FACTOR +#define ICL2(_offset) ic[(_offset) * 33] +#define ICL3(_offset) ic[(_offset) * 17] + +#define LOL1(_offset) lo[(_offset) * 64] +#define HIL1(_offset) hi[(_offset) * 64] +#define LOL2(_offset) lo[(_offset) * 33] +#define HIL2(_offset) hi[(_offset) * 33] +#define LOL3(_offset) lo[(_offset) * 17] +#define HIL3(_offset) hi[(_offset) * 17] + +/******************************************************************************/ +static void +rfx_rem_dwt_encode_vert_lv1(const uint8 *in_buffer, sint16 *out_buffer) +{ + const uint8 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic62; + int n; + int y; + + for (y = 0; y < 64; y++) + { + + /* setup */ + ic = in_buffer + y; + lo = out_buffer + y; + hi = lo + 64 * 33; + + /* pre */ + x2n = ICL1(0); + x2n1 = ICL1(1); + x2n2 = ICL1(2); + HIL1(0) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL1(0) = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 31; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ICL1(2 * n + 1); + x2n2 = ICL1(2 * n + 2); + HIL1(n) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL1(n) = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic62 = x2n = x2n2; + x2n1 = ICL1(63); + x2n2 = 2 * x2n1 - x2n; /* ic[64] = 2 * ic[63] - ic[62] */ + LOL1(31) = x2n + (hn1 >> 1); + + x2n = x2n2; + /* x2n1 already set, mirror 65 -> 63 */ + x2n2 = ic62; /* mirror 66 -> 62 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL1(32) = x2n + (hn >> 1); + + } +} + +/******************************************************************************/ +static void +rfx_rem_dwt_encode_horz_lv1(const sint16 *in_buffer, sint16 *out_buffer) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic62; + int n; + int y; + + for (y = 0; y < 33; y++) /* lo */ + { + + /* setup */ + ic = in_buffer + 64 * y; + lo = out_buffer + 31 * 33 + 33 * 31 + 31 * 31 + 33 * y; /* LL1 */ + hi = out_buffer + 31 * y; /* HL1 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[0] = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 31; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic62 = x2n = x2n2; + x2n1 = ic[63]; + x2n2 = 2 * x2n1 - x2n; /* ic[64] = 2 * ic[63] - ic[62] */ + lo[31] = x2n + (hn1 >> 1); + + x2n = x2n2; + /* x2n1 already set, mirror 65 -> 63 */ + x2n2 = ic62; /* mirror 66 -> 62 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[32] = x2n + (hn >> 1); + + } + + for (y = 0; y < 31; y++) /* hi */ + { + + /* setup */ + ic = in_buffer + 64 * (33 + y); + lo = out_buffer + 31 * 33 + 33 * y; /* LH1 */ + hi = out_buffer + 31 * 33 + 33 * 31 + 31 * y; /* HH1 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[0] = x2n + hn; + + /* loop */ + for (n = 1; n < 31; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic62 = x2n = x2n2; + x2n1 = ic[63]; + x2n2 = 2 * x2n1 - x2n; /* ic[64] = 2 * ic[63] - ic[62] */ + lo[31] = x2n + (hn1 >> 1); + + x2n = x2n2; + /* x2n1 already set, mirror 65 -> 63 */ + x2n2 = ic62; /* mirror 66 -> 62 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[32] = x2n + (hn >> 1); + + } + +} + +/******************************************************************************/ +static void +rfx_rem_dwt_encode_vert_lv2(const sint16 *in_buffer, sint16 *out_buffer) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic30; + int n; + int y; + + for (y = 0; y < 33; y++) + { + + /* setup */ + ic = in_buffer + y; + lo = out_buffer + y; + hi = lo + 33 * 17; + + /* pre */ + x2n = ICL2(0); + x2n1 = ICL2(1); + x2n2 = ICL2(2); + HIL2(0) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(0) = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 15; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ICL2(2 * n + 1); + x2n2 = ICL2(2 * n + 2); + HIL2(n) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(n) = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic30 = x2n = x2n2; + x2n1 = ICL2(31); + x2n2 = ICL2(32); + HIL2(15) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(15) = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 33 -> 31 */ + x2n2 = ic30; /* mirror 34 -> 30 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(16) = x2n + ((hn1 + hn) >> 1); + + } +} + +/******************************************************************************/ +static void +rfx_rem_dwt_encode_horz_lv2(const sint16 *in_buffer, sint16 *out_buffer) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic30; + int n; + int y; + + for (y = 0; y < 17; y++) /* lo */ + { + + /* setup */ + ic = in_buffer + 33 * y; + lo = out_buffer + 16 * 17 + 17 * 16 + 16 * 16 + 17 * y; /* LL2 */ + hi = out_buffer + 16 * y; /* HL2 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[0] = x2n + hn; + + /* loop */ + for (n = 1; n < 15; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic30 = x2n = x2n2; + x2n1 = ic[31]; + x2n2 = ic[32]; + hi[15] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[15] = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 33 -> 31 */ + x2n2 = ic30; /* mirror 34 -> 30 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[16] = x2n + ((hn1 + hn) >> 1); + + } + + for (y = 0; y < 16; y++) /* hi */ + { + + /* setup */ + ic = in_buffer + 33 * (17 + y); + lo = out_buffer + 16 * 17 + 17 * y; /* LH2 */ + hi = out_buffer + 16 * 17 + 17 * 16 + 16 * y; /* HH2 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[0] = x2n + hn; + + /* loop */ + for (n = 1; n < 15; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic30 = x2n = x2n2; + x2n1 = ic[31]; + x2n2 = ic[32]; + hi[15] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[15] = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 33 -> 31 */ + x2n2 = ic30; /* mirror 34 -> 30 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[16] = x2n + ((hn1 + hn) >> 1); + + } + +} + +/******************************************************************************/ +static void +rfx_rem_dwt_encode_vert_lv3(const sint16 *in_buffer, sint16 *out_buffer) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic14; + int n; + int y; + + for (y = 0; y < 17; y++) + { + + /* setup */ + ic = in_buffer + y; + lo = out_buffer + y; + hi = lo + 17 * 9; + + /* pre */ + x2n = ICL3(0); + x2n1 = ICL3(1); + x2n2 = ICL3(2); + HIL3(0) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(0) = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 7; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ICL3(2 * n + 1); + x2n2 = ICL3(2 * n + 2); + HIL3(n) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(n) = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic14 = x2n = x2n2; + x2n1 = ICL3(15); + x2n2 = ICL3(16); + HIL3(7) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(7) = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 17 -> 15 */ + x2n2 = ic14; /* mirror 18 -> 14 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(8) = x2n + ((hn1 + hn) >> 1); + + } +} + +/******************************************************************************/ +static void +rfx_rem_dwt_encode_horz_lv3(const sint16 *in_buffer, sint16 *out_buffer) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic14; + int n; + int y; + + for (y = 0; y < 9; y++) /* lo */ + { + + /* setup */ + ic = in_buffer + 17 * y; + lo = out_buffer + 8 * 9 + 9 * 8 + 8 * 8 + 9 * y; /* LL3 */ + hi = out_buffer + 8 * y; /* HL3 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[0] = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 7; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic14 = x2n = x2n2; + x2n1 = ic[15]; + x2n2 = ic[16]; + hi[7] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[7] = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 17 -> 15 */ + x2n2 = ic14; /* mirror 18 -> 14 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[8] = x2n + ((hn1 + hn) >> 1); + + } + + for (y = 0; y < 8; y++) /* hi */ + { + + /* setup */ + ic = in_buffer + 17 * (9 + y); + lo = out_buffer + 8 * 9 + 9 * y; /* LH3 */ + hi = out_buffer + 8 * 9 + 9 * 8 + 8 * y; /* HH3 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[0] = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 7; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic14 = x2n = x2n2; + x2n1 = ic[15]; + x2n2 = ic[16]; + hi[7] = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[7] = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 17 -> 15 */ + x2n2 = ic14; /* mirror 18 -> 14 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[8] = x2n + ((hn1 + hn) >> 1); + + } + +} + +/******************************************************************************/ +int +rfx_rem_dwt_encode(const uint8 *in_buffer, sint16 *out_buffer, + sint16 *tmp_buffer) +{ + rfx_rem_dwt_encode_vert_lv1(in_buffer, tmp_buffer); + rfx_rem_dwt_encode_horz_lv1(tmp_buffer, out_buffer); + rfx_rem_dwt_encode_vert_lv2(out_buffer + 3007, tmp_buffer); + rfx_rem_dwt_encode_horz_lv2(tmp_buffer, out_buffer + 3007); + rfx_rem_dwt_encode_vert_lv3(out_buffer + 3807, tmp_buffer); + rfx_rem_dwt_encode_horz_lv3(tmp_buffer, out_buffer + 3807); + return 0; +} diff --git a/src/rfxencode_dwt_rem.h b/src/rfxencode_dwt_rem.h new file mode 100644 index 0000000..7171f86 --- /dev/null +++ b/src/rfxencode_dwt_rem.h @@ -0,0 +1,26 @@ +/** + * RFX codec encoder + * + * Copyright 2020 Jay Sorg <jay.sorg@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __RFXENCODE_RFX_REM_DWT_H +#define __RFXENCODE_RFX_REM_DWT_H + +int +rfx_rem_dwt_encode(const uint8 *in_buffer, sint16 *out_buffer, + sint16 *tmp_buffer); + +#endif diff --git a/src/rfxencode_dwt_shift_rem.c b/src/rfxencode_dwt_shift_rem.c new file mode 100644 index 0000000..fa3ef2a --- /dev/null +++ b/src/rfxencode_dwt_shift_rem.c @@ -0,0 +1,568 @@ +/** + * RemoteFX Codec Library + * + * Copyright 2020 Jay Sorg <jay.sorg@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * DWT Reduce-Extrapolate Method MS-RDPEGFX 3.2.8.1.2.2 + * also does Quantization and Linearization 3.2.8.1.3 + */ + +#if defined(HAVE_CONFIG_H) +#include <config_ac.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "rfxcommon.h" +#include "rfxencode_dwt_rem.h" + +#define ICL1(_offset) (ic[(_offset) * 64] - 128) << DWT_FACTOR +#define ICL2(_offset) ic[(_offset) * 33] +#define ICL3(_offset) ic[(_offset) * 17] + +#define LOL1(_offset) lo[(_offset) * 64] +#define HIL1(_offset) hi[(_offset) * 64] +#define LOL2(_offset) lo[(_offset) * 33] +#define HIL2(_offset) hi[(_offset) * 33] +#define LOL3(_offset) lo[(_offset) * 17] +#define HIL3(_offset) hi[(_offset) * 17] + +#define SETUPLOQ(_index, _shift) do { \ + lo_fact = (((quants[_index] >> (_shift)) & 0xf) - 6) + DWT_FACTOR; \ + lo_half = 1 << (hi_fact - 1); } while (0) +#define SETUPHIQ(_index, _shift) do { \ + hi_fact = (((quants[_index] >> (_shift)) & 0xf) - 6) + DWT_FACTOR; \ + hi_half = 1 << (hi_fact - 1); } while (0) +#define LOQ(_val) ((_val) + lo_half) >> lo_fact +#define HIQ(_val) ((_val) + hi_half) >> hi_fact + +/******************************************************************************/ +static void +rfx_rem_dwt_shift_encode_vert_lv1(const uint8 *in_buffer, sint16 *out_buffer) +{ + const uint8 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic62; + int n; + int y; + + for (y = 0; y < 64; y++) + { + + /* setup */ + ic = in_buffer + y; + lo = out_buffer + y; + hi = lo + 64 * 33; + + /* pre */ + x2n = ICL1(0); + x2n1 = ICL1(1); + x2n2 = ICL1(2); + HIL1(0) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL1(0) = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 31; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ICL1(2 * n + 1); + x2n2 = ICL1(2 * n + 2); + HIL1(n) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL1(n) = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic62 = x2n = x2n2; + x2n1 = ICL1(63); + x2n2 = 2 * x2n1 - x2n; /* ic[64] = 2 * ic[63] - ic[62] */ + LOL1(31) = x2n + (hn1 >> 1); + + x2n = x2n2; + /* x2n1 already set, mirror 65 -> 63 */ + x2n2 = ic62; /* mirror 66 -> 62 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL1(32) = x2n + (hn >> 1); + } +} + +/******************************************************************************/ +static void +rfx_rem_dwt_shift_encode_horz_lv1(const sint16 *in_buffer, sint16 *out_buffer, + const char *quants) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic62; + int n; + int y; + int lo_fact; + int hi_fact; + int lo_half; + int hi_half; + + SETUPHIQ(4, 0); /* HL1 */ + for (y = 0; y < 33; y++) /* lo */ + { + /* setup */ + ic = in_buffer + 64 * y; + lo = out_buffer + 31 * 33 + 33 * 31 + 31 * 31 + 33 * y; /* LL1 */ + hi = out_buffer + 31 * y; /* HL1 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[0] = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 31; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic62 = x2n = x2n2; + x2n1 = ic[63]; + x2n2 = 2 * x2n1 - x2n; /* ic[64] = 2 * ic[63] - ic[62] */ + lo[31] = x2n + (hn1 >> 1); + + x2n = x2n2; + /* x2n1 already set, mirror 65 -> 63 */ + x2n2 = ic62; /* mirror 66 -> 62 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[32] = x2n + (hn >> 1); + + } + + SETUPLOQ(3, 4); /* LH1 */ + SETUPHIQ(4, 4); /* HH1 */ + for (y = 0; y < 31; y++) /* hi */ + { + + /* setup */ + ic = in_buffer + 64 * (33 + y); + lo = out_buffer + 31 * 33 + 33 * y; /* LH1 */ + hi = out_buffer + 31 * 33 + 33 * 31 + 31 * y; /* HH1 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[0] = LOQ(x2n + hn); + + /* loop */ + for (n = 1; n < 31; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[n] = LOQ(x2n + ((hn1 + hn) >> 1)); + } + + /* post */ + hn1 = hn; + ic62 = x2n = x2n2; + x2n1 = ic[63]; + x2n2 = 2 * x2n1 - x2n; /* ic[64] = 2 * ic[63] - ic[62] */ + lo[31] = LOQ(x2n + (hn1 >> 1)); + + x2n = x2n2; + /* x2n1 already set, mirror 65 -> 63 */ + x2n2 = ic62; /* mirror 66 -> 62 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[32] = LOQ(x2n + (hn >> 1)); + } +} + +/******************************************************************************/ +static void +rfx_rem_dwt_shift_encode_vert_lv2(const sint16 *in_buffer, sint16 *out_buffer) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic30; + int n; + int y; + + for (y = 0; y < 33; y++) + { + + /* setup */ + ic = in_buffer + y; + lo = out_buffer + y; + hi = lo + 33 * 17; + + /* pre */ + x2n = ICL2(0); + x2n1 = ICL2(1); + x2n2 = ICL2(2); + HIL2(0) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(0) = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 15; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ICL2(2 * n + 1); + x2n2 = ICL2(2 * n + 2); + HIL2(n) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(n) = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic30 = x2n = x2n2; + x2n1 = ICL2(31); + x2n2 = ICL2(32); + HIL2(15) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(15) = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 33 -> 31 */ + x2n2 = ic30; /* mirror 34 -> 30 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL2(16) = x2n + ((hn1 + hn) >> 1); + + } +} + +/******************************************************************************/ +static void +rfx_rem_dwt_shift_encode_horz_lv2(const sint16 *in_buffer, sint16 *out_buffer, + const char *quants) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic30; + int n; + int y; + int lo_fact; + int hi_fact; + int lo_half; + int hi_half; + + SETUPHIQ(2, 4); /* HL2 */ + for (y = 0; y < 17; y++) /* lo */ + { + + /* setup */ + ic = in_buffer + 33 * y; + lo = out_buffer + 16 * 17 + 17 * 16 + 16 * 16 + 17 * y; /* LL2 */ + hi = out_buffer + 16 * y; /* HL2 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[0] = x2n + hn; + + /* loop */ + for (n = 1; n < 15; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[n] = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic30 = x2n = x2n2; + x2n1 = ic[31]; + x2n2 = ic[32]; + hi[15] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[15] = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 33 -> 31 */ + x2n2 = ic30; /* mirror 34 -> 30 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[16] = x2n + ((hn1 + hn) >> 1); + + } + + SETUPHIQ(3, 0); /* HH2 */ + SETUPLOQ(2, 0); /* LH2 */ + for (y = 0; y < 16; y++) /* hi */ + { + + /* setup */ + ic = in_buffer + 33 * (17 + y); + lo = out_buffer + 16 * 17 + 17 * y; /* LH2 */ + hi = out_buffer + 16 * 17 + 17 * 16 + 16 * y; /* HH2 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[0] = LOQ(x2n + hn); + + /* loop */ + for (n = 1; n < 15; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[n] = LOQ(x2n + ((hn1 + hn) >> 1)); + } + + /* post */ + hn1 = hn; + ic30 = x2n = x2n2; + x2n1 = ic[31]; + x2n2 = ic[32]; + hi[15] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[15] = LOQ(x2n + ((hn1 + hn) >> 1)); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 33 -> 31 */ + x2n2 = ic30; /* mirror 34 -> 30 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[16] = LOQ(x2n + ((hn1 + hn) >> 1)); + + } + +} + +/******************************************************************************/ +static void +rfx_rem_dwt_shift_encode_vert_lv3(const sint16 *in_buffer, sint16 *out_buffer) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic14; + int n; + int y; + + for (y = 0; y < 17; y++) + { + /* setup */ + ic = in_buffer + y; + lo = out_buffer + y; + hi = lo + 17 * 9; + + /* pre */ + x2n = ICL3(0); + x2n1 = ICL3(1); + x2n2 = ICL3(2); + HIL3(0) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(0) = x2n + hn; /* mirror */ + + /* loop */ + for (n = 1; n < 7; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ICL3(2 * n + 1); + x2n2 = ICL3(2 * n + 2); + HIL3(n) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(n) = x2n + ((hn1 + hn) >> 1); + } + + /* post */ + hn1 = hn; + ic14 = x2n = x2n2; + x2n1 = ICL3(15); + x2n2 = ICL3(16); + HIL3(7) = hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(7) = x2n + ((hn1 + hn) >> 1); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 17 -> 15 */ + x2n2 = ic14; /* mirror 18 -> 14 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + LOL3(8) = x2n + ((hn1 + hn) >> 1); + + } +} + +/******************************************************************************/ +static void +rfx_rem_dwt_shift_encode_horz_lv3(const sint16 *in_buffer, sint16 *out_buffer, + const char *quants) +{ + const sint16 *ic; /* input coefficients */ + sint16 *lo; + sint16 *hi; + sint16 x2n; /* n[2n] */ + sint16 x2n1; /* n[2n + 1] */ + sint16 x2n2; /* n[2n + 2] */ + sint16 hn1; /* H[n - 1] */ + sint16 hn; /* H[n] */ + sint16 ic14; + int n; + int y; + int lo_fact; + int hi_fact; + int lo_half; + int hi_half; + + SETUPHIQ(1, 0); /* HL3 */ + SETUPLOQ(0, 0); /* LL3 */ + for (y = 0; y < 9; y++) /* lo */ + { + + /* setup */ + ic = in_buffer + 17 * y; + lo = out_buffer + 8 * 9 + 9 * 8 + 8 * 8 + 9 * y; /* LL3 */ + hi = out_buffer + 8 * y; /* HL3 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[0] = LOQ(x2n + hn); /* mirror */ + + /* loop */ + for (n = 1; n < 7; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[n] = LOQ(x2n + ((hn1 + hn) >> 1)); + } + + /* post */ + hn1 = hn; + ic14 = x2n = x2n2; + x2n1 = ic[15]; + x2n2 = ic[16]; + hi[7] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[7] = LOQ(x2n + ((hn1 + hn) >> 1)); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 17 -> 15 */ + x2n2 = ic14; /* mirror 18 -> 14 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[8] = LOQ(x2n + ((hn1 + hn) >> 1)); + + } + + SETUPHIQ(1, 4); /* HH3 */ + SETUPLOQ(0, 4); /* LH3 */ + for (y = 0; y < 8; y++) /* hi */ + { + /* setup */ + ic = in_buffer + 17 * (9 + y); + lo = out_buffer + 8 * 9 + 9 * y; /* LH3 */ + hi = out_buffer + 8 * 9 + 9 * 8 + 8 * y; /* HH3 */ + + /* pre */ + x2n = ic[0]; + x2n1 = ic[1]; + x2n2 = ic[2]; + hi[0] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[0] = LOQ(x2n + hn); /* mirror */ + + /* loop */ + for (n = 1; n < 7; n++) + { + hn1 = hn; + x2n = x2n2; + x2n1 = ic[2 * n + 1]; + x2n2 = ic[2 * n + 2]; + hi[n] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[n] = LOQ(x2n + ((hn1 + hn) >> 1)); + } + + /* post */ + hn1 = hn; + ic14 = x2n = x2n2; + x2n1 = ic[15]; + x2n2 = ic[16]; + hi[7] = HIQ(hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1); + lo[7] = LOQ(x2n + ((hn1 + hn) >> 1)); + + hn1 = hn; + x2n = x2n2; + /* x2n1 already set, mirror 17 -> 15 */ + x2n2 = ic14; /* mirror 18 -> 14 */ + hn = (x2n1 - ((x2n + x2n2) >> 1)) >> 1; + lo[8] = LOQ(x2n + ((hn1 + hn) >> 1)); + } +} + +/******************************************************************************/ +int +rfx_rem_dwt_shift_encode(const uint8 *in_buffer, sint16 *out_buffer, + sint16 *tmp_buffer, const char *quants) +{ + rfx_rem_dwt_shift_encode_vert_lv1(in_buffer, tmp_buffer); + rfx_rem_dwt_shift_encode_horz_lv1(tmp_buffer, out_buffer, quants); + rfx_rem_dwt_shift_encode_vert_lv2(out_buffer + 3007, tmp_buffer); + rfx_rem_dwt_shift_encode_horz_lv2(tmp_buffer, out_buffer + 3007, quants); + rfx_rem_dwt_shift_encode_vert_lv3(out_buffer + 3807, tmp_buffer); + rfx_rem_dwt_shift_encode_horz_lv3(tmp_buffer, out_buffer + 3807, quants); + return 0; +} diff --git a/src/rfxencode_dwt_shift_rem.h b/src/rfxencode_dwt_shift_rem.h new file mode 100644 index 0000000..f3c7285 --- /dev/null +++ b/src/rfxencode_dwt_shift_rem.h @@ -0,0 +1,26 @@ +/** + * RFX codec encoder + * + * Copyright 2020 Jay Sorg <jay.sorg@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __RFXENCODE_DWT_SHIFT_REM_H +#define __RFXENCODE_DWT_SHIFT_REM_H + +int +rfx_rem_dwt_shift_encode(const uint8 *in_buffer, sint16 *out_buffer, + sint16 *tmp_buffer, const char *quants); + +#endif diff --git a/src/rfxencode_quantization.c b/src/rfxencode_quantization.c index b10a70e..45b1672 100644 --- a/src/rfxencode_quantization.c +++ b/src/rfxencode_quantization.c @@ -137,6 +137,22 @@ rfx_quantization_encode_block(sint16 *buffer, int buffer_size, uint32 factor) #endif /******************************************************************************/ +/* + 8 x 8 = 64 + 16 x 16 = 256 + 32 x 32 = 1024 + + HL1 = 32 x 32 = 1024 (1024) + LH1 = 32 x 32 = 1024 (2048) + HH1 = 32 x 32 = 1024 (3072) + HL2 = 16 x 16 = 256 (3328) + LH2 = 16 x 16 = 256 (3584) + HH2 = 16 x 16 = 256 (3840) + HL3 = 8 x 8 = 64 (3904) + LH3 = 8 x 8 = 64 (3968) + HH3 = 8 x 8 = 64 (4032) + LL3 = 8 x 8 = 64 (4096) +*/ int rfx_quantization_encode(sint16 *buffer, const char *qtable) { @@ -165,3 +181,52 @@ rfx_quantization_encode(sint16 *buffer, const char *qtable) return 0; } +/******************************************************************************/ +/* + 8 x 8 = 64 + 8 x 9 = 72 + 9 x 9 = 81 + 16 x 16 = 256 + 16 x 17 = 272 + 31 x 31 = 961 + 31 x 33 = 1023 + + HL1 = 31 x 33 = 1023 (1023) + LH1 = 33 x 31 = 1023 (2046) + HH1 = 31 x 31 = 961 (3007) + HL2 = 16 x 17 = 272 (3279) + LH2 = 17 x 16 = 272 (3551) + HH2 = 16 x 16 = 256 (3807) + HL3 = 8 x 9 = 72 (3879) + LH3 = 9 x 8 = 72 (3951) + HH3 = 8 x 8 = 64 (4015) + LL3 = 9 x 9 = 81 (4096) +*/ +int +rfx_rem_quantization_encode(sint16 *buffer, const char *qtable) +{ + uint32 factor; + + factor = ((qtable[4] >> 0) & 0xf) - 6; + rfx_quantization_encode_block(buffer, 1023, factor); /* HL1 */ + factor = ((qtable[3] >> 4) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 1023, 1023, factor); /* LH1 */ + factor = ((qtable[4] >> 4) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 2046, 961, factor); /* HH1 */ + factor = ((qtable[2] >> 4) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 3007, 272, factor); /* HL2 */ + factor = ((qtable[2] >> 0) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 3279, 272, factor); /* LH2 */ + factor = ((qtable[3] >> 0) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 3551, 256, factor); /* HH2 */ + factor = ((qtable[1] >> 0) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 3807, 72, factor); /* HL3 */ + factor = ((qtable[0] >> 4) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 3879, 72, factor); /* LH3 */ + factor = ((qtable[1] >> 4) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 3951, 64, factor); /* HH3 */ + factor = ((qtable[0] >> 0) & 0xf) - 6; + rfx_quantization_encode_block(buffer + 4015, 81, factor); /* LL3 */ + return 0; +} + diff --git a/src/rfxencode_quantization.h b/src/rfxencode_quantization.h index d246889..d08b533 100644 --- a/src/rfxencode_quantization.h +++ b/src/rfxencode_quantization.h @@ -24,5 +24,7 @@ int rfx_quantization_encode(sint16 *buffer, const char *quantization_values); +int +rfx_rem_quantization_encode(sint16 *buffer, const char *quantization_values); #endif /* __RFX_QUANTIZATION_H */ diff --git a/src/rfxencode_rlgr1.c b/src/rfxencode_rlgr1.c index be9c8ea..e3dcf88 100644 --- a/src/rfxencode_rlgr1.c +++ b/src/rfxencode_rlgr1.c @@ -165,9 +165,13 @@ rfx_rlgr1_encode(const sint16 *data, uint8 *buffer, int buffer_size) /* collect the run of zeros in the input stream */ numZeros = 0; GetNextInput(input); - while (input == 0 && data_size > 0) + while (input == 0) { numZeros++; + if (data_size < 1) + { + break; + } GetNextInput(input); } @@ -187,15 +191,17 @@ rfx_rlgr1_encode(const sint16 *data, uint8 *buffer, int buffer_size) /* output the remaining run length using k bits */ OutputBits(k, numZeros); - /* note: when we reach here and the last byte being encoded is 0, we still - need to output the last two bits, otherwise mstsc will crash */ + if (input == 0) + { + continue; + } /* encode the nonzero value using GR coding */ mag = (input < 0 ? -input : input); /* absolute value of input coefficient */ sign = (input < 0 ? 1 : 0); /* sign of input coefficient */ OutputBit(1, sign); /* output the sign bit */ - lmag = mag ? mag - 1 : 0; + lmag = mag - 1; CodeGR(krp, lmag); /* output GR code for (mag - 1) */ UpdateParam(kp, -DN_GR, k); diff --git a/src/rfxencode_rlgr3.c b/src/rfxencode_rlgr3.c index e66b38f..5c26985 100644 --- a/src/rfxencode_rlgr3.c +++ b/src/rfxencode_rlgr3.c @@ -168,9 +168,13 @@ rfx_rlgr3_encode(const sint16 *data, uint8 *buffer, int buffer_size) /* collect the run of zeros in the input stream */ numZeros = 0; GetNextInput(input); - while (input == 0 && data_size > 0) + while (input == 0) { numZeros++; + if (data_size < 1) + { + break; + } GetNextInput(input); } @@ -190,15 +194,17 @@ rfx_rlgr3_encode(const sint16 *data, uint8 *buffer, int buffer_size) /* output the remaining run length using k bits */ OutputBits(k, numZeros); - /* note: when we reach here and the last byte being encoded is 0, we still - need to output the last two bits, otherwise mstsc will crash */ + if (input == 0) + { + continue; + } /* encode the nonzero value using GR coding */ mag = (input < 0 ? -input : input); /* absolute value of input coefficient */ sign = (input < 0 ? 1 : 0); /* sign of input coefficient */ OutputBit(1, sign); /* output the sign bit */ - lmag = mag ? mag - 1 : 0; + lmag = mag - 1; CodeGR(krp, lmag); /* output GR code for (mag - 1) */ UpdateParam(kp, -DN_GR, k); diff --git a/src/rfxencode_tile.c b/src/rfxencode_tile.c index 763cab8..a0ec108 100644 --- a/src/rfxencode_tile.c +++ b/src/rfxencode_tile.c @@ -33,10 +33,13 @@ #include "rfxconstants.h" #include "rfxencode_tile.h" #include "rfxencode_dwt.h" +#include "rfxencode_dwt_rem.h" #include "rfxencode_quantization.h" #include "rfxencode_differential.h" #include "rfxencode_rlgr1.h" #include "rfxencode_rlgr3.h" +#include "rfxencode_diff_rlgr1.h" +#include "rfxencode_diff_rlgr3.h" #include "rfxencode_alpha.h" #ifdef RFX_USE_ACCEL_X86 diff --git a/src/x86/rfxencode_tile_x86.c b/src/x86/rfxencode_tile_x86.c index a84305e..b65e1a9 100644 --- a/src/x86/rfxencode_tile_x86.c +++ b/src/x86/rfxencode_tile_x86.c @@ -53,7 +53,7 @@ rfx_encode_component_rlgr1_x86_sse2(struct rfxencode *enc, const char *qtable, { return 1; } - *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } @@ -69,7 +69,7 @@ rfx_encode_component_rlgr3_x86_sse2(struct rfxencode *enc, const char *qtable, { return 1; } - *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } @@ -85,7 +85,7 @@ rfx_encode_component_rlgr1_x86_sse41(struct rfxencode *enc, const char *qtable, { return 1; } - *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr1(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } @@ -101,6 +101,6 @@ rfx_encode_component_rlgr3_x86_sse41(struct rfxencode *enc, const char *qtable, { return 1; } - *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size); + *size = rfx_encode_diff_rlgr3(enc->dwt_buffer1, buffer, buffer_size, 64); return 0; } |