Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/lvandeve/lodepng.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLode <lvandeve@gmail.com>2019-09-08 21:47:27 +0300
committerLode <lvandeve@gmail.com>2019-09-08 21:47:27 +0300
commit11fa71469fd78afec9ed96df61598f6a43dd41fd (patch)
tree6a38b96503b50c0de2d4073e635dee12ad1fe442 /lodepng.cpp
parentbd647f3bde7d662b382ed67ea915009ec526ce70 (diff)
more decoder speedups, 5% faster
Diffstat (limited to 'lodepng.cpp')
-rw-r--r--lodepng.cpp243
1 files changed, 170 insertions, 73 deletions
diff --git a/lodepng.cpp b/lodepng.cpp
index 75c824d..6d68ca0 100644
--- a/lodepng.cpp
+++ b/lodepng.cpp
@@ -1,5 +1,5 @@
/*
-LodePNG version 20190901
+LodePNG version 20190908
Copyright (c) 2005-2019 Lode Vandevenne
@@ -44,7 +44,7 @@ Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for
#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
#endif /*_MSC_VER */
-const char* LODEPNG_VERSION_STRING = "20190901";
+const char* LODEPNG_VERSION_STRING = "20190908";
/*
This source file is built up in the following large parts. The code sections
@@ -95,6 +95,15 @@ void* lodepng_realloc(void* ptr, size_t new_size);
void lodepng_free(void* ptr);
#endif /*LODEPNG_COMPILE_ALLOCATORS*/
+/* convince the compiler to inline a function, for use when this measurably improves performance */
+/* inline is not available in C90, but use it when supported by the compiler */
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || (defined(__cplusplus) && (__cplusplus >= 199711L))
+#define LODEPNG_INLINE inline
+#else
+#define LODEPNG_INLINE /* not available */
+#endif
+
+/* restrict is not available in C90, but use it when supported by the compiler */
#if (defined(__GNUC__) && defined(__GNUC_MINOR__) && (__GNUC__ >= 3) && (__GNUC_MINOR__ >= 1)) ||\
(defined(_MSC_VER) && (_MSC_VER >= 1400)) || (defined(__WATCOMC__) && (__WATCOMC__ >= 1250))
#define LODEPNG_RESTRICT __restrict
@@ -125,6 +134,33 @@ static size_t lodepng_strlen(const char* a) {
#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b))
#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x))
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* Safely check if multiplying two integers will overflow (no undefined
+behavior, compiler removing the code, etc...) and output result. */
+static int lodepng_mulofl(size_t a, size_t b, size_t* result) {
+ *result = a * b; /* Unsigned multiplication is well defined and safe in C90 */
+ return (a != 0 && *result / a != b);
+}
+
+/* Safely check if adding two integers will overflow (no undefined
+behavior, compiler removing the code, etc...) and output result. */
+static int lodepng_addofl(size_t a, size_t b, size_t* result) {
+ *result = a + b; /* Unsigned addition is well defined and safe in C90 */
+ return *result < a;
+}
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/* Safely check if a + b > c, even if overflow could happen. */
+static int lodepng_gtofl(size_t a, size_t b, size_t c) {
+ size_t d;
+ if(lodepng_addofl(a, b, &d)) return 1;
+ return d > c;
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
/*
Often in case of an error a value is assigned to a variable and then it breaks
out of a loop (to go to the cleanup phase of a function). This macro does that.
@@ -457,39 +493,106 @@ typedef struct {
unsigned buffer; /*buffer for reading bits. NOTE: 'unsigned' must support at least 32 bits*/
} LodePNGBitReader;
-/* data size argument is in bytes */
-void LodePNGBitReader_init(LodePNGBitReader* reader, const unsigned char* data, size_t size) {
+/* data size argument is in bytes. Returns error if size too large causing overflow */
+static unsigned LodePNGBitReader_init(LodePNGBitReader* reader, const unsigned char* data, size_t size) {
+ size_t temp;
reader->data = data;
reader->size = size;
- reader->bitsize = size << 3u; /* size in bits */
+ /* size in bits, return error if overflow (if size_t is 32 bit this supports up to 500MB) */
+ if(lodepng_mulofl(size, 8u, &reader->bitsize)) return 105;
+ /*ensure incremented bp can be compared to bitsize without overflow even when it would be incremented 32 too much and
+ trying to ensure 32 more bits*/
+ if(lodepng_addofl(reader->bitsize, 64u, &temp)) return 105;
reader->bp = 0;
reader->buffer = 0;
+ return 0; /*ok*/
}
-/*Ensures the reader can at least read nbits bits in one or more readBits calls.
-Returns 0 if there are enough bits available, or amount of shortage of bits if not.
-The 'nbits' parameter must be <= 17. */
-static unsigned ensureBits(LodePNGBitReader* reader, size_t nbits) {
- if(nbits == 1) { /*the compiler is intended to perform do this 'if' statically for efficiency*/
- if(reader->bp >= reader->bitsize) return 1;
- reader->buffer = (unsigned)reader->data[reader->bp >> 3u] >> (reader->bp & 7u);
- return 0;
+/*
+ensureBits functions:
+Ensures the reader can at least read nbits bits in one or more readBits calls,
+safely even if not enough bits are available.
+Returns 1 if there are enough bits available, 0 if not.
+*/
+
+/*See ensureBits documentation above. This one ensures exactly 1 bit */
+/*static unsigned ensureBits1(LodePNGBitReader* reader) {
+ if(reader->bp >= reader->bitsize) return 0;
+ reader->buffer = (unsigned)reader->data[reader->bp >> 3u] >> (reader->bp & 7u);
+ return 1;
+}*/
+
+/*See ensureBits documentation above. This one ensures up to 9 bits */
+static unsigned ensureBits9(LodePNGBitReader* reader, size_t nbits) {
+ size_t start = reader->bp >> 3u;
+ size_t size = reader->size;
+ if(start + 1u < size) {
+ reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u);
+ reader->buffer >>= (reader->bp & 7u);
+ return 1;
} else {
- size_t start = reader->bp >> 3u;
- unsigned rem = reader->bitsize - reader->bp;
- if(rem >= 24) {
- reader->buffer = (unsigned)(reader->data[start + 0] | (unsigned)((reader->data[start + 1] << 8u)) |
- (unsigned)(reader->data[start + 2] << 16u)) >> (reader->bp & 7u);
- return 0;
- } else {
- size_t size = reader->size;
- reader->buffer = 0;
- if(start + 0 < size) reader->buffer |= reader->data[start + 0];
- if(start + 1 < size) reader->buffer |= (unsigned)(reader->data[start + 1] << 8u);
- if(start + 2 < size) reader->buffer |= (unsigned)(reader->data[start + 2] << 16u);
- reader->buffer >>= (reader->bp & 7u);
- return (rem >= nbits) ? 0 : (nbits - rem);
- }
+ reader->buffer = 0;
+ if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+ reader->buffer >>= (reader->bp & 7u);
+ return reader->bp + nbits < reader->bitsize;
+ }
+}
+
+/*See ensureBits documentation above. This one ensures up to 17 bits */
+static unsigned ensureBits17(LodePNGBitReader* reader, size_t nbits) {
+ size_t start = reader->bp >> 3u;
+ size_t size = reader->size;
+ if(start + 2u < size) {
+ reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u) |
+ (unsigned)(reader->data[start + 2] << 16u);
+ reader->buffer >>= (reader->bp & 7u);
+ return 1;
+ } else {
+ reader->buffer = 0;
+ if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+ if(start + 1u < size) reader->buffer |= (unsigned)(reader->data[start + 1] << 8u);
+ reader->buffer >>= (reader->bp & 7u);
+ return reader->bp + nbits < reader->bitsize;
+ }
+}
+
+/*See ensureBits documentation above. This one ensures up to 25 bits */
+static LODEPNG_INLINE unsigned ensureBits25(LodePNGBitReader* reader, size_t nbits) {
+ size_t start = reader->bp >> 3u;
+ size_t size = reader->size;
+ if(start + 3u < size) {
+ reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u) |
+ (unsigned)(reader->data[start + 2] << 16u) | (unsigned)(reader->data[start + 3] << 24u);
+ reader->buffer >>= (reader->bp & 7u);
+ return 1;
+ } else {
+ reader->buffer = 0;
+ if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+ if(start + 1u < size) reader->buffer |= (unsigned)(reader->data[start + 1] << 8u);
+ if(start + 2u < size) reader->buffer |= (unsigned)(reader->data[start + 2] << 16u);
+ reader->buffer >>= (reader->bp & 7u);
+ return reader->bp + nbits < reader->bitsize;
+ }
+}
+
+/*See ensureBits documentation above. This one ensures up to 32 bits */
+static LODEPNG_INLINE unsigned ensureBits32(LodePNGBitReader* reader, size_t nbits) {
+ size_t start = reader->bp >> 3u;
+ size_t size = reader->size;
+ if(start + 4u < size) {
+ reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u) |
+ (unsigned)(reader->data[start + 2] << 16u) | (unsigned)(reader->data[start + 3] << 24u);
+ reader->buffer >>= (reader->bp & 7u);
+ reader->buffer |= ((unsigned)(reader->data[start + 4] << 24u) << (7u - (reader->bp & 7u)));
+ return 1;
+ } else {
+ reader->buffer = 0;
+ if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+ if(start + 1u < size) reader->buffer |= (unsigned)(reader->data[start + 1] << 8u);
+ if(start + 2u < size) reader->buffer |= (unsigned)(reader->data[start + 2] << 16u);
+ if(start + 3u < size) reader->buffer |= (unsigned)(reader->data[start + 3] << 24u);
+ reader->buffer >>= (reader->bp & 7u);
+ return reader->bp + nbits < reader->bitsize;
}
}
@@ -587,7 +690,8 @@ static void HuffmanTree_cleanup(HuffmanTree* tree) {
}
/* amount of bits for first huffman table lookup (aka root bits), see HuffmanTree_makeTable and huffmanDecodeSymbol.*/
-#define FIRSTBITS 8u
+/* values 8u and 9u work the fastest */
+#define FIRSTBITS 9u
/* make table for huffman decoding */
static unsigned HuffmanTree_makeTable(HuffmanTree* tree) {
@@ -987,14 +1091,12 @@ static unsigned generateFixedDistanceTree(HuffmanTree* tree) {
#ifdef LODEPNG_COMPILE_DECODER
/*
-returns the code, or (unsigned)(-1) if error happened
+returns the code. The bit reader must already have been ensured at least 15 bits
*/
static unsigned huffmanDecodeSymbol(LodePNGBitReader* reader, const HuffmanTree* codetree) {
- unsigned short code, l, value;
- if(ensureBits(reader, 15) > 1) return (unsigned)(-1);
- code = peekBits(reader, FIRSTBITS);
- l = codetree->table_len[code];
- value = codetree->table_value[code];
+ unsigned short code = peekBits(reader, FIRSTBITS);
+ unsigned short l = codetree->table_len[code];
+ unsigned short value = codetree->table_value[code];
if(l <= FIRSTBITS) {
advanceBits(reader, l);
return value;
@@ -1035,7 +1137,7 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
unsigned* bitlen_cl = 0;
HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
- if(ensureBits(reader, 14)) return 49; /*error: the bit pointer is or will go past the memory*/
+ if(!ensureBits17(reader, 14)) return 49; /*error: the bit pointer is or will go past the memory*/
/*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
HLIT = readBits(reader, 5) + 257;
@@ -1051,9 +1153,11 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
while(!error) {
/*read the code length codes out of 3 * (amount of code length codes) bits*/
- if((reader->bp) + HCLEN * 3 > reader->bitsize) ERROR_BREAK(50); /*error: the bit pointer is or will go past the memory*/
+ if(lodepng_gtofl(reader->bp, HCLEN * 3, reader->bitsize)) {
+ ERROR_BREAK(50); /*error: the bit pointer is or will go past the memory*/
+ }
for(i = 0; i != HCLEN; ++i) {
- ensureBits(reader, 3);
+ ensureBits9(reader, 3); /*out of bounds already checked above */
bitlen_cl[CLCL_ORDER[i]] = readBits(reader, 3);
}
for(i = HCLEN; i != NUM_CODE_LENGTH_CODES; ++i) {
@@ -1073,7 +1177,9 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
/*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
i = 0;
while(i < HLIT + HDIST) {
- unsigned code = huffmanDecodeSymbol(reader, &tree_cl);
+ unsigned code;
+ ensureBits25(reader, 22); /* up to 15 bits for huffman code, up to 7 extra bits below*/
+ code = huffmanDecodeSymbol(reader, &tree_cl);
if(code <= 15) /*a length code*/ {
if(i < HLIT) bitlen_ll[i] = code;
else bitlen_d[i - HLIT] = code;
@@ -1084,7 +1190,6 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
- if(ensureBits(reader, 2)) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
replength += readBits(reader, 2);
if(i < HLIT + 1) value = bitlen_ll[i - 1];
@@ -1098,7 +1203,6 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
}
} else if(code == 17) /*repeat "0" 3-10 times*/ {
unsigned replength = 3; /*read in the bits that indicate repeat length*/
- if(ensureBits(reader, 3)) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
replength += readBits(reader, 3);
/*repeat this value in the next lengths*/
@@ -1111,7 +1215,6 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
}
} else if(code == 18) /*repeat "0" 11-138 times*/ {
unsigned replength = 11; /*read in the bits that indicate repeat length*/
- if(ensureBits(reader, 7)) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
replength += readBits(reader, 7);
/*repeat this value in the next lengths*/
@@ -1123,13 +1226,14 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
++i;
}
} else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ {
- if(code == (unsigned)(-1)) {
- /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
- (10=no endcode, 11=wrong jump outside of tree)*/
- error = (reader->bp) > reader->bitsize ? 10 : 11;
- }
- else error = 16; /*unexisting code, this can never happen*/
- break;
+ ERROR_BREAK(16); /*unexisting code, this can never happen*/
+ }
+ /*check if any of the ensureBits above went out of bounds*/
+ if(reader->bp > reader->bitsize) {
+ /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+ (10=no endcode, 11=wrong jump outside of tree)*/
+ /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+ ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
}
}
if(error) break;
@@ -1167,7 +1271,9 @@ static unsigned inflateHuffmanBlock(ucvector* out, size_t* pos, LodePNGBitReader
while(!error) /*decode all symbols until end reached, breaks at end code*/ {
/*code_ll is literal, length or end code*/
- unsigned code_ll = huffmanDecodeSymbol(reader, &tree_ll);
+ unsigned code_ll;
+ ensureBits25(reader, 20); /* up to 15 for the huffman symbol, up to 5 for the length extra bits */
+ code_ll = huffmanDecodeSymbol(reader, &tree_ll);
if(code_ll <= 255) /*literal symbol*/ {
/*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
@@ -1184,12 +1290,12 @@ static unsigned inflateHuffmanBlock(ucvector* out, size_t* pos, LodePNGBitReader
/*part 2: get extra bits and add the value of that to length*/
numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
if(numextrabits_l != 0) {
- /* we need numextrabits_l bits, but using the constant 5 (max possible) in ensureBits is slightly faster */
- if(ensureBits(reader, 5)) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
+ /* bits already ensured above */
length += readBits(reader, numextrabits_l);
}
/*part 3: get distance code*/
+ ensureBits32(reader, 28); /* up to 15 for the huffman symbol, up to 13 for the extra bits */
code_d = huffmanDecodeSymbol(reader, &tree_d);
if(code_d > 29) {
if(code_d == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ {
@@ -1205,8 +1311,7 @@ static unsigned inflateHuffmanBlock(ucvector* out, size_t* pos, LodePNGBitReader
/*part 4: get extra bits from distance*/
numextrabits_d = DISTANCEEXTRA[code_d];
if(numextrabits_d != 0) {
- /* we need numextrabits_d bits, but using the constant 13 (max possible) in ensureBits is slightly faster */
- if(ensureBits(reader, 13)) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
+ /* bits already ensured above */
distance += readBits(reader, numextrabits_d);
}
@@ -1230,10 +1335,14 @@ static unsigned inflateHuffmanBlock(ucvector* out, size_t* pos, LodePNGBitReader
} else if(code_ll == 256) {
break; /*end code, break the loop*/
} else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ {
+ ERROR_BREAK(16) /* impossible */
+ }
+ /*check if any of the ensureBits above went out of bounds*/
+ if(reader->bp > reader->bitsize) {
/*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
(10=no endcode, 11=wrong jump outside of tree)*/
- error = (reader->bp > reader->bitsize) ? 10 : 11;
- break;
+ /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+ ERROR_BREAK(51); /*error, bit pointer jumps past memory*/
}
}
@@ -1281,13 +1390,14 @@ static unsigned lodepng_inflatev(ucvector* out,
const LodePNGDecompressSettings* settings) {
unsigned BFINAL = 0;
size_t pos = 0; /*byte position in the out buffer*/
- unsigned error = 0;
LodePNGBitReader reader;
- LodePNGBitReader_init(&reader, in, insize);
+ unsigned error = LodePNGBitReader_init(&reader, in, insize);
+
+ if(error) return error;
while(!BFINAL) {
unsigned BTYPE;
- if(ensureBits(&reader, 3)) return 52; /*error, bit pointer will jump past memory*/
+ if(!ensureBits9(&reader, 3)) return 52; /*error, bit pointer will jump past memory*/
BFINAL = readBits(&reader, 1);
BTYPE = readBits(&reader, 2);
@@ -2617,20 +2727,6 @@ static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGCol
return (size_t)h * line;
}
-/* Safely check if multiplying two integers will overflow (no undefined
-behavior, compiler removing the code, etc...) and output result. */
-static int lodepng_mulofl(size_t a, size_t b, size_t* result) {
- *result = a * b; /* Unsigned multiplication is well defined and safe in C90 */
- return (a != 0 && *result / a != b);
-}
-
-/* Safely check if adding two integers will overflow (no undefined
-behavior, compiler removing the code, etc...) and output result. */
-static int lodepng_addofl(size_t a, size_t b, size_t* result) {
- *result = a + b; /* Unsigned addition is well defined and safe in C90 */
- return *result < a;
-}
-
/*Safely checks whether size_t overflow can be caused due to amount of pixels.
This check is overcautious rather than precise. If this check indicates no overflow,
you can safely compute in a size_t (but not an unsigned):
@@ -5899,6 +5995,7 @@ const char* lodepng_error_text(unsigned code) {
case 102: return "not allowed to set grayscale ICC profile with colored pixels by PNG specification";
case 103: return "invalid palette index in bKGD chunk. Maybe it came before PLTE chunk?";
case 104: return "invalid bKGD color while encoding (e.g. palette index out of range)";
+ case 105: return "integer overflow of bitsize";
}
return "unknown error code";
}