Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/nemequ/liblzf.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Lehmann <schmorpforge@schmorp.de>2012-02-16 09:43:19 +0400
committerMarc Lehmann <schmorpforge@schmorp.de>2012-02-16 09:43:19 +0400
commite60bd889308c9c2fb7335731acf631c94b788722 (patch)
tree6b8e330137bbc2c0125a312a8716abd92e321af9
parenta209a07baccf519c24378e4221761f2e4481ec03 (diff)
*** empty log message ***
-rw-r--r--Changes3
-rw-r--r--bench.c14
-rw-r--r--lzfP.h17
-rw-r--r--lzf_c_best.c207
4 files changed, 227 insertions, 14 deletions
diff --git a/Changes b/Changes
index 3134742..fe6bb1e 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,7 @@
+TODO: try unaligned copy again in decompressor
+TODO: allow size-optimised binaries by avoiding unrolling
+
- switch to a multiplicative hash (developed with Steinar Gunderson),
which is faster on modern cpus and compresses a bit better. The old
hash function which uses only shifts is still available.
diff --git a/bench.c b/bench.c
index a495421..6cf47b7 100644
--- a/bench.c
+++ b/bench.c
@@ -42,8 +42,10 @@ static void sigu (int signum)
{
}
-#define DSIZE 2821120
-#define DSIZE 32768
+#define DSIZE 17318440
+//#define DSIZE 32768
+
+#include "lzf_c_slow.c"
unsigned char data[DSIZE], data2[DSIZE*2], data3[DSIZE*2];
@@ -51,7 +53,7 @@ int main(void)
{
tval s;
tval si[1000];
- int i, l, j;
+ int i, j, k, l;
int min = 1<<30;
int lp;
char buf[8192];
@@ -84,10 +86,14 @@ int main(void)
//struct timeval tv; gettimeofday (&tv, 0);
//void *x = mmap (0, 16384, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,-1,0);
- l = lzf_compress (data, DSIZE, data2, DSIZE*2);
+ l = lzf_compress_slow (data, DSIZE, data2, DSIZE*2);
+ //for (k = 0; k < l; ++k)
+ //printf ("1 %2d: %02x\n", k, data2[k]);
assert(l);
j = lzf_decompress (data2, l, data3, DSIZE*2);
+ //for (k = 0; k < j; ++k)
+ //printf ("2 %2d: %02x\n", k, data3[k]);
assert (j == DSIZE);
si[0]=measure(s);
diff --git a/lzfP.h b/lzfP.h
index 97aabf4..deac5ba 100644
--- a/lzfP.h
+++ b/lzfP.h
@@ -187,16 +187,13 @@ typedef unsigned char u8;
typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
-#if !STRICT_ALIGN
-/* for unaligned accesses we need a 16 bit datatype. */
-# if USHRT_MAX == 65535
- typedef unsigned short u16;
-# elif UINT_MAX == 65535
- typedef unsigned int u16;
-# else
-# undef STRICT_ALIGN
-# define STRICT_ALIGN 1
-# endif
+#if USHRT_MAX == 65535
+ typedef unsigned short u16;
+#elif UINT_MAX == 65535
+ typedef unsigned int u16;
+#else
+# undef STRICT_ALIGN
+# define STRICT_ALIGN 1
#endif
#if ULTRA_FAST
diff --git a/lzf_c_best.c b/lzf_c_best.c
new file mode 100644
index 0000000..6554a9c
--- /dev/null
+++ b/lzf_c_best.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2000-2012 Marc Alexander Lehmann <schmorp@schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#define HASH(p) (p[0] << 6) ^ (p[1] << 3) ^ p[2]
+
+#define MAX_LIT (1 << 5)
+#define MAX_OFF (1 << 13)
+#define MAX_REF ((1 << 8) + (1 << 3))
+
+#if __GNUC__ >= 3
+# define expect(expr,value) __builtin_expect ((expr),(value))
+# define inline inline
+#else
+# define expect(expr,value) (expr)
+# define inline static
+#endif
+
+#define expect_false(expr) expect ((expr) != 0, 0)
+#define expect_true(expr) expect ((expr) != 0, 1)
+
+/*
+ * compressed format
+ *
+ * 000LLLLL <L+1> ; literal, L+1=1..33 octets
+ * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
+ * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
+ *
+ */
+
+unsigned int
+lzf_compress_best (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len)
+{
+ const u8 *ip = (const u8 *)in_data;
+ u8 *op = (u8 *)out_data;
+ const u8 *in_end = ip + in_len;
+ u8 *out_end = op + out_len;
+
+ const u8 *first [1 << (6+8)]; /* most recent occurance of a match */
+ u16 prev [MAX_OFF]; /* how many bytes to go backwards for te next match */
+
+ int lit;
+
+ if (!in_len || !out_len)
+ return 0;
+
+ lit = 0; op++; /* start run */
+
+ lit++; *op++ = *ip++;
+
+ while (ip < in_end - 2)
+ {
+ int best_l = 0;
+ const u8 *best_p;
+ int e = (in_end - ip < MAX_REF ? in_end - ip : MAX_REF) - 1;
+ unsigned int res = ((unsigned int)ip) & (MAX_OFF - 1);
+ u16 hash = HASH (ip);
+ u16 diff;
+ const u8 *b = ip < (u8 *)in_data + MAX_OFF ? in_data : ip - MAX_OFF;
+ const u8 *p = first [hash];
+ prev [res] = ip - p; /* update ptr to previous hash match */
+ first [hash] = ip; /* first hash match is here */
+
+ if (p < ip)
+ while (p >= b)
+ {
+ if (p[2] == ip[2]) /* first two bytes almost always match */
+ if (p[best_l] == ip[best_l]) /* new match must be longer than the old one to qualify */
+ if (p[1] == ip[1] && p[0] == ip[0]) /* just to be sure */
+ {
+ int l = 3;
+
+ while (p[l] == ip[l] && l < e)
+ ++l;
+
+ if (l >= best_l)
+ {
+ best_p = p;
+ best_l = l;
+ }
+ }
+
+ diff = prev [((unsigned int)p) & (MAX_OFF - 1)];
+ p = diff ? p - diff : (u8 *)diff;
+ }
+
+ if (best_l)
+ {
+ int len = best_l;
+ int off = ip - best_p - 1;
+
+ if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
+ if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
+ return 0;
+
+ op [- lit - 1] = lit - 1; /* stop run */
+ op -= !lit; /* undo run if length is zero */
+
+ len -= 2; /* len is now #octets - 1 */
+ ip++;
+
+ if (len < 7)
+ {
+ *op++ = (off >> 8) + (len << 5);
+ }
+ else
+ {
+ *op++ = (off >> 8) + ( 7 << 5);
+ *op++ = len - 7;
+ }
+
+ *op++ = off;
+
+ lit = 0; op++; /* start run */
+
+ ip += len + 1;
+
+ if (expect_false (ip >= in_end - 2))
+ break;
+
+ ip -= len + 1;
+
+ //printf (" fill %p for %d\n", ip, len);//D
+ do
+ {
+ u16 hash = HASH (ip);
+ res = ((unsigned int)ip) & (MAX_OFF - 1);
+
+ p = first [hash];
+ prev [res] = ip - p; /* update ptr to previous hash match */
+ first [hash] = ip; /* first hash match is here */
+
+ ip++;
+ }
+ while (len--);
+ }
+ else
+ {
+ /* one more literal byte we must copy */
+ if (expect_false (op >= out_end))
+ return 0;
+
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+ }
+
+ if (op + 3 > out_end) /* at most 3 bytes can be missing here */
+ return 0;
+
+ while (ip < in_end)
+ {
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+
+ op [- lit - 1] = lit - 1; /* end run */
+ op -= !lit; /* undo run if length is zero */
+
+ return op - (u8 *)out_data;
+}
+