From 833d4e7f84f59099ee66eabfa3457ebb7d37eaa8 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 3 Nov 2010 02:38:31 +0100 Subject: rename archival/libunarchive -> archival/libarchive; move bz/ into it Signed-off-by: Denys Vlasenko --- archival/libarchive/Kbuild.src | 64 ++ archival/libarchive/bz/LICENSE | 44 + archival/libarchive/bz/README | 90 ++ archival/libarchive/bz/blocksort.c | 1072 ++++++++++++++++++ archival/libarchive/bz/bzlib.c | 431 +++++++ archival/libarchive/bz/bzlib.h | 65 ++ archival/libarchive/bz/bzlib_private.h | 219 ++++ archival/libarchive/bz/compress.c | 685 +++++++++++ archival/libarchive/bz/huffman.c | 229 ++++ archival/libarchive/data_align.c | 15 + archival/libarchive/data_extract_all.c | 200 ++++ archival/libarchive/data_extract_to_command.c | 134 +++ archival/libarchive/data_extract_to_stdout.c | 14 + archival/libarchive/data_skip.c | 12 + archival/libarchive/decompress_bunzip2.c | 822 ++++++++++++++ archival/libarchive/decompress_uncompress.c | 307 +++++ archival/libarchive/decompress_unlzma.c | 465 ++++++++ archival/libarchive/decompress_unxz.c | 98 ++ archival/libarchive/decompress_unzip.c | 1252 +++++++++++++++++++++ archival/libarchive/filter_accept_all.c | 17 + archival/libarchive/filter_accept_list.c | 19 + archival/libarchive/filter_accept_list_reassign.c | 51 + archival/libarchive/filter_accept_reject_list.c | 36 + archival/libarchive/find_list_entry.c | 54 + archival/libarchive/get_header_ar.c | 133 +++ archival/libarchive/get_header_cpio.c | 186 +++ archival/libarchive/get_header_tar.c | 461 ++++++++ archival/libarchive/get_header_tar_bz2.c | 21 + archival/libarchive/get_header_tar_gz.c | 36 + archival/libarchive/get_header_tar_lzma.c | 24 + archival/libarchive/header_list.c | 12 + archival/libarchive/header_skip.c | 10 + archival/libarchive/header_verbose_list.c | 69 ++ archival/libarchive/init_handle.c | 22 + archival/libarchive/liblzo.h | 93 ++ archival/libarchive/lzo1x_1.c | 35 + archival/libarchive/lzo1x_1o.c | 35 + archival/libarchive/lzo1x_9x.c | 921 +++++++++++++++ archival/libarchive/lzo1x_c.c | 296 +++++ archival/libarchive/lzo1x_d.c | 420 +++++++ archival/libarchive/open_transformer.c | 54 + archival/libarchive/seek_by_jump.c | 19 + archival/libarchive/seek_by_read.c | 16 + archival/libarchive/unpack_ar_archive.c | 22 + archival/libarchive/unxz/README | 135 +++ archival/libarchive/unxz/xz.h | 271 +++++ archival/libarchive/unxz/xz_config.h | 123 ++ archival/libarchive/unxz/xz_dec_bcj.c | 564 ++++++++++ archival/libarchive/unxz/xz_dec_lzma2.c | 1175 +++++++++++++++++++ archival/libarchive/unxz/xz_dec_stream.c | 822 ++++++++++++++ archival/libarchive/unxz/xz_lzma2.h | 204 ++++ archival/libarchive/unxz/xz_private.h | 159 +++ archival/libarchive/unxz/xz_stream.h | 57 + 53 files changed, 12790 insertions(+) create mode 100644 archival/libarchive/Kbuild.src create mode 100644 archival/libarchive/bz/LICENSE create mode 100644 archival/libarchive/bz/README create mode 100644 archival/libarchive/bz/blocksort.c create mode 100644 archival/libarchive/bz/bzlib.c create mode 100644 archival/libarchive/bz/bzlib.h create mode 100644 archival/libarchive/bz/bzlib_private.h create mode 100644 archival/libarchive/bz/compress.c create mode 100644 archival/libarchive/bz/huffman.c create mode 100644 archival/libarchive/data_align.c create mode 100644 archival/libarchive/data_extract_all.c create mode 100644 archival/libarchive/data_extract_to_command.c create mode 100644 archival/libarchive/data_extract_to_stdout.c create mode 100644 archival/libarchive/data_skip.c create mode 100644 archival/libarchive/decompress_bunzip2.c create mode 100644 archival/libarchive/decompress_uncompress.c create mode 100644 archival/libarchive/decompress_unlzma.c create mode 100644 archival/libarchive/decompress_unxz.c create mode 100644 archival/libarchive/decompress_unzip.c create mode 100644 archival/libarchive/filter_accept_all.c create mode 100644 archival/libarchive/filter_accept_list.c create mode 100644 archival/libarchive/filter_accept_list_reassign.c create mode 100644 archival/libarchive/filter_accept_reject_list.c create mode 100644 archival/libarchive/find_list_entry.c create mode 100644 archival/libarchive/get_header_ar.c create mode 100644 archival/libarchive/get_header_cpio.c create mode 100644 archival/libarchive/get_header_tar.c create mode 100644 archival/libarchive/get_header_tar_bz2.c create mode 100644 archival/libarchive/get_header_tar_gz.c create mode 100644 archival/libarchive/get_header_tar_lzma.c create mode 100644 archival/libarchive/header_list.c create mode 100644 archival/libarchive/header_skip.c create mode 100644 archival/libarchive/header_verbose_list.c create mode 100644 archival/libarchive/init_handle.c create mode 100644 archival/libarchive/liblzo.h create mode 100644 archival/libarchive/lzo1x_1.c create mode 100644 archival/libarchive/lzo1x_1o.c create mode 100644 archival/libarchive/lzo1x_9x.c create mode 100644 archival/libarchive/lzo1x_c.c create mode 100644 archival/libarchive/lzo1x_d.c create mode 100644 archival/libarchive/open_transformer.c create mode 100644 archival/libarchive/seek_by_jump.c create mode 100644 archival/libarchive/seek_by_read.c create mode 100644 archival/libarchive/unpack_ar_archive.c create mode 100644 archival/libarchive/unxz/README create mode 100644 archival/libarchive/unxz/xz.h create mode 100644 archival/libarchive/unxz/xz_config.h create mode 100644 archival/libarchive/unxz/xz_dec_bcj.c create mode 100644 archival/libarchive/unxz/xz_dec_lzma2.c create mode 100644 archival/libarchive/unxz/xz_dec_stream.c create mode 100644 archival/libarchive/unxz/xz_lzma2.h create mode 100644 archival/libarchive/unxz/xz_private.h create mode 100644 archival/libarchive/unxz/xz_stream.h (limited to 'archival/libarchive') diff --git a/archival/libarchive/Kbuild.src b/archival/libarchive/Kbuild.src new file mode 100644 index 000000000..b0bc4e5aa --- /dev/null +++ b/archival/libarchive/Kbuild.src @@ -0,0 +1,64 @@ +# Makefile for busybox +# +# Copyright (C) 1999-2004 by Erik Andersen +# +# Licensed under GPLv2 or later, see file LICENSE in this source tree. + +lib-y:= + +COMMON_FILES:= \ +\ + data_skip.o \ + data_extract_all.o \ + data_extract_to_stdout.o \ +\ + filter_accept_all.o \ + filter_accept_list.o \ + filter_accept_reject_list.o \ +\ + header_skip.o \ + header_list.o \ + header_verbose_list.o \ +\ + seek_by_read.o \ + seek_by_jump.o \ +\ + data_align.o \ + find_list_entry.o \ + init_handle.o + +DPKG_FILES:= \ + get_header_ar.o \ + unpack_ar_archive.o \ + get_header_tar.o \ + filter_accept_list_reassign.o + +INSERT + +lib-$(CONFIG_AR) += get_header_ar.o unpack_ar_archive.o +lib-$(CONFIG_BUNZIP2) += decompress_bunzip2.o +lib-$(CONFIG_UNLZMA) += decompress_unlzma.o +lib-$(CONFIG_UNXZ) += decompress_unxz.o +lib-$(CONFIG_CPIO) += get_header_cpio.o +lib-$(CONFIG_DPKG) += $(DPKG_FILES) +lib-$(CONFIG_DPKG_DEB) += $(DPKG_FILES) +lib-$(CONFIG_GUNZIP) += decompress_unzip.o +lib-$(CONFIG_RPM2CPIO) += decompress_unzip.o get_header_cpio.o +lib-$(CONFIG_RPM) += open_transformer.o decompress_unzip.o get_header_cpio.o +lib-$(CONFIG_TAR) += get_header_tar.o +lib-$(CONFIG_UNCOMPRESS) += decompress_uncompress.o +lib-$(CONFIG_UNZIP) += decompress_unzip.o +lib-$(CONFIG_LZOP) += lzo1x_1.o lzo1x_1o.o lzo1x_d.o +lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o +lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompress.o +lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o +lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o +lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o +lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o +lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o +lib-$(CONFIG_FEATURE_COMPRESS_BBCONFIG) += decompress_bunzip2.o +lib-$(CONFIG_FEATURE_TAR_TO_COMMAND) += data_extract_to_command.o + +ifneq ($(lib-y),) +lib-y += $(COMMON_FILES) +endif diff --git a/archival/libarchive/bz/LICENSE b/archival/libarchive/bz/LICENSE new file mode 100644 index 000000000..da4346520 --- /dev/null +++ b/archival/libarchive/bz/LICENSE @@ -0,0 +1,44 @@ +bzip2 applet in busybox is based on lightly-modified source +of bzip2 version 1.0.4. bzip2 source is distributed +under the following conditions (copied verbatim from LICENSE file) +=========================================================== + + +This program, "bzip2", the associated library "libbzip2", and all +documentation, are copyright (C) 1996-2006 Julian R Seward. All +rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + +3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + +4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Julian Seward, Cambridge, UK. +jseward@bzip.org +bzip2/libbzip2 version 1.0.4 of 20 December 2006 diff --git a/archival/libarchive/bz/README b/archival/libarchive/bz/README new file mode 100644 index 000000000..fffd47b8a --- /dev/null +++ b/archival/libarchive/bz/README @@ -0,0 +1,90 @@ +This file is an abridged version of README from bzip2 1.0.4 +Build instructions (which are not relevant to busyboxed bzip2) +are removed. +=========================================================== + + +This is the README for bzip2/libzip2. +This version is fully compatible with the previous public releases. + +------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in this file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ + +Please read and be aware of the following: + + +WARNING: + + This program and library (attempts to) compress data by + performing several non-trivial transformations on it. + Unless you are 100% familiar with *all* the algorithms + contained herein, and with the consequences of modifying them, + you should NOT meddle with the compression or decompression + machinery. Incorrect changes can and very likely *will* + lead to disastrous loss of data. + + +DISCLAIMER: + + I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE + USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED. + + Every compression of a file implies an assumption that the + compressed file can be decompressed to reproduce the original. + Great efforts in design, coding and testing have been made to + ensure that this program works correctly. However, the complexity + of the algorithms, and, in particular, the presence of various + special cases in the code which occur with very low but non-zero + probability make it impossible to rule out the possibility of bugs + remaining in the program. DO NOT COMPRESS ANY DATA WITH THIS + PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER + SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. + + That is not to say this program is inherently unreliable. + Indeed, I very much hope the opposite is true. bzip2/libbzip2 + has been carefully constructed and extensively tested. + + +PATENTS: + + To the best of my knowledge, bzip2/libbzip2 does not use any + patented algorithms. However, I do not have the resources + to carry out a patent search. Therefore I cannot give any + guarantee of the above statement. + + +I hope you find bzip2 useful. Feel free to contact me at + jseward@bzip.org +if you have any suggestions or queries. Many people mailed me with +comments, suggestions and patches after the releases of bzip-0.15, +bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1, +1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this +feedback. I thank you for your comments. + +bzip2's "home" is http://www.bzip.org/ + +Julian Seward +jseward@bzip.org +Cambridge, UK. + +18 July 1996 (version 0.15) +25 August 1996 (version 0.21) + 7 August 1997 (bzip2, version 0.1) +29 August 1997 (bzip2, version 0.1pl2) +23 August 1998 (bzip2, version 0.9.0) + 8 June 1999 (bzip2, version 0.9.5) + 4 Sept 1999 (bzip2, version 0.9.5d) + 5 May 2000 (bzip2, version 1.0pre8) +30 December 2001 (bzip2, version 1.0.2pre1) +15 February 2005 (bzip2, version 1.0.3) +20 December 2006 (bzip2, version 1.0.4) diff --git a/archival/libarchive/bz/blocksort.c b/archival/libarchive/bz/blocksort.c new file mode 100644 index 000000000..f70c3701d --- /dev/null +++ b/archival/libarchive/bz/blocksort.c @@ -0,0 +1,1072 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Block sorting machinery ---*/ +/*--- blocksort.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +#define mswap(zz1, zz2) \ +{ \ + int32_t zztmp = zz1; \ + zz1 = zz2; \ + zz2 = zztmp; \ +} + +static +/* No measurable speed gain with inlining */ +/* ALWAYS_INLINE */ +void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn) +{ + while (zzn > 0) { + mswap(ptr[zzp1], ptr[zzp2]); + zzp1++; + zzp2++; + zzn--; + } +} + +static +ALWAYS_INLINE +int32_t mmin(int32_t a, int32_t b) +{ + return (a < b) ? a : b; +} + + +/*---------------------------------------------*/ +/*--- Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +inline +void fallbackSimpleSort(uint32_t* fmap, + uint32_t* eclass, + int32_t lo, + int32_t hi) +{ + int32_t i, j, tmp; + uint32_t ec_tmp; + + if (lo == hi) return; + + if (hi - lo > 3) { + for (i = hi-4; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) + fmap[j-4] = fmap[j]; + fmap[j-4] = tmp; + } + } + + for (i = hi-1; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) + fmap[j-1] = fmap[j]; + fmap[j-1] = tmp; + } +} + + +/*---------------------------------------------*/ +#define fpush(lz,hz) { \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + sp++; \ +} + +#define fpop(lz,hz) { \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ +} + +#define FALLBACK_QSORT_SMALL_THRESH 10 +#define FALLBACK_QSORT_STACK_SIZE 100 + +static +void fallbackQSort3(uint32_t* fmap, + uint32_t* eclass, + int32_t loSt, + int32_t hiSt) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m; + int32_t sp, lo, hi; + uint32_t med, r, r3; + int32_t stackLo[FALLBACK_QSORT_STACK_SIZE]; + int32_t stackHi[FALLBACK_QSORT_STACK_SIZE]; + + r = 0; + + sp = 0; + fpush(loSt, hiSt); + + while (sp > 0) { + AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004); + + fpop(lo, hi); + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort(fmap, eclass, lo, hi); + continue; + } + + /* Random partitioning. Median of 3 sometimes fails to + * avoid bad cases. Median of 9 seems to help but + * looks rather expensive. This too seems to work but + * is cheaper. Guidance for the magic constants + * 7621 and 32768 is taken from Sedgewick's algorithms + * book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) + med = eclass[fmap[lo]]; + else if (r3 == 1) + med = eclass[fmap[(lo+hi)>>1]]; + else + med = eclass[fmap[hi]]; + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unLo]] - (int32_t)med; + if (n == 0) { + mswap(fmap[unLo], fmap[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unHi]] - (int32_t)med; + if (n == 0) { + mswap(fmap[unHi], fmap[gtHi]); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; + } + + AssertD(unHi == unLo-1, "fallbackQSort3(2)"); + + if (gtHi < ltLo) continue; + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush(lo, n); + fpush(m, hi); + } else { + fpush(m, hi); + fpush(lo, n); + } + } +} + +#undef fpush +#undef fpop +#undef FALLBACK_QSORT_SMALL_THRESH +#undef FALLBACK_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * eclass exists for [0 .. nblock-1] + * ((uint8_t*)eclass) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)eclass) [0 .. nblock-1] holds block + * All other areas of eclass destroyed + * fmap [0 .. nblock-1] holds sorted order + * bhtab[0 .. 2+(nblock/32)] destroyed +*/ + +#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) +#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) +#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) +#define WORD_BH(zz) bhtab[(zz) >> 5] +#define UNALIGNED_BH(zz) ((zz) & 0x01f) + +static +void fallbackSort(uint32_t* fmap, + uint32_t* eclass, + uint32_t* bhtab, + int32_t nblock) +{ + int32_t ftab[257]; + int32_t ftabCopy[256]; + int32_t H, i, j, k, l, r, cc, cc1; + int32_t nNotDone; + int32_t nBhtab; + uint8_t* eclass8 = (uint8_t*)eclass; + + /* + * Initial 1-char radix sort to generate + * initial fmap and initial BH bits. + */ + for (i = 0; i < 257; i++) ftab[i] = 0; + for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; + for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; + + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i < 257; i++) { + j += ftab[i]; + ftab[i] = j; + } + + for (i = 0; i < nblock; i++) { + j = eclass8[i]; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ + for (i = 0; i < nBhtab; i++) bhtab[i] = 0; + for (i = 0; i < 256; i++) SET_BH(ftab[i]); + + /* + * Inductively refine the buckets. Kind-of an + * "exponential radix sort" (!), inspired by the + * Manber-Myers suffix array construction algorithm. + */ + + /*-- set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + SET_BH(nblock + 2*i); + CLEAR_BH(nblock + 2*i + 1); + } + + /*-- the log(N) loop --*/ + H = 1; + while (1) { + j = 0; + for (i = 0; i < nblock; i++) { + if (ISSET_BH(i)) + j = i; + k = fmap[i] - H; + if (k < 0) + k += nblock; + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (1) { + + /*-- find the next non-singleton bucket --*/ + k = r + 1; + while (ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (ISSET_BH(k)) { + while (WORD_BH(k) == 0xffffffff) k += 32; + while (ISSET_BH(k)) k++; + } + l = k - 1; + if (l >= nblock) + break; + while (!ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (!ISSET_BH(k)) { + while (WORD_BH(k) == 0x00000000) k += 32; + while (!ISSET_BH(k)) k++; + } + r = k - 1; + if (r >= nblock) + break; + + /*-- now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3(fmap, eclass, l, r); + + /*-- scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { + SET_BH(i); + cc = cc1; + }; + } + } + } + + H *= 2; + if (H > nblock || nNotDone == 0) + break; + } + + /* + * Reconstruct the original block in + * eclass8 [0 .. nblock-1], since the + * previous phase destroyed it. + */ + j = 0; + for (i = 0; i < nblock; i++) { + while (ftabCopy[j] == 0) + j++; + ftabCopy[j]--; + eclass8[fmap[i]] = (uint8_t)j; + } + AssertH(j < 256, 1005); +} + +#undef SET_BH +#undef CLEAR_BH +#undef ISSET_BH +#undef WORD_BH +#undef UNALIGNED_BH + + +/*---------------------------------------------*/ +/*--- The main, O(N^2 log(N)) sorting ---*/ +/*--- algorithm. Faster for "normal" ---*/ +/*--- non-repetitive blocks. ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +NOINLINE +int mainGtU( + uint32_t i1, + uint32_t i2, + uint8_t* block, + uint16_t* quadrant, + uint32_t nblock, + int32_t* budget) +{ + int32_t k; + uint8_t c1, c2; + uint16_t s1, s2; + +/* Loop unrolling here is actually very useful + * (generated code is much simpler), + * code size increase is only 270 bytes (i386) + * but speeds up compression 10% overall + */ + +#if CONFIG_BZIP2_FEATURE_SPEED >= 1 + +#define TIMES_8(code) \ + code; code; code; code; \ + code; code; code; code; +#define TIMES_12(code) \ + code; code; code; code; \ + code; code; code; code; \ + code; code; code; code; + +#else + +#define TIMES_8(code) \ +{ \ + int nn = 8; \ + do { \ + code; \ + } while (--nn); \ +} +#define TIMES_12(code) \ +{ \ + int nn = 12; \ + do { \ + code; \ + } while (--nn); \ +} + +#endif + + AssertD(i1 != i2, "mainGtU"); + TIMES_12( + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + ) + + k = nblock + 8; + + do { + TIMES_8( + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + ) + + if (i1 >= nblock) i1 -= nblock; + if (i2 >= nblock) i2 -= nblock; + + (*budget)--; + k -= 8; + } while (k >= 0); + + return False; +} +#undef TIMES_8 +#undef TIMES_12 + +/*---------------------------------------------*/ +/* + * Knuth's increments seem to work better + * than Incerpi-Sedgewick here. Possibly + * because the number of elems to sort is + * usually small, typically <= 20. + */ +static +const int32_t incs[14] = { + 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 +}; + +static +void mainSimpleSort(uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + int32_t nblock, + int32_t lo, + int32_t hi, + int32_t d, + int32_t* budget) +{ + int32_t i, j, h, bigN, hp; + uint32_t v; + + bigN = hi - lo + 1; + if (bigN < 2) return; + + hp = 0; + while (incs[hp] < bigN) hp++; + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (1) { + /*-- copy 1 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + +/* 1.5% overall speedup, +290 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 3 + /*-- copy 2 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 3 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; +#endif + if (*budget < 0) return; + } + } +} + + +/*---------------------------------------------*/ +/* + * The following is an implementation of + * an elegant 3-way quicksort for strings, + * described in a paper "Fast Algorithms for + * Sorting and Searching Strings", by Robert + * Sedgewick and Jon L. Bentley. + */ + +static +ALWAYS_INLINE +uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c) +{ + uint8_t t; + if (a > b) { + t = a; + a = b; + b = t; + }; + /* here b >= a */ + if (b > c) { + b = c; + if (a > b) + b = a; + } + return b; +} + +#define mpush(lz,hz,dz) \ +{ \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + stackD [sp] = dz; \ + sp++; \ +} + +#define mpop(lz,hz,dz) \ +{ \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ + dz = stackD [sp]; \ +} + +#define mnextsize(az) (nextHi[az] - nextLo[az]) + +#define mnextswap(az,bz) \ +{ \ + int32_t tz; \ + tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ + tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ + tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \ +} + +#define MAIN_QSORT_SMALL_THRESH 20 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) +#define MAIN_QSORT_STACK_SIZE 100 + +static NOINLINE +void mainQSort3(uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + int32_t nblock, + int32_t loSt, + int32_t hiSt, + int32_t dSt, + int32_t* budget) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m, med; + int32_t sp, lo, hi, d; + + int32_t stackLo[MAIN_QSORT_STACK_SIZE]; + int32_t stackHi[MAIN_QSORT_STACK_SIZE]; + int32_t stackD [MAIN_QSORT_STACK_SIZE]; + + int32_t nextLo[3]; + int32_t nextHi[3]; + int32_t nextD [3]; + + sp = 0; + mpush(loSt, hiSt, dSt); + + while (sp > 0) { + AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001); + + mpop(lo, hi, d); + if (hi - lo < MAIN_QSORT_SMALL_THRESH + || d > MAIN_QSORT_DEPTH_THRESH + ) { + mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); + if (*budget < 0) + return; + continue; + } + med = (int32_t) mmed3(block[ptr[lo ] + d], + block[ptr[hi ] + d], + block[ptr[(lo+hi) >> 1] + d]); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unLo]+d]) - med; + if (n == 0) { + mswap(ptr[unLo], ptr[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unHi]+d]) - med; + if (n == 0) { + mswap(ptr[unHi], ptr[gtHi]); + gtHi--; + unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) + break; + mswap(ptr[unLo], ptr[unHi]); + unLo++; + unHi--; + } + + AssertD(unHi == unLo-1, "mainQSort3(2)"); + + if (gtHi < ltLo) { + mpush(lo, hi, d + 1); + continue; + } + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; + nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; + + if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); + if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2); + if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); + + AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)"); + AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)"); + + mpush(nextLo[0], nextHi[0], nextD[0]); + mpush(nextLo[1], nextHi[1], nextD[1]); + mpush(nextLo[2], nextHi[2], nextD[2]); + } +} + +#undef mpush +#undef mpop +#undef mnextsize +#undef mnextswap +#undef MAIN_QSORT_SMALL_THRESH +#undef MAIN_QSORT_DEPTH_THRESH +#undef MAIN_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > N_OVERSHOOT + * block32 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((uint8_t*)block32) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)block32) [0 .. nblock-1] holds block + * All other areas of block32 destroyed + * ftab[0 .. 65536] destroyed + * ptr [0 .. nblock-1] holds sorted order + * if (*budget < 0), sorting was abandoned + */ + +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) +#define SETMASK (1 << 21) +#define CLEARMASK (~(SETMASK)) + +static NOINLINE +void mainSort(EState* state, + uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + uint32_t* ftab, + int32_t nblock, + int32_t* budget) +{ + int32_t i, j, k, ss, sb; + uint8_t c1; + int32_t numQSorted; + uint16_t s; + Bool bigDone[256]; + /* bbox: moved to EState to save stack + int32_t runningOrder[256]; + int32_t copyStart[256]; + int32_t copyEnd [256]; + */ +#define runningOrder (state->mainSort__runningOrder) +#define copyStart (state->mainSort__copyStart) +#define copyEnd (state->mainSort__copyEnd) + + /*-- set up the 2-byte frequency table --*/ + /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ + memset(ftab, 0, 65537 * sizeof(ftab[0])); + + j = block[0] << 8; + i = nblock - 1; +/* 3%, +300 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 2 + for (; i >= 3; i -= 4) { + quadrant[i] = 0; + j = (j >> 8) | (((uint16_t)block[i]) << 8); + ftab[j]++; + quadrant[i-1] = 0; + j = (j >> 8) | (((uint16_t)block[i-1]) << 8); + ftab[j]++; + quadrant[i-2] = 0; + j = (j >> 8) | (((uint16_t)block[i-2]) << 8); + ftab[j]++; + quadrant[i-3] = 0; + j = (j >> 8) | (((uint16_t)block[i-3]) << 8); + ftab[j]++; + } +#endif + for (; i >= 0; i--) { + quadrant[i] = 0; + j = (j >> 8) | (((uint16_t)block[i]) << 8); + ftab[j]++; + } + + /*-- (emphasises close relationship of block & quadrant) --*/ + for (i = 0; i < BZ_N_OVERSHOOT; i++) { + block [nblock+i] = block[i]; + quadrant[nblock+i] = 0; + } + + /*-- Complete the initial radix sort --*/ + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i <= 65536; i++) { + j += ftab[i]; + ftab[i] = j; + } + + s = block[0] << 8; + i = nblock - 1; +#if CONFIG_BZIP2_FEATURE_SPEED >= 2 + for (; i >= 3; i -= 4) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i; + s = (s >> 8) | (block[i-1] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-1; + s = (s >> 8) | (block[i-2] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-2; + s = (s >> 8) | (block[i-3] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-3; + } +#endif + for (; i >= 0; i--) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i; + } + + /* + * Now ftab contains the first loc of every small bucket. + * Calculate the running order, from smallest to largest + * big bucket. + */ + for (i = 0; i <= 255; i++) { + bigDone [i] = False; + runningOrder[i] = i; + } + + { + int32_t vv; + /* bbox: was: int32_t h = 1; */ + /* do h = 3 * h + 1; while (h <= 256); */ + uint32_t h = 364; + + do { + /*h = h / 3;*/ + h = (h * 171) >> 9; /* bbox: fast h/3 */ + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { + runningOrder[j] = runningOrder[j-h]; + j = j - h; + if (j <= (h - 1)) + goto zero; + } + zero: + runningOrder[j] = vv; + } + } while (h != 1); + } + + /* + * The main sorting loop. + */ + + numQSorted = 0; + + for (i = 0; i <= 255; i++) { + + /* + * Process big buckets, starting with the least full. + * Basically this is a 3-step process in which we call + * mainQSort3 to sort the small buckets [ss, j], but + * also make a big effort to avoid the calls if we can. + */ + ss = runningOrder[i]; + + /* + * Step 1: + * Complete the big bucket [ss] by quicksorting + * any unsorted small buckets [ss, j], for j != ss. + * Hopefully previous pointer-scanning phases have already + * completed many of the small buckets [ss, j], so + * we don't have to sort them at all. + */ + for (j = 0; j <= 255; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if (!(ftab[sb] & SETMASK)) { + int32_t lo = ftab[sb] & CLEARMASK; + int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3( + ptr, block, quadrant, nblock, + lo, hi, BZ_N_RADIX, budget + ); + if (*budget < 0) return; + numQSorted += (hi - lo + 1); + } + } + ftab[sb] |= SETMASK; + } + } + + AssertH(!bigDone[ss], 1006); + + /* + * Step 2: + * Now scan this big bucket [ss] so as to synthesise the + * sorted order for small buckets [t, ss] for all t, + * including, magically, the bucket [ss,ss] too. + * This will avoid doing Real Work in subsequent Step 1's. + */ + { + for (j = 0; j <= 255; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = ptr[j] - 1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyStart[c1]++] = k; + } + for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = ptr[j]-1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyEnd[c1]--] = k; + } + } + + /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. + * Necessity for this case is demonstrated by compressing + * a sequence of approximately 48.5 million of character + * 251; 1.0.0/1.0.1 will then die here. */ + AssertH((copyStart[ss]-1 == copyEnd[ss]) \ + || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007); + + for (j = 0; j <= 255; j++) + ftab[(j << 8) + ss] |= SETMASK; + + /* + * Step 3: + * The [ss] big bucket is now done. Record this fact, + * and update the quadrant descriptors. Remember to + * update quadrants in the overshoot area too, if + * necessary. The "if (i < 255)" test merely skips + * this updating for the last bucket processed, since + * updating for the last bucket is pointless. + * + * The quadrant array provides a way to incrementally + * cache sort orderings, as they appear, so as to + * make subsequent comparisons in fullGtU() complete + * faster. For repetitive blocks this makes a big + * difference (but not big enough to be able to avoid + * the fallback sorting mechanism, exponential radix sort). + * + * The precise meaning is: at all times: + * + * for 0 <= i < nblock and 0 <= j <= nblock + * + * if block[i] != block[j], + * + * then the relative values of quadrant[i] and + * quadrant[j] are meaningless. + * + * else { + * if quadrant[i] < quadrant[j] + * then the string starting at i lexicographically + * precedes the string starting at j + * + * else if quadrant[i] > quadrant[j] + * then the string starting at j lexicographically + * precedes the string starting at i + * + * else + * the relative ordering of the strings starting + * at i and j has not yet been determined. + * } + */ + bigDone[ss] = True; + + if (i < 255) { + int32_t bbStart = ftab[ss << 8] & CLEARMASK; + int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; + int32_t shifts = 0; + + while ((bbSize >> shifts) > 65534) shifts++; + + for (j = bbSize-1; j >= 0; j--) { + int32_t a2update = ptr[bbStart + j]; + uint16_t qVal = (uint16_t)(j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZ_N_OVERSHOOT) + quadrant[a2update + nblock] = qVal; + } + AssertH(((bbSize-1) >> shifts) <= 65535, 1002); + } + } +#undef runningOrder +#undef copyStart +#undef copyEnd +} + +#undef BIGFREQ +#undef SETMASK +#undef CLEARMASK + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((uint8_t*)arr2)[0 .. nblock-1] holds block + * arr1 exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)arr2) [0 .. nblock-1] holds block + * All other areas of block destroyed + * ftab[0 .. 65536] destroyed + * arr1[0 .. nblock-1] holds sorted order + */ +static NOINLINE +void BZ2_blockSort(EState* s) +{ + /* In original bzip2 1.0.4, it's a parameter, but 30 + * (which was the default) should work ok. */ + enum { wfact = 30 }; + + uint32_t* ptr = s->ptr; + uint8_t* block = s->block; + uint32_t* ftab = s->ftab; + int32_t nblock = s->nblock; + uint16_t* quadrant; + int32_t budget; + int32_t i; + + if (nblock < 10000) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } else { + /* Calculate the location for quadrant, remembering to get + * the alignment right. Assumes that &(block[0]) is at least + * 2-byte aligned -- this should be ok since block is really + * the first section of arr2. + */ + i = nblock + BZ_N_OVERSHOOT; + if (i & 1) i++; + quadrant = (uint16_t*)(&(block[i])); + + /* (wfact-1) / 3 puts the default-factor-30 + * transition point at very roughly the same place as + * with v0.1 and v0.9.0. + * Not that it particularly matters any more, since the + * resulting compressed stream is now the same regardless + * of whether or not we use the main sort or fallback sort. + */ + budget = nblock * ((wfact-1) / 3); + + mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); + if (budget < 0) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } + } + + s->origPtr = -1; + for (i = 0; i < s->nblock; i++) + if (ptr[i] == 0) { + s->origPtr = i; + break; + }; + + AssertH(s->origPtr != -1, 1003); +} + + +/*-------------------------------------------------------------*/ +/*--- end blocksort.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/bzlib.c b/archival/libarchive/bz/bzlib.c new file mode 100644 index 000000000..b3beeabed --- /dev/null +++ b/archival/libarchive/bz/bzlib.c @@ -0,0 +1,431 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Library top-level functions. ---*/ +/*--- bzlib.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). + * fixed bzWrite/bzRead to ignore zero-length requests. + * fixed bzread to correctly handle read requests after EOF. + * wrong parameter order in call to bzDecompressInit in + * bzBuffToBuffDecompress. Fixed. + */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Compression stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#if BZ_LIGHT_DEBUG +static +void bz_assert_fail(int errcode) +{ + /* if (errcode == 1007) bb_error_msg_and_die("probably bad RAM"); */ + bb_error_msg_and_die("internal error %d", errcode); +} +#endif + +/*---------------------------------------------------*/ +static +void prepare_new_block(EState* s) +{ + int i; + s->nblock = 0; + s->numZ = 0; + s->state_out_pos = 0; + BZ_INITIALISE_CRC(s->blockCRC); + /* inlined memset would be nice to have here */ + for (i = 0; i < 256; i++) + s->inUse[i] = 0; + s->blockNo++; +} + + +/*---------------------------------------------------*/ +static +ALWAYS_INLINE +void init_RL(EState* s) +{ + s->state_in_ch = 256; + s->state_in_len = 0; +} + + +static +int isempty_RL(EState* s) +{ + return (s->state_in_ch >= 256 || s->state_in_len <= 0); +} + + +/*---------------------------------------------------*/ +static +void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k) +{ + int32_t n; + EState* s; + + s = xzalloc(sizeof(EState)); + s->strm = strm; + + n = 100000 * blockSize100k; + s->arr1 = xmalloc(n * sizeof(uint32_t)); + s->mtfv = (uint16_t*)s->arr1; + s->ptr = (uint32_t*)s->arr1; + s->arr2 = xmalloc((n + BZ_N_OVERSHOOT) * sizeof(uint32_t)); + s->block = (uint8_t*)s->arr2; + s->ftab = xmalloc(65537 * sizeof(uint32_t)); + + s->crc32table = crc32_filltable(NULL, 1); + + s->state = BZ_S_INPUT; + s->mode = BZ_M_RUNNING; + s->blockSize100k = blockSize100k; + s->nblockMAX = n - 19; + + strm->state = s; + /*strm->total_in = 0;*/ + strm->total_out = 0; + init_RL(s); + prepare_new_block(s); +} + + +/*---------------------------------------------------*/ +static +void add_pair_to_block(EState* s) +{ + int32_t i; + uint8_t ch = (uint8_t)(s->state_in_ch); + for (i = 0; i < s->state_in_len; i++) { + BZ_UPDATE_CRC(s, s->blockCRC, ch); + } + s->inUse[s->state_in_ch] = 1; + switch (s->state_in_len) { + case 3: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + /* fall through */ + case 2: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + /* fall through */ + case 1: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + break; + default: + s->inUse[s->state_in_len - 4] = 1; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)(s->state_in_len - 4); + s->nblock++; + break; + } +} + + +/*---------------------------------------------------*/ +static +void flush_RL(EState* s) +{ + if (s->state_in_ch < 256) add_pair_to_block(s); + init_RL(s); +} + + +/*---------------------------------------------------*/ +#define ADD_CHAR_TO_BLOCK(zs, zchh0) \ +{ \ + uint32_t zchh = (uint32_t)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && zs->state_in_len == 1) { \ + uint8_t ch = (uint8_t)(zs->state_in_ch); \ + BZ_UPDATE_CRC(zs, zs->blockCRC, ch); \ + zs->inUse[zs->state_in_ch] = 1; \ + zs->block[zs->nblock] = (uint8_t)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block(zs); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ copy_input_until_stop(EState* s) +{ + /*Bool progress_in = False;*/ + +#ifdef SAME_CODE_AS_BELOW + if (s->mode == BZ_M_RUNNING) { + /*-- fast track the common case --*/ + while (1) { + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*progress_in = True;*/ + ADD_CHAR_TO_BLOCK(s, (uint32_t)(*(uint8_t*)(s->strm->next_in))); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + } + } else +#endif + { + /*-- general, uncommon case --*/ + while (1) { + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + //# /*-- flush/finish end? --*/ + //# if (s->avail_in_expect == 0) break; + /*progress_in = True;*/ + ADD_CHAR_TO_BLOCK(s, *(uint8_t*)(s->strm->next_in)); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + //# s->avail_in_expect--; + } + } + /*return progress_in;*/ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ copy_output_until_stop(EState* s) +{ + /*Bool progress_out = False;*/ + + while (1) { + /*-- no output space? --*/ + if (s->strm->avail_out == 0) break; + + /*-- block done? --*/ + if (s->state_out_pos >= s->numZ) break; + + /*progress_out = True;*/ + *(s->strm->next_out) = s->zbits[s->state_out_pos]; + s->state_out_pos++; + s->strm->avail_out--; + s->strm->next_out++; + s->strm->total_out++; + } + /*return progress_out;*/ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ handle_compress(bz_stream *strm) +{ + /*Bool progress_in = False;*/ + /*Bool progress_out = False;*/ + EState* s = strm->state; + + while (1) { + if (s->state == BZ_S_OUTPUT) { + /*progress_out |=*/ copy_output_until_stop(s); + if (s->state_out_pos < s->numZ) break; + if (s->mode == BZ_M_FINISHING + //# && s->avail_in_expect == 0 + && s->strm->avail_in == 0 + && isempty_RL(s)) + break; + prepare_new_block(s); + s->state = BZ_S_INPUT; +#ifdef FLUSH_IS_UNUSED + if (s->mode == BZ_M_FLUSHING + && s->avail_in_expect == 0 + && isempty_RL(s)) + break; +#endif + } + + if (s->state == BZ_S_INPUT) { + /*progress_in |=*/ copy_input_until_stop(s); + //#if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { + if (s->mode != BZ_M_RUNNING && s->strm->avail_in == 0) { + flush_RL(s); + BZ2_compressBlock(s, (s->mode == BZ_M_FINISHING)); + s->state = BZ_S_OUTPUT; + } else + if (s->nblock >= s->nblockMAX) { + BZ2_compressBlock(s, 0); + s->state = BZ_S_OUTPUT; + } else + if (s->strm->avail_in == 0) { + break; + } + } + } + + /*return progress_in || progress_out;*/ +} + + +/*---------------------------------------------------*/ +static +int BZ2_bzCompress(bz_stream *strm, int action) +{ + /*Bool progress;*/ + EState* s; + + s = strm->state; + + switch (s->mode) { + case BZ_M_RUNNING: + if (action == BZ_RUN) { + /*progress =*/ handle_compress(strm); + /*return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;*/ + return BZ_RUN_OK; + } +#ifdef FLUSH_IS_UNUSED + else + if (action == BZ_FLUSH) { + //#s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FLUSHING; + goto case_BZ_M_FLUSHING; + } +#endif + else + /*if (action == BZ_FINISH)*/ { + //#s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FINISHING; + goto case_BZ_M_FINISHING; + } + +#ifdef FLUSH_IS_UNUSED + case_BZ_M_FLUSHING: + case BZ_M_FLUSHING: + /*if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR;*/ + /*progress =*/ handle_compress(strm); + if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FLUSH_OK; + s->mode = BZ_M_RUNNING; + return BZ_RUN_OK; +#endif + + case_BZ_M_FINISHING: + /*case BZ_M_FINISHING:*/ + default: + /*if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR;*/ + /*progress =*/ handle_compress(strm); + /*if (!progress) return BZ_SEQUENCE_ERROR;*/ + //#if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + //# return BZ_FINISH_OK; + if (s->strm->avail_in > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FINISH_OK; + /*s->mode = BZ_M_IDLE;*/ + return BZ_STREAM_END; + } + /* return BZ_OK; --not reached--*/ +} + + +/*---------------------------------------------------*/ +#if ENABLE_FEATURE_CLEAN_UP +static +void BZ2_bzCompressEnd(bz_stream *strm) +{ + EState* s; + + s = strm->state; + free(s->arr1); + free(s->arr2); + free(s->ftab); + free(s->crc32table); + free(strm->state); +} +#endif + + +/*---------------------------------------------------*/ +/*--- Misc convenience stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#ifdef EXAMPLE_CODE_FOR_MEM_TO_MEM_COMPRESSION +static +int BZ2_bzBuffToBuffCompress(char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL + || source == NULL + || blockSize100k < 1 || blockSize100k > 9 + ) { + return BZ_PARAM_ERROR; + } + + BZ2_bzCompressInit(&strm, blockSize100k); + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzCompress(&strm, BZ_FINISH); + if (ret == BZ_FINISH_OK) goto output_overflow; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzCompressEnd(&strm); + return BZ_OK; + + output_overflow: + BZ2_bzCompressEnd(&strm); + return BZ_OUTBUFF_FULL; + + errhandler: + BZ2_bzCompressEnd(&strm); + return ret; +} +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/bzlib.h b/archival/libarchive/bz/bzlib.h new file mode 100644 index 000000000..1bb811c4a --- /dev/null +++ b/archival/libarchive/bz/bzlib.h @@ -0,0 +1,65 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Public header file for the library. ---*/ +/*--- bzlib.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +#define BZ_RUN 0 +#define BZ_FLUSH 1 +#define BZ_FINISH 2 + +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) +#define BZ_DATA_ERROR_MAGIC (-5) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) +#define BZ_CONFIG_ERROR (-9) + +typedef struct bz_stream { + void *state; + char *next_in; + char *next_out; + unsigned avail_in; + unsigned avail_out; + /*unsigned long long total_in;*/ + unsigned long long total_out; +} bz_stream; + +/*-- Core (low-level) library functions --*/ + +static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k); +static int BZ2_bzCompress(bz_stream *strm, int action); +#if ENABLE_FEATURE_CLEAN_UP +static void BZ2_bzCompressEnd(bz_stream *strm); +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/bzlib_private.h b/archival/libarchive/bz/bzlib_private.h new file mode 100644 index 000000000..6430ce407 --- /dev/null +++ b/archival/libarchive/bz/bzlib_private.h @@ -0,0 +1,219 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Private header file for the library. ---*/ +/*--- bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib.h" */ + +/*-- General stuff. --*/ + +typedef unsigned char Bool; + +#define True ((Bool)1) +#define False ((Bool)0) + +#if BZ_LIGHT_DEBUG +static void bz_assert_fail(int errcode) NORETURN; +#define AssertH(cond, errcode) \ +do { \ + if (!(cond)) \ + bz_assert_fail(errcode); \ +} while (0) +#else +#define AssertH(cond, msg) do { } while (0) +#endif + +#if BZ_DEBUG +#define AssertD(cond, msg) \ +do { \ + if (!(cond)) \ + bb_error_msg_and_die("(debug build): internal error %s", msg); \ +} while (0) +#else +#define AssertD(cond, msg) do { } while (0) +#endif + + +/*-- Header bytes. --*/ + +#define BZ_HDR_B 0x42 /* 'B' */ +#define BZ_HDR_Z 0x5a /* 'Z' */ +#define BZ_HDR_h 0x68 /* 'h' */ +#define BZ_HDR_0 0x30 /* '0' */ + +#define BZ_HDR_BZh0 0x425a6830 + +/*-- Constants for the back end. --*/ + +#define BZ_MAX_ALPHA_SIZE 258 +#define BZ_MAX_CODE_LEN 23 + +#define BZ_RUNA 0 +#define BZ_RUNB 1 + +#define BZ_N_GROUPS 6 +#define BZ_G_SIZE 50 +#define BZ_N_ITERS 4 + +#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) + + +/*-- Stuff for doing CRCs. --*/ + +#define BZ_INITIALISE_CRC(crcVar) \ +{ \ + crcVar = 0xffffffffL; \ +} + +#define BZ_FINALISE_CRC(crcVar) \ +{ \ + crcVar = ~(crcVar); \ +} + +#define BZ_UPDATE_CRC(s, crcVar, cha) \ +{ \ + crcVar = (crcVar << 8) ^ s->crc32table[(crcVar >> 24) ^ ((uint8_t)cha)]; \ +} + + +/*-- States and modes for compression. --*/ + +#define BZ_M_IDLE 1 +#define BZ_M_RUNNING 2 +#define BZ_M_FLUSHING 3 +#define BZ_M_FINISHING 4 + +#define BZ_S_OUTPUT 1 +#define BZ_S_INPUT 2 + +#define BZ_N_RADIX 2 +#define BZ_N_QSORT 12 +#define BZ_N_SHELL 18 +#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) + + +/*-- Structure holding all the compression-side stuff. --*/ + +typedef struct EState { + /* pointer back to the struct bz_stream */ + bz_stream *strm; + + /* mode this stream is in, and whether inputting */ + /* or outputting data */ + int32_t mode; + int32_t state; + + /* remembers avail_in when flush/finish requested */ +/* bbox: not needed, strm->avail_in always has the same value */ +/* commented out with '//#' throughout the code */ + /* uint32_t avail_in_expect; */ + + /* for doing the block sorting */ + int32_t origPtr; + uint32_t *arr1; + uint32_t *arr2; + uint32_t *ftab; + + /* aliases for arr1 and arr2 */ + uint32_t *ptr; + uint8_t *block; + uint16_t *mtfv; + uint8_t *zbits; + + /* guess what */ + uint32_t *crc32table; + + /* run-length-encoding of the input */ + uint32_t state_in_ch; + int32_t state_in_len; + + /* input and output limits and current posns */ + int32_t nblock; + int32_t nblockMAX; + int32_t numZ; + int32_t state_out_pos; + + /* the buffer for bit stream creation */ + uint32_t bsBuff; + int32_t bsLive; + + /* block and combined CRCs */ + uint32_t blockCRC; + uint32_t combinedCRC; + + /* misc administratium */ + int32_t blockNo; + int32_t blockSize100k; + + /* stuff for coding the MTF values */ + int32_t nMTF; + + /* map of bytes used in block */ + int32_t nInUse; + Bool inUse[256] ALIGNED(sizeof(long)); + uint8_t unseqToSeq[256]; + + /* stuff for coding the MTF values */ + int32_t mtfFreq [BZ_MAX_ALPHA_SIZE]; + uint8_t selector [BZ_MAX_SELECTORS]; + uint8_t selectorMtf[BZ_MAX_SELECTORS]; + + uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + /* stack-saving measures: these can be local, but they are too big */ + int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + /* second dimension: only 3 needed; 4 makes index calculations faster */ + uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4]; +#endif + int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2]; + + int32_t mainSort__runningOrder[256]; + int32_t mainSort__copyStart[256]; + int32_t mainSort__copyEnd[256]; +} EState; + + +/*-- compression. --*/ + +static void +BZ2_blockSort(EState*); + +static void +BZ2_compressBlock(EState*, int); + +static void +BZ2_bsInitWrite(EState*); + +static void +BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t); + +static void +BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t); + +/*-------------------------------------------------------------*/ +/*--- end bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c new file mode 100644 index 000000000..6f1c70a08 --- /dev/null +++ b/archival/libarchive/bz/compress.c @@ -0,0 +1,685 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- changed setting of nGroups in sendMTFValues() + * so as to do a bit better on small files +*/ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void BZ2_bsInitWrite(EState* s) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static NOINLINE +void bsFinishWrite(EState* s) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +static +/* Helps only on level 5, on other levels hurts. ? */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 +ALWAYS_INLINE +#endif +void bsW(EState* s, int32_t n, uint32_t v) +{ + while (s->bsLive >= 8) { + s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +static +void bsPutU32(EState* s, unsigned u) +{ + bsW(s, 8, (u >> 24) & 0xff); + bsW(s, 8, (u >> 16) & 0xff); + bsW(s, 8, (u >> 8) & 0xff); + bsW(s, 8, u & 0xff); +} + + +/*---------------------------------------------------*/ +static +void bsPutU16(EState* s, unsigned u) +{ + bsW(s, 8, (u >> 8) & 0xff); + bsW(s, 8, u & 0xff); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e(EState* s) +{ + int i; + s->nInUse = 0; + for (i = 0; i < 256; i++) { + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } + } +} + + +/*---------------------------------------------------*/ +static NOINLINE +void generateMTFValues(EState* s) +{ + uint8_t yy[256]; + int32_t i, j; + int32_t zPend; + int32_t wr; + int32_t EOB; + + /* + * After sorting (eg, here), + * s->arr1[0 .. s->nblock-1] holds sorted order, + * and + * ((uint8_t*)s->arr2)[0 .. s->nblock-1] + * holds the original block data. + * + * The first thing to do is generate the MTF values, + * and put them in ((uint16_t*)s->arr1)[0 .. s->nblock-1]. + * + * Because there are strictly fewer or equal MTF values + * than block values, ptr values in this area are overwritten + * with MTF values only when they are no longer needed. + * + * The final compressed bitstream is generated into the + * area starting at &((uint8_t*)s->arr2)[s->nblock] + * + * These storage aliases are set up in bzCompressInit(), + * except for the last one, which is arranged in + * compressBlock(). + */ + uint32_t* ptr = s->ptr; + uint8_t* block = s->block; + uint16_t* mtfv = s->mtfv; + + makeMaps_e(s); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) + s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) + yy[i] = (uint8_t) i; + + for (i = 0; i < s->nblock; i++) { + uint8_t ll_i; + AssertD(wr <= i, "generateMTFValues(1)"); + j = ptr[i] - 1; + if (j < 0) + j += s->nblock; + ll_i = s->unseqToSeq[block[j]]; + AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); + + if (yy[0] == ll_i) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ + }; + zPend = 0; + } + { + register uint8_t rtmp; + register uint8_t* ryy_j; + register uint8_t rll_i; + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while (rll_i != rtmp) { + register uint8_t rtmp2; + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; + wr++; + s->mtfFreq[j+1]++; + } + } + } + + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; + wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; + wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) + break; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ + }; + zPend = 0; + } + + mtfv[wr] = EOB; + wr++; + s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static NOINLINE +void sendMTFValues(EState* s) +{ + int32_t v, t, i, j, gs, ge, totc, bt, bc, iter; + int32_t nSelectors, alphaSize, minLen, maxLen, selCtr; + int32_t nGroups, nBytes; + + /* + * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * is a global since the decoder also needs it. + * + * int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * are also globals only used in this proc. + * Made global to keep stack frame size small. + */ +#define code sendMTFValues__code +#define rfreq sendMTFValues__rfreq +#define len_pack sendMTFValues__len_pack + + uint16_t cost[BZ_N_GROUPS]; + int32_t fave[BZ_N_GROUPS]; + + uint16_t* mtfv = s->mtfv; + + alphaSize = s->nInUse + 2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH(s->nMTF > 0, 3001); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + int32_t nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs - 1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */ + ) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; + else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge + 1; + remF -= aFreq; + } + } + + /* + * Iterate up to BZ_N_ITERS times to improve the tables. + */ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + for (t = 0; t < nGroups; t++) + fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + /* + * Set up an auxiliary length table which is used to fast-track + * the common case (nGroups == 6). + */ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } +#endif + nSelectors = 0; + totc = 0; + gs = 0; + while (1) { + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + + /* + * Calculate the cost of this group as coded + * by each of the coding tables. + */ + for (t = 0; t < nGroups; t++) + cost[t] = 0; +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + register uint32_t cost01, cost23, cost45; + register uint16_t icv; + cost01 = cost23 = cost45 = 0; +#define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); +#undef BZ_ITER + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; + + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + uint16_t icv = mtfv[i]; + for (t = 0; t < nGroups; t++) + cost[t] += s->len[t][icv]; + } + } + /* + * Find the coding table which is best for this group, + * and record its identity in the selector table. + */ + /*bc = 999999999;*/ + /*bt = -1;*/ + bc = cost[0]; + bt = 0; + for (t = 1 /*0*/; t < nGroups; t++) { + if (cost[t] < bc) { + bc = cost[t]; + bt = t; + } + } + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ +/* 1% faster compress. +800 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 4 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ +#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++ + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); +#undef BZ_ITUR + gs = ge + 1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + while (gs <= ge) { + s->rfreq[bt][mtfv[gs]]++; + gs++; + } + /* already is: gs = ge + 1; */ + } + } + + /* + * Recompute the tables based on the accumulated frequencies. + */ + /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See + * comment in huffman.c for details. */ + for (t = 0; t < nGroups; t++) + BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/); + } + + AssertH(nGroups < 8, 3002); + AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003); + + /*--- Compute MTF values for the selectors. ---*/ + { + uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + + for (i = 0; i < nGroups; i++) + pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while (ll_i != tmp) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH(!(maxLen > 17 /*20*/), 3004); + AssertH(!(minLen < 1), 3005); + BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize); + } + + /*--- Transmit the mapping table. ---*/ + { + /* bbox: optimized a bit more than in bzip2 */ + int inUse16 = 0; + for (i = 0; i < 16; i++) { + if (sizeof(long) <= 4) { + inUse16 = inUse16*2 + + ((*(uint32_t*)&(s->inUse[i * 16 + 0]) + | *(uint32_t*)&(s->inUse[i * 16 + 4]) + | *(uint32_t*)&(s->inUse[i * 16 + 8]) + | *(uint32_t*)&(s->inUse[i * 16 + 12])) != 0); + } else { /* Our CPU can do better */ + inUse16 = inUse16*2 + + ((*(uint64_t*)&(s->inUse[i * 16 + 0]) + | *(uint64_t*)&(s->inUse[i * 16 + 8])) != 0); + } + } + + nBytes = s->numZ; + bsW(s, 16, inUse16); + + inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */ + for (i = 0; i < 16; i++) { + if (inUse16 < 0) { + unsigned v16 = 0; + for (j = 0; j < 16; j++) + v16 = v16*2 + s->inUse[i * 16 + j]; + bsW(s, 16, v16); + } + inUse16 <<= 1; + } + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW(s, 3, nGroups); + bsW(s, 15, nSelectors); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) + bsW(s, 1, 1); + bsW(s, 1, 0); + } + + /*--- Now the coding tables. ---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + int32_t curr = s->len[t][0]; + bsW(s, 5, curr); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }; + bsW(s, 1, 0); + } + } + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (1) { + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + AssertH(s->selector[selCtr] < nGroups, 3006); + +/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */ +#if 0 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + uint16_t mtfv_i; + uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); +#define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i]) + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); +#undef BZ_ITAH + gs = ge+1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + /* code is bit bigger, but moves multiply out of the loop */ + uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); + while (gs <= ge) { + bsW(s, + s_len_sel_selCtr[mtfv[gs]], + s_code_sel_selCtr[mtfv[gs]] + ); + gs++; + } + /* already is: gs = ge+1; */ + } + selCtr++; + } + AssertH(selCtr == nSelectors, 3007); +#undef code +#undef rfreq +#undef len_pack +} + + +/*---------------------------------------------------*/ +static +void BZ2_compressBlock(EState* s, int is_last_block) +{ + if (s->nblock > 0) { + BZ_FINALISE_CRC(s->blockCRC); + s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); + s->combinedCRC ^= s->blockCRC; + if (s->blockNo > 1) + s->numZ = 0; + + BZ2_blockSort(s); + } + + s->zbits = &((uint8_t*)s->arr2)[s->nblock]; + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + BZ2_bsInitWrite(s); + /*bsPutU8(s, BZ_HDR_B);*/ + /*bsPutU8(s, BZ_HDR_Z);*/ + /*bsPutU8(s, BZ_HDR_h);*/ + /*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/ + bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k); + } + + if (s->nblock > 0) { + /*bsPutU8(s, 0x31);*/ + /*bsPutU8(s, 0x41);*/ + /*bsPutU8(s, 0x59);*/ + /*bsPutU8(s, 0x26);*/ + bsPutU32(s, 0x31415926); + /*bsPutU8(s, 0x53);*/ + /*bsPutU8(s, 0x59);*/ + bsPutU16(s, 0x5359); + + /*-- Now the block's CRC, so it is in a known place. --*/ + bsPutU32(s, s->blockCRC); + + /* + * Now a single bit indicating (non-)randomisation. + * As of version 0.9.5, we use a better sorting algorithm + * which makes randomisation unnecessary. So always set + * the randomised bit to 'no'. Of course, the decoder + * still needs to be able to handle randomised blocks + * so as to maintain backwards compatibility with + * older versions of bzip2. + */ + bsW(s, 1, 0); + + bsW(s, 24, s->origPtr); + generateMTFValues(s); + sendMTFValues(s); + } + + /*-- If this is the last block, add the stream trailer. --*/ + if (is_last_block) { + /*bsPutU8(s, 0x17);*/ + /*bsPutU8(s, 0x72);*/ + /*bsPutU8(s, 0x45);*/ + /*bsPutU8(s, 0x38);*/ + bsPutU32(s, 0x17724538); + /*bsPutU8(s, 0x50);*/ + /*bsPutU8(s, 0x90);*/ + bsPutU16(s, 0x5090); + bsPutU32(s, s->combinedCRC); + bsFinishWrite(s); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/huffman.c b/archival/libarchive/bz/huffman.c new file mode 100644 index 000000000..676b1af66 --- /dev/null +++ b/archival/libarchive/bz/huffman.c @@ -0,0 +1,229 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Huffman coding low-level stuff ---*/ +/*--- huffman.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) +#define DEPTHOF(zz1) ((zz1) & 0x000000ff) +#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) + +#define ADDWEIGHTS(zw1,zw2) \ + (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ + (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) + +#define UPHEAP(z) \ +{ \ + int32_t zz, tmp; \ + zz = z; \ + tmp = heap[zz]; \ + while (weight[tmp] < weight[heap[zz >> 1]]) { \ + heap[zz] = heap[zz >> 1]; \ + zz >>= 1; \ + } \ + heap[zz] = tmp; \ +} + + +/* 90 bytes, 0.3% of overall compress speed */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 1 + +/* macro works better than inline (gcc 4.2.1) */ +#define DOWNHEAP1(heap, weight, Heap) \ +{ \ + int32_t zz, yy, tmp; \ + zz = 1; \ + tmp = heap[zz]; \ + while (1) { \ + yy = zz << 1; \ + if (yy > nHeap) \ + break; \ + if (yy < nHeap \ + && weight[heap[yy+1]] < weight[heap[yy]]) \ + yy++; \ + if (weight[tmp] < weight[heap[yy]]) \ + break; \ + heap[zz] = heap[yy]; \ + zz = yy; \ + } \ + heap[zz] = tmp; \ +} + +#else + +static +void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap) +{ + int32_t zz, yy, tmp; + zz = 1; + tmp = heap[zz]; + while (1) { + yy = zz << 1; + if (yy > nHeap) + break; + if (yy < nHeap + && weight[heap[yy + 1]] < weight[heap[yy]]) + yy++; + if (weight[tmp] < weight[heap[yy]]) + break; + heap[zz] = heap[yy]; + zz = yy; + } + heap[zz] = tmp; +} + +#endif + +/*---------------------------------------------------*/ +static +void BZ2_hbMakeCodeLengths(EState *s, + uint8_t *len, + int32_t *freq, + int32_t alphaSize, + int32_t maxLen) +{ + /* + * Nodes and heap entries run from 1. Entry 0 + * for both the heap and nodes is a sentinel. + */ + int32_t nNodes, nHeap, n1, n2, i, j, k; + Bool tooLong; + + /* bbox: moved to EState to save stack + int32_t heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t parent[BZ_MAX_ALPHA_SIZE * 2]; + */ +#define heap (s->BZ2_hbMakeCodeLengths__heap) +#define weight (s->BZ2_hbMakeCodeLengths__weight) +#define parent (s->BZ2_hbMakeCodeLengths__parent) + + for (i = 0; i < alphaSize; i++) + weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + + while (1) { + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + UPHEAP(nHeap); + } + + AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001); + + while (nHeap > 1) { + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); + nNodes++; + parent[n1] = parent[n2] = nNodes; + weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + UPHEAP(nHeap); + } + + AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002); + + tooLong = False; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { + k = parent[k]; + j++; + } + len[i-1] = j; + if (j > maxLen) + tooLong = True; + } + + if (!tooLong) + break; + + /* 17 Oct 04: keep-going condition for the following loop used + to be 'i < alphaSize', which missed the last element, + theoretically leading to the possibility of the compressor + looping. However, this count-scaling step is only needed if + one of the generated Huffman code words is longer than + maxLen, which up to and including version 1.0.2 was 20 bits, + which is extremely unlikely. In version 1.0.3 maxLen was + changed to 17 bits, which has minimal effect on compression + ratio, but does mean this scaling step is used from time to + time, enough to verify that it works. + + This means that bzip2-1.0.3 and later will only produce + Huffman codes with a maximum length of 17 bits. However, in + order to preserve backwards compatibility with bitstreams + produced by versions pre-1.0.3, the decompressor must still + handle lengths of up to 20. */ + + for (i = 1; i <= alphaSize; i++) { + j = weight[i] >> 8; + /* bbox: yes, it is a signed division. + * don't replace with shift! */ + j = 1 + (j / 2); + weight[i] = j << 8; + } + } +#undef heap +#undef weight +#undef parent +} + + +/*---------------------------------------------------*/ +static +void BZ2_hbAssignCodes(int32_t *code, + uint8_t *length, + int32_t minLen, + int32_t maxLen, + int32_t alphaSize) +{ + int32_t n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) { + if (length[i] == n) { + code[i] = vec; + vec++; + }; + } + vec <<= 1; + } +} + + +/*-------------------------------------------------------------*/ +/*--- end huffman.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/data_align.c b/archival/libarchive/data_align.c new file mode 100644 index 000000000..2e56fa8ff --- /dev/null +++ b/archival/libarchive/data_align.c @@ -0,0 +1,15 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_align(archive_handle_t *archive_handle, unsigned boundary) +{ + unsigned skip_amount = (boundary - (archive_handle->offset % boundary)) % boundary; + + archive_handle->seek(archive_handle->src_fd, skip_amount); + archive_handle->offset += skip_amount; +} diff --git a/archival/libarchive/data_extract_all.c b/archival/libarchive/data_extract_all.c new file mode 100644 index 000000000..1b25c8bd6 --- /dev/null +++ b/archival/libarchive/data_extract_all.c @@ -0,0 +1,200 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + int dst_fd; + int res; + +#if ENABLE_FEATURE_TAR_SELINUX + char *sctx = archive_handle->tar__next_file_sctx; + if (!sctx) + sctx = archive_handle->tar__global_sctx; + if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */ + setfscreatecon(sctx); + free(archive_handle->tar__next_file_sctx); + archive_handle->tar__next_file_sctx = NULL; + } +#endif + + if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) { + char *slash = strrchr(file_header->name, '/'); + if (slash) { + *slash = '\0'; + bb_make_directory(file_header->name, -1, FILEUTILS_RECUR); + *slash = '/'; + } + } + + if (archive_handle->ah_flags & ARCHIVE_UNLINK_OLD) { + /* Remove the entry if it exists */ + if (!S_ISDIR(file_header->mode)) { + /* Is it hardlink? + * We encode hard links as regular files of size 0 with a symlink */ + if (S_ISREG(file_header->mode) + && file_header->link_target + && file_header->size == 0 + ) { + /* Ugly special case: + * tar cf t.tar hardlink1 hardlink2 hardlink1 + * results in this tarball structure: + * hardlink1 + * hardlink2 -> hardlink1 + * hardlink1 -> hardlink1 <== !!! + */ + if (strcmp(file_header->link_target, file_header->name) == 0) + goto ret; + } + /* Proceed with deleting */ + if (unlink(file_header->name) == -1 + && errno != ENOENT + ) { + bb_perror_msg_and_die("can't remove old file %s", + file_header->name); + } + } + } + else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) { + /* Remove the existing entry if its older than the extracted entry */ + struct stat existing_sb; + if (lstat(file_header->name, &existing_sb) == -1) { + if (errno != ENOENT) { + bb_perror_msg_and_die("can't stat old file"); + } + } + else if (existing_sb.st_mtime >= file_header->mtime) { + if (!(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + && !S_ISDIR(file_header->mode) + ) { + bb_error_msg("%s not created: newer or " + "same age file exists", file_header->name); + } + data_skip(archive_handle); + goto ret; + } + else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) { + bb_perror_msg_and_die("can't remove old file %s", + file_header->name); + } + } + + /* Handle hard links separately + * We encode hard links as regular files of size 0 with a symlink */ + if (S_ISREG(file_header->mode) + && file_header->link_target + && file_header->size == 0 + ) { + /* hard link */ + res = link(file_header->link_target, file_header->name); + if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) { + bb_perror_msg("can't create %slink " + "from %s to %s", "hard", + file_header->name, + file_header->link_target); + } + /* Hardlinks have no separate mode/ownership, skip chown/chmod */ + goto ret; + } + + /* Create the filesystem entry */ + switch (file_header->mode & S_IFMT) { + case S_IFREG: { + /* Regular file */ + int flags = O_WRONLY | O_CREAT | O_EXCL; + if (archive_handle->ah_flags & ARCHIVE_O_TRUNC) + flags = O_WRONLY | O_CREAT | O_TRUNC; + dst_fd = xopen3(file_header->name, + flags, + file_header->mode + ); + bb_copyfd_exact_size(archive_handle->src_fd, dst_fd, file_header->size); + close(dst_fd); + break; + } + case S_IFDIR: + res = mkdir(file_header->name, file_header->mode); + if ((res == -1) + && (errno != EISDIR) /* btw, Linux doesn't return this */ + && (errno != EEXIST) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("can't make dir %s", file_header->name); + } + break; + case S_IFLNK: + /* Symlink */ +//TODO: what if file_header->link_target == NULL (say, corrupted tarball?) + res = symlink(file_header->link_target, file_header->name); + if ((res == -1) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("can't create %slink " + "from %s to %s", "sym", + file_header->name, + file_header->link_target); + } + break; + case S_IFSOCK: + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + res = mknod(file_header->name, file_header->mode, file_header->device); + if ((res == -1) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("can't create node %s", file_header->name); + } + break; + default: + bb_error_msg_and_die("unrecognized file type"); + } + + if (!S_ISLNK(file_header->mode)) { + if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_OWNER)) { + uid_t uid = file_header->uid; + gid_t gid = file_header->gid; +#if ENABLE_FEATURE_TAR_UNAME_GNAME + if (!(archive_handle->ah_flags & ARCHIVE_NUMERIC_OWNER)) { + if (file_header->tar__uname) { +//TODO: cache last name/id pair? + struct passwd *pwd = getpwnam(file_header->tar__uname); + if (pwd) uid = pwd->pw_uid; + } + if (file_header->tar__gname) { + struct group *grp = getgrnam(file_header->tar__gname); + if (grp) gid = grp->gr_gid; + } + } +#endif + /* GNU tar 1.15.1 uses chown, not lchown */ + chown(file_header->name, uid, gid); + } + /* uclibc has no lchmod, glibc is even stranger - + * it has lchmod which seems to do nothing! + * so we use chmod... */ + if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) { + chmod(file_header->name, file_header->mode); + } + if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) { + struct timeval t[2]; + + t[1].tv_sec = t[0].tv_sec = file_header->mtime; + t[1].tv_usec = t[0].tv_usec = 0; + utimes(file_header->name, t); + } + } + + ret: ; +#if ENABLE_FEATURE_TAR_SELINUX + if (sctx) { + /* reset the context after creating an entry */ + setfscreatecon(NULL); + } +#endif +} diff --git a/archival/libarchive/data_extract_to_command.c b/archival/libarchive/data_extract_to_command.c new file mode 100644 index 000000000..2bbab7641 --- /dev/null +++ b/archival/libarchive/data_extract_to_command.c @@ -0,0 +1,134 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +enum { + //TAR_FILETYPE, + TAR_MODE, + TAR_FILENAME, + TAR_REALNAME, +#if ENABLE_FEATURE_TAR_UNAME_GNAME + TAR_UNAME, + TAR_GNAME, +#endif + TAR_SIZE, + TAR_UID, + TAR_GID, + TAR_MAX, +}; + +static const char *const tar_var[] = { + // "FILETYPE", + "MODE", + "FILENAME", + "REALNAME", +#if ENABLE_FEATURE_TAR_UNAME_GNAME + "UNAME", + "GNAME", +#endif + "SIZE", + "UID", + "GID", +}; + +static void xputenv(char *str) +{ + if (putenv(str)) + bb_error_msg_and_die(bb_msg_memory_exhausted); +} + +static void str2env(char *env[], int idx, const char *str) +{ + env[idx] = xasprintf("TAR_%s=%s", tar_var[idx], str); + xputenv(env[idx]); +} + +static void dec2env(char *env[], int idx, unsigned long long val) +{ + env[idx] = xasprintf("TAR_%s=%llu", tar_var[idx], val); + xputenv(env[idx]); +} + +static void oct2env(char *env[], int idx, unsigned long val) +{ + env[idx] = xasprintf("TAR_%s=%lo", tar_var[idx], val); + xputenv(env[idx]); +} + +void FAST_FUNC data_extract_to_command(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + +#if 0 /* do we need this? ENABLE_FEATURE_TAR_SELINUX */ + char *sctx = archive_handle->tar__next_file_sctx; + if (!sctx) + sctx = archive_handle->tar__global_sctx; + if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */ + setfscreatecon(sctx); + free(archive_handle->tar__next_file_sctx); + archive_handle->tar__next_file_sctx = NULL; + } +#endif + + if ((file_header->mode & S_IFMT) == S_IFREG) { + pid_t pid; + int p[2], status; + char *tar_env[TAR_MAX]; + + memset(tar_env, 0, sizeof(tar_env)); + + xpipe(p); + pid = BB_MMU ? xfork() : xvfork(); + if (pid == 0) { + /* Child */ + /* str2env(tar_env, TAR_FILETYPE, "f"); - parent should do it once */ + oct2env(tar_env, TAR_MODE, file_header->mode); + str2env(tar_env, TAR_FILENAME, file_header->name); + str2env(tar_env, TAR_REALNAME, file_header->name); +#if ENABLE_FEATURE_TAR_UNAME_GNAME + str2env(tar_env, TAR_UNAME, file_header->tar__uname); + str2env(tar_env, TAR_GNAME, file_header->tar__gname); +#endif + dec2env(tar_env, TAR_SIZE, file_header->size); + dec2env(tar_env, TAR_UID, file_header->uid); + dec2env(tar_env, TAR_GID, file_header->gid); + close(p[1]); + xdup2(p[0], STDIN_FILENO); + signal(SIGPIPE, SIG_DFL); + execl(DEFAULT_SHELL, DEFAULT_SHELL_SHORT_NAME, "-c", archive_handle->tar__to_command, NULL); + bb_perror_msg_and_die("can't execute '%s'", DEFAULT_SHELL); + } + close(p[0]); + /* Our caller is expected to do signal(SIGPIPE, SIG_IGN) + * so that we don't die if child don't read all the input: */ + bb_copyfd_exact_size(archive_handle->src_fd, p[1], -file_header->size); + close(p[1]); + + if (safe_waitpid(pid, &status, 0) == -1) + bb_perror_msg_and_die("waitpid"); + if (WIFEXITED(status) && WEXITSTATUS(status)) + bb_error_msg_and_die("'%s' returned status %d", + archive_handle->tar__to_command, WEXITSTATUS(status)); + if (WIFSIGNALED(status)) + bb_error_msg_and_die("'%s' terminated on signal %d", + archive_handle->tar__to_command, WTERMSIG(status)); + + if (!BB_MMU) { + int i; + for (i = 0; i < TAR_MAX; i++) { + if (tar_env[i]) + bb_unsetenv_and_free(tar_env[i]); + } + } + } + +#if 0 /* ENABLE_FEATURE_TAR_SELINUX */ + if (sctx) + /* reset the context after creating an entry */ + setfscreatecon(NULL); +#endif +} diff --git a/archival/libarchive/data_extract_to_stdout.c b/archival/libarchive/data_extract_to_stdout.c new file mode 100644 index 000000000..91f3f3539 --- /dev/null +++ b/archival/libarchive/data_extract_to_stdout.c @@ -0,0 +1,14 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_extract_to_stdout(archive_handle_t *archive_handle) +{ + bb_copyfd_exact_size(archive_handle->src_fd, + STDOUT_FILENO, + archive_handle->file_header->size); +} diff --git a/archival/libarchive/data_skip.c b/archival/libarchive/data_skip.c new file mode 100644 index 000000000..a055424e2 --- /dev/null +++ b/archival/libarchive/data_skip.c @@ -0,0 +1,12 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_skip(archive_handle_t *archive_handle) +{ + archive_handle->seek(archive_handle->src_fd, archive_handle->file_header->size); +} diff --git a/archival/libarchive/decompress_bunzip2.c b/archival/libarchive/decompress_bunzip2.c new file mode 100644 index 000000000..4e46e6849 --- /dev/null +++ b/archival/libarchive/decompress_bunzip2.c @@ -0,0 +1,822 @@ +/* vi: set sw=4 ts=4: */ +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). + + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), + which also acknowledges contributions by Mike Burrows, David Wheeler, + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, + Robert Sedgewick, and Jon L. Bentley. + + Licensed under GPLv2 or later, see file LICENSE in this source tree. +*/ + +/* + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). + + More efficient reading of Huffman codes, a streamlined read_bunzip() + function, and various other tweaks. In (limited) tests, approximately + 20% faster than bzcat on x86 and about 10% faster on arm. + + Note that about 2/3 of the time is spent in read_bunzip() reversing + the Burrows-Wheeler transformation. Much of that time is delay + resulting from cache misses. + + (2010 update by vda: profiled "bzcat <84mbyte.bz2 >/dev/null" + on x86-64 CPU with L2 > 1M: get_next_block is hotter than read_bunzip: + %time seconds calls function + 71.01 12.69 444 get_next_block + 28.65 5.12 93065 read_bunzip + 00.22 0.04 7736490 get_bits + 00.11 0.02 47 dealloc_bunzip + 00.00 0.00 93018 full_write + ...) + + + I would ask that anyone benefiting from this work, especially those + using it in commercial products, consider making a donation to my local + non-profit hospice organization (www.hospiceacadiana.com) in the name of + the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003. + + Manuel + */ + +#include "libbb.h" +#include "archive.h" + +/* Constants for Huffman coding */ +#define MAX_GROUPS 6 +#define GROUP_SIZE 50 /* 64 would have been more efficient */ +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ +#define SYMBOL_RUNA 0 +#define SYMBOL_RUNB 1 + +/* Status return values */ +#define RETVAL_OK 0 +#define RETVAL_LAST_BLOCK (-1) +#define RETVAL_NOT_BZIP_DATA (-2) +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) +#define RETVAL_SHORT_WRITE (-4) +#define RETVAL_DATA_ERROR (-5) +#define RETVAL_OUT_OF_MEMORY (-6) +#define RETVAL_OBSOLETE_INPUT (-7) + +/* Other housekeeping constants */ +#define IOBUF_SIZE 4096 + +/* This is what we know about each Huffman coding group */ +struct group_data { + /* We have an extra slot at the end of limit[] for a sentinel value. */ + int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; + int minLen, maxLen; +}; + +/* Structure holding all the housekeeping data, including IO buffers and + * memory that persists between calls to bunzip + * Found the most used member: + * cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \ + * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER + * and moved it (inbufBitCount) to offset 0. + */ +struct bunzip_data { + /* I/O tracking data (file handles, buffers, positions, etc.) */ + unsigned inbufBitCount, inbufBits; + int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; + uint8_t *inbuf /*,*outbuf*/; + + /* State for interrupting output loop */ + int writeCopies, writePos, writeRunCountdown, writeCount; + int writeCurrent; /* actually a uint8_t */ + + /* The CRC values stored in the block header and calculated from the data */ + uint32_t headerCRC, totalCRC, writeCRC; + + /* Intermediate buffer and its size (in bytes) */ + uint32_t *dbuf; + unsigned dbufSize; + + /* For I/O error handling */ + jmp_buf jmpbuf; + + /* Big things go last (register-relative addressing can be larger for big offsets) */ + uint32_t crc32Table[256]; + uint8_t selectors[32768]; /* nSelectors=15 bits */ + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ +}; +/* typedef struct bunzip_data bunzip_data; -- done in .h file */ + + +/* Return the next nnn bits of input. All reads from the compressed input + are done through this function. All reads are big endian */ +static unsigned get_bits(bunzip_data *bd, int bits_wanted) +{ + unsigned bits = 0; + /* Cache bd->inbufBitCount in a CPU register (hopefully): */ + int bit_count = bd->inbufBitCount; + + /* If we need to get more data from the byte buffer, do so. (Loop getting + one byte at a time to enforce endianness and avoid unaligned access.) */ + while (bit_count < bits_wanted) { + + /* If we need to read more data from file into byte buffer, do so */ + if (bd->inbufPos == bd->inbufCount) { + /* if "no input fd" case: in_fd == -1, read fails, we jump */ + bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); + if (bd->inbufCount <= 0) + longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF); + bd->inbufPos = 0; + } + + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ + if (bit_count >= 24) { + bits = bd->inbufBits & ((1 << bit_count) - 1); + bits_wanted -= bit_count; + bits <<= bits_wanted; + bit_count = 0; + } + + /* Grab next 8 bits of input from buffer. */ + bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; + bit_count += 8; + } + + /* Calculate result */ + bit_count -= bits_wanted; + bd->inbufBitCount = bit_count; + bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1); + + return bits; +} + +/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */ +static int get_next_block(bunzip_data *bd) +{ + struct group_data *hufGroup; + int dbufCount, dbufSize, groupCount, *base, *limit, selector, + i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; + int runCnt = runCnt; /* for compiler */ + uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; + uint32_t *dbuf; + unsigned origPtr; + + dbuf = bd->dbuf; + dbufSize = bd->dbufSize; + selectors = bd->selectors; + +/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */ +#if 0 + /* Reset longjmp I/O error handling */ + i = setjmp(bd->jmpbuf); + if (i) return i; +#endif + + /* Read in header signature and CRC, then validate signature. + (last block signature means CRC is for whole file, return now) */ + i = get_bits(bd, 24); + j = get_bits(bd, 24); + bd->headerCRC = get_bits(bd, 32); + if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK; + if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA; + + /* We can add support for blockRandomised if anybody complains. There was + some code for this in busybox 1.0.0-pre3, but nobody ever noticed that + it didn't actually work. */ + if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; + origPtr = get_bits(bd, 24); + if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; + + /* mapping table: if some byte values are never used (encoding things + like ascii text), the compression code removes the gaps to have fewer + symbols to deal with, and writes a sparse bitfield indicating which + values were present. We make a translation table to convert the symbols + back to the corresponding bytes. */ + symTotal = 0; + i = 0; + t = get_bits(bd, 16); + do { + if (t & (1 << 15)) { + unsigned inner_map = get_bits(bd, 16); + do { + if (inner_map & (1 << 15)) + symToByte[symTotal++] = i; + inner_map <<= 1; + i++; + } while (i & 15); + i -= 16; + } + t <<= 1; + i += 16; + } while (i < 256); + + /* How many different Huffman coding groups does this block use? */ + groupCount = get_bits(bd, 3); + if (groupCount < 2 || groupCount > MAX_GROUPS) + return RETVAL_DATA_ERROR; + + /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding + group. Read in the group selector list, which is stored as MTF encoded + bit runs. (MTF=Move To Front, as each value is used it's moved to the + start of the list.) */ + for (i = 0; i < groupCount; i++) + mtfSymbol[i] = i; + nSelectors = get_bits(bd, 15); + if (!nSelectors) + return RETVAL_DATA_ERROR; + for (i = 0; i < nSelectors; i++) { + uint8_t tmp_byte; + /* Get next value */ + int n = 0; + while (get_bits(bd, 1)) { + if (n >= groupCount) return RETVAL_DATA_ERROR; + n++; + } + /* Decode MTF to get the next selector */ + tmp_byte = mtfSymbol[n]; + while (--n >= 0) + mtfSymbol[n + 1] = mtfSymbol[n]; + mtfSymbol[0] = selectors[i] = tmp_byte; + } + + /* Read the Huffman coding tables for each group, which code for symTotal + literal symbols, plus two run symbols (RUNA, RUNB) */ + symCount = symTotal + 2; + for (j = 0; j < groupCount; j++) { + uint8_t length[MAX_SYMBOLS]; + /* 8 bits is ALMOST enough for temp[], see below */ + unsigned temp[MAX_HUFCODE_BITS+1]; + int minLen, maxLen, pp, len_m1; + + /* Read Huffman code lengths for each symbol. They're stored in + a way similar to mtf; record a starting value for the first symbol, + and an offset from the previous value for every symbol after that. + (Subtracting 1 before the loop and then adding it back at the end is + an optimization that makes the test inside the loop simpler: symbol + length 0 becomes negative, so an unsigned inequality catches it.) */ + len_m1 = get_bits(bd, 5) - 1; + for (i = 0; i < symCount; i++) { + for (;;) { + int two_bits; + if ((unsigned)len_m1 > (MAX_HUFCODE_BITS-1)) + return RETVAL_DATA_ERROR; + + /* If first bit is 0, stop. Else second bit indicates whether + to increment or decrement the value. Optimization: grab 2 + bits and unget the second if the first was 0. */ + two_bits = get_bits(bd, 2); + if (two_bits < 2) { + bd->inbufBitCount++; + break; + } + + /* Add one if second bit 1, else subtract 1. Avoids if/else */ + len_m1 += (((two_bits+1) & 2) - 1); + } + + /* Correct for the initial -1, to get the final symbol length */ + length[i] = len_m1 + 1; + } + + /* Find largest and smallest lengths in this group */ + minLen = maxLen = length[0]; + for (i = 1; i < symCount; i++) { + if (length[i] > maxLen) maxLen = length[i]; + else if (length[i] < minLen) minLen = length[i]; + } + + /* Calculate permute[], base[], and limit[] tables from length[]. + * + * permute[] is the lookup table for converting Huffman coded symbols + * into decoded symbols. base[] is the amount to subtract from the + * value of a Huffman symbol of a given length when using permute[]. + * + * limit[] indicates the largest numerical value a symbol with a given + * number of bits can have. This is how the Huffman codes can vary in + * length: each code with a value>limit[length] needs another bit. + */ + hufGroup = bd->groups + j; + hufGroup->minLen = minLen; + hufGroup->maxLen = maxLen; + + /* Note that minLen can't be smaller than 1, so we adjust the base + and limit array pointers so we're not always wasting the first + entry. We do this again when using them (during symbol decoding). */ + base = hufGroup->base - 1; + limit = hufGroup->limit - 1; + + /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ + pp = 0; + for (i = minLen; i <= maxLen; i++) { + int k; + temp[i] = limit[i] = 0; + for (k = 0; k < symCount; k++) + if (length[k] == i) + hufGroup->permute[pp++] = k; + } + + /* Count symbols coded for at each bit length */ + /* NB: in pathological cases, temp[8] can end ip being 256. + * That's why uint8_t is too small for temp[]. */ + for (i = 0; i < symCount; i++) temp[length[i]]++; + + /* Calculate limit[] (the largest symbol-coding value at each bit + * length, which is (previous limit<<1)+symbols at this level), and + * base[] (number of symbols to ignore at each bit length, which is + * limit minus the cumulative count of symbols coded for already). */ + pp = t = 0; + for (i = minLen; i < maxLen;) { + unsigned temp_i = temp[i]; + + pp += temp_i; + + /* We read the largest possible symbol size and then unget bits + after determining how many we need, and those extra bits could + be set to anything. (They're noise from future symbols.) At + each level we're really only interested in the first few bits, + so here we set all the trailing to-be-ignored bits to 1 so they + don't affect the value>limit[length] comparison. */ + limit[i] = (pp << (maxLen - i)) - 1; + pp <<= 1; + t += temp_i; + base[++i] = pp - t; + } + limit[maxLen] = pp + temp[maxLen] - 1; + limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ + base[minLen] = 0; + } + + /* We've finished reading and digesting the block header. Now read this + block's Huffman coded symbols from the file and undo the Huffman coding + and run length encoding, saving the result into dbuf[dbufCount++] = uc */ + + /* Initialize symbol occurrence counters and symbol Move To Front table */ + /*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */ + for (i = 0; i < 256; i++) { + byteCount[i] = 0; + mtfSymbol[i] = (uint8_t)i; + } + + /* Loop through compressed symbols. */ + + runPos = dbufCount = selector = 0; + for (;;) { + int nextSym; + + /* Fetch next Huffman coding group from list. */ + symCount = GROUP_SIZE - 1; + if (selector >= nSelectors) return RETVAL_DATA_ERROR; + hufGroup = bd->groups + selectors[selector++]; + base = hufGroup->base - 1; + limit = hufGroup->limit - 1; + + continue_this_group: + /* Read next Huffman-coded symbol. */ + + /* Note: It is far cheaper to read maxLen bits and back up than it is + to read minLen bits and then add additional bit at a time, testing + as we go. Because there is a trailing last block (with file CRC), + there is no danger of the overread causing an unexpected EOF for a + valid compressed file. + */ + if (1) { + /* As a further optimization, we do the read inline + (falling back to a call to get_bits if the buffer runs dry). + */ + int new_cnt; + while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) { + /* bd->inbufBitCount < hufGroup->maxLen */ + if (bd->inbufPos == bd->inbufCount) { + nextSym = get_bits(bd, hufGroup->maxLen); + goto got_huff_bits; + } + bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + }; + bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */ + nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1); + got_huff_bits: ; + } else { /* unoptimized equivalent */ + nextSym = get_bits(bd, hufGroup->maxLen); + } + /* Figure how many bits are in next symbol and unget extras */ + i = hufGroup->minLen; + while (nextSym > limit[i]) ++i; + j = hufGroup->maxLen - i; + if (j < 0) + return RETVAL_DATA_ERROR; + bd->inbufBitCount += j; + + /* Huffman decode value to get nextSym (with bounds checking) */ + nextSym = (nextSym >> j) - base[i]; + if ((unsigned)nextSym >= MAX_SYMBOLS) + return RETVAL_DATA_ERROR; + nextSym = hufGroup->permute[nextSym]; + + /* We have now decoded the symbol, which indicates either a new literal + byte, or a repeated run of the most recent literal byte. First, + check if nextSym indicates a repeated run, and if so loop collecting + how many times to repeat the last literal. */ + if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ + + /* If this is the start of a new run, zero out counter */ + if (runPos == 0) { + runPos = 1; + runCnt = 0; + } + + /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at + each bit position, add 1 or 2 instead. For example, + 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2. + You can make any bit pattern that way using 1 less symbol than + the basic or 0/1 method (except all bits 0, which would use no + symbols, but a run of length 0 doesn't mean anything in this + context). Thus space is saved. */ + runCnt += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ + if (runPos < dbufSize) runPos <<= 1; + goto end_of_huffman_loop; + } + + /* When we hit the first non-run symbol after a run, we now know + how many times to repeat the last literal, so append that many + copies to our buffer of decoded symbols (dbuf) now. (The last + literal used is the one at the head of the mtfSymbol array.) */ + if (runPos != 0) { + uint8_t tmp_byte; + if (dbufCount + runCnt >= dbufSize) return RETVAL_DATA_ERROR; + tmp_byte = symToByte[mtfSymbol[0]]; + byteCount[tmp_byte] += runCnt; + while (--runCnt >= 0) dbuf[dbufCount++] = (uint32_t)tmp_byte; + runPos = 0; + } + + /* Is this the terminating symbol? */ + if (nextSym > symTotal) break; + + /* At this point, nextSym indicates a new literal character. Subtract + one to get the position in the MTF array at which this literal is + currently to be found. (Note that the result can't be -1 or 0, + because 0 and 1 are RUNA and RUNB. But another instance of the + first symbol in the mtf array, position 0, would have been handled + as part of a run above. Therefore 1 unused mtf position minus + 2 non-literal nextSym values equals -1.) */ + if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; + i = nextSym - 1; + uc = mtfSymbol[i]; + + /* Adjust the MTF array. Since we typically expect to move only a + * small number of symbols, and are bound by 256 in any case, using + * memmove here would typically be bigger and slower due to function + * call overhead and other assorted setup costs. */ + do { + mtfSymbol[i] = mtfSymbol[i-1]; + } while (--i); + mtfSymbol[0] = uc; + uc = symToByte[uc]; + + /* We have our literal byte. Save it into dbuf. */ + byteCount[uc]++; + dbuf[dbufCount++] = (uint32_t)uc; + + /* Skip group initialization if we're not done with this group. Done + * this way to avoid compiler warning. */ + end_of_huffman_loop: + if (--symCount >= 0) goto continue_this_group; + } + + /* At this point, we've read all the Huffman-coded symbols (and repeated + runs) for this block from the input stream, and decoded them into the + intermediate buffer. There are dbufCount many decoded bytes in dbuf[]. + Now undo the Burrows-Wheeler transform on dbuf. + See http://dogma.net/markn/articles/bwt/bwt.htm + */ + + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ + j = 0; + for (i = 0; i < 256; i++) { + int tmp_count = j + byteCount[i]; + byteCount[i] = j; + j = tmp_count; + } + + /* Figure out what order dbuf would be in if we sorted it. */ + for (i = 0; i < dbufCount; i++) { + uint8_t tmp_byte = (uint8_t)dbuf[i]; + int tmp_count = byteCount[tmp_byte]; + dbuf[tmp_count] |= (i << 8); + byteCount[tmp_byte] = tmp_count + 1; + } + + /* Decode first byte by hand to initialize "previous" byte. Note that it + doesn't get output, and if the first three characters are identical + it doesn't qualify as a run (hence writeRunCountdown=5). */ + if (dbufCount) { + uint32_t tmp; + if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; + tmp = dbuf[origPtr]; + bd->writeCurrent = (uint8_t)tmp; + bd->writePos = (tmp >> 8); + bd->writeRunCountdown = 5; + } + bd->writeCount = dbufCount; + + return RETVAL_OK; +} + +/* Undo Burrows-Wheeler transform on intermediate buffer to produce output. + If start_bunzip was initialized with out_fd=-1, then up to len bytes of + data are written to outbuf. Return value is number of bytes written or + error (all errors are negative numbers). If out_fd!=-1, outbuf and len + are ignored, data is written to out_fd and return is RETVAL_OK or error. + + NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes + in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0. + (Why? This allows to get rid of one local variable) +*/ +int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) +{ + const uint32_t *dbuf; + int pos, current, previous; + uint32_t CRC; + + /* If we already have error/end indicator, return it */ + if (bd->writeCount < 0) + return bd->writeCount; + + dbuf = bd->dbuf; + + /* Register-cached state (hopefully): */ + pos = bd->writePos; + current = bd->writeCurrent; + CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */ + + /* We will always have pending decoded data to write into the output + buffer unless this is the very first call (in which case we haven't + Huffman-decoded a block into the intermediate buffer yet). */ + if (bd->writeCopies) { + + dec_writeCopies: + /* Inside the loop, writeCopies means extra copies (beyond 1) */ + --bd->writeCopies; + + /* Loop outputting bytes */ + for (;;) { + + /* If the output buffer is full, save cached state and return */ + if (--len < 0) { + /* Unlikely branch. + * Use of "goto" instead of keeping code here + * helps compiler to realize this. */ + goto outbuf_full; + } + + /* Write next byte into output buffer, updating CRC */ + *outbuf++ = current; + CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current]; + + /* Loop now if we're outputting multiple copies of this byte */ + if (bd->writeCopies) { + /* Unlikely branch */ + /*--bd->writeCopies;*/ + /*continue;*/ + /* Same, but (ab)using other existing --writeCopies operation + * (and this if() compiles into just test+branch pair): */ + goto dec_writeCopies; + } + decode_next_byte: + if (--bd->writeCount < 0) + break; /* input block is fully consumed, need next one */ + + /* Follow sequence vector to undo Burrows-Wheeler transform */ + previous = current; + pos = dbuf[pos]; + current = (uint8_t)pos; + pos >>= 8; + + /* After 3 consecutive copies of the same byte, the 4th + * is a repeat count. We count down from 4 instead + * of counting up because testing for non-zero is faster */ + if (--bd->writeRunCountdown != 0) { + if (current != previous) + bd->writeRunCountdown = 4; + } else { + /* Unlikely branch */ + /* We have a repeated run, this byte indicates the count */ + bd->writeCopies = current; + current = previous; + bd->writeRunCountdown = 5; + + /* Sometimes there are just 3 bytes (run length 0) */ + if (!bd->writeCopies) goto decode_next_byte; + + /* Subtract the 1 copy we'd output anyway to get extras */ + --bd->writeCopies; + } + } /* for(;;) */ + + /* Decompression of this input block completed successfully */ + bd->writeCRC = CRC = ~CRC; + bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC; + + /* If this block had a CRC error, force file level CRC error */ + if (CRC != bd->headerCRC) { + bd->totalCRC = bd->headerCRC + 1; + return RETVAL_LAST_BLOCK; + } + } + + /* Refill the intermediate buffer by Huffman-decoding next block of input */ + { + int r = get_next_block(bd); + if (r) { /* error/end */ + bd->writeCount = r; + return (r != RETVAL_LAST_BLOCK) ? r : len; + } + } + + CRC = ~0; + pos = bd->writePos; + current = bd->writeCurrent; + goto decode_next_byte; + + outbuf_full: + /* Output buffer is full, save cached state and return */ + bd->writePos = pos; + bd->writeCurrent = current; + bd->writeCRC = CRC; + + bd->writeCopies++; + + return 0; +} + +/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are + ignored, and data is read from file handle into temporary buffer. */ + +/* Because bunzip2 is used for help text unpacking, and because bb_show_usage() + should work for NOFORK applets too, we must be extremely careful to not leak + any allocations! */ +int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, + const void *inbuf, int len) +{ + bunzip_data *bd; + unsigned i; + enum { + BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0', + h0 = ('h' << 8) + '0', + }; + + /* Figure out how much data to allocate */ + i = sizeof(bunzip_data); + if (in_fd != -1) i += IOBUF_SIZE; + + /* Allocate bunzip_data. Most fields initialize to zero. */ + bd = *bdp = xzalloc(i); + + /* Setup input buffer */ + bd->in_fd = in_fd; + if (-1 == in_fd) { + /* in this case, bd->inbuf is read-only */ + bd->inbuf = (void*)inbuf; /* cast away const-ness */ + } else { + bd->inbuf = (uint8_t*)(bd + 1); + memcpy(bd->inbuf, inbuf, len); + } + bd->inbufCount = len; + + /* Init the CRC32 table (big endian) */ + crc32_filltable(bd->crc32Table, 1); + + /* Setup for I/O error handling via longjmp */ + i = setjmp(bd->jmpbuf); + if (i) return i; + + /* Ensure that file starts with "BZh['1'-'9']." */ + /* Update: now caller verifies 1st two bytes, makes .gz/.bz2 + * integration easier */ + /* was: */ + /* i = get_bits(bd, 32); */ + /* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */ + i = get_bits(bd, 16); + if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; + + /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of + uncompressed data. Allocate intermediate buffer for block. */ + /* bd->dbufSize = 100000 * (i - BZh0); */ + bd->dbufSize = 100000 * (i - h0); + + /* Cannot use xmalloc - may leak bd in NOFORK case! */ + bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0])); + if (!bd->dbuf) { + free(bd); + xfunc_die(); + } + return RETVAL_OK; +} + +void FAST_FUNC dealloc_bunzip(bunzip_data *bd) +{ + free(bd->dbuf); + free(bd); +} + + +/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ +IF_DESKTOP(long long) int FAST_FUNC +unpack_bz2_stream(int src_fd, int dst_fd) +{ + IF_DESKTOP(long long total_written = 0;) + bunzip_data *bd; + char *outbuf; + int i; + unsigned len; + + outbuf = xmalloc(IOBUF_SIZE); + len = 0; + while (1) { /* "Process one BZ... stream" loop */ + + i = start_bunzip(&bd, src_fd, outbuf + 2, len); + + if (i == 0) { + while (1) { /* "Produce some output bytes" loop */ + i = read_bunzip(bd, outbuf, IOBUF_SIZE); + if (i < 0) /* error? */ + break; + i = IOBUF_SIZE - i; /* number of bytes produced */ + if (i == 0) /* EOF? */ + break; + if (i != full_write(dst_fd, outbuf, i)) { + bb_error_msg("short write"); + i = RETVAL_SHORT_WRITE; + goto release_mem; + } + IF_DESKTOP(total_written += i;) + } + } + + if (i != RETVAL_LAST_BLOCK) { + bb_error_msg("bunzip error %d", i); + break; + } + if (bd->headerCRC != bd->totalCRC) { + bb_error_msg("CRC error"); + break; + } + + /* Successfully unpacked one BZ stream */ + i = RETVAL_OK; + + /* Do we have "BZ..." after last processed byte? + * pbzip2 (parallelized bzip2) produces such files. + */ + len = bd->inbufCount - bd->inbufPos; + memcpy(outbuf, &bd->inbuf[bd->inbufPos], len); + if (len < 2) { + if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len) + break; + len = 2; + } + if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */ + break; + dealloc_bunzip(bd); + len -= 2; + } + + release_mem: + dealloc_bunzip(bd); + free(outbuf); + + return i ? i : IF_DESKTOP(total_written) + 0; +} + +IF_DESKTOP(long long) int FAST_FUNC +unpack_bz2_stream_prime(int src_fd, int dst_fd) +{ + uint16_t magic2; + xread(src_fd, &magic2, 2); + if (magic2 != BZIP2_MAGIC) { + bb_error_msg_and_die("invalid magic"); + } + return unpack_bz2_stream(src_fd, dst_fd); +} + +#ifdef TESTING + +static char *const bunzip_errors[] = { + NULL, "Bad file checksum", "Not bzip data", + "Unexpected input EOF", "Unexpected output EOF", "Data error", + "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported" +}; + +/* Dumb little test thing, decompress stdin to stdout */ +int main(int argc, char **argv) +{ + int i; + char c; + + int i = unpack_bz2_stream_prime(0, 1); + if (i < 0) + fprintf(stderr, "%s\n", bunzip_errors[-i]); + else if (read(STDIN_FILENO, &c, 1)) + fprintf(stderr, "Trailing garbage ignored\n"); + return -i; +} +#endif diff --git a/archival/libarchive/decompress_uncompress.c b/archival/libarchive/decompress_uncompress.c new file mode 100644 index 000000000..44d894244 --- /dev/null +++ b/archival/libarchive/decompress_uncompress.c @@ -0,0 +1,307 @@ +/* vi: set sw=4 ts=4: */ +/* uncompress for busybox -- (c) 2002 Robert Griebl + * + * based on the original compress42.c source + * (see disclaimer below) + */ + +/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992. + * + * Authors: + * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) + * Jim McKie (decvax!mcvax!jim) + * Steve Davies (decvax!vax135!petsd!peora!srd) + * Ken Turkowski (decvax!decwrl!turtlevax!ken) + * James A. Woods (decvax!ihnp4!ames!jaw) + * Joe Orost (decvax!vax135!petsd!joe) + * Dave Mack (csu@alembic.acs.com) + * Peter Jannesen, Network Communication Systems + * (peter@ncs.nl) + * + * marc@suse.de : a small security fix for a buffer overflow + * + * [... History snipped ...] + * + */ + +#include "libbb.h" +#include "archive.h" + + +/* Default input buffer size */ +#define IBUFSIZ 2048 + +/* Default output buffer size */ +#define OBUFSIZ 2048 + +/* Defines for third byte of header */ +#define BIT_MASK 0x1f /* Mask for 'number of compresssion bits' */ + /* Masks 0x20 and 0x40 are free. */ + /* I think 0x20 should mean that there is */ + /* a fourth header byte (for expansion). */ +#define BLOCK_MODE 0x80 /* Block compression if table is full and */ + /* compression rate is dropping flush tables */ + /* the next two codes should not be changed lightly, as they must not */ + /* lie within the contiguous general code space. */ +#define FIRST 257 /* first free entry */ +#define CLEAR 256 /* table clear output code */ + +#define INIT_BITS 9 /* initial number of bits/code */ + + +/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */ +#define HBITS 17 /* 50% occupancy */ +#define HSIZE (1< BITS) { + bb_error_msg("compressed with %d bits, can only handle " + BITS_STR" bits", maxbits); + goto err; + } + + n_bits = INIT_BITS; + maxcode = MAXCODE(INIT_BITS) - 1; + bitmask = (1 << INIT_BITS) - 1; + oldcode = -1; + finchar = 0; + outpos = 0; + posbits = 0 << 3; + + free_ent = ((block_mode) ? FIRST : 256); + + /* As above, initialize the first 256 entries in the table. */ + /*clear_tab_prefixof(); - done by xzalloc */ + + for (code = 255; code >= 0; --code) { + tab_suffixof(code) = (unsigned char) code; + } + + do { + resetbuf: + { + int i; + int e; + int o; + + o = posbits >> 3; + e = insize - o; + + for (i = 0; i < e; ++i) + inbuf[i] = inbuf[i + o]; + + insize = e; + posbits = 0; + } + + if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) { + rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ); +//error check?? + insize += rsize; + } + + inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 : + (insize << 3) - (n_bits - 1)); + + while (inbits > posbits) { + if (free_ent > maxcode) { + posbits = + ((posbits - 1) + + ((n_bits << 3) - + (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); + ++n_bits; + if (n_bits == maxbits) { + maxcode = maxmaxcode; + } else { + maxcode = MAXCODE(n_bits) - 1; + } + bitmask = (1 << n_bits) - 1; + goto resetbuf; + } + { + unsigned char *p = &inbuf[posbits >> 3]; + + code = ((((long) (p[0])) | ((long) (p[1]) << 8) | + ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask; + } + posbits += n_bits; + + + if (oldcode == -1) { + oldcode = code; + finchar = (int) oldcode; + outbuf[outpos++] = (unsigned char) finchar; + continue; + } + + if (code == CLEAR && block_mode) { + clear_tab_prefixof(); + free_ent = FIRST - 1; + posbits = + ((posbits - 1) + + ((n_bits << 3) - + (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); + n_bits = INIT_BITS; + maxcode = MAXCODE(INIT_BITS) - 1; + bitmask = (1 << INIT_BITS) - 1; + goto resetbuf; + } + + incode = code; + stackp = de_stack; + + /* Special case for KwKwK string. */ + if (code >= free_ent) { + if (code > free_ent) { + unsigned char *p; + + posbits -= n_bits; + p = &inbuf[posbits >> 3]; + + bb_error_msg + ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)", + insize, posbits, p[-1], p[0], p[1], p[2], p[3], + (posbits & 07)); + bb_error_msg("corrupted data"); + goto err; + } + + *--stackp = (unsigned char) finchar; + code = oldcode; + } + + /* Generate output characters in reverse order */ + while ((long) code >= (long) 256) { + *--stackp = tab_suffixof(code); + code = tab_prefixof(code); + } + + finchar = tab_suffixof(code); + *--stackp = (unsigned char) finchar; + + /* And put them out in forward order */ + { + int i; + + i = de_stack - stackp; + if (outpos + i >= OBUFSIZ) { + do { + if (i > OBUFSIZ - outpos) { + i = OBUFSIZ - outpos; + } + + if (i > 0) { + memcpy(outbuf + outpos, stackp, i); + outpos += i; + } + + if (outpos >= OBUFSIZ) { + full_write(fd_out, outbuf, outpos); +//error check?? + IF_DESKTOP(total_written += outpos;) + outpos = 0; + } + stackp += i; + i = de_stack - stackp; + } while (i > 0); + } else { + memcpy(outbuf + outpos, stackp, i); + outpos += i; + } + } + + /* Generate the new entry. */ + code = free_ent; + if (code < maxmaxcode) { + tab_prefixof(code) = (unsigned short) oldcode; + tab_suffixof(code) = (unsigned char) finchar; + free_ent = code + 1; + } + + /* Remember previous code. */ + oldcode = incode; + } + + } while (rsize > 0); + + if (outpos > 0) { + full_write(fd_out, outbuf, outpos); +//error check?? + IF_DESKTOP(total_written += outpos;) + } + + retval = IF_DESKTOP(total_written) + 0; + err: + free(inbuf); + free(outbuf); + free(htab); + free(codetab); + return retval; +} diff --git a/archival/libarchive/decompress_unlzma.c b/archival/libarchive/decompress_unlzma.c new file mode 100644 index 000000000..a04714341 --- /dev/null +++ b/archival/libarchive/decompress_unlzma.c @@ -0,0 +1,465 @@ +/* vi: set sw=4 ts=4: */ +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +#if ENABLE_FEATURE_LZMA_FAST +# define speed_inline ALWAYS_INLINE +# define size_inline +#else +# define speed_inline +# define size_inline ALWAYS_INLINE +#endif + + +typedef struct { + int fd; + uint8_t *ptr; + +/* Was keeping rc on stack in unlzma and separately allocating buffer, + * but with "buffer 'attached to' allocated rc" code is smaller: */ + /* uint8_t *buffer; */ +#define RC_BUFFER ((uint8_t*)(rc+1)) + + uint8_t *buffer_end; + +/* Had provisions for variable buffer, but we don't need it here */ + /* int buffer_size; */ +#define RC_BUFFER_SIZE 0x10000 + + uint32_t code; + uint32_t range; + uint32_t bound; +} rc_t; + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +/* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */ +static size_inline void rc_read(rc_t *rc) +{ + int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); +//TODO: return -1 instead +//This will make unlzma delete broken unpacked file on unpack errors + if (buffer_size <= 0) + bb_error_msg_and_die("unexpected EOF"); + rc->ptr = RC_BUFFER; + rc->buffer_end = RC_BUFFER + buffer_size; +} + +/* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */ +static void rc_do_normalize(rc_t *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} + +/* Called once */ +static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */ +{ + int i; + rc_t *rc; + + rc = xzalloc(sizeof(*rc) + RC_BUFFER_SIZE); + + rc->fd = fd; + /* rc->ptr = rc->buffer_end; */ + + for (i = 0; i < 5; i++) { +#if ENABLE_FEATURE_LZMA_FAST + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; +#else + rc_do_normalize(rc); +#endif + } + rc->range = 0xFFFFFFFF; + return rc; +} + +/* Called once */ +static ALWAYS_INLINE void rc_free(rc_t *rc) +{ + free(rc); +} + +static ALWAYS_INLINE void rc_normalize(rc_t *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) { + rc_do_normalize(rc); + } +} + +/* rc_is_bit_1 is called 9 times */ +static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + if (rc->code < rc->bound) { + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; + return 0; + } + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; + return 1; +} + +/* Called 4 times in unlzma loop */ +static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) +{ + int ret = rc_is_bit_1(rc, p); + *symbol = *symbol * 2 + ret; + return ret; +} + +/* Called once */ +static ALWAYS_INLINE int rc_direct_bit(rc_t *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static speed_inline void +rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +typedef struct { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} PACKED lzma_header_t; + + +/* #defines will force compiler to compute/optimize each one with each usage. + * Have heart and use enum instead. */ +enum { + LZMA_BASE_SIZE = 1846, + LZMA_LIT_SIZE = 768, + + LZMA_NUM_POS_BITS_MAX = 4, + + LZMA_LEN_NUM_LOW_BITS = 3, + LZMA_LEN_NUM_MID_BITS = 3, + LZMA_LEN_NUM_HIGH_BITS = 8, + + LZMA_LEN_CHOICE = 0, + LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1), + LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1), + LZMA_LEN_MID = (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))), + LZMA_LEN_HIGH = (LZMA_LEN_MID \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))), + LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)), + + LZMA_NUM_STATES = 12, + LZMA_NUM_LIT_STATES = 7, + + LZMA_START_POS_MODEL_INDEX = 4, + LZMA_END_POS_MODEL_INDEX = 14, + LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)), + + LZMA_NUM_POS_SLOT_BITS = 6, + LZMA_NUM_LEN_TO_POS_STATES = 4, + + LZMA_NUM_ALIGN_BITS = 4, + + LZMA_MATCH_MIN_LEN = 2, + + LZMA_IS_MATCH = 0, + LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), + LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES), + LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES), + LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES), + LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES), + LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), + LZMA_SPEC_POS = (LZMA_POS_SLOT \ + + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)), + LZMA_ALIGN = (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX), + LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)), + LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS), + LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS), +}; + + +IF_DESKTOP(long long) int FAST_FUNC +unpack_lzma_stream(int src_fd, int dst_fd) +{ + IF_DESKTOP(long long total_written = 0;) + lzma_header_t header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint16_t *p; + int num_bits; + int num_probs; + rc_t *rc; + int i; + uint8_t *buffer; + uint8_t previous_byte = 0; + size_t buffer_pos = 0, global_pos = 0; + int len = 0; + int state = 0; + uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; + + if (full_read(src_fd, &header, sizeof(header)) != sizeof(header) + || header.pos >= (9 * 5 * 5) + ) { + bb_error_msg("bad lzma header"); + return -1; + } + + i = header.pos / 9; + lc = header.pos % 9; + pb = i / 5; + lp = i % 5; + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + header.dict_size = SWAP_LE32(header.dict_size); + header.dst_size = SWAP_LE64(header.dst_size); + + if (header.dict_size == 0) + header.dict_size++; + + buffer = xmalloc(MIN(header.dst_size, header.dict_size)); + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = xmalloc(num_probs * sizeof(*p)); + num_probs += LZMA_LITERAL - LZMA_BASE_SIZE; + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */ + + while (global_pos + buffer_pos < header.dst_size) { + int pos_state = (buffer_pos + global_pos) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; + + if (!rc_is_bit_1(rc, prob)) { + static const char next_state[LZMA_NUM_STATES] = + { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + int mi = 1; + + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) + + (previous_byte >> (8 - lc)) + ) + ) + ); + + if (state >= LZMA_NUM_LIT_STATES) { + int match_byte; + uint32_t pos = buffer_pos - rep0; + + while (pos >= header.dict_size) + pos += header.dict_size; + match_byte = buffer[pos]; + do { + int bit; + + match_byte <<= 1; + bit = match_byte & 0x100; + bit ^= (rc_get_bit(rc, prob + 0x100 + bit + mi, &mi) << 8); /* 0x100 or 0 */ + if (bit) + break; + } while (mi < 0x100); + } + while (mi < 0x100) { + rc_get_bit(rc, prob + mi, &mi); + } + + state = next_state[state]; + + previous_byte = (uint8_t) mi; +#if ENABLE_FEATURE_LZMA_FAST + one_byte1: + buffer[buffer_pos++] = previous_byte; + if (buffer_pos == header.dict_size) { + buffer_pos = 0; + global_pos += header.dict_size; + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) + goto bad; + IF_DESKTOP(total_written += header.dict_size;) + } +#else + len = 1; + goto one_byte2; +#endif + } else { + int offset; + uint16_t *prob2; +#define prob_len prob2 + + prob2 = p + LZMA_IS_REP + state; + if (!rc_is_bit_1(rc, prob2)) { + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + state = state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob2 = p + LZMA_LEN_CODER; + } else { + prob2 += LZMA_IS_REP_G0 - LZMA_IS_REP; + if (!rc_is_bit_1(rc, prob2)) { + prob2 = (p + LZMA_IS_REP_0_LONG + + (state << LZMA_NUM_POS_BITS_MAX) + + pos_state + ); + if (!rc_is_bit_1(rc, prob2)) { +#if ENABLE_FEATURE_LZMA_FAST + uint32_t pos = buffer_pos - rep0; + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; + while (pos >= header.dict_size) + pos += header.dict_size; + previous_byte = buffer[pos]; + goto one_byte1; +#else + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; + len = 1; + goto string; +#endif + } + } else { + uint32_t distance; + + prob2 += LZMA_IS_REP_G1 - LZMA_IS_REP_G0; + distance = rep1; + if (rc_is_bit_1(rc, prob2)) { + prob2 += LZMA_IS_REP_G2 - LZMA_IS_REP_G1; + distance = rep2; + if (rc_is_bit_1(rc, prob2)) { + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob2 = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob2 + LZMA_LEN_CHOICE; + num_bits = LZMA_LEN_NUM_LOW_BITS; + if (!rc_is_bit_1(rc, prob_len)) { + prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE + + (pos_state << LZMA_LEN_NUM_LOW_BITS); + offset = 0; + } else { + prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE; + if (!rc_is_bit_1(rc, prob_len)) { + prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2 + + (pos_state << LZMA_LEN_NUM_MID_BITS); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits += LZMA_LEN_NUM_MID_BITS - LZMA_LEN_NUM_LOW_BITS; + } else { + prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits += LZMA_LEN_NUM_HIGH_BITS - LZMA_LEN_NUM_LOW_BITS; + } + } + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (state < 4) { + int pos_slot; + uint16_t *prob3; + + state += LZMA_NUM_LIT_STATES; + prob3 = p + LZMA_POS_SLOT + + ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob3, + LZMA_NUM_POS_SLOT_BITS, &pos_slot); + rep0 = pos_slot; + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + int i2, mi2, num_bits2 = (pos_slot >> 1) - 1; + rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + rep0 <<= num_bits2; + prob3 = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; + } else { + for (; num_bits2 != LZMA_NUM_ALIGN_BITS; num_bits2--) + rep0 = (rep0 << 1) | rc_direct_bit(rc); + rep0 <<= LZMA_NUM_ALIGN_BITS; + prob3 = p + LZMA_ALIGN; + } + i2 = 1; + mi2 = 1; + while (num_bits2--) { + if (rc_get_bit(rc, prob3 + mi2, &mi2)) + rep0 |= i2; + i2 <<= 1; + } + } + if (++rep0 == 0) + break; + } + + len += LZMA_MATCH_MIN_LEN; + IF_NOT_FEATURE_LZMA_FAST(string:) + do { + uint32_t pos = buffer_pos - rep0; + while (pos >= header.dict_size) + pos += header.dict_size; + previous_byte = buffer[pos]; + IF_NOT_FEATURE_LZMA_FAST(one_byte2:) + buffer[buffer_pos++] = previous_byte; + if (buffer_pos == header.dict_size) { + buffer_pos = 0; + global_pos += header.dict_size; + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) + goto bad; + IF_DESKTOP(total_written += header.dict_size;) + } + len--; + } while (len != 0 && buffer_pos < header.dst_size); + } + } + + { + IF_NOT_DESKTOP(int total_written = 0; /* success */) + IF_DESKTOP(total_written += buffer_pos;) + if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) { + bad: + total_written = -1; /* failure */ + } + rc_free(rc); + free(p); + free(buffer); + return total_written; + } +} diff --git a/archival/libarchive/decompress_unxz.c b/archival/libarchive/decompress_unxz.c new file mode 100644 index 000000000..e90dfb06f --- /dev/null +++ b/archival/libarchive/decompress_unxz.c @@ -0,0 +1,98 @@ +/* + * This file uses XZ Embedded library code which is written + * by Lasse Collin + * and Igor Pavlov + * + * See README file in unxz/ directory for more information. + * + * This file is: + * Copyright (C) 2010 Denys Vlasenko + * Licensed under GPLv2, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +#define XZ_FUNC FAST_FUNC +#define XZ_EXTERN static + +#define XZ_DEC_DYNALLOC + +/* Skip check (rather than fail) of unsupported hash functions */ +#define XZ_DEC_ANY_CHECK 1 + +/* We use our own crc32 function */ +#define XZ_INTERNAL_CRC32 0 +static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + return ~crc32_block_endian0(~crc, buf, size, global_crc32_table); +} + +/* We use arch-optimized unaligned accessors */ +#define get_unaligned_le32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_LE32(v); }) +#define get_unaligned_be32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_BE32(v); }) +#define put_unaligned_le32(val, buf) move_to_unaligned16(buf, SWAP_LE32(val)) +#define put_unaligned_be32(val, buf) move_to_unaligned16(buf, SWAP_BE32(val)) + +#include "unxz/xz_dec_bcj.c" +#include "unxz/xz_dec_lzma2.c" +#include "unxz/xz_dec_stream.c" + +IF_DESKTOP(long long) int FAST_FUNC +unpack_xz_stream(int src_fd, int dst_fd) +{ + struct xz_buf iobuf; + struct xz_dec *state; + unsigned char *membuf; + IF_DESKTOP(long long) int total = 0; + + if (!global_crc32_table) + global_crc32_table = crc32_filltable(NULL, /*endian:*/ 0); + + memset(&iobuf, 0, sizeof(iobuf)); + /* Preload XZ file signature */ + membuf = (void*) strcpy(xmalloc(2 * BUFSIZ), HEADER_MAGIC); + iobuf.in = membuf; + iobuf.in_size = HEADER_MAGIC_SIZE; + iobuf.out = membuf + BUFSIZ; + iobuf.out_size = BUFSIZ; + + /* Limit memory usage to about 64 MiB. */ + state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024); + + while (1) { + enum xz_ret r; + + if (iobuf.in_pos == iobuf.in_size) { + int rd = safe_read(src_fd, membuf, BUFSIZ); + if (rd < 0) { + bb_error_msg(bb_msg_read_error); + total = -1; + break; + } + iobuf.in_size = rd; + iobuf.in_pos = 0; + } +// bb_error_msg(">in pos:%d size:%d out pos:%d size:%d", +// iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size); + r = xz_dec_run(state, &iobuf); +// bb_error_msg(" + * based on gzip sources + * + * Adjusted further by Erik Andersen to support + * files as well as stdin/stdout, and to generally behave itself wrt + * command line handling. + * + * General cleanup to better adhere to the style guide and make use of standard + * busybox functions by Glenn McGrath + * + * read_gz interface + associated hacking by Laurence Anderson + * + * Fixed huft_build() so decoding end-of-block code does not grab more bits + * than necessary (this is required by unzip applet), added inflate_cleanup() + * to free leaked bytebuffer memory (used in unzip.c), and some minor style + * guide cleanups by Ed Clark + * + * gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface + * Copyright (C) 1992-1993 Jean-loup Gailly + * The unzip code was written and put in the public domain by Mark Adler. + * Portions of the lzw code are derived from the public domain 'compress' + * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies, + * Ken Turkowski, Dave Mack and Peter Jannesen. + * + * See the file algorithm.doc for the compression algorithms and file formats. + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include +#include "libbb.h" +#include "archive.h" + +typedef struct huft_t { + unsigned char e; /* number of extra bits or operation */ + unsigned char b; /* number of bits in this code or subcode */ + union { + unsigned short n; /* literal, length base, or distance base */ + struct huft_t *t; /* pointer to next level of table */ + } v; +} huft_t; + +enum { + /* gunzip_window size--must be a power of two, and + * at least 32K for zip's deflate method */ + GUNZIP_WSIZE = 0x8000, + /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ + BMAX = 16, /* maximum bit length of any code (16 for explode) */ + N_MAX = 288, /* maximum number of codes in any set */ +}; + + +/* This is somewhat complex-looking arrangement, but it allows + * to place decompressor state either in bss or in + * malloc'ed space simply by changing #defines below. + * Sizes on i386: + * text data bss dec hex + * 5256 0 108 5364 14f4 - bss + * 4915 0 0 4915 1333 - malloc + */ +#define STATE_IN_BSS 0 +#define STATE_IN_MALLOC 1 + + +typedef struct state_t { + off_t gunzip_bytes_out; /* number of output bytes */ + uint32_t gunzip_crc; + + int gunzip_src_fd; + unsigned gunzip_outbuf_count; /* bytes in output buffer */ + + unsigned char *gunzip_window; + + uint32_t *gunzip_crc_table; + + /* bitbuffer */ + unsigned gunzip_bb; /* bit buffer */ + unsigned char gunzip_bk; /* bits in bit buffer */ + + /* input (compressed) data */ + unsigned char *bytebuffer; /* buffer itself */ + off_t to_read; /* compressed bytes to read (unzip only, -1 for gunzip) */ +// unsigned bytebuffer_max; /* buffer size */ + unsigned bytebuffer_offset; /* buffer position */ + unsigned bytebuffer_size; /* how much data is there (size <= max) */ + + /* private data of inflate_codes() */ + unsigned inflate_codes_ml; /* masks for bl and bd bits */ + unsigned inflate_codes_md; /* masks for bl and bd bits */ + unsigned inflate_codes_bb; /* bit buffer */ + unsigned inflate_codes_k; /* number of bits in bit buffer */ + unsigned inflate_codes_w; /* current gunzip_window position */ + huft_t *inflate_codes_tl; + huft_t *inflate_codes_td; + unsigned inflate_codes_bl; + unsigned inflate_codes_bd; + unsigned inflate_codes_nn; /* length and index for copy */ + unsigned inflate_codes_dd; + + smallint resume_copy; + + /* private data of inflate_get_next_window() */ + smallint method; /* method == -1 for stored, -2 for codes */ + smallint need_another_block; + smallint end_reached; + + /* private data of inflate_stored() */ + unsigned inflate_stored_n; + unsigned inflate_stored_b; + unsigned inflate_stored_k; + unsigned inflate_stored_w; + + const char *error_msg; + jmp_buf error_jmp; +} state_t; +#define gunzip_bytes_out (S()gunzip_bytes_out ) +#define gunzip_crc (S()gunzip_crc ) +#define gunzip_src_fd (S()gunzip_src_fd ) +#define gunzip_outbuf_count (S()gunzip_outbuf_count) +#define gunzip_window (S()gunzip_window ) +#define gunzip_crc_table (S()gunzip_crc_table ) +#define gunzip_bb (S()gunzip_bb ) +#define gunzip_bk (S()gunzip_bk ) +#define to_read (S()to_read ) +// #define bytebuffer_max (S()bytebuffer_max ) +// Both gunzip and unzip can use constant buffer size now (16k): +#define bytebuffer_max 0x4000 +#define bytebuffer (S()bytebuffer ) +#define bytebuffer_offset (S()bytebuffer_offset ) +#define bytebuffer_size (S()bytebuffer_size ) +#define inflate_codes_ml (S()inflate_codes_ml ) +#define inflate_codes_md (S()inflate_codes_md ) +#define inflate_codes_bb (S()inflate_codes_bb ) +#define inflate_codes_k (S()inflate_codes_k ) +#define inflate_codes_w (S()inflate_codes_w ) +#define inflate_codes_tl (S()inflate_codes_tl ) +#define inflate_codes_td (S()inflate_codes_td ) +#define inflate_codes_bl (S()inflate_codes_bl ) +#define inflate_codes_bd (S()inflate_codes_bd ) +#define inflate_codes_nn (S()inflate_codes_nn ) +#define inflate_codes_dd (S()inflate_codes_dd ) +#define resume_copy (S()resume_copy ) +#define method (S()method ) +#define need_another_block (S()need_another_block ) +#define end_reached (S()end_reached ) +#define inflate_stored_n (S()inflate_stored_n ) +#define inflate_stored_b (S()inflate_stored_b ) +#define inflate_stored_k (S()inflate_stored_k ) +#define inflate_stored_w (S()inflate_stored_w ) +#define error_msg (S()error_msg ) +#define error_jmp (S()error_jmp ) + +/* This is a generic part */ +#if STATE_IN_BSS /* Use global data segment */ +#define DECLARE_STATE /*nothing*/ +#define ALLOC_STATE /*nothing*/ +#define DEALLOC_STATE ((void)0) +#define S() state. +#define PASS_STATE /*nothing*/ +#define PASS_STATE_ONLY /*nothing*/ +#define STATE_PARAM /*nothing*/ +#define STATE_PARAM_ONLY void +static state_t state; +#endif + +#if STATE_IN_MALLOC /* Use malloc space */ +#define DECLARE_STATE state_t *state +#define ALLOC_STATE (state = xzalloc(sizeof(*state))) +#define DEALLOC_STATE free(state) +#define S() state-> +#define PASS_STATE state, +#define PASS_STATE_ONLY state +#define STATE_PARAM state_t *state, +#define STATE_PARAM_ONLY state_t *state +#endif + + +static const uint16_t mask_bits[] ALIGN2 = { + 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + +/* Copy lengths for literal codes 257..285 */ +static const uint16_t cplens[] ALIGN2 = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 +}; + +/* note: see note #13 above about the 258 in this list. */ +/* Extra bits for literal codes 257..285 */ +static const uint8_t cplext[] ALIGN1 = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, + 5, 5, 5, 0, 99, 99 +}; /* 99 == invalid */ + +/* Copy offsets for distance codes 0..29 */ +static const uint16_t cpdist[] ALIGN2 = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 +}; + +/* Extra bits for distance codes */ +static const uint8_t cpdext[] ALIGN1 = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13 +}; + +/* Tables for deflate from PKZIP's appnote.txt. */ +/* Order of the bit length code lengths */ +static const uint8_t border[] ALIGN1 = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + + +/* + * Free the malloc'ed tables built by huft_build(), which makes a linked + * list of the tables it made, with the links in a dummy first entry of + * each table. + * t: table to free + */ +static void huft_free(huft_t *p) +{ + huft_t *q; + + /* Go through linked list, freeing from the malloced (t[-1]) address. */ + while (p) { + q = (--p)->v.t; + free(p); + p = q; + } +} + +static void huft_free_all(STATE_PARAM_ONLY) +{ + huft_free(inflate_codes_tl); + huft_free(inflate_codes_td); + inflate_codes_tl = NULL; + inflate_codes_td = NULL; +} + +static void abort_unzip(STATE_PARAM_ONLY) NORETURN; +static void abort_unzip(STATE_PARAM_ONLY) +{ + huft_free_all(PASS_STATE_ONLY); + longjmp(error_jmp, 1); +} + +static unsigned fill_bitbuffer(STATE_PARAM unsigned bitbuffer, unsigned *current, const unsigned required) +{ + while (*current < required) { + if (bytebuffer_offset >= bytebuffer_size) { + unsigned sz = bytebuffer_max - 4; + if (to_read >= 0 && to_read < sz) /* unzip only */ + sz = to_read; + /* Leave the first 4 bytes empty so we can always unwind the bitbuffer + * to the front of the bytebuffer */ + bytebuffer_size = safe_read(gunzip_src_fd, &bytebuffer[4], sz); + if ((int)bytebuffer_size < 1) { + error_msg = "unexpected end of file"; + abort_unzip(PASS_STATE_ONLY); + } + if (to_read >= 0) /* unzip only */ + to_read -= bytebuffer_size; + bytebuffer_size += 4; + bytebuffer_offset = 4; + } + bitbuffer |= ((unsigned) bytebuffer[bytebuffer_offset]) << *current; + bytebuffer_offset++; + *current += 8; + } + return bitbuffer; +} + + +/* Given a list of code lengths and a maximum table size, make a set of + * tables to decode that set of codes. Return zero on success, one if + * the given code set is incomplete (the tables are still built in this + * case), two if the input is invalid (all zero length codes or an + * oversubscribed set of lengths) - in this case stores NULL in *t. + * + * b: code lengths in bits (all assumed <= BMAX) + * n: number of codes (assumed <= N_MAX) + * s: number of simple-valued codes (0..s-1) + * d: list of base values for non-simple codes + * e: list of extra bits for non-simple codes + * t: result: starting table + * m: maximum lookup bits, returns actual + */ +static int huft_build(const unsigned *b, const unsigned n, + const unsigned s, const unsigned short *d, + const unsigned char *e, huft_t **t, unsigned *m) +{ + unsigned a; /* counter for codes of length k */ + unsigned c[BMAX + 1]; /* bit length count table */ + unsigned eob_len; /* length of end-of-block code (value 256) */ + unsigned f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int htl; /* table level */ + unsigned i; /* counter, current code */ + unsigned j; /* counter */ + int k; /* number of bits in current code */ + unsigned *p; /* pointer into c[], b[], or v[] */ + huft_t *q; /* points to current table */ + huft_t r; /* table entry for structure assignment */ + huft_t *u[BMAX]; /* table stack */ + unsigned v[N_MAX]; /* values in order of bit length */ + int ws[BMAX + 1]; /* bits decoded stack */ + int w; /* bits decoded */ + unsigned x[BMAX + 1]; /* bit offsets, then code stack */ + unsigned *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + unsigned z; /* number of entries in current table */ + + /* Length of EOB code, if any */ + eob_len = n > 256 ? b[256] : BMAX; + + *t = NULL; + + /* Generate counts for each bit length */ + memset(c, 0, sizeof(c)); + p = (unsigned *) b; /* cast allows us to reuse p for pointing to b */ + i = n; + do { + c[*p]++; /* assume all entries <= BMAX */ + p++; /* can't combine with above line (Solaris bug) */ + } while (--i); + if (c[0] == n) { /* null input - all zero length codes */ + *m = 0; + return 2; + } + + /* Find minimum and maximum length, bound *m by those */ + for (j = 1; (c[j] == 0) && (j <= BMAX); j++) + continue; + k = j; /* minimum code length */ + for (i = BMAX; (c[i] == 0) && i; i--) + continue; + g = i; /* maximum code length */ + *m = (*m < j) ? j : ((*m > i) ? i : *m); + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) { + y -= c[j]; + if (y < 0) + return 2; /* bad input: more codes than bits */ + } + y -= c[i]; + if (y < 0) + return 2; + c[i] += y; + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; + xp = x + 2; + while (--i) { /* note that i == g from above */ + j += *p++; + *xp++ = j; + } + + /* Make a table of values in order of bit lengths */ + p = (unsigned *) b; + i = 0; + do { + j = *p++; + if (j != 0) { + v[x[j]++] = i; + } + } while (++i < n); + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + htl = -1; /* no tables yet--level -1 */ + w = ws[0] = 0; /* bits decoded */ + u[0] = NULL; /* just to keep compilers happy */ + q = NULL; /* ditto */ + z = 0; /* ditto */ + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) { + a = c[k]; + while (a--) { + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > ws[htl + 1]) { + w = ws[++htl]; + + /* compute minimum size table less than or equal to *m bits */ + z = g - w; + z = z > *m ? *m : z; /* upper limit on table size */ + j = k - w; + f = 1 << j; + if (f > a + 1) { /* try a k-w bit table */ + /* too few codes for k-w bit table */ + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + while (++j < z) { /* try smaller tables up to z bits */ + f <<= 1; + if (f <= *++xp) { + break; /* enough codes to use up j bits */ + } + f -= *xp; /* else deduct codes from patterns */ + } + } + j = (w + j > eob_len && w < eob_len) ? eob_len - w : j; /* make EOB code end at table */ + z = 1 << j; /* table entries for j-bit table */ + ws[htl+1] = w + j; /* set bits decoded in stack */ + + /* allocate and link in new table */ + q = xzalloc((z + 1) * sizeof(huft_t)); + *t = q + 1; /* link to list for huft_free() */ + t = &(q->v.t); + u[htl] = ++q; /* table starts after link */ + + /* connect to last table, if there is one */ + if (htl) { + x[htl] = i; /* save pattern for backing up */ + r.b = (unsigned char) (w - ws[htl - 1]); /* bits to dump before this table */ + r.e = (unsigned char) (16 + j); /* bits in this table */ + r.v.t = q; /* pointer to this table */ + j = (i & ((1 << w) - 1)) >> ws[htl - 1]; + u[htl - 1][j] = r; /* connect to last table */ + } + } + + /* set up table entry in r */ + r.b = (unsigned char) (k - w); + if (p >= v + n) { + r.e = 99; /* out of values--invalid code */ + } else if (*p < s) { + r.e = (unsigned char) (*p < 256 ? 16 : 15); /* 256 is EOB code */ + r.v.n = (unsigned short) (*p++); /* simple code is just the value */ + } else { + r.e = (unsigned char) e[*p - s]; /* non-simple--look up in lists */ + r.v.n = d[*p++ - s]; + } + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) { + q[j] = r; + } + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) { + i ^= j; + } + i ^= j; + + /* backup over finished tables */ + while ((i & ((1 << w) - 1)) != x[htl]) { + w = ws[--htl]; + } + } + } + + /* return actual size of base table */ + *m = ws[1]; + + /* Return 1 if we were given an incomplete table */ + return y != 0 && g != 1; +} + + +/* + * inflate (decompress) the codes in a deflated (compressed) block. + * Return an error code or zero if it all goes ok. + * + * tl, td: literal/length and distance decoder tables + * bl, bd: number of bits decoded by tl[] and td[] + */ +/* called once from inflate_block */ + +/* map formerly local static variables to globals */ +#define ml inflate_codes_ml +#define md inflate_codes_md +#define bb inflate_codes_bb +#define k inflate_codes_k +#define w inflate_codes_w +#define tl inflate_codes_tl +#define td inflate_codes_td +#define bl inflate_codes_bl +#define bd inflate_codes_bd +#define nn inflate_codes_nn +#define dd inflate_codes_dd +static void inflate_codes_setup(STATE_PARAM unsigned my_bl, unsigned my_bd) +{ + bl = my_bl; + bd = my_bd; + /* make local copies of globals */ + bb = gunzip_bb; /* initialize bit buffer */ + k = gunzip_bk; + w = gunzip_outbuf_count; /* initialize gunzip_window position */ + /* inflate the coded data */ + ml = mask_bits[bl]; /* precompute masks for speed */ + md = mask_bits[bd]; +} +/* called once from inflate_get_next_window */ +static NOINLINE int inflate_codes(STATE_PARAM_ONLY) +{ + unsigned e; /* table entry flag/number of extra bits */ + huft_t *t; /* pointer to table entry */ + + if (resume_copy) + goto do_copy; + + while (1) { /* do until end of block */ + bb = fill_bitbuffer(PASS_STATE bb, &k, bl); + t = tl + ((unsigned) bb & ml); + e = t->e; + if (e > 16) + do { + if (e == 99) + abort_unzip(PASS_STATE_ONLY);; + bb >>= t->b; + k -= t->b; + e -= 16; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + t = t->v.t + ((unsigned) bb & mask_bits[e]); + e = t->e; + } while (e > 16); + bb >>= t->b; + k -= t->b; + if (e == 16) { /* then it's a literal */ + gunzip_window[w++] = (unsigned char) t->v.n; + if (w == GUNZIP_WSIZE) { + gunzip_outbuf_count = w; + //flush_gunzip_window(); + w = 0; + return 1; // We have a block to read + } + } else { /* it's an EOB or a length */ + /* exit if end of block */ + if (e == 15) { + break; + } + + /* get length of block to copy */ + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + nn = t->v.n + ((unsigned) bb & mask_bits[e]); + bb >>= e; + k -= e; + + /* decode distance of block to copy */ + bb = fill_bitbuffer(PASS_STATE bb, &k, bd); + t = td + ((unsigned) bb & md); + e = t->e; + if (e > 16) + do { + if (e == 99) + abort_unzip(PASS_STATE_ONLY); + bb >>= t->b; + k -= t->b; + e -= 16; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + t = t->v.t + ((unsigned) bb & mask_bits[e]); + e = t->e; + } while (e > 16); + bb >>= t->b; + k -= t->b; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + dd = w - t->v.n - ((unsigned) bb & mask_bits[e]); + bb >>= e; + k -= e; + + /* do the copy */ + do_copy: + do { + /* Was: nn -= (e = (e = GUNZIP_WSIZE - ((dd &= GUNZIP_WSIZE - 1) > w ? dd : w)) > nn ? nn : e); */ + /* Who wrote THAT?? rewritten as: */ + unsigned delta; + + dd &= GUNZIP_WSIZE - 1; + e = GUNZIP_WSIZE - (dd > w ? dd : w); + delta = w > dd ? w - dd : dd - w; + if (e > nn) e = nn; + nn -= e; + + /* copy to new buffer to prevent possible overwrite */ + if (delta >= e) { + memcpy(gunzip_window + w, gunzip_window + dd, e); + w += e; + dd += e; + } else { + /* do it slow to avoid memcpy() overlap */ + /* !NOMEMCPY */ + do { + gunzip_window[w++] = gunzip_window[dd++]; + } while (--e); + } + if (w == GUNZIP_WSIZE) { + gunzip_outbuf_count = w; + resume_copy = (nn != 0); + //flush_gunzip_window(); + w = 0; + return 1; + } + } while (nn); + resume_copy = 0; + } + } + + /* restore the globals from the locals */ + gunzip_outbuf_count = w; /* restore global gunzip_window pointer */ + gunzip_bb = bb; /* restore global bit buffer */ + gunzip_bk = k; + + /* normally just after call to inflate_codes, but save code by putting it here */ + /* free the decoding tables (tl and td), return */ + huft_free_all(PASS_STATE_ONLY); + + /* done */ + return 0; +} +#undef ml +#undef md +#undef bb +#undef k +#undef w +#undef tl +#undef td +#undef bl +#undef bd +#undef nn +#undef dd + + +/* called once from inflate_block */ +static void inflate_stored_setup(STATE_PARAM int my_n, int my_b, int my_k) +{ + inflate_stored_n = my_n; + inflate_stored_b = my_b; + inflate_stored_k = my_k; + /* initialize gunzip_window position */ + inflate_stored_w = gunzip_outbuf_count; +} +/* called once from inflate_get_next_window */ +static int inflate_stored(STATE_PARAM_ONLY) +{ + /* read and output the compressed data */ + while (inflate_stored_n--) { + inflate_stored_b = fill_bitbuffer(PASS_STATE inflate_stored_b, &inflate_stored_k, 8); + gunzip_window[inflate_stored_w++] = (unsigned char) inflate_stored_b; + if (inflate_stored_w == GUNZIP_WSIZE) { + gunzip_outbuf_count = inflate_stored_w; + //flush_gunzip_window(); + inflate_stored_w = 0; + inflate_stored_b >>= 8; + inflate_stored_k -= 8; + return 1; /* We have a block */ + } + inflate_stored_b >>= 8; + inflate_stored_k -= 8; + } + + /* restore the globals from the locals */ + gunzip_outbuf_count = inflate_stored_w; /* restore global gunzip_window pointer */ + gunzip_bb = inflate_stored_b; /* restore global bit buffer */ + gunzip_bk = inflate_stored_k; + return 0; /* Finished */ +} + + +/* + * decompress an inflated block + * e: last block flag + * + * GLOBAL VARIABLES: bb, kk, + */ +/* Return values: -1 = inflate_stored, -2 = inflate_codes */ +/* One callsite in inflate_get_next_window */ +static int inflate_block(STATE_PARAM smallint *e) +{ + unsigned ll[286 + 30]; /* literal/length and distance code lengths */ + unsigned t; /* block type */ + unsigned b; /* bit buffer */ + unsigned k; /* number of bits in bit buffer */ + + /* make local bit buffer */ + + b = gunzip_bb; + k = gunzip_bk; + + /* read in last block bit */ + b = fill_bitbuffer(PASS_STATE b, &k, 1); + *e = b & 1; + b >>= 1; + k -= 1; + + /* read in block type */ + b = fill_bitbuffer(PASS_STATE b, &k, 2); + t = (unsigned) b & 3; + b >>= 2; + k -= 2; + + /* restore the global bit buffer */ + gunzip_bb = b; + gunzip_bk = k; + + /* Do we see block type 1 often? Yes! + * TODO: fix performance problem (see below) */ + //bb_error_msg("blktype %d", t); + + /* inflate that block type */ + switch (t) { + case 0: /* Inflate stored */ + { + unsigned n; /* number of bytes in block */ + unsigned b_stored; /* bit buffer */ + unsigned k_stored; /* number of bits in bit buffer */ + + /* make local copies of globals */ + b_stored = gunzip_bb; /* initialize bit buffer */ + k_stored = gunzip_bk; + + /* go to byte boundary */ + n = k_stored & 7; + b_stored >>= n; + k_stored -= n; + + /* get the length and its complement */ + b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); + n = ((unsigned) b_stored & 0xffff); + b_stored >>= 16; + k_stored -= 16; + + b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); + if (n != (unsigned) ((~b_stored) & 0xffff)) { + abort_unzip(PASS_STATE_ONLY); /* error in compressed data */ + } + b_stored >>= 16; + k_stored -= 16; + + inflate_stored_setup(PASS_STATE n, b_stored, k_stored); + + return -1; + } + case 1: + /* Inflate fixed + * decompress an inflated type 1 (fixed Huffman codes) block. We should + * either replace this with a custom decoder, or at least precompute the + * Huffman tables. TODO */ + { + int i; /* temporary variable */ + unsigned bl; /* lookup bits for tl */ + unsigned bd; /* lookup bits for td */ + /* gcc 4.2.1 is too dumb to reuse stackspace. Moved up... */ + //unsigned ll[288]; /* length list for huft_build */ + + /* set up literal table */ + for (i = 0; i < 144; i++) + ll[i] = 8; + for (; i < 256; i++) + ll[i] = 9; + for (; i < 280; i++) + ll[i] = 7; + for (; i < 288; i++) /* make a complete, but wrong code set */ + ll[i] = 8; + bl = 7; + huft_build(ll, 288, 257, cplens, cplext, &inflate_codes_tl, &bl); + /* huft_build() never return nonzero - we use known data */ + + /* set up distance table */ + for (i = 0; i < 30; i++) /* make an incomplete code set */ + ll[i] = 5; + bd = 5; + huft_build(ll, 30, 0, cpdist, cpdext, &inflate_codes_td, &bd); + + /* set up data for inflate_codes() */ + inflate_codes_setup(PASS_STATE bl, bd); + + /* huft_free code moved into inflate_codes */ + + return -2; + } + case 2: /* Inflate dynamic */ + { + enum { dbits = 6 }; /* bits in base distance lookup table */ + enum { lbits = 9 }; /* bits in base literal/length lookup table */ + + huft_t *td; /* distance code table */ + unsigned i; /* temporary variables */ + unsigned j; + unsigned l; /* last length */ + unsigned m; /* mask for bit lengths table */ + unsigned n; /* number of lengths to get */ + unsigned bl; /* lookup bits for tl */ + unsigned bd; /* lookup bits for td */ + unsigned nb; /* number of bit length codes */ + unsigned nl; /* number of literal/length codes */ + unsigned nd; /* number of distance codes */ + + //unsigned ll[286 + 30];/* literal/length and distance code lengths */ + unsigned b_dynamic; /* bit buffer */ + unsigned k_dynamic; /* number of bits in bit buffer */ + + /* make local bit buffer */ + b_dynamic = gunzip_bb; + k_dynamic = gunzip_bk; + + /* read in table lengths */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); + nl = 257 + ((unsigned) b_dynamic & 0x1f); /* number of literal/length codes */ + + b_dynamic >>= 5; + k_dynamic -= 5; + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); + nd = 1 + ((unsigned) b_dynamic & 0x1f); /* number of distance codes */ + + b_dynamic >>= 5; + k_dynamic -= 5; + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 4); + nb = 4 + ((unsigned) b_dynamic & 0xf); /* number of bit length codes */ + + b_dynamic >>= 4; + k_dynamic -= 4; + if (nl > 286 || nd > 30) + abort_unzip(PASS_STATE_ONLY); /* bad lengths */ + + /* read in bit-length-code lengths */ + for (j = 0; j < nb; j++) { + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); + ll[border[j]] = (unsigned) b_dynamic & 7; + b_dynamic >>= 3; + k_dynamic -= 3; + } + for (; j < 19; j++) + ll[border[j]] = 0; + + /* build decoding table for trees - single level, 7 bit lookup */ + bl = 7; + i = huft_build(ll, 19, 19, NULL, NULL, &inflate_codes_tl, &bl); + if (i != 0) { + abort_unzip(PASS_STATE_ONLY); //return i; /* incomplete code set */ + } + + /* read in literal and distance code lengths */ + n = nl + nd; + m = mask_bits[bl]; + i = l = 0; + while ((unsigned) i < n) { + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, (unsigned)bl); + td = inflate_codes_tl + ((unsigned) b_dynamic & m); + j = td->b; + b_dynamic >>= j; + k_dynamic -= j; + j = td->v.n; + if (j < 16) { /* length of code in bits (0..15) */ + ll[i++] = l = j; /* save last length in l */ + } else if (j == 16) { /* repeat last length 3 to 6 times */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 2); + j = 3 + ((unsigned) b_dynamic & 3); + b_dynamic >>= 2; + k_dynamic -= 2; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = l; + } + } else if (j == 17) { /* 3 to 10 zero length codes */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); + j = 3 + ((unsigned) b_dynamic & 7); + b_dynamic >>= 3; + k_dynamic -= 3; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = 0; + } + l = 0; + } else { /* j == 18: 11 to 138 zero length codes */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 7); + j = 11 + ((unsigned) b_dynamic & 0x7f); + b_dynamic >>= 7; + k_dynamic -= 7; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = 0; + } + l = 0; + } + } + + /* free decoding table for trees */ + huft_free(inflate_codes_tl); + + /* restore the global bit buffer */ + gunzip_bb = b_dynamic; + gunzip_bk = k_dynamic; + + /* build the decoding tables for literal/length and distance codes */ + bl = lbits; + + i = huft_build(ll, nl, 257, cplens, cplext, &inflate_codes_tl, &bl); + if (i != 0) + abort_unzip(PASS_STATE_ONLY); + bd = dbits; + i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &inflate_codes_td, &bd); + if (i != 0) + abort_unzip(PASS_STATE_ONLY); + + /* set up data for inflate_codes() */ + inflate_codes_setup(PASS_STATE bl, bd); + + /* huft_free code moved into inflate_codes */ + + return -2; + } + default: + abort_unzip(PASS_STATE_ONLY); + } +} + +/* Two callsites, both in inflate_get_next_window */ +static void calculate_gunzip_crc(STATE_PARAM_ONLY) +{ + gunzip_crc = crc32_block_endian0(gunzip_crc, gunzip_window, gunzip_outbuf_count, gunzip_crc_table); + gunzip_bytes_out += gunzip_outbuf_count; +} + +/* One callsite in inflate_unzip_internal */ +static int inflate_get_next_window(STATE_PARAM_ONLY) +{ + gunzip_outbuf_count = 0; + + while (1) { + int ret; + + if (need_another_block) { + if (end_reached) { + calculate_gunzip_crc(PASS_STATE_ONLY); + end_reached = 0; + /* NB: need_another_block is still set */ + return 0; /* Last block */ + } + method = inflate_block(PASS_STATE &end_reached); + need_another_block = 0; + } + + switch (method) { + case -1: + ret = inflate_stored(PASS_STATE_ONLY); + break; + case -2: + ret = inflate_codes(PASS_STATE_ONLY); + break; + default: /* cannot happen */ + abort_unzip(PASS_STATE_ONLY); + } + + if (ret == 1) { + calculate_gunzip_crc(PASS_STATE_ONLY); + return 1; /* more data left */ + } + need_another_block = 1; /* end of that block */ + } + /* Doesnt get here */ +} + + +/* Called from unpack_gz_stream() and inflate_unzip() */ +static IF_DESKTOP(long long) int +inflate_unzip_internal(STATE_PARAM int in, int out) +{ + IF_DESKTOP(long long) int n = 0; + ssize_t nwrote; + + /* Allocate all global buffers (for DYN_ALLOC option) */ + gunzip_window = xmalloc(GUNZIP_WSIZE); + gunzip_outbuf_count = 0; + gunzip_bytes_out = 0; + gunzip_src_fd = in; + + /* (re) initialize state */ + method = -1; + need_another_block = 1; + resume_copy = 0; + gunzip_bk = 0; + gunzip_bb = 0; + + /* Create the crc table */ + gunzip_crc_table = crc32_filltable(NULL, 0); + gunzip_crc = ~0; + + error_msg = "corrupted data"; + if (setjmp(error_jmp)) { + /* Error from deep inside zip machinery */ + n = -1; + goto ret; + } + + while (1) { + int r = inflate_get_next_window(PASS_STATE_ONLY); + nwrote = full_write(out, gunzip_window, gunzip_outbuf_count); + if (nwrote != (ssize_t)gunzip_outbuf_count) { + bb_perror_msg("write"); + n = -1; + goto ret; + } + IF_DESKTOP(n += nwrote;) + if (r == 0) break; + } + + /* Store unused bytes in a global buffer so calling applets can access it */ + if (gunzip_bk >= 8) { + /* Undo too much lookahead. The next read will be byte aligned + * so we can discard unused bits in the last meaningful byte. */ + bytebuffer_offset--; + bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff; + gunzip_bb >>= 8; + gunzip_bk -= 8; + } + ret: + /* Cleanup */ + free(gunzip_window); + free(gunzip_crc_table); + return n; +} + + +/* External entry points */ + +/* For unzip */ + +IF_DESKTOP(long long) int FAST_FUNC +inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out) +{ + IF_DESKTOP(long long) int n; + DECLARE_STATE; + + ALLOC_STATE; + + to_read = compr_size; +// bytebuffer_max = 0x8000; + bytebuffer_offset = 4; + bytebuffer = xmalloc(bytebuffer_max); + n = inflate_unzip_internal(PASS_STATE in, out); + free(bytebuffer); + + res->crc = gunzip_crc; + res->bytes_out = gunzip_bytes_out; + DEALLOC_STATE; + return n; +} + + +/* For gunzip */ + +/* helpers first */ + +/* Top up the input buffer with at least n bytes. */ +static int top_up(STATE_PARAM unsigned n) +{ + int count = bytebuffer_size - bytebuffer_offset; + + if (count < (int)n) { + memmove(bytebuffer, &bytebuffer[bytebuffer_offset], count); + bytebuffer_offset = 0; + bytebuffer_size = full_read(gunzip_src_fd, &bytebuffer[count], bytebuffer_max - count); + if ((int)bytebuffer_size < 0) { + bb_error_msg(bb_msg_read_error); + return 0; + } + bytebuffer_size += count; + if (bytebuffer_size < n) + return 0; + } + return 1; +} + +static uint16_t buffer_read_le_u16(STATE_PARAM_ONLY) +{ + uint16_t res; +#if BB_LITTLE_ENDIAN + move_from_unaligned16(res, &bytebuffer[bytebuffer_offset]); +#else + res = bytebuffer[bytebuffer_offset]; + res |= bytebuffer[bytebuffer_offset + 1] << 8; +#endif + bytebuffer_offset += 2; + return res; +} + +static uint32_t buffer_read_le_u32(STATE_PARAM_ONLY) +{ + uint32_t res; +#if BB_LITTLE_ENDIAN + move_from_unaligned32(res, &bytebuffer[bytebuffer_offset]); +#else + res = bytebuffer[bytebuffer_offset]; + res |= bytebuffer[bytebuffer_offset + 1] << 8; + res |= bytebuffer[bytebuffer_offset + 2] << 16; + res |= bytebuffer[bytebuffer_offset + 3] << 24; +#endif + bytebuffer_offset += 4; + return res; +} + +static int check_header_gzip(STATE_PARAM unpack_info_t *info) +{ + union { + unsigned char raw[8]; + struct { + uint8_t gz_method; + uint8_t flags; + uint32_t mtime; + uint8_t xtra_flags_UNUSED; + uint8_t os_flags_UNUSED; + } PACKED formatted; + } header; + struct BUG_header { + char BUG_header[sizeof(header) == 8 ? 1 : -1]; + }; + + /* + * Rewind bytebuffer. We use the beginning because the header has 8 + * bytes, leaving enough for unwinding afterwards. + */ + bytebuffer_size -= bytebuffer_offset; + memmove(bytebuffer, &bytebuffer[bytebuffer_offset], bytebuffer_size); + bytebuffer_offset = 0; + + if (!top_up(PASS_STATE 8)) + return 0; + memcpy(header.raw, &bytebuffer[bytebuffer_offset], 8); + bytebuffer_offset += 8; + + /* Check the compression method */ + if (header.formatted.gz_method != 8) { + return 0; + } + + if (header.formatted.flags & 0x04) { + /* bit 2 set: extra field present */ + unsigned extra_short; + + if (!top_up(PASS_STATE 2)) + return 0; + extra_short = buffer_read_le_u16(PASS_STATE_ONLY); + if (!top_up(PASS_STATE extra_short)) + return 0; + /* Ignore extra field */ + bytebuffer_offset += extra_short; + } + + /* Discard original name and file comment if any */ + /* bit 3 set: original file name present */ + /* bit 4 set: file comment present */ + if (header.formatted.flags & 0x18) { + while (1) { + do { + if (!top_up(PASS_STATE 1)) + return 0; + } while (bytebuffer[bytebuffer_offset++] != 0); + if ((header.formatted.flags & 0x18) != 0x18) + break; + header.formatted.flags &= ~0x18; + } + } + + if (info) + info->mtime = SWAP_LE32(header.formatted.mtime); + + /* Read the header checksum */ + if (header.formatted.flags & 0x02) { + if (!top_up(PASS_STATE 2)) + return 0; + bytebuffer_offset += 2; + } + return 1; +} + +IF_DESKTOP(long long) int FAST_FUNC +unpack_gz_stream_with_info(int in, int out, unpack_info_t *info) +{ + uint32_t v32; + IF_DESKTOP(long long) int n; + DECLARE_STATE; + + n = 0; + + ALLOC_STATE; + to_read = -1; +// bytebuffer_max = 0x8000; + bytebuffer = xmalloc(bytebuffer_max); + gunzip_src_fd = in; + + again: + if (!check_header_gzip(PASS_STATE info)) { + bb_error_msg("corrupted data"); + n = -1; + goto ret; + } + n += inflate_unzip_internal(PASS_STATE in, out); + if (n < 0) + goto ret; + + if (!top_up(PASS_STATE 8)) { + bb_error_msg("corrupted data"); + n = -1; + goto ret; + } + + /* Validate decompression - crc */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((~gunzip_crc) != v32) { + bb_error_msg("crc error"); + n = -1; + goto ret; + } + + /* Validate decompression - size */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((uint32_t)gunzip_bytes_out != v32) { + bb_error_msg("incorrect length"); + n = -1; + } + + if (!top_up(PASS_STATE 2)) + goto ret; /* EOF */ + + if (bytebuffer[bytebuffer_offset] == 0x1f + && bytebuffer[bytebuffer_offset + 1] == 0x8b + ) { + bytebuffer_offset += 2; + goto again; + } + /* GNU gzip says: */ + /*bb_error_msg("decompression OK, trailing garbage ignored");*/ + + ret: + free(bytebuffer); + DEALLOC_STATE; + return n; +} + +IF_DESKTOP(long long) int FAST_FUNC +unpack_gz_stream(int in, int out) +{ + return unpack_gz_stream_with_info(in, out, NULL); +} diff --git a/archival/libarchive/filter_accept_all.c b/archival/libarchive/filter_accept_all.c new file mode 100644 index 000000000..e69deb679 --- /dev/null +++ b/archival/libarchive/filter_accept_all.c @@ -0,0 +1,17 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* Accept any non-null name, its not really a filter at all */ +char FAST_FUNC filter_accept_all(archive_handle_t *archive_handle) +{ + if (archive_handle->file_header->name) + return EXIT_SUCCESS; + return EXIT_FAILURE; +} diff --git a/archival/libarchive/filter_accept_list.c b/archival/libarchive/filter_accept_list.c new file mode 100644 index 000000000..a7640af79 --- /dev/null +++ b/archival/libarchive/filter_accept_list.c @@ -0,0 +1,19 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* + * Accept names that are in the accept list, ignoring reject list. + */ +char FAST_FUNC filter_accept_list(archive_handle_t *archive_handle) +{ + if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) + return EXIT_SUCCESS; + return EXIT_FAILURE; +} diff --git a/archival/libarchive/filter_accept_list_reassign.c b/archival/libarchive/filter_accept_list_reassign.c new file mode 100644 index 000000000..d80f71668 --- /dev/null +++ b/archival/libarchive/filter_accept_list_reassign.c @@ -0,0 +1,51 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* Built and used only if ENABLE_DPKG || ENABLE_DPKG_DEB */ + +/* + * Reassign the subarchive metadata parser based on the filename extension + * e.g. if its a .tar.gz modify archive_handle->sub_archive to process a .tar.gz + * or if its a .tar.bz2 make archive_handle->sub_archive handle that + */ +char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle) +{ + /* Check the file entry is in the accept list */ + if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) { + const char *name_ptr; + + /* Find extension */ + name_ptr = strrchr(archive_handle->file_header->name, '.'); + if (!name_ptr) + return EXIT_FAILURE; + name_ptr++; + + /* Modify the subarchive handler based on the extension */ + if (ENABLE_FEATURE_SEAMLESS_GZ + && strcmp(name_ptr, "gz") == 0 + ) { + archive_handle->dpkg__action_data_subarchive = get_header_tar_gz; + return EXIT_SUCCESS; + } + if (ENABLE_FEATURE_SEAMLESS_BZ2 + && strcmp(name_ptr, "bz2") == 0 + ) { + archive_handle->dpkg__action_data_subarchive = get_header_tar_bz2; + return EXIT_SUCCESS; + } + if (ENABLE_FEATURE_SEAMLESS_LZMA + && strcmp(name_ptr, "lzma") == 0 + ) { + archive_handle->dpkg__action_data_subarchive = get_header_tar_lzma; + return EXIT_SUCCESS; + } + } + return EXIT_FAILURE; +} diff --git a/archival/libarchive/filter_accept_reject_list.c b/archival/libarchive/filter_accept_reject_list.c new file mode 100644 index 000000000..3e86cca65 --- /dev/null +++ b/archival/libarchive/filter_accept_reject_list.c @@ -0,0 +1,36 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* + * Accept names that are in the accept list and not in the reject list + */ +char FAST_FUNC filter_accept_reject_list(archive_handle_t *archive_handle) +{ + const char *key; + const llist_t *reject_entry; + const llist_t *accept_entry; + + key = archive_handle->file_header->name; + + /* If the key is in a reject list fail */ + reject_entry = find_list_entry2(archive_handle->reject, key); + if (reject_entry) { + return EXIT_FAILURE; + } + accept_entry = find_list_entry2(archive_handle->accept, key); + + /* Fail if an accept list was specified and the key wasnt in there */ + if ((accept_entry == NULL) && archive_handle->accept) { + return EXIT_FAILURE; + } + + /* Accepted */ + return EXIT_SUCCESS; +} diff --git a/archival/libarchive/find_list_entry.c b/archival/libarchive/find_list_entry.c new file mode 100644 index 000000000..5efd1af2e --- /dev/null +++ b/archival/libarchive/find_list_entry.c @@ -0,0 +1,54 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include +#include "libbb.h" +#include "archive.h" + +/* Find a string in a shell pattern list */ +const llist_t* FAST_FUNC find_list_entry(const llist_t *list, const char *filename) +{ + while (list) { + if (fnmatch(list->data, filename, 0) == 0) { + return list; + } + list = list->link; + } + return NULL; +} + +/* Same, but compares only path components present in pattern + * (extra trailing path components in filename are assumed to match) + */ +const llist_t* FAST_FUNC find_list_entry2(const llist_t *list, const char *filename) +{ + char buf[PATH_MAX]; + int pattern_slash_cnt; + const char *c; + char *d; + + while (list) { + c = list->data; + pattern_slash_cnt = 0; + while (*c) + if (*c++ == '/') pattern_slash_cnt++; + c = filename; + d = buf; + /* paranoia is better than buffer overflows */ + while (*c && d != buf + sizeof(buf)-1) { + if (*c == '/' && --pattern_slash_cnt < 0) + break; + *d++ = *c++; + } + *d = '\0'; + if (fnmatch(list->data, buf, 0) == 0) { + return list; + } + list = list->link; + } + return NULL; +} diff --git a/archival/libarchive/get_header_ar.c b/archival/libarchive/get_header_ar.c new file mode 100644 index 000000000..df603b111 --- /dev/null +++ b/archival/libarchive/get_header_ar.c @@ -0,0 +1,133 @@ +/* vi: set sw=4 ts=4: */ +/* Copyright 2001 Glenn McGrath. + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" +#include "ar.h" + +static unsigned read_num(const char *str, int base) +{ + /* This code works because + * on misformatted numbers bb_strtou returns all-ones */ + int err = bb_strtou(str, NULL, base); + if (err == -1) + bb_error_msg_and_die("invalid ar header"); + return err; +} + +char FAST_FUNC get_header_ar(archive_handle_t *archive_handle) +{ + file_header_t *typed = archive_handle->file_header; + unsigned size; + union { + char raw[60]; + struct ar_header formatted; + } ar; +#if ENABLE_FEATURE_AR_LONG_FILENAMES + static char *ar_long_names; + static unsigned ar_long_name_size; +#endif + + /* dont use xread as we want to handle the error ourself */ + if (read(archive_handle->src_fd, ar.raw, 60) != 60) { + /* End Of File */ + return EXIT_FAILURE; + } + + /* ar header starts on an even byte (2 byte aligned) + * '\n' is used for padding + */ + if (ar.raw[0] == '\n') { + /* fix up the header, we started reading 1 byte too early */ + memmove(ar.raw, &ar.raw[1], 59); + ar.raw[59] = xread_char(archive_handle->src_fd); + archive_handle->offset++; + } + archive_handle->offset += 60; + + if (ar.formatted.magic[0] != '`' || ar.formatted.magic[1] != '\n') + bb_error_msg_and_die("invalid ar header"); + + /* FIXME: more thorough routine would be in order here + * (we have something like that in tar) + * but for now we are lax. */ + ar.formatted.magic[0] = '\0'; /* else 4G-2 file will have size="4294967294`\n..." */ + typed->size = size = read_num(ar.formatted.size, 10); + + /* special filenames have '/' as the first character */ + if (ar.formatted.name[0] == '/') { + if (ar.formatted.name[1] == ' ') { + /* This is the index of symbols in the file for compilers */ + data_skip(archive_handle); + archive_handle->offset += size; + return get_header_ar(archive_handle); /* Return next header */ + } +#if ENABLE_FEATURE_AR_LONG_FILENAMES + if (ar.formatted.name[1] == '/') { + /* If the second char is a '/' then this entries data section + * stores long filename for multiple entries, they are stored + * in static variable long_names for use in future entries + */ + ar_long_name_size = size; + free(ar_long_names); + ar_long_names = xmalloc(size); + xread(archive_handle->src_fd, ar_long_names, size); + archive_handle->offset += size; + /* Return next header */ + return get_header_ar(archive_handle); + } +#else + bb_error_msg_and_die("long filenames not supported"); +#endif + } + /* Only size is always present, the rest may be missing in + * long filename pseudo file. Thus we decode the rest + * after dealing with long filename pseudo file. + */ + typed->mode = read_num(ar.formatted.mode, 8); + typed->mtime = read_num(ar.formatted.date, 10); + typed->uid = read_num(ar.formatted.uid, 10); + typed->gid = read_num(ar.formatted.gid, 10); + +#if ENABLE_FEATURE_AR_LONG_FILENAMES + if (ar.formatted.name[0] == '/') { + unsigned long_offset; + + /* The number after the '/' indicates the offset in the ar data section + * (saved in ar_long_names) that conatains the real filename */ + long_offset = read_num(&ar.formatted.name[1], 10); + if (long_offset >= ar_long_name_size) { + bb_error_msg_and_die("can't resolve long filename"); + } + typed->name = xstrdup(ar_long_names + long_offset); + } else +#endif + { + /* short filenames */ + typed->name = xstrndup(ar.formatted.name, 16); + } + + typed->name[strcspn(typed->name, " /")] = '\0'; + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_header(typed); +#if ENABLE_DPKG || ENABLE_DPKG_DEB + if (archive_handle->dpkg__sub_archive) { + while (archive_handle->dpkg__action_data_subarchive(archive_handle->dpkg__sub_archive) == EXIT_SUCCESS) + continue; + } else +#endif + archive_handle->action_data(archive_handle); + } else { + data_skip(archive_handle); + } + + archive_handle->offset += typed->size; + /* Set the file pointer to the correct spot, we may have been reading a compressed file */ + lseek(archive_handle->src_fd, archive_handle->offset, SEEK_SET); + + return EXIT_SUCCESS; +} diff --git a/archival/libarchive/get_header_cpio.c b/archival/libarchive/get_header_cpio.c new file mode 100644 index 000000000..3d99b492a --- /dev/null +++ b/archival/libarchive/get_header_cpio.c @@ -0,0 +1,186 @@ +/* vi: set sw=4 ts=4: */ +/* Copyright 2002 Laurence Anderson + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +typedef struct hardlinks_t { + struct hardlinks_t *next; + int inode; /* TODO: must match maj/min too! */ + int mode ; + int mtime; /* These three are useful only in corner case */ + int uid ; /* of hardlinks with zero size body */ + int gid ; + char name[1]; +} hardlinks_t; + +char FAST_FUNC get_header_cpio(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + char cpio_header[110]; + int namesize; + int major, minor, nlink, mode, inode; + unsigned size, uid, gid, mtime; + + /* There can be padding before archive header */ + data_align(archive_handle, 4); + + size = full_read(archive_handle->src_fd, cpio_header, 110); + if (size == 0) { + goto create_hardlinks; + } + if (size != 110) { + bb_error_msg_and_die("short read"); + } + archive_handle->offset += 110; + + if (strncmp(&cpio_header[0], "07070", 5) != 0 + || (cpio_header[5] != '1' && cpio_header[5] != '2') + ) { + bb_error_msg_and_die("unsupported cpio format, use newc or crc"); + } + + if (sscanf(cpio_header + 6, + "%8x" "%8x" "%8x" "%8x" + "%8x" "%8x" "%8x" /*maj,min:*/ "%*16c" + /*rmaj,rmin:*/"%8x" "%8x" "%8x" /*chksum: "%*8c"*/, + &inode, &mode, &uid, &gid, + &nlink, &mtime, &size, + &major, &minor, &namesize) != 10) + bb_error_msg_and_die("damaged cpio file"); + file_header->mode = mode; + file_header->uid = uid; + file_header->gid = gid; + file_header->mtime = mtime; + file_header->size = size; + + namesize &= 0x1fff; /* paranoia: limit names to 8k chars */ + file_header->name = xzalloc(namesize + 1); + /* Read in filename */ + xread(archive_handle->src_fd, file_header->name, namesize); + if (file_header->name[0] == '/') { + /* Testcase: echo /etc/hosts | cpio -pvd /tmp + * Without this code, it tries to unpack /etc/hosts + * into "/etc/hosts", not "etc/hosts". + */ + char *p = file_header->name; + do p++; while (*p == '/'); + overlapping_strcpy(file_header->name, p); + } + archive_handle->offset += namesize; + + /* Update offset amount and skip padding before file contents */ + data_align(archive_handle, 4); + + if (strcmp(file_header->name, "TRAILER!!!") == 0) { + /* Always round up. ">> 9" divides by 512 */ + archive_handle->cpio__blocks = (uoff_t)(archive_handle->offset + 511) >> 9; + goto create_hardlinks; + } + + file_header->link_target = NULL; + if (S_ISLNK(file_header->mode)) { + file_header->size &= 0x1fff; /* paranoia: limit names to 8k chars */ + file_header->link_target = xzalloc(file_header->size + 1); + xread(archive_handle->src_fd, file_header->link_target, file_header->size); + archive_handle->offset += file_header->size; + file_header->size = 0; /* Stop possible seeks in future */ + } + +// TODO: data_extract_all can't deal with hardlinks to non-files... +// when fixed, change S_ISREG to !S_ISDIR here + + if (nlink > 1 && S_ISREG(file_header->mode)) { + hardlinks_t *new = xmalloc(sizeof(*new) + namesize); + new->inode = inode; + new->mode = mode ; + new->mtime = mtime; + new->uid = uid ; + new->gid = gid ; + strcpy(new->name, file_header->name); + /* Put file on a linked list for later */ + if (size == 0) { + new->next = archive_handle->cpio__hardlinks_to_create; + archive_handle->cpio__hardlinks_to_create = new; + return EXIT_SUCCESS; /* Skip this one */ + /* TODO: this breaks cpio -t (it does not show hardlinks) */ + } + new->next = archive_handle->cpio__created_hardlinks; + archive_handle->cpio__created_hardlinks = new; + } + file_header->device = makedev(major, minor); + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_data(archive_handle); +//TODO: run "echo /etc/hosts | cpio -pv /tmp" twice. On 2nd run: +//cpio: etc/hosts not created: newer or same age file exists +//etc/hosts <-- should NOT show it +//2 blocks <-- should say "0 blocks" + archive_handle->action_header(file_header); + } else { + data_skip(archive_handle); + } + + archive_handle->offset += file_header->size; + + free(file_header->link_target); + free(file_header->name); + file_header->link_target = NULL; + file_header->name = NULL; + + return EXIT_SUCCESS; + + create_hardlinks: + free(file_header->link_target); + free(file_header->name); + + while (archive_handle->cpio__hardlinks_to_create) { + hardlinks_t *cur; + hardlinks_t *make_me = archive_handle->cpio__hardlinks_to_create; + + archive_handle->cpio__hardlinks_to_create = make_me->next; + + memset(file_header, 0, sizeof(*file_header)); + file_header->mtime = make_me->mtime; + file_header->name = make_me->name; + file_header->mode = make_me->mode; + file_header->uid = make_me->uid; + file_header->gid = make_me->gid; + /*file_header->size = 0;*/ + /*file_header->link_target = NULL;*/ + + /* Try to find a file we are hardlinked to */ + cur = archive_handle->cpio__created_hardlinks; + while (cur) { + /* TODO: must match maj/min too! */ + if (cur->inode == make_me->inode) { + file_header->link_target = cur->name; + /* link_target != NULL, size = 0: "I am a hardlink" */ + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) + archive_handle->action_data(archive_handle); + free(make_me); + goto next_link; + } + cur = cur->next; + } + /* Oops... no file with such inode was created... do it now + * (happens when hardlinked files are empty (zero length)) */ + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) + archive_handle->action_data(archive_handle); + /* Move to the list of created hardlinked files */ + make_me->next = archive_handle->cpio__created_hardlinks; + archive_handle->cpio__created_hardlinks = make_me; + next_link: ; + } + + while (archive_handle->cpio__created_hardlinks) { + hardlinks_t *p = archive_handle->cpio__created_hardlinks; + archive_handle->cpio__created_hardlinks = p->next; + free(p); + } + + return EXIT_FAILURE; /* "No more files to process" */ +} diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c new file mode 100644 index 000000000..78b0ae25f --- /dev/null +++ b/archival/libarchive/get_header_tar.c @@ -0,0 +1,461 @@ +/* vi: set sw=4 ts=4: */ +/* Licensed under GPLv2 or later, see file LICENSE in this source tree. + * + * FIXME: + * In privileged mode if uname and gname map to a uid and gid then use the + * mapped value instead of the uid/gid values in tar header + * + * References: + * GNU tar and star man pages, + * Opengroup's ustar interchange format, + * http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html + */ + +#include "libbb.h" +#include "archive.h" + +typedef uint32_t aliased_uint32_t FIX_ALIASING; +typedef off_t aliased_off_t FIX_ALIASING; + + +/* NB: _DESTROYS_ str[len] character! */ +static unsigned long long getOctal(char *str, int len) +{ + unsigned long long v; + char *end; + /* NB: leading spaces are allowed. Using strtoull to handle that. + * The downside is that we accept e.g. "-123" too :( + */ + str[len] = '\0'; + v = strtoull(str, &end, 8); + /* std: "Each numeric field is terminated by one or more + * or NUL characters". We must support ' '! */ + if (*end != '\0' && *end != ' ') { + int8_t first = str[0]; + if (!(first & 0x80)) + bb_error_msg_and_die("corrupted octal value in tar header"); + /* + * GNU tar uses "base-256 encoding" for very large numbers. + * Encoding is binary, with highest bit always set as a marker + * and sign in next-highest bit: + * 80 00 .. 00 - zero + * bf ff .. ff - largest positive number + * ff ff .. ff - minus 1 + * c0 00 .. 00 - smallest negative number + * + * Example of tar file with 8914993153 (0x213600001) byte file. + * Field starts at offset 7c: + * 00070 30 30 30 00 30 30 30 30 30 30 30 00 80 00 00 00 |000.0000000.....| + * 00080 00 00 00 02 13 60 00 01 31 31 31 32 30 33 33 36 |.....`..11120336| + * + * NB: tarballs with NEGATIVE unix times encoded that way were seen! + */ + v = first; + /* Sign-extend using 6th bit: */ + v <<= sizeof(unsigned long long)*8 - 7; + v = (long long)v >> (sizeof(unsigned long long)*8 - 7); + while (--len != 0) + v = (v << 8) + (unsigned char) *str++; + } + return v; +} +#define GET_OCTAL(a) getOctal((a), sizeof(a)) + +#if ENABLE_FEATURE_TAR_SELINUX +/* Scan a PAX header for SELinux contexts, via "RHT.security.selinux" keyword. + * This is what Red Hat's patched version of tar uses. + */ +# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux" +static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, unsigned sz) +{ + char *buf, *p; + char *result; + + p = buf = xmalloc(sz + 1); + /* prevent bb_strtou from running off the buffer */ + buf[sz] = '\0'; + xread(archive_handle->src_fd, buf, sz); + archive_handle->offset += sz; + + result = NULL; + while (sz != 0) { + char *end, *value; + unsigned len; + + /* Every record has this format: "LEN NAME=VALUE\n" */ + len = bb_strtou(p, &end, 10); + /* expect errno to be EINVAL, because the character + * following the digits should be a space + */ + p += len; + sz -= len; + if ((int)sz < 0 + || len == 0 + || errno != EINVAL + || *end != ' ' + ) { + bb_error_msg("malformed extended header, skipped"); + // More verbose version: + //bb_error_msg("malformed extended header at %"OFF_FMT"d, skipped", + // archive_handle->offset - (sz + len)); + break; + } + /* overwrite the terminating newline with NUL + * (we do not bother to check that it *was* a newline) + */ + p[-1] = '\0'; + /* Is it selinux security context? */ + value = end + 1; + if (strncmp(value, SELINUX_CONTEXT_KEYWORD"=", sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1) == 0) { + value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1; + result = xstrdup(value); + break; + } + } + + free(buf); + return result; +} +#endif + +char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + struct tar_header_t tar; + char *cp; + int i, sum_u, sum; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + int sum_s; +#endif + int parse_names; + + /* Our "private data" */ +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS +# define p_longname (archive_handle->tar__longname) +# define p_linkname (archive_handle->tar__linkname) +#else +# define p_longname 0 +# define p_linkname 0 +#endif + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX + again: +#endif + /* Align header */ + data_align(archive_handle, 512); + + again_after_align: + +#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT + /* to prevent misdetection of bz2 sig */ + *(aliased_uint32_t*)&tar = 0; + i = full_read(archive_handle->src_fd, &tar, 512); + /* If GNU tar sees EOF in above read, it says: + * "tar: A lone zero block at N", where N = kilobyte + * where EOF was met (not EOF block, actual EOF!), + * and exits with EXIT_SUCCESS. + * We will mimic exit(EXIT_SUCCESS), although we will not mimic + * the message and we don't check whether we indeed + * saw zero block directly before this. */ + if (i == 0) { + xfunc_error_retval = 0; + short_read: + bb_error_msg_and_die("short read"); + } + if (i != 512) { + IF_FEATURE_TAR_AUTODETECT(goto autodetect;) + goto short_read; + } + +#else + i = 512; + xread(archive_handle->src_fd, &tar, i); +#endif + archive_handle->offset += i; + + /* If there is no filename its an empty header */ + if (tar.name[0] == 0 && tar.prefix[0] == 0) { + if (archive_handle->tar__end) { + /* Second consecutive empty header - end of archive. + * Read until the end to empty the pipe from gz or bz2 + */ + while (full_read(archive_handle->src_fd, &tar, 512) == 512) + continue; + return EXIT_FAILURE; + } + archive_handle->tar__end = 1; + return EXIT_SUCCESS; + } + archive_handle->tar__end = 0; + + /* Check header has valid magic, "ustar" is for the proper tar, + * five NULs are for the old tar format */ + if (strncmp(tar.magic, "ustar", 5) != 0 + && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY + || memcmp(tar.magic, "\0\0\0\0", 5) != 0) + ) { +#if ENABLE_FEATURE_TAR_AUTODETECT + char FAST_FUNC (*get_header_ptr)(archive_handle_t *); + uint16_t magic2; + + autodetect: + magic2 = *(uint16_t*)tar.name; + /* tar gz/bz autodetect: check for gz/bz2 magic. + * If we see the magic, and it is the very first block, + * we can switch to get_header_tar_gz/bz2/lzma(). + * Needs seekable fd. I wish recv(MSG_PEEK) works + * on any fd... */ +# if ENABLE_FEATURE_SEAMLESS_GZ + if (magic2 == GZIP_MAGIC) { + get_header_ptr = get_header_tar_gz; + } else +# endif +# if ENABLE_FEATURE_SEAMLESS_BZ2 + if (magic2 == BZIP2_MAGIC + && tar.name[2] == 'h' && isdigit(tar.name[3]) + ) { /* bzip2 */ + get_header_ptr = get_header_tar_bz2; + } else +# endif +# if ENABLE_FEATURE_SEAMLESS_XZ + //TODO: if (magic2 == XZ_MAGIC1)... + //else +# endif + goto err; + /* Two different causes for lseek() != 0: + * unseekable fd (would like to support that too, but...), + * or not first block (false positive, it's not .gz/.bz2!) */ + if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0) + goto err; + while (get_header_ptr(archive_handle) == EXIT_SUCCESS) + continue; + return EXIT_FAILURE; + err: +#endif /* FEATURE_TAR_AUTODETECT */ + bb_error_msg_and_die("invalid tar magic"); + } + + /* Do checksum on headers. + * POSIX says that checksum is done on unsigned bytes, but + * Sun and HP-UX gets it wrong... more details in + * GNU tar source. */ +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s = ' ' * sizeof(tar.chksum); +#endif + sum_u = ' ' * sizeof(tar.chksum); + for (i = 0; i < 148; i++) { + sum_u += ((unsigned char*)&tar)[i]; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s += ((signed char*)&tar)[i]; +#endif + } + for (i = 156; i < 512; i++) { + sum_u += ((unsigned char*)&tar)[i]; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s += ((signed char*)&tar)[i]; +#endif + } + /* This field does not need special treatment (getOctal) */ + { + char *endp; /* gcc likes temp var for &endp */ + sum = strtoul(tar.chksum, &endp, 8); + if ((*endp != '\0' && *endp != ' ') + || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) + ) { + bb_error_msg_and_die("invalid tar header checksum"); + } + } + /* don't use xstrtoul, tar.chksum may have leading spaces */ + sum = strtoul(tar.chksum, NULL, 8); + if (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) { + bb_error_msg_and_die("invalid tar header checksum"); + } + + /* 0 is reserved for high perf file, treat as normal file */ + if (!tar.typeflag) tar.typeflag = '0'; + parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7'); + + /* getOctal trashes subsequent field, therefore we call it + * on fields in reverse order */ + if (tar.devmajor[0]) { + char t = tar.prefix[0]; + /* we trash prefix[0] here, but we DO need it later! */ + unsigned minor = GET_OCTAL(tar.devminor); + unsigned major = GET_OCTAL(tar.devmajor); + file_header->device = makedev(major, minor); + tar.prefix[0] = t; + } + file_header->link_target = NULL; + if (!p_linkname && parse_names && tar.linkname[0]) { + file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname)); + /* FIXME: what if we have non-link object with link_target? */ + /* Will link_target be free()ed? */ + } +#if ENABLE_FEATURE_TAR_UNAME_GNAME + file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL; + file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL; +#endif + file_header->mtime = GET_OCTAL(tar.mtime); + file_header->size = GET_OCTAL(tar.size); + file_header->gid = GET_OCTAL(tar.gid); + file_header->uid = GET_OCTAL(tar.uid); + /* Set bits 0-11 of the files mode */ + file_header->mode = 07777 & GET_OCTAL(tar.mode); + + file_header->name = NULL; + if (!p_longname && parse_names) { + /* we trash mode[0] here, it's ok */ + //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain + tar.mode[0] = '\0'; + if (tar.prefix[0]) { + /* and padding[0] */ + //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain + tar.padding[0] = '\0'; + file_header->name = concat_path_file(tar.prefix, tar.name); + } else + file_header->name = xstrdup(tar.name); + } + + /* Set bits 12-15 of the files mode */ + /* (typeflag was not trashed because chksum does not use getOctal) */ + switch (tar.typeflag) { + /* busybox identifies hard links as being regular files with 0 size and a link name */ + case '1': + file_header->mode |= S_IFREG; + break; + case '7': + /* case 0: */ + case '0': +#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY + if (last_char_is(file_header->name, '/')) { + goto set_dir; + } +#endif + file_header->mode |= S_IFREG; + break; + case '2': + file_header->mode |= S_IFLNK; + /* have seen tarballs with size field containing + * the size of the link target's name */ + size0: + file_header->size = 0; + break; + case '3': + file_header->mode |= S_IFCHR; + goto size0; /* paranoia */ + case '4': + file_header->mode |= S_IFBLK; + goto size0; + case '5': + IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:) + file_header->mode |= S_IFDIR; + goto size0; + case '6': + file_header->mode |= S_IFIFO; + goto size0; +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + case 'L': + /* free: paranoia: tar with several consecutive longnames */ + free(p_longname); + /* For paranoia reasons we allocate extra NUL char */ + p_longname = xzalloc(file_header->size + 1); + /* We read ASCIZ string, including NUL */ + xread(archive_handle->src_fd, p_longname, file_header->size); + archive_handle->offset += file_header->size; + /* return get_header_tar(archive_handle); */ + /* gcc 4.1.1 didn't optimize it into jump */ + /* so we will do it ourself, this also saves stack */ + goto again; + case 'K': + free(p_linkname); + p_linkname = xzalloc(file_header->size + 1); + xread(archive_handle->src_fd, p_linkname, file_header->size); + archive_handle->offset += file_header->size; + /* return get_header_tar(archive_handle); */ + goto again; + case 'D': /* GNU dump dir */ + case 'M': /* Continuation of multi volume archive */ + case 'N': /* Old GNU for names > 100 characters */ + case 'S': /* Sparse file */ + case 'V': /* Volume header */ +#endif +#if !ENABLE_FEATURE_TAR_SELINUX + case 'g': /* pax global header */ + case 'x': /* pax extended header */ +#else + skip_ext_hdr: +#endif + { + off_t sz; + bb_error_msg("warning: skipping header '%c'", tar.typeflag); + sz = (file_header->size + 511) & ~(off_t)511; + archive_handle->offset += sz; + sz >>= 9; /* sz /= 512 but w/o contortions for signed div */ + while (sz--) + xread(archive_handle->src_fd, &tar, 512); + /* return get_header_tar(archive_handle); */ + goto again_after_align; + } +#if ENABLE_FEATURE_TAR_SELINUX + case 'g': /* pax global header */ + case 'x': { /* pax extended header */ + char **pp; + if ((uoff_t)file_header->size > 0xfffff) /* paranoia */ + goto skip_ext_hdr; + pp = (tar.typeflag == 'g') ? &archive_handle->tar__global_sctx : &archive_handle->tar__next_file_sctx; + free(*pp); + *pp = get_selinux_sctx_from_pax_hdr(archive_handle, file_header->size); + goto again; + } +#endif + default: + bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag); + } + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + if (p_longname) { + file_header->name = p_longname; + p_longname = NULL; + } + if (p_linkname) { + file_header->link_target = p_linkname; + p_linkname = NULL; + } +#endif + if (strncmp(file_header->name, "/../"+1, 3) == 0 + || strstr(file_header->name, "/../") + ) { + bb_error_msg_and_die("name with '..' encountered: '%s'", + file_header->name); + } + + /* Strip trailing '/' in directories */ + /* Must be done after mode is set as '/' is used to check if it's a directory */ + cp = last_char_is(file_header->name, '/'); + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_header(/*archive_handle->*/ file_header); + /* Note that we kill the '/' only after action_header() */ + /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */ + if (cp) + *cp = '\0'; + archive_handle->action_data(archive_handle); + if (archive_handle->accept || archive_handle->reject) + llist_add_to(&archive_handle->passed, file_header->name); + else /* Caller isn't interested in list of unpacked files */ + free(file_header->name); + } else { + data_skip(archive_handle); + free(file_header->name); + } + archive_handle->offset += file_header->size; + + free(file_header->link_target); + /* Do not free(file_header->name)! + * It might be inserted in archive_handle->passed - see above */ +#if ENABLE_FEATURE_TAR_UNAME_GNAME + free(file_header->tar__uname); + free(file_header->tar__gname); +#endif + return EXIT_SUCCESS; +} diff --git a/archival/libarchive/get_header_tar_bz2.c b/archival/libarchive/get_header_tar_bz2.c new file mode 100644 index 000000000..60d32069f --- /dev/null +++ b/archival/libarchive/get_header_tar_bz2.c @@ -0,0 +1,21 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + open_transformer(archive_handle->src_fd, unpack_bz2_stream_prime, "bunzip2"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libarchive/get_header_tar_gz.c b/archival/libarchive/get_header_tar_gz.c new file mode 100644 index 000000000..b09f8691c --- /dev/null +++ b/archival/libarchive/get_header_tar_gz.c @@ -0,0 +1,36 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle) +{ +#if BB_MMU + unsigned char magic[2]; +#endif + + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + /* Check gzip magic only if open_transformer will invoke unpack_gz_stream (MMU case). + * Otherwise, it will invoke an external helper "gunzip -cf" (NOMMU case) which will + * need the header. */ +#if BB_MMU + xread(archive_handle->src_fd, &magic, 2); + /* Can skip this check, but error message will be less clear */ + if ((magic[0] != 0x1f) || (magic[1] != 0x8b)) { + bb_error_msg_and_die("invalid gzip magic"); + } +#endif + + open_transformer(archive_handle->src_fd, unpack_gz_stream, "gunzip"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libarchive/get_header_tar_lzma.c b/archival/libarchive/get_header_tar_lzma.c new file mode 100644 index 000000000..da08e0c72 --- /dev/null +++ b/archival/libarchive/get_header_tar_lzma.c @@ -0,0 +1,24 @@ +/* vi: set sw=4 ts=4: */ +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs + * + * Licensed under GPLv2, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +char FAST_FUNC get_header_tar_lzma(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + open_transformer(archive_handle->src_fd, unpack_lzma_stream, "unlzma"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libarchive/header_list.c b/archival/libarchive/header_list.c new file mode 100644 index 000000000..c4fc75f38 --- /dev/null +++ b/archival/libarchive/header_list.c @@ -0,0 +1,12 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC header_list(const file_header_t *file_header) +{ +//TODO: cpio -vp DIR should output "DIR/NAME", not just "NAME" */ + puts(file_header->name); +} diff --git a/archival/libarchive/header_skip.c b/archival/libarchive/header_skip.c new file mode 100644 index 000000000..2bfc5253c --- /dev/null +++ b/archival/libarchive/header_skip.c @@ -0,0 +1,10 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC header_skip(const file_header_t *file_header UNUSED_PARAM) +{ +} diff --git a/archival/libarchive/header_verbose_list.c b/archival/libarchive/header_verbose_list.c new file mode 100644 index 000000000..bc4e4154b --- /dev/null +++ b/archival/libarchive/header_verbose_list.c @@ -0,0 +1,69 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC header_verbose_list(const file_header_t *file_header) +{ + struct tm tm_time; + struct tm *ptm = &tm_time; //localtime(&file_header->mtime); + +#if ENABLE_FEATURE_TAR_UNAME_GNAME + char uid[sizeof(int)*3 + 2]; + /*char gid[sizeof(int)*3 + 2];*/ + char *user; + char *group; + + localtime_r(&file_header->mtime, ptm); + + user = file_header->tar__uname; + if (user == NULL) { + sprintf(uid, "%u", (unsigned)file_header->uid); + user = uid; + } + group = file_header->tar__gname; + if (group == NULL) { + /*sprintf(gid, "%u", (unsigned)file_header->gid);*/ + group = utoa(file_header->gid); + } + printf("%s %s/%s %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s", + bb_mode_string(file_header->mode), + user, + group, + file_header->size, + 1900 + ptm->tm_year, + 1 + ptm->tm_mon, + ptm->tm_mday, + ptm->tm_hour, + ptm->tm_min, + ptm->tm_sec, + file_header->name); + +#else /* !FEATURE_TAR_UNAME_GNAME */ + + localtime_r(&file_header->mtime, ptm); + + printf("%s %u/%u %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s", + bb_mode_string(file_header->mode), + (unsigned)file_header->uid, + (unsigned)file_header->gid, + file_header->size, + 1900 + ptm->tm_year, + 1 + ptm->tm_mon, + ptm->tm_mday, + ptm->tm_hour, + ptm->tm_min, + ptm->tm_sec, + file_header->name); + +#endif /* FEATURE_TAR_UNAME_GNAME */ + + /* NB: GNU tar shows "->" for symlinks and "link to" for hardlinks */ + if (file_header->link_target) { + printf(" -> %s", file_header->link_target); + } + bb_putchar('\n'); +} diff --git a/archival/libarchive/init_handle.c b/archival/libarchive/init_handle.c new file mode 100644 index 000000000..6644ea13b --- /dev/null +++ b/archival/libarchive/init_handle.c @@ -0,0 +1,22 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +archive_handle_t* FAST_FUNC init_handle(void) +{ + archive_handle_t *archive_handle; + + /* Initialize default values */ + archive_handle = xzalloc(sizeof(archive_handle_t)); + archive_handle->file_header = xzalloc(sizeof(file_header_t)); + archive_handle->action_header = header_skip; + archive_handle->action_data = data_skip; + archive_handle->filter = filter_accept_all; + archive_handle->seek = seek_by_jump; + + return archive_handle; +} diff --git a/archival/libarchive/liblzo.h b/archival/libarchive/liblzo.h new file mode 100644 index 000000000..843997cb9 --- /dev/null +++ b/archival/libarchive/liblzo.h @@ -0,0 +1,93 @@ +/* + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "liblzo_interface.h" + +/* lzo-2.03/src/config1x.h */ +#define M2_MIN_LEN 3 +#define M2_MAX_LEN 8 +#define M3_MAX_LEN 33 +#define M4_MAX_LEN 9 +#define M1_MAX_OFFSET 0x0400 +#define M2_MAX_OFFSET 0x0800 +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff +#define M1_MARKER 0 +#define M3_MARKER 32 +#define M4_MARKER 16 + +#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET) +#define MIN_LOOKAHEAD (M2_MAX_LEN + 1) + +#define LZO_EOF_CODE + +/* lzo-2.03/src/lzo_dict.h */ +#define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex] +#define DX2(p,s1,s2) \ + (((((unsigned)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) +//#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0]) +//#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0]) +#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0]) + +#define D_SIZE (1U << D_BITS) +#define D_MASK ((1U << D_BITS) - 1) +#define D_HIGH ((D_MASK >> 1) + 1) + +#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ + ( \ + m_pos = ip - (unsigned)(ip - m_pos), \ + ((uintptr_t)m_pos < (uintptr_t)in \ + || (m_off = (unsigned)(ip - m_pos)) <= 0 \ + || m_off > max_offset) \ + ) + +#define DENTRY(p,in) (p) +#define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in) + +#define DMS(v,s) ((unsigned) (((v) & (D_MASK >> (s))) << (s))) +#define DM(v) ((unsigned) ((v) & D_MASK)) +#define DMUL(a,b) ((unsigned) ((a) * (b))) + +/* lzo-2.03/src/lzo_ptr.h */ +#define pd(a,b) ((unsigned)((a)-(b))) + +# define TEST_IP (ip < ip_end) +# define NEED_IP(x) \ + if ((unsigned)(ip_end - ip) < (unsigned)(x)) goto input_overrun + +# undef TEST_OP /* don't need both of the tests here */ +# define TEST_OP 1 +# define NEED_OP(x) \ + if ((unsigned)(op_end - op) < (unsigned)(x)) goto output_overrun + +#define HAVE_ANY_OP 1 + +//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) +# define TEST_LB(m_pos) if (m_pos < out || m_pos >= op) goto lookbehind_overrun +//# define TEST_LBO(m_pos,o) if (m_pos < out || m_pos >= op - (o)) goto lookbehind_overrun +//#else +//# define TEST_LB(m_pos) ((void) 0) +//# define TEST_LBO(m_pos,o) ((void) 0) +//#endif diff --git a/archival/libarchive/lzo1x_1.c b/archival/libarchive/lzo1x_1.c new file mode 100644 index 000000000..a88839846 --- /dev/null +++ b/archival/libarchive/lzo1x_1.c @@ -0,0 +1,35 @@ +/* LZO1X-1 compression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libbb.h" +#include "liblzo.h" + +#define D_BITS 14 +#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5) +#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) + +#define DO_COMPRESS lzo1x_1_compress + +#include "lzo1x_c.c" diff --git a/archival/libarchive/lzo1x_1o.c b/archival/libarchive/lzo1x_1o.c new file mode 100644 index 000000000..3c61253e0 --- /dev/null +++ b/archival/libarchive/lzo1x_1o.c @@ -0,0 +1,35 @@ +/* LZO1X-1(15) compression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libbb.h" +#include "liblzo.h" + +#define D_BITS 15 +#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5) +#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) + +#define DO_COMPRESS lzo1x_1_15_compress + +#include "lzo1x_c.c" diff --git a/archival/libarchive/lzo1x_9x.c b/archival/libarchive/lzo1x_9x.c new file mode 100644 index 000000000..483205155 --- /dev/null +++ b/archival/libarchive/lzo1x_9x.c @@ -0,0 +1,921 @@ +/* lzo1x_9x.c -- implementation of the LZO1X-999 compression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ +*/ +#include "libbb.h" + +/* The following is probably only safe on Intel-compatible processors ... */ +#define LZO_UNALIGNED_OK_2 +#define LZO_UNALIGNED_OK_4 + +#include "liblzo.h" + +#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b)) +#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b)) +#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c)) + +/*********************************************************************** +// +************************************************************************/ +#define SWD_N M4_MAX_OFFSET /* size of ring buffer */ +#define SWD_F 2048 /* upper limit for match length */ + +#define SWD_BEST_OFF (LZO_MAX3(M2_MAX_LEN, M3_MAX_LEN, M4_MAX_LEN) + 1) + +typedef struct { + int init; + + unsigned look; /* bytes in lookahead buffer */ + + unsigned m_len; + unsigned m_off; + + const uint8_t *bp; + const uint8_t *ip; + const uint8_t *in; + const uint8_t *in_end; + uint8_t *out; + + unsigned r1_lit; + +} lzo1x_999_t; + +#define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) + +/* lzo_swd.c -- sliding window dictionary */ + +/*********************************************************************** +// +************************************************************************/ +#define SWD_UINT_MAX USHRT_MAX + +#ifndef SWD_HSIZE +# define SWD_HSIZE 16384 +#endif +#ifndef SWD_MAX_CHAIN +# define SWD_MAX_CHAIN 2048 +#endif + +#define HEAD3(b, p) \ + ( ((0x9f5f * ((((b[p]<<5)^b[p+1])<<5) ^ b[p+2])) >> 5) & (SWD_HSIZE-1) ) + +#if defined(LZO_UNALIGNED_OK_2) +# define HEAD2(b,p) (* (uint16_t *) &(b[p])) +#else +# define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8)) +#endif +#define NIL2 SWD_UINT_MAX + +typedef struct lzo_swd { + /* public - "built-in" */ + + /* public - configuration */ + unsigned max_chain; + int use_best_off; + + /* public - output */ + unsigned m_len; + unsigned m_off; + unsigned look; + int b_char; +#if defined(SWD_BEST_OFF) + unsigned best_off[SWD_BEST_OFF]; +#endif + + /* semi public */ + lzo1x_999_t *c; + unsigned m_pos; +#if defined(SWD_BEST_OFF) + unsigned best_pos[SWD_BEST_OFF]; +#endif + + /* private */ + unsigned ip; /* input pointer (lookahead) */ + unsigned bp; /* buffer pointer */ + unsigned rp; /* remove pointer */ + + unsigned node_count; + unsigned first_rp; + + uint8_t b[SWD_N + SWD_F]; + uint8_t b_wrap[SWD_F]; /* must follow b */ + uint16_t head3[SWD_HSIZE]; + uint16_t succ3[SWD_N + SWD_F]; + uint16_t best3[SWD_N + SWD_F]; + uint16_t llen3[SWD_HSIZE]; +#ifdef HEAD2 + uint16_t head2[65536L]; +#endif +} lzo_swd_t, *lzo_swd_p; + +#define SIZEOF_LZO_SWD_T (sizeof(lzo_swd_t)) + + +/* Access macro for head3. + * head3[key] may be uninitialized, but then its value will never be used. + */ +#define s_get_head3(s,key) s->head3[key] + + +/*********************************************************************** +// +************************************************************************/ +#define B_SIZE (SWD_N + SWD_F) + +static int swd_init(lzo_swd_p s) +{ + /* defaults */ + s->node_count = SWD_N; + + memset(s->llen3, 0, sizeof(s->llen3[0]) * (unsigned)SWD_HSIZE); +#ifdef HEAD2 + memset(s->head2, 0xff, sizeof(s->head2[0]) * 65536L); + assert(s->head2[0] == NIL2); +#endif + + s->ip = 0; + s->bp = s->ip; + s->first_rp = s->ip; + + assert(s->ip + SWD_F <= B_SIZE); + s->look = (unsigned) (s->c->in_end - s->c->ip); + if (s->look > 0) { + if (s->look > SWD_F) + s->look = SWD_F; + memcpy(&s->b[s->ip], s->c->ip, s->look); + s->c->ip += s->look; + s->ip += s->look; + } + if (s->ip == B_SIZE) + s->ip = 0; + + s->rp = s->first_rp; + if (s->rp >= s->node_count) + s->rp -= s->node_count; + else + s->rp += B_SIZE - s->node_count; + + return LZO_E_OK; +} + +#define swd_pos2off(s,pos) \ + (s->bp > (pos) ? s->bp - (pos) : B_SIZE - ((pos) - s->bp)) + + +/*********************************************************************** +// +************************************************************************/ +static void swd_getbyte(lzo_swd_p s) +{ + int c; + + if ((c = getbyte(*(s->c))) < 0) { + if (s->look > 0) + --s->look; + } else { + s->b[s->ip] = c; + if (s->ip < SWD_F) + s->b_wrap[s->ip] = c; + } + if (++s->ip == B_SIZE) + s->ip = 0; + if (++s->bp == B_SIZE) + s->bp = 0; + if (++s->rp == B_SIZE) + s->rp = 0; +} + + +/*********************************************************************** +// remove node from lists +************************************************************************/ +static void swd_remove_node(lzo_swd_p s, unsigned node) +{ + if (s->node_count == 0) { + unsigned key; + + key = HEAD3(s->b,node); + assert(s->llen3[key] > 0); + --s->llen3[key]; + +#ifdef HEAD2 + key = HEAD2(s->b,node); + assert(s->head2[key] != NIL2); + if ((unsigned) s->head2[key] == node) + s->head2[key] = NIL2; +#endif + } else + --s->node_count; +} + + +/*********************************************************************** +// +************************************************************************/ +static void swd_accept(lzo_swd_p s, unsigned n) +{ + assert(n <= s->look); + + while (n--) { + unsigned key; + + swd_remove_node(s,s->rp); + + /* add bp into HEAD3 */ + key = HEAD3(s->b, s->bp); + s->succ3[s->bp] = s_get_head3(s, key); + s->head3[key] = s->bp; + s->best3[s->bp] = SWD_F + 1; + s->llen3[key]++; + assert(s->llen3[key] <= SWD_N); + +#ifdef HEAD2 + /* add bp into HEAD2 */ + key = HEAD2(s->b, s->bp); + s->head2[key] = s->bp; +#endif + + swd_getbyte(s); + } +} + + +/*********************************************************************** +// +************************************************************************/ +static void swd_search(lzo_swd_p s, unsigned node, unsigned cnt) +{ + const uint8_t *p1; + const uint8_t *p2; + const uint8_t *px; + unsigned m_len = s->m_len; + const uint8_t *b = s->b; + const uint8_t *bp = s->b + s->bp; + const uint8_t *bx = s->b + s->bp + s->look; + unsigned char scan_end1; + + assert(s->m_len > 0); + + scan_end1 = bp[m_len - 1]; + for ( ; cnt-- > 0; node = s->succ3[node]) { + p1 = bp; + p2 = b + node; + px = bx; + + assert(m_len < s->look); + + if (p2[m_len - 1] == scan_end1 + && p2[m_len] == p1[m_len] + && p2[0] == p1[0] + && p2[1] == p1[1] + ) { + unsigned i; + assert(lzo_memcmp(bp, &b[node], 3) == 0); + + p1 += 2; p2 += 2; + do {} while (++p1 < px && *p1 == *++p2); + i = p1-bp; + + assert(lzo_memcmp(bp, &b[node], i) == 0); + +#if defined(SWD_BEST_OFF) + if (i < SWD_BEST_OFF) { + if (s->best_pos[i] == 0) + s->best_pos[i] = node + 1; + } +#endif + if (i > m_len) { + s->m_len = m_len = i; + s->m_pos = node; + if (m_len == s->look) + return; + if (m_len >= SWD_F) + return; + if (m_len > (unsigned) s->best3[node]) + return; + scan_end1 = bp[m_len - 1]; + } + } + } +} + + +/*********************************************************************** +// +************************************************************************/ +#ifdef HEAD2 + +static int swd_search2(lzo_swd_p s) +{ + unsigned key; + + assert(s->look >= 2); + assert(s->m_len > 0); + + key = s->head2[HEAD2(s->b, s->bp)]; + if (key == NIL2) + return 0; + assert(lzo_memcmp(&s->b[s->bp], &s->b[key], 2) == 0); +#if defined(SWD_BEST_OFF) + if (s->best_pos[2] == 0) + s->best_pos[2] = key + 1; +#endif + + if (s->m_len < 2) { + s->m_len = 2; + s->m_pos = key; + } + return 1; +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ +static void swd_findbest(lzo_swd_p s) +{ + unsigned key; + unsigned cnt, node; + unsigned len; + + assert(s->m_len > 0); + + /* get current head, add bp into HEAD3 */ + key = HEAD3(s->b,s->bp); + node = s->succ3[s->bp] = s_get_head3(s, key); + cnt = s->llen3[key]++; + assert(s->llen3[key] <= SWD_N + SWD_F); + if (cnt > s->max_chain) + cnt = s->max_chain; + s->head3[key] = s->bp; + + s->b_char = s->b[s->bp]; + len = s->m_len; + if (s->m_len >= s->look) { + if (s->look == 0) + s->b_char = -1; + s->m_off = 0; + s->best3[s->bp] = SWD_F + 1; + } else { +#ifdef HEAD2 + if (swd_search2(s)) +#endif + if (s->look >= 3) + swd_search(s, node, cnt); + if (s->m_len > len) + s->m_off = swd_pos2off(s,s->m_pos); + s->best3[s->bp] = s->m_len; + +#if defined(SWD_BEST_OFF) + if (s->use_best_off) { + int i; + for (i = 2; i < SWD_BEST_OFF; i++) { + if (s->best_pos[i] > 0) + s->best_off[i] = swd_pos2off(s, s->best_pos[i]-1); + else + s->best_off[i] = 0; + } + } +#endif + } + + swd_remove_node(s,s->rp); + +#ifdef HEAD2 + /* add bp into HEAD2 */ + key = HEAD2(s->b, s->bp); + s->head2[key] = s->bp; +#endif +} + +#undef HEAD3 +#undef HEAD2 +#undef s_get_head3 + + +/*********************************************************************** +// +************************************************************************/ +static int init_match(lzo1x_999_t *c, lzo_swd_p s, uint32_t use_best_off) +{ + int r; + + assert(!c->init); + c->init = 1; + + s->c = c; + + r = swd_init(s); + if (r != 0) + return r; + + s->use_best_off = use_best_off; + return r; +} + + +/*********************************************************************** +// +************************************************************************/ +static int find_match(lzo1x_999_t *c, lzo_swd_p s, + unsigned this_len, unsigned skip) +{ + assert(c->init); + + if (skip > 0) { + assert(this_len >= skip); + swd_accept(s, this_len - skip); + } else { + assert(this_len <= 1); + } + + s->m_len = 1; + s->m_len = 1; +#ifdef SWD_BEST_OFF + if (s->use_best_off) + memset(s->best_pos, 0, sizeof(s->best_pos)); +#endif + swd_findbest(s); + c->m_len = s->m_len; + c->m_off = s->m_off; + + swd_getbyte(s); + + if (s->b_char < 0) { + c->look = 0; + c->m_len = 0; + } else { + c->look = s->look + 1; + } + c->bp = c->ip - c->look; + + return LZO_E_OK; +} + +/* this is a public functions, but there is no prototype in a header file */ +static int lzo1x_999_compress_internal(const uint8_t *in , unsigned in_len, + uint8_t *out, unsigned *out_len, + void *wrkmem, + unsigned good_length, + unsigned max_lazy, + unsigned max_chain, + uint32_t use_best_off); + + +/*********************************************************************** +// +************************************************************************/ +static uint8_t *code_match(lzo1x_999_t *c, + uint8_t *op, unsigned m_len, unsigned m_off) +{ + assert(op > c->out); + if (m_len == 2) { + assert(m_off <= M1_MAX_OFFSET); + assert(c->r1_lit > 0); + assert(c->r1_lit < 4); + m_off -= 1; + *op++ = M1_MARKER | ((m_off & 3) << 2); + *op++ = m_off >> 2; + } else if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { + assert(m_len >= 3); + m_off -= 1; + *op++ = ((m_len - 1) << 5) | ((m_off & 7) << 2); + *op++ = m_off >> 3; + assert(op[-2] >= M2_MARKER); + } else if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && c->r1_lit >= 4) { + assert(m_len == 3); + assert(m_off > M2_MAX_OFFSET); + m_off -= 1 + M2_MAX_OFFSET; + *op++ = M1_MARKER | ((m_off & 3) << 2); + *op++ = m_off >> 2; + } else if (m_off <= M3_MAX_OFFSET) { + assert(m_len >= 3); + m_off -= 1; + if (m_len <= M3_MAX_LEN) + *op++ = M3_MARKER | (m_len - 2); + else { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = m_len; + } + *op++ = m_off << 2; + *op++ = m_off >> 6; + } else { + unsigned k; + + assert(m_len >= 3); + assert(m_off > 0x4000); + assert(m_off <= 0xbfff); + m_off -= 0x4000; + k = (m_off & 0x4000) >> 11; + if (m_len <= M4_MAX_LEN) + *op++ = M4_MARKER | k | (m_len - 2); + else { + m_len -= M4_MAX_LEN; + *op++ = M4_MARKER | k | 0; + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = m_len; + } + *op++ = m_off << 2; + *op++ = m_off >> 6; + } + + return op; +} + + +static uint8_t *STORE_RUN(lzo1x_999_t *c, uint8_t *op, + const uint8_t *ii, unsigned t) +{ + if (op == c->out && t <= 238) { + *op++ = 17 + t; + } else if (t <= 3) { + op[-2] |= t; + } else if (t <= 18) { + *op++ = t - 3; + } else { + unsigned tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = tt; + } + do *op++ = *ii++; while (--t > 0); + + return op; +} + + +static uint8_t *code_run(lzo1x_999_t *c, uint8_t *op, const uint8_t *ii, + unsigned lit) +{ + if (lit > 0) { + assert(m_len >= 2); + op = STORE_RUN(c, op, ii, lit); + } else { + assert(m_len >= 3); + } + c->r1_lit = lit; + + return op; +} + + +/*********************************************************************** +// +************************************************************************/ +static int len_of_coded_match(unsigned m_len, unsigned m_off, unsigned lit) +{ + int n = 4; + + if (m_len < 2) + return -1; + if (m_len == 2) + return (m_off <= M1_MAX_OFFSET && lit > 0 && lit < 4) ? 2 : -1; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) + return 2; + if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && lit >= 4) + return 2; + if (m_off <= M3_MAX_OFFSET) { + if (m_len <= M3_MAX_LEN) + return 3; + m_len -= M3_MAX_LEN; + } else if (m_off <= M4_MAX_OFFSET) { + if (m_len <= M4_MAX_LEN) + return 3; + m_len -= M4_MAX_LEN; + } else + return -1; + while (m_len > 255) { + m_len -= 255; + n++; + } + return n; +} + + +static int min_gain(unsigned ahead, unsigned lit1, + unsigned lit2, int l1, int l2, int l3) +{ + int lazy_match_min_gain = 0; + + assert (ahead >= 1); + lazy_match_min_gain += ahead; + + if (lit1 <= 3) + lazy_match_min_gain += (lit2 <= 3) ? 0 : 2; + else if (lit1 <= 18) + lazy_match_min_gain += (lit2 <= 18) ? 0 : 1; + + lazy_match_min_gain += (l2 - l1) * 2; + if (l3 > 0) + lazy_match_min_gain -= (ahead - l3) * 2; + + if (lazy_match_min_gain < 0) + lazy_match_min_gain = 0; + + return lazy_match_min_gain; +} + + +/*********************************************************************** +// +************************************************************************/ +#if defined(SWD_BEST_OFF) + +static void better_match(const lzo_swd_p swd, + unsigned *m_len, unsigned *m_off) +{ + if (*m_len <= M2_MIN_LEN) + return; + + if (*m_off <= M2_MAX_OFFSET) + return; + + /* M3/M4 -> M2 */ + if (*m_off > M2_MAX_OFFSET + && *m_len >= M2_MIN_LEN + 1 && *m_len <= M2_MAX_LEN + 1 + && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M2_MAX_OFFSET + ) { + *m_len = *m_len - 1; + *m_off = swd->best_off[*m_len]; + return; + } + + /* M4 -> M2 */ + if (*m_off > M3_MAX_OFFSET + && *m_len >= M4_MAX_LEN + 1 && *m_len <= M2_MAX_LEN + 2 + && swd->best_off[*m_len-2] && swd->best_off[*m_len-2] <= M2_MAX_OFFSET + ) { + *m_len = *m_len - 2; + *m_off = swd->best_off[*m_len]; + return; + } + /* M4 -> M3 */ + if (*m_off > M3_MAX_OFFSET + && *m_len >= M4_MAX_LEN + 1 && *m_len <= M3_MAX_LEN + 1 + && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M3_MAX_OFFSET + ) { + *m_len = *m_len - 1; + *m_off = swd->best_off[*m_len]; + } +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ +static int lzo1x_999_compress_internal(const uint8_t *in, unsigned in_len, + uint8_t *out, unsigned *out_len, + void *wrkmem, + unsigned good_length, + unsigned max_lazy, + unsigned max_chain, + uint32_t use_best_off) +{ + uint8_t *op; + const uint8_t *ii; + unsigned lit; + unsigned m_len, m_off; + lzo1x_999_t cc; + lzo1x_999_t *const c = &cc; + const lzo_swd_p swd = (lzo_swd_p) wrkmem; + int r; + + c->init = 0; + c->ip = c->in = in; + c->in_end = in + in_len; + c->out = out; + + op = out; + ii = c->ip; /* point to start of literal run */ + lit = 0; + c->r1_lit = 0; + + r = init_match(c, swd, use_best_off); + if (r != 0) + return r; + swd->max_chain = max_chain; + + r = find_match(c, swd, 0, 0); + if (r != 0) + return r; + + while (c->look > 0) { + unsigned ahead; + unsigned max_ahead; + int l1, l2, l3; + + m_len = c->m_len; + m_off = c->m_off; + + assert(c->bp == c->ip - c->look); + assert(c->bp >= in); + if (lit == 0) + ii = c->bp; + assert(ii + lit == c->bp); + assert(swd->b_char == *(c->bp)); + + if (m_len < 2 + || (m_len == 2 && (m_off > M1_MAX_OFFSET || lit == 0 || lit >= 4)) + /* Do not accept this match for compressed-data compatibility + * with LZO v1.01 and before + * [ might be a problem for decompress() and optimize() ] + */ + || (m_len == 2 && op == out) + || (op == out && lit == 0) + ) { + /* a literal */ + m_len = 0; + } + else if (m_len == M2_MIN_LEN) { + /* compression ratio improves if we code a literal in some cases */ + if (m_off > MX_MAX_OFFSET && lit >= 4) + m_len = 0; + } + + if (m_len == 0) { + /* a literal */ + lit++; + swd->max_chain = max_chain; + r = find_match(c, swd, 1, 0); + assert(r == 0); + continue; + } + + /* a match */ +#if defined(SWD_BEST_OFF) + if (swd->use_best_off) + better_match(swd, &m_len, &m_off); +#endif + + /* shall we try a lazy match ? */ + ahead = 0; + if (m_len >= max_lazy) { + /* no */ + l1 = 0; + max_ahead = 0; + } else { + /* yes, try a lazy match */ + l1 = len_of_coded_match(m_len, m_off, lit); + assert(l1 > 0); + max_ahead = LZO_MIN(2, (unsigned)l1 - 1); + } + + + while (ahead < max_ahead && c->look > m_len) { + int lazy_match_min_gain; + + if (m_len >= good_length) + swd->max_chain = max_chain >> 2; + else + swd->max_chain = max_chain; + r = find_match(c, swd, 1, 0); + ahead++; + + assert(r == 0); + assert(c->look > 0); + assert(ii + lit + ahead == c->bp); + + if (c->m_len < m_len) + continue; + if (c->m_len == m_len && c->m_off >= m_off) + continue; +#if defined(SWD_BEST_OFF) + if (swd->use_best_off) + better_match(swd, &c->m_len, &c->m_off); +#endif + l2 = len_of_coded_match(c->m_len, c->m_off, lit+ahead); + if (l2 < 0) + continue; + + /* compressed-data compatibility [see above] */ + l3 = (op == out) ? -1 : len_of_coded_match(ahead, m_off, lit); + + lazy_match_min_gain = min_gain(ahead, lit, lit+ahead, l1, l2, l3); + if (c->m_len >= m_len + lazy_match_min_gain) { + if (l3 > 0) { + /* code previous run */ + op = code_run(c, op, ii, lit); + lit = 0; + /* code shortened match */ + op = code_match(c, op, ahead, m_off); + } else { + lit += ahead; + assert(ii + lit == c->bp); + } + goto lazy_match_done; + } + } + + assert(ii + lit + ahead == c->bp); + + /* 1 - code run */ + op = code_run(c, op, ii, lit); + lit = 0; + + /* 2 - code match */ + op = code_match(c, op, m_len, m_off); + swd->max_chain = max_chain; + r = find_match(c, swd, m_len, 1+ahead); + assert(r == 0); + + lazy_match_done: ; + } + + /* store final run */ + if (lit > 0) + op = STORE_RUN(c, op, ii, lit); + +#if defined(LZO_EOF_CODE) + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; +#endif + + *out_len = op - out; + + return LZO_E_OK; +} + + +/*********************************************************************** +// +************************************************************************/ +int lzo1x_999_compress_level(const uint8_t *in, unsigned in_len, + uint8_t *out, unsigned *out_len, + void *wrkmem, + int compression_level) +{ + static const struct { + uint16_t good_length; + uint16_t max_lazy; + uint16_t max_chain; + uint16_t use_best_off; + } c[3] = { + { 8, 32, 256, 0 }, + { 32, 128, 2048, 1 }, + { SWD_F, SWD_F, 4096, 1 } /* max. compression */ + }; + + if (compression_level < 7 || compression_level > 9) + return LZO_E_ERROR; + + compression_level -= 7; + return lzo1x_999_compress_internal(in, in_len, out, out_len, wrkmem, + c[compression_level].good_length, + c[compression_level].max_lazy, + c[compression_level].max_chain, + c[compression_level].use_best_off); +} diff --git a/archival/libarchive/lzo1x_c.c b/archival/libarchive/lzo1x_c.c new file mode 100644 index 000000000..cc86f74b1 --- /dev/null +++ b/archival/libarchive/lzo1x_c.c @@ -0,0 +1,296 @@ +/* implementation of the LZO1[XY]-1 compression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/*********************************************************************** +// compress a block of data. +************************************************************************/ +static NOINLINE unsigned +do_compress(const uint8_t* in, unsigned in_len, + uint8_t* out, unsigned* out_len, + void* wrkmem) +{ + register const uint8_t* ip; + uint8_t* op; + const uint8_t* const in_end = in + in_len; + const uint8_t* const ip_end = in + in_len - M2_MAX_LEN - 5; + const uint8_t* ii; + const void* *const dict = (const void**) wrkmem; + + op = out; + ip = in; + ii = ip; + + ip += 4; + for (;;) { + register const uint8_t* m_pos; + unsigned m_off; + unsigned m_len; + unsigned dindex; + + D_INDEX1(dindex,ip); + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; +#if 1 + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + D_INDEX2(dindex,ip); +#endif + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + goto literal; + + try_match: +#if 1 && defined(LZO_UNALIGNED_OK_2) + if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip) +#else + if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) +#endif + { + } else { + if (m_pos[2] == ip[2]) { +#if 0 + if (m_off <= M2_MAX_OFFSET) + goto match; + if (lit <= 3) + goto match; + if (lit == 3) { /* better compression, but slower */ + assert(op - 2 > out); op[-2] |= (uint8_t)(3); + *op++ = *ii++; *op++ = *ii++; *op++ = *ii++; + goto code_match; + } + if (m_pos[3] == ip[3]) +#endif + goto match; + } + else { + /* still need a better way for finding M1 matches */ +#if 0 + /* a M1 match */ +#if 0 + if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3) +#else + if (m_off <= M1_MAX_OFFSET && lit == 3) +#endif + { + register unsigned t; + + t = lit; + assert(op - 2 > out); op[-2] |= (uint8_t)(t); + do *op++ = *ii++; while (--t > 0); + assert(ii == ip); + m_off -= 1; + *op++ = (uint8_t)(M1_MARKER | ((m_off & 3) << 2)); + *op++ = (uint8_t)(m_off >> 2); + ip += 2; + goto match_done; + } +#endif + } + } + + /* a literal */ + literal: + UPDATE_I(dict, 0, dindex, ip, in); + ++ip; + if (ip >= ip_end) + break; + continue; + + /* a match */ +match: + UPDATE_I(dict, 0, dindex, ip, in); + /* store current literal run */ + if (pd(ip, ii) > 0) { + register unsigned t = pd(ip, ii); + + if (t <= 3) { + assert(op - 2 > out); + op[-2] |= (uint8_t)(t); + } + else if (t <= 18) + *op++ = (uint8_t)(t - 3); + else { + register unsigned tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = (uint8_t)(tt); + } + do *op++ = *ii++; while (--t > 0); + } + + /* code the match */ + assert(ii == ip); + ip += 3; + if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ + || m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++ +#ifdef LZO1Y + || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++ + || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++ +#endif + ) { + --ip; + m_len = pd(ip, ii); + assert(m_len >= 3); + assert(m_len <= M2_MAX_LEN); + + if (m_off <= M2_MAX_OFFSET) { + m_off -= 1; +#if defined(LZO1X) + *op++ = (uint8_t)(((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = (uint8_t)(m_off >> 3); +#elif defined(LZO1Y) + *op++ = (uint8_t)(((m_len + 1) << 4) | ((m_off & 3) << 2)); + *op++ = (uint8_t)(m_off >> 2); +#endif + } + else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + *op++ = (uint8_t)(M3_MARKER | (m_len - 2)); + goto m3_m4_offset; + } else { +#if defined(LZO1X) + m_off -= 0x4000; + assert(m_off > 0); + assert(m_off <= 0x7fff); + *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2)); + goto m3_m4_offset; +#elif defined(LZO1Y) + goto m4_match; +#endif + } + } + else { + { + const uint8_t* end = in_end; + const uint8_t* m = m_pos + M2_MAX_LEN + 1; + while (ip < end && *m == *ip) + m++, ip++; + m_len = pd(ip, ii); + } + assert(m_len > M2_MAX_LEN); + + if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= 33) + *op++ = (uint8_t)(M3_MARKER | (m_len - 2)); + else { + m_len -= 33; + *op++ = M3_MARKER | 0; + goto m3_m4_len; + } + } else { +#if defined(LZO1Y) + m4_match: +#endif + m_off -= 0x4000; + assert(m_off > 0); + assert(m_off <= 0x7fff); + if (m_len <= M4_MAX_LEN) + *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2)); + else { + m_len -= M4_MAX_LEN; + *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11)); + m3_m4_len: + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = (uint8_t)(m_len); + } + } + m3_m4_offset: + *op++ = (uint8_t)((m_off & 63) << 2); + *op++ = (uint8_t)(m_off >> 6); + } +#if 0 + match_done: +#endif + ii = ip; + if (ip >= ip_end) + break; + } + + *out_len = pd(op, out); + return pd(in_end, ii); +} + +/*********************************************************************** +// public entry point +************************************************************************/ +int DO_COMPRESS(const uint8_t* in, unsigned in_len, + uint8_t* out, unsigned* out_len, + void* wrkmem) +{ + uint8_t* op = out; + unsigned t; + + if (in_len <= M2_MAX_LEN + 5) + t = in_len; + else { + t = do_compress(in,in_len,op,out_len,wrkmem); + op += *out_len; + } + + if (t > 0) { + const uint8_t* ii = in + in_len - t; + + if (op == out && t <= 238) + *op++ = (uint8_t)(17 + t); + else if (t <= 3) + op[-2] |= (uint8_t)(t); + else if (t <= 18) + *op++ = (uint8_t)(t - 3); + else { + unsigned tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = (uint8_t)(tt); + } + do *op++ = *ii++; while (--t > 0); + } + + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; + + *out_len = pd(op, out); + return 0; /*LZO_E_OK*/ +} diff --git a/archival/libarchive/lzo1x_d.c b/archival/libarchive/lzo1x_d.c new file mode 100644 index 000000000..348a85510 --- /dev/null +++ b/archival/libarchive/lzo1x_d.c @@ -0,0 +1,420 @@ +/* implementation of the LZO1X decompression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libbb.h" +#include "liblzo.h" + +/*********************************************************************** +// decompress a block of data. +************************************************************************/ +/* safe decompression with overrun testing */ +int lzo1x_decompress_safe(const uint8_t* in, unsigned in_len, + uint8_t* out, unsigned* out_len, + void* wrkmem UNUSED_PARAM) +{ + register uint8_t* op; + register const uint8_t* ip; + register unsigned t; +#if defined(COPY_DICT) + unsigned m_off; + const uint8_t* dict_end; +#else + register const uint8_t* m_pos = NULL; /* possibly not needed */ +#endif + const uint8_t* const ip_end = in + in_len; +#if defined(HAVE_ANY_OP) + uint8_t* const op_end = out + *out_len; +#endif +#if defined(LZO1Z) + unsigned last_m_off = 0; +#endif + +// LZO_UNUSED(wrkmem); + +#if defined(COPY_DICT) + if (dict) { + if (dict_len > M4_MAX_OFFSET) { + dict += dict_len - M4_MAX_OFFSET; + dict_len = M4_MAX_OFFSET; + } + dict_end = dict + dict_len; + } else { + dict_len = 0; + dict_end = NULL; + } +#endif /* COPY_DICT */ + + *out_len = 0; + + op = out; + ip = in; + + if (*ip > 17) { + t = *ip++ - 17; + if (t < 4) + goto match_next; + assert(t > 0); NEED_OP(t); NEED_IP(t+1); + do *op++ = *ip++; while (--t > 0); + goto first_literal_run; + } + + while (TEST_IP && TEST_OP) { + t = *ip++; + if (t >= 16) + goto match; + /* a literal run */ + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 15 + *ip++; + } + /* copy literals */ + assert(t > 0); + NEED_OP(t+3); + NEED_IP(t+4); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +# if !defined(LZO_UNALIGNED_OK_4) + if (PTR_ALIGNED2_4(op, ip)) +# endif + { + COPY4(op, ip); + op += 4; + ip += 4; + if (--t > 0) { + if (t >= 4) { + do { + COPY4(op, ip); + op += 4; + ip += 4; + t -= 4; + } while (t >= 4); + if (t > 0) + do *op++ = *ip++; while (--t > 0); + } else { + do *op++ = *ip++; while (--t > 0); + } + } + } +# if !defined(LZO_UNALIGNED_OK_4) + else +# endif +#endif +#if !defined(LZO_UNALIGNED_OK_4) + { + *op++ = *ip++; + *op++ = *ip++; + *op++ = *ip++; + do *op++ = *ip++; while (--t > 0); + } +#endif + + first_literal_run: + t = *ip++; + if (t >= 16) + goto match; +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(3); + t = 3; COPY_DICT(t,m_off) +#else /* !COPY_DICT */ +#if defined(LZO1Z) + t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LB(m_pos); NEED_OP(3); + *op++ = *m_pos++; + *op++ = *m_pos++; + *op++ = *m_pos; +#endif /* COPY_DICT */ + goto match_done; + + /* handle matches */ + do { + match: + if (t >= 64) { /* a M2 match */ +#if defined(COPY_DICT) +#if defined(LZO1X) + m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); + t = (t >> 4) - 3; +#elif defined(LZO1Z) + m_off = t & 0x1f; + if (m_off >= 0x1c) + m_off = last_m_off; + else { + m_off = 1 + (m_off << 6) + (*ip++ >> 2); + last_m_off = m_off; + } + t = (t >> 5) - 1; +#endif +#else /* !COPY_DICT */ +#if defined(LZO1X) + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_pos = op - 1; + m_pos -= (t >> 2) & 3; + m_pos -= *ip++ << 2; + t = (t >> 4) - 3; +#elif defined(LZO1Z) + { + unsigned off = t & 0x1f; + m_pos = op; + if (off >= 0x1c) { + assert(last_m_off > 0); + m_pos -= last_m_off; + } else { + off = 1 + (off << 6) + (*ip++ >> 2); + m_pos -= off; + last_m_off = off; + } + } + t = (t >> 5) - 1; +#endif + TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); + goto copy_match; +#endif /* COPY_DICT */ + } + else if (t >= 32) { /* a M3 match */ + t &= 31; + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); + last_m_off = m_off; +#else + m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); +#endif +#else /* !COPY_DICT */ +#if defined(LZO1Z) + { + unsigned off = 1 + (ip[0] << 6) + (ip[1] >> 2); + m_pos = op - off; + last_m_off = off; + } +#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN) + m_pos = op - 1; + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos = op - 1; + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif +#endif /* COPY_DICT */ + ip += 2; + } + else if (t >= 16) { /* a M4 match */ +#if defined(COPY_DICT) + m_off = (t & 8) << 11; +#else /* !COPY_DICT */ + m_pos = op; + m_pos -= (t & 8) << 11; +#endif /* COPY_DICT */ + t &= 7; + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off += (ip[0] << 6) + (ip[1] >> 2); +#else + m_off += (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_off == 0) + goto eof_found; + m_off += 0x4000; +#if defined(LZO1Z) + last_m_off = m_off; +#endif +#else /* !COPY_DICT */ +#if defined(LZO1Z) + m_pos -= (ip[0] << 6) + (ip[1] >> 2); +#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN) + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; +#if defined(LZO1Z) + last_m_off = pd((const uint8_t*)op, m_pos); +#endif +#endif /* COPY_DICT */ + } + else { /* a M1 match */ +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = 1 + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(2); + t = 2; COPY_DICT(t,m_off) +#else /* !COPY_DICT */ +#if defined(LZO1Z) + t = 1 + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LB(m_pos); NEED_OP(2); + *op++ = *m_pos++; + *op++ = *m_pos; +#endif /* COPY_DICT */ + goto match_done; + } + + /* copy match */ +#if defined(COPY_DICT) + + NEED_OP(t+3-1); + t += 3-1; COPY_DICT(t,m_off) + +#else /* !COPY_DICT */ + + TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +# if !defined(LZO_UNALIGNED_OK_4) + if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) { + assert((op - m_pos) >= 4); /* both pointers are aligned */ +# else + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { +# endif + COPY4(op,m_pos); + op += 4; m_pos += 4; t -= 4 - (3 - 1); + do { + COPY4(op,m_pos); + op += 4; m_pos += 4; t -= 4; + } while (t >= 4); + if (t > 0) + do *op++ = *m_pos++; while (--t > 0); + } + else +#endif + { + copy_match: + *op++ = *m_pos++; *op++ = *m_pos++; + do *op++ = *m_pos++; while (--t > 0); + } + +#endif /* COPY_DICT */ + + match_done: +#if defined(LZO1Z) + t = ip[-1] & 3; +#else + t = ip[-2] & 3; +#endif + if (t == 0) + break; + + /* copy literals */ + match_next: + assert(t > 0); + assert(t < 4); + NEED_OP(t); + NEED_IP(t+1); +#if 0 + do *op++ = *ip++; while (--t > 0); +#else + *op++ = *ip++; + if (t > 1) { + *op++ = *ip++; + if (t > 2) + *op++ = *ip++; + } +#endif + t = *ip++; + } while (TEST_IP && TEST_OP); + } + +//#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) + /* no EOF code was found */ + *out_len = pd(op, out); + return LZO_E_EOF_NOT_FOUND; +//#endif + + eof_found: + assert(t == 1); + *out_len = pd(op, out); + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + +//#if defined(HAVE_NEED_IP) + input_overrun: + *out_len = pd(op, out); + return LZO_E_INPUT_OVERRUN; +//#endif + +//#if defined(HAVE_NEED_OP) + output_overrun: + *out_len = pd(op, out); + return LZO_E_OUTPUT_OVERRUN; +//#endif + +//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) + lookbehind_overrun: + *out_len = pd(op, out); + return LZO_E_LOOKBEHIND_OVERRUN; +//#endif +} diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c new file mode 100644 index 000000000..26ae565f5 --- /dev/null +++ b/archival/libarchive/open_transformer.c @@ -0,0 +1,54 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* transformer(), more than meets the eye */ +/* + * On MMU machine, the transform_prog is removed by macro magic + * in include/archive.h. On NOMMU, transformer is removed. + */ +void FAST_FUNC open_transformer(int fd, + IF_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd), + const char *transform_prog) +{ + struct fd_pair fd_pipe; + int pid; + + xpiped_pair(fd_pipe); + pid = BB_MMU ? xfork() : xvfork(); + if (pid == 0) { + /* Child */ + close(fd_pipe.rd); /* we don't want to read from the parent */ + // FIXME: error check? +#if BB_MMU + transformer(fd, fd_pipe.wr); + if (ENABLE_FEATURE_CLEAN_UP) { + close(fd_pipe.wr); /* send EOF */ + close(fd); + } + /* must be _exit! bug was actually seen here */ + _exit(EXIT_SUCCESS); +#else + { + char *argv[4]; + xmove_fd(fd, 0); + xmove_fd(fd_pipe.wr, 1); + argv[0] = (char*)transform_prog; + argv[1] = (char*)"-cf"; + argv[2] = (char*)"-"; + argv[3] = NULL; + BB_EXECVP(transform_prog, argv); + bb_perror_msg_and_die("can't execute '%s'", transform_prog); + } +#endif + /* notreached */ + } + + /* parent process */ + close(fd_pipe.wr); /* don't want to write to the child */ + xmove_fd(fd_pipe.rd, fd); +} diff --git a/archival/libarchive/seek_by_jump.c b/archival/libarchive/seek_by_jump.c new file mode 100644 index 000000000..7c2c52ae1 --- /dev/null +++ b/archival/libarchive/seek_by_jump.c @@ -0,0 +1,19 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC seek_by_jump(int fd, off_t amount) +{ + if (amount + && lseek(fd, amount, SEEK_CUR) == (off_t) -1 + ) { + if (errno == ESPIPE) + seek_by_read(fd, amount); + else + bb_perror_msg_and_die("seek failure"); + } +} diff --git a/archival/libarchive/seek_by_read.c b/archival/libarchive/seek_by_read.c new file mode 100644 index 000000000..ad931a8de --- /dev/null +++ b/archival/libarchive/seek_by_read.c @@ -0,0 +1,16 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* If we are reading through a pipe, or from stdin then we can't lseek, + * we must read and discard the data to skip over it. + */ +void FAST_FUNC seek_by_read(int fd, off_t amount) +{ + if (amount) + bb_copyfd_exact_size(fd, -1, amount); +} diff --git a/archival/libarchive/unpack_ar_archive.c b/archival/libarchive/unpack_ar_archive.c new file mode 100644 index 000000000..18dbfd54d --- /dev/null +++ b/archival/libarchive/unpack_ar_archive.c @@ -0,0 +1,22 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" +#include "ar.h" + +void FAST_FUNC unpack_ar_archive(archive_handle_t *ar_archive) +{ + char magic[7]; + + xread(ar_archive->src_fd, magic, AR_MAGIC_LEN); + if (strncmp(magic, AR_MAGIC, AR_MAGIC_LEN) != 0) { + bb_error_msg_and_die("invalid ar magic"); + } + ar_archive->offset += AR_MAGIC_LEN; + + while (get_header_ar(ar_archive) == EXIT_SUCCESS) + continue; +} diff --git a/archival/libarchive/unxz/README b/archival/libarchive/unxz/README new file mode 100644 index 000000000..c5972f6b8 --- /dev/null +++ b/archival/libarchive/unxz/README @@ -0,0 +1,135 @@ + +XZ Embedded +=========== + + XZ Embedded is a relatively small, limited implementation of the .xz + file format. Currently only decoding is implemented. + + XZ Embedded was written for use in the Linux kernel, but the code can + be easily used in other environments too, including regular userspace + applications. + + This README contains information that is useful only when the copy + of XZ Embedded isn't part of the Linux kernel tree. You should also + read linux/Documentation/xz.txt even if you aren't using XZ Embedded + as part of Linux; information in that file is not repeated in this + README. + +Compiling the Linux kernel module + + The xz_dec module depends on crc32 module, so make sure that you have + it enabled (CONFIG_CRC32). + + Building the xz_dec and xz_dec_test modules without support for BCJ + filters: + + cd linux/lib/xz + make -C /path/to/kernel/source \ + KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \ + CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m + + Building the xz_dec and xz_dec_test modules with support for BCJ + filters: + + cd linux/lib/xz + make -C /path/to/kernel/source \ + KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \ + CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \ + CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \ + CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \ + CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y + + If you want only one or a few of the BCJ filters, omit the appropriate + variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support + code shared between all BCJ filters. + + Most people don't need the xz_dec_test module. You can skip building + it by omitting CONFIG_XZ_DEC_TEST=m from the make command line. + +Compiler requirements + + XZ Embedded should compile as either GNU-C89 (used in the Linux + kernel) or with any C99 compiler. Getting the code to compile with + non-GNU C89 compiler or a C++ compiler should be quite easy as + long as there is a data type for unsigned 64-bit integer (or the + code is modified not to support large files, which needs some more + care than just using 32-bit integer instead of 64-bit). + + If you use GCC, try to use a recent version. For example, on x86, + xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when + compiled with GCC 4.3.3. + +Embedding into userspace applications + + To embed the XZ decoder, copy the following files into a single + directory in your source code tree: + + linux/include/linux/xz.h + linux/lib/xz/xz_crc32.c + linux/lib/xz/xz_dec_lzma2.c + linux/lib/xz/xz_dec_stream.c + linux/lib/xz/xz_lzma2.h + linux/lib/xz/xz_private.h + linux/lib/xz/xz_stream.h + userspace/xz_config.h + + Alternatively, xz.h may be placed into a different directory but then + that directory must be in the compiler include path when compiling + the .c files. + + Your code should use only the functions declared in xz.h. The rest of + the .h files are meant only for internal use in XZ Embedded. + + You may want to modify xz_config.h to be more suitable for your build + environment. Probably you should at least skim through it even if the + default file works as is. + +BCJ filter support + + If you want support for one or more BCJ filters, you need to copy also + linux/lib/xz/xz_dec_bcj.c into your application, and use appropriate + #defines in xz_config.h or in compiler flags. You don't need these + #defines in the code that just uses XZ Embedded via xz.h, but having + them always #defined doesn't hurt either. + + #define Instruction set BCJ filter endianness + XZ_DEC_X86 x86 or x86-64 Little endian only + XZ_DEC_POWERPC PowerPC Big endian only + XZ_DEC_IA64 Itanium (IA-64) Big or little endian + XZ_DEC_ARM ARM Little endian only + XZ_DEC_ARMTHUMB ARM-Thumb Little endian only + XZ_DEC_SPARC SPARC Big or little endian + + While some architectures are (partially) bi-endian, the endianness + setting doesn't change the endianness of the instructions on all + architectures. That's why Itanium and SPARC filters work for both big + and little endian executables (Itanium has little endian instructions + and SPARC has big endian instructions). + + There currently is no filter for little endian PowerPC or big endian + ARM or ARM-Thumb. Implementing filters for them can be considered if + there is a need for such filters in real-world applications. + +Notes about shared libraries + + If you are including XZ Embedded into a shared library, you very + probably should rename the xz_* functions to prevent symbol + conflicts in case your library is linked against some other library + or application that also has XZ Embedded in it (which may even be + a different version of XZ Embedded). TODO: Provide an easy way + to do this. + + Please don't create a shared library of XZ Embedded itself unless + it is fine to rebuild everything depending on that shared library + everytime you upgrade to a newer version of XZ Embedded. There are + no API or ABI stability guarantees between different versions of + XZ Embedded. + +Specifying the calling convention + + XZ_FUNC macro was included to support declaring functions with __init + in Linux. Outside Linux, it can be used to specify the calling + convention on systems that support multiple calling conventions. + For example, on Windows, you may make all functions use the stdcall + calling convention by defining XZ_FUNC=__stdcall when building and + using the functions from XZ Embedded. diff --git a/archival/libarchive/unxz/xz.h b/archival/libarchive/unxz/xz.h new file mode 100644 index 000000000..c6c071c4a --- /dev/null +++ b/archival/libarchive/unxz/xz.h @@ -0,0 +1,271 @@ +/* + * XZ decompressor + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_H +#define XZ_H + +#ifdef __KERNEL__ +# include +# include +#else +# include +# include +#endif + +/* In Linux, this is used to make extern functions static when needed. */ +#ifndef XZ_EXTERN +# define XZ_EXTERN extern +#endif + +/* In Linux, this is used to mark the functions with __init when needed. */ +#ifndef XZ_FUNC +# define XZ_FUNC +#endif + +/** + * enum xz_mode - Operation mode + * + * @XZ_SINGLE: Single-call mode. This uses less RAM than + * than multi-call modes, because the LZMA2 + * dictionary doesn't need to be allocated as + * part of the decoder state. All required data + * structures are allocated at initialization, + * so xz_dec_run() cannot return XZ_MEM_ERROR. + * @XZ_PREALLOC: Multi-call mode with preallocated LZMA2 + * dictionary buffer. All data structures are + * allocated at initialization, so xz_dec_run() + * cannot return XZ_MEM_ERROR. + * @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is + * allocated once the required size has been + * parsed from the stream headers. If the + * allocation fails, xz_dec_run() will return + * XZ_MEM_ERROR. + * + * It is possible to enable support only for a subset of the above + * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC, + * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled + * with support for all operation modes, but the preboot code may + * be built with fewer features to minimize code size. + */ +enum xz_mode { + XZ_SINGLE, + XZ_PREALLOC, + XZ_DYNALLOC +}; + +/** + * enum xz_ret - Return codes + * @XZ_OK: Everything is OK so far. More input or more + * output space is required to continue. This + * return code is possible only in multi-call mode + * (XZ_PREALLOC or XZ_DYNALLOC). + * @XZ_STREAM_END: Operation finished successfully. + * @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding + * is still possible in multi-call mode by simply + * calling xz_dec_run() again. + * NOTE: This return value is used only if + * XZ_DEC_ANY_CHECK was defined at build time, + * which is not used in the kernel. Unsupported + * check types return XZ_OPTIONS_ERROR if + * XZ_DEC_ANY_CHECK was not defined at build time. + * @XZ_MEM_ERROR: Allocating memory failed. This return code is + * possible only if the decoder was initialized + * with XZ_DYNALLOC. The amount of memory that was + * tried to be allocated was no more than the + * dict_max argument given to xz_dec_init(). + * @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than + * allowed by the dict_max argument given to + * xz_dec_init(). This return value is possible + * only in multi-call mode (XZ_PREALLOC or + * XZ_DYNALLOC); the single-call mode (XZ_SINGLE) + * ignores the dict_max argument. + * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic + * bytes). + * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested + * compression options. In the decoder this means + * that the header CRC32 matches, but the header + * itself specifies something that we don't support. + * @XZ_DATA_ERROR: Compressed data is corrupt. + * @XZ_BUF_ERROR: Cannot make any progress. Details are slightly + * different between multi-call and single-call + * mode; more information below. + * + * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls + * to XZ code cannot consume any input and cannot produce any new output. + * This happens when there is no new input available, or the output buffer + * is full while at least one output byte is still pending. Assuming your + * code is not buggy, you can get this error only when decoding a compressed + * stream that is truncated or otherwise corrupt. + * + * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer + * is too small, or the compressed input is corrupt in a way that makes the + * decoder produce more output than the caller expected. When it is + * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR + * is used instead of XZ_BUF_ERROR. + */ +enum xz_ret { + XZ_OK, + XZ_STREAM_END, + XZ_UNSUPPORTED_CHECK, + XZ_MEM_ERROR, + XZ_MEMLIMIT_ERROR, + XZ_FORMAT_ERROR, + XZ_OPTIONS_ERROR, + XZ_DATA_ERROR, + XZ_BUF_ERROR +}; + +/** + * struct xz_buf - Passing input and output buffers to XZ code + * @in: Beginning of the input buffer. This may be NULL if and only + * if in_pos is equal to in_size. + * @in_pos: Current position in the input buffer. This must not exceed + * in_size. + * @in_size: Size of the input buffer + * @out: Beginning of the output buffer. This may be NULL if and only + * if out_pos is equal to out_size. + * @out_pos: Current position in the output buffer. This must not exceed + * out_size. + * @out_size: Size of the output buffer + * + * Only the contents of the output buffer from out[out_pos] onward, and + * the variables in_pos and out_pos are modified by the XZ code. + */ +struct xz_buf { + const uint8_t *in; + size_t in_pos; + size_t in_size; + + uint8_t *out; + size_t out_pos; + size_t out_size; +}; + +/** + * struct xz_dec - Opaque type to hold the XZ decoder state + */ +struct xz_dec; + +/** + * xz_dec_init() - Allocate and initialize a XZ decoder state + * @mode: Operation mode + * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for + * multi-call decoding. This is ignored in single-call mode + * (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes + * or 2^n + 2^(n-1) bytes (the latter sizes are less common + * in practice), so other values for dict_max don't make sense. + * In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB, + * 512 KiB, and 1 MiB are probably the only reasonable values, + * except for kernel and initramfs images where a bigger + * dictionary can be fine and useful. + * + * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at + * once. The caller must provide enough output space or the decoding will + * fail. The output space is used as the dictionary buffer, which is why + * there is no need to allocate the dictionary as part of the decoder's + * internal state. + * + * Because the output buffer is used as the workspace, streams encoded using + * a big dictionary are not a problem in single-call mode. It is enough that + * the output buffer is big enough to hold the actual uncompressed data; it + * can be smaller than the dictionary size stored in the stream headers. + * + * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes + * of memory is preallocated for the LZMA2 dictionary. This way there is no + * risk that xz_dec_run() could run out of memory, since xz_dec_run() will + * never allocate any memory. Instead, if the preallocated dictionary is too + * small for decoding the given input stream, xz_dec_run() will return + * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be + * decoded to avoid allocating excessive amount of memory for the dictionary. + * + * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC): + * dict_max specifies the maximum allowed dictionary size that xz_dec_run() + * may allocate once it has parsed the dictionary size from the stream + * headers. This way excessive allocations can be avoided while still + * limiting the maximum memory usage to a sane value to prevent running the + * system out of memory when decompressing streams from untrusted sources. + * + * On success, xz_dec_init() returns a pointer to struct xz_dec, which is + * ready to be used with xz_dec_run(). If memory allocation fails, + * xz_dec_init() returns NULL. + */ +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max); + +/** + * xz_dec_run() - Run the XZ decoder + * @s: Decoder state allocated using xz_dec_init() + * @b: Input and output buffers + * + * The possible return values depend on build options and operation mode. + * See enum xz_ret for details. + * + * NOTE: If an error occurs in single-call mode (return value is not + * XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the + * contents of the output buffer from b->out[b->out_pos] onward are + * undefined. This is true even after XZ_BUF_ERROR, because with some filter + * chains, there may be a second pass over the output buffer, and this pass + * cannot be properly done if the output buffer is truncated. Thus, you + * cannot give the single-call decoder a too small buffer and then expect to + * get that amount valid data from the beginning of the stream. You must use + * the multi-call decoder if you don't want to uncompress the whole stream. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b); + +/** + * xz_dec_reset() - Reset an already allocated decoder state + * @s: Decoder state allocated using xz_dec_init() + * + * This function can be used to reset the multi-call decoder state without + * freeing and reallocating memory with xz_dec_end() and xz_dec_init(). + * + * In single-call mode, xz_dec_reset() is always called in the beginning of + * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in + * multi-call mode. + */ +XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s); + +/** + * xz_dec_end() - Free the memory allocated for the decoder state + * @s: Decoder state allocated using xz_dec_init(). If s is NULL, + * this function does nothing. + */ +XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s); + +/* + * Standalone build (userspace build or in-kernel build for boot time use) + * needs a CRC32 implementation. For normal in-kernel use, kernel's own + * CRC32 module is used instead, and users of this module don't need to + * care about the functions below. + */ +#ifndef XZ_INTERNAL_CRC32 +# ifdef __KERNEL__ +# define XZ_INTERNAL_CRC32 0 +# else +# define XZ_INTERNAL_CRC32 1 +# endif +#endif + +#if XZ_INTERNAL_CRC32 +/* + * This must be called before any other xz_* function to initialize + * the CRC32 lookup table. + */ +XZ_EXTERN void XZ_FUNC xz_crc32_init(void); + +/* + * Update CRC32 value using the polynomial from IEEE-802.3. To start a new + * calculation, the third argument must be zero. To continue the calculation, + * the previously returned value is passed as the third argument. + */ +XZ_EXTERN uint32_t XZ_FUNC xz_crc32( + const uint8_t *buf, size_t size, uint32_t crc); +#endif +#endif diff --git a/archival/libarchive/unxz/xz_config.h b/archival/libarchive/unxz/xz_config.h new file mode 100644 index 000000000..187e1cbed --- /dev/null +++ b/archival/libarchive/unxz/xz_config.h @@ -0,0 +1,123 @@ +/* + * Private includes and definitions for userspace use of XZ Embedded + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_CONFIG_H +#define XZ_CONFIG_H + +/* Uncomment as needed to enable BCJ filter decoders. */ +/* #define XZ_DEC_X86 */ +/* #define XZ_DEC_POWERPC */ +/* #define XZ_DEC_IA64 */ +/* #define XZ_DEC_ARM */ +/* #define XZ_DEC_ARMTHUMB */ +/* #define XZ_DEC_SPARC */ + +#include +#include +#include + +#include "xz.h" + +#define kmalloc(size, flags) malloc(size) +#define kfree(ptr) free(ptr) +#define vmalloc(size) malloc(size) +#define vfree(ptr) free(ptr) + +#define memeq(a, b, size) (memcmp(a, b, size) == 0) +#define memzero(buf, size) memset(buf, 0, size) + +#undef min +#undef min_t +#define min(x, y) ((x) < (y) ? (x) : (y)) +#define min_t(type, x, y) min(x, y) + +/* + * Some functions have been marked with __always_inline to keep the + * performance reasonable even when the compiler is optimizing for + * small code size. You may be able to save a few bytes by #defining + * __always_inline to plain inline, but don't complain if the code + * becomes slow. + * + * NOTE: System headers on GNU/Linux may #define this macro already, + * so if you want to change it, you need to #undef it first. + */ +#ifndef __always_inline +# ifdef __GNUC__ +# define __always_inline \ + inline __attribute__((__always_inline__)) +# else +# define __always_inline inline +# endif +#endif + +/* + * Some functions are marked to never be inlined to reduce stack usage. + * If you don't care about stack usage, you may want to modify this so + * that noinline_for_stack is #defined to be empty even when using GCC. + * Doing so may save a few bytes in binary size. + */ +#ifndef noinline_for_stack +# ifdef __GNUC__ +# define noinline_for_stack __attribute__((__noinline__)) +# else +# define noinline_for_stack +# endif +#endif + +/* Inline functions to access unaligned unsigned 32-bit integers */ +#ifndef get_unaligned_le32 +static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf) +{ + return (uint32_t)buf[0] + | ((uint32_t)buf[1] << 8) + | ((uint32_t)buf[2] << 16) + | ((uint32_t)buf[3] << 24); +} +#endif + +#ifndef get_unaligned_be32 +static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf) +{ + return (uint32_t)(buf[0] << 24) + | ((uint32_t)buf[1] << 16) + | ((uint32_t)buf[2] << 8) + | (uint32_t)buf[3]; +} +#endif + +#ifndef put_unaligned_le32 +static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf) +{ + buf[0] = (uint8_t)val; + buf[1] = (uint8_t)(val >> 8); + buf[2] = (uint8_t)(val >> 16); + buf[3] = (uint8_t)(val >> 24); +} +#endif + +#ifndef put_unaligned_be32 +static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf) +{ + buf[0] = (uint8_t)(val >> 24); + buf[1] = (uint8_t)(val >> 16); + buf[2] = (uint8_t)(val >> 8); + buf[3] = (uint8_t)val; +} +#endif + +/* + * Use get_unaligned_le32() also for aligned access for simplicity. On + * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr)) + * could save a few bytes in code size. + */ +#ifndef get_le32 +# define get_le32 get_unaligned_le32 +#endif + +#endif diff --git a/archival/libarchive/unxz/xz_dec_bcj.c b/archival/libarchive/unxz/xz_dec_bcj.c new file mode 100644 index 000000000..09162b51f --- /dev/null +++ b/archival/libarchive/unxz/xz_dec_bcj.c @@ -0,0 +1,564 @@ +/* + * Branch/Call/Jump (BCJ) filter decoders + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" + +/* + * The rest of the file is inside this ifdef. It makes things a little more + * convenient when building without support for any BCJ filters. + */ +#ifdef XZ_DEC_BCJ + +struct xz_dec_bcj { + /* Type of the BCJ filter being used */ + enum { + BCJ_X86 = 4, /* x86 or x86-64 */ + BCJ_POWERPC = 5, /* Big endian only */ + BCJ_IA64 = 6, /* Big or little endian */ + BCJ_ARM = 7, /* Little endian only */ + BCJ_ARMTHUMB = 8, /* Little endian only */ + BCJ_SPARC = 9 /* Big or little endian */ + } type; + + /* + * Return value of the next filter in the chain. We need to preserve + * this information across calls, because we must not call the next + * filter anymore once it has returned XZ_STREAM_END. + */ + enum xz_ret ret; + + /* True if we are operating in single-call mode. */ + bool single_call; + + /* + * Absolute position relative to the beginning of the uncompressed + * data (in a single .xz Block). We care only about the lowest 32 + * bits so this doesn't need to be uint64_t even with big files. + */ + uint32_t pos; + + /* x86 filter state */ + uint32_t x86_prev_mask; + + /* Temporary space to hold the variables from struct xz_buf */ + uint8_t *out; + size_t out_pos; + size_t out_size; + + struct { + /* Amount of already filtered data in the beginning of buf */ + size_t filtered; + + /* Total amount of data currently stored in buf */ + size_t size; + + /* + * Buffer to hold a mix of filtered and unfiltered data. This + * needs to be big enough to hold Alignment + 2 * Look-ahead: + * + * Type Alignment Look-ahead + * x86 1 4 + * PowerPC 4 0 + * IA-64 16 0 + * ARM 4 0 + * ARM-Thumb 2 2 + * SPARC 4 0 + */ + uint8_t buf[16]; + } temp; +}; + +#ifdef XZ_DEC_X86 +/* + * This is macro used to test the most significant byte of a memory address + * in an x86 instruction. + */ +#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF) + +static noinline_for_stack size_t XZ_FUNC bcj_x86( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const bool mask_to_allowed_status[8] + = { true, true, true, false, true, false, false, false }; + + static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; + + size_t i; + size_t prev_pos = (size_t)-1; + uint32_t prev_mask = s->x86_prev_mask; + uint32_t src; + uint32_t dest; + uint32_t j; + uint8_t b; + + if (size <= 4) + return 0; + + size -= 4; + for (i = 0; i < size; ++i) { + if ((buf[i] & 0xFE) != 0xE8) + continue; + + prev_pos = i - prev_pos; + if (prev_pos > 3) { + prev_mask = 0; + } else { + prev_mask = (prev_mask << (prev_pos - 1)) & 7; + if (prev_mask != 0) { + b = buf[i + 4 - mask_to_bit_num[prev_mask]]; + if (!mask_to_allowed_status[prev_mask] + || bcj_x86_test_msbyte(b)) { + prev_pos = i; + prev_mask = (prev_mask << 1) | 1; + continue; + } + } + } + + prev_pos = i; + + if (bcj_x86_test_msbyte(buf[i + 4])) { + src = get_unaligned_le32(buf + i + 1); + while (true) { + dest = src - (s->pos + (uint32_t)i + 5); + if (prev_mask == 0) + break; + + j = mask_to_bit_num[prev_mask] * 8; + b = (uint8_t)(dest >> (24 - j)); + if (!bcj_x86_test_msbyte(b)) + break; + + src = dest ^ (((uint32_t)1 << (32 - j)) - 1); + } + + dest &= 0x01FFFFFF; + dest |= (uint32_t)0 - (dest & 0x01000000); + put_unaligned_le32(dest, buf + i + 1); + i += 4; + } else { + prev_mask = (prev_mask << 1) | 1; + } + } + + prev_pos = i - prev_pos; + s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); + return i; +} +#endif + +#ifdef XZ_DEC_POWERPC +static noinline_for_stack size_t XZ_FUNC bcj_powerpc( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr & 0xFC000003) == 0x48000001) { + instr &= 0x03FFFFFC; + instr -= s->pos + (uint32_t)i; + instr &= 0x03FFFFFC; + instr |= 0x48000001; + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_IA64 +static noinline_for_stack size_t XZ_FUNC bcj_ia64( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const uint8_t branch_table[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + /* + * The local variables take a little bit stack space, but it's less + * than what LZMA2 decoder takes, so it doesn't make sense to reduce + * stack usage here without doing that for the LZMA2 decoder too. + */ + + /* Loop counters */ + size_t i; + size_t j; + + /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ + uint32_t slot; + + /* Bitwise offset of the instruction indicated by slot */ + uint32_t bit_pos; + + /* bit_pos split into byte and bit parts */ + uint32_t byte_pos; + uint32_t bit_res; + + /* Address part of an instruction */ + uint32_t addr; + + /* Mask used to detect which instructions to convert */ + uint32_t mask; + + /* 41-bit instruction stored somewhere in the lowest 48 bits */ + uint64_t instr; + + /* Instruction normalized with bit_res for easier manipulation */ + uint64_t norm; + + for (i = 0; i + 16 <= size; i += 16) { + mask = branch_table[buf[i] & 0x1F]; + for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + byte_pos = bit_pos >> 3; + bit_res = bit_pos & 7; + instr = 0; + for (j = 0; j < 6; ++j) + instr |= (uint64_t)(buf[i + j + byte_pos]) + << (8 * j); + + norm = instr >> bit_res; + + if (((norm >> 37) & 0x0F) == 0x05 + && ((norm >> 9) & 0x07) == 0) { + addr = (norm >> 13) & 0x0FFFFF; + addr |= ((uint32_t)(norm >> 36) & 1) << 20; + addr <<= 4; + addr -= s->pos + (uint32_t)i; + addr >>= 4; + + norm &= ~((uint64_t)0x8FFFFF << 13); + norm |= (uint64_t)(addr & 0x0FFFFF) << 13; + norm |= (uint64_t)(addr & 0x100000) + << (36 - 20); + + instr &= (1 << bit_res) - 1; + instr |= norm << bit_res; + + for (j = 0; j < 6; j++) + buf[i + j + byte_pos] + = (uint8_t)(instr >> (8 * j)); + } + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARM +static noinline_for_stack size_t XZ_FUNC bcj_arm( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 4) { + if (buf[i + 3] == 0xEB) { + addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) + | ((uint32_t)buf[i + 2] << 16); + addr <<= 2; + addr -= s->pos + (uint32_t)i + 8; + addr >>= 2; + buf[i] = (uint8_t)addr; + buf[i + 1] = (uint8_t)(addr >> 8); + buf[i + 2] = (uint8_t)(addr >> 16); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARMTHUMB +static noinline_for_stack size_t XZ_FUNC bcj_armthumb( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 2) { + if ((buf[i + 1] & 0xF8) == 0xF0 + && (buf[i + 3] & 0xF8) == 0xF8) { + addr = (((uint32_t)buf[i + 1] & 0x07) << 19) + | ((uint32_t)buf[i] << 11) + | (((uint32_t)buf[i + 3] & 0x07) << 8) + | (uint32_t)buf[i + 2]; + addr <<= 1; + addr -= s->pos + (uint32_t)i + 4; + addr >>= 1; + buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); + buf[i] = (uint8_t)(addr >> 11); + buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); + buf[i + 2] = (uint8_t)addr; + i += 2; + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_SPARC +static noinline_for_stack size_t XZ_FUNC bcj_sparc( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { + instr <<= 2; + instr -= s->pos + (uint32_t)i; + instr >>= 2; + instr = ((uint32_t)0x40000000 - (instr & 0x400000)) + | 0x40000000 | (instr & 0x3FFFFF); + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +/* + * Apply the selected BCJ filter. Update *pos and s->pos to match the amount + * of data that got filtered. + * + * NOTE: This is implemented as a switch statement to avoid using function + * pointers, which could be problematic in the kernel boot code, which must + * avoid pointers to static data (at least on x86). + */ +static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s, + uint8_t *buf, size_t *pos, size_t size) +{ + size_t filtered; + + buf += *pos; + size -= *pos; + + switch (s->type) { +#ifdef XZ_DEC_X86 + case BCJ_X86: + filtered = bcj_x86(s, buf, size); + break; +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: + filtered = bcj_powerpc(s, buf, size); + break; +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: + filtered = bcj_ia64(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: + filtered = bcj_arm(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: + filtered = bcj_armthumb(s, buf, size); + break; +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: + filtered = bcj_sparc(s, buf, size); + break; +#endif + default: + /* Never reached but silence compiler warnings. */ + filtered = 0; + break; + } + + *pos += filtered; + s->pos += filtered; +} + +/* + * Flush pending filtered data from temp to the output buffer. + * Move the remaining mixture of possibly filtered and unfiltered + * data to the beginning of temp. + */ +static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) +{ + size_t copy_size; + + copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); + memcpy(b->out + b->out_pos, s->temp.buf, copy_size); + b->out_pos += copy_size; + + s->temp.filtered -= copy_size; + s->temp.size -= copy_size; + memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); +} + +/* + * The BCJ filter functions are primitive in sense that they process the + * data in chunks of 1-16 bytes. To hide this issue, this function does + * some buffering. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, struct xz_buf *b) +{ + size_t out_start; + + /* + * Flush pending already filtered data to the output buffer. Return + * immediatelly if we couldn't flush everything, or if the next + * filter in the chain had already returned XZ_STREAM_END. + */ + if (s->temp.filtered > 0) { + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + } + + /* + * If we have more output space than what is currently pending in + * temp, copy the unfiltered data from temp to the output buffer + * and try to fill the output buffer by decoding more data from the + * next filter in the chain. Apply the BCJ filter on the new data + * in the output buffer. If everything cannot be filtered, copy it + * to temp and rewind the output buffer position accordingly. + */ + if (s->temp.size < b->out_size - b->out_pos) { + out_start = b->out_pos; + memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); + b->out_pos += s->temp.size; + + s->ret = xz_dec_lzma2_run(lzma2, b); + if (s->ret != XZ_STREAM_END + && (s->ret != XZ_OK || s->single_call)) + return s->ret; + + bcj_apply(s, b->out, &out_start, b->out_pos); + + /* + * As an exception, if the next filter returned XZ_STREAM_END, + * we can do that too, since the last few bytes that remain + * unfiltered are meant to remain unfiltered. + */ + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + + s->temp.size = b->out_pos - out_start; + b->out_pos -= s->temp.size; + memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); + } + + /* + * If we have unfiltered data in temp, try to fill by decoding more + * data from the next filter. Apply the BCJ filter on temp. Then we + * hopefully can fill the actual output buffer by copying filtered + * data from temp. A mix of filtered and unfiltered data may be left + * in temp; it will be taken care on the next call to this function. + */ + if (s->temp.size > 0) { + /* Make b->out{,_pos,_size} temporarily point to s->temp. */ + s->out = b->out; + s->out_pos = b->out_pos; + s->out_size = b->out_size; + b->out = s->temp.buf; + b->out_pos = s->temp.size; + b->out_size = sizeof(s->temp.buf); + + s->ret = xz_dec_lzma2_run(lzma2, b); + + s->temp.size = b->out_pos; + b->out = s->out; + b->out_pos = s->out_pos; + b->out_size = s->out_size; + + if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) + return s->ret; + + bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); + + /* + * If the next filter returned XZ_STREAM_END, we mark that + * everything is filtered, since the last unfiltered bytes + * of the stream are meant to be left as is. + */ + if (s->ret == XZ_STREAM_END) + s->temp.filtered = s->temp.size; + + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + } + + return s->ret; +} + +XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call) +{ + struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s != NULL) + s->single_call = single_call; + + return s; +} + +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset( + struct xz_dec_bcj *s, uint8_t id) +{ + switch (id) { +#ifdef XZ_DEC_X86 + case BCJ_X86: +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: +#endif + break; + + default: + /* Unsupported Filter ID */ + return XZ_OPTIONS_ERROR; + } + + s->type = id; + s->ret = XZ_OK; + s->pos = 0; + s->x86_prev_mask = 0; + s->temp.filtered = 0; + s->temp.size = 0; + + return XZ_OK; +} + +#endif diff --git a/archival/libarchive/unxz/xz_dec_lzma2.c b/archival/libarchive/unxz/xz_dec_lzma2.c new file mode 100644 index 000000000..da71cb4d4 --- /dev/null +++ b/archival/libarchive/unxz/xz_dec_lzma2.c @@ -0,0 +1,1175 @@ +/* + * LZMA2 decoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_lzma2.h" + +/* + * Range decoder initialization eats the first five bytes of each LZMA chunk. + */ +#define RC_INIT_BYTES 5 + +/* + * Minimum number of usable input buffer to safely decode one LZMA symbol. + * The worst case is that we decode 22 bits using probabilities and 26 + * direct bits. This may decode at maximum of 20 bytes of input. However, + * lzma_main() does an extra normalization before returning, thus we + * need to put 21 here. + */ +#define LZMA_IN_REQUIRED 21 + +/* + * Dictionary (history buffer) + * + * These are always true: + * start <= pos <= full <= end + * pos <= limit <= end + * + * In multi-call mode, also these are true: + * end == size + * size <= size_max + * allocated <= size + * + * Most of these variables are size_t to support single-call mode, + * in which the dictionary variables address the actual output + * buffer directly. + */ +struct dictionary { + /* Beginning of the history buffer */ + uint8_t *buf; + + /* Old position in buf (before decoding more data) */ + size_t start; + + /* Position in buf */ + size_t pos; + + /* + * How full dictionary is. This is used to detect corrupt input that + * would read beyond the beginning of the uncompressed stream. + */ + size_t full; + + /* Write limit; we don't write to buf[limit] or later bytes. */ + size_t limit; + + /* + * End of the dictionary buffer. In multi-call mode, this is + * the same as the dictionary size. In single-call mode, this + * indicates the size of the output buffer. + */ + size_t end; + + /* + * Size of the dictionary as specified in Block Header. This is used + * together with "full" to detect corrupt input that would make us + * read beyond the beginning of the uncompressed stream. + */ + uint32_t size; + + /* + * Maximum allowed dictionary size in multi-call mode. + * This is ignored in single-call mode. + */ + uint32_t size_max; + + /* + * Amount of memory currently allocated for the dictionary. + * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, + * size_max is always the same as the allocated size.) + */ + uint32_t allocated; + + /* Operation mode */ + enum xz_mode mode; +}; + +/* Range decoder */ +struct rc_dec { + uint32_t range; + uint32_t code; + + /* + * Number of initializing bytes remaining to be read + * by rc_read_init(). + */ + uint32_t init_bytes_left; + + /* + * Buffer from which we read our input. It can be either + * temp.buf or the caller-provided input buffer. + */ + const uint8_t *in; + size_t in_pos; + size_t in_limit; +}; + +/* Probabilities for a length decoder. */ +struct lzma_len_dec { + /* Probability of match length being at least 10 */ + uint16_t choice; + + /* Probability of match length being at least 18 */ + uint16_t choice2; + + /* Probabilities for match lengths 2-9 */ + uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + + /* Probabilities for match lengths 10-17 */ + uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + + /* Probabilities for match lengths 18-273 */ + uint16_t high[LEN_HIGH_SYMBOLS]; +}; + +struct lzma_dec { + /* Distances of latest four matches */ + uint32_t rep0; + uint32_t rep1; + uint32_t rep2; + uint32_t rep3; + + /* Types of the most recently seen LZMA symbols */ + enum lzma_state state; + + /* + * Length of a match. This is updated so that dict_repeat can + * be called again to finish repeating the whole match. + */ + uint32_t len; + + /* + * LZMA properties or related bit masks (number of literal + * context bits, a mask dervied from the number of literal + * position bits, and a mask dervied from the number + * position bits) + */ + uint32_t lc; + uint32_t literal_pos_mask; /* (1 << lp) - 1 */ + uint32_t pos_mask; /* (1 << pb) - 1 */ + + /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ + uint16_t is_match[STATES][POS_STATES_MAX]; + + /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */ + uint16_t is_rep[STATES]; + + /* + * If 0, distance of a repeated match is rep0. + * Otherwise check is_rep1. + */ + uint16_t is_rep0[STATES]; + + /* + * If 0, distance of a repeated match is rep1. + * Otherwise check is_rep2. + */ + uint16_t is_rep1[STATES]; + + /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ + uint16_t is_rep2[STATES]; + + /* + * If 1, the repeated match has length of one byte. Otherwise + * the length is decoded from rep_len_decoder. + */ + uint16_t is_rep0_long[STATES][POS_STATES_MAX]; + + /* + * Probability tree for the highest two bits of the match + * distance. There is a separate probability tree for match + * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + */ + uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; + + /* + * Probility trees for additional bits for match distance + * when the distance is in the range [4, 127]. + */ + uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END]; + + /* + * Probability tree for the lowest four bits of a match + * distance that is equal to or greater than 128. + */ + uint16_t dist_align[ALIGN_SIZE]; + + /* Length of a normal match */ + struct lzma_len_dec match_len_dec; + + /* Length of a repeated match */ + struct lzma_len_dec rep_len_dec; + + /* Probabilities of literals */ + uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; +}; + +struct lzma2_dec { + /* Position in xz_dec_lzma2_run(). */ + enum lzma2_seq { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA_PREPARE, + SEQ_LZMA_RUN, + SEQ_COPY + } sequence; + + /* Next position after decoding the compressed size of the chunk. */ + enum lzma2_seq next_sequence; + + /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ + uint32_t uncompressed; + + /* + * Compressed size of LZMA chunk or compressed/uncompressed + * size of uncompressed chunk (64 KiB at maximum) + */ + uint32_t compressed; + + /* + * True if dictionary reset is needed. This is false before + * the first chunk (LZMA or uncompressed). + */ + bool need_dict_reset; + + /* + * True if new LZMA properties are needed. This is false + * before the first LZMA chunk. + */ + bool need_props; +}; + +struct xz_dec_lzma2 { + /* + * The order below is important on x86 to reduce code size and + * it shouldn't hurt on other platforms. Everything up to and + * including lzma.pos_mask are in the first 128 bytes on x86-32, + * which allows using smaller instructions to access those + * variables. On x86-64, fewer variables fit into the first 128 + * bytes, but this is still the best order without sacrificing + * the readability by splitting the structures. + */ + struct rc_dec rc; + struct dictionary dict; + struct lzma2_dec lzma2; + struct lzma_dec lzma; + + /* + * Temporary buffer which holds small number of input bytes between + * decoder calls. See lzma2_lzma() for details. + */ + struct { + uint32_t size; + uint8_t buf[3 * LZMA_IN_REQUIRED]; + } temp; +}; + +/************** + * Dictionary * + **************/ + +/* + * Reset the dictionary state. When in single-call mode, set up the beginning + * of the dictionary to point to the actual output buffer. + */ +static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b) +{ + if (DEC_IS_SINGLE(dict->mode)) { + dict->buf = b->out + b->out_pos; + dict->end = b->out_size - b->out_pos; + } + + dict->start = 0; + dict->pos = 0; + dict->limit = 0; + dict->full = 0; +} + +/* Set dictionary write limit */ +static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max) +{ + if (dict->end - dict->pos <= out_max) + dict->limit = dict->end; + else + dict->limit = dict->pos + out_max; +} + +/* Return true if at least one byte can be written into the dictionary. */ +static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict) +{ + return dict->pos < dict->limit; +} + +/* + * Get a byte from the dictionary at the given distance. The distance is + * assumed to valid, or as a special case, zero when the dictionary is + * still empty. This special case is needed for single-call decoding to + * avoid writing a '\0' to the end of the destination buffer. + */ +static __always_inline uint32_t XZ_FUNC dict_get( + const struct dictionary *dict, uint32_t dist) +{ + size_t offset = dict->pos - dist - 1; + + if (dist >= dict->pos) + offset += dict->end; + + return dict->full > 0 ? dict->buf[offset] : 0; +} + +/* + * Put one byte into the dictionary. It is assumed that there is space for it. + */ +static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte) +{ + dict->buf[dict->pos++] = byte; + + if (dict->full < dict->pos) + dict->full = dict->pos; +} + +/* + * Repeat given number of bytes from the given distance. If the distance is + * invalid, false is returned. On success, true is returned and *len is + * updated to indicate how many bytes were left to be repeated. + */ +static bool XZ_FUNC dict_repeat( + struct dictionary *dict, uint32_t *len, uint32_t dist) +{ + size_t back; + uint32_t left; + + if (dist >= dict->full || dist >= dict->size) + return false; + + left = min_t(size_t, dict->limit - dict->pos, *len); + *len -= left; + + back = dict->pos - dist - 1; + if (dist >= dict->pos) + back += dict->end; + + do { + dict->buf[dict->pos++] = dict->buf[back++]; + if (back == dict->end) + back = 0; + } while (--left > 0); + + if (dict->full < dict->pos) + dict->full = dict->pos; + + return true; +} + +/* Copy uncompressed data as is from input to dictionary and output buffers. */ +static void XZ_FUNC dict_uncompressed( + struct dictionary *dict, struct xz_buf *b, uint32_t *left) +{ + size_t copy_size; + + while (*left > 0 && b->in_pos < b->in_size + && b->out_pos < b->out_size) { + copy_size = min(b->in_size - b->in_pos, + b->out_size - b->out_pos); + if (copy_size > dict->end - dict->pos) + copy_size = dict->end - dict->pos; + if (copy_size > *left) + copy_size = *left; + + *left -= copy_size; + + memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); + dict->pos += copy_size; + + if (dict->full < dict->pos) + dict->full = dict->pos; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, b->in + b->in_pos, + copy_size); + } + + dict->start = dict->pos; + + b->out_pos += copy_size; + b->in_pos += copy_size; + + } +} + +/* + * Flush pending data from dictionary to b->out. It is assumed that there is + * enough space in b->out. This is guaranteed because caller uses dict_limit() + * before decoding data into the dictionary. + */ +static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b) +{ + size_t copy_size = dict->pos - dict->start; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, dict->buf + dict->start, + copy_size); + } + + dict->start = dict->pos; + b->out_pos += copy_size; + return copy_size; +} + +/***************** + * Range decoder * + *****************/ + +/* Reset the range decoder. */ +static void XZ_FUNC rc_reset(struct rc_dec *rc) +{ + rc->range = (uint32_t)-1; + rc->code = 0; + rc->init_bytes_left = RC_INIT_BYTES; +} + +/* + * Read the first five initial bytes into rc->code if they haven't been + * read already. (Yes, the first byte gets completely ignored.) + */ +static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b) +{ + while (rc->init_bytes_left > 0) { + if (b->in_pos == b->in_size) + return false; + + rc->code = (rc->code << 8) + b->in[b->in_pos++]; + --rc->init_bytes_left; + } + + return true; +} + +/* Return true if there may not be enough input for the next decoding loop. */ +static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc) +{ + return rc->in_pos > rc->in_limit; +} + +/* + * Return true if it is possible (from point of view of range decoder) that + * we have reached the end of the LZMA chunk. + */ +static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc) +{ + return rc->code == 0; +} + +/* Read the next input byte if needed. */ +static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc) +{ + if (rc->range < RC_TOP_VALUE) { + rc->range <<= RC_SHIFT_BITS; + rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++]; + } +} + +/* + * Decode one bit. In some versions, this function has been splitted in three + * functions so that the compiler is supposed to be able to more easily avoid + * an extra branch. In this particular version of the LZMA decoder, this + * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 + * on x86). Using a non-splitted version results in nicer looking code too. + * + * NOTE: This must return an int. Do not make it return a bool or the speed + * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, + * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) + */ +static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob) +{ + uint32_t bound; + int bit; + + rc_normalize(rc); + bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob; + if (rc->code < bound) { + rc->range = bound; + *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS; + bit = 0; + } else { + rc->range -= bound; + rc->code -= bound; + *prob -= *prob >> RC_MOVE_BITS; + bit = 1; + } + + return bit; +} + +/* Decode a bittree starting from the most significant bit. */ +static __always_inline uint32_t XZ_FUNC rc_bittree( + struct rc_dec *rc, uint16_t *probs, uint32_t limit) +{ + uint32_t symbol = 1; + + do { + if (rc_bit(rc, &probs[symbol])) + symbol = (symbol << 1) + 1; + else + symbol <<= 1; + } while (symbol < limit); + + return symbol; +} + +/* Decode a bittree starting from the least significant bit. */ +static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc, + uint16_t *probs, uint32_t *dest, uint32_t limit) +{ + uint32_t symbol = 1; + uint32_t i = 0; + + do { + if (rc_bit(rc, &probs[symbol])) { + symbol = (symbol << 1) + 1; + *dest += 1 << i; + } else { + symbol <<= 1; + } + } while (++i < limit); +} + +/* Decode direct bits (fixed fifty-fifty probability) */ +static inline void XZ_FUNC rc_direct( + struct rc_dec *rc, uint32_t *dest, uint32_t limit) +{ + uint32_t mask; + + do { + rc_normalize(rc); + rc->range >>= 1; + rc->code -= rc->range; + mask = (uint32_t)0 - (rc->code >> 31); + rc->code += rc->range & mask; + *dest = (*dest << 1) + (mask + 1); + } while (--limit > 0); +} + +/******** + * LZMA * + ********/ + +/* Get pointer to literal coder probability array. */ +static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s) +{ + uint32_t prev_byte = dict_get(&s->dict, 0); + uint32_t low = prev_byte >> (8 - s->lzma.lc); + uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc; + return s->lzma.literal[low + high]; +} + +/* Decode a literal (one 8-bit byte) */ +static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + uint32_t symbol; + uint32_t match_byte; + uint32_t match_bit; + uint32_t offset; + uint32_t i; + + probs = lzma_literal_probs(s); + + if (lzma_state_is_literal(s->lzma.state)) { + symbol = rc_bittree(&s->rc, probs, 0x100); + } else { + symbol = 1; + match_byte = dict_get(&s->dict, s->lzma.rep0) << 1; + offset = 0x100; + + do { + match_bit = match_byte & offset; + match_byte <<= 1; + i = offset + match_bit + symbol; + + if (rc_bit(&s->rc, &probs[i])) { + symbol = (symbol << 1) + 1; + offset &= match_bit; + } else { + symbol <<= 1; + offset &= ~match_bit; + } + } while (symbol < 0x100); + } + + dict_put(&s->dict, (uint8_t)symbol); + lzma_state_literal(&s->lzma.state); +} + +/* Decode the length of the match into s->lzma.len. */ +static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, + uint32_t pos_state) +{ + uint16_t *probs; + uint32_t limit; + + if (!rc_bit(&s->rc, &l->choice)) { + probs = l->low[pos_state]; + limit = LEN_LOW_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN; + } else { + if (!rc_bit(&s->rc, &l->choice2)) { + probs = l->mid[pos_state]; + limit = LEN_MID_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; + } else { + probs = l->high; + limit = LEN_HIGH_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS + + LEN_MID_SYMBOLS; + } + } + + s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit; +} + +/* Decode a match. The distance will be stored in s->lzma.rep0. */ +static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint16_t *probs; + uint32_t dist_slot; + uint32_t limit; + + lzma_state_match(&s->lzma.state); + + s->lzma.rep3 = s->lzma.rep2; + s->lzma.rep2 = s->lzma.rep1; + s->lzma.rep1 = s->lzma.rep0; + + lzma_len(s, &s->lzma.match_len_dec, pos_state); + + probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)]; + dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS; + + if (dist_slot < DIST_MODEL_START) { + s->lzma.rep0 = dist_slot; + } else { + limit = (dist_slot >> 1) - 1; + s->lzma.rep0 = 2 + (dist_slot & 1); + + if (dist_slot < DIST_MODEL_END) { + s->lzma.rep0 <<= limit; + probs = s->lzma.dist_special + s->lzma.rep0 + - dist_slot - 1; + rc_bittree_reverse(&s->rc, probs, + &s->lzma.rep0, limit); + } else { + rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS); + s->lzma.rep0 <<= ALIGN_BITS; + rc_bittree_reverse(&s->rc, s->lzma.dist_align, + &s->lzma.rep0, ALIGN_BITS); + } + } +} + +/* + * Decode a repeated match. The distance is one of the four most recently + * seen matches. The distance will be stored in s->lzma.rep0. + */ +static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint32_t tmp; + + if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) { + if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[ + s->lzma.state][pos_state])) { + lzma_state_short_rep(&s->lzma.state); + s->lzma.len = 1; + return; + } + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) { + tmp = s->lzma.rep1; + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) { + tmp = s->lzma.rep2; + } else { + tmp = s->lzma.rep3; + s->lzma.rep3 = s->lzma.rep2; + } + + s->lzma.rep2 = s->lzma.rep1; + } + + s->lzma.rep1 = s->lzma.rep0; + s->lzma.rep0 = tmp; + } + + lzma_state_long_rep(&s->lzma.state); + lzma_len(s, &s->lzma.rep_len_dec, pos_state); +} + +/* LZMA decoder core */ +static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s) +{ + uint32_t pos_state; + + /* + * If the dictionary was reached during the previous call, try to + * finish the possibly pending repeat in the dictionary. + */ + if (dict_has_space(&s->dict) && s->lzma.len > 0) + dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0); + + /* + * Decode more LZMA symbols. One iteration may consume up to + * LZMA_IN_REQUIRED - 1 bytes. + */ + while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) { + pos_state = s->dict.pos & s->lzma.pos_mask; + + if (!rc_bit(&s->rc, &s->lzma.is_match[ + s->lzma.state][pos_state])) { + lzma_literal(s); + } else { + if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state])) + lzma_rep_match(s, pos_state); + else + lzma_match(s, pos_state); + + if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0)) + return false; + } + } + + /* + * Having the range decoder always normalized when we are outside + * this function makes it easier to correctly handle end of the chunk. + */ + rc_normalize(&s->rc); + + return true; +} + +/* + * Reset the LZMA decoder and range decoder state. Dictionary is nore reset + * here, because LZMA state may be reset without resetting the dictionary. + */ +static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + size_t i; + + s->lzma.state = STATE_LIT_LIT; + s->lzma.rep0 = 0; + s->lzma.rep1 = 0; + s->lzma.rep2 = 0; + s->lzma.rep3 = 0; + + /* + * All probabilities are initialized to the same value. This hack + * makes the code smaller by avoiding a separate loop for each + * probability array. + * + * This could be optimized so that only that part of literal + * probabilities that are actually required. In the common case + * we would write 12 KiB less. + */ + probs = s->lzma.is_match[0]; + for (i = 0; i < PROBS_TOTAL; ++i) + probs[i] = RC_BIT_MODEL_TOTAL / 2; + + rc_reset(&s->rc); +} + +/* + * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks + * from the decoded lp and pb values. On success, the LZMA decoder state is + * reset and true is returned. + */ +static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props) +{ + if (props > (4 * 5 + 4) * 9 + 8) + return false; + + s->lzma.pos_mask = 0; + while (props >= 9 * 5) { + props -= 9 * 5; + ++s->lzma.pos_mask; + } + + s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1; + + s->lzma.literal_pos_mask = 0; + while (props >= 9) { + props -= 9; + ++s->lzma.literal_pos_mask; + } + + s->lzma.lc = props; + + if (s->lzma.lc + s->lzma.literal_pos_mask > 4) + return false; + + s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1; + + lzma_reset(s); + + return true; +} + +/********* + * LZMA2 * + *********/ + +/* + * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't + * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This + * wrapper function takes care of making the LZMA decoder's assumption safe. + * + * As long as there is plenty of input left to be decoded in the current LZMA + * chunk, we decode directly from the caller-supplied input buffer until + * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into + * s->temp.buf, which (hopefully) gets filled on the next call to this + * function. We decode a few bytes from the temporary buffer so that we can + * continue decoding from the caller-supplied input buffer again. + */ +static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) +{ + size_t in_avail; + uint32_t tmp; + + in_avail = b->in_size - b->in_pos; + if (s->temp.size > 0 || s->lzma2.compressed == 0) { + tmp = 2 * LZMA_IN_REQUIRED - s->temp.size; + if (tmp > s->lzma2.compressed - s->temp.size) + tmp = s->lzma2.compressed - s->temp.size; + if (tmp > in_avail) + tmp = in_avail; + + memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp); + + if (s->temp.size + tmp == s->lzma2.compressed) { + memzero(s->temp.buf + s->temp.size + tmp, + sizeof(s->temp.buf) + - s->temp.size - tmp); + s->rc.in_limit = s->temp.size + tmp; + } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) { + s->temp.size += tmp; + b->in_pos += tmp; + return true; + } else { + s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED; + } + + s->rc.in = s->temp.buf; + s->rc.in_pos = 0; + + if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp) + return false; + + s->lzma2.compressed -= s->rc.in_pos; + + if (s->rc.in_pos < s->temp.size) { + s->temp.size -= s->rc.in_pos; + memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, + s->temp.size); + return true; + } + + b->in_pos += s->rc.in_pos - s->temp.size; + s->temp.size = 0; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail >= LZMA_IN_REQUIRED) { + s->rc.in = b->in; + s->rc.in_pos = b->in_pos; + + if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED) + s->rc.in_limit = b->in_pos + s->lzma2.compressed; + else + s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED; + + if (!lzma_main(s)) + return false; + + in_avail = s->rc.in_pos - b->in_pos; + if (in_avail > s->lzma2.compressed) + return false; + + s->lzma2.compressed -= in_avail; + b->in_pos = s->rc.in_pos; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail < LZMA_IN_REQUIRED) { + if (in_avail > s->lzma2.compressed) + in_avail = s->lzma2.compressed; + + memcpy(s->temp.buf, b->in + b->in_pos, in_avail); + s->temp.size = in_avail; + b->in_pos += in_avail; + } + + return true; +} + +/* + * Take care of the LZMA2 control layer, and forward the job of actual LZMA + * decoding or copying of uncompressed chunks to other functions. + */ +XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run( + struct xz_dec_lzma2 *s, struct xz_buf *b) +{ + uint32_t tmp; + + while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) { + switch (s->lzma2.sequence) { + case SEQ_CONTROL: + /* + * LZMA2 control byte + * + * Exact values: + * 0x00 End marker + * 0x01 Dictionary reset followed by + * an uncompressed chunk + * 0x02 Uncompressed chunk (no dictionary reset) + * + * Highest three bits (s->control & 0xE0): + * 0xE0 Dictionary reset, new properties and state + * reset, followed by LZMA compressed chunk + * 0xC0 New properties and state reset, followed + * by LZMA compressed chunk (no dictionary + * reset) + * 0xA0 State reset using old properties, + * followed by LZMA compressed chunk (no + * dictionary reset) + * 0x80 LZMA chunk (no dictionary or state reset) + * + * For LZMA compressed chunks, the lowest five bits + * (s->control & 1F) are the highest bits of the + * uncompressed size (bits 16-20). + * + * A new LZMA2 stream must begin with a dictionary + * reset. The first LZMA chunk must set new + * properties and reset the LZMA state. + * + * Values that don't match anything described above + * are invalid and we return XZ_DATA_ERROR. + */ + tmp = b->in[b->in_pos++]; + + if (tmp >= 0xE0 || tmp == 0x01) { + s->lzma2.need_props = true; + s->lzma2.need_dict_reset = false; + dict_reset(&s->dict, b); + } else if (s->lzma2.need_dict_reset) { + return XZ_DATA_ERROR; + } + + if (tmp >= 0x80) { + s->lzma2.uncompressed = (tmp & 0x1F) << 16; + s->lzma2.sequence = SEQ_UNCOMPRESSED_1; + + if (tmp >= 0xC0) { + /* + * When there are new properties, + * state reset is done at + * SEQ_PROPERTIES. + */ + s->lzma2.need_props = false; + s->lzma2.next_sequence + = SEQ_PROPERTIES; + + } else if (s->lzma2.need_props) { + return XZ_DATA_ERROR; + + } else { + s->lzma2.next_sequence + = SEQ_LZMA_PREPARE; + if (tmp >= 0xA0) + lzma_reset(s); + } + } else { + if (tmp == 0x00) + return XZ_STREAM_END; + + if (tmp > 0x02) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_COMPRESSED_0; + s->lzma2.next_sequence = SEQ_COPY; + } + + break; + + case SEQ_UNCOMPRESSED_1: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = SEQ_COMPRESSED_0; + break; + + case SEQ_COMPRESSED_0: + s->lzma2.compressed + = (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + s->lzma2.compressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = s->lzma2.next_sequence; + break; + + case SEQ_PROPERTIES: + if (!lzma_props(s, b->in[b->in_pos++])) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_LZMA_PREPARE; + + case SEQ_LZMA_PREPARE: + if (s->lzma2.compressed < RC_INIT_BYTES) + return XZ_DATA_ERROR; + + if (!rc_read_init(&s->rc, b)) + return XZ_OK; + + s->lzma2.compressed -= RC_INIT_BYTES; + s->lzma2.sequence = SEQ_LZMA_RUN; + + case SEQ_LZMA_RUN: + /* + * Set dictionary limit to indicate how much we want + * to be encoded at maximum. Decode new data into the + * dictionary. Flush the new data from dictionary to + * b->out. Check if we finished decoding this chunk. + * In case the dictionary got full but we didn't fill + * the output buffer yet, we may run this loop + * multiple times without changing s->lzma2.sequence. + */ + dict_limit(&s->dict, min_t(size_t, + b->out_size - b->out_pos, + s->lzma2.uncompressed)); + if (!lzma2_lzma(s, b)) + return XZ_DATA_ERROR; + + s->lzma2.uncompressed -= dict_flush(&s->dict, b); + + if (s->lzma2.uncompressed == 0) { + if (s->lzma2.compressed > 0 || s->lzma.len > 0 + || !rc_is_finished(&s->rc)) + return XZ_DATA_ERROR; + + rc_reset(&s->rc); + s->lzma2.sequence = SEQ_CONTROL; + + } else if (b->out_pos == b->out_size + || (b->in_pos == b->in_size + && s->temp.size + < s->lzma2.compressed)) { + return XZ_OK; + } + + break; + + case SEQ_COPY: + dict_uncompressed(&s->dict, b, &s->lzma2.compressed); + if (s->lzma2.compressed > 0) + return XZ_OK; + + s->lzma2.sequence = SEQ_CONTROL; + break; + } + } + + return XZ_OK; +} + +XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( + enum xz_mode mode, uint32_t dict_max) +{ + struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->dict.mode = mode; + s->dict.size_max = dict_max; + + if (DEC_IS_PREALLOC(mode)) { + s->dict.buf = vmalloc(dict_max); + if (s->dict.buf == NULL) { + kfree(s); + return NULL; + } + } else if (DEC_IS_DYNALLOC(mode)) { + s->dict.buf = NULL; + s->dict.allocated = 0; + } + + return s; +} + +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( + struct xz_dec_lzma2 *s, uint8_t props) +{ + /* This limits dictionary size to 3 GiB to keep parsing simpler. */ + if (props > 39) + return XZ_OPTIONS_ERROR; + + s->dict.size = 2 + (props & 1); + s->dict.size <<= (props >> 1) + 11; + + if (DEC_IS_MULTI(s->dict.mode)) { + if (s->dict.size > s->dict.size_max) + return XZ_MEMLIMIT_ERROR; + + s->dict.end = s->dict.size; + + if (DEC_IS_DYNALLOC(s->dict.mode)) { + if (s->dict.allocated < s->dict.size) { + vfree(s->dict.buf); + s->dict.buf = vmalloc(s->dict.size); + if (s->dict.buf == NULL) { + s->dict.allocated = 0; + return XZ_MEM_ERROR; + } + } + } + } + + s->lzma.len = 0; + + s->lzma2.sequence = SEQ_CONTROL; + s->lzma2.need_dict_reset = true; + + s->temp.size = 0; + + return XZ_OK; +} + +XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s) +{ + if (DEC_IS_MULTI(s->dict.mode)) + vfree(s->dict.buf); + + kfree(s); +} diff --git a/archival/libarchive/unxz/xz_dec_stream.c b/archival/libarchive/unxz/xz_dec_stream.c new file mode 100644 index 000000000..bdcbf1ba3 --- /dev/null +++ b/archival/libarchive/unxz/xz_dec_stream.c @@ -0,0 +1,822 @@ +/* + * .xz Stream decoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_stream.h" + +/* Hash used to validate the Index field */ +struct xz_dec_hash { + vli_type unpadded; + vli_type uncompressed; + uint32_t crc32; +}; + +struct xz_dec { + /* Position in dec_main() */ + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_START, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_UNCOMPRESS, + SEQ_BLOCK_PADDING, + SEQ_BLOCK_CHECK, + SEQ_INDEX, + SEQ_INDEX_PADDING, + SEQ_INDEX_CRC32, + SEQ_STREAM_FOOTER + } sequence; + + /* Position in variable-length integers and Check fields */ + uint32_t pos; + + /* Variable-length integer decoded by dec_vli() */ + vli_type vli; + + /* Saved in_pos and out_pos */ + size_t in_start; + size_t out_start; + + /* CRC32 value in Block or Index */ + uint32_t crc32; + + /* Type of the integrity check calculated from uncompressed data */ + enum xz_check check_type; + + /* Operation mode */ + enum xz_mode mode; + + /* + * True if the next call to xz_dec_run() is allowed to return + * XZ_BUF_ERROR. + */ + bool allow_buf_error; + + /* Information stored in Block Header */ + struct { + /* + * Value stored in the Compressed Size field, or + * VLI_UNKNOWN if Compressed Size is not present. + */ + vli_type compressed; + + /* + * Value stored in the Uncompressed Size field, or + * VLI_UNKNOWN if Uncompressed Size is not present. + */ + vli_type uncompressed; + + /* Size of the Block Header field */ + uint32_t size; + } block_header; + + /* Information collected when decoding Blocks */ + struct { + /* Observed compressed size of the current Block */ + vli_type compressed; + + /* Observed uncompressed size of the current Block */ + vli_type uncompressed; + + /* Number of Blocks decoded so far */ + vli_type count; + + /* + * Hash calculated from the Block sizes. This is used to + * validate the Index field. + */ + struct xz_dec_hash hash; + } block; + + /* Variables needed when verifying the Index field */ + struct { + /* Position in dec_index() */ + enum { + SEQ_INDEX_COUNT, + SEQ_INDEX_UNPADDED, + SEQ_INDEX_UNCOMPRESSED + } sequence; + + /* Size of the Index in bytes */ + vli_type size; + + /* Number of Records (matches block.count in valid files) */ + vli_type count; + + /* + * Hash calculated from the Records (matches block.hash in + * valid files). + */ + struct xz_dec_hash hash; + } index; + + /* + * Temporary buffer needed to hold Stream Header, Block Header, + * and Stream Footer. The Block Header is the biggest (1 KiB) + * so we reserve space according to that. buf[] has to be aligned + * to a multiple of four bytes; the size_t variables before it + * should guarantee this. + */ + struct { + size_t pos; + size_t size; + uint8_t buf[1024]; + } temp; + + struct xz_dec_lzma2 *lzma2; + +#ifdef XZ_DEC_BCJ + struct xz_dec_bcj *bcj; + bool bcj_active; +#endif +}; + +#ifdef XZ_DEC_ANY_CHECK +/* Sizes of the Check field with different Check IDs */ +static const uint8_t check_sizes[16] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 +}; +#endif + +/* + * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller + * must have set s->temp.pos to indicate how much data we are supposed + * to copy into s->temp.buf. Return true once s->temp.pos has reached + * s->temp.size. + */ +static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b) +{ + size_t copy_size = min_t(size_t, + b->in_size - b->in_pos, s->temp.size - s->temp.pos); + + memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); + b->in_pos += copy_size; + s->temp.pos += copy_size; + + if (s->temp.pos == s->temp.size) { + s->temp.pos = 0; + return true; + } + + return false; +} + +/* Decode a variable-length integer (little-endian base-128 encoding) */ +static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + uint8_t byte; + + if (s->pos == 0) + s->vli = 0; + + while (*in_pos < in_size) { + byte = in[*in_pos]; + ++*in_pos; + + s->vli |= (vli_type)(byte & 0x7F) << s->pos; + + if ((byte & 0x80) == 0) { + /* Don't allow non-minimal encodings. */ + if (byte == 0 && s->pos != 0) + return XZ_DATA_ERROR; + + s->pos = 0; + return XZ_STREAM_END; + } + + s->pos += 7; + if (s->pos == 7 * VLI_BYTES_MAX) + return XZ_DATA_ERROR; + } + + return XZ_OK; +} + +/* + * Decode the Compressed Data field from a Block. Update and validate + * the observed compressed and uncompressed sizes of the Block so that + * they don't exceed the values possibly stored in the Block Header + * (validation assumes that no integer overflow occurs, since vli_type + * is normally uint64_t). Update the CRC32 if presence of the CRC32 + * field was indicated in Stream Header. + * + * Once the decoding is finished, validate that the observed sizes match + * the sizes possibly stored in the Block Header. Update the hash and + * Block count, which are later used to validate the Index field. + */ +static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + s->in_start = b->in_pos; + s->out_start = b->out_pos; + +#ifdef XZ_DEC_BCJ + if (s->bcj_active) + ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); + else +#endif + ret = xz_dec_lzma2_run(s->lzma2, b); + + s->block.compressed += b->in_pos - s->in_start; + s->block.uncompressed += b->out_pos - s->out_start; + + /* + * There is no need to separately check for VLI_UNKNOWN, since + * the observed sizes are always smaller than VLI_UNKNOWN. + */ + if (s->block.compressed > s->block_header.compressed + || s->block.uncompressed + > s->block_header.uncompressed) + return XZ_DATA_ERROR; + + if (s->check_type == XZ_CHECK_CRC32) + s->crc32 = xz_crc32(b->out + s->out_start, + b->out_pos - s->out_start, s->crc32); + + if (ret == XZ_STREAM_END) { + if (s->block_header.compressed != VLI_UNKNOWN + && s->block_header.compressed + != s->block.compressed) + return XZ_DATA_ERROR; + + if (s->block_header.uncompressed != VLI_UNKNOWN + && s->block_header.uncompressed + != s->block.uncompressed) + return XZ_DATA_ERROR; + + s->block.hash.unpadded += s->block_header.size + + s->block.compressed; + +#ifdef XZ_DEC_ANY_CHECK + s->block.hash.unpadded += check_sizes[s->check_type]; +#else + if (s->check_type == XZ_CHECK_CRC32) + s->block.hash.unpadded += 4; +#endif + + s->block.hash.uncompressed += s->block.uncompressed; + s->block.hash.crc32 = xz_crc32( + (const uint8_t *)&s->block.hash, + sizeof(s->block.hash), s->block.hash.crc32); + + ++s->block.count; + } + + return ret; +} + +/* Update the Index size and the CRC32 value. */ +static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b) +{ + size_t in_used = b->in_pos - s->in_start; + s->index.size += in_used; + s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32); +} + +/* + * Decode the Number of Records, Unpadded Size, and Uncompressed Size + * fields from the Index field. That is, Index Padding and CRC32 are not + * decoded by this function. + * + * This can return XZ_OK (more input needed), XZ_STREAM_END (everything + * successfully decoded), or XZ_DATA_ERROR (input is corrupt). + */ +static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + do { + ret = dec_vli(s, b->in, &b->in_pos, b->in_size); + if (ret != XZ_STREAM_END) { + index_update(s, b); + return ret; + } + + switch (s->index.sequence) { + case SEQ_INDEX_COUNT: + s->index.count = s->vli; + + /* + * Validate that the Number of Records field + * indicates the same number of Records as + * there were Blocks in the Stream. + */ + if (s->index.count != s->block.count) + return XZ_DATA_ERROR; + + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + + case SEQ_INDEX_UNPADDED: + s->index.hash.unpadded += s->vli; + s->index.sequence = SEQ_INDEX_UNCOMPRESSED; + break; + + case SEQ_INDEX_UNCOMPRESSED: + s->index.hash.uncompressed += s->vli; + s->index.hash.crc32 = xz_crc32( + (const uint8_t *)&s->index.hash, + sizeof(s->index.hash), + s->index.hash.crc32); + --s->index.count; + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + } + } while (s->index.count > 0); + + return XZ_STREAM_END; +} + +/* + * Validate that the next four input bytes match the value of s->crc32. + * s->pos must be zero when starting to validate the first byte. + */ +static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b) +{ + do { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++]) + return XZ_DATA_ERROR; + + s->pos += 8; + + } while (s->pos < 32); + + s->crc32 = 0; + s->pos = 0; + + return XZ_STREAM_END; +} + +#ifdef XZ_DEC_ANY_CHECK +/* + * Skip over the Check field when the Check ID is not supported. + * Returns true once the whole Check field has been skipped over. + */ +static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b) +{ + while (s->pos < check_sizes[s->check_type]) { + if (b->in_pos == b->in_size) + return false; + + ++b->in_pos; + ++s->pos; + } + + s->pos = 0; + + return true; +} +#endif + +/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ +static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s) +{ + if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) + return XZ_FORMAT_ERROR; + + if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) + != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) + return XZ_DATA_ERROR; + + if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) + return XZ_OPTIONS_ERROR; + + /* + * Of integrity checks, we support only none (Check ID = 0) and + * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined, + * we will accept other check types too, but then the check won't + * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. + */ + s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; + +#ifdef XZ_DEC_ANY_CHECK + if (s->check_type > XZ_CHECK_MAX) + return XZ_OPTIONS_ERROR; + + if (s->check_type > XZ_CHECK_CRC32) + return XZ_UNSUPPORTED_CHECK; +#else + if (s->check_type > XZ_CHECK_CRC32) + return XZ_OPTIONS_ERROR; +#endif + + return XZ_OK; +} + +/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ +static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s) +{ + if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) + return XZ_DATA_ERROR; + + if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) + return XZ_DATA_ERROR; + + /* + * Validate Backward Size. Note that we never added the size of the + * Index CRC32 field to s->index.size, thus we use s->index.size / 4 + * instead of s->index.size / 4 - 1. + */ + if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) + return XZ_DATA_ERROR; + + if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) + return XZ_DATA_ERROR; + + /* + * Use XZ_STREAM_END instead of XZ_OK to be more convenient + * for the caller. + */ + return XZ_STREAM_END; +} + +/* Decode the Block Header and initialize the filter chain. */ +static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s) +{ + enum xz_ret ret; + + /* + * Validate the CRC32. We know that the temp buffer is at least + * eight bytes so this is safe. + */ + s->temp.size -= 4; + if (xz_crc32(s->temp.buf, s->temp.size, 0) + != get_le32(s->temp.buf + s->temp.size)) + return XZ_DATA_ERROR; + + s->temp.pos = 2; + + /* + * Catch unsupported Block Flags. We support only one or two filters + * in the chain, so we catch that with the same test. + */ +#ifdef XZ_DEC_BCJ + if (s->temp.buf[1] & 0x3E) +#else + if (s->temp.buf[1] & 0x3F) +#endif + return XZ_OPTIONS_ERROR; + + /* Compressed Size */ + if (s->temp.buf[1] & 0x40) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.compressed = s->vli; + } else { + s->block_header.compressed = VLI_UNKNOWN; + } + + /* Uncompressed Size */ + if (s->temp.buf[1] & 0x80) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.uncompressed = s->vli; + } else { + s->block_header.uncompressed = VLI_UNKNOWN; + } + +#ifdef XZ_DEC_BCJ + /* If there are two filters, the first one must be a BCJ filter. */ + s->bcj_active = s->temp.buf[1] & 0x01; + if (s->bcj_active) { + if (s->temp.size - s->temp.pos < 2) + return XZ_OPTIONS_ERROR; + + ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* + * We don't support custom start offset, + * so Size of Properties must be zero. + */ + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + } +#endif + + /* Valid Filter Flags always take at least two bytes. */ + if (s->temp.size - s->temp.pos < 2) + return XZ_DATA_ERROR; + + /* Filter ID = LZMA2 */ + if (s->temp.buf[s->temp.pos++] != 0x21) + return XZ_OPTIONS_ERROR; + + /* Size of Properties = 1-byte Filter Properties */ + if (s->temp.buf[s->temp.pos++] != 0x01) + return XZ_OPTIONS_ERROR; + + /* Filter Properties contains LZMA2 dictionary size. */ + if (s->temp.size - s->temp.pos < 1) + return XZ_DATA_ERROR; + + ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* The rest must be Header Padding. */ + while (s->temp.pos < s->temp.size) + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + + s->temp.pos = 0; + s->block.compressed = 0; + s->block.uncompressed = 0; + + return XZ_OK; +} + +static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + /* + * Store the start position for the case when we are in the middle + * of the Index field. + */ + s->in_start = b->in_pos; + + while (true) { + switch (s->sequence) { + case SEQ_STREAM_HEADER: + /* + * Stream Header is copied to s->temp, and then + * decoded from there. This way if the caller + * gives us only little input at a time, we can + * still keep the Stream Header decoding code + * simple. Similar approach is used in many places + * in this file. + */ + if (!fill_temp(s, b)) + return XZ_OK; + + /* + * If dec_stream_header() returns + * XZ_UNSUPPORTED_CHECK, it is still possible + * to continue decoding if working in multi-call + * mode. Thus, update s->sequence before calling + * dec_stream_header(). + */ + s->sequence = SEQ_BLOCK_START; + + ret = dec_stream_header(s); + if (ret != XZ_OK) + return ret; + + case SEQ_BLOCK_START: + /* We need one byte of input to continue. */ + if (b->in_pos == b->in_size) + return XZ_OK; + + /* See if this is the beginning of the Index field. */ + if (b->in[b->in_pos] == 0) { + s->in_start = b->in_pos++; + s->sequence = SEQ_INDEX; + break; + } + + /* + * Calculate the size of the Block Header and + * prepare to decode it. + */ + s->block_header.size + = ((uint32_t)b->in[b->in_pos] + 1) * 4; + + s->temp.size = s->block_header.size; + s->temp.pos = 0; + s->sequence = SEQ_BLOCK_HEADER; + + case SEQ_BLOCK_HEADER: + if (!fill_temp(s, b)) + return XZ_OK; + + ret = dec_block_header(s); + if (ret != XZ_OK) + return ret; + + s->sequence = SEQ_BLOCK_UNCOMPRESS; + + case SEQ_BLOCK_UNCOMPRESS: + ret = dec_block(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_BLOCK_PADDING; + + case SEQ_BLOCK_PADDING: + /* + * Size of Compressed Data + Block Padding + * must be a multiple of four. We don't need + * s->block.compressed for anything else + * anymore, so we use it here to test the size + * of the Block Padding field. + */ + while (s->block.compressed & 3) { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + + ++s->block.compressed; + } + + s->sequence = SEQ_BLOCK_CHECK; + + case SEQ_BLOCK_CHECK: + if (s->check_type == XZ_CHECK_CRC32) { + ret = crc32_validate(s, b); + if (ret != XZ_STREAM_END) + return ret; + } +#ifdef XZ_DEC_ANY_CHECK + else if (!check_skip(s, b)) { + return XZ_OK; + } +#endif + + s->sequence = SEQ_BLOCK_START; + break; + + case SEQ_INDEX: + ret = dec_index(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_INDEX_PADDING; + + case SEQ_INDEX_PADDING: + while ((s->index.size + (b->in_pos - s->in_start)) + & 3) { + if (b->in_pos == b->in_size) { + index_update(s, b); + return XZ_OK; + } + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + } + + /* Finish the CRC32 value and Index size. */ + index_update(s, b); + + /* Compare the hashes to validate the Index field. */ + if (!memeq(&s->block.hash, &s->index.hash, + sizeof(s->block.hash))) + return XZ_DATA_ERROR; + + s->sequence = SEQ_INDEX_CRC32; + + case SEQ_INDEX_CRC32: + ret = crc32_validate(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->temp.size = STREAM_HEADER_SIZE; + s->sequence = SEQ_STREAM_FOOTER; + + case SEQ_STREAM_FOOTER: + if (!fill_temp(s, b)) + return XZ_OK; + + return dec_stream_footer(s); + } + } + + /* Never reached */ +} + +/* + * xz_dec_run() is a wrapper for dec_main() to handle some special cases in + * multi-call and single-call decoding. + * + * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we + * are not going to make any progress anymore. This is to prevent the caller + * from calling us infinitely when the input file is truncated or otherwise + * corrupt. Since zlib-style API allows that the caller fills the input buffer + * only when the decoder doesn't produce any new output, we have to be careful + * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only + * after the second consecutive call to xz_dec_run() that makes no progress. + * + * In single-call mode, if we couldn't decode everything and no error + * occurred, either the input is truncated or the output buffer is too small. + * Since we know that the last input byte never produces any output, we know + * that if all the input was consumed and decoding wasn't finished, the file + * must be corrupt. Otherwise the output buffer has to be too small or the + * file is corrupt in a way that decoding it produces too big output. + * + * If single-call decoding fails, we reset b->in_pos and b->out_pos back to + * their original values. This is because with some filter chains there won't + * be any valid uncompressed data in the output buffer unless the decoding + * actually succeeds (that's the price to pay of using the output buffer as + * the workspace). + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b) +{ + size_t in_start; + size_t out_start; + enum xz_ret ret; + + if (DEC_IS_SINGLE(s->mode)) + xz_dec_reset(s); + + in_start = b->in_pos; + out_start = b->out_pos; + ret = dec_main(s, b); + + if (DEC_IS_SINGLE(s->mode)) { + if (ret == XZ_OK) + ret = b->in_pos == b->in_size + ? XZ_DATA_ERROR : XZ_BUF_ERROR; + + if (ret != XZ_STREAM_END) { + b->in_pos = in_start; + b->out_pos = out_start; + } + + } else if (ret == XZ_OK && in_start == b->in_pos + && out_start == b->out_pos) { + if (s->allow_buf_error) + ret = XZ_BUF_ERROR; + + s->allow_buf_error = true; + } else { + s->allow_buf_error = false; + } + + return ret; +} + +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max) +{ + struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->mode = mode; + +#ifdef XZ_DEC_BCJ + s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); + if (s->bcj == NULL) + goto error_bcj; +#endif + + s->lzma2 = xz_dec_lzma2_create(mode, dict_max); + if (s->lzma2 == NULL) + goto error_lzma2; + + xz_dec_reset(s); + return s; + +error_lzma2: +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +error_bcj: +#endif + kfree(s); + return NULL; +} + +XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s) +{ + s->sequence = SEQ_STREAM_HEADER; + s->allow_buf_error = false; + s->pos = 0; + s->crc32 = 0; + memzero(&s->block, sizeof(s->block)); + memzero(&s->index, sizeof(s->index)); + s->temp.pos = 0; + s->temp.size = STREAM_HEADER_SIZE; +} + +XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s) +{ + if (s != NULL) { + xz_dec_lzma2_end(s->lzma2); +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +#endif + kfree(s); + } +} diff --git a/archival/libarchive/unxz/xz_lzma2.h b/archival/libarchive/unxz/xz_lzma2.h new file mode 100644 index 000000000..47f21afbc --- /dev/null +++ b/archival/libarchive/unxz/xz_lzma2.h @@ -0,0 +1,204 @@ +/* + * LZMA2 definitions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_LZMA2_H +#define XZ_LZMA2_H + +/* Range coder constants */ +#define RC_SHIFT_BITS 8 +#define RC_TOP_BITS 24 +#define RC_TOP_VALUE (1 << RC_TOP_BITS) +#define RC_BIT_MODEL_TOTAL_BITS 11 +#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS) +#define RC_MOVE_BITS 5 + +/* + * Maximum number of position states. A position state is the lowest pb + * number of bits of the current uncompressed offset. In some places there + * are different sets of probabilities for different position states. + */ +#define POS_STATES_MAX (1 << 4) + +/* + * This enum is used to track which LZMA symbols have occurred most recently + * and in which order. This information is used to predict the next symbol. + * + * Symbols: + * - Literal: One 8-bit byte + * - Match: Repeat a chunk of data at some distance + * - Long repeat: Multi-byte match at a recently seen distance + * - Short repeat: One-byte repeat at a recently seen distance + * + * The symbol names are in from STATE_oldest_older_previous. REP means + * either short or long repeated match, and NONLIT means any non-literal. + */ +enum lzma_state { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP +}; + +/* Total number of states */ +#define STATES 12 + +/* The lowest 7 states indicate that the previous state was a literal. */ +#define LIT_STATES 7 + +/* Indicate that the latest symbol was a literal. */ +static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state) +{ + if (*state <= STATE_SHORTREP_LIT_LIT) + *state = STATE_LIT_LIT; + else if (*state <= STATE_LIT_SHORTREP) + *state -= 3; + else + *state -= 6; +} + +/* Indicate that the latest symbol was a match. */ +static inline void XZ_FUNC lzma_state_match(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH; +} + +/* Indicate that the latest state was a long repeated match. */ +static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP; +} + +/* Indicate that the latest symbol was a short match. */ +static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP; +} + +/* Test if the previous symbol was a literal. */ +static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state) +{ + return state < LIT_STATES; +} + +/* Each literal coder is divided in three sections: + * - 0x001-0x0FF: Without match byte + * - 0x101-0x1FF: With match byte; match bit is 0 + * - 0x201-0x2FF: With match byte; match bit is 1 + * + * Match byte is used when the previous LZMA symbol was something else than + * a literal (that is, it was some kind of match). + */ +#define LITERAL_CODER_SIZE 0x300 + +/* Maximum number of literal coders */ +#define LITERAL_CODERS_MAX (1 << 4) + +/* Minimum length of a match is two bytes. */ +#define MATCH_LEN_MIN 2 + +/* Match length is encoded with 4, 5, or 10 bits. + * + * Length Bits + * 2-9 4 = Choice=0 + 3 bits + * 10-17 5 = Choice=1 + Choice2=0 + 3 bits + * 18-273 10 = Choice=1 + Choice2=1 + 8 bits + */ +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +/* + * Maximum length of a match is 273 which is a result of the encoding + * described above. + */ +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + +/* + * Different sets of probabilities are used for match distances that have + * very short match length: Lengths of 2, 3, and 4 bytes have a separate + * set of probabilities for each length. The matches with longer length + * use a shared set of probabilities. + */ +#define DIST_STATES 4 + +/* + * Get the index of the appropriate probability array for decoding + * the distance slot. + */ +static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len) +{ + return len < DIST_STATES + MATCH_LEN_MIN + ? len - MATCH_LEN_MIN : DIST_STATES - 1; +} + +/* + * The highest two bits of a 32-bit match distance are encoded using six bits. + * This six-bit value is called a distance slot. This way encoding a 32-bit + * value takes 6-36 bits, larger values taking more bits. + */ +#define DIST_SLOT_BITS 6 +#define DIST_SLOTS (1 << DIST_SLOT_BITS) + +/* Match distances up to 127 are fully encoded using probabilities. Since + * the highest two bits (distance slot) are always encoded using six bits, + * the distances 0-3 don't need any additional bits to encode, since the + * distance slot itself is the same as the actual distance. DIST_MODEL_START + * indicates the first distance slot where at least one additional bit is + * needed. + */ +#define DIST_MODEL_START 4 + +/* + * Match distances greater than 127 are encoded in three pieces: + * - distance slot: the highest two bits + * - direct bits: 2-26 bits below the highest two bits + * - alignment bits: four lowest bits + * + * Direct bits don't use any probabilities. + * + * The distance slot value of 14 is for distances 128-191. + */ +#define DIST_MODEL_END 14 + +/* Distance slots that indicate a distance <= 127. */ +#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +/* + * For match distances greater than 127, only the highest two bits and the + * lowest four bits (alignment) is encoded using probabilities. + */ +#define ALIGN_BITS 4 +#define ALIGN_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_SIZE - 1) + +/* Total number of all probability variables */ +#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE) + +/* + * LZMA remembers the four most recent match distances. Reusing these + * distances tends to take less space than re-encoding the actual + * distance value. + */ +#define REPS 4 + +#endif diff --git a/archival/libarchive/unxz/xz_private.h b/archival/libarchive/unxz/xz_private.h new file mode 100644 index 000000000..145649a83 --- /dev/null +++ b/archival/libarchive/unxz/xz_private.h @@ -0,0 +1,159 @@ +/* + * Private includes and definitions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_PRIVATE_H +#define XZ_PRIVATE_H + +#ifdef __KERNEL__ + /* XZ_PREBOOT may be defined only via decompress_unxz.c. */ +# ifndef XZ_PREBOOT +# include +# include +# include +# define memeq(a, b, size) (memcmp(a, b, size) == 0) +# define memzero(buf, size) memset(buf, 0, size) +# endif +# include +# include +# define get_le32(p) le32_to_cpup((const uint32_t *)(p)) + /* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */ +# ifndef XZ_IGNORE_KCONFIG +# ifdef CONFIG_XZ_DEC_X86 +# define XZ_DEC_X86 +# endif +# ifdef CONFIG_XZ_DEC_POWERPC +# define XZ_DEC_POWERPC +# endif +# ifdef CONFIG_XZ_DEC_IA64 +# define XZ_DEC_IA64 +# endif +# ifdef CONFIG_XZ_DEC_ARM +# define XZ_DEC_ARM +# endif +# ifdef CONFIG_XZ_DEC_ARMTHUMB +# define XZ_DEC_ARMTHUMB +# endif +# ifdef CONFIG_XZ_DEC_SPARC +# define XZ_DEC_SPARC +# endif +# endif +# include +#else + /* + * For userspace builds, use a separate header to define the required + * macros and functions. This makes it easier to adapt the code into + * different environments and avoids clutter in the Linux kernel tree. + */ +# include "xz_config.h" +#endif + +/* If no specific decoding mode is requested, enable support for all modes. */ +#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \ + && !defined(XZ_DEC_DYNALLOC) +# define XZ_DEC_SINGLE +# define XZ_DEC_PREALLOC +# define XZ_DEC_DYNALLOC +#endif + +/* + * The DEC_IS_foo(mode) macros are used in "if" statements. If only some + * of the supported modes are enabled, these macros will evaluate to true or + * false at compile time and thus allow the compiler to omit unneeded code. + */ +#ifdef XZ_DEC_SINGLE +# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE) +#else +# define DEC_IS_SINGLE(mode) (false) +#endif + +#ifdef XZ_DEC_PREALLOC +# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC) +#else +# define DEC_IS_PREALLOC(mode) (false) +#endif + +#ifdef XZ_DEC_DYNALLOC +# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC) +#else +# define DEC_IS_DYNALLOC(mode) (false) +#endif + +#if !defined(XZ_DEC_SINGLE) +# define DEC_IS_MULTI(mode) (true) +#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC) +# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE) +#else +# define DEC_IS_MULTI(mode) (false) +#endif + +/* + * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ. + * XZ_DEC_BCJ is used to enable generic support for BCJ decoders. + */ +#ifndef XZ_DEC_BCJ +# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ + || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \ + || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ + || defined(XZ_DEC_SPARC) +# define XZ_DEC_BCJ +# endif +#endif + +/* + * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used + * before calling xz_dec_lzma2_run(). + */ +XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( + enum xz_mode mode, uint32_t dict_max); + +/* + * Decode the LZMA2 properties (one byte) and reset the decoder. Return + * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not + * big enough, and XZ_OPTIONS_ERROR if props indicates something that this + * decoder doesn't support. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( + struct xz_dec_lzma2 *s, uint8_t props); + +/* Decode raw LZMA2 stream from b->in to b->out. */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run( + struct xz_dec_lzma2 *s, struct xz_buf *b); + +/* Free the memory allocated for the LZMA2 decoder. */ +XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s); + +#ifdef XZ_DEC_BCJ +/* + * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before + * calling xz_dec_bcj_run(). + */ +XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call); + +/* + * Decode the Filter ID of a BCJ filter. This implementation doesn't + * support custom start offsets, so no decoding of Filter Properties + * is needed. Returns XZ_OK if the given Filter ID is supported. + * Otherwise XZ_OPTIONS_ERROR is returned. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset( + struct xz_dec_bcj *s, uint8_t id); + +/* + * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is + * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run() + * must be called directly. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, struct xz_buf *b); + +/* Free the memory allocated for the BCJ filters. */ +#define xz_dec_bcj_end(s) kfree(s) +#endif + +#endif diff --git a/archival/libarchive/unxz/xz_stream.h b/archival/libarchive/unxz/xz_stream.h new file mode 100644 index 000000000..36f2a7cbf --- /dev/null +++ b/archival/libarchive/unxz/xz_stream.h @@ -0,0 +1,57 @@ +/* + * Definitions for handling the .xz file format + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_STREAM_H +#define XZ_STREAM_H + +#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32 +# include +# undef crc32 +# define xz_crc32(buf, size, crc) \ + (~crc32_le(~(uint32_t)(crc), buf, size)) +#endif + +/* + * See the .xz file format specification at + * http://tukaani.org/xz/xz-file-format.txt + * to understand the container format. + */ + +#define STREAM_HEADER_SIZE 12 + +#define HEADER_MAGIC "\3757zXZ\0" +#define HEADER_MAGIC_SIZE 6 + +#define FOOTER_MAGIC "YZ" +#define FOOTER_MAGIC_SIZE 2 + +/* + * Variable-length integer can hold a 63-bit unsigned integer, or a special + * value to indicate that the value is unknown. + */ +typedef uint64_t vli_type; + +#define VLI_MAX ((vli_type)-1 / 2) +#define VLI_UNKNOWN ((vli_type)-1) + +/* Maximum encoded size of a VLI */ +#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7) + +/* Integrity Check types */ +enum xz_check { + XZ_CHECK_NONE = 0, + XZ_CHECK_CRC32 = 1, + XZ_CHECK_CRC64 = 4, + XZ_CHECK_SHA256 = 10 +}; + +/* Maximum possible Check ID */ +#define XZ_CHECK_MAX 15 + +#endif -- cgit v1.2.3