summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsobomax <sobomax@FreeBSD.org>2016-07-20 16:36:17 +0000
committersobomax <sobomax@FreeBSD.org>2016-07-20 16:36:17 +0000
commite747ac9c645f3f184976184a45d5cbfb3c50d1af (patch)
tree47748c901120b386ecfa7480ddaa1b66898ec9c4
parenta7dc4a2738219ada64b9a821a01152a5643fe0c4 (diff)
downloadFreeBSD-src-e747ac9c645f3f184976184a45d5cbfb3c50d1af.zip
FreeBSD-src-e747ac9c645f3f184976184a45d5cbfb3c50d1af.tar.gz
MFC: merge in all new features and improvements into mkuzip(8) from current,
which includes:

 o LZMA compression;
 o block de-duplication;
 o performance improvements;
 o multi-thread support.

This includes the following revisions:

r295943,r296626,r296628,r296810,r298504,r298505,r298577

Suggested by: emaste
-rw-r--r--usr.bin/mkuzip/Makefile7
-rw-r--r--usr.bin/mkuzip/mkuz_blk.c45
-rw-r--r--usr.bin/mkuzip/mkuz_blk.h48
-rw-r--r--usr.bin/mkuzip/mkuz_blk_chain.h35
-rw-r--r--usr.bin/mkuzip/mkuz_blockcache.c148
-rw-r--r--usr.bin/mkuzip/mkuz_blockcache.h31
-rw-r--r--usr.bin/mkuzip/mkuz_cfg.h40
-rw-r--r--usr.bin/mkuzip/mkuz_cloop.h50
-rw-r--r--usr.bin/mkuzip/mkuz_conveyor.c129
-rw-r--r--usr.bin/mkuzip/mkuz_conveyor.h52
-rw-r--r--usr.bin/mkuzip/mkuz_format.h37
-rw-r--r--usr.bin/mkuzip/mkuz_fqueue.c214
-rw-r--r--usr.bin/mkuzip/mkuz_fqueue.h51
-rw-r--r--usr.bin/mkuzip/mkuz_lzma.c121
-rw-r--r--usr.bin/mkuzip/mkuz_lzma.h42
-rw-r--r--usr.bin/mkuzip/mkuz_time.c45
-rw-r--r--usr.bin/mkuzip/mkuz_time.h41
-rw-r--r--usr.bin/mkuzip/mkuz_zlib.c87
-rw-r--r--usr.bin/mkuzip/mkuz_zlib.h36
-rw-r--r--usr.bin/mkuzip/mkuzip.895
-rw-r--r--usr.bin/mkuzip/mkuzip.c384
-rw-r--r--usr.bin/mkuzip/mkuzip.h33
22 files changed, 1658 insertions, 113 deletions
diff --git a/usr.bin/mkuzip/Makefile b/usr.bin/mkuzip/Makefile
index c5eac20..7f4a57a 100644
--- a/usr.bin/mkuzip/Makefile
+++ b/usr.bin/mkuzip/Makefile
@@ -2,9 +2,10 @@
PROG= mkuzip
MAN= mkuzip.8
+SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \
+ mkuz_blk.c mkuz_fqueue.c mkuz_time.c
-DPADD= ${LIBZ}
-LDADD= -lz
-
+DPADD= ${LIBZ} ${LIBMD} ${LIBLZMA} ${LIBPTHREAD}
+LDADD= -lz -lmd -llzma -lpthread
.include <bsd.prog.mk>
diff --git a/usr.bin/mkuzip/mkuz_blk.c b/usr.bin/mkuzip/mkuz_blk.c
new file mode 100644
index 0000000..cfc5273
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_blk.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "mkuzip.h"
+#include "mkuz_blk.h"
+
+/*
+ * Allocate a block with room for blen bytes of payload in its trailing
+ * data[] array.  The capacity is recorded in alen and br_offset starts
+ * out as OFFSET_UNDEF.  The memory comes from mkuz_safe_zmalloc(), whose
+ * result is used unchecked here (presumably it zero-fills and does not
+ * return on failure -- see its definition).
+ */
+struct mkuz_blk *
+mkuz_blk_ctor(size_t blen)
+{
+	struct mkuz_blk *bp;
+
+	bp = mkuz_safe_zmalloc(sizeof(*bp) + blen);
+	bp->br_offset = OFFSET_UNDEF;
+	bp->alen = blen;
+	return (bp);
+}
diff --git a/usr.bin/mkuzip/mkuz_blk.h b/usr.bin/mkuzip/mkuz_blk.h
new file mode 100644
index 0000000..8e80a10c
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_blk.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define OFFSET_UNDEF UINT64_MAX
+
+/*
+ * Description of one block: where its data lives, how long it is, which
+ * block it is and the digest of its contents.  The de-duplication cache
+ * keeps a copy of this structure for every block registered with it.
+ */
+struct mkuz_blk_info {
+	uint64_t offset;		/* position of the data; used as an lseek(2) target */
+	size_t len;			/* number of data bytes */
+	uint32_t blkno;			/* block number */
+	unsigned char digest[16];	/* MD5 digest of the data */
+};
+
+#define MKUZ_BLK_EOF (void *)0x1
+#define MKUZ_BLK_MORE (void *)0x2
+
+/*
+ * A block travelling through the compressor: metadata followed directly
+ * by the payload.  Instances are created by mkuz_blk_ctor().
+ */
+struct mkuz_blk {
+	struct mkuz_blk_info info;	/* metadata; info.len bytes of data[] are in use */
+	size_t alen;			/* capacity of data[] requested at construction */
+	uint64_t br_offset;		/* OFFSET_UNDEF after construction; presumably a
+					 * back-reference offset -- confirm with its users */
+	unsigned char data[];		/* payload, allocated together with the header */
+};
+
+struct mkuz_blk *mkuz_blk_ctor(size_t);
diff --git a/usr.bin/mkuzip/mkuz_blk_chain.h b/usr.bin/mkuzip/mkuz_blk_chain.h
new file mode 100644
index 0000000..556803f
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_blk_chain.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+struct mkuz_blk;
+struct mkuz_bchain_link;
+
+/*
+ * One link of the singly linked chain the FIFO queue is built from.
+ * Links point from the oldest queued entry towards the newest one.
+ */
+struct mkuz_bchain_link {
+	struct mkuz_blk *this;		/* block carried by this link */
+	struct mkuz_bchain_link *prev;	/* link enqueued right after this one,
+					 * NULL for the newest */
+};
diff --git a/usr.bin/mkuzip/mkuz_blockcache.c b/usr.bin/mkuzip/mkuz_blockcache.c
new file mode 100644
index 0000000..a369eeb
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_blockcache.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(MKUZ_DEBUG)
+# include <assert.h>
+# include <stdio.h>
+#endif
+
+#include "mkuz_blockcache.h"
+#include "mkuz_blk.h"
+
+/* One cached block description, chained with others of the same bucket. */
+struct mkuz_blkcache_itm {
+	struct mkuz_blk_info hit;		/* copy of the registered block's info */
+	struct mkuz_blkcache_itm *next;		/* next entry in the bucket, or NULL */
+};
+
+/*
+ * The cache itself: 256 buckets, one for every possible value returned by
+ * digest_fold().  The head entry of each bucket is stored in place; a head
+ * whose hit.len is 0 is treated as an unused bucket.
+ */
+static struct mkuz_blkcache {
+	struct mkuz_blkcache_itm first[256];
+} blkcache;
+
+/*
+ * Check whether the data already stored for cache entry bcep is
+ * byte-for-byte identical to the payload of cbp, guarding against digest
+ * collisions.
+ *
+ * cbp->info.len bytes are read back from fd at bcep->hit.offset and
+ * compared with cbp->data.  pread(2) is used so that the descriptor's
+ * file position is never moved: there is no seek to undo and therefore
+ * no restore step that could fail unnoticed.
+ *
+ * Returns 1 when the contents are equal, 0 when they differ and -1 when
+ * the scratch buffer cannot be allocated or the data cannot be read back
+ * in full.
+ */
+static int
+verify_match(int fd, const struct mkuz_blk *cbp, struct mkuz_blkcache_itm *bcep)
+{
+	void *vbuf;
+	ssize_t rlen;
+	int rval;
+
+	vbuf = malloc(cbp->info.len);
+	if (vbuf == NULL) {
+		return (-1);
+	}
+	rval = -1;
+	rlen = pread(fd, vbuf, cbp->info.len, (off_t)bcep->hit.offset);
+	/* A short read is treated the same way as a failed one. */
+	if (rlen >= 0 && (size_t)rlen == cbp->info.len) {
+		rval = (memcmp(cbp->data, vbuf, cbp->info.len) == 0) ? 1 : 0;
+	}
+	free(vbuf);
+	return (rval);
+}
+
+#define I2J(x) ((intmax_t)(x))
+#define U2J(x) ((uintmax_t)(x))
+
+/*
+ * Reduce a 16-byte digest to a single byte by XOR-ing all of its bytes
+ * together.  The result is used as a bucket index into the block cache.
+ */
+static unsigned char
+digest_fold(const unsigned char *mdigest)
+{
+	const unsigned char *p, *end;
+	unsigned char acc;
+
+	acc = 0;
+	end = mdigest + 16;
+	for (p = mdigest; p < end; p++) {
+		acc ^= *p;
+	}
+	return (acc);
+}
+
+/*
+ * Look up block bp in the de-duplication cache and register it when no
+ * candidate for it exists yet.
+ *
+ * Entries live in 256 buckets selected by digest_fold() of the block's
+ * digest.  The head of each bucket is embedded in blkcache.first[]; a head
+ * whose hit.len is 0 marks a bucket that has not been used.  A candidate
+ * must have the same length and digest as bp and is then confirmed by
+ * verify_match(), which re-reads the candidate's data from fd.
+ *
+ * Returns a pointer to the cached info of an identical, previously
+ * registered block, and NULL in every other case: bp was newly
+ * registered, the candidate turned out to differ, verification failed
+ * (reported through warn()), or no memory was available for a new entry.
+ * bp is recorded only when no entry with the same length and digest was
+ * found.
+ */
+struct mkuz_blk_info *
+mkuz_blkcache_regblock(int fd, const struct mkuz_blk *bp)
+{
+	struct mkuz_blkcache_itm *bcep;
+	int rval;
+	unsigned char h;
+
+#if defined(MKUZ_DEBUG)
+	/* Compare at full width so offsets past 4 GiB are not truncated. */
+	assert((uint64_t)lseek(fd, 0, SEEK_CUR) == bp->info.offset);
+#endif
+	h = digest_fold(bp->info.digest);
+	if (blkcache.first[h].hit.len == 0) {
+		/* Unused bucket: the embedded head entry takes the block. */
+		blkcache.first[h].hit = bp->info;
+		return (NULL);
+	}
+	for (bcep = &blkcache.first[h]; bcep != NULL; bcep = bcep->next) {
+		if (bcep->hit.len == bp->info.len &&
+		    memcmp(bp->info.digest, bcep->hit.digest,
+		    sizeof(bp->info.digest)) == 0) {
+			break;
+		}
+	}
+	if (bcep != NULL) {
+		rval = verify_match(fd, bp, bcep);
+		if (rval == 1) {
+#if defined(MKUZ_DEBUG)
+			fprintf(stderr, "cache hit %jd, %jd, %jd, %jd\n",
+			    I2J(bcep->hit.blkno), I2J(bcep->hit.offset),
+			    I2J(bp->info.offset), I2J(bp->info.len));
+#endif
+			return (&bcep->hit);
+		}
+		if (rval == 0) {
+#if defined(MKUZ_DEBUG)
+			fprintf(stderr, "block MD5 collision, you should try lottery, "
+			    "man!\n");
+#endif
+			return (NULL);
+		}
+		warn("verify_match");
+		return (NULL);
+	}
+	/* Nothing comparable in the bucket: link a new entry behind the head. */
+	bcep = calloc(1, sizeof(*bcep));
+	if (bcep == NULL)
+		return (NULL);
+	bcep->hit = bp->info;
+	bcep->next = blkcache.first[h].next;
+	blkcache.first[h].next = bcep;
+	return (NULL);
+}
diff --git a/usr.bin/mkuzip/mkuz_blockcache.h b/usr.bin/mkuzip/mkuz_blockcache.h
new file mode 100644
index 0000000..58eaea0
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_blockcache.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+struct mkuz_blk;
+
+struct mkuz_blk_info *mkuz_blkcache_regblock(int, const struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuz_cfg.h b/usr.bin/mkuzip/mkuz_cfg.h
new file mode 100644
index 0000000..fc183e3
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_cfg.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+struct mkuz_conveyor;
+
+/* Run-time settings consulted by the conveyor and its worker threads. */
+struct mkuz_cfg {
+	int fdr;		/* presumably the input descriptor -- confirm in mkuzip.c */
+	int fdw;		/* presumably the output descriptor -- confirm in mkuzip.c */
+	int verbose;		/* verbosity setting */
+	int no_zcomp;		/* when 0, workers replace all-zero blocks by empty ones */
+	int en_dedup;		/* when non-zero, workers digest every compressed block */
+	int nworkers;		/* number of worker threads to start */
+	int blksz;		/* block size handed to the format's f_init() */
+	const struct mkuz_format *handler;	/* compression back-end in use */
+};
diff --git a/usr.bin/mkuzip/mkuz_cloop.h b/usr.bin/mkuzip/mkuz_cloop.h
new file mode 100644
index 0000000..4ed7c50
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_cloop.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* CLOOP format and related constants */
+
+/*
+ * Integer values (block size, number of blocks, offsets)
+ * are stored in big-endian (network) order on disk.
+ */
+
+#define CLOOP_MAGIC_LEN 128
+#define CLOOP_OFS_COMPR 0x0b
+#define CLOOP_OFS_VERSN (CLOOP_OFS_COMPR + 1)
+
+#define CLOOP_MAJVER_2 '2'
+#define CLOOP_MAJVER_3 '3'
+
+#define CLOOP_COMP_LIBZ 'V'
+#define CLOOP_COMP_LZMA 'L'
+
+/*
+ * Leading part of a CLOOP image.  As noted above, the integer fields are
+ * kept in big-endian (network) byte order on disk.
+ */
+struct cloop_header {
+	char magic[CLOOP_MAGIC_LEN];	/* cloop magic */
+	uint32_t blksz;			/* block size */
+	uint32_t nblocks;		/* number of blocks */
+};
diff --git a/usr.bin/mkuzip/mkuz_conveyor.c b/usr.bin/mkuzip/mkuz_conveyor.c
new file mode 100644
index 0000000..856d445
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_conveyor.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <err.h>
+#include <inttypes.h>
+#include <md5.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#if defined(MKUZ_DEBUG)
+# include <stdio.h>
+#endif
+
+#include "mkuz_conveyor.h"
+#include "mkuz_cfg.h"
+#include "mkuzip.h"
+#include "mkuz_format.h"
+#include "mkuz_blk.h"
+#include "mkuz_fqueue.h"
+#include "mkuz_blk_chain.h"
+
+static void compute_digest(struct mkuz_blk *);
+
+/*
+ * Start-up arguments of a worker thread.  Allocated by
+ * mkuz_conveyor_ctor() for each thread and freed by the thread itself.
+ */
+struct cw_args {
+	struct mkuz_conveyor *cvp;	/* conveyor whose queues the worker serves */
+	struct mkuz_cfg *cfp;		/* configuration in effect */
+};
+
+/*
+ * Worker thread body.  p is a heap-allocated struct cw_args which is
+ * consumed (freed) here.  The thread keeps taking input blocks off the
+ * conveyor's work queue, produces an output block for each of them and
+ * posts it to the results queue, until the MKUZ_BLK_EOF marker shows up.
+ * Input blocks are freed once their result has been queued.
+ */
+static void *
+cworker(void *p)
+{
+	struct cw_args *args;
+	struct mkuz_conveyor *cvp;
+	struct mkuz_cfg *cfp;
+	struct mkuz_blk *in, *out;
+	void *c_ctx;
+
+	args = p;
+	cvp = args->cvp;
+	cfp = args->cfp;
+	free(args);
+	c_ctx = cfp->handler->f_init(cfp->blksz);
+	while ((in = mkuz_fqueue_deq(cvp->wrk_queue)) != MKUZ_BLK_EOF) {
+		if (cfp->no_zcomp != 0 ||
+		    mkuz_memvcmp(in->data, '\0', in->info.len) == 0) {
+			out = cfp->handler->f_compress(c_ctx, in);
+			if (cfp->en_dedup != 0) {
+				compute_digest(out);
+			}
+		} else {
+			/* All-zero block: stands in as an empty output block */
+			out = mkuz_blk_ctor(0);
+		}
+		out->info.blkno = in->info.blkno;
+		mkuz_fqueue_enq(cvp->results, out);
+		free(in);
+	}
+	/* Put the marker back so that the other workers get to see it too */
+	mkuz_fqueue_enq(cvp->wrk_queue, in);
+	return (NULL);
+}
+
+/*
+ * Store the MD5 digest of the first bp->info.len bytes of bp->data in
+ * bp->info.digest.
+ */
+static void
+compute_digest(struct mkuz_blk *bp)
+{
+	MD5_CTX ctx;
+
+	MD5Init(&ctx);
+	MD5Update(&ctx, bp->data, bp->info.len);
+	MD5Final(bp->info.digest, &ctx);
+}
+
+/*
+ * Build a conveyor: create its work and results queues and start
+ * cfp->nworkers threads running cworker().  Every thread is handed its
+ * own struct cw_args, which the thread frees.  The program exits through
+ * errx() if a thread cannot be created.
+ */
+struct mkuz_conveyor *
+mkuz_conveyor_ctor(struct mkuz_cfg *cfp)
+{
+	struct mkuz_conveyor *cp;
+	struct cw_args *args;
+	int n;
+
+	/* The thread handles live in the flexible array at the end of *cp. */
+	cp = mkuz_safe_zmalloc(sizeof(*cp) +
+	    (sizeof(cp->wthreads[0]) * cfp->nworkers));
+
+	cp->wrk_queue = mkuz_fqueue_ctor(1);
+	cp->results = mkuz_fqueue_ctor(1);
+
+	for (n = 0; n < cfp->nworkers; n++) {
+		args = mkuz_safe_zmalloc(sizeof(*args));
+		args->cvp = cp;
+		args->cfp = cfp;
+		if (pthread_create(&cp->wthreads[n], NULL, cworker, args) != 0) {
+			errx(1, "mkuz_conveyor_ctor: pthread_create() failed");
+			/* Not reached */
+		}
+	}
+	return (cp);
+}
diff --git a/usr.bin/mkuzip/mkuz_conveyor.h b/usr.bin/mkuzip/mkuz_conveyor.h
new file mode 100644
index 0000000..21328e7
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_conveyor.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+struct mkuz_fifo_queue;
+
+#define ITEMS_PER_WORKER 4
+
+#define MAX_WORKERS_AUTO 24
+
+/*
+ * The conveyor ties together the queue feeding the worker threads, the
+ * queue collecting their output and the handles of the threads.
+ */
+struct mkuz_conveyor {
+	/*
+	 * Work items are placed in here and picked up by the workers in a
+	 * FIFO fashion.
+	 */
+	struct mkuz_fifo_queue *wrk_queue;
+	/*
+	 * Results are dropped into this FIFO and the consumer is signalled
+	 * to pick them up.
+	 */
+	struct mkuz_fifo_queue *results;
+
+	/* Worker thread handles, one per started worker. */
+	pthread_t wthreads[];
+};
+
+struct mkuz_cfg;
+
+struct mkuz_conveyor *mkuz_conveyor_ctor(struct mkuz_cfg *);
diff --git a/usr.bin/mkuzip/mkuz_format.h b/usr.bin/mkuzip/mkuz_format.h
new file mode 100644
index 0000000..817c012
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_format.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+DEFINE_RAW_METHOD(f_init, void *, uint32_t);
+DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *);
+
+/* Description of one compression back-end. */
+struct mkuz_format {
+	const char *magic;		/* presumably the image magic string -- confirm */
+	const char *default_sufx;	/* presumably the default file suffix -- confirm */
+	f_init_t f_init;		/* creates a compressor context for a block size */
+	f_compress_t f_compress;	/* turns an input block into an output block */
+};
diff --git a/usr.bin/mkuzip/mkuz_fqueue.c b/usr.bin/mkuzip/mkuz_fqueue.c
new file mode 100644
index 0000000..db47a05
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_fqueue.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <err.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(MKUZ_DEBUG)
+# include <assert.h>
+#endif
+
+#include "mkuzip.h"
+#include "mkuz_fqueue.h"
+#include "mkuz_conveyor.h"
+#include "mkuz_blk.h"
+#include "mkuz_blk_chain.h"
+
+/*
+ * Create an empty FIFO queue.  wakeup_len is the queue length from which
+ * on enqueuing signals the queue's condition variable.  The program
+ * exits through errx() if the mutex or the condition variable cannot be
+ * initialised.
+ */
+struct mkuz_fifo_queue *
+mkuz_fqueue_ctor(int wakeup_len)
+{
+	struct mkuz_fifo_queue *q;
+
+	q = mkuz_safe_zmalloc(sizeof(*q));
+	if (pthread_mutex_init(&q->mtx, NULL) != 0) {
+		errx(1, "pthread_mutex_init() failed");
+	}
+	if (pthread_cond_init(&q->cvar, NULL) != 0) {
+		errx(1, "pthread_cond_init() failed");
+	}
+	q->wakeup_len = wakeup_len;
+	return (q);
+}
+
+/*
+ * Append block bp to the queue as its newest entry.  One waiter on the
+ * queue's condition variable is signalled when the queue holds at least
+ * wakeup_len entries afterwards.
+ */
+void
+mkuz_fqueue_enq(struct mkuz_fifo_queue *fqp, struct mkuz_blk *bp)
+{
+	struct mkuz_bchain_link *lnk;
+
+	/* The link is prepared before the lock is taken. */
+	lnk = mkuz_safe_zmalloc(sizeof(*lnk));
+	lnk->this = bp;
+
+	pthread_mutex_lock(&fqp->mtx);
+	if (fqp->first == NULL) {
+		/* Empty queue: the new link is also the oldest one. */
+		fqp->last = lnk;
+	} else {
+		fqp->first->prev = lnk;
+	}
+	fqp->first = lnk;
+	fqp->length++;
+	if (fqp->length >= fqp->wakeup_len) {
+		pthread_cond_signal(&fqp->cvar);
+	}
+	pthread_mutex_unlock(&fqp->mtx);
+}
+
+#if defined(NOTYET)
+/*
+ * Append a ready-made chain of clen links to the queue in one go.  cip_l
+ * is the chain's oldest link and cip_f its newest.  Returns the queue
+ * length after the operation.
+ */
+int
+mkuz_fqueue_enq_all(struct mkuz_fifo_queue *fqp, struct mkuz_bchain_link *cip_f,
+    struct mkuz_bchain_link *cip_l, int clen)
+{
+	int nlen;
+
+	pthread_mutex_lock(&fqp->mtx);
+	if (fqp->first == NULL) {
+		fqp->last = cip_l;
+	} else {
+		fqp->first->prev = cip_l;
+	}
+	fqp->first = cip_f;
+	nlen = fqp->length + clen;
+	fqp->length = nlen;
+	if (nlen >= fqp->wakeup_len) {
+		pthread_cond_signal(&fqp->cvar);
+	}
+	pthread_mutex_unlock(&fqp->mtx);
+	return (nlen);
+}
+#endif
+
+/*
+ * Tell whether any queued block satisfies cmp_cb(block, cap).  Returns 1
+ * if so and 0 otherwise.  The queue is walked without locking; the
+ * caller in this file holds fqp->mtx.
+ */
+static int
+mkuz_fqueue_check(struct mkuz_fifo_queue *fqp, cmp_cb_t cmp_cb, void *cap)
+{
+	struct mkuz_bchain_link *lnk;
+
+	lnk = fqp->last;
+	while (lnk != NULL) {
+		if (cmp_cb(lnk->this, cap)) {
+			return (1);
+		}
+		lnk = lnk->prev;
+	}
+	return (0);
+}
+
+/*
+ * Remove and return the oldest queued block for which
+ * cmp_cb(block, cap) is non-zero, sleeping on the queue's condition
+ * variable until such a block is present.
+ *
+ * Exactly one link is unlinked per call.  Should several queued blocks
+ * satisfy the predicate, the remaining ones stay queued for later calls,
+ * so no block is dropped and the length counter stays accurate.
+ */
+struct mkuz_blk *
+mkuz_fqueue_deq_when(struct mkuz_fifo_queue *fqp, cmp_cb_t cmp_cb, void *cap)
+{
+	struct mkuz_bchain_link *mip, *older;
+	struct mkuz_blk *bp;
+
+	pthread_mutex_lock(&fqp->mtx);
+	while (fqp->last == NULL || !mkuz_fqueue_check(fqp, cmp_cb, cap)) {
+		pthread_cond_wait(&fqp->cvar, &fqp->mtx);
+	}
+	/*
+	 * Walk from the oldest link towards the newest one, keeping track
+	 * of the link visited just before the current one.
+	 */
+	older = NULL;
+	for (mip = fqp->last; mip != NULL; older = mip, mip = mip->prev) {
+		if (cmp_cb(mip->this, cap)) {
+			break;
+		}
+	}
+#if defined(MKUZ_DEBUG)
+	/* The check above ran under the mutex, so a match must exist. */
+	assert(mip != NULL);
+	assert(fqp->length > 0);
+#endif
+	if (older == NULL) {
+		/* The match is the oldest link. */
+		fqp->last = mip->prev;
+	} else {
+		older->prev = mip->prev;
+	}
+	if (mip->prev == NULL) {
+		/* The match was the newest link. */
+		fqp->first = older;
+	}
+	fqp->length -= 1;
+	pthread_mutex_unlock(&fqp->mtx);
+	bp = mip->this;
+	free(mip);
+
+	return (bp);
+}
+
+/*
+ * Remove and return the oldest block in the queue, sleeping on the
+ * queue's condition variable while the queue is empty.
+ */
+struct mkuz_blk *
+mkuz_fqueue_deq(struct mkuz_fifo_queue *fqp)
+{
+	struct mkuz_bchain_link *oldest;
+	struct mkuz_blk *bp;
+
+	pthread_mutex_lock(&fqp->mtx);
+	for (;;) {
+		oldest = fqp->last;
+		if (oldest != NULL) {
+			break;
+		}
+		pthread_cond_wait(&fqp->cvar, &fqp->mtx);
+	}
+#if defined(MKUZ_DEBUG)
+	assert(fqp->length > 0);
+#endif
+	if (oldest->prev == NULL) {
+		/* That was the only entry; the queue becomes empty. */
+#if defined(MKUZ_DEBUG)
+		assert(fqp->length == 1);
+#endif
+		fqp->first = NULL;
+	}
+	fqp->last = oldest->prev;
+	fqp->length--;
+	pthread_mutex_unlock(&fqp->mtx);
+	bp = oldest->this;
+	free(oldest);
+
+	return (bp);
+}
+
+#if defined(NOTYET)
+/*
+ * Detach the whole content of the queue at once, sleeping on the queue's
+ * condition variable while it is empty.  Returns the oldest link of the
+ * detached chain and stores the number of links in *rclen.
+ */
+struct mkuz_bchain_link *
+mkuz_fqueue_deq_all(struct mkuz_fifo_queue *fqp, int *rclen)
+{
+	struct mkuz_bchain_link *chain;
+
+	pthread_mutex_lock(&fqp->mtx);
+	while (fqp->last == NULL) {
+		pthread_cond_wait(&fqp->cvar, &fqp->mtx);
+	}
+#if defined(MKUZ_DEBUG)
+	assert(fqp->length > 0);
+#endif
+	chain = fqp->last;
+	*rclen = fqp->length;
+	fqp->length = 0;
+	fqp->last = NULL;
+	fqp->first = NULL;
+	pthread_mutex_unlock(&fqp->mtx);
+	return (chain);
+}
+#endif
diff --git a/usr.bin/mkuzip/mkuz_fqueue.h b/usr.bin/mkuzip/mkuz_fqueue.h
new file mode 100644
index 0000000..4b34216f
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_fqueue.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+struct mkuz_fifo_queue { /* queue of block chain links guarded by a mutex and condition variable */
+ pthread_mutex_t mtx; /* held by the dequeue functions while they touch the fields below */
+ pthread_cond_t cvar; /* dequeuers wait on this while there is nothing for them to take */
+ struct mkuz_bchain_link *first; /* opposite end from last; set to NULL when the queue empties */
+ struct mkuz_bchain_link *last; /* end that mkuz_fqueue_deq() removes from; links chain via ->prev */
+ int length; /* number of links currently queued */
+ int wakeup_len; /* NOTE(review): presumably a length threshold for signalling cvar; confirm in mkuz_fqueue.c */
+};
+
+struct mkuz_blk;
+struct mkuz_bchain_link;
+
+DEFINE_RAW_METHOD(cmp_cb, int, const struct mkuz_blk *, void *); /* NOTE(review): presumably declares the cmp_cb_t callback type used below; confirm */
+ /* deq() waits for any block; deq_when() waits for one that the callback accepts. */
+struct mkuz_fifo_queue *mkuz_fqueue_ctor(int);
+void mkuz_fqueue_enq(struct mkuz_fifo_queue *, struct mkuz_blk *);
+struct mkuz_blk *mkuz_fqueue_deq(struct mkuz_fifo_queue *);
+struct mkuz_blk *mkuz_fqueue_deq_when(struct mkuz_fifo_queue *, cmp_cb_t, void *);
+#if defined(NOTYET) /* bulk enqueue/dequeue helpers, not built by default */
+struct mkuz_bchain_link *mkuz_fqueue_deq_all(struct mkuz_fifo_queue *, int *);
+int mkuz_fqueue_enq_all(struct mkuz_fifo_queue *, struct mkuz_bchain_link *,
+ struct mkuz_bchain_link *, int);
+#endif
diff --git a/usr.bin/mkuzip/mkuz_lzma.c b/usr.bin/mkuzip/mkuz_lzma.c
new file mode 100644
index 0000000..8810d2e
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_lzma.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * Copyright (c) 2011 Aleksandr Rybalko <ray@ddteam.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <err.h>
+#include <stdint.h>
+
+#include <lzma.h>
+
+#include "mkuzip.h"
+#include "mkuz_lzma.h"
+#include "mkuz_blk.h"
+
+#define USED_BLOCKSIZE DEV_BSIZE
+
+struct mkuz_lzma { /* per-run LZMA compressor context created by mkuz_lzma_init() */
+ lzma_filter filters[2]; /* [0] = LZMA2 using opt_lzma, [1] = LZMA_VLI_UNKNOWN end marker */
+ lzma_options_lzma opt_lzma; /* filled from LZMA_PRESET_DEFAULT */
+ lzma_stream strm; /* starts as LZMA_STREAM_INIT; set up again by lzma_stream_encoder() for each block */
+ uint32_t blksz; /* uncompressed cluster size in bytes */
+};
+
+static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT;
+
+void *
+mkuz_lzma_init(uint32_t blksz)
+{
+ struct mkuz_lzma *ulp;
+ /* Validate blksz, then build a zeroed context holding the default LZMA2 filter chain. */
+ if (blksz % USED_BLOCKSIZE != 0) {
+ errx(1, "cluster size should be multiple of %d",
+ USED_BLOCKSIZE);
+ /* Not reached */
+ }
+ if (blksz > MAXPHYS) {
+ errx(1, "cluster size is too large");
+ /* Not reached */
+ }
+ ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma));
+
+ /* Init lzma encoder */
+ ulp->strm = lzma_stream_init;
+ if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT))
+ errx(1, "Error loading LZMA preset");
+ /* Single-filter chain: LZMA2 with the preset options, followed by the end marker. */
+ ulp->filters[0].id = LZMA_FILTER_LZMA2;
+ ulp->filters[0].options = &ulp->opt_lzma; /* points into *ulp, so the context must stay allocated */
+ ulp->filters[1].id = LZMA_VLI_UNKNOWN;
+
+ ulp->blksz = blksz;
+ /* Opaque handle: mkuz_lzma_compress() casts it back to struct mkuz_lzma. */
+ return (void *)ulp;
+}
+
+struct mkuz_blk *
+mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
+{
+ lzma_ret ret;
+ struct mkuz_blk *rval;
+ struct mkuz_lzma *ulp;
+ /* Compress one cluster from iblk->data into a newly constructed block; exits via errx() on failure. */
+ ulp = (struct mkuz_lzma *)p; /* p is the handle returned by mkuz_lzma_init() */
+
+ rval = mkuz_blk_ctor(ulp->blksz * 2); /* output block requested at twice the cluster size */
+ /* Set the encoder up afresh for this block, with a CRC32 integrity check. */
+ ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32);
+ if (ret != LZMA_OK) {
+ if (ret == LZMA_MEMLIMIT_ERROR)
+ errx(1, "can't compress data: LZMA_MEMLIMIT_ERROR");
+
+ errx(1, "can't compress data: LZMA compressor ERROR");
+ }
+
+ ulp->strm.next_in = iblk->data;
+ ulp->strm.avail_in = ulp->blksz; /* always a full cluster, independent of iblk->info.len */
+ ulp->strm.next_out = rval->data;
+ ulp->strm.avail_out = rval->alen;
+
+ ret = lzma_code(&ulp->strm, LZMA_FINISH); /* all input is supplied, so finish in one call */
+ /* NOTE(review): the %zd conversions below receive unsigned operands; %zu looks like the matching form. */
+ if (ret != LZMA_STREAM_END) {
+ /* Error */
+ errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, "
+ "out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in),
+ (ulp->blksz * 2 - ulp->strm.avail_out));
+ }
+ /* lzma_end() is compiled out: strm is handed to lzma_stream_encoder() again on the next call. */
+#if 0
+ lzma_end(&ulp->strm);
+#endif
+
+ rval->info.len = rval->alen - ulp->strm.avail_out; /* bytes actually written to rval->data */
+ return (rval);
+}
diff --git a/usr.bin/mkuzip/mkuz_lzma.h b/usr.bin/mkuzip/mkuz_lzma.h
new file mode 100644
index 0000000..bba4542
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_lzma.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * Copyright (c) 2011 Aleksandr Rybalko <ray@ddteam.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* cloop header magic for LZMA images: a shell stub tagged "#L3.0" (format L3.0, since we moved to the XZ API) */
+#define CLOOP_MAGIC_LZMA \
+ "#!/bin/sh\n" \
+ "#L3.0\n" \
+ "n=uncompress\n" \
+ "m=geom_$n\n" \
+ "(kldstat -m $m 2>&-||kldload $m)>&-&&" \
+ "mount_cd9660 /dev/`mdconfig -af $0`.$n $1\n" \
+ "exit $?\n"
+#define DEFAULT_SUFX_LZMA ".ulzma" /* appended to the input name when no -o outfile is given */
+ /* Context constructor and per-cluster compressor, wired into ulzma_fmt in mkuzip.c. */
+void *mkuz_lzma_init(uint32_t);
+struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuz_time.c b/usr.bin/mkuzip/mkuz_time.c
new file mode 100644
index 0000000..a221957
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_time.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <math.h>
+#include <stdint.h>
+#include <time.h>
+
+#include "mkuz_time.h"
+
+double
+getdtime(void)
+{
+ struct timespec tp;
+ /* Read CLOCK_MONOTONIC and return it as fractional seconds; -1 if clock_gettime() fails. */
+ if (clock_gettime(CLOCK_MONOTONIC, &tp) == -1)
+ return (-1);
+
+ return timespec2dtime(&tp);
+}
diff --git a/usr.bin/mkuzip/mkuz_time.h b/usr.bin/mkuzip/mkuz_time.h
new file mode 100644
index 0000000..670eec2
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_time.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _RTPP_TIME_H_ /* NOTE(review): guard name does not match mkuz_time.h, and "_R..." is a reserved identifier form */
+#define _RTPP_TIME_H_
+ /* Field accessors; x is a pointer to a struct with tv_sec and tv_nsec members. */
+#define SEC(x) ((x)->tv_sec)
+#define NSEC(x) ((x)->tv_nsec)
+ /* Convert the timespec pointed to by s into seconds as a double. */
+#define timespec2dtime(s) ((double)SEC(s) + \
+ (double)NSEC(s) / 1000000000.0)
+
+/* Function prototypes */
+double getdtime(void);
+
+#endif /* _RTPP_TIME_H_ */
diff --git a/usr.bin/mkuzip/mkuz_zlib.c b/usr.bin/mkuzip/mkuz_zlib.c
new file mode 100644
index 0000000..4b191f9
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_zlib.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <err.h>
+#include <stdint.h>
+
+#include <zlib.h>
+
+#include "mkuzip.h"
+#include "mkuz_zlib.h"
+#include "mkuz_blk.h"
+
+struct mkuz_zlib { /* zlib compressor context created by mkuz_zlib_init() */
+ uLongf oblen; /* compressBound(blksz), computed once at init; size requested for each output block */
+ uint32_t blksz; /* uncompressed cluster size in bytes */
+};
+
+void *
+mkuz_zlib_init(uint32_t blksz)
+{
+ struct mkuz_zlib *zp;
+ /* Validate blksz and return a context caching the worst-case compressed size for it. */
+ if (blksz % DEV_BSIZE != 0) {
+ errx(1, "cluster size should be multiple of %d",
+ DEV_BSIZE);
+ /* Not reached */
+ }
+ if (compressBound(blksz) > MAXPHYS) { /* the limit applies to the worst-case output, not the input */
+ errx(1, "cluster size is too large");
+ /* Not reached */
+ }
+ zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib));
+ zp->oblen = compressBound(blksz);
+ zp->blksz = blksz;
+ /* Opaque handle: mkuz_zlib_compress() casts it back to struct mkuz_zlib. */
+ return (void *)zp;
+}
+
+struct mkuz_blk *
+mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk)
+{
+ uLongf destlen_z;
+ struct mkuz_blk *rval;
+ struct mkuz_zlib *zp;
+ /* Deflate one cluster (zp->blksz bytes of iblk->data) into a newly constructed block. */
+ zp = (struct mkuz_zlib *)p; /* p is the handle returned by mkuz_zlib_init() */
+
+ rval = mkuz_blk_ctor(zp->oblen);
+ /* compress2() reads the output capacity from destlen_z and overwrites it with the size produced. */
+ destlen_z = rval->alen;
+ if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz,
+ Z_BEST_COMPRESSION) != Z_OK) {
+ errx(1, "can't compress data: compress2() "
+ "failed");
+ /* Not reached */
+ }
+
+ rval->info.len = (uint32_t)destlen_z; /* compressed length of this cluster */
+ return (rval);
+}
diff --git a/usr.bin/mkuzip/mkuz_zlib.h b/usr.bin/mkuzip/mkuz_zlib.h
new file mode 100644
index 0000000..55e57a6
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_zlib.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define DEFAULT_SUFX_ZLIB ".uzip" /* appended to the input name when no -o outfile is given */
+ /* cloop header magic for zlib images: a shell stub tagged "#V2.0 Format". */
+#define CLOOP_MAGIC_ZLIB "#!/bin/sh\n#V2.0 Format\n" \
+ "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
+ "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
+ /* Context constructor and per-cluster compressor, wired into uzip_fmt in mkuzip.c. */
+void *mkuz_zlib_init(uint32_t);
+struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuzip.8 b/usr.bin/mkuzip/mkuzip.8
index e6aeb4b..6920e1b 100644
--- a/usr.bin/mkuzip/mkuzip.8
+++ b/usr.bin/mkuzip/mkuzip.8
@@ -1,9 +1,27 @@
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <sobomax@FreeBSD.org> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Maxim Sobolev
-.\" ----------------------------------------------------------------------------
+.\"-
+.\" Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
@@ -20,6 +38,7 @@ class
.Op Fl v
.Op Fl o Ar outfile
.Op Fl s Ar cluster_size
+.Op Fl j Ar compression_jobs
.Ar infile
.Sh DESCRIPTION
The
@@ -39,7 +58,9 @@ works in two phases:
An
.Ar infile
image is split into clusters; each cluster is compressed using
-.Xr zlib 3 .
+.Xr zlib 3
+or
+.Xr lzma 3 .
.It
The resulting set of compressed clusters along with headers that allow
locating each individual cluster is written to the output file.
@@ -51,7 +72,23 @@ The options are:
Name of the output file
.Ar outfile .
The default is to use the input name with the suffix
-.Pa .uzip .
+.Pa .uzip
+for the
+.Xr zlib 3
+compression or
+.Pa .ulzma
+for the
+.Xr lzma 3 .
+.It Fl L
+Use
+.Xr lzma 3
+compression algorithm instead of the default
+.Xr zlib 3 .
+The
+.Xr lzma 3
+provides noticeably better compression levels on the same data set
+at the expense of much slower compression speed (10-20x) and somewhat slower
+decompression (2-3x).
.It Fl s Ar cluster_size
Split the image into clusters of
.Ar cluster_size
@@ -61,6 +98,39 @@ The
should be a multiple of 512 bytes.
.It Fl v
Display verbose messages.
+.It Fl Z
+Disable zero-blocks detection and elimination.
+When this option is set, the
+.Nm
+would compress empty blocks (i.e. clusters that consist of only zero bytes)
+just as it would any other block.
+When the option is not set, the
+.Nm
+detects such blocks and skips them from the output.
+Setting
+.Fl Z
+results in a slight increase of the compressed image size, typically less than 0.1%
+of the final size of the compressed image.
+.It Fl d
+Enable de-duplication.
+When the option is enabled the
+.Nm
+detects identical blocks in the input and replaces each subsequent occurrence
+of such a block with a pointer to the very first one in the output.
+Setting this option results in a moderate decrease of the compressed image size,
+typically around 3-5% of the final size of the compressed image.
+.It Fl S
+Print summary about the compression ratio as well as output
+file size after file has been processed.
+.It Fl j Ar compression_jobs
+Specify the number of compression jobs that
+.Nm
+runs in parallel to speed up compression.
+When this option is not specified, the number of jobs is set equal
+to the value of
+.Va hw.ncpu
+.Xr sysctl 8
+variable.
.El
.Sh NOTES
The compression ratio largely depends on the cluster size used.
@@ -92,11 +162,20 @@ disk device using
and automatically mount it using
.Xr mount_cd9660 8
on the mount point provided as the first argument to the script.
+.Pp
+The de-duplication is a
+.Fx
+specific feature; while it does not require any changes to the on-disk
+compressed image format, it did require some matching changes to the
+.Xr geom_uzip 4
+to handle resulting images correctly.
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
.Xr gzip 1 ,
+.Xr xz 1 ,
.Xr zlib 3 ,
+.Xr lzma 3 ,
.Xr geom 4 ,
.Xr geom_uzip 4 ,
.Xr md 4 ,
diff --git a/usr.bin/mkuzip/mkuzip.c b/usr.bin/mkuzip/mkuzip.c
index 08c8ed0..6288f4e 100644
--- a/usr.bin/mkuzip/mkuzip.c
+++ b/usr.bin/mkuzip/mkuzip.c
@@ -1,68 +1,138 @@
/*
- * ----------------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <sobomax@FreeBSD.ORG> wrote this file. As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you think
- * this stuff is worth it, you can buy me a beer in return. Maxim Sobolev
- * ----------------------------------------------------------------------------
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
*
- * $FreeBSD$
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
#include <sys/types.h>
#include <sys/disk.h>
#include <sys/endian.h>
#include <sys/param.h>
+#include <sys/sysctl.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <netinet/in.h>
-#include <zlib.h>
+#include <assert.h>
+#include <ctype.h>
#include <err.h>
#include <fcntl.h>
+#include <pthread.h>
#include <signal.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#define CLSTSIZE 16384
-#define DEFAULT_SUFX ".uzip"
-
-#define CLOOP_MAGIC_LEN 128
-static char CLOOP_MAGIC_START[] = "#!/bin/sh\n#V2.0 Format\n"
- "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&"
- "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n";
-
-static char *readblock(int, char *, u_int32_t);
+#include "mkuzip.h"
+#include "mkuz_cloop.h"
+#include "mkuz_blockcache.h"
+#include "mkuz_zlib.h"
+#include "mkuz_lzma.h"
+#include "mkuz_blk.h"
+#include "mkuz_cfg.h"
+#include "mkuz_conveyor.h"
+#include "mkuz_format.h"
+#include "mkuz_fqueue.h"
+#include "mkuz_time.h"
+
+#define DEFAULT_CLSTSIZE 16384
+
+static struct mkuz_format uzip_fmt = { /* zlib handler; main() selects it by default */
+ .magic = CLOOP_MAGIC_ZLIB, /* copied into hdr.magic */
+ .default_sufx = DEFAULT_SUFX_ZLIB, /* used to derive the output name when -o is absent */
+ .f_init = &mkuz_zlib_init,
+ .f_compress = &mkuz_zlib_compress
+};
+
+static struct mkuz_format ulzma_fmt = { /* LZMA handler; selected by the -L option */
+ .magic = CLOOP_MAGIC_LZMA, /* copied into hdr.magic */
+ .default_sufx = DEFAULT_SUFX_LZMA, /* used to derive the output name when -o is absent */
+ .f_init = &mkuz_lzma_init,
+ .f_compress = &mkuz_lzma_compress
+};
+
+static struct mkuz_blk *readblock(int, u_int32_t);
static void usage(void);
-static void *safe_malloc(size_t);
static void cleanup(void);
static char *cleanfile = NULL;
+static int
+cmp_blkno(const struct mkuz_blk *bp, void *p)
+{
+ uint32_t *ap;
+ /* Queue predicate: non-zero when bp's block number equals the uint32_t that p points to. */
+ ap = (uint32_t *)p; /* NOTE(review): main() passes &io, an int, so it is read here as uint32_t */
+
+ return (bp->info.blkno == *ap);
+}
+
int main(int argc, char **argv)
{
- char *iname, *oname, *obuf, *ibuf;
+ struct mkuz_cfg cfs;
+ char *iname, *oname;
uint64_t *toc;
- int fdr, fdw, i, opt, verbose, tmp;
+ int i, io, opt, tmp;
+ struct {
+ int en;
+ FILE *f;
+ } summary;
struct iovec iov[2];
struct stat sb;
- uLongf destlen;
- uint64_t offset;
- struct cloop_header {
- char magic[CLOOP_MAGIC_LEN]; /* cloop magic */
- uint32_t blksz; /* block size */
- uint32_t nblocks; /* number of blocks */
- } hdr;
+ uint64_t offset, last_offset;
+ struct cloop_header hdr;
+ struct mkuz_conveyor *cvp;
+ void *c_ctx;
+ struct mkuz_blk_info *chit;
+ size_t ncpusz, ncpu;
+ double st, et;
+
+ st = getdtime();
+
+ ncpusz = sizeof(size_t);
+ if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
+ ncpu = 1;
+ } else if (ncpu > MAX_WORKERS_AUTO) {
+ ncpu = MAX_WORKERS_AUTO;
+ }
memset(&hdr, 0, sizeof(hdr));
- hdr.blksz = CLSTSIZE;
- strcpy(hdr.magic, CLOOP_MAGIC_START);
+ cfs.blksz = DEFAULT_CLSTSIZE;
oname = NULL;
- verbose = 0;
-
- while((opt = getopt(argc, argv, "o:s:v")) != -1) {
+ cfs.verbose = 0;
+ cfs.no_zcomp = 0;
+ cfs.en_dedup = 0;
+ summary.en = 0;
+ summary.f = stderr;
+ cfs.handler = &uzip_fmt;
+ cfs.nworkers = ncpu;
+ struct mkuz_blk *iblk, *oblk;
+
+ while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
switch(opt) {
case 'o':
oname = optarg;
@@ -75,20 +145,38 @@ int main(int argc, char **argv)
optarg);
/* Not reached */
}
- if (tmp % DEV_BSIZE != 0) {
- errx(1, "cluster size should be multiple of %d",
- DEV_BSIZE);
- /* Not reached */
- }
- if (compressBound(tmp) > MAXPHYS) {
- errx(1, "cluster size is too large");
- /* Not reached */
- }
- hdr.blksz = tmp;
+ cfs.blksz = tmp;
break;
case 'v':
- verbose = 1;
+ cfs.verbose = 1;
+ break;
+
+ case 'Z':
+ cfs.no_zcomp = 1;
+ break;
+
+ case 'd':
+ cfs.en_dedup = 1;
+ break;
+
+ case 'L':
+ cfs.handler = &ulzma_fmt;
+ break;
+
+ case 'S':
+ summary.en = 1;
+ summary.f = stdout;
+ break;
+
+ case 'j':
+ tmp = atoi(optarg);
+ if (tmp <= 0) {
+ errx(1, "invalid number of compression threads"
+ " specified: %s", optarg);
+ /* Not reached */
+ }
+ cfs.nworkers = tmp;
break;
default:
@@ -104,18 +192,25 @@ int main(int argc, char **argv)
/* Not reached */
}
+ strcpy(hdr.magic, cfs.handler->magic);
+
+ if (cfs.en_dedup != 0) {
+ hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
+ hdr.magic[CLOOP_OFS_COMPR] =
+ tolower(hdr.magic[CLOOP_OFS_COMPR]);
+ }
+
+ c_ctx = cfs.handler->f_init(cfs.blksz);
+
iname = argv[0];
if (oname == NULL) {
- asprintf(&oname, "%s%s", iname, DEFAULT_SUFX);
+ asprintf(&oname, "%s%s", iname, cfs.handler->default_sufx);
if (oname == NULL) {
err(1, "can't allocate memory");
/* Not reached */
}
}
- obuf = safe_malloc(compressBound(hdr.blksz));
- ibuf = safe_malloc(hdr.blksz);
-
signal(SIGHUP, exit);
signal(SIGINT, exit);
signal(SIGTERM, exit);
@@ -123,19 +218,19 @@ int main(int argc, char **argv)
signal(SIGXFSZ, exit);
atexit(cleanup);
- fdr = open(iname, O_RDONLY);
- if (fdr < 0) {
+ cfs.fdr = open(iname, O_RDONLY);
+ if (cfs.fdr < 0) {
err(1, "open(%s)", iname);
/* Not reached */
}
- if (fstat(fdr, &sb) != 0) {
+ if (fstat(cfs.fdr, &sb) != 0) {
err(1, "fstat(%s)", iname);
/* Not reached */
}
if (S_ISCHR(sb.st_mode)) {
off_t ms;
- if (ioctl(fdr, DIOCGMEDIASIZE, &ms) < 0) {
+ if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) {
err(1, "ioctl(DIOCGMEDIASIZE)");
/* Not reached */
}
@@ -145,18 +240,18 @@ int main(int argc, char **argv)
iname);
exit(1);
}
- hdr.nblocks = sb.st_size / hdr.blksz;
- if ((sb.st_size % hdr.blksz) != 0) {
- if (verbose != 0)
+ hdr.nblocks = sb.st_size / cfs.blksz;
+ if ((sb.st_size % cfs.blksz) != 0) {
+ if (cfs.verbose != 0)
fprintf(stderr, "file size is not multiple "
- "of %d, padding data\n", hdr.blksz);
+ "of %d, padding data\n", cfs.blksz);
hdr.nblocks++;
}
- toc = safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
+ toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
- fdw = open(oname, O_WRONLY | O_TRUNC | O_CREAT,
+ cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
- if (fdw < 0) {
+ if (cfs.fdw < 0) {
err(1, "open(%s)", oname);
/* Not reached */
}
@@ -170,90 +265,156 @@ int main(int argc, char **argv)
offset = iov[0].iov_len + iov[1].iov_len;
/* Reserve space for header */
- lseek(fdw, offset, SEEK_SET);
+ lseek(cfs.fdw, offset, SEEK_SET);
- if (verbose != 0)
+ if (cfs.verbose != 0) {
fprintf(stderr, "data size %ju bytes, number of clusters "
"%u, index length %zu bytes\n", sb.st_size,
hdr.nblocks, iov[1].iov_len);
+ }
- for(i = 0; i == 0 || ibuf != NULL; i++) {
- ibuf = readblock(fdr, ibuf, hdr.blksz);
- if (ibuf != NULL) {
- destlen = compressBound(hdr.blksz);
- if (compress2(obuf, &destlen, ibuf, hdr.blksz,
- Z_BEST_COMPRESSION) != Z_OK) {
- errx(1, "can't compress data: compress2() "
- "failed");
- /* Not reached */
+ cvp = mkuz_conveyor_ctor(&cfs);
+
+ last_offset = 0;
+ iblk = oblk = NULL;
+ for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
+ iblk = readblock(cfs.fdr, cfs.blksz);
+ mkuz_fqueue_enq(cvp->wrk_queue, iblk);
+ if (iblk != MKUZ_BLK_EOF &&
+ (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
+ continue;
+ }
+drain:
+ oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
+ assert(oblk->info.blkno == (unsigned)io);
+ oblk->info.offset = offset;
+ chit = NULL;
+ if (cfs.en_dedup != 0 && oblk->info.len > 0) {
+ chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
+ /*
+ * There should be at least one non-empty block
+ * between us and the backref'ed offset, otherwise
+ * we won't be able to parse that sequence correctly
+ * as it would be indistinguishable from another
+ * empty block.
+ */
+ if (chit != NULL && chit->offset == last_offset) {
+ chit = NULL;
}
- if (verbose != 0)
- fprintf(stderr, "cluster #%d, in %u bytes, "
- "out %lu bytes\n", i, hdr.blksz, destlen);
+ }
+ if (chit != NULL) {
+ toc[io] = htobe64(chit->offset);
+ oblk->info.len = 0;
} else {
- destlen = DEV_BSIZE - (offset % DEV_BSIZE);
- memset(obuf, 0, destlen);
- if (verbose != 0)
- fprintf(stderr, "padding data with %lu bytes so "
- "that file size is multiple of %d\n", destlen,
- DEV_BSIZE);
+ if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
+ oblk->info.len) < 0) {
+ err(1, "write(%s)", oname);
+ /* Not reached */
+ }
+ toc[io] = htobe64(offset);
+ last_offset = offset;
+ offset += oblk->info.len;
}
- if (write(fdw, obuf, destlen) < 0) {
- err(1, "write(%s)", oname);
- /* Not reached */
+ if (cfs.verbose != 0) {
+ fprintf(stderr, "cluster #%d, in %u bytes, "
+ "out len=%lu offset=%lu", io, cfs.blksz,
+ (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
+ if (chit != NULL) {
+ fprintf(stderr, " (backref'ed to #%d)",
+ chit->blkno);
+ }
+ fprintf(stderr, "\n");
+ }
+ free(oblk);
+ io += 1;
+ if (iblk == MKUZ_BLK_EOF) {
+ if (io < i)
+ goto drain;
+ /* Last block, see if we need to add some padding */
+ if ((offset % DEV_BSIZE) == 0)
+ continue;
+ oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
+ oblk->info.blkno = io;
+ oblk->info.len = oblk->alen;
+ if (cfs.verbose != 0) {
+ fprintf(stderr, "padding data with %lu bytes "
+ "so that file size is multiple of %d\n",
+ (u_long)oblk->alen, DEV_BSIZE);
+ }
+ mkuz_fqueue_enq(cvp->results, oblk);
+ goto drain;
}
- toc[i] = htobe64(offset);
- offset += destlen;
}
- close(fdr);
- if (verbose != 0)
- fprintf(stderr, "compressed data to %ju bytes, saved %lld "
- "bytes, %.2f%% decrease.\n", offset, (long long)(sb.st_size - offset),
- 100.0 * (long long)(sb.st_size - offset) / (float)sb.st_size);
+ close(cfs.fdr);
+
+ if (cfs.verbose != 0 || summary.en != 0) {
+ et = getdtime();
+ fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
+ "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
+ (long long)(sb.st_size - offset),
+ 100.0 * (long long)(sb.st_size - offset) /
+ (float)sb.st_size, (float)sb.st_size / (et - st));
+ }
/* Convert to big endian */
- hdr.blksz = htonl(hdr.blksz);
+ hdr.blksz = htonl(cfs.blksz);
hdr.nblocks = htonl(hdr.nblocks);
/* Write headers into pre-allocated space */
- lseek(fdw, 0, SEEK_SET);
- if (writev(fdw, iov, 2) < 0) {
+ lseek(cfs.fdw, 0, SEEK_SET);
+ if (writev(cfs.fdw, iov, 2) < 0) {
err(1, "writev(%s)", oname);
/* Not reached */
}
cleanfile = NULL;
- close(fdw);
+ close(cfs.fdw);
exit(0);
}
-static char *
-readblock(int fd, char *ibuf, u_int32_t clstsize)
+static struct mkuz_blk *	/* new block, or MKUZ_BLK_EOF once read() returns 0 */
+readblock(int fd, u_int32_t clstsize)
{
int numread;
+ struct mkuz_blk *rval;
+ static int blockcnt;	/* kept across calls: number given to the next block */
+ off_t cpos;
- bzero(ibuf, clstsize);
- numread = read(fd, ibuf, clstsize);
+ rval = mkuz_blk_ctor(clstsize);
+ /* Stamp the block with its sequence number and current position in fd. */
+ rval->info.blkno = blockcnt;
+ blockcnt += 1;
+ cpos = lseek(fd, 0, SEEK_CUR);
+ if (cpos < 0) {
+ err(1, "readblock: lseek() failed");
+ /* Not reached */
+ }
+ rval->info.offset = cpos;
+ /* At most clstsize bytes are read; info.len below keeps the real count. */
+ numread = read(fd, rval->data, clstsize);
if (numread < 0) {
- err(1, "read() failed");
+ err(1, "readblock: read() failed");
/* Not reached */
}
if (numread == 0) {
- return NULL;
+ free(rval);	/* nothing was read, so the fresh block is dropped */
+ return MKUZ_BLK_EOF;
}
- return ibuf;
+ rval->info.len = numread;
+ return rval;
}
static void
usage(void)
{
- fprintf(stderr, "usage: mkuzip [-v] [-o outfile] [-s cluster_size] infile\n");
+ fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
+ "[-j ncompr] infile\n");	/* synopsis goes to stderr, then exit(1) below */
exit(1);
}
-static void *
-safe_malloc(size_t size)
+void *
+mkuz_safe_malloc(size_t size)
{
void *retval;
@@ -265,6 +426,16 @@ safe_malloc(size_t size)
return retval;
}
+/* Allocate size bytes with mkuz_safe_malloc() and clear them to zero. */
+void *
+mkuz_safe_zmalloc(size_t size)
+{
+	void *zbuf = mkuz_safe_malloc(size);
+
+	bzero(zbuf, size);
+	return zbuf;
+}
+
static void
cleanup(void)
{
@@ -272,3 +443,12 @@ cleanup(void)
if (cleanfile != NULL)
unlink(cleanfile);
}
+
+/* Non-zero iff all size bytes at memory equal val (also when size is 0). */
+int
+mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
+{
+	const unsigned char *mm = memory;
+	/* mm[0] == val and mm[i] == mm[i + 1] for all i => every byte is val. */
+	return size == 0 || (*mm == val && memcmp(mm, mm + 1, size - 1) == 0);
+}
diff --git a/usr.bin/mkuzip/mkuzip.h b/usr.bin/mkuzip/mkuzip.h
new file mode 100644
index 0000000..f41507c
--- /dev/null
+++ b/usr.bin/mkuzip/mkuzip.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/* DEFINE_RAW_METHOD(f, r, a...) declares f_t: pointer to function (a...) -> r. */
+#define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args)
+
+void *mkuz_safe_malloc(size_t);
+void *mkuz_safe_zmalloc(size_t);	/* mkuz_safe_malloc() + bzero() */
+int mkuz_memvcmp(const void *, unsigned char, size_t);	/* all bytes == val? */
OpenPOWER on IntegriCloud