diff options
author | pjd <pjd@FreeBSD.org> | 2011-03-06 23:09:33 +0000 |
---|---|---|
committer | pjd <pjd@FreeBSD.org> | 2011-03-06 23:09:33 +0000 |
commit | 337b50efa895f3694fae9eeac35a3abd16181e75 (patch) | |
tree | 89777d06274aeb1f7f06b2647305a98d160268c9 /sbin/hastd/hast_compression.c | |
parent | 8697092f0ef00a8c8c15e4c26574e6fbfce848a2 (diff) | |
download | FreeBSD-src-337b50efa895f3694fae9eeac35a3abd16181e75.zip FreeBSD-src-337b50efa895f3694fae9eeac35a3abd16181e75.tar.gz |
Allow to compress on-the-wire data using two algorithms:
- HOLE - it simply turns all-zero blocks into few bytes header;
it is extremely fast, so it is turned on by default;
it is mostly intended to speed up initial synchronization
where we expect many zeros;
- LZF - very fast algorithm by Marc Alexander Lehmann, which shows
very decent compression ratio and has BSD license.
MFC after: 2 weeks
Diffstat (limited to 'sbin/hastd/hast_compression.c')
-rw-r--r-- | sbin/hastd/hast_compression.c | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/sbin/hastd/hast_compression.c b/sbin/hastd/hast_compression.c new file mode 100644 index 0000000..4f4a93a --- /dev/null +++ b/sbin/hastd/hast_compression.c @@ -0,0 +1,283 @@ +/*- + * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/endian.h> + +#include <errno.h> +#include <string.h> +#include <strings.h> + +#include <hast.h> +#include <lzf.h> +#include <nv.h> +#include <pjdlog.h> + +#include "hast_compression.h" + +static bool +allzeros(const void *data, size_t size) +{ + const uint64_t *p = data; + unsigned int i; + uint64_t v; + + PJDLOG_ASSERT((size % sizeof(*p)) == 0); + + /* + * This is the fastest method I found for checking if the given + * buffer contain all zeros. + * Because inside the loop we don't check at every step, we would + * get an answer only after walking through entire buffer. + * To return early if the buffer doesn't contain all zeros, we probe + * 8 bytes at the begining, in the middle and at the end of the buffer + * first. + */ + + size >>= 3; /* divide by 8 */ + if ((p[0] | p[size >> 1] | p[size - 1]) != 0) + return (false); + v = 0; + for (i = 0; i < size; i++) + v |= *p++; + return (v == 0); +} + +static void * +hast_hole_compress(const unsigned char *data, size_t *sizep) +{ + uint32_t size; + void *newbuf; + + if (!allzeros(data, *sizep)) + return (NULL); + + newbuf = malloc(sizeof(size)); + if (newbuf == NULL) { + pjdlog_warning("Unable to compress (no memory: %zu).", + (size_t)*sizep); + return (NULL); + } + size = htole32((uint32_t)*sizep); + bcopy(&size, newbuf, sizeof(size)); + *sizep = sizeof(size); + + return (newbuf); +} + +static void * +hast_hole_decompress(const unsigned char *data, size_t *sizep) +{ + uint32_t size; + void *newbuf; + + if (*sizep != sizeof(size)) { + pjdlog_error("Unable to decompress (invalid size: %zu).", + *sizep); + return (NULL); + } + + bcopy(data, &size, sizeof(size)); + size = le32toh(size); + + newbuf = malloc(size); + if (newbuf == NULL) { + pjdlog_error("Unable to decompress (no memory: %zu).", + (size_t)size); + return (NULL); + } + bzero(newbuf, size); + *sizep = size; + + return (newbuf); +} + +/* Minimum block size to try to compress. */ +#define HAST_LZF_COMPRESS_MIN 1024 + +static void * +hast_lzf_compress(const unsigned char *data, size_t *sizep) +{ + unsigned char *newbuf; + uint32_t origsize; + size_t newsize; + + origsize = *sizep; + + if (origsize <= HAST_LZF_COMPRESS_MIN) + return (NULL); + + newsize = sizeof(origsize) + origsize - HAST_LZF_COMPRESS_MIN; + newbuf = malloc(newsize); + if (newbuf == NULL) { + pjdlog_warning("Unable to compress (no memory: %zu).", + newsize); + return (NULL); + } + newsize = lzf_compress(data, *sizep, newbuf + sizeof(origsize), + newsize - sizeof(origsize)); + if (newsize == 0) { + free(newbuf); + return (NULL); + } + origsize = htole32(origsize); + bcopy(&origsize, newbuf, sizeof(origsize)); + + *sizep = sizeof(origsize) + newsize; + return (newbuf); +} + +static void * +hast_lzf_decompress(const unsigned char *data, size_t *sizep) +{ + unsigned char *newbuf; + uint32_t origsize; + size_t newsize; + + PJDLOG_ASSERT(*sizep > sizeof(origsize)); + + bcopy(data, &origsize, sizeof(origsize)); + origsize = le32toh(origsize); + PJDLOG_ASSERT(origsize > HAST_LZF_COMPRESS_MIN); + + newbuf = malloc(origsize); + if (newbuf == NULL) { + pjdlog_error("Unable to decompress (no memory: %zu).", + (size_t)origsize); + return (NULL); + } + newsize = lzf_decompress(data + sizeof(origsize), + *sizep - sizeof(origsize), newbuf, origsize); + if (newsize == 0) { + free(newbuf); + pjdlog_error("Unable to decompress."); + return (NULL); + } + PJDLOG_ASSERT(newsize == origsize); + + *sizep = newsize; + return (newbuf); +} + +const char * +compression_name(int num) +{ + + switch (num) { + case HAST_COMPRESSION_NONE: + return ("none"); + case HAST_COMPRESSION_HOLE: + return ("hole"); + case HAST_COMPRESSION_LZF: + return ("lzf"); + } + return ("unknown"); +} + +int +compression_send(const struct hast_resource *res, struct nv *nv, void **datap, + size_t *sizep, bool *freedatap) +{ + unsigned char *newbuf; + int compression; + size_t size; + + size = *sizep; + compression = res->hr_compression; + + switch (compression) { + case HAST_COMPRESSION_NONE: + return (0); + case HAST_COMPRESSION_HOLE: + newbuf = hast_hole_compress(*datap, &size); + break; + case HAST_COMPRESSION_LZF: + /* Try 'hole' compression first. */ + newbuf = hast_hole_compress(*datap, &size); + if (newbuf != NULL) + compression = HAST_COMPRESSION_HOLE; + else + newbuf = hast_lzf_compress(*datap, &size); + break; + default: + PJDLOG_ABORT("Invalid compression: %d.", res->hr_compression); + } + + if (newbuf == NULL) { + /* Unable to compress the data. */ + return (0); + } + nv_add_string(nv, compression_name(compression), "compression"); + if (nv_error(nv) != 0) { + free(newbuf); + errno = nv_error(nv); + return (-1); + } + if (*freedatap) + free(*datap); + *freedatap = true; + *datap = newbuf; + *sizep = size; + + return (0); +} + +int +compression_recv(const struct hast_resource *res __unused, struct nv *nv, + void **datap, size_t *sizep, bool *freedatap) +{ + unsigned char *newbuf; + const char *algo; + size_t size; + + algo = nv_get_string(nv, "compression"); + if (algo == NULL) + return (0); /* No compression. */ + + newbuf = NULL; + size = *sizep; + + if (strcmp(algo, "hole") == 0) + newbuf = hast_hole_decompress(*datap, &size); + else if (strcmp(algo, "lzf") == 0) + newbuf = hast_lzf_decompress(*datap, &size); + else { + pjdlog_error("Unknown compression algorithm '%s'.", algo); + return (-1); /* Unknown compression algorithm. */ + } + + if (newbuf == NULL) + return (-1); + if (*freedatap) + free(*datap); + *freedatap = true; + *datap = newbuf; + *sizep = size; + + return (0); +} |