summaryrefslogtreecommitdiffstats
path: root/lib/libarchive
diff options
context:
space:
mode:
authorkientzle <kientzle@FreeBSD.org>2007-04-14 08:20:31 +0000
committerkientzle <kientzle@FreeBSD.org>2007-04-14 08:20:31 +0000
commitf22ef7785ff8379a306fdaf2ee3838a132d04f2d (patch)
tree8cfbedf61d20f0a3edf0efe3a5de037f19df7f4d /lib/libarchive
parentaed3c204260197526cb450a788cfdd1199023e67 (diff)
downloadFreeBSD-src-f22ef7785ff8379a306fdaf2ee3838a132d04f2d.zip
FreeBSD-src-f22ef7785ff8379a306fdaf2ee3838a132d04f2d.tar.gz
Conventionally, tar archives have always included a trailing '/'
for directories. bsdtar used to add this, but that recently got lost somehow. So now I'm adding it back in libarchive. The only odd part of doing this in libarchive: Adding a directory to a tar archive and then reading it back again can yield a different name. Add a test case to exercise some boundary conditions with tar filenames and ensure that trailing slashes are added to dir names only as necessary. Thanks to: Oliver Lehmann for bringing this regression to my attention.
Diffstat (limited to 'lib/libarchive')
-rw-r--r--lib/libarchive/Makefile2
-rw-r--r--lib/libarchive/archive_write_set_format_pax.c152
-rw-r--r--lib/libarchive/archive_write_set_format_ustar.c24
-rw-r--r--lib/libarchive/test/Makefile1
-rw-r--r--lib/libarchive/test/test_tar_filenames.c156
5 files changed, 261 insertions, 74 deletions
diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile
index 83fbe5c..e0048ab 100644
--- a/lib/libarchive/Makefile
+++ b/lib/libarchive/Makefile
@@ -9,7 +9,7 @@ LDADD= -lbz2 -lz
# Major: Bumped ONLY when API/ABI breakage happens (see SHLIB_MAJOR)
# Minor: Bumped when significant new features are added
# Revision: Bumped on any notable change
-VERSION= 2.0.28
+VERSION= 2.0.29
ARCHIVE_API_MAJOR!= echo ${VERSION} | sed -e 's/[^0-9]/./g' -e 's/\..*//'
ARCHIVE_API_MINOR!= echo ${VERSION} | sed -e 's/[^0-9]/./g' -e 's/[0-9]*\.//' -e 's/\..*//'
diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c
index 35a43ff..28cbdca 100644
--- a/lib/libarchive/archive_write_set_format_pax.c
+++ b/lib/libarchive/archive_write_set_format_pax.c
@@ -67,8 +67,8 @@ static void add_pax_attr_int(struct archive_string *,
static void add_pax_attr_time(struct archive_string *,
const char *key, int64_t sec,
unsigned long nanos);
-static void add_pax_attr_w(struct archive_string *,
- const char *key, const wchar_t *wvalue);
+static void add_pax_attr_w(struct archive_string *, const char *,
+ const wchar_t *, const wchar_t *);
static ssize_t archive_write_pax_data(struct archive_write *,
const void *, size_t);
static int archive_write_pax_finish(struct archive_write *);
@@ -205,30 +205,42 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value));
}
+/*
+ * UTF-8 encode the concatenation of two strings.
+ *
+ * This interface eliminates the need to do some string
+ * manipulations at higher layers.
+ */
static char *
-utf8_encode(const wchar_t *wval)
+utf8_encode(const wchar_t *wval1, const wchar_t *wval2)
{
int utf8len;
- const wchar_t *wp;
+ const wchar_t *wp, **wpp;
unsigned long wc;
char *utf8_value, *p;
+ const wchar_t *vals[2];
+
+ vals[0] = wval1;
+ vals[1] = wval2;
utf8len = 0;
- for (wp = wval; *wp != L'\0'; ) {
- wc = *wp++;
- if (wc <= 0x7f)
- utf8len++;
- else if (wc <= 0x7ff)
- utf8len += 2;
- else if (wc <= 0xffff)
- utf8len += 3;
- else if (wc <= 0x1fffff)
- utf8len += 4;
- else if (wc <= 0x3ffffff)
- utf8len += 5;
- else if (wc <= 0x7fffffff)
- utf8len += 6;
- /* Ignore larger values; UTF-8 can't encode them. */
+ for (wpp = vals; wpp < vals + 2 && *wpp; wpp++) {
+ for (wp = *wpp; *wp != L'\0'; ) {
+ wc = *wp++;
+ if (wc <= 0x7f)
+ utf8len++;
+ else if (wc <= 0x7ff)
+ utf8len += 2;
+ else if (wc <= 0xffff)
+ utf8len += 3;
+ else if (wc <= 0x1fffff)
+ utf8len += 4;
+ else if (wc <= 0x3ffffff)
+ utf8len += 5;
+ else if (wc <= 0x7fffffff)
+ utf8len += 6;
+ /* Ignore larger values; UTF-8 can't encode them. */
+ }
}
utf8_value = (char *)malloc(utf8len + 1);
@@ -237,42 +249,45 @@ utf8_encode(const wchar_t *wval)
return (NULL);
}
- for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
- wc = *wp++;
- if (wc <= 0x7f) {
- *p++ = (char)wc;
- } else if (wc <= 0x7ff) {
- p[0] = 0xc0 | ((wc >> 6) & 0x1f);
- p[1] = 0x80 | (wc & 0x3f);
- p += 2;
- } else if (wc <= 0xffff) {
- p[0] = 0xe0 | ((wc >> 12) & 0x0f);
- p[1] = 0x80 | ((wc >> 6) & 0x3f);
- p[2] = 0x80 | (wc & 0x3f);
- p += 3;
- } else if (wc <= 0x1fffff) {
- p[0] = 0xf0 | ((wc >> 18) & 0x07);
- p[1] = 0x80 | ((wc >> 12) & 0x3f);
- p[2] = 0x80 | ((wc >> 6) & 0x3f);
- p[3] = 0x80 | (wc & 0x3f);
- p += 4;
- } else if (wc <= 0x3ffffff) {
- p[0] = 0xf8 | ((wc >> 24) & 0x03);
- p[1] = 0x80 | ((wc >> 18) & 0x3f);
- p[2] = 0x80 | ((wc >> 12) & 0x3f);
- p[3] = 0x80 | ((wc >> 6) & 0x3f);
- p[4] = 0x80 | (wc & 0x3f);
- p += 5;
- } else if (wc <= 0x7fffffff) {
- p[0] = 0xfc | ((wc >> 30) & 0x01);
- p[1] = 0x80 | ((wc >> 24) & 0x3f);
- p[1] = 0x80 | ((wc >> 18) & 0x3f);
- p[2] = 0x80 | ((wc >> 12) & 0x3f);
- p[3] = 0x80 | ((wc >> 6) & 0x3f);
- p[4] = 0x80 | (wc & 0x3f);
- p += 6;
+ p = utf8_value;
+ for (wpp = vals; wpp < vals + 2 && *wpp; wpp++) {
+ for (wp = *wpp; *wp != L'\0'; ) {
+ wc = *wp++;
+ if (wc <= 0x7f) {
+ *p++ = (char)wc;
+ } else if (wc <= 0x7ff) {
+ p[0] = 0xc0 | ((wc >> 6) & 0x1f);
+ p[1] = 0x80 | (wc & 0x3f);
+ p += 2;
+ } else if (wc <= 0xffff) {
+ p[0] = 0xe0 | ((wc >> 12) & 0x0f);
+ p[1] = 0x80 | ((wc >> 6) & 0x3f);
+ p[2] = 0x80 | (wc & 0x3f);
+ p += 3;
+ } else if (wc <= 0x1fffff) {
+ p[0] = 0xf0 | ((wc >> 18) & 0x07);
+ p[1] = 0x80 | ((wc >> 12) & 0x3f);
+ p[2] = 0x80 | ((wc >> 6) & 0x3f);
+ p[3] = 0x80 | (wc & 0x3f);
+ p += 4;
+ } else if (wc <= 0x3ffffff) {
+ p[0] = 0xf8 | ((wc >> 24) & 0x03);
+ p[1] = 0x80 | ((wc >> 18) & 0x3f);
+ p[2] = 0x80 | ((wc >> 12) & 0x3f);
+ p[3] = 0x80 | ((wc >> 6) & 0x3f);
+ p[4] = 0x80 | (wc & 0x3f);
+ p += 5;
+ } else if (wc <= 0x7fffffff) {
+ p[0] = 0xfc | ((wc >> 30) & 0x01);
+ p[1] = 0x80 | ((wc >> 24) & 0x3f);
+ p[1] = 0x80 | ((wc >> 18) & 0x3f);
+ p[2] = 0x80 | ((wc >> 12) & 0x3f);
+ p[3] = 0x80 | ((wc >> 6) & 0x3f);
+ p[4] = 0x80 | (wc & 0x3f);
+ p += 6;
+ }
+ /* Ignore larger values; UTF-8 can't encode them. */
}
- /* Ignore larger values; UTF-8 can't encode them. */
}
*p = '\0';
@@ -280,9 +295,10 @@ utf8_encode(const wchar_t *wval)
}
static void
-add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
+add_pax_attr_w(struct archive_string *as, const char *key,
+ const wchar_t *wval1, const wchar_t *wval2)
{
- char *utf8_value = utf8_encode(wval);
+ char *utf8_value = utf8_encode(wval1, wval2);
if (utf8_value == NULL)
return;
add_pax_attr(as, key, utf8_value);
@@ -367,7 +383,7 @@ archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry)
free(url_encoded_name); /* Done with this. */
}
if (wcs_name != NULL) {
- encoded_name = utf8_encode(wcs_name);
+ encoded_name = utf8_encode(wcs_name, NULL);
free(wcs_name); /* Done with wchar_t name. */
}
@@ -401,6 +417,7 @@ archive_write_pax_header(struct archive_write *a,
const wchar_t *wp;
const char *suffix_start;
int need_extension, r, ret;
+ int need_slash = 0;
struct pax *pax;
const struct stat *st_main, *st_original;
@@ -451,18 +468,23 @@ archive_write_pax_header(struct archive_write *a,
*/
wp = archive_entry_pathname_w(entry_main);
p = archive_entry_pathname(entry_main);
- if (strlen(p) <= 100) /* Short enough for just 'name' field */
+ if (S_ISDIR(st_original->st_mode))
+ if (p[strlen(p) - 1] != '/')
+ need_slash = 1;
+ /* Short enough for just 'name' field */
+ if (strlen(p) + need_slash <= 100)
suffix_start = p; /* Record a zero-length prefix */
else
/* Find the largest suffix that fits in 'name' field. */
- suffix_start = strchr(p + strlen(p) - 100 - 1, '/');
+ suffix_start = strchr(p + strlen(p) + need_slash - 100 - 1, '/');
/*
* If name is too long, or has non-ASCII characters, add
* 'path' to pax extended attrs.
*/
if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) {
- add_pax_attr_w(&(pax->pax_header), "path", wp);
+ add_pax_attr_w(&(pax->pax_header), "path", wp,
+ need_slash ? L"/" : NULL);
archive_entry_set_pathname(entry_main,
build_ustar_entry_name(ustar_entry_name, p, strlen(p), NULL));
need_extension = 1;
@@ -484,7 +506,7 @@ archive_write_pax_header(struct archive_write *a,
/* If the link is long or has a non-ASCII character,
* store it as a pax extended attribute. */
if (strlen(linkname) > 100 || has_non_ASCII(wp)) {
- add_pax_attr_w(&(pax->pax_header), "linkpath", wp);
+ add_pax_attr_w(&(pax->pax_header), "linkpath", wp, NULL);
if (hardlink != NULL)
archive_entry_set_hardlink(entry_main,
"././@LongHardLink");
@@ -512,7 +534,7 @@ archive_write_pax_header(struct archive_write *a,
p = archive_entry_gname(entry_main);
wp = archive_entry_gname_w(entry_main);
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
- add_pax_attr_w(&(pax->pax_header), "gname", wp);
+ add_pax_attr_w(&(pax->pax_header), "gname", wp, NULL);
archive_entry_set_gname(entry_main, NULL);
need_extension = 1;
}
@@ -528,7 +550,7 @@ archive_write_pax_header(struct archive_write *a,
p = archive_entry_uname(entry_main);
wp = archive_entry_uname_w(entry_main);
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
- add_pax_attr_w(&(pax->pax_header), "uname", wp);
+ add_pax_attr_w(&(pax->pax_header), "uname", wp, NULL);
archive_entry_set_uname(entry_main, NULL);
need_extension = 1;
}
@@ -655,13 +677,13 @@ archive_write_pax_header(struct archive_write *a,
ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
if (wp != NULL && *wp != L'\0')
add_pax_attr_w(&(pax->pax_header),
- "SCHILY.acl.access", wp);
+ "SCHILY.acl.access", wp, NULL);
wp = archive_entry_acl_text_w(entry_original,
ARCHIVE_ENTRY_ACL_TYPE_DEFAULT |
ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
if (wp != NULL && *wp != L'\0')
add_pax_attr_w(&(pax->pax_header),
- "SCHILY.acl.default", wp);
+ "SCHILY.acl.default", wp, NULL);
/* Include star-compatible metadata info. */
/* Note: "SCHILY.dev{major,minor}" are NOT the
diff --git a/lib/libarchive/archive_write_set_format_ustar.c b/lib/libarchive/archive_write_set_format_ustar.c
index 75766d5..b977317 100644
--- a/lib/libarchive/archive_write_set_format_ustar.c
+++ b/lib/libarchive/archive_write_set_format_ustar.c
@@ -243,7 +243,7 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
{
unsigned int checksum;
int i, ret;
- size_t copy_length;
+ size_t copy_length, ps, extra_slash;
const char *p, *pp;
const struct stat *st;
int mytartype;
@@ -256,6 +256,7 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
* elements.
*/
memcpy(h, &template_header, 512);
+ st = archive_entry_stat(entry);
/*
* Because the block is already null-filled, and strings
@@ -264,11 +265,18 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
*/
pp = archive_entry_pathname(entry);
- if (strlen(pp) <= USTAR_name_size)
- memcpy(h + USTAR_name_offset, pp, strlen(pp));
- else {
+ ps = strlen(pp);
+ if (S_ISDIR(st->st_mode) && pp[ps - 1] != '/')
+ extra_slash = 1;
+ else
+ extra_slash = 0;
+ if (ps + extra_slash <= USTAR_name_size) {
+ memcpy(h + USTAR_name_offset, pp, ps);
+ if (extra_slash)
+ h[USTAR_name_offset + ps] = '/';
+ } else {
/* Store in two pieces, splitting at a '/'. */
- p = strchr(pp + strlen(pp) - USTAR_name_size - 1, '/');
+ p = strchr(pp + ps + extra_slash - USTAR_name_size - 1, '/');
/*
* If there is no path separator, or the prefix or
* remaining name are too large, return an error.
@@ -284,7 +292,9 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
} else {
/* Copy prefix and remainder to appropriate places */
memcpy(h + USTAR_prefix_offset, pp, p - pp);
- memcpy(h + USTAR_name_offset, p + 1, pp + strlen(pp) - p - 1);
+ memcpy(h + USTAR_name_offset, p + 1, pp + ps - p - 1);
+ if (extra_slash)
+ h[USTAR_name_offset + pp + ps - p - 1] = '/';
}
}
@@ -328,8 +338,6 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
memcpy(h + USTAR_gname_offset, p, copy_length);
}
- st = archive_entry_stat(entry);
-
if (format_number(st->st_mode & 07777, h + USTAR_mode_offset, USTAR_mode_size, USTAR_mode_max_size, strict)) {
archive_set_error(&a->archive, ERANGE, "Numeric mode too large");
ret = ARCHIVE_WARN;
diff --git a/lib/libarchive/test/Makefile b/lib/libarchive/test/Makefile
index b5da3ab..b436bbd 100644
--- a/lib/libarchive/test/Makefile
+++ b/lib/libarchive/test/Makefile
@@ -28,6 +28,7 @@ TESTS= \
test_read_large.c \
test_read_position.c \
test_read_truncated.c \
+ test_tar_filenames.c \
test_write_disk.c \
test_write_disk_perms.c \
test_write_disk_secure.c \
diff --git a/lib/libarchive/test/test_tar_filenames.c b/lib/libarchive/test/test_tar_filenames.c
new file mode 100644
index 0000000..57545ba
--- /dev/null
+++ b/lib/libarchive/test/test_tar_filenames.c
@@ -0,0 +1,156 @@
+/*-
+ * Copyright (c) 2003-2007 Tim Kientzle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+__FBSDID("$FreeBSD$");
+
+/*
+ * Exercise one dir/file length combination, especially around the
+ * magic sizes where ustar breaks filenames into prefix/suffix: write
+ * dlen 'a's + '/' + flen 'b's as a file, as a dir, and as a dir with
+ * a trailing '/', then read all three back and verify the names.
+ */
+static void
+test_filename(int dlen, int flen)
+{
+	char buff[8192];
+	char filename[400];
+	char dirname[400];
+	struct archive_entry *ae;
+	struct archive *a;
+	size_t used;
+	int i;
+	/* Build the path: dlen 'a's, one '/', then flen 'b's. */
+	for (i = 0; i < dlen; i++)
+		filename[i] = 'a';
+	filename[i++] = '/';
+	for (; i < dlen + flen + 1; i++)
+		filename[i] = 'b';
+	filename[i++] = '\0';
+
+	strcpy(dirname, filename);
+
+	/* Create a new archive in memory. */
+	assert((a = archive_write_new()) != NULL);
+	assertA(0 == archive_write_set_format_pax_restricted(a));
+	assertA(0 == archive_write_set_compression_none(a));
+	assertA(0 == archive_write_set_bytes_per_block(a,0));
+	assertA(0 == archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	/*
+	 * Write a file to it.
+	 */
+	assert((ae = archive_entry_new()) != NULL);
+	archive_entry_copy_pathname(ae, filename);
+	archive_entry_set_mode(ae, S_IFREG | 0755);
+	failure("Pathname %d/%d", dlen, flen);
+	assertA(0 == archive_write_header(a, ae));
+	archive_entry_free(ae);	/* The header is written; release the entry. */
+	/*
+	 * Write a dir to it (without trailing '/').
+	 */
+	assert((ae = archive_entry_new()) != NULL);
+	archive_entry_copy_pathname(ae, dirname);
+	archive_entry_set_mode(ae, S_IFDIR | 0755);
+	failure("Dirname %d/%d", dlen, flen);
+	assertA(0 == archive_write_header(a, ae));
+	archive_entry_free(ae);
+	/* Tar adds a '/' to directory names, so expect it on read-back. */
+	strcat(dirname, "/");
+
+	/*
+	 * Write a dir to it (with trailing '/').
+	 */
+	assert((ae = archive_entry_new()) != NULL);
+	archive_entry_copy_pathname(ae, dirname);
+	archive_entry_set_mode(ae, S_IFDIR | 0755);
+	failure("Dirname %d/%d", dlen, flen);
+	assertA(0 == archive_write_header(a, ae));
+	archive_entry_free(ae);
+	/* Close out the archive. */
+	assertA(0 == archive_write_close(a));
+#if ARCHIVE_API_VERSION > 1
+	assertA(0 == archive_write_finish(a));
+#else
+	archive_write_finish(a);
+#endif
+
+	/*
+	 * Now, read the data back.
+	 */
+	assert((a = archive_read_new()) != NULL);
+	assertA(0 == archive_read_support_format_all(a));
+	assertA(0 == archive_read_support_compression_all(a));
+	assertA(0 == archive_read_open_memory(a, buff, used));
+
+	/* Read the file and check the filename. */
+	assertA(0 == archive_read_next_header(a, &ae));
+	failure("Pathname %d/%d: %s", dlen, flen, archive_entry_pathname(ae));
+	assert(0 == strcmp(filename, archive_entry_pathname(ae)));
+	assert((S_IFREG | 0755) == archive_entry_mode(ae));
+
+	/*
+	 * Read the two dirs and check the names.
+	 *
+	 * Both dirs should read back with the same name, since
+	 * tar should add a trailing '/' to any dir that doesn't
+	 * already have one.
+	 */
+	assertA(0 == archive_read_next_header(a, &ae));
+	failure("Pathname %d/%d: %s", dlen, flen, archive_entry_pathname(ae));
+	assert(0 == strcmp(dirname, archive_entry_pathname(ae)));
+	assert((S_IFDIR | 0755) == archive_entry_mode(ae));
+
+	assertA(0 == archive_read_next_header(a, &ae));
+	failure("Pathname %d/%d: %s", dlen, flen, archive_entry_pathname(ae));
+	assert(0 == strcmp(dirname, archive_entry_pathname(ae)));
+	assert((S_IFDIR | 0755) == archive_entry_mode(ae));
+
+	/* Verify the end of the archive. */
+	assert(1 == archive_read_next_header(a, &ae));
+	assert(0 == archive_read_close(a));
+#if ARCHIVE_API_VERSION > 1
+	assert(0 == archive_read_finish(a));
+#else
+	archive_read_finish(a);
+#endif
+}
+
+DEFINE_TEST(test_tar_filenames)
+{
+	int dir_len, file_len;
+
+	/*
+	 * Combined path lengths on both sides of 100 characters, the
+	 * most that fits in the tar 'name' field without a prefix.
+	 */
+	for (dir_len = 40; dir_len <= 59; ++dir_len)
+		for (file_len = 40; file_len <= 59; ++file_len)
+			test_filename(dir_len, file_len);
+
+	/* Dir parts around the 155-char prefix limit, files around 100. */
+	for (dir_len = 140; dir_len <= 159; ++dir_len)
+		for (file_len = 90; file_len <= 109; ++file_len)
+			test_filename(dir_len, file_len);
+}
OpenPOWER on IntegriCloud