summaryrefslogtreecommitdiffstats
path: root/lib/libarchive/archive_write_set_format_pax.c
diff options
context:
space:
mode:
authorkientzle <kientzle@FreeBSD.org>2008-03-15 01:43:59 +0000
committerkientzle <kientzle@FreeBSD.org>2008-03-15 01:43:59 +0000
commit4f3f5f46ddf2a018c660346b8b25ff014cd2a16f (patch)
tree9944fed4bc666534679b5d220e827567e56366d5 /lib/libarchive/archive_write_set_format_pax.c
parent1e6f2f459f1b97c774a8fa0651eedb166bcdd124 (diff)
downloadFreeBSD-src-4f3f5f46ddf2a018c660346b8b25ff014cd2a16f.zip
FreeBSD-src-4f3f5f46ddf2a018c660346b8b25ff014cd2a16f.tar.gz
A subtle point: "pax interchange format" mandates that all strings
(including pathname, gname, uname) be stored in UTF-8. This usually doesn't cause problems on FreeBSD because the "C" locale on FreeBSD can convert any byte to Unicode/wchar_t and from there to UTF-8. In other locales (including the "C" locale on Linux which is really ASCII), you can get into trouble with pathnames that cannot be converted to UTF-8. Libarchive's pax writer truncated pathnames and other strings at the first nonconvertible character. (ouch!) Other archivers have worked around this by storing unconvertible pathnames as raw binary, a practice which has been sanctioned by the Austin group. However, libarchive's pax reader would segfault reading headers that weren't proper UTF-8. (ouch!) Since bsdtar defaults to pax format, this affects bsdtar rather heavily. To correctly support the new "hdrcharset" header that is going into SUS and to handle conversion failures in general, libarchive's pax reader and writer have been overhauled fairly extensively. They used to do most of the pax header processing using wchar_t (Unicode); they now do most of it using char so that common logic applies to either UTF-8 or "binary" strings. As a bonus, a number of extraneous conversions to/from wchar_t have been eliminated, which should speed things up just a tad. Thanks to: Bjoern Jacke for originally reporting this to me Thanks to: Joerg Sonnenberger for noting a bad typo in my first draft of this Thanks to: Gunnar Ritter for getting the standard fixed MFC after: 5 days
Diffstat (limited to 'lib/libarchive/archive_write_set_format_pax.c')
-rw-r--r--lib/libarchive/archive_write_set_format_pax.c176
1 files changed, 126 insertions, 50 deletions
diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c
index fbec7eb..1632137 100644
--- a/lib/libarchive/archive_write_set_format_pax.c
+++ b/lib/libarchive/archive_write_set_format_pax.c
@@ -383,19 +383,25 @@ archive_write_pax_header(struct archive_write *a,
struct archive_entry *entry_original)
{
struct archive_entry *entry_main;
- const char *linkname, *p;
+ const char *p;
char *t;
- const char *hardlink;
const wchar_t *wp;
const char *suffix_start;
int need_extension, r, ret;
struct pax *pax;
+ const char *hdrcharset = NULL;
+ const char *hardlink;
+ const char *path = NULL, *linkpath = NULL;
+ const char *uname = NULL, *gname = NULL;
+ const wchar_t *path_w = NULL, *linkpath_w = NULL;
+ const wchar_t *uname_w = NULL, *gname_w = NULL;
char paxbuff[512];
char ustarbuff[512];
char ustar_entry_name[256];
char pax_entry_name[256];
+ ret = ARCHIVE_OK;
need_extension = 0;
pax = (struct pax *)a->format_data;
@@ -442,53 +448,109 @@ archive_write_pax_header(struct archive_write *a,
archive_string_empty(&(pax->pax_header)); /* Blank our work area. */
/*
+ * First, check the name fields and see if any of them
+ * require binary coding. If any of them does, then all of
+ * them do.
+ */
+ hdrcharset = NULL;
+ path = archive_entry_pathname(entry_main);
+ path_w = archive_entry_pathname_w(entry_main);
+ if (path != NULL && path_w == NULL) {
+ archive_set_error(&a->archive, EILSEQ,
+ "Can't translate pathname '%s' to UTF-8", path);
+ ret = ARCHIVE_WARN;
+ hdrcharset = "BINARY";
+ }
+ uname = archive_entry_uname(entry_main);
+ uname_w = archive_entry_uname_w(entry_main);
+ if (uname != NULL && uname_w == NULL) {
+ archive_set_error(&a->archive, EILSEQ,
+ "Can't translate uname '%s' to UTF-8", uname);
+ ret = ARCHIVE_WARN;
+ hdrcharset = "BINARY";
+ }
+ gname = archive_entry_gname(entry_main);
+ gname_w = archive_entry_gname_w(entry_main);
+ if (gname != NULL && gname_w == NULL) {
+ archive_set_error(&a->archive, EILSEQ,
+ "Can't translate gname '%s' to UTF-8", gname);
+ ret = ARCHIVE_WARN;
+ hdrcharset = "BINARY";
+ }
+ linkpath = hardlink;
+ if (linkpath != NULL) {
+ linkpath_w = archive_entry_hardlink_w(entry_main);
+ } else {
+ linkpath = archive_entry_symlink(entry_main);
+ if (linkpath != NULL)
+ linkpath_w = archive_entry_symlink_w(entry_main);
+ }
+ if (linkpath != NULL && linkpath_w == NULL) {
+ archive_set_error(&a->archive, EILSEQ,
+ "Can't translate linkpath '%s' to UTF-8", linkpath);
+ ret = ARCHIVE_WARN;
+ hdrcharset = "BINARY";
+ }
+
+ /* Store the header encoding first, to be nice to readers. */
+ if (hdrcharset != NULL)
+ add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset);
+
+ /*
* Determining whether or not the name is too big is ugly
* because of the rules for dividing names between 'name' and
* 'prefix' fields. Here, I pick out the longest possible
* suffix, then test whether the remaining prefix is too long.
*/
- wp = archive_entry_pathname_w(entry_main);
- p = archive_entry_pathname(entry_main);
- if (strlen(p) <= 100) /* Short enough for just 'name' field */
- suffix_start = p; /* Record a zero-length prefix */
+ if (strlen(path) <= 100) /* Short enough for just 'name' field */
+ suffix_start = path; /* Record a zero-length prefix */
else
/* Find the largest suffix that fits in 'name' field. */
- suffix_start = strchr(p + strlen(p) - 100 - 1, '/');
+ suffix_start = strchr(path + strlen(path) - 100 - 1, '/');
/*
* If name is too long, or has non-ASCII characters, add
- * 'path' to pax extended attrs.
+ * 'path' to pax extended attrs. (Note that an unconvertible
+ * name must have non-ASCII characters.)
*/
- if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) {
- add_pax_attr_w(&(pax->pax_header), "path", wp);
+ if (suffix_start == NULL || suffix_start - path > 155
+ || path_w == NULL || has_non_ASCII(path_w)) {
+ if (path_w == NULL || hdrcharset != NULL)
+ /* Can't do UTF-8, so store it raw. */
+ add_pax_attr(&(pax->pax_header), "path", path);
+ else
+ add_pax_attr_w(&(pax->pax_header), "path", path_w);
archive_entry_set_pathname(entry_main,
- build_ustar_entry_name(ustar_entry_name, p, strlen(p), NULL));
+ build_ustar_entry_name(ustar_entry_name,
+ path, strlen(path), NULL));
need_extension = 1;
}
- /* If link name is too long or has non-ASCII characters, add
- * 'linkpath' to pax extended attrs. */
- linkname = hardlink;
- if (linkname == NULL)
- linkname = archive_entry_symlink(entry_main);
-
- if (linkname != NULL) {
- /* There is a link name, get the wide version as well. */
- if (hardlink != NULL)
- wp = archive_entry_hardlink_w(entry_main);
- else
- wp = archive_entry_symlink_w(entry_main);
-
- /* If the link is long or has a non-ASCII character,
- * store it as a pax extended attribute. */
- if (strlen(linkname) > 100 || has_non_ASCII(wp)) {
- add_pax_attr_w(&(pax->pax_header), "linkpath", wp);
- if (hardlink != NULL)
- archive_entry_set_hardlink(entry_main,
- "././@LongHardLink");
+ if (linkpath != NULL) {
+ /* If link name is too long or has non-ASCII characters, add
+ * 'linkpath' to pax extended attrs. */
+ if (strlen(linkpath) > 100 || linkpath_w == NULL
+ || linkpath_w == NULL || has_non_ASCII(linkpath_w)) {
+ if (linkpath_w == NULL || hdrcharset != NULL)
+ /* If the linkpath is not convertible
+ * to wide, or we're encoding in
+ * binary anyway, store it raw. */
+ add_pax_attr(&(pax->pax_header),
+ "linkpath", linkpath);
else
- archive_entry_set_symlink(entry_main,
- "././@LongSymLink");
+ /* If the link is long or has a
+ * non-ASCII character, store it as a
+ * pax extended attribute. */
+ add_pax_attr_w(&(pax->pax_header),
+ "linkpath", linkpath_w);
+ if (strlen(linkpath) > 100) {
+ if (hardlink != NULL)
+ archive_entry_set_hardlink(entry_main,
+ "././@LongHardLink");
+ else
+ archive_entry_set_symlink(entry_main,
+ "././@LongSymLink");
+ }
need_extension = 1;
}
}
@@ -509,12 +571,20 @@ archive_write_pax_header(struct archive_write *a,
/* If group name is too large or has non-ASCII characters, add
* 'gname' to pax extended attrs. */
- p = archive_entry_gname(entry_main);
- wp = archive_entry_gname_w(entry_main);
- if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
- add_pax_attr_w(&(pax->pax_header), "gname", wp);
- archive_entry_set_gname(entry_main, NULL);
- need_extension = 1;
+ if (gname != NULL) {
+ if (strlen(gname) > 31
+ || gname_w == NULL
+ || has_non_ASCII(gname_w))
+ {
+ if (gname_w == NULL || hdrcharset != NULL) {
+ add_pax_attr(&(pax->pax_header),
+ "gname", gname);
+ } else {
+ add_pax_attr_w(&(pax->pax_header),
+ "gname", gname_w);
+ }
+ need_extension = 1;
+ }
}
/* If numeric UID is too large, add 'uid' to pax extended attrs. */
@@ -524,14 +594,21 @@ archive_write_pax_header(struct archive_write *a,
need_extension = 1;
}
- /* If user name is too large, add 'uname' to pax extended attrs. */
- /* TODO: If uname has non-ASCII characters, use pax attribute. */
- p = archive_entry_uname(entry_main);
- wp = archive_entry_uname_w(entry_main);
- if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
- add_pax_attr_w(&(pax->pax_header), "uname", wp);
- archive_entry_set_uname(entry_main, NULL);
- need_extension = 1;
+ /* Add 'uname' to pax extended attrs if necessary. */
+ if (uname != NULL) {
+ if (strlen(uname) > 31
+ || uname_w == NULL
+ || has_non_ASCII(uname_w))
+ {
+ if (uname_w == NULL || hdrcharset != NULL) {
+ add_pax_attr(&(pax->pax_header),
+ "uname", uname);
+ } else {
+ add_pax_attr_w(&(pax->pax_header),
+ "uname", uname_w);
+ }
+ need_extension = 1;
+ }
}
/*
@@ -733,7 +810,6 @@ archive_write_pax_header(struct archive_write *a,
__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0);
/* If we built any extended attributes, write that entry first. */
- ret = ARCHIVE_OK;
if (archive_strlen(&(pax->pax_header)) > 0) {
struct archive_entry *pax_attr_entry;
time_t s;
@@ -793,13 +869,13 @@ archive_write_pax_header(struct archive_write *a,
/* Standard ustar doesn't support ctime. */
archive_entry_set_ctime(pax_attr_entry, 0, 0);
- ret = __archive_write_format_header_ustar(a, paxbuff,
+ r = __archive_write_format_header_ustar(a, paxbuff,
pax_attr_entry, 'x', 1);
archive_entry_free(pax_attr_entry);
/* Note that the 'x' header shouldn't ever fail to format */
- if (ret != 0) {
+ if (r != 0) {
const char *msg = "archive_write_pax_header: "
"'x' header failed?! This can't happen.\n";
write(2, msg, strlen(msg));
OpenPOWER on IntegriCloud