From 456d84d6658548ed57af32c0f24db36ad1e3865f Mon Sep 17 00:00:00 2001 From: kientzle Date: Wed, 12 Oct 2005 03:26:09 +0000 Subject: In pax interchange format, use UTF8 for writing link names, usernames, or group names that contain non-ASCII characters. In particular, this corrects an inconsistency reported by Ed Maste when archiving symlinks with odd characters: long symlinks would get preserved, short ones would be changed. --- lib/libarchive/archive_entry.c | 28 +++++++++++++ lib/libarchive/archive_entry.h | 4 ++ lib/libarchive/archive_write_set_format_pax.c | 59 +++++++++++++++++---------- 3 files changed, 70 insertions(+), 21 deletions(-) (limited to 'lib/libarchive') diff --git a/lib/libarchive/archive_entry.c b/lib/libarchive/archive_entry.c index 1a0febd..82d76b4 100644 --- a/lib/libarchive/archive_entry.c +++ b/lib/libarchive/archive_entry.c @@ -203,6 +203,8 @@ aes_copy(struct aes *dest, struct aes *src) static const char * aes_get_mbs(struct aes *aes) { + if (aes->aes_mbs == NULL && aes->aes_wcs == NULL) + return NULL; if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) { /* * XXX Need to estimate the number of byte in the @@ -224,6 +226,8 @@ aes_get_mbs(struct aes *aes) static const wchar_t * aes_get_wcs(struct aes *aes) { + if (aes->aes_wcs == NULL && aes->aes_mbs == NULL) + return NULL; if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) { /* * No single byte will be more than one wide character, @@ -457,12 +461,24 @@ archive_entry_gname(struct archive_entry *entry) return (aes_get_mbs(&entry->ae_gname)); } +const wchar_t * +archive_entry_gname_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_gname)); +} + const char * archive_entry_hardlink(struct archive_entry *entry) { return (aes_get_mbs(&entry->ae_hardlink)); } +const wchar_t * +archive_entry_hardlink_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_hardlink)); +} + ino_t archive_entry_ino(struct archive_entry *entry) { @@ -536,6 +552,12 @@ archive_entry_symlink(struct archive_entry *entry) return (aes_get_mbs(&entry->ae_symlink)); } +const wchar_t * +archive_entry_symlink_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_symlink)); +} + uid_t archive_entry_uid(struct archive_entry *entry) { @@ -548,6 +570,12 @@ archive_entry_uname(struct archive_entry *entry) return (aes_get_mbs(&entry->ae_uname)); } +const wchar_t * +archive_entry_uname_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_uname)); +} + /* * Functions to set archive_entry properties. */ diff --git a/lib/libarchive/archive_entry.h b/lib/libarchive/archive_entry.h index 8d9fdef..a35f0df 100644 --- a/lib/libarchive/archive_entry.h +++ b/lib/libarchive/archive_entry.h @@ -79,7 +79,9 @@ void archive_entry_fflags(struct archive_entry *, const char *archive_entry_fflags_text(struct archive_entry *); gid_t archive_entry_gid(struct archive_entry *); const char *archive_entry_gname(struct archive_entry *); +const wchar_t *archive_entry_gname_w(struct archive_entry *); const char *archive_entry_hardlink(struct archive_entry *); +const wchar_t *archive_entry_hardlink_w(struct archive_entry *); ino_t archive_entry_ino(struct archive_entry *); mode_t archive_entry_mode(struct archive_entry *); time_t archive_entry_mtime(struct archive_entry *); @@ -92,8 +94,10 @@ dev_t archive_entry_rdevminor(struct archive_entry *); int64_t archive_entry_size(struct archive_entry *); const struct stat *archive_entry_stat(struct archive_entry *); const char *archive_entry_symlink(struct archive_entry *); +const wchar_t *archive_entry_symlink_w(struct archive_entry *); uid_t archive_entry_uid(struct archive_entry *); const char *archive_entry_uname(struct archive_entry *); +const wchar_t *archive_entry_uname_w(struct archive_entry *); /* * Set fields in an archive_entry. diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c index 547ada9..75dfe30 100644 --- a/lib/libarchive/archive_write_set_format_pax.c +++ b/lib/libarchive/archive_write_set_format_pax.c @@ -62,6 +62,7 @@ static int archive_write_pax_header(struct archive *, static char *build_pax_attribute_name(char *dest, const char *src); static char *build_ustar_entry_name(char *dest, const char *src, const char *insert); static char *format_int(char *dest, int64_t); +static int has_non_ASCII(const wchar_t *); static int write_nulls(struct archive *, size_t); /* @@ -315,7 +316,7 @@ archive_write_pax_header(struct archive *a, struct archive_entry *entry_main; const char *linkname, *p; const char *hardlink; - const wchar_t *wp, *wp2; + const wchar_t *wp; const char *suffix_start; int need_extension, r, ret; struct pax *pax; @@ -375,36 +376,42 @@ archive_write_pax_header(struct archive *a, /* Find the largest suffix that fits in 'name' field. */ suffix_start = strchr(p + strlen(p) - 100 - 1, '/'); - /* Find non-ASCII character, if any. */ - wp2 = wp; - while (*wp2 != L'\0' && *wp2 < 128) - wp2++; - /* * If name is too long, or has non-ASCII characters, add * 'path' to pax extended attrs. */ - if (suffix_start == NULL || suffix_start - p > 155 || *wp2 != L'\0') { + if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) { add_pax_attr_w(&(pax->pax_header), "path", wp); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, p, NULL)); need_extension = 1; } - /* If link name is too long, add 'linkpath' to pax extended attrs. */ + /* If link name is too long or has non-ASCII characters, add + * 'linkpath' to pax extended attrs. */ linkname = hardlink; if (linkname == NULL) linkname = archive_entry_symlink(entry_main); - if (linkname != NULL && strlen(linkname) > 100) { - add_pax_attr(&(pax->pax_header), "linkpath", linkname); + if (linkname != NULL) { + /* There is a link name, get the wide version as well. */ if (hardlink != NULL) - archive_entry_set_hardlink(entry_main, - "././@LongHardLink"); + wp = archive_entry_hardlink_w(entry_main); else - archive_entry_set_symlink(entry_main, - "././@LongSymLink"); - need_extension = 1; + wp = archive_entry_symlink_w(entry_main); + + /* If the link is long or has a non-ASCII character, + * store it as a pax extended attribute. */ + if (strlen(linkname) > 100 || has_non_ASCII(wp)) { + add_pax_attr_w(&(pax->pax_header), "linkpath", wp); + if (hardlink != NULL) + archive_entry_set_hardlink(entry_main, + "././@LongHardLink"); + else + archive_entry_set_symlink(entry_main, + "././@LongSymLink"); + need_extension = 1; + } } /* If file size is too large, add 'size' to pax extended attrs. */ @@ -419,11 +426,12 @@ archive_write_pax_header(struct archive *a, need_extension = 1; } - /* If group name is too large, add 'gname' to pax extended attrs. */ - /* TODO: If gname has non-ASCII characters, use pax attribute. */ + /* If group name is too large or has non-ASCII characters, add + * 'gname' to pax extended attrs. */ p = archive_entry_gname(entry_main); - if (p != NULL && strlen(p) > 31) { - add_pax_attr(&(pax->pax_header), "gname", p); + wp = archive_entry_gname_w(entry_main); + if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) { + add_pax_attr_w(&(pax->pax_header), "gname", wp); archive_entry_set_gname(entry_main, NULL); need_extension = 1; } @@ -437,8 +445,9 @@ archive_write_pax_header(struct archive *a, /* If user name is too large, add 'uname' to pax extended attrs. */ /* TODO: If uname has non-ASCII characters, use pax attribute. */ p = archive_entry_uname(entry_main); - if (p != NULL && strlen(p) > 31) { - add_pax_attr(&(pax->pax_header), "uname", p); + wp = archive_entry_uname_w(entry_main); + if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) { + add_pax_attr_w(&(pax->pax_header), "uname", wp); archive_entry_set_uname(entry_main, NULL); need_extension = 1; } @@ -1001,3 +1010,11 @@ archive_write_pax_data(struct archive *a, const void *buff, size_t s) pax->entry_bytes_remaining -= s; return (ret); } + +static int +has_non_ASCII(const wchar_t *wp) +{ + while (*wp != L'\0' && *wp < 128) + wp++; + return (*wp != L'\0'); +} -- cgit v1.1