summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: kientzle <kientzle@FreeBSD.org> 2005-10-12 03:26:09 +0000
committer: kientzle <kientzle@FreeBSD.org> 2005-10-12 03:26:09 +0000
commit: 456d84d6658548ed57af32c0f24db36ad1e3865f (patch)
tree: bee5d8ed508e59cc24a9cec013daaa79f89bc24b
parent: 0ce580e541845da75bdd8e15bc6abdd6c573eb4f (diff)
download: FreeBSD-src-456d84d6658548ed57af32c0f24db36ad1e3865f.zip
FreeBSD-src-456d84d6658548ed57af32c0f24db36ad1e3865f.tar.gz
In pax interchange format, use UTF-8 for writing
link names, user names, or group names that contain non-ASCII characters. In particular, this corrects an inconsistency reported by Ed Maste when archiving symlinks with odd characters: long symlinks were preserved, while short ones were changed.
-rw-r--r-- lib/libarchive/archive_entry.c 28
-rw-r--r-- lib/libarchive/archive_entry.h 4
-rw-r--r-- lib/libarchive/archive_write_set_format_pax.c 59
3 files changed, 70 insertions, 21 deletions
diff --git a/lib/libarchive/archive_entry.c b/lib/libarchive/archive_entry.c
index 1a0febd..82d76b4 100644
--- a/lib/libarchive/archive_entry.c
+++ b/lib/libarchive/archive_entry.c
@@ -203,6 +203,8 @@ aes_copy(struct aes *dest, struct aes *src)
static const char *
aes_get_mbs(struct aes *aes)
{
+ if (aes->aes_mbs == NULL && aes->aes_wcs == NULL)
+ return NULL;
if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) {
/*
* XXX Need to estimate the number of byte in the
@@ -224,6 +226,8 @@ aes_get_mbs(struct aes *aes)
static const wchar_t *
aes_get_wcs(struct aes *aes)
{
+ if (aes->aes_wcs == NULL && aes->aes_mbs == NULL)
+ return NULL;
if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) {
/*
* No single byte will be more than one wide character,
@@ -457,12 +461,24 @@ archive_entry_gname(struct archive_entry *entry)
return (aes_get_mbs(&entry->ae_gname));
}
+const wchar_t *
+archive_entry_gname_w(struct archive_entry *entry)
+{
+ return (aes_get_wcs(&entry->ae_gname));
+}
+
const char *
archive_entry_hardlink(struct archive_entry *entry)
{
return (aes_get_mbs(&entry->ae_hardlink));
}
+const wchar_t *
+archive_entry_hardlink_w(struct archive_entry *entry)
+{
+ return (aes_get_wcs(&entry->ae_hardlink));
+}
+
ino_t
archive_entry_ino(struct archive_entry *entry)
{
@@ -536,6 +552,12 @@ archive_entry_symlink(struct archive_entry *entry)
return (aes_get_mbs(&entry->ae_symlink));
}
+const wchar_t *
+archive_entry_symlink_w(struct archive_entry *entry)
+{
+ return (aes_get_wcs(&entry->ae_symlink));
+}
+
uid_t
archive_entry_uid(struct archive_entry *entry)
{
@@ -548,6 +570,12 @@ archive_entry_uname(struct archive_entry *entry)
return (aes_get_mbs(&entry->ae_uname));
}
+const wchar_t *
+archive_entry_uname_w(struct archive_entry *entry)
+{
+ return (aes_get_wcs(&entry->ae_uname));
+}
+
/*
* Functions to set archive_entry properties.
*/
diff --git a/lib/libarchive/archive_entry.h b/lib/libarchive/archive_entry.h
index 8d9fdef..a35f0df 100644
--- a/lib/libarchive/archive_entry.h
+++ b/lib/libarchive/archive_entry.h
@@ -79,7 +79,9 @@ void archive_entry_fflags(struct archive_entry *,
const char *archive_entry_fflags_text(struct archive_entry *);
gid_t archive_entry_gid(struct archive_entry *);
const char *archive_entry_gname(struct archive_entry *);
+const wchar_t *archive_entry_gname_w(struct archive_entry *);
const char *archive_entry_hardlink(struct archive_entry *);
+const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
ino_t archive_entry_ino(struct archive_entry *);
mode_t archive_entry_mode(struct archive_entry *);
time_t archive_entry_mtime(struct archive_entry *);
@@ -92,8 +94,10 @@ dev_t archive_entry_rdevminor(struct archive_entry *);
int64_t archive_entry_size(struct archive_entry *);
const struct stat *archive_entry_stat(struct archive_entry *);
const char *archive_entry_symlink(struct archive_entry *);
+const wchar_t *archive_entry_symlink_w(struct archive_entry *);
uid_t archive_entry_uid(struct archive_entry *);
const char *archive_entry_uname(struct archive_entry *);
+const wchar_t *archive_entry_uname_w(struct archive_entry *);
/*
* Set fields in an archive_entry.
diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c
index 547ada9..75dfe30 100644
--- a/lib/libarchive/archive_write_set_format_pax.c
+++ b/lib/libarchive/archive_write_set_format_pax.c
@@ -62,6 +62,7 @@ static int archive_write_pax_header(struct archive *,
static char *build_pax_attribute_name(char *dest, const char *src);
static char *build_ustar_entry_name(char *dest, const char *src, const char *insert);
static char *format_int(char *dest, int64_t);
+static int has_non_ASCII(const wchar_t *);
static int write_nulls(struct archive *, size_t);
/*
@@ -315,7 +316,7 @@ archive_write_pax_header(struct archive *a,
struct archive_entry *entry_main;
const char *linkname, *p;
const char *hardlink;
- const wchar_t *wp, *wp2;
+ const wchar_t *wp;
const char *suffix_start;
int need_extension, r, ret;
struct pax *pax;
@@ -375,36 +376,42 @@ archive_write_pax_header(struct archive *a,
/* Find the largest suffix that fits in 'name' field. */
suffix_start = strchr(p + strlen(p) - 100 - 1, '/');
- /* Find non-ASCII character, if any. */
- wp2 = wp;
- while (*wp2 != L'\0' && *wp2 < 128)
- wp2++;
-
/*
* If name is too long, or has non-ASCII characters, add
* 'path' to pax extended attrs.
*/
- if (suffix_start == NULL || suffix_start - p > 155 || *wp2 != L'\0') {
+ if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) {
add_pax_attr_w(&(pax->pax_header), "path", wp);
archive_entry_set_pathname(entry_main,
build_ustar_entry_name(ustar_entry_name, p, NULL));
need_extension = 1;
}
- /* If link name is too long, add 'linkpath' to pax extended attrs. */
+ /* If link name is too long or has non-ASCII characters, add
+ * 'linkpath' to pax extended attrs. */
linkname = hardlink;
if (linkname == NULL)
linkname = archive_entry_symlink(entry_main);
- if (linkname != NULL && strlen(linkname) > 100) {
- add_pax_attr(&(pax->pax_header), "linkpath", linkname);
+ if (linkname != NULL) {
+ /* There is a link name, get the wide version as well. */
if (hardlink != NULL)
- archive_entry_set_hardlink(entry_main,
- "././@LongHardLink");
+ wp = archive_entry_hardlink_w(entry_main);
else
- archive_entry_set_symlink(entry_main,
- "././@LongSymLink");
- need_extension = 1;
+ wp = archive_entry_symlink_w(entry_main);
+
+ /* If the link is long or has a non-ASCII character,
+ * store it as a pax extended attribute. */
+ if (strlen(linkname) > 100 || has_non_ASCII(wp)) {
+ add_pax_attr_w(&(pax->pax_header), "linkpath", wp);
+ if (hardlink != NULL)
+ archive_entry_set_hardlink(entry_main,
+ "././@LongHardLink");
+ else
+ archive_entry_set_symlink(entry_main,
+ "././@LongSymLink");
+ need_extension = 1;
+ }
}
/* If file size is too large, add 'size' to pax extended attrs. */
@@ -419,11 +426,12 @@ archive_write_pax_header(struct archive *a,
need_extension = 1;
}
- /* If group name is too large, add 'gname' to pax extended attrs. */
- /* TODO: If gname has non-ASCII characters, use pax attribute. */
+ /* If group name is too large or has non-ASCII characters, add
+ * 'gname' to pax extended attrs. */
p = archive_entry_gname(entry_main);
- if (p != NULL && strlen(p) > 31) {
- add_pax_attr(&(pax->pax_header), "gname", p);
+ wp = archive_entry_gname_w(entry_main);
+ if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
+ add_pax_attr_w(&(pax->pax_header), "gname", wp);
archive_entry_set_gname(entry_main, NULL);
need_extension = 1;
}
@@ -437,8 +445,9 @@ archive_write_pax_header(struct archive *a,
/* If user name is too large, add 'uname' to pax extended attrs. */
/* TODO: If uname has non-ASCII characters, use pax attribute. */
p = archive_entry_uname(entry_main);
- if (p != NULL && strlen(p) > 31) {
- add_pax_attr(&(pax->pax_header), "uname", p);
+ wp = archive_entry_uname_w(entry_main);
+ if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
+ add_pax_attr_w(&(pax->pax_header), "uname", wp);
archive_entry_set_uname(entry_main, NULL);
need_extension = 1;
}
@@ -1001,3 +1010,11 @@ archive_write_pax_data(struct archive *a, const void *buff, size_t s)
pax->entry_bytes_remaining -= s;
return (ret);
}
+
+static int
+has_non_ASCII(const wchar_t *wp)
+{
+ while (*wp != L'\0' && *wp < 128)
+ wp++;
+ return (*wp != L'\0');
+}
OpenPOWER on IntegriCloud