summaryrefslogtreecommitdiffstats
path: root/lib/libarchive
diff options
context:
space:
mode:
authorkientzle <kientzle@FreeBSD.org>2006-03-21 16:55:46 +0000
committerkientzle <kientzle@FreeBSD.org>2006-03-21 16:55:46 +0000
commit537ab73b2f9d8cc3e849014fd44738b6a48f6c99 (patch)
treec3aca38252d2ed10ab3875ada2f51f284743c856 /lib/libarchive
parentb09a8950a1301d97aef8e2975e34a3ba5bc451c3 (diff)
downloadFreeBSD-src-537ab73b2f9d8cc3e849014fd44738b6a48f6c99.zip
FreeBSD-src-537ab73b2f9d8cc3e849014fd44738b6a48f6c99.tar.gz
POSIX.1e-style Extended Attribute support
This commit implements storing/reading POSIX.1e-style extended attribute information in "pax" format archives. An outline of the storage format is in the tar.5 manpage. The archive_read_extract() function has code to restore those archives to disk for Linux; FreeBSD implementation is forthcoming. Many thanks to Jaakko Heinonen for finding flaws in earlier proposals and doing the bulk of the coding in this work.
Diffstat (limited to 'lib/libarchive')
-rw-r--r--lib/libarchive/Makefile2
-rw-r--r--lib/libarchive/archive.h.in1
-rw-r--r--lib/libarchive/archive_entry.c113
-rw-r--r--lib/libarchive/archive_entry.h19
-rw-r--r--lib/libarchive/archive_read_extract.c137
-rw-r--r--lib/libarchive/archive_read_support_format_cpio.c2
-rw-r--r--lib/libarchive/archive_read_support_format_iso9660.c4
-rw-r--r--lib/libarchive/archive_read_support_format_tar.c203
-rw-r--r--lib/libarchive/archive_write_set_format_pax.c169
-rw-r--r--lib/libarchive/tar.515
10 files changed, 628 insertions, 37 deletions
diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile
index 38c9067..23272d7 100644
--- a/lib/libarchive/Makefile
+++ b/lib/libarchive/Makefile
@@ -9,7 +9,7 @@ LDADD= -lbz2 -lz
# Major: Bumped ONLY when API/ABI breakage happens.
# Minor: Bumped when significant new features are added (see SHLIB_MAJOR)
# Revision: Bumped on any notable change
-VERSION= 1.2.41
+VERSION= 1.2.51
ARCHIVE_API_MAJOR!= echo ${VERSION} | sed -e 's/\..*//'
ARCHIVE_API_MINOR!= echo ${VERSION} | sed -e 's/[0-9]*\.//' | sed -e 's/\..*//'
diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in
index 666917c..62c4d52 100644
--- a/lib/libarchive/archive.h.in
+++ b/lib/libarchive/archive.h.in
@@ -253,6 +253,7 @@ int archive_read_data_into_fd(struct archive *, int fd);
#define ARCHIVE_EXTRACT_UNLINK (16) /* Default: don't unlink existing files */
#define ARCHIVE_EXTRACT_ACL (32) /* Default: don't restore ACLs */
#define ARCHIVE_EXTRACT_FFLAGS (64) /* Default: don't restore fflags */
+#define ARCHIVE_EXTRACT_XATTR (128) /* Default: don't restore xattrs */
int archive_read_extract(struct archive *, struct archive_entry *,
int flags);
diff --git a/lib/libarchive/archive_entry.c b/lib/libarchive/archive_entry.c
index de7238d..c67c64b 100644
--- a/lib/libarchive/archive_entry.c
+++ b/lib/libarchive/archive_entry.c
@@ -59,13 +59,13 @@ static size_t wcslen(const wchar_t *s)
static wchar_t * wcscpy(wchar_t *s1, const wchar_t *s2)
{
wchar_t *dest = s1;
- while((*s1 = *s2) != L'\0')
+ while ((*s1 = *s2) != L'\0')
++s1, ++s2;
return dest;
}
-#define wmemcpy(a,b,i) (wchar_t *)memcpy((a),(b),(i)*sizeof(wchar_t))
+#define wmemcpy(a,b,i) (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t))
/* Good enough for simple equality testing, but not for sorting. */
-#define wmemcmp(a,b,i) memcmp((a),(b),(i)*sizeof(wchar_t))
+#define wmemcmp(a,b,i) memcmp((a), (b), (i) * sizeof(wchar_t))
#endif
#include "archive.h"
@@ -97,6 +97,14 @@ struct ae_acl {
struct aes name; /* uname/gname */
};
+/*
+ * One extended attribute attached to an archive_entry.  Attributes are
+ * kept in a singly-linked list; the name and value are heap copies
+ * that archive_entry_xattr_clear() releases along with the node.
+ */
+struct ae_xattr {
+ struct ae_xattr *next; /* Next attribute, or NULL at end of list. */
+
+ char *name; /* Copy of the attribute name made with strdup(). */
+ void *value; /* Copy of the value bytes made with malloc(). */
+ size_t size; /* Number of bytes stored at value. */
+};
+
static void aes_clean(struct aes *);
static void aes_copy(struct aes *dest, struct aes *src);
static const char * aes_get_mbs(struct aes *);
@@ -170,6 +178,9 @@ struct archive_entry {
struct ae_acl *acl_p;
int acl_state; /* See acl_next for details. */
wchar_t *acl_text_w;
+
+ struct ae_xattr *xattr_head;
+ struct ae_xattr *xattr_p;
};
static void
@@ -332,6 +343,7 @@ archive_entry_clear(struct archive_entry *entry)
aes_clean(&entry->ae_symlink);
aes_clean(&entry->ae_uname);
archive_entry_acl_clear(entry);
+ archive_entry_xattr_clear(entry);
memset(entry, 0, sizeof(*entry));
return entry;
}
@@ -358,6 +370,7 @@ archive_entry_clone(struct archive_entry *entry)
aes_copy(&entry2->ae_uname, &entry->ae_uname);
/* XXX TODO: Copy ACL data over as well. XXX */
+ /* XXX TODO: Copy xattr data over as well. XXX */
return (entry2);
}
@@ -1054,7 +1067,7 @@ archive_entry_acl_text_w(struct archive_entry *entry, int flags)
length ++; /* colon */
length += 3; /* rwx */
length += 1; /* colon */
- length += max(sizeof(uid_t),sizeof(gid_t)) * 3 + 1;
+ length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1;
length ++; /* newline */
}
ap = ap->next;
@@ -1346,6 +1359,98 @@ fail:
}
/*
+ * extended attribute handling
+ */
+
+/*
+ * Release every extended attribute stored on the entry.
+ *
+ * Each list node is freed together with its name and value copies,
+ * leaving the entry with an empty list.  The iteration cursor is reset
+ * as well, so a later archive_entry_xattr_next() cannot follow a
+ * pointer into freed memory.
+ */
+void
+archive_entry_xattr_clear(struct archive_entry *entry)
+{
+	struct ae_xattr *xp;
+
+	while (entry->xattr_head != NULL) {
+		xp = entry->xattr_head->next;
+		free(entry->xattr_head->name);
+		free(entry->xattr_head->value);
+		free(entry->xattr_head);
+		entry->xattr_head = xp;
+	}
+
+	/* The cursor may have referred to a node that was just freed. */
+	entry->xattr_p = NULL;
+}
+
+/*
+ * Attach a copy of one extended attribute to the entry.
+ *
+ * name is a NUL-terminated attribute name and value points to size
+ * bytes of attribute data.  Both are copied, so the caller keeps
+ * ownership of its own buffers.  The new attribute is pushed onto the
+ * front of the list.
+ *
+ * If the node or the name copy cannot be allocated the attribute is
+ * dropped.  If only the value copy fails the attribute is kept with a
+ * size of zero.
+ */
+void
+archive_entry_xattr_add_entry(struct archive_entry *entry,
+    const char *name, const void *value, size_t size)
+{
+	struct ae_xattr *xp;
+
+	if ((xp = malloc(sizeof(struct ae_xattr))) == NULL)
+		/* XXX Error XXX */
+		return;
+
+	/* Never link in an attribute without a name. */
+	if ((xp->name = strdup(name)) == NULL) {
+		/* XXX Error XXX */
+		free(xp);
+		return;
+	}
+
+	if ((xp->value = malloc(size)) != NULL) {
+		/* Nothing to copy for an empty value. */
+		if (size > 0)
+			memcpy(xp->value, value, size);
+		xp->size = size;
+	} else
+		xp->size = 0;
+
+	xp->next = entry->xattr_head;
+	entry->xattr_head = xp;
+}
+
+
+/*
+ * Count the extended attributes currently stored on the entry by
+ * walking the whole list.
+ */
+int
+archive_entry_xattr_count(struct archive_entry *entry)
+{
+	struct ae_xattr *node = entry->xattr_head;
+	int total = 0;
+
+	while (node != NULL) {
+		total++;
+		node = node->next;
+	}
+
+	return total;
+}
+
+/*
+ * Rewind the iteration cursor to the first stored attribute and
+ * report how many attributes the entry holds.
+ */
+int
+archive_entry_xattr_reset(struct archive_entry * entry)
+{
+	int total;
+
+	entry->xattr_p = entry->xattr_head;
+	total = archive_entry_xattr_count(entry);
+
+	return total;
+}
+
+/*
+ * Fetch the attribute under the iteration cursor, then advance the
+ * cursor.
+ *
+ * On success *name, *value and *size describe the attribute (the
+ * pointers refer to storage owned by the entry) and ARCHIVE_OK is
+ * returned.  Once the list is exhausted all three outputs are cleared
+ * and ARCHIVE_WARN is returned.
+ */
+int
+archive_entry_xattr_next(struct archive_entry * entry,
+    const char **name, const void **value, size_t *size)
+{
+	if (entry->xattr_p) {
+		*name = entry->xattr_p->name;
+		*value = entry->xattr_p->value;
+		*size = entry->xattr_p->size;
+
+		entry->xattr_p = entry->xattr_p->next;
+
+		return (ARCHIVE_OK);
+	} else {
+		/* Clear every output so callers never see stale data. */
+		*name = NULL;
+		*value = NULL;
+		*size = (size_t)0;
+		return (ARCHIVE_WARN);
+	}
+}
+
+/*
+ * end of xattr handling
+ */
+
+/*
* Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated
* to point to just after the separator. *start points to the first
* character of the matched text and *end just after the last
diff --git a/lib/libarchive/archive_entry.h b/lib/libarchive/archive_entry.h
index a35f0df..3c3f73d 100644
--- a/lib/libarchive/archive_entry.h
+++ b/lib/libarchive/archive_entry.h
@@ -229,4 +229,23 @@ int __archive_entry_acl_parse_w(struct archive_entry *,
}
#endif
+/*
+ * extended attributes
+ */
+
+/*
+ * NOTE(review): these prototypes follow what looks like the closing
+ * brace of the header's extern "C" block -- confirm that C++ callers
+ * still get C linkage for them.
+ */
+/* Free every extended attribute stored on the entry. */
+void archive_entry_xattr_clear(struct archive_entry *);
+/* Store a private copy of the named attribute (size bytes at value). */
+void archive_entry_xattr_add_entry(struct archive_entry *,
+ const char *name, const void *value, size_t size);
+
+/*
+ * To retrieve the xattr list, first "reset", then repeatedly ask for the
+ * "next" entry.  Both "count" and "reset" return the number of stored
+ * attributes; "next" returns ARCHIVE_OK while attributes remain and
+ * ARCHIVE_WARN once the list is exhausted.
+ */
+
+int archive_entry_xattr_count(struct archive_entry *);
+int archive_entry_xattr_reset(struct archive_entry *);
+int archive_entry_xattr_next(struct archive_entry *,
+ const char **name, const void **value, size_t *);
+
+
#endif /* !ARCHIVE_ENTRY_H_INCLUDED */
diff --git a/lib/libarchive/archive_read_extract.c b/lib/libarchive/archive_read_extract.c
index cde42b8..f7127f9 100644
--- a/lib/libarchive/archive_read_extract.c
+++ b/lib/libarchive/archive_read_extract.c
@@ -31,6 +31,9 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_SYS_ACL_H
#include <sys/acl.h>
#endif
+#ifdef HAVE_ATTR_XATTR_H
+#include <attr/xattr.h>
+#endif
#ifdef HAVE_SYS_IOCTL_H
#include <sys/ioctl.h>
#endif
@@ -134,6 +137,7 @@ static int set_acl(struct archive *, int fd, struct archive_entry *,
acl_type_t, int archive_entry_acl_type, const char *tn);
#endif
static int set_acls(struct archive *, int fd, struct archive_entry *);
+static int set_xattrs(struct archive *, int fd, struct archive_entry *);
static int set_fflags(struct archive *, int fd, const char *name, mode_t,
unsigned long fflags_set, unsigned long fflags_clear);
static int set_ownership(struct archive *, int fd, struct archive_entry *,
@@ -1086,6 +1090,12 @@ set_perm(struct archive *a, int fd, struct archive_entry *entry,
return (r);
}
+ if (flags & ARCHIVE_EXTRACT_XATTR) {
+ r = set_xattrs(a, fd, entry);
+ if (r != ARCHIVE_OK)
+ return (r);
+ }
+
/*
* Make 'critical_flags' hold all file flags that can't be
* immediately restored. For example, on BSD systems,
@@ -1201,7 +1211,7 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
return (ARCHIVE_WARN);
}
-#elif defined(__linux)
+#elif defined(__linux) && defined(EXT2_IOC_GETFLAGS) && defined(EXT2_IOC_SETFLAGS)
/*
* Linux has flags too, but uses ioctl() to access them instead of
@@ -1214,8 +1224,8 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
struct extract *extract;
int ret;
int myfd = fd;
- int err;
unsigned long newflags, oldflags;
+ unsigned long sf_mask = 0;
extract = a->extract;
if (set == 0 && clear == 0)
@@ -1231,10 +1241,18 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
return (ARCHIVE_OK);
/*
- * Linux has no define for the flags that are only settable
- * by the root user...
+ * Linux has no define for the flags that are only settable by
+ * the root user. This code may seem a little complex, but
+ * there seem to be some Linux systems that lack these
+ * defines. (?) The code below degrades reasonably gracefully
+ * if sf_mask is incomplete.
*/
-#define SF_MASK (EXT2_IMMUTABLE_FL|EXT2_APPEND_FL)
+#ifdef EXT2_IMMUTABLE_FL
+ sf_mask |= EXT2_IMMUTABLE_FL;
+#endif
+#ifdef EXT2_APPEND_FL
+ sf_mask |= EXT2_APPEND_FL;
+#endif
/*
* XXX As above, this would be way simpler if we didn't have
* to read the current flags from disk. XXX
@@ -1250,8 +1268,8 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
}
/* If we couldn't set all the flags, try again with a subset. */
if (ioctl(myfd, EXT2_IOC_GETFLAGS, &oldflags) >= 0) {
- newflags &= ~SF_MASK;
- oldflags &= SF_MASK;
+ newflags &= ~sf_mask;
+ oldflags &= sf_mask;
newflags |= oldflags;
if (ioctl(myfd, EXT2_IOC_SETFLAGS, &newflags) >= 0)
goto cleanup;
@@ -1389,12 +1407,13 @@ set_acl(struct archive *a, int fd, struct archive_entry *entry,
if (fd >= 0 && acl_type == ACL_TYPE_ACCESS && acl_set_fd(fd, acl) == 0)
ret = ARCHIVE_OK;
else
-#endif
+#else
#if HAVE_ACL_SET_FD_NP
if (fd >= 0 && acl_set_fd_np(fd, acl, acl_type) == 0)
ret = ARCHIVE_OK;
else
#endif
+#endif
if (acl_set_file(name, acl_type, acl) != 0) {
archive_set_error(a, errno, "Failed to set %s acl", typename);
ret = ARCHIVE_WARN;
@@ -1404,9 +1423,85 @@ set_acl(struct archive *a, int fd, struct archive_entry *entry,
}
#endif
+#if HAVE_LSETXATTR
/*
- * The following routines do some basic caching of uname/gname lookups.
- * All such lookups go through these routines, including ACL conversions.
+ * Restore extended attributes - Linux implementation
+ */
+static int
+set_xattrs(struct archive *a, int fd, struct archive_entry *entry)
+{
+	/*
+	 * Applies every extended attribute stored on the entry to the
+	 * file, through the open descriptor when one is available and
+	 * fsetxattr() exists, otherwise by pathname.  Attributes with
+	 * no name or in the "xfsroot." / "system." namespaces are
+	 * reported as invalid and skipped.  Returns ARCHIVE_OK if
+	 * everything was applied, ARCHIVE_WARN otherwise.
+	 */
+	static int warning_done = 0;
+	int ret = ARCHIVE_OK;
+	int i = archive_entry_xattr_reset(entry);
+
+	while (i--) {
+		const char *name;
+		const void *value;
+		size_t size;
+		archive_entry_xattr_next(entry, &name, &value, &size);
+		if (name != NULL &&
+		    strncmp(name, "xfsroot.", 8) != 0 &&
+		    strncmp(name, "system.", 7) != 0) {
+			int e;
+#if HAVE_FSETXATTR
+			if (fd >= 0)
+				e = fsetxattr(fd, name, value, size, 0);
+			else
+#endif
+			{
+				e = lsetxattr(archive_entry_pathname(entry),
+				    name, value, size, 0);
+			}
+			if (e == -1) {
+				/* Capture errno before anything can clobber it. */
+				int err = errno;
+
+				if (err == ENOTSUP) {
+					/* Report an unsupported file system only once. */
+					if (!warning_done) {
+						warning_done = 1;
+						archive_set_error(a, err,
+						    "Cannot restore extended "
+						    "attributes on this file "
+						    "system");
+					}
+				} else
+					archive_set_error(a, err,
+					    "Failed to set extended attribute");
+				ret = ARCHIVE_WARN;
+			}
+		} else {
+			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
+			    "Invalid extended attribute encountered");
+			ret = ARCHIVE_WARN;
+		}
+	}
+	return (ret);
+}
+#else
+/*
+ * Restore extended attributes - stub implementation for unsupported systems
+ *
+ * Nothing is written to disk.  The first call records an error on the
+ * archive handle and returns ARCHIVE_WARN; later calls return
+ * ARCHIVE_OK so the same warning is not repeated for every entry.
+ */
+static int
+set_xattrs(struct archive *a, int fd, struct archive_entry *entry)
+{
+	static int warning_done = 0;
+
+	(void)fd; /* UNUSED */
+	(void)entry; /* UNUSED */
+	if (!warning_done) {
+		warning_done = 1;
+		archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
+		    "Cannot restore extended attributes on this system");
+		return (ARCHIVE_WARN);
+	}
+	/* Warning was already emitted; suppress further warnings. */
+	return (ARCHIVE_OK);
+}
+#endif
+
+/*
+ * The following routines do some basic caching of uname/gname
+ * lookups. All such lookups go through these routines, including ACL
+ * conversions. Even a small cache here provides an enormous speedup,
+ * especially on systems using NIS, LDAP, or a similar networked
+ * directory system.
*
* TODO: Provide an API for clients to override these routines.
*/
@@ -1485,17 +1580,17 @@ lookup_uid(struct archive *a, const char *uname, uid_t uid)
static unsigned int
hash(const char *p)
{
- /* A 32-bit version of Peter Weinberger's (PJW) hash algorithm,
- as used by ELF for hashing function names. */
- unsigned g,h = 0;
- while(*p != '\0') {
- h = ( h << 4 ) + *p++;
- if (( g = h & 0xF0000000 )) {
- h ^= g >> 24;
- h &= 0x0FFFFFFF;
- }
- }
- return h;
+ /* A 32-bit version of Peter Weinberger's (PJW) hash algorithm,
+ as used by ELF for hashing function names. */
+ unsigned g, h = 0;
+ while (*p != '\0') {
+ h = ( h << 4 ) + *p++;
+ if (( g = h & 0xF0000000 )) {
+ h ^= g >> 24;
+ h &= 0x0FFFFFFF;
+ }
+ }
+ return h;
}
void
diff --git a/lib/libarchive/archive_read_support_format_cpio.c b/lib/libarchive/archive_read_support_format_cpio.c
index 946a74e..75b0417 100644
--- a/lib/libarchive/archive_read_support_format_cpio.c
+++ b/lib/libarchive/archive_read_support_format_cpio.c
@@ -260,7 +260,7 @@ archive_read_format_cpio_read_header(struct archive *a,
}
/* Compare name to "TRAILER!!!" to test for end-of-archive. */
- if (namelength == 11 && strcmp(h,"TRAILER!!!")==0) {
+ if (namelength == 11 && strcmp(h, "TRAILER!!!") == 0) {
/* TODO: Store file location of start of block. */
archive_set_error(a, 0, NULL);
return (ARCHIVE_EOF);
diff --git a/lib/libarchive/archive_read_support_format_iso9660.c b/lib/libarchive/archive_read_support_format_iso9660.c
index 510d7f0..065bf9b 100644
--- a/lib/libarchive/archive_read_support_format_iso9660.c
+++ b/lib/libarchive/archive_read_support_format_iso9660.c
@@ -364,7 +364,7 @@ archive_read_format_iso9660_read_header(struct archive *a,
/* If this is a directory, read in all of the entries right now. */
if (S_ISDIR(st.st_mode)) {
- while(iso9660->entry_bytes_remaining > 0) {
+ while (iso9660->entry_bytes_remaining > 0) {
const void *block;
const unsigned char *p;
ssize_t step = iso9660->logical_block_size;
@@ -918,7 +918,7 @@ next_entry(struct iso9660 *iso9660)
+ iso9660->pending_files[0]->size;
/* Now, try to find an earlier one. */
- for(i = 0; i < iso9660->pending_files_used; i++) {
+ for (i = 0; i < iso9660->pending_files_used; i++) {
/* Use the position of the file *end* as our comparison. */
uint64_t end_offset = iso9660->pending_files[i]->offset
+ iso9660->pending_files[i]->size;
diff --git a/lib/libarchive/archive_read_support_format_tar.c b/lib/libarchive/archive_read_support_format_tar.c
index 2ab8684..6f1045a 100644
--- a/lib/libarchive/archive_read_support_format_tar.c
+++ b/lib/libarchive/archive_read_support_format_tar.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003-2004 Tim Kientzle
+ * Copyright (c) 2003-2006 Tim Kientzle
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$");
static int wcscmp(const wchar_t *s1, const wchar_t *s2)
{
int diff = *s1 - *s2;
- while(*s1 && diff == 0)
+ while (*s1 && diff == 0)
diff = (int)*++s1 - (int)*++s2;
return diff;
}
@@ -155,6 +155,7 @@ struct tar {
static size_t UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n);
static int archive_block_is_null(const unsigned char *p);
+static char *base64_decode(const wchar_t *, size_t, size_t *);
static int gnu_read_sparse_data(struct archive *, struct tar *,
const struct archive_entry_header_gnutar *header);
static void gnu_parse_sparse_data(struct archive *, struct tar *,
@@ -199,7 +200,10 @@ static int64_t tar_atol256(const char *, unsigned);
static int64_t tar_atol8(const char *, unsigned);
static int tar_read_header(struct archive *, struct tar *,
struct archive_entry *, struct stat *);
+static int tohex(int c);
+static char *url_decode(const char *);
static int utf8_decode(wchar_t *, const char *, size_t length);
+static char *wide_to_narrow(const wchar_t *wval);
/*
* ANSI C99 defines constants for these, but not everyone supports
@@ -1154,7 +1158,42 @@ pax_header(struct archive *a, struct tar *tar, struct archive_entry *entry,
return (err);
}
+/*
+ * Decode one "LIBARCHIVE.xattr.<name>" pax attribute and attach the
+ * result to the entry.
+ *
+ * The part of the key after the prefix is narrowed and URL-decoded to
+ * give the attribute name; the value is base-64 decoded.
+ *
+ * Returns 0 on success, 1 if the value could not be decoded, 2 if the
+ * name could not be converted or decoded, and 3 if the key is not an
+ * xattr key with a non-empty name.
+ */
+static int
+pax_attribute_xattr(struct archive_entry *entry,
+    wchar_t *name, wchar_t *value)
+{
+	const size_t prefix_len = 17;
+	char *narrow, *attr_name;
+	void *attr_value;
+	size_t attr_size;
+
+	/* The key must be the prefix plus at least one more character. */
+	if (wcslen(name) <= prefix_len)
+		return 3;
+	if (wcsncmp(name, L"LIBARCHIVE.xattr.", prefix_len) != 0)
+		return 3;
+
+	/* URL-decode name */
+	narrow = wide_to_narrow(name + prefix_len);
+	if (narrow == NULL)
+		return 2;
+	attr_name = url_decode(narrow);
+	free(narrow);
+	if (attr_name == NULL)
+		return 2;
+
+	/* Base-64 decode value */
+	attr_value = base64_decode(value, wcslen(value), &attr_size);
+	if (attr_value == NULL) {
+		free(attr_name);
+		return 1;
+	}
+
+	/* The entry keeps its own copies, so ours can be released. */
+	archive_entry_xattr_add_entry(entry, attr_name, attr_value, attr_size);
+	free(attr_name);
+	free(attr_value);
+
+	return 0;
+}
/*
* Parse a single key=value attribute. key/value pointers are
@@ -1184,6 +1223,8 @@ pax_attribute(struct archive_entry *entry, struct stat *st,
if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0)
archive_entry_set_xxxxxx(entry, value);
*/
+ if (wcsncmp(key, L"LIBARCHIVE.xattr.", 17)==0)
+ pax_attribute_xattr(entry, key, value);
break;
case 'S':
/* We support some keys used by the "star" archiver */
@@ -1599,7 +1640,7 @@ utf8_decode(wchar_t *dest, const char *src, size_t length)
int err;
err = 0;
- while(length > 0) {
+ while (length > 0) {
n = UTF8_mbrtowc(dest, src, length);
if (n == 0)
break;
@@ -1721,3 +1762,159 @@ UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
}
return (wch == L'\0' ? 0 : len);
}
+
+
+/*
+ * base64_decode - Base64 decode
+ *
+ * Decodes the first len wide characters of src.  Returns a malloc()ed
+ * buffer that the caller must free and stores the number of decoded
+ * bytes in *out_len.  Returns NULL, with *out_len set to 0, only if
+ * the buffer cannot be allocated; an empty input yields a valid
+ * buffer with *out_len == 0.
+ *
+ * This accepts most variations of base-64 encoding, including:
+ * * with or without line breaks
+ * * with or without the final group padded with '=' or '_' characters
+ * (The most economical Base-64 variant does not pad the last group and
+ * omits line breaks; RFC1341 used for MIME requires both.)
+ */
+static char *
+base64_decode(const wchar_t *src, size_t len, size_t *out_len)
+{
+	static const unsigned char digits[64] =
+	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+	static unsigned char decode_table[128];
+	char *out, *d;
+
+	/* If the decode table is not yet initialized, prepare it. */
+	if (decode_table[digits[1]] != 1) {
+		size_t i;
+		memset(decode_table, 0xff, sizeof(decode_table));
+		for (i = 0; i < sizeof(digits); i++)
+			decode_table[digits[i]] = i;
+	}
+
+	/*
+	 * Allocate enough space to hold the entire output: each full
+	 * group of four characters gives three bytes and a short final
+	 * group gives at most two.  Computed this way the request is
+	 * never zero (malloc(0) may return NULL) and cannot overflow.
+	 * Note that we may not use all of this...
+	 */
+	out = malloc(len / 4 * 3 + 3);
+	if (out == NULL) {
+		*out_len = 0;
+		return (NULL);
+	}
+	d = out;
+
+	while (len > 0) {
+		/* Collect the next group of (up to) four characters. */
+		int v = 0;
+		int group_size = 0;
+		while (group_size < 4 && len > 0) {
+			/* '=' or '_' padding indicates final group. */
+			if (*src == '=' || *src == '_') {
+				len = 0;
+				break;
+			}
+			/* Skip illegal characters (including line breaks) */
+			if (*src > 127 || *src < 32
+			    || decode_table[*src] == 0xff) {
+				len--;
+				src++;
+				continue;
+			}
+			v <<= 6;
+			v |= decode_table[*src++];
+			len --;
+			group_size++;
+		}
+		/* Align a short group properly. */
+		v <<= 6 * (4 - group_size);
+		/* Unpack the group we just collected. */
+		switch (group_size) {
+		case 4: d[2] = v & 0xff;
+			/* FALLTHROUGH */
+		case 3: d[1] = (v >> 8) & 0xff;
+			/* FALLTHROUGH */
+		case 2: d[0] = (v >> 16) & 0xff;
+			break;
+		case 1: /* this is invalid! */
+			break;
+		}
+		/* A group of n characters carries n * 3 / 4 whole bytes. */
+		d += group_size * 3 / 4;
+	}
+
+	*out_len = d - out;
+	return (out);
+}
+
+/*
+ * Convert a wide-character string to a newly allocated multibyte
+ * string in the current locale.  The caller frees the result.
+ * Returns NULL if memory runs out or if the string contains a
+ * character that cannot be converted.
+ *
+ * This is a little tricky because the C99 standard wcstombs()
+ * function returns the number of bytes that were converted,
+ * not the number that should be converted.  As a result,
+ * we can never accurately size the output buffer (without
+ * doing a tedious output size calculation in advance).
+ * This approach (try a conversion, then try again if it fails)
+ * will almost always succeed on the first try, and is thus
+ * much faster, at the cost of sometimes requiring multiple
+ * passes while we expand the buffer.
+ */
+static char *
+wide_to_narrow(const wchar_t *wval)
+{
+	size_t converted_length;
+	/*
+	 * Guess an output buffer size.  The "+ 1" keeps the guess
+	 * non-zero for an empty string, so doubling it always grows.
+	 */
+	size_t alloc_length = wcslen(wval) * 3 + 1;
+	char *mbs_val;
+
+	for (;;) {
+		mbs_val = malloc(alloc_length + 1);
+		if (mbs_val == NULL)
+			return (NULL);
+		converted_length = wcstombs(mbs_val, wval, alloc_length);
+		if (converted_length == (size_t)-1) {
+			/* Unconvertible character: buffer is unusable. */
+			free(mbs_val);
+			return (NULL);
+		}
+		/* Anything short of a full buffer means we got it all. */
+		if (converted_length < alloc_length)
+			break;
+		/* If we exhausted the buffer, resize and try again. */
+		free(mbs_val);
+		alloc_length *= 2;
+	}
+
+	/* Ensure a trailing null and return the final string. */
+	mbs_val[converted_length] = '\0';
+	return (mbs_val);
+}
+
+/*
+ * Return the value of a hexadecimal digit character (either case),
+ * or -1 if c is not a hexadecimal digit.
+ */
+static int
+tohex(int c)
+{
+	if (c >= '0' && c <= '9')
+		return (c - '0');
+	else if (c >= 'A' && c <= 'F')
+		return (c - 'A' + 10);
+	else if (c >= 'a' && c <= 'f')
+		return (c - 'a' + 10);
+	else
+		return (-1);
+}
+
+/*
+ * Return a newly allocated copy of a NUL-terminated string with every
+ * "%XX" escape (two hexadecimal digits) replaced by the byte it
+ * names.  A '%' that is not followed by two hexadecimal digits is
+ * copied through unchanged.  The caller frees the result; NULL is
+ * returned if memory cannot be allocated.
+ */
+static char *
+url_decode(const char *in)
+{
+	char *out, *d;
+	const char *s;
+
+	/* Decoding never lengthens the string. */
+	out = malloc(strlen(in) + 1);
+	if (out == NULL)
+		return (NULL);
+	for (s = in, d = out; *s != '\0'; ) {
+		if (*s == '%') {
+			/* Try to convert % escape */
+			int digit1 = tohex(s[1]);
+			/*
+			 * Look at s[2] only after s[1] proved to be a
+			 * digit; otherwise s[1] may be the terminator
+			 * and s[2] would lie past the end of the string.
+			 */
+			int digit2 = (digit1 >= 0) ? tohex(s[2]) : -1;
+			if (digit1 >= 0 && digit2 >= 0) {
+				/* Looks good, consume three chars */
+				s += 3;
+				/* Convert output */
+				*d++ = ((digit1 << 4) | digit2);
+				continue;
+			}
+			/* Else fall through and treat '%' as normal char */
+		}
+		*d++ = *s++;
+	}
+	*d = '\0';
+	return (out);
+}
diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c
index 4e9857b..c256cb2 100644
--- a/lib/libarchive/archive_write_set_format_pax.c
+++ b/lib/libarchive/archive_write_set_format_pax.c
@@ -66,11 +66,13 @@ static int archive_write_pax_finish(struct archive *);
static int archive_write_pax_finish_entry(struct archive *);
static int archive_write_pax_header(struct archive *,
struct archive_entry *);
+static char *base64_encode(const char *src, size_t len);
static char *build_pax_attribute_name(char *dest, const char *src);
static char *build_ustar_entry_name(char *dest, const char *src,
size_t src_length, const char *insert);
static char *format_int(char *dest, int64_t);
static int has_non_ASCII(const wchar_t *);
+static char *url_encode(const char *in);
static int write_nulls(struct archive *, size_t);
/*
@@ -142,7 +144,7 @@ add_pax_attr_time(struct archive_string *as, const char *key,
t = tmp + sizeof(tmp) - 1;
/* Skip trailing zeros in the fractional part. */
- for(digit = 0, i = 10; i > 0 && digit == 0; i--) {
+ for (digit = 0, i = 10; i > 0 && digit == 0; i--) {
digit = nanos % 10;
nanos /= 10;
}
@@ -190,10 +192,10 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value));
}
-static void
-add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
+static char *
+utf8_encode(const wchar_t *wval)
{
- int utf8len;
+ int utf8len;
const wchar_t *wp;
unsigned long wc;
char *utf8_value, *p;
@@ -217,8 +219,10 @@ add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
}
utf8_value = malloc(utf8len + 1);
- if (utf8_value == NULL)
+ if (utf8_value == NULL) {
__archive_errx(1, "Not enough memory for attributes");
+ return (NULL);
+ }
for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
wc = *wp++;
@@ -258,6 +262,16 @@ add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
/* Ignore larger values; UTF-8 can't encode them. */
}
*p = '\0';
+
+ return (utf8_value);
+}
+
+static void
+add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
+{
+ char *utf8_value = utf8_encode(wval);
+ if (utf8_value == NULL)
+ return;
add_pax_attr(as, key, utf8_value);
free(utf8_value);
}
@@ -311,6 +325,53 @@ add_pax_attr(struct archive_string *as, const char *key, const char *value)
archive_strappend_char(as, '\n');
}
+/*
+ * Append one "LIBARCHIVE.xattr.<name>" record to the pax header for
+ * every extended attribute stored on the entry.
+ *
+ * The attribute name is URL-encoded, widened to wchar_t and then
+ * UTF-8 encoded; the value is base-64 encoded.  An attribute whose
+ * name is missing, or whose name or value cannot be encoded, is
+ * skipped.
+ */
+static void
+archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry)
+{
+	struct archive_string s;
+	int i = archive_entry_xattr_reset(entry);
+
+	while (i--) {
+		const char *name;
+		const void *value;
+		char *encoded_value;
+		char *url_encoded_name = NULL, *encoded_name = NULL;
+		wchar_t *wcs_name = NULL;
+		size_t size;
+
+		archive_entry_xattr_next(entry, &name, &value, &size);
+		/* An attribute without a name cannot be written. */
+		if (name == NULL)
+			continue;
+		/* Name is URL-encoded, then converted to wchar_t,
+		 * then UTF-8 encoded. */
+		url_encoded_name = url_encode(name);
+		if (url_encoded_name != NULL) {
+			/* Convert narrow-character to wide-character. */
+			size_t wcs_length = strlen(url_encoded_name);
+			wcs_name = malloc((wcs_length + 1) * sizeof(wchar_t));
+			if (wcs_name == NULL)
+				__archive_errx(1, "No memory for xattr conversion");
+			mbstowcs(wcs_name, url_encoded_name, wcs_length);
+			wcs_name[wcs_length] = 0;
+			free(url_encoded_name); /* Done with this. */
+		}
+		if (wcs_name != NULL) {
+			encoded_name = utf8_encode(wcs_name);
+			free(wcs_name); /* Done with wchar_t name. */
+		}
+
+		encoded_value = base64_encode(value, size);
+
+		if (encoded_name != NULL && encoded_value != NULL) {
+			archive_string_init(&s);
+			archive_strcpy(&s, "LIBARCHIVE.xattr.");
+			archive_strcat(&s, encoded_name);
+			add_pax_attr(&(pax->pax_header), s.s, encoded_value);
+			archive_string_free(&s);
+		}
+		free(encoded_name);
+		free(encoded_value);
+	}
+}
+
/*
* TODO: Consider adding 'comment' and 'charset' fields to
* archive_entry so that clients can specify them. Also, consider
@@ -538,6 +599,10 @@ archive_write_pax_header(struct archive *a,
ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0)
need_extension = 1;
+ /* If there are extended attributes, we need an extension */
+ if (!need_extension && archive_entry_xattr_count(entry_original) > 0)
+ need_extension = 1;
+
/*
* The following items are handled differently in "pax
* restricted" format. In particular, in "pax restricted"
@@ -595,6 +660,9 @@ archive_write_pax_header(struct archive *a,
st_main->st_ino);
add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink",
st_main->st_nlink);
+
+ /* Store extended attributes */
+ archive_write_pax_header_xattrs(pax, entry_original);
}
/* Only regular files have data. */
@@ -1026,3 +1094,94 @@ has_non_ASCII(const wchar_t *wp)
wp++;
return (*wp != L'\0');
}
+
+/*
+ * Used by extended attribute support; returns a malloc()ed copy of
+ * the name in which every character outside the range 33..126, and
+ * every '%' or '=', is written as '%' followed by two uppercase
+ * hexadecimal digits.  The result therefore never contains '='.
+ * Returns NULL if memory cannot be allocated.
+ */
+static char *
+url_encode(const char *in)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	const char *src;
+	char *result, *dst;
+	int needed = 0;
+
+	/* First pass: work out how long the encoded string will be. */
+	for (src = in; *src != '\0'; src++) {
+		int escape = (*src < 33 || *src > 126
+		    || *src == '%' || *src == '=');
+		needed += escape ? 3 : 1;
+	}
+
+	result = malloc(needed + 1);
+	if (result == NULL)
+		return (NULL);
+
+	/* Second pass: copy plain characters, expand the rest. */
+	dst = result;
+	for (src = in; *src != '\0'; src++) {
+		if (*src >= 33 && *src <= 126 && *src != '%' && *src != '=') {
+			*dst++ = *src;
+			continue;
+		}
+		/* URL encoding is '%' followed by two hex digits */
+		*dst++ = '%';
+		*dst++ = hex[0x0f & (*src >> 4)];
+		*dst++ = hex[0x0f & *src];
+	}
+	*dst = '\0';
+	return (result);
+}
+
+/*
+ * Encode a sequence of bytes into a C string using base-64 encoding.
+ *
+ * s points to len bytes of arbitrary binary data.  The output uses
+ * the standard alphabet and is not padded with '=': a final group of
+ * one byte yields two characters and a final group of two bytes
+ * yields three.
+ *
+ * Returns a null-terminated C string allocated with malloc(); caller
+ * is responsible for freeing the result.  Returns NULL if the
+ * allocation fails.
+ */
+static char *
+base64_encode(const char *s, size_t len)
+{
+	static const char digits[64] =
+	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+	/*
+	 * Read the input as unsigned bytes: where plain char is signed,
+	 * bytes >= 0x80 would otherwise be negative and left-shifting a
+	 * negative int is undefined.
+	 */
+	const unsigned char *p = (const unsigned char *)s;
+	unsigned int v;
+	char *d, *out;
+
+	/* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
+	out = malloc((len * 4 + 2) / 3 + 1);
+	if (out == NULL)
+		return (NULL);
+	d = out;
+
+	/* Convert each group of 3 bytes into 4 characters. */
+	while (len >= 3) {
+		v = ((unsigned int)p[0] << 16)
+		    | ((unsigned int)p[1] << 8)
+		    | (unsigned int)p[2];
+		p += 3;
+		len -= 3;
+		*d++ = digits[(v >> 18) & 0x3f];
+		*d++ = digits[(v >> 12) & 0x3f];
+		*d++ = digits[(v >> 6) & 0x3f];
+		*d++ = digits[(v) & 0x3f];
+	}
+	/* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
+	switch (len) {
+	case 0: break;
+	case 1:
+		v = (unsigned int)p[0] << 16;
+		*d++ = digits[(v >> 18) & 0x3f];
+		*d++ = digits[(v >> 12) & 0x3f];
+		break;
+	case 2:
+		v = ((unsigned int)p[0] << 16)
+		    | ((unsigned int)p[1] << 8);
+		*d++ = digits[(v >> 18) & 0x3f];
+		*d++ = digits[(v >> 12) & 0x3f];
+		*d++ = digits[(v >> 6) & 0x3f];
+		break;
+	}
+	/* Add trailing NUL character so output is a valid C string. */
+	*d++ = '\0';
+	return (out);
+}
diff --git a/lib/libarchive/tar.5 b/lib/libarchive/tar.5
index d46e1b5..242a3d0 100644
--- a/lib/libarchive/tar.5
+++ b/lib/libarchive/tar.5
@@ -399,6 +399,21 @@ Schilling's
.Cm SCHILY.*
extensions can store all of the data from
.Va struct stat .
+.It Cm LIBARCHIVE.xattr. Ns Ar namespace Ns . Ns Ar key
+Libarchive stores POSIX.1e-style extended attributes using
+keys of this form. The
+.Ar key
+value is URL-encoded:
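+All characters outside the printable ASCII range 33 through 126 (this includes spaces, control characters and non-ASCII bytes) and the two special characters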
+.Dq =
+and
+.Dq %
+are encoded as
+.Dq %
+followed by two uppercase hexadecimal digits.
+The value of this key is the extended attribute value
+encoded in base 64.
+XXX Detail the base-64 format here XXX
.It Cm VENDOR.*
XXX document other vendor-specific extensions XXX
.El
OpenPOWER on IntegriCloud