POSIX.1e-style Extended Attribute support

This commit implements storing/reading POSIX.1e-style extended attribute information in "pax" format archives. An outline of the storage format is in the tar.5 manpage. The archive_read_extract() function has code to restore those archives to disk for Linux; FreeBSD implementation is forthcoming. Many thanks to Jaakko Heinonen for finding flaws in earlier proposals and doing the bulk of the coding in this work.
author: kientzle <kientzle@FreeBSD.org> 2006-03-21 16:55:46 +0000
committer: kientzle <kientzle@FreeBSD.org> 2006-03-21 16:55:46 +0000
commit: 537ab73b2f9d8cc3e849014fd44738b6a48f6c99 (patch)
tree: c3aca38252d2ed10ab3875ada2f51f284743c856 /lib/libarchive
parent: b09a8950a1301d97aef8e2975e34a3ba5bc451c3 (diff)
download: FreeBSD-src-537ab73b2f9d8cc3e849014fd44738b6a48f6c99.zip
FreeBSD-src-537ab73b2f9d8cc3e849014fd44738b6a48f6c99.tar.gz
10 files changed, 628 insertions, 37 deletions
diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile
index 38c9067..23272d7 100644
--- a/lib/libarchive/Makefile
+++ b/lib/libarchive/Makefile
@@ -9,7 +9,7 @@ LDADD=	-lbz2 -lz
 #  Major: Bumped ONLY when API/ABI breakage happens.
 #  Minor: Bumped when significant new features are added (see SHLIB_MAJOR)
 #  Revision: Bumped on any notable change
-VERSION= 1.2.41
+VERSION= 1.2.51
 ARCHIVE_API_MAJOR!=	echo ${VERSION} | sed -e 's/\..*//'
 ARCHIVE_API_MINOR!=	echo ${VERSION} | sed -e 's/[0-9]*\.//' | sed -e 's/\..*//'
 
diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in
index 666917c..62c4d52 100644
--- a/lib/libarchive/archive.h.in
+++ b/lib/libarchive/archive.h.in
@@ -253,6 +253,7 @@ int		 archive_read_data_into_fd(struct archive *, int fd);
 #define	ARCHIVE_EXTRACT_UNLINK	(16) /* Default: don't unlink existing files */
 #define	ARCHIVE_EXTRACT_ACL	(32) /* Default: don't restore ACLs */
 #define	ARCHIVE_EXTRACT_FFLAGS	(64) /* Default: don't restore fflags */
+#define	ARCHIVE_EXTRACT_XATTR   (128) /* Default: don't restore xattrs */
 
 int		 archive_read_extract(struct archive *, struct archive_entry *,
 		     int flags);
diff --git a/lib/libarchive/archive_entry.c b/lib/libarchive/archive_entry.c
index de7238d..c67c64b 100644
--- a/lib/libarchive/archive_entry.c
+++ b/lib/libarchive/archive_entry.c
@@ -59,13 +59,13 @@ static size_t wcslen(const wchar_t *s)
 static wchar_t * wcscpy(wchar_t *s1, const wchar_t *s2)
 {
 	wchar_t *dest = s1;
-	while((*s1 = *s2) != L'\0')
+	while ((*s1 = *s2) != L'\0')
 		++s1, ++s2;
 	return dest;
 }
-#define wmemcpy(a,b,i)  (wchar_t *)memcpy((a),(b),(i)*sizeof(wchar_t))
+#define wmemcpy(a,b,i)  (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t))
 /* Good enough for simple equality testing, but not for sorting. */
-#define wmemcmp(a,b,i)  memcmp((a),(b),(i)*sizeof(wchar_t))
+#define wmemcmp(a,b,i)  memcmp((a), (b), (i) * sizeof(wchar_t))
 #endif
 
 #include "archive.h"
@@ -97,6 +97,14 @@ struct ae_acl {
 	struct aes name;		/* uname/gname */
 };
 
+struct ae_xattr {
+	struct ae_xattr *next;	/* Next attribute in the entry's singly-linked list. */
+
+	char	*name;		/* Attribute name; heap copy made with strdup(). */
+	void	*value;		/* Attribute value bytes; heap copy made with malloc(). */
+	size_t	size;		/* Number of bytes stored in value. */
+};
+
 static void	aes_clean(struct aes *);
 static void	aes_copy(struct aes *dest, struct aes *src);
 static const char *	aes_get_mbs(struct aes *);
@@ -170,6 +178,9 @@ struct archive_entry {
 	struct ae_acl	*acl_p;
 	int		 acl_state;	/* See acl_next for details. */
 	wchar_t		*acl_text_w;
+
+	struct ae_xattr *xattr_head;
+	struct ae_xattr *xattr_p;
 };
 
 static void
@@ -332,6 +343,7 @@ archive_entry_clear(struct archive_entry *entry)
 	aes_clean(&entry->ae_symlink);
 	aes_clean(&entry->ae_uname);
 	archive_entry_acl_clear(entry);
+	archive_entry_xattr_clear(entry);
 	memset(entry, 0, sizeof(*entry));
 	return entry;
 }
@@ -358,6 +370,7 @@ archive_entry_clone(struct archive_entry *entry)
 	aes_copy(&entry2->ae_uname, &entry->ae_uname);
 
 	/* XXX TODO: Copy ACL data over as well. XXX */
+	/* XXX TODO: Copy xattr data over as well. XXX */
 	return (entry2);
 }
 
@@ -1054,7 +1067,7 @@ archive_entry_acl_text_w(struct archive_entry *entry, int flags)
 			length ++; /* colon */
 			length += 3; /* rwx */
 			length += 1; /* colon */
-			length += max(sizeof(uid_t),sizeof(gid_t)) * 3 + 1;
+			length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1;
 			length ++; /* newline */
 		}
 		ap = ap->next;
@@ -1346,6 +1359,98 @@ fail:
 }
 
 /*
+ * extended attribute handling
+ */
+
+void
+archive_entry_xattr_clear(struct archive_entry *entry)
+{
+	struct ae_xattr	*xp;
+	/* Free every node together with the name/value copies it owns. */
+	while (entry->xattr_head != NULL) {
+		xp = entry->xattr_head->next;
+		free(entry->xattr_head->name);
+		free(entry->xattr_head->value);
+		free(entry->xattr_head);
+		entry->xattr_head = xp;
+	}
+	/* The loop leaves xattr_head NULL; the cursor must not outlive the nodes. */
+	entry->xattr_p = NULL;
+}
+
+void
+archive_entry_xattr_add_entry(struct archive_entry *entry,
+	const char *name, const void *value, size_t size)
+{
+	struct ae_xattr	*xp;
+
+	/* New entries are pushed on the list head, so the list is never walked. */
+	if ((xp = malloc(sizeof(struct ae_xattr))) == NULL)
+		/* XXX Error XXX */
+		return;
+	/* A nameless entry is unusable by readers and writers alike: drop it. */
+	if ((xp->name = strdup(name)) == NULL) {
+		free(xp);
+		return;
+	}
+	if ((xp->value = malloc(size)) != NULL) {
+		memcpy(xp->value, value, size);
+		xp->size = size;
+	} else
+		xp->size = 0;	/* No value buffer: keep the name with an empty value. */
+	xp->next = entry->xattr_head;
+	entry->xattr_head = xp;
+}
+
+
+/*
+ * returns number of the extended attribute entries
+ */
+int
+archive_entry_xattr_count(struct archive_entry *entry)
+{
+	struct ae_xattr *xp;
+	int count = 0;
+	/* Walk the whole list; no running count is kept in the entry. */
+	for (xp = entry->xattr_head; xp != NULL; xp = xp->next)
+		count++;
+
+	return count;
+}
+
+int
+archive_entry_xattr_reset(struct archive_entry * entry)
+{
+	entry->xattr_p = entry->xattr_head;	/* Rewind the iteration cursor. */
+	/* The return value is the number of attributes in the list. */
+	return archive_entry_xattr_count(entry);
+}
+
+int
+archive_entry_xattr_next(struct archive_entry * entry,
+	const char **name, const void **value, size_t *size)
+{
+	if (entry->xattr_p) {
+		*name = entry->xattr_p->name;
+		*value = entry->xattr_p->value;
+		*size = entry->xattr_p->size;
+		/* Hand back borrowed pointers, then step the cursor forward. */
+		entry->xattr_p = entry->xattr_p->next;
+
+		return (ARCHIVE_OK);
+	} else {
+		/* List exhausted: clear all three outputs for the caller. */
+		*name = NULL;
+		*value = NULL;
+		*size = (size_t)0;
+		return (ARCHIVE_WARN);
+	}
+}
+
+/*
+ * end of xattr handling
+ */
+
+/*
  * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]".  *wp is updated
  * to point to just after the separator.  *start points to the first
  * character of the matched text and *end just after the last
diff --git a/lib/libarchive/archive_entry.h b/lib/libarchive/archive_entry.h
index a35f0df..3c3f73d 100644
--- a/lib/libarchive/archive_entry.h
+++ b/lib/libarchive/archive_entry.h
@@ -229,4 +229,23 @@ int		 __archive_entry_acl_parse_w(struct archive_entry *,
 }
 #endif
 
+/*
+ * extended attributes.  NOTE(review): declared after a "}" / #endif pair;
+ * if that pair closes extern "C", these prototypes sit outside it - confirm.
+ */
+void	 archive_entry_xattr_clear(struct archive_entry *);
+void	 archive_entry_xattr_add_entry(struct archive_entry *,
+	     const char *name, const void *value, size_t size);
+
+/*
+ * To retrieve the xattr list, first "reset" (which returns the entry
+ * count), then ask for the "next" entry until it returns ARCHIVE_WARN.
+ */
+
+int	archive_entry_xattr_count(struct archive_entry *);
+int	archive_entry_xattr_reset(struct archive_entry *);
+int	archive_entry_xattr_next(struct archive_entry *,
+	     const char **name, const void **value, size_t *);
+
+
 #endif /* !ARCHIVE_ENTRY_H_INCLUDED */
diff --git a/lib/libarchive/archive_read_extract.c b/lib/libarchive/archive_read_extract.c
index cde42b8..f7127f9 100644
--- a/lib/libarchive/archive_read_extract.c
+++ b/lib/libarchive/archive_read_extract.c
@@ -31,6 +31,9 @@ __FBSDID("$FreeBSD$");
 #ifdef HAVE_SYS_ACL_H
 #include <sys/acl.h>
 #endif
+#ifdef HAVE_ATTR_XATTR_H
+#include <attr/xattr.h>
+#endif
 #ifdef HAVE_SYS_IOCTL_H
 #include <sys/ioctl.h>
 #endif
@@ -134,6 +137,7 @@ static int	set_acl(struct archive *, int fd, struct archive_entry *,
 		    acl_type_t, int archive_entry_acl_type, const char *tn);
 #endif
 static int	set_acls(struct archive *, int fd, struct archive_entry *);
+static int	set_xattrs(struct archive *, int fd, struct archive_entry *);
 static int	set_fflags(struct archive *, int fd, const char *name, mode_t,
 		    unsigned long fflags_set, unsigned long fflags_clear);
 static int	set_ownership(struct archive *, int fd, struct archive_entry *,
@@ -1086,6 +1090,12 @@ set_perm(struct archive *a, int fd, struct archive_entry *entry,
 			return (r);
 	}
 
+	if (flags & ARCHIVE_EXTRACT_XATTR) {
+		r = set_xattrs(a, fd, entry);
+		if (r != ARCHIVE_OK)
+			return (r);
+	}
+
 	/*
 	 * Make 'critical_flags' hold all file flags that can't be
 	 * immediately restored.  For example, on BSD systems,
@@ -1201,7 +1211,7 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
 	return (ARCHIVE_WARN);
 }
 
-#elif defined(__linux)
+#elif defined(__linux) && defined(EXT2_IOC_GETFLAGS) && defined(EXT2_IOC_SETFLAGS)
 
 /*
  * Linux has flags too, but uses ioctl() to access them instead of
@@ -1214,8 +1224,8 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
 	struct extract *extract;
 	int		 ret;
 	int		 myfd = fd;
-	int		 err;
 	unsigned long newflags, oldflags;
+	unsigned long sf_mask = 0;
 
 	extract = a->extract;
 	if (set == 0  && clear == 0)
@@ -1231,10 +1241,18 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
 		return (ARCHIVE_OK);
 
 	/*
-	 * Linux has no define for the flags that are only settable
-	 * by the root user...
+	 * Linux has no define for the flags that are only settable by
+	 * the root user.  This code may seem a little complex, but
+	 * there seem to be some Linux systems that lack these
+	 * defines. (?)  The code below degrades reasonably gracefully
+	 * if sf_mask is incomplete.
 	 */
-#define	SF_MASK                 (EXT2_IMMUTABLE_FL|EXT2_APPEND_FL)
+#ifdef EXT2_IMMUTABLE_FL
+	sf_mask |= EXT2_IMMUTABLE_FL;
+#endif
+#ifdef EXT2_APPEND_FL
+	sf_mask |= EXT2_APPEND_FL;
+#endif
 	/*
 	 * XXX As above, this would be way simpler if we didn't have
 	 * to read the current flags from disk. XXX
@@ -1250,8 +1268,8 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode,
 	}
 	/* If we couldn't set all the flags, try again with a subset. */
 	if (ioctl(myfd, EXT2_IOC_GETFLAGS, &oldflags) >= 0) {
-		newflags &= ~SF_MASK;
-		oldflags &= SF_MASK;
+		newflags &= ~sf_mask;
+		oldflags &= sf_mask;
 		newflags |= oldflags;
 		if (ioctl(myfd, EXT2_IOC_SETFLAGS, &newflags) >= 0)
 			goto cleanup;
@@ -1389,12 +1407,13 @@ set_acl(struct archive *a, int fd, struct archive_entry *entry,
 	if (fd >= 0 && acl_type == ACL_TYPE_ACCESS && acl_set_fd(fd, acl) == 0)
 		ret = ARCHIVE_OK;
 	else
-#endif
+#else
 #if HAVE_ACL_SET_FD_NP
 	if (fd >= 0 && acl_set_fd_np(fd, acl, acl_type) == 0)
 		ret = ARCHIVE_OK;
 	else
 #endif
+#endif
 	if (acl_set_file(name, acl_type, acl) != 0) {
 		archive_set_error(a, errno, "Failed to set %s acl", typename);
 		ret = ARCHIVE_WARN;
@@ -1404,9 +1423,85 @@ set_acl(struct archive *a, int fd, struct archive_entry *entry,
 }
 #endif
 
+#if HAVE_LSETXATTR
 /*
- * The following routines do some basic caching of uname/gname lookups.
- * All such lookups go through these routines, including ACL conversions.
+ * Restore extended attributes -  Linux implementation
+ */
+static int
+set_xattrs(struct archive *a, int fd, struct archive_entry *entry)
+{
+	static int warning_done = 0;
+	int ret = ARCHIVE_OK;
+	int i = archive_entry_xattr_reset(entry);
+	/* Apply each stored attribute, by descriptor when possible, else by path. */
+	while (i--) {
+		const char *name;
+		const void *value;
+		size_t size;
+		archive_entry_xattr_next(entry, &name, &value, &size);
+		if (name != NULL &&
+				strncmp(name, "xfsroot.", 8) != 0 &&
+				strncmp(name, "system.", 7) != 0) {
+			int e;
+#if HAVE_FSETXATTR
+			if (fd >= 0)
+				e = fsetxattr(fd, name, value, size, 0);
+			else
+#endif
+			{
+				e = lsetxattr(archive_entry_pathname(entry),
+				    name, value, size, 0);
+			}
+			if (e == -1) {
+				if (errno == ENOTSUP) {	/* Report "unsupported" only once. */
+					if (!warning_done) {
+						warning_done = 1;
+						archive_set_error(a, errno,
+						    "Cannot restore extended "
+						    "attributes on this file "
+						    "system");
+					}
+				} else
+					archive_set_error(a, errno,
+					    "Failed to set extended attribute");
+				ret = ARCHIVE_WARN;
+			}
+		} else {	/* NULL name, or a "system."/"xfsroot." name that is not restored. */
+			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
+			    "Invalid extended attribute encountered");
+			ret = ARCHIVE_WARN;
+		}
+	}
+	return (ret);
+}
+#else
+/*
+ * Restore extended attributes - stub implementation for unsupported systems
+ */
+static int
+set_xattrs(struct archive *a, int fd, struct archive_entry *entry)
+{
+	static int warning_done = 0;	/* Set on the first call so the warning is raised once. */
+	(void)a; /* Not actually unused: it receives the one-time warning below. */
+	(void)fd; /* UNUSED */
+	(void)entry; /* UNUSED */
+	if (!warning_done) {
+		warning_done = 1;
+		archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
+		    "Cannot restore extended attributes on this system");
+		return (ARCHIVE_WARN);
+	}
+	/* Warning was already emitted; suppress further warnings. */
+	return (ARCHIVE_OK);
+}
+#endif
+
+/*
+ * The following routines do some basic caching of uname/gname
+ * lookups.  All such lookups go through these routines, including ACL
+ * conversions.  Even a small cache here provides an enormous speedup,
+ * especially on systems using NIS, LDAP, or a similar networked
+ * directory system.
  *
  * TODO: Provide an API for clients to override these routines.
  */
@@ -1485,17 +1580,17 @@ lookup_uid(struct archive *a, const char *uname, uid_t uid)
 static unsigned int
 hash(const char *p)
 {
-  /* A 32-bit version of Peter Weinberger's (PJW) hash algorithm,
-     as used by ELF for hashing function names. */
-  unsigned g,h = 0;
-  while(*p != '\0') {
-    h = ( h << 4 ) + *p++;
-    if (( g = h & 0xF0000000 )) {
-      h ^= g >> 24;
-      h &= 0x0FFFFFFF;
-    }
-  }
-  return h;
+	/* A 32-bit version of Peter Weinberger's (PJW) hash algorithm,
+	   as used by ELF for hashing function names. */
+	unsigned g, h = 0;
+	while (*p != '\0') {
+		h = ( h << 4 ) + *p++;
+		if (( g = h & 0xF0000000 )) {
+			h ^= g >> 24;
+			h &= 0x0FFFFFFF;
+		}
+	}
+	return h;
 }
 
 void
diff --git a/lib/libarchive/archive_read_support_format_cpio.c b/lib/libarchive/archive_read_support_format_cpio.c
index 946a74e..75b0417 100644
--- a/lib/libarchive/archive_read_support_format_cpio.c
+++ b/lib/libarchive/archive_read_support_format_cpio.c
@@ -260,7 +260,7 @@ archive_read_format_cpio_read_header(struct archive *a,
 	}
 
 	/* Compare name to "TRAILER!!!" to test for end-of-archive. */
-	if (namelength == 11 && strcmp(h,"TRAILER!!!")==0) {
+	if (namelength == 11 && strcmp(h, "TRAILER!!!") == 0) {
 	    /* TODO: Store file location of start of block. */
 	    archive_set_error(a, 0, NULL);
 	    return (ARCHIVE_EOF);
diff --git a/lib/libarchive/archive_read_support_format_iso9660.c b/lib/libarchive/archive_read_support_format_iso9660.c
index 510d7f0..065bf9b 100644
--- a/lib/libarchive/archive_read_support_format_iso9660.c
+++ b/lib/libarchive/archive_read_support_format_iso9660.c
@@ -364,7 +364,7 @@ archive_read_format_iso9660_read_header(struct archive *a,
 
 	/* If this is a directory, read in all of the entries right now. */
 	if (S_ISDIR(st.st_mode)) {
-		while(iso9660->entry_bytes_remaining > 0) {
+		while (iso9660->entry_bytes_remaining > 0) {
 			const void *block;
 			const unsigned char *p;
 			ssize_t step = iso9660->logical_block_size;
@@ -918,7 +918,7 @@ next_entry(struct iso9660 *iso9660)
 	    + iso9660->pending_files[0]->size;
 
 	/* Now, try to find an earlier one. */
-	for(i = 0; i < iso9660->pending_files_used; i++) {
+	for (i = 0; i < iso9660->pending_files_used; i++) {
 		/* Use the position of the file *end* as our comparison. */
 		uint64_t end_offset = iso9660->pending_files[i]->offset
 		    + iso9660->pending_files[i]->size;
diff --git a/lib/libarchive/archive_read_support_format_tar.c b/lib/libarchive/archive_read_support_format_tar.c
index 2ab8684..6f1045a 100644
--- a/lib/libarchive/archive_read_support_format_tar.c
+++ b/lib/libarchive/archive_read_support_format_tar.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2003-2004 Tim Kientzle
+ * Copyright (c) 2003-2006 Tim Kientzle
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$");
 static int wcscmp(const wchar_t *s1, const wchar_t *s2)
 {
 	int diff = *s1 - *s2;
-	while(*s1 && diff == 0)
+	while (*s1 && diff == 0)
 		diff = (int)*++s1 - (int)*++s2;
 	return diff;
 }
@@ -155,6 +155,7 @@ struct tar {
 
 static size_t	UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n);
 static int	archive_block_is_null(const unsigned char *p);
+static char	*base64_decode(const wchar_t *, size_t, size_t *);
 static int	gnu_read_sparse_data(struct archive *, struct tar *,
 		    const struct archive_entry_header_gnutar *header);
 static void	gnu_parse_sparse_data(struct archive *, struct tar *,
@@ -199,7 +200,10 @@ static int64_t	tar_atol256(const char *, unsigned);
 static int64_t	tar_atol8(const char *, unsigned);
 static int	tar_read_header(struct archive *, struct tar *,
 		    struct archive_entry *, struct stat *);
+static int	tohex(int c);
+static char	*url_decode(const char *);
 static int	utf8_decode(wchar_t *, const char *, size_t length);
+static char	*wide_to_narrow(const wchar_t *wval);
 
 /*
  * ANSI C99 defines constants for these, but not everyone supports
@@ -1154,7 +1158,42 @@ pax_header(struct archive *a, struct tar *tar, struct archive_entry *entry,
 	return (err);
 }
 
+static int
+pax_attribute_xattr(struct archive_entry *entry,
+	wchar_t *name, wchar_t *value)
+{
+	char *name_decoded, *name_narrow;
+	void *value_decoded;
+	size_t value_len;
+
+	if (wcslen(name) < 18 || (wcsncmp(name, L"LIBARCHIVE.xattr.", 17)) != 0)
+		return 3;	/* Not a "LIBARCHIVE.xattr." key with a non-empty name. */
+
+	name += 17;	/* Skip the "LIBARCHIVE.xattr." prefix. */
+
+	/* URL-decode name */
+	name_narrow = wide_to_narrow(name);
+	if (name_narrow == NULL)
+		return 2;	/* Name could not be converted. */
+	name_decoded = url_decode(name_narrow);
+	free(name_narrow);
+	if (name_decoded == NULL)
+		return 2;	/* Name could not be decoded. */
+
+	/* Base-64 decode value */
+	value_decoded = base64_decode(value, wcslen(value), &value_len);
+	if (value_decoded == NULL) {
+		free(name_decoded);
+		return 1;	/* Value could not be decoded. */
+	}
 
+	archive_entry_xattr_add_entry(entry, name_decoded,
+		value_decoded, value_len);
+
+	free(name_decoded);	/* The entry keeps its own copies of name and value. */
+	free(value_decoded);
+	return 0;	/* Attribute handed to the entry. */
+}
 
 /*
  * Parse a single key=value attribute.  key/value pointers are
@@ -1184,6 +1223,8 @@ pax_attribute(struct archive_entry *entry, struct stat *st,
 		if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0)
 			archive_entry_set_xxxxxx(entry, value);
 */
+		if (wcsncmp(key, L"LIBARCHIVE.xattr.", 17)==0)
+			pax_attribute_xattr(entry, key, value);
 		break;
 	case 'S':
 		/* We support some keys used by the "star" archiver */
@@ -1599,7 +1640,7 @@ utf8_decode(wchar_t *dest, const char *src, size_t length)
 	int err;
 
 	err = 0;
-	while(length > 0) {
+	while (length > 0) {
 		n = UTF8_mbrtowc(dest, src, length);
 		if (n == 0)
 			break;
@@ -1721,3 +1762,159 @@ UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
 	}
         return (wch == L'\0' ? 0 : len);
 }
+
+
+/*
+ * base64_decode - Base64 decode
+ *
+ * This accepts most variations of base-64 encoding, including:
+ *    * with or without line breaks
+ *    * with or without the final group padded with '=' or '_' characters
+ * (The most economical Base-64 variant does not pad the last group and
+ * omits line breaks; RFC1341 used for MIME requires both.)
+ */
+static char *
+base64_decode(const wchar_t *src, size_t len, size_t *out_len)
+{
+	static const unsigned char digits[64] =
+	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+	static unsigned char decode_table[128];	/* Character code -> 6-bit value; 0xff = not a digit. */
+	char *out, *d;
+
+	/* If the decode table is not yet initialized, prepare it. */
+	if (decode_table[digits[1]] != 1) {	/* 'B' maps to 1 only after setup. */
+		size_t i;
+		memset(decode_table, 0xff, sizeof(decode_table));
+		for (i = 0; i < sizeof(digits); i++)
+			decode_table[digits[i]] = i;
+	}
+
+	/* Allocate enough space to hold the entire output. */
+	/* Note that we may not use all of this... */
+	out = malloc((len * 3 + 3) / 4);
+	if (out == NULL) {
+		*out_len = 0;
+		return (NULL);
+	}
+	d = out;
+
+	while (len > 0) {
+		/* Collect the next group of (up to) four characters. */
+		int v = 0;
+		int group_size = 0;
+		while (group_size < 4 && len > 0) {
+			/* '=' or '_' padding indicates final group. */
+			if (*src == '=' || *src == '_') {
+				len = 0;	/* Also ends the outer loop. */
+				break;
+			}
+			/* Skip illegal characters (including line breaks) */
+			if (*src > 127 || *src < 32
+			    || decode_table[*src] == 0xff) {
+				len--;
+				src++;
+				continue;
+			}
+			v <<= 6;
+			v |= decode_table[*src++];
+			len --;
+			group_size++;
+		}
+		/* Align a short group properly. */
+		v <<= 6 * (4 - group_size);
+		/* Unpack the group we just collected. */
+		switch (group_size) {
+		case 4: d[2] = v & 0xff;
+			/* FALLTHROUGH */
+		case 3: d[1] = (v >> 8) & 0xff;
+			/* FALLTHROUGH */
+		case 2: d[0] = (v >> 16) & 0xff;
+			break;
+		case 1: /* this is invalid! */
+			break;
+		}
+		d += group_size * 3 / 4;	/* 4 chars -> 3 bytes, 3 -> 2, 2 -> 1, 1 or 0 -> 0. */
+	}
+
+	*out_len = d - out;	/* Bytes actually produced. */
+	return (out);	/* malloc()ed buffer; not NUL-terminated. */
+}
+
+/*
+ * This is a little tricky because the C99 standard wcstombs()
+ * function returns the number of bytes that were converted,
+ * not the number that should be converted.  As a result,
+ * we can never accurately size the output buffer (without
+ * doing a tedious output size calculation in advance).
+ * This approach (try a conversion, then try again if it fails)
+ * will almost always succeed on the first try, and is thus
+ * much faster, at the cost of sometimes requiring multiple
+ * passes while we expand the buffer.
+ */
+static char *
+wide_to_narrow(const wchar_t *wval)
+{
+	int converted_length;
+	/* Guess an output size; the +1 keeps it nonzero so doubling grows it. */
+	int alloc_length = wcslen(wval) * 3 + 1;
+	char *mbs_val = malloc(alloc_length + 1);
+	if (mbs_val == NULL)
+		return (NULL);
+	converted_length = wcstombs(mbs_val, wval, alloc_length);
+	while (converted_length >= alloc_length) {	/* Buffer exhausted: grow, retry. */
+		free(mbs_val);
+		alloc_length *= 2;
+		mbs_val = malloc(alloc_length + 1);
+		if (mbs_val == NULL)
+			return (NULL);
+		converted_length = wcstombs(mbs_val, wval, alloc_length);
+	}
+	if (converted_length < 0) {	/* wcstombs() failed with (size_t)-1. */
+		free(mbs_val);
+		return (NULL);
+	}
+	mbs_val[alloc_length] = '\0';	/* Guarantee termination. */
+	return (mbs_val);
+}
+
+static char *
+url_decode(const char *in)
+{
+	char *out, *d;
+	const char *s;
+	/* Decoding never lengthens the text, so strlen(in) + 1 bytes suffice. */
+	out = malloc(strlen(in) + 1);
+	if (out == NULL)
+		return (NULL);
+	for (s = in, d = out; *s != '\0'; ) {
+		if (*s == '%') {
+			/* Try to convert % escape; s[2] is read only if s[1] is a hex digit, so a trailing '%' stays in bounds. */
+			int digit1 = tohex(s[1]);
+			int digit2 = (digit1 < 0) ? -1 : tohex(s[2]);
+			if (digit1 >= 0 && digit2 >= 0) {
+				/* Looks good, consume three chars */
+				s += 3;
+				/* Convert output */
+				*d++ = ((digit1 << 4) | digit2);
+				continue;
+			}
+			/* Else fall through and treat '%' as normal char */
+		}
+		*d++ = *s++;
+	}
+	*d = '\0';
+	return (out);	/* malloc()ed, NUL-terminated. */
+}
+
+static int
+tohex(int c)
+{	/* Despite the name, this converts FROM a hex digit: yields 0-15, or -1. */
+	if (c >= '0' && c <= '9')
+		return (c - '0');
+	else if (c >= 'A' && c <= 'F')
+		return (c - 'A' + 10);
+	else if (c >= 'a' && c <= 'f')
+		return (c - 'a' + 10);
+	else
+		return (-1);	/* Not a hexadecimal digit. */
+}
diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c
index 4e9857b..c256cb2 100644
--- a/lib/libarchive/archive_write_set_format_pax.c
+++ b/lib/libarchive/archive_write_set_format_pax.c
@@ -66,11 +66,13 @@ static int		 archive_write_pax_finish(struct archive *);
 static int		 archive_write_pax_finish_entry(struct archive *);
 static int		 archive_write_pax_header(struct archive *,
 			     struct archive_entry *);
+static char		*base64_encode(const char *src, size_t len);
 static char		*build_pax_attribute_name(char *dest, const char *src);
 static char		*build_ustar_entry_name(char *dest, const char *src,
 			     size_t src_length, const char *insert);
 static char		*format_int(char *dest, int64_t);
 static int		 has_non_ASCII(const wchar_t *);
+static char		*url_encode(const char *in);
 static int		 write_nulls(struct archive *, size_t);
 
 /*
@@ -142,7 +144,7 @@ add_pax_attr_time(struct archive_string *as, const char *key,
 	t = tmp + sizeof(tmp) - 1;
 
 	/* Skip trailing zeros in the fractional part. */
-	for(digit = 0, i = 10; i > 0 && digit == 0; i--) {
+	for (digit = 0, i = 10; i > 0 && digit == 0; i--) {
 		digit = nanos % 10;
 		nanos /= 10;
 	}
@@ -190,10 +192,10 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
 	add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value));
 }
 
-static void
-add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
+static char *
+utf8_encode(const wchar_t *wval)
 {
-	int	utf8len;
+	int utf8len;
 	const wchar_t *wp;
 	unsigned long wc;
 	char *utf8_value, *p;
@@ -217,8 +219,10 @@ add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
 	}
 
 	utf8_value = malloc(utf8len + 1);
-	if (utf8_value == NULL)
+	if (utf8_value == NULL) {
 		__archive_errx(1, "Not enough memory for attributes");
+		return (NULL);
+	}
 
 	for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
 		wc = *wp++;
@@ -258,6 +262,16 @@ add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
 		/* Ignore larger values; UTF-8 can't encode them. */
 	}
 	*p = '\0';
+
+	return (utf8_value);
+}
+
+static void
+add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
+{
+	char *utf8_value = utf8_encode(wval);	/* Heap copy; released at the end of this function. */
+	if (utf8_value == NULL)
+		return;	/* Encoding failed: the attribute is silently omitted. */
 	add_pax_attr(as, key, utf8_value);
 	free(utf8_value);
 }
@@ -311,6 +325,53 @@ add_pax_attr(struct archive_string *as, const char *key, const char *value)
 	archive_strappend_char(as, '\n');
 }
 
+static void
+archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry)
+{
+	struct archive_string s;
+	int i = archive_entry_xattr_reset(entry);	/* Rewinds the cursor; i = attribute count. */
+	/* Add one pax attribute keyed "LIBARCHIVE.xattr.<encoded name>" per xattr. */
+	while (i--) {
+		const char *name;
+		const void *value;
+		char *encoded_value;
+		char *url_encoded_name = NULL, *encoded_name = NULL;
+		wchar_t *wcs_name = NULL;
+		size_t size;
+
+		archive_entry_xattr_next(entry, &name, &value, &size);
+		/* Name is URL-encoded, then converted to wchar_t,
+		 * then UTF-8 encoded. */
+		url_encoded_name = url_encode(name);
+		if (url_encoded_name != NULL) {
+			/* Convert narrow-character to wide-character. */
+			int wcs_length = strlen(url_encoded_name);
+			wcs_name = malloc((wcs_length + 1) * sizeof(wchar_t));
+			if (wcs_name == NULL)
+				__archive_errx(1, "No memory for xattr conversion");
+			mbstowcs(wcs_name, url_encoded_name, wcs_length);	/* NOTE(review): relies on __archive_errx() not returning - confirm. */
+			wcs_name[wcs_length] = 0;
+			free(url_encoded_name); /* Done with this. */
+		}
+		if (wcs_name != NULL) {
+			encoded_name = utf8_encode(wcs_name);
+			free(wcs_name); /* Done with wchar_t name. */
+		}
+
+		encoded_value = base64_encode(value, size);
+
+		if (encoded_name != NULL && encoded_value != NULL) {	/* Skip the attribute if either encoding failed. */
+			archive_string_init(&s);
+			archive_strcpy(&s, "LIBARCHIVE.xattr.");
+			archive_strcat(&s, encoded_name);
+			add_pax_attr(&(pax->pax_header), s.s, encoded_value);
+			archive_string_free(&s);
+		}
+		free(encoded_name);	/* free(NULL) is harmless when a step above failed. */
+		free(encoded_value);
+	}
+}
+
 /*
  * TODO: Consider adding 'comment' and 'charset' fields to
  * archive_entry so that clients can specify them.  Also, consider
@@ -538,6 +599,10 @@ archive_write_pax_header(struct archive *a,
 		ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0)
 		need_extension = 1;
 
+	/* If there are extended attributes, we need an extension */
+	if (!need_extension && archive_entry_xattr_count(entry_original) > 0)
+		need_extension = 1;
+
 	/*
 	 * The following items are handled differently in "pax
 	 * restricted" format.  In particular, in "pax restricted"
@@ -595,6 +660,9 @@ archive_write_pax_header(struct archive *a,
 		    st_main->st_ino);
 		add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink",
 		    st_main->st_nlink);
+
+		/* Store extended attributes */
+		archive_write_pax_header_xattrs(pax, entry_original);
 	}
 
 	/* Only regular files have data. */
@@ -1026,3 +1094,94 @@ has_non_ASCII(const wchar_t *wp)
 		wp++;
 	return (*wp != L'\0');
 }
+
+/*
+ * Used by extended attribute support; encodes the name
+ * so that there will be no '=' characters in the result.
+ */
+static char *
+url_encode(const char *in)
+{
+	const char *s;
+	char *d;
+	int out_len = 0;
+	char *out;
+	/* Pass 1: size the output; each escaped byte takes three characters. */
+	for (s = in; *s != '\0'; s++) {
+		if (*s < 33 || *s > 126 || *s == '%' || *s == '=')
+			out_len += 3;
+		else
+			out_len++;
+	}
+
+	out = (char *)malloc(out_len + 1);
+	if (out == NULL)
+		return (NULL);
+	/* Pass 2: emit the encoded text using the same test as pass 1. */
+	for (s = in, d = out; *s != '\0'; s++) {
+		/* encode any non-printable ASCII character or '%' or '=' */
+		if (*s < 33 || *s > 126 || *s == '%' || *s == '=') {
+			/* URL encoding is '%' followed by two hex digits */
+			*d++ = '%';
+			*d++ = "0123456789ABCDEF"[0x0f & (*s >> 4)];	/* High nibble. */
+			*d++ = "0123456789ABCDEF"[0x0f & *s];	/* Low nibble. */
+		} else {
+			*d++ = *s;
+		}
+	}
+	*d = '\0';
+	return (out);	/* malloc()ed, NUL-terminated. */
+}
+
+/*
+ * Encode a sequence of bytes into a C string using base-64 encoding.
+ *
+ * Returns a null-terminated C string allocated with malloc(); caller
+ * is responsible for freeing the result.
+ */
+static char *
+base64_encode(const char *s, size_t len)
+{
+	static const char digits[64] =
+	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+	int v;
+	char *d, *out;
+
+	/* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
+	out = malloc((len * 4 + 2) / 3 + 1);
+	if (out == NULL)
+		return (NULL);
+	d = out;
+	/* Bytes go through unsigned char: left-shifting a negative int is undefined. */
+	/* Convert each group of 3 bytes into 4 characters. */
+	while (len >= 3) {
+		v = (((int)(unsigned char)s[0] << 16) & 0xff0000)
+		    | (((int)(unsigned char)s[1] << 8) & 0xff00)
+		    | (((int)(unsigned char)s[2]) & 0x00ff);
+		s += 3;
+		len -= 3;
+		*d++ = digits[(v >> 18) & 0x3f];
+		*d++ = digits[(v >> 12) & 0x3f];
+		*d++ = digits[(v >> 6) & 0x3f];
+		*d++ = digits[(v) & 0x3f];
+	}
+	/* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
+	switch (len) {
+	case 0: break;
+	case 1:
+		v = (((int)(unsigned char)s[0] << 16) & 0xff0000);
+		*d++ = digits[(v >> 18) & 0x3f];
+		*d++ = digits[(v >> 12) & 0x3f];
+		break;
+	case 2:
+		v = (((int)(unsigned char)s[0] << 16) & 0xff0000)
+		    | (((int)(unsigned char)s[1] << 8) & 0xff00);
+		*d++ = digits[(v >> 18) & 0x3f];
+		*d++ = digits[(v >> 12) & 0x3f];
+		*d++ = digits[(v >> 6) & 0x3f];
+		break;
+	}
+	/* Add trailing NUL character so output is a valid C string. */
+	*d++ = '\0';	/* No '=' padding is ever appended. */
+	return (out);
+}
diff --git a/lib/libarchive/tar.5 b/lib/libarchive/tar.5
index d46e1b5..242a3d0 100644
--- a/lib/libarchive/tar.5
+++ b/lib/libarchive/tar.5
@@ -399,6 +399,21 @@ Schilling's
 .Cm SCHILY.*
 extensions can store all of the data from
 .Va struct stat .
+.It Cm LIBARCHIVE.xattr. Ns Ar namespace Ns . Ns Ar key
+Libarchive stores POSIX.1e-style extended attributes using
+keys of this form.  The
+.Ar key
+value is URL-encoded:
+Bytes outside the printable ASCII range (33 through 126) and the two special characters
+.Dq =
+and
+.Dq %
+are encoded as
+.Dq %
+followed by two uppercase hexadecimal digits.
+The value of this key is the extended attribute value
+encoded in base 64.
+XXX Detail the base-64 format here XXX
 .It Cm VENDOR.*
 XXX document other vendor-specific extensions XXX
 .El
author	kientzle <kientzle@FreeBSD.org>	2006-03-21 16:55:46 +0000
committer	kientzle <kientzle@FreeBSD.org>	2006-03-21 16:55:46 +0000
commit	537ab73b2f9d8cc3e849014fd44738b6a48f6c99 (patch)
tree	c3aca38252d2ed10ab3875ada2f51f284743c856 /lib/libarchive
parent	b09a8950a1301d97aef8e2975e34a3ba5bc451c3 (diff)
download	FreeBSD-src-537ab73b2f9d8cc3e849014fd44738b6a48f6c99.zip FreeBSD-src-537ab73b2f9d8cc3e849014fd44738b6a48f6c99.tar.gz