diff options
author | kientzle <kientzle@FreeBSD.org> | 2004-03-19 22:37:06 +0000 |
---|---|---|
committer | kientzle <kientzle@FreeBSD.org> | 2004-03-19 22:37:06 +0000 |
commit | ef0d6eb5985e7a4f4fd2f0f42925e8328b8cf004 (patch) | |
tree | 31fad164e4fe18412497337d4c8e1a7e7c792609 /lib/libarchive/archive_write_set_format_pax.c | |
parent | edf0b18239fcd09677972580f323c10d3a59afab (diff) | |
download | FreeBSD-src-ef0d6eb5985e7a4f4fd2f0f42925e8328b8cf004.zip FreeBSD-src-ef0d6eb5985e7a4f4fd2f0f42925e8328b8cf004.tar.gz |
Many fixes:
* Disabled shared-library building, as some API breakage is
still likely. (I didn't realize it was turned on by default.) If
you have an existing /usr/lib/libarchive.so.2, I recommend deleting it.
* Pax interchange format now correctly stores and reads UTF-8
for extended attributes. In particular, pax format can portably
handle arbitrarily long pathnames containing arbitrary characters.
* Library compiles cleanly at -O2, -O3, and WARNS=6 on all
FreeBSD-CURRENT platforms.
* Minor portability improvements inspired by Juergen Lock
and Greg Lewis. (Less reliance on stdint.h, isolation of
various portability-challenged constructs.)
* archive_entry transparently converts multi-byte <-> wide character
strings, allowing clients and format handlers to deal with either
one, as appropriate.
* Support for reading 'L' and 'K' entries in standard tar archives
for star compatibility.
* Recognize (but don't yet handle) ACL entries from Solaris tar.
* Pushed format-specific data for format readers down into
format-specific storage and out of library-global storage. This
should make it easier to maintain individual formats without mucking
with the core library management.
* Documentation updates to track the above changes.
* Updates to tar.5 to correct a few mistakes and add some additional
information about GNU tar and Solaris tar formats.
Notes:
* The basic 'tar' reader is getting more general; there's not much
point in keeping the 'gnutar' reader separate. Merging the two
would lose a bunch of duplicate code.
* The libc ACL support is looking increasingly inadequate for my needs
here. I might need to assemble some fairly significant code for
parsing and building ACLs. <sigh>
Diffstat (limited to 'lib/libarchive/archive_write_set_format_pax.c')
-rw-r--r-- | lib/libarchive/archive_write_set_format_pax.c | 100 |
1 files changed, 91 insertions, 9 deletions
diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c
index 8c5342e..441c190 100644
--- a/lib/libarchive/archive_write_set_format_pax.c
+++ b/lib/libarchive/archive_write_set_format_pax.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
 
 #include "archive.h"
 #include "archive_entry.h"
@@ -54,6 +55,8 @@ static void add_pax_attr_int(struct archive_string *,
 static void		 add_pax_attr_time(struct archive_string *,
 			     const char *key, int64_t sec,
 			     unsigned long nanos);
+static void		 add_pax_attr_w(struct archive_string *,
+			     const char *key, const wchar_t *wvalue);
 static int		 archive_write_pax_data(struct archive *,
 			     const void *, size_t);
 static int		 archive_write_pax_finish(struct archive *);
@@ -183,6 +186,73 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
 	add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value));
 }
 
+static void
+add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
+{
+	int utf8len;
+	const wchar_t *wp;
+	wchar_t wc;
+	char *utf8_value, *p;
+	/* Convert wval to UTF-8 and pass it to add_pax_attr() for key. */
+	utf8len = 0;	/* Pass 1: count the bytes the encoding needs. */
+	for (wp = wval; *wp != L'\0'; ) {
+		wc = *wp++;
+		if (wc <= 0x7f)
+			utf8len++;
+		else if (wc <= 0x7ff)
+			utf8len += 2;
+		else if (wc <= 0xffff)
+			utf8len += 3;
+		else if (wc <= 0x1fffff)
+			utf8len += 4;
+		else if (wc <= 0x3ffffff)
+			utf8len += 5;
+		else
+			utf8len += 6;
+	}
+
+	utf8_value = malloc(utf8len + 1);	/* +1 for the NUL. */
+	for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
+		wc = *wp++;	/* Pass 2: emit each character. */
+		if (wc <= 0x7f) {
+			*p++ = (char)wc;
+		} else if (wc <= 0x7ff) {
+			p[0] = 0xc0 | ((wc >> 6) & 0x1f);
+			p[1] = 0x80 | (wc & 0x3f);
+			p += 2;
+		} else if (wc <= 0xffff) {
+			p[0] = 0xe0 | ((wc >> 12) & 0x0f);
+			p[1] = 0x80 | ((wc >> 6) & 0x3f);
+			p[2] = 0x80 | (wc & 0x3f);
+			p += 3;
+		} else if (wc <= 0x1fffff) {
+			p[0] = 0xf0 | ((wc >> 18) & 0x07);
+			p[1] = 0x80 | ((wc >> 12) & 0x3f);
+			p[2] = 0x80 | ((wc >> 6) & 0x3f);
+			p[3] = 0x80 | (wc & 0x3f);
+			p += 4;
+		} else if (wc <= 0x3ffffff) {
+			p[0] = 0xf8 | ((wc >> 24) & 0x03);
+			p[1] = 0x80 | ((wc >> 18) & 0x3f);
+			p[2] = 0x80 | ((wc >> 12) & 0x3f);
+			p[3] = 0x80 | ((wc >> 6) & 0x3f);
+			p[4] = 0x80 | (wc & 0x3f);
+			p += 5;
+		} else if (wc <= 0x7fffffff) {	/* Six bytes: p[0]..p[5]. */
+			p[0] = 0xfc | ((wc >> 30) & 0x01);
+			p[1] = 0x80 | ((wc >> 24) & 0x3f);
+			p[2] = 0x80 | ((wc >> 18) & 0x3f);
+			p[3] = 0x80 | ((wc >> 12) & 0x3f);
+			p[4] = 0x80 | ((wc >> 6) & 0x3f);
+			p[5] = 0x80 | (wc & 0x3f);
+			p += 6;
+		}
+	}
+	*p = '\0';	/* No length is passed on, so terminate. */
+	add_pax_attr(as, key, utf8_value);
+	free(utf8_value);
+}
+
 /*
  * Add a key/value attribute to the pax header. This function handles
  * the length field and various other syntactic requirements.
@@ -243,16 +313,18 @@ archive_write_pax_header(struct archive *a,
     struct archive_entry *entry_original)
 {
 	struct archive_entry *entry_main;
-	const char *linkname, *name_start, *p;
+	const char *linkname, *p;
+	const wchar_t *wp, *wp2, *wname_start;
 	int need_extension, oldstate, r, ret;
 	struct pax *pax;
 	const struct stat *st_main, *st_original;
 
-	struct archive_string pax_entry_name = EMPTY_ARCHIVE_STRING;
+	struct archive_string pax_entry_name;
 	char paxbuff[512];
 	char ustarbuff[512];
 	char ustar_entry_name[256];
 
+	archive_string_init(&pax_entry_name);
 	need_extension = 0;
 	pax = a->format_data;
 	pax->written = 1;
@@ -281,7 +353,7 @@ archive_write_pax_header(struct archive *a,
 	}
 
 	/* Copy entry so we can modify it as needed. */
-	entry_main = archive_entry_dup(entry_original);
+	entry_main = archive_entry_clone(entry_original);
 	archive_string_empty(&(pax->pax_header)); /* Blank our work area. */
 	st_main = archive_entry_stat(entry_main);
 
@@ -291,16 +363,26 @@ archive_write_pax_header(struct archive *a,
 	 * 'prefix' fields. Here, I pick out the longest possible
 	 * suffix, then test whether the remaining prefix is too long.
 	 */
+	wp = archive_entry_pathname_w(entry_main);
 	p = archive_entry_pathname(entry_main);
-	if (strlen(p) <= 100) /* Short enough for just 'name' field */
-		name_start = p; /* Record a zero-length prefix */
+	if (wcslen(wp) <= 100) /* Short enough for just 'name' field */
+		wname_start = wp; /* Record a zero-length prefix */
 	else
 		/* Find the largest suffix that fits in 'name' field. */
-		name_start = strchr(p + strlen(p) - 100 - 1, '/');
+		wname_start = wcschr(wp + wcslen(wp) - 100 - 1, '/');
 
-	/* If name is too long, add 'path' to pax extended attrs. */
-	if (name_start == NULL || name_start - p > 155) {
-		add_pax_attr(&(pax->pax_header), "path", p);
+	/* Find non-ASCII character, if any. */
+	wp2 = wp;
+	while (*wp2 != L'\0' && *wp2 < 128)
+		wp2++;
+
+	/*
+	 * If name is too long, or has non-ASCII characters, add
+	 * 'path' to pax extended attrs.
+	 */
+	if (wname_start == NULL || wname_start - wp > 155 ||
+	    *wp2 != L'\0') {
+		add_pax_attr_w(&(pax->pax_header), "path", wp);
 		archive_entry_set_pathname(entry_main,
 		    build_ustar_entry_name(ustar_entry_name, p));
 		need_extension = 1;