diff options
Diffstat (limited to 'libarchive/archive_write_set_format_pax.c')
-rw-r--r-- | libarchive/archive_write_set_format_pax.c | 1108 |
1 files changed, 778 insertions, 330 deletions
diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c index 00c3f3f..a62d99d 100644 --- a/libarchive/archive_write_set_format_pax.c +++ b/libarchive/archive_write_set_format_pax.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2003-2007 Tim Kientzle + * Copyright (c) 2010-2011 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,13 +39,28 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_format_pax.c 201162 20 #include "archive.h" #include "archive_entry.h" +#include "archive_entry_locale.h" #include "archive_private.h" #include "archive_write_private.h" +struct sparse_block { + struct sparse_block *next; + int is_hole; + uint64_t offset; + uint64_t remaining; +}; + struct pax { uint64_t entry_bytes_remaining; uint64_t entry_padding; + struct archive_string l_url_encoded_name; struct archive_string pax_header; + struct archive_string sparse_map; + size_t sparse_map_padding; + struct sparse_block *sparse_list; + struct sparse_block *sparse_tail; + struct archive_string_conv *sconv_utf8; + int opt_binary; }; static void add_pax_attr(struct archive_string *, const char *key, @@ -54,23 +70,25 @@ static void add_pax_attr_int(struct archive_string *, static void add_pax_attr_time(struct archive_string *, const char *key, int64_t sec, unsigned long nanos); -static void add_pax_attr_w(struct archive_string *, - const char *key, const wchar_t *wvalue); static ssize_t archive_write_pax_data(struct archive_write *, const void *, size_t); -static int archive_write_pax_finish(struct archive_write *); -static int archive_write_pax_destroy(struct archive_write *); +static int archive_write_pax_close(struct archive_write *); +static int archive_write_pax_free(struct archive_write *); static int archive_write_pax_finish_entry(struct archive_write *); static int archive_write_pax_header(struct archive_write *, struct archive_entry *); +static int archive_write_pax_options(struct archive_write *, + const char *, const char *); static char *base64_encode(const char *src, size_t len); +static char *build_gnu_sparse_name(char *dest, const char *src); static char *build_pax_attribute_name(char *dest, const char *src); static char *build_ustar_entry_name(char *dest, const char *src, size_t src_length, const char *insert); static char *format_int(char *dest, int64_t); -static int has_non_ASCII(const wchar_t *); +static int has_non_ASCII(const char *); +static void sparse_list_clear(struct pax *); +static int sparse_list_add(struct pax *, int64_t, int64_t); static char *url_encode(const char *in); -static int write_nulls(struct archive_write *, size_t); /* * Set output format to 'restricted pax' format. @@ -84,6 +102,10 @@ archive_write_set_format_pax_restricted(struct archive *_a) { struct archive_write *a = (struct archive_write *)_a; int r; + + archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_NEW, "archive_write_set_format_pax_restricted"); + r = archive_write_set_format_pax(&a->archive); a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED; a->archive.archive_format_name = "restricted POSIX pax interchange"; @@ -99,29 +121,79 @@ archive_write_set_format_pax(struct archive *_a) struct archive_write *a = (struct archive_write *)_a; struct pax *pax; - if (a->format_destroy != NULL) - (a->format_destroy)(a); + archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_NEW, "archive_write_set_format_pax"); + + if (a->format_free != NULL) + (a->format_free)(a); pax = (struct pax *)malloc(sizeof(*pax)); if (pax == NULL) { - archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); + archive_set_error(&a->archive, ENOMEM, + "Can't allocate pax data"); return (ARCHIVE_FATAL); } memset(pax, 0, sizeof(*pax)); a->format_data = pax; - - a->pad_uncompressed = 1; a->format_name = "pax"; + a->format_options = archive_write_pax_options; a->format_write_header = archive_write_pax_header; a->format_write_data = archive_write_pax_data; - a->format_finish = archive_write_pax_finish; - a->format_destroy = archive_write_pax_destroy; + a->format_close = archive_write_pax_close; + a->format_free = archive_write_pax_free; a->format_finish_entry = archive_write_pax_finish_entry; a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange"; return (ARCHIVE_OK); } +static int +archive_write_pax_options(struct archive_write *a, const char *key, + const char *val) +{ + struct pax *pax = (struct pax *)a->format_data; + int ret = ARCHIVE_FAILED; + + if (strcmp(key, "hdrcharset") == 0) { + /* + * The character-set we can use are defined in + * IEEE Std 1003.1-2001 + */ + if (val == NULL || val[0] == 0) + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "pax: hdrcharset option needs a character-set name"); + else if (strcmp(val, "BINARY") == 0 || + strcmp(val, "binary") == 0) { + /* + * Specify binary mode. We will not convert + * filenames, uname and gname to any charsets. + */ + pax->opt_binary = 1; + ret = ARCHIVE_OK; + } else if (strcmp(val, "UTF-8") == 0) { + /* + * Specify UTF-8 character-set to be used for + * filenames. This is almost the test that + * running platform supports the string conversion. + * Especially libarchive_test needs this trick for + * its test. + */ + pax->sconv_utf8 = archive_string_conversion_to_charset( + &(a->archive), "UTF-8", 0); + if (pax->sconv_utf8 == NULL) + ret = ARCHIVE_FATAL; + else + ret = ARCHIVE_OK; + } else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "pax: invalid charset name"); + } else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "pax: unknown keyword ``%s''", key); + + return (ret); +} + /* * Note: This code assumes that 'nanos' has the same sign as 'sec', * which implies that sec=-1, nanos=200000000 represents -1.2 seconds @@ -168,18 +240,17 @@ add_pax_attr_time(struct archive_string *as, const char *key, static char * format_int(char *t, int64_t i) { - int sign; + uint64_t ui; - if (i < 0) { - sign = -1; - i = -i; - } else - sign = 1; + if (i < 0) + ui = (i == INT64_MIN) ? (uint64_t)(INT64_MAX) + 1 : (uint64_t)(-i); + else + ui = i; do { - *--t = "0123456789"[i % 10]; - } while (i /= 10); - if (sign < 0) + *--t = "0123456789"[ui % 10]; + } while (ui /= 10); + if (i < 0) *--t = '-'; return (t); } @@ -193,106 +264,6 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value) add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value)); } -static char * -utf8_encode(const wchar_t *wval) -{ - int utf8len; - const wchar_t *wp; - unsigned long wc; - char *utf8_value, *p; - - utf8len = 0; - for (wp = wval; *wp != L'\0'; ) { - wc = *wp++; - - if (wc >= 0xd800 && wc <= 0xdbff - && *wp >= 0xdc00 && *wp <= 0xdfff) { - /* This is a surrogate pair. Combine into a - * full Unicode value before encoding into - * UTF-8. */ - wc = (wc - 0xd800) << 10; /* High 10 bits */ - wc += (*wp++ - 0xdc00); /* Low 10 bits */ - wc += 0x10000; /* Skip BMP */ - } - if (wc <= 0x7f) - utf8len++; - else if (wc <= 0x7ff) - utf8len += 2; - else if (wc <= 0xffff) - utf8len += 3; - else if (wc <= 0x1fffff) - utf8len += 4; - else if (wc <= 0x3ffffff) - utf8len += 5; - else if (wc <= 0x7fffffff) - utf8len += 6; - /* Ignore larger values; UTF-8 can't encode them. */ - } - - utf8_value = (char *)malloc(utf8len + 1); - if (utf8_value == NULL) { - __archive_errx(1, "Not enough memory for attributes"); - return (NULL); - } - - for (wp = wval, p = utf8_value; *wp != L'\0'; ) { - wc = *wp++; - if (wc >= 0xd800 && wc <= 0xdbff - && *wp >= 0xdc00 && *wp <= 0xdfff) { - /* Combine surrogate pair. */ - wc = (wc - 0xd800) << 10; - wc += *wp++ - 0xdc00 + 0x10000; - } - if (wc <= 0x7f) { - *p++ = (char)wc; - } else if (wc <= 0x7ff) { - p[0] = 0xc0 | ((wc >> 6) & 0x1f); - p[1] = 0x80 | (wc & 0x3f); - p += 2; - } else if (wc <= 0xffff) { - p[0] = 0xe0 | ((wc >> 12) & 0x0f); - p[1] = 0x80 | ((wc >> 6) & 0x3f); - p[2] = 0x80 | (wc & 0x3f); - p += 3; - } else if (wc <= 0x1fffff) { - p[0] = 0xf0 | ((wc >> 18) & 0x07); - p[1] = 0x80 | ((wc >> 12) & 0x3f); - p[2] = 0x80 | ((wc >> 6) & 0x3f); - p[3] = 0x80 | (wc & 0x3f); - p += 4; - } else if (wc <= 0x3ffffff) { - p[0] = 0xf8 | ((wc >> 24) & 0x03); - p[1] = 0x80 | ((wc >> 18) & 0x3f); - p[2] = 0x80 | ((wc >> 12) & 0x3f); - p[3] = 0x80 | ((wc >> 6) & 0x3f); - p[4] = 0x80 | (wc & 0x3f); - p += 5; - } else if (wc <= 0x7fffffff) { - p[0] = 0xfc | ((wc >> 30) & 0x01); - p[1] = 0x80 | ((wc >> 24) & 0x3f); - p[1] = 0x80 | ((wc >> 18) & 0x3f); - p[2] = 0x80 | ((wc >> 12) & 0x3f); - p[3] = 0x80 | ((wc >> 6) & 0x3f); - p[4] = 0x80 | (wc & 0x3f); - p += 6; - } - /* Ignore larger values; UTF-8 can't encode them. */ - } - *p = '\0'; - - return (utf8_value); -} - -static void -add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) -{ - char *utf8_value = utf8_encode(wval); - if (utf8_value == NULL) - return; - add_pax_attr(as, key, utf8_value); - free(utf8_value); -} - /* * Add a key/value attribute to the pax header. This function handles * the length field and various other syntactic requirements. @@ -342,8 +313,9 @@ add_pax_attr(struct archive_string *as, const char *key, const char *value) archive_strappend_char(as, '\n'); } -static void -archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) +static int +archive_write_pax_header_xattrs(struct archive_write *a, + struct pax *pax, struct archive_entry *entry) { struct archive_string s; int i = archive_entry_xattr_reset(entry); @@ -353,26 +325,24 @@ archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) const void *value; char *encoded_value; char *url_encoded_name = NULL, *encoded_name = NULL; - wchar_t *wcs_name = NULL; size_t size; + int r; archive_entry_xattr_next(entry, &name, &value, &size); - /* Name is URL-encoded, then converted to wchar_t, - * then UTF-8 encoded. */ url_encoded_name = url_encode(name); if (url_encoded_name != NULL) { - /* Convert narrow-character to wide-character. */ - size_t wcs_length = strlen(url_encoded_name); - wcs_name = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t)); - if (wcs_name == NULL) - __archive_errx(1, "No memory for xattr conversion"); - mbstowcs(wcs_name, url_encoded_name, wcs_length); - wcs_name[wcs_length] = 0; + /* Convert narrow-character to UTF-8. */ + r = archive_strcpy_in_locale( + &(pax->l_url_encoded_name), + url_encoded_name, pax->sconv_utf8); free(url_encoded_name); /* Done with this. */ - } - if (wcs_name != NULL) { - encoded_name = utf8_encode(wcs_name); - free(wcs_name); /* Done with wchar_t name. */ + if (r == 0) + encoded_name = pax->l_url_encoded_name.s; + else if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Linkname"); + return (ARCHIVE_FATAL); + } } encoded_value = base64_encode((const char *)value, size); @@ -384,9 +354,99 @@ archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) add_pax_attr(&(pax->pax_header), s.s, encoded_value); archive_string_free(&s); } - free(encoded_name); free(encoded_value); } + return (ARCHIVE_OK); +} + +static int +get_entry_hardlink(struct archive_write *a, struct archive_entry *entry, + const char **name, size_t *length, struct archive_string_conv *sc) +{ + int r; + + r = archive_entry_hardlink_l(entry, name, length, sc); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Linkname"); + return (ARCHIVE_FATAL); + } + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); +} + +static int +get_entry_pathname(struct archive_write *a, struct archive_entry *entry, + const char **name, size_t *length, struct archive_string_conv *sc) +{ + int r; + + r = archive_entry_pathname_l(entry, name, length, sc); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Pathname"); + return (ARCHIVE_FATAL); + } + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); +} + +static int +get_entry_uname(struct archive_write *a, struct archive_entry *entry, + const char **name, size_t *length, struct archive_string_conv *sc) +{ + int r; + + r = archive_entry_uname_l(entry, name, length, sc); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Uname"); + return (ARCHIVE_FATAL); + } + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); +} + +static int +get_entry_gname(struct archive_write *a, struct archive_entry *entry, + const char **name, size_t *length, struct archive_string_conv *sc) +{ + int r; + + r = archive_entry_gname_l(entry, name, length, sc); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Gname"); + return (ARCHIVE_FATAL); + } + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); +} + +static int +get_entry_symlink(struct archive_write *a, struct archive_entry *entry, + const char **name, size_t *length, struct archive_string_conv *sc) +{ + int r; + + r = archive_entry_symlink_l(entry, name, length, sc); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Linkname"); + return (ARCHIVE_FATAL); + } + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); } /* @@ -394,6 +454,8 @@ archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) * archive_entry so that clients can specify them. Also, consider * adding generic key/value tags so clients can add arbitrary * key/value data. + * + * TODO: Break up this 700-line function!!!! Yowza! */ static int archive_write_pax_header(struct archive_write *a, @@ -402,27 +464,72 @@ archive_write_pax_header(struct archive_write *a, struct archive_entry *entry_main; const char *p; char *t; - const wchar_t *wp; const char *suffix; int need_extension, r, ret; + int sparse_count; + uint64_t sparse_total, real_size; struct pax *pax; - const char *hdrcharset = NULL; const char *hardlink; const char *path = NULL, *linkpath = NULL; const char *uname = NULL, *gname = NULL; - const wchar_t *path_w = NULL, *linkpath_w = NULL; - const wchar_t *uname_w = NULL, *gname_w = NULL; + const void *mac_metadata; + size_t mac_metadata_size; + struct archive_string_conv *sconv; + size_t hardlink_length, path_length, linkpath_length; + size_t uname_length, gname_length; char paxbuff[512]; char ustarbuff[512]; char ustar_entry_name[256]; char pax_entry_name[256]; + char gnu_sparse_name[256]; + struct archive_string entry_name; ret = ARCHIVE_OK; need_extension = 0; pax = (struct pax *)a->format_data; - hardlink = archive_entry_hardlink(entry_original); + /* Sanity check. */ + if (archive_entry_pathname(entry_original) == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Can't record entry in tar file without pathname"); + return (ARCHIVE_FAILED); + } + + /* + * Choose a header encoding. + */ + if (pax->opt_binary) + sconv = NULL;/* Binary mode. */ + else { + /* Header encoding is UTF-8. */ + if (pax->sconv_utf8 == NULL) { + /* Initialize the string conversion object + * we must need */ + pax->sconv_utf8 = archive_string_conversion_to_charset( + &(a->archive), "UTF-8", 1); + if (pax->sconv_utf8 == NULL) + /* Couldn't allocate memory */ + return (ARCHIVE_FAILED); + } + sconv = pax->sconv_utf8; + } + + r = get_entry_hardlink(a, entry_original, &hardlink, + &hardlink_length, sconv); + if (r == ARCHIVE_FATAL) + return (r); + else if (r != ARCHIVE_OK) { + r = get_entry_hardlink(a, entry_original, &hardlink, + &hardlink_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate linkname '%s' to %s", hardlink, + archive_string_conversion_charset_name(sconv)); + ret = ARCHIVE_WARN; + sconv = NULL;/* The header charset switches to binary mode. */ + } /* Make sure this is a type of entry that we can handle here */ if (hardlink == NULL) { @@ -456,68 +563,217 @@ archive_write_pax_header(struct archive_write *a, archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "tar format cannot archive socket"); - return (ARCHIVE_WARN); + return (ARCHIVE_FAILED); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "tar format cannot archive this (type=0%lo)", - (unsigned long)archive_entry_filetype(entry_original)); - return (ARCHIVE_WARN); + (unsigned long) + archive_entry_filetype(entry_original)); + return (ARCHIVE_FAILED); + } + } + + /* + * If Mac OS metadata blob is here, recurse to write that + * as a separate entry. This is really a pretty poor design: + * In particular, it doubles the overhead for long filenames. + * TODO: Help Apple folks design something better and figure + * out how to transition from this legacy format. + * + * Note that this code is present on every platform; clients + * on non-Mac are unlikely to ever provide this data, but + * applications that copy entries from one archive to another + * should not lose data just because the local filesystem + * can't store it. + */ + mac_metadata = + archive_entry_mac_metadata(entry_original, &mac_metadata_size); + if (mac_metadata != NULL) { + const char *oname; + char *name, *bname; + size_t name_length; + struct archive_entry *extra = archive_entry_new2(&a->archive); + + oname = archive_entry_pathname(entry_original); + name_length = strlen(oname); + name = malloc(name_length + 3); + if (name == NULL) { + /* XXX error message */ + return (ARCHIVE_FAILED); + } + strcpy(name, oname); + /* Find last '/'; strip trailing '/' characters */ + bname = strrchr(name, '/'); + while (bname != NULL && bname[1] == '\0') { + *bname = '\0'; + bname = strrchr(name, '/'); + } + if (bname == NULL) { + memmove(name + 2, name, name_length + 1); + memmove(name, "._", 2); + } else { + bname += 1; + memmove(bname + 2, bname, strlen(bname) + 1); + memmove(bname, "._", 2); } + archive_entry_copy_pathname(extra, name); + free(name); + + archive_entry_set_size(extra, mac_metadata_size); + archive_entry_set_filetype(extra, AE_IFREG); + archive_entry_set_perm(extra, + archive_entry_perm(entry_original)); + archive_entry_set_mtime(extra, + archive_entry_mtime(entry_original), + archive_entry_mtime_nsec(entry_original)); + archive_entry_set_gid(extra, + archive_entry_gid(entry_original)); + archive_entry_set_gname(extra, + archive_entry_gname(entry_original)); + archive_entry_set_uid(extra, + archive_entry_uid(entry_original)); + archive_entry_set_uname(extra, + archive_entry_uname(entry_original)); + + /* Recurse to write the special copyfile entry. */ + r = archive_write_pax_header(a, extra); + if (r < ARCHIVE_WARN) + return (r); + if (r < ret) + ret = r; + r = archive_write_pax_data(a, mac_metadata, mac_metadata_size); + if (r < ARCHIVE_WARN) + return (r); + if (r < ret) + ret = r; + r = archive_write_pax_finish_entry(a); + if (r < ARCHIVE_WARN) + return (r); + if (r < ret) + ret = r; } /* Copy entry so we can modify it as needed. */ entry_main = archive_entry_clone(entry_original); archive_string_empty(&(pax->pax_header)); /* Blank our work area. */ + archive_string_empty(&(pax->sparse_map)); + sparse_total = 0; + sparse_list_clear(pax); + + if (hardlink == NULL && + archive_entry_filetype(entry_main) == AE_IFREG) + sparse_count = archive_entry_sparse_reset(entry_main); + else + sparse_count = 0; + if (sparse_count) { + int64_t offset, length, last_offset = 0; + /* Get the last entry of sparse block. */ + while (archive_entry_sparse_next( + entry_main, &offset, &length) == ARCHIVE_OK) + last_offset = offset + length; + + /* If the last sparse block does not reach the end of file, + * We have to add a empty sparse block as the last entry to + * manage storing file data. */ + if (last_offset < archive_entry_size(entry_main)) + archive_entry_sparse_add_entry(entry_main, + archive_entry_size(entry_main), 0); + sparse_count = archive_entry_sparse_reset(entry_main); + } /* * First, check the name fields and see if any of them * require binary coding. If any of them does, then all of * them do. */ - hdrcharset = NULL; - path = archive_entry_pathname(entry_main); - path_w = archive_entry_pathname_w(entry_main); - if (path != NULL && path_w == NULL) { + r = get_entry_pathname(a, entry_main, &path, &path_length, sconv); + if (r == ARCHIVE_FATAL) + return (r); + else if (r != ARCHIVE_OK) { + r = get_entry_pathname(a, entry_main, &path, + &path_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Can't translate pathname '%s' to UTF-8", path); + "Can't translate pathname '%s' to %s", path, + archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; - hdrcharset = "BINARY"; + sconv = NULL;/* The header charset switches to binary mode. */ } - uname = archive_entry_uname(entry_main); - uname_w = archive_entry_uname_w(entry_main); - if (uname != NULL && uname_w == NULL) { + r = get_entry_uname(a, entry_main, &uname, &uname_length, sconv); + if (r == ARCHIVE_FATAL) + return (r); + else if (r != ARCHIVE_OK) { + r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Can't translate uname '%s' to UTF-8", uname); + "Can't translate uname '%s' to %s", uname, + archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; - hdrcharset = "BINARY"; + sconv = NULL;/* The header charset switches to binary mode. */ } - gname = archive_entry_gname(entry_main); - gname_w = archive_entry_gname_w(entry_main); - if (gname != NULL && gname_w == NULL) { + r = get_entry_gname(a, entry_main, &gname, &gname_length, sconv); + if (r == ARCHIVE_FATAL) + return (r); + else if (r != ARCHIVE_OK) { + r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Can't translate gname '%s' to UTF-8", gname); + "Can't translate gname '%s' to %s", gname, + archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; - hdrcharset = "BINARY"; + sconv = NULL;/* The header charset switches to binary mode. */ } linkpath = hardlink; - if (linkpath != NULL) { - linkpath_w = archive_entry_hardlink_w(entry_main); - } else { - linkpath = archive_entry_symlink(entry_main); - if (linkpath != NULL) - linkpath_w = archive_entry_symlink_w(entry_main); + linkpath_length = hardlink_length; + if (linkpath == NULL) { + r = get_entry_symlink(a, entry_main, &linkpath, + &linkpath_length, sconv); + if (r == ARCHIVE_FATAL) + return (r); + else if (r != ARCHIVE_OK) { + r = get_entry_symlink(a, entry_main, &linkpath, + &linkpath_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate linkname '%s' to %s", linkpath, + archive_string_conversion_charset_name(sconv)); + ret = ARCHIVE_WARN; + sconv = NULL; + } } - if (linkpath != NULL && linkpath_w == NULL) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Can't translate linkpath '%s' to UTF-8", linkpath); - ret = ARCHIVE_WARN; - hdrcharset = "BINARY"; + + /* If any string conversions failed, get all attributes + * in binary-mode. */ + if (sconv == NULL && !pax->opt_binary) { + if (hardlink != NULL) { + r = get_entry_hardlink(a, entry_main, &hardlink, + &hardlink_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); + linkpath = hardlink; + linkpath_length = hardlink_length; + } + r = get_entry_pathname(a, entry_main, &path, + &path_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); + r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); + r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL); + if (r == ARCHIVE_FATAL) + return (r); } /* Store the header encoding first, to be nice to readers. */ - if (hdrcharset != NULL) - add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset); + if (sconv == NULL) + add_pax_attr(&(pax->pax_header), "hdrcharset", "BINARY"); /* @@ -525,38 +781,25 @@ archive_write_pax_header(struct archive_write *a, * 'path' to pax extended attrs. (Note that an unconvertible * name must have non-ASCII characters.) */ - if (path == NULL) { - /* We don't have a narrow version, so we have to store - * the wide version. */ - add_pax_attr_w(&(pax->pax_header), "path", path_w); - archive_entry_set_pathname(entry_main, "@WidePath"); - need_extension = 1; - } else if (has_non_ASCII(path_w)) { + if (has_non_ASCII(path)) { /* We have non-ASCII characters. */ - if (path_w == NULL || hdrcharset != NULL) { - /* Can't do UTF-8, so store it raw. */ - add_pax_attr(&(pax->pax_header), "path", path); - } else { - /* Store UTF-8 */ - add_pax_attr_w(&(pax->pax_header), - "path", path_w); - } + add_pax_attr(&(pax->pax_header), "path", path); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, - path, strlen(path), NULL)); + path, path_length, NULL)); need_extension = 1; } else { /* We have an all-ASCII path; we'd like to just store * it in the ustar header if it will fit. Yes, this * duplicates some of the logic in - * write_set_format_ustar.c + * archive_write_set_format_ustar.c */ - if (strlen(path) <= 100) { + if (path_length <= 100) { /* Fits in the old 100-char tar name field. */ } else { /* Find largest suffix that will fit. */ /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */ - suffix = strchr(path + strlen(path) - 100 - 1, '/'); + suffix = strchr(path + path_length - 100 - 1, '/'); /* Don't attempt an empty prefix. */ if (suffix == path) suffix = strchr(suffix + 1, '/'); @@ -571,18 +814,10 @@ archive_write_pax_header(struct archive_write *a, || suffix[1] == '\0' /* empty suffix */ || suffix - path > 155) /* Prefix > 155 chars */ { - if (path_w == NULL || hdrcharset != NULL) { - /* Can't do UTF-8, so store it raw. */ - add_pax_attr(&(pax->pax_header), - "path", path); - } else { - /* Store UTF-8 */ - add_pax_attr_w(&(pax->pax_header), - "path", path_w); - } + add_pax_attr(&(pax->pax_header), "path", path); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, - path, strlen(path), NULL)); + path, path_length, NULL)); need_extension = 1; } } @@ -591,21 +826,9 @@ archive_write_pax_header(struct archive_write *a, if (linkpath != NULL) { /* If link name is too long or has non-ASCII characters, add * 'linkpath' to pax extended attrs. */ - if (strlen(linkpath) > 100 || linkpath_w == NULL - || linkpath_w == NULL || has_non_ASCII(linkpath_w)) { - if (linkpath_w == NULL || hdrcharset != NULL) - /* If the linkpath is not convertible - * to wide, or we're encoding in - * binary anyway, store it raw. */ - add_pax_attr(&(pax->pax_header), - "linkpath", linkpath); - else - /* If the link is long or has a - * non-ASCII character, store it as a - * pax extended attribute. */ - add_pax_attr_w(&(pax->pax_header), - "linkpath", linkpath_w); - if (strlen(linkpath) > 100) { + if (linkpath_length > 100 || has_non_ASCII(linkpath)) { + add_pax_attr(&(pax->pax_header), "linkpath", linkpath); + if (linkpath_length > 100) { if (hardlink != NULL) archive_entry_set_hardlink(entry_main, "././@LongHardLink"); @@ -616,6 +839,10 @@ archive_write_pax_header(struct archive_write *a, need_extension = 1; } } + /* Save a pathname since it will be renamed if `entry_main` has + * sparse blocks. */ + archive_string_init(&entry_name); + archive_strcpy(&entry_name, archive_entry_pathname(entry_main)); /* If file size is too large, add 'size' to pax extended attrs. */ if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) { @@ -634,17 +861,8 @@ archive_write_pax_header(struct archive_write *a, /* If group name is too large or has non-ASCII characters, add * 'gname' to pax extended attrs. */ if (gname != NULL) { - if (strlen(gname) > 31 - || gname_w == NULL - || has_non_ASCII(gname_w)) - { - if (gname_w == NULL || hdrcharset != NULL) { - add_pax_attr(&(pax->pax_header), - "gname", gname); - } else { - add_pax_attr_w(&(pax->pax_header), - "gname", gname_w); - } + if (gname_length > 31 || has_non_ASCII(gname)) { + add_pax_attr(&(pax->pax_header), "gname", gname); need_extension = 1; } } @@ -658,17 +876,8 @@ archive_write_pax_header(struct archive_write *a, /* Add 'uname' to pax extended attrs if necessary. */ if (uname != NULL) { - if (strlen(uname) > 31 - || uname_w == NULL - || has_non_ASCII(uname_w)) - { - if (uname_w == NULL || hdrcharset != NULL) { - add_pax_attr(&(pax->pax_header), - "uname", uname); - } else { - add_pax_attr_w(&(pax->pax_header), - "uname", uname_w); - } + if (uname_length > 31 || has_non_ASCII(uname)) { + add_pax_attr(&(pax->pax_header), "uname", uname); need_extension = 1; } } @@ -690,7 +899,7 @@ archive_write_pax_header(struct archive_write *a, * If rdevmajor is too large, add 'SCHILY.devmajor' to * extended attributes. */ - dev_t rdevmajor, rdevminor; + int rdevmajor, rdevminor; rdevmajor = archive_entry_rdevmajor(entry_main); rdevminor = archive_entry_rdevminor(entry_main); if (rdevmajor >= (1 << 18)) { @@ -756,6 +965,10 @@ archive_write_pax_header(struct archive_write *a, if (!need_extension && archive_entry_xattr_count(entry_original) > 0) need_extension = 1; + /* If there are sparse info, we need an extension */ + if (!need_extension && sparse_count > 0) + need_extension = 1; + /* * The following items are handled differently in "pax * restricted" format. In particular, in "pax restricted" @@ -800,31 +1013,98 @@ archive_write_pax_header(struct archive_write *a, add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p); /* I use star-compatible ACL attributes. */ - wp = archive_entry_acl_text_w(entry_original, + r = archive_entry_acl_text_l(entry_original, ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (wp != NULL && *wp != L'\0') - add_pax_attr_w(&(pax->pax_header), - "SCHILY.acl.access", wp); - wp = archive_entry_acl_text_w(entry_original, + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID, + &p, NULL, pax->sconv_utf8); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "ACL.access"); + return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate ACL.access to UTF-8"); + ret = ARCHIVE_WARN; + } else if (p != NULL && *p != '\0') { + add_pax_attr(&(pax->pax_header), + "SCHILY.acl.access", p); + } + r = archive_entry_acl_text_l(entry_original, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (wp != NULL && *wp != L'\0') - add_pax_attr_w(&(pax->pax_header), - "SCHILY.acl.default", wp); - - /* Include star-compatible metadata info. */ - /* Note: "SCHILY.dev{major,minor}" are NOT the - * major/minor portions of "SCHILY.dev". */ - add_pax_attr_int(&(pax->pax_header), "SCHILY.dev", - archive_entry_dev(entry_main)); - add_pax_attr_int(&(pax->pax_header), "SCHILY.ino", - archive_entry_ino64(entry_main)); - add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink", - archive_entry_nlink(entry_main)); + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID, + &p, NULL, pax->sconv_utf8); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "ACL.default"); + return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate ACL.default to UTF-8"); + ret = ARCHIVE_WARN; + } else if (p != NULL && *p != '\0') { + add_pax_attr(&(pax->pax_header), + "SCHILY.acl.default", p); + } + + /* We use GNU-tar-compatible sparse attributes. */ + if (sparse_count > 0) { + int64_t soffset, slength; + + add_pax_attr_int(&(pax->pax_header), + "GNU.sparse.major", 1); + add_pax_attr_int(&(pax->pax_header), + "GNU.sparse.minor", 0); + add_pax_attr(&(pax->pax_header), + "GNU.sparse.name", entry_name.s); + add_pax_attr_int(&(pax->pax_header), + "GNU.sparse.realsize", + archive_entry_size(entry_main)); + + /* Rename the file name which will be used for + * ustar header to a special name, which GNU + * PAX Format 1.0 requires */ + archive_entry_set_pathname(entry_main, + build_gnu_sparse_name(gnu_sparse_name, + entry_name.s)); + + /* + * - Make a sparse map, which will precede a file data. + * - Get the total size of available data of sparse. + */ + archive_string_sprintf(&(pax->sparse_map), "%d\n", + sparse_count); + while (archive_entry_sparse_next(entry_main, + &soffset, &slength) == ARCHIVE_OK) { + archive_string_sprintf(&(pax->sparse_map), + "%jd\n%jd\n", + (intmax_t)soffset, + (intmax_t)slength); + sparse_total += slength; + if (sparse_list_add(pax, soffset, slength) + != ARCHIVE_OK) { + archive_set_error(&a->archive, + ENOMEM, + "Can't allocate memory"); + archive_entry_free(entry_main); + archive_string_free(&entry_name); + return (ARCHIVE_FATAL); + } + } + } /* Store extended attributes */ - archive_write_pax_header_xattrs(pax, entry_original); + if (archive_write_pax_header_xattrs(a, pax, entry_original) + == ARCHIVE_FATAL) { + archive_entry_free(entry_main); + archive_string_free(&entry_name); + return (ARCHIVE_FATAL); + } } /* Only regular files have data. */ @@ -852,6 +1132,20 @@ archive_write_pax_header(struct archive_write *a, if (hardlink != NULL) archive_entry_set_size(entry_main, 0); + /* Save a real file size. */ + real_size = archive_entry_size(entry_main); + /* + * Overwrite a file size by the total size of sparse blocks and + * the size of sparse map info. That file size is the length of + * the data, which we will exactly store into an archive file. + */ + if (archive_strlen(&(pax->sparse_map))) { + size_t mapsize = archive_strlen(&(pax->sparse_map)); + pax->sparse_map_padding = 0x1ff & (-(ssize_t)mapsize); + archive_entry_set_size(entry_main, + mapsize + pax->sparse_map_padding + sparse_total); + } + /* Format 'ustar' header for main entry. * * The trouble with file size: If the reader can't understand @@ -878,30 +1172,31 @@ archive_write_pax_header(struct archive_write *a, * The non-strict formatter uses similar logic for other * numeric fields, though they're less critical. */ - __archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0); + if (__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0, + NULL) == ARCHIVE_FATAL) + return (ARCHIVE_FATAL); /* If we built any extended attributes, write that entry first. */ if (archive_strlen(&(pax->pax_header)) > 0) { struct archive_entry *pax_attr_entry; time_t s; - uid_t uid; - gid_t gid; - mode_t mode; + int64_t uid, gid; + int mode; - pax_attr_entry = archive_entry_new(); - p = archive_entry_pathname(entry_main); + pax_attr_entry = archive_entry_new2(&a->archive); + p = entry_name.s; archive_entry_set_pathname(pax_attr_entry, build_pax_attribute_name(pax_entry_name, p)); archive_entry_set_size(pax_attr_entry, archive_strlen(&(pax->pax_header))); /* Copy uid/gid (but clip to ustar limits). */ uid = archive_entry_uid(entry_main); - if ((unsigned int)uid >= 1 << 18) - uid = (uid_t)(1 << 18) - 1; + if (uid >= 1 << 18) + uid = (1 << 18) - 1; archive_entry_set_uid(pax_attr_entry, uid); gid = archive_entry_gid(entry_main); - if ((unsigned int)gid >= 1 << 18) - gid = (gid_t)(1 << 18) - 1; + if (gid >= 1 << 18) + gid = (1 << 18) - 1; archive_entry_set_gid(pax_attr_entry, gid); /* Copy mode over (but not setuid/setgid bits) */ mode = archive_entry_mode(entry_main); @@ -935,36 +1230,38 @@ archive_write_pax_header(struct archive_write *a, archive_entry_set_ctime(pax_attr_entry, 0, 0); r = __archive_write_format_header_ustar(a, paxbuff, - pax_attr_entry, 'x', 1); + pax_attr_entry, 'x', 1, NULL); archive_entry_free(pax_attr_entry); /* Note that the 'x' header shouldn't ever fail to format */ - if (r != 0) { - const char *msg = "archive_write_pax_header: " - "'x' header failed?! This can't happen.\n"; - size_t u = write(2, msg, strlen(msg)); - (void)u; /* UNUSED */ - exit(1); - } - r = (a->compressor.write)(a, paxbuff, 512); + if (r < ARCHIVE_WARN) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "archive_write_pax_header: " + "'x' header failed?! This can't happen.\n"); + return (ARCHIVE_FATAL); + } else if (r < ret) + ret = r; + r = __archive_write_output(a, paxbuff, 512); if (r != ARCHIVE_OK) { + sparse_list_clear(pax); pax->entry_bytes_remaining = 0; pax->entry_padding = 0; return (ARCHIVE_FATAL); } pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header)); - pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); + pax->entry_padding = + 0x1ff & (-(int64_t)pax->entry_bytes_remaining); - r = (a->compressor.write)(a, pax->pax_header.s, + r = __archive_write_output(a, pax->pax_header.s, archive_strlen(&(pax->pax_header))); if (r != ARCHIVE_OK) { /* If a write fails, we're pretty much toast. */ return (ARCHIVE_FATAL); } /* Pad out the end of the entry. */ - r = write_nulls(a, pax->entry_padding); + r = __archive_write_nulls(a, pax->entry_padding); if (r != ARCHIVE_OK) { /* If a write fails, we're pretty much toast. */ return (ARCHIVE_FATAL); @@ -973,7 +1270,7 @@ archive_write_pax_header(struct archive_write *a, } /* Write the header for main entry. */ - r = (a->compressor.write)(a, ustarbuff, 512); + r = __archive_write_output(a, ustarbuff, 512); if (r != ARCHIVE_OK) return (r); @@ -982,10 +1279,16 @@ archive_write_pax_header(struct archive_write *a, * they can avoid unnecessarily writing a body for something * that we're just going to ignore. */ - archive_entry_set_size(entry_original, archive_entry_size(entry_main)); - pax->entry_bytes_remaining = archive_entry_size(entry_main); - pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); + archive_entry_set_size(entry_original, real_size); + if (pax->sparse_list == NULL && real_size > 0) { + /* This is not a sparse file but we handle its data as + * a sparse block. */ + sparse_list_add(pax, 0, real_size); + sparse_total = real_size; + } + pax->entry_padding = 0x1ff & (-(int64_t)sparse_total); archive_entry_free(entry_main); + archive_string_free(&entry_name); return (ret); } @@ -1136,7 +1439,7 @@ build_ustar_entry_name(char *dest, const char *src, size_t src_length, * * Joerg Schilling has argued that this is unnecessary because, in * practice, if the pax extended attributes get extracted as regular - * files, noone is going to bother reading those attributes to + * files, no one is going to bother reading those attributes to * manually restore them. Based on this, 'star' uses * /tmp/PaxHeader/'basename' as the ustar header name. This is a * tempting argument, in part because it's simpler than the SUSv3 @@ -1200,27 +1503,71 @@ build_pax_attribute_name(char *dest, const char *src) /* If the platform can't fetch the pid, don't include it. */ strcpy(buff, "PaxHeader"); #endif - /* General case: build a ustar-compatible name adding "/PaxHeader/". */ + /* General case: build a ustar-compatible name adding + * "/PaxHeader/". */ build_ustar_entry_name(dest, src, p - src, buff); return (dest); } -/* Write two null blocks for the end of archive */ -static int -archive_write_pax_finish(struct archive_write *a) +/* + * GNU PAX Format 1.0 requires the special name, which pattern is: + * <dir>/GNUSparseFile.<pid>/<original file name> + * + * This function is used for only Sparse file, a file type of which + * is regular file. + */ +static char * +build_gnu_sparse_name(char *dest, const char *src) { - int r; + char buff[64]; + const char *p; - if (a->compressor.write == NULL) - return (ARCHIVE_OK); + /* Handle the null filename case. */ + if (src == NULL || *src == '\0') { + strcpy(dest, "GNUSparseFile/blank"); + return (dest); + } - r = write_nulls(a, 512 * 2); - return (r); + /* Prune final '/' and other unwanted final elements. */ + p = src + strlen(src); + for (;;) { + /* Ends in "/", remove the '/' */ + if (p > src && p[-1] == '/') { + --p; + continue; + } + /* Ends in "/.", remove the '.' */ + if (p > src + 1 && p[-1] == '.' + && p[-2] == '/') { + --p; + continue; + } + break; + } + +#if HAVE_GETPID && 0 /* Disable this as pax attribute name. */ + sprintf(buff, "GNUSparseFile.%d", getpid()); +#else + /* If the platform can't fetch the pid, don't include it. */ + strcpy(buff, "GNUSparseFile"); +#endif + /* General case: build a ustar-compatible name adding + * "/GNUSparseFile/". */ + build_ustar_entry_name(dest, src, p - src, buff); + + return (dest); } +/* Write two null blocks for the end of archive */ static int -archive_write_pax_destroy(struct archive_write *a) +archive_write_pax_close(struct archive_write *a) +{ + return (__archive_write_nulls(a, 512 * 2)); +} + +static int +archive_write_pax_free(struct archive_write *a) { struct pax *pax; @@ -1229,6 +1576,9 @@ archive_write_pax_destroy(struct archive_write *a) return (ARCHIVE_OK); archive_string_free(&pax->pax_header); + archive_string_free(&pax->sparse_map); + archive_string_free(&pax->l_url_encoded_name); + sparse_list_clear(pax); free(pax); a->format_data = NULL; return (ARCHIVE_OK); @@ -1238,56 +1588,97 @@ static int archive_write_pax_finish_entry(struct archive_write *a) { struct pax *pax; + uint64_t remaining; int ret; pax = (struct pax *)a->format_data; - ret = write_nulls(a, pax->entry_bytes_remaining + pax->entry_padding); + remaining = pax->entry_bytes_remaining; + if (remaining == 0) { + while (pax->sparse_list) { + struct sparse_block *sb; + if (!pax->sparse_list->is_hole) + remaining += pax->sparse_list->remaining; + sb = pax->sparse_list->next; + free(pax->sparse_list); + pax->sparse_list = sb; + } + } + ret = __archive_write_nulls(a, remaining + pax->entry_padding); pax->entry_bytes_remaining = pax->entry_padding = 0; return (ret); } -static int -write_nulls(struct archive_write *a, size_t padding) +static ssize_t +archive_write_pax_data(struct archive_write *a, const void *buff, size_t s) { + struct pax *pax; + size_t ws; + size_t total; int ret; - size_t to_write; - while (padding > 0) { - to_write = padding < a->null_length ? padding : a->null_length; - ret = (a->compressor.write)(a, a->nulls, to_write); + pax = (struct pax *)a->format_data; + + /* + * According to GNU PAX format 1.0, write a sparse map + * before the body. + */ + if (archive_strlen(&(pax->sparse_map))) { + ret = __archive_write_output(a, pax->sparse_map.s, + archive_strlen(&(pax->sparse_map))); + if (ret != ARCHIVE_OK) + return (ret); + ret = __archive_write_nulls(a, pax->sparse_map_padding); if (ret != ARCHIVE_OK) return (ret); - padding -= to_write; + archive_string_empty(&(pax->sparse_map)); } - return (ARCHIVE_OK); -} -static ssize_t -archive_write_pax_data(struct archive_write *a, const void *buff, size_t s) -{ - struct pax *pax; - int ret; + total = 0; + while (total < s) { + const unsigned char *p; - pax = (struct pax *)a->format_data; - if (s > pax->entry_bytes_remaining) - s = pax->entry_bytes_remaining; + while (pax->sparse_list != NULL && + pax->sparse_list->remaining == 0) { + struct sparse_block *sb = pax->sparse_list->next; + free(pax->sparse_list); + pax->sparse_list = sb; + } - ret = (a->compressor.write)(a, buff, s); - pax->entry_bytes_remaining -= s; - if (ret == ARCHIVE_OK) - return (s); - else - return (ret); + if (pax->sparse_list == NULL) + return (total); + + p = ((const unsigned char *)buff) + total; + ws = s - total; + if (ws > pax->sparse_list->remaining) + ws = pax->sparse_list->remaining; + + if (pax->sparse_list->is_hole) { + /* Current block is hole thus we do not write + * the body. */ + pax->sparse_list->remaining -= ws; + total += ws; + continue; + } + + ret = __archive_write_output(a, p, ws); + pax->sparse_list->remaining -= ws; + total += ws; + if (ret != ARCHIVE_OK) + return (ret); + } + return (total); } static int -has_non_ASCII(const wchar_t *wp) +has_non_ASCII(const char *_p) { - if (wp == NULL) + const unsigned char *p = (const unsigned char *)_p; + + if (p == NULL) return (1); - while (*wp != L'\0' && *wp < 128) - wp++; - return (*wp != L'\0'); + while (*p != '\0' && *p < 128) + p++; + return (*p != '\0'); } /* @@ -1384,3 +1775,60 @@ base64_encode(const char *s, size_t len) *d = '\0'; return (out); } + +static void +sparse_list_clear(struct pax *pax) +{ + while (pax->sparse_list != NULL) { + struct sparse_block *sb = pax->sparse_list; + pax->sparse_list = sb->next; + free(sb); + } + pax->sparse_tail = NULL; +} + +static int +_sparse_list_add_block(struct pax *pax, int64_t offset, int64_t length, + int is_hole) +{ + struct sparse_block *sb; + + sb = (struct sparse_block *)malloc(sizeof(*sb)); + if (sb == NULL) + return (ARCHIVE_FATAL); + sb->next = NULL; + sb->is_hole = is_hole; + sb->offset = offset; + sb->remaining = length; + if (pax->sparse_list == NULL) + pax->sparse_list = pax->sparse_tail = sb; + else { + pax->sparse_tail->next = sb; + pax->sparse_tail = sb; + } + return (ARCHIVE_OK); +} + +static int +sparse_list_add(struct pax *pax, int64_t offset, int64_t length) +{ + int64_t last_offset; + int r; + + if (pax->sparse_tail == NULL) + last_offset = 0; + else { + last_offset = pax->sparse_tail->offset + + pax->sparse_tail->remaining; + } + if (last_offset < offset) { + /* Add a hole block. */ + r = _sparse_list_add_block(pax, last_offset, + offset - last_offset, 1); + if (r != ARCHIVE_OK) + return (r); + } + /* Add data block. */ + return (_sparse_list_add_block(pax, offset, length, 0)); +} + |