diff options
author | kientzle <kientzle@FreeBSD.org> | 2006-01-17 03:40:42 +0000 |
---|---|---|
committer | kientzle <kientzle@FreeBSD.org> | 2006-01-17 03:40:42 +0000 |
commit | 2b45598dea8a40a88892f8e349d778469cf34ab2 (patch) | |
tree | 1b150f6f4a287e4a1542c115e5ce00549972cb76 /lib | |
parent | e042fa9ba27e0ade0e877579668d9ff0dca323af (diff) | |
download | FreeBSD-src-2b45598dea8a40a88892f8e349d778469cf34ab2.zip FreeBSD-src-2b45598dea8a40a88892f8e349d778469cf34ab2.tar.gz |
Add support for "tp" format. tp was the standard system
archiver for Fourth Edition through Sixth Edition Unix; it was
replaced by tar in Seventh Edition. (First Edition through
Third Edition used "tap.")
Unfortunately, tp was not so very standard; there were a
few different variants. The code here attempts to support
what I believe were the most common variants.
tp support is not yet enabled by archive_read_support_format_all(),
as I'm not yet entirely comfortable with the detection
heuristics. People interested in experimenting can
add archive_read_support_format_tp() just after any calls
to archive_read_support_format_all() in bsdtar to see how
well this works.
TODO: tp format is roughly similar in structure to dump/restore
archive formats used by many systems. It should be possible
to generalize this code to handle many dump/restore variants.
Format detection heuristics are going to be rough, though.
Thanks to: Warren Toomey, whose very basic tp extraction programs
and documentation made this possible.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libarchive/Makefile | 3 | ||||
-rw-r--r-- | lib/libarchive/archive.h.in | 1 | ||||
-rw-r--r-- | lib/libarchive/archive_read_support_format_tp.c | 618 | ||||
-rw-r--r-- | lib/libarchive/libarchive-formats.5 | 10 |
4 files changed, 631 insertions, 1 deletions
diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile index 41a1ee7..a677044 100644 --- a/lib/libarchive/Makefile +++ b/lib/libarchive/Makefile @@ -25,7 +25,7 @@ ARCHIVE_API_MAJOR= 1 # Note: Do NOT reset this to zero after bumping ARCHIVE_API_MAJOR! ARCHIVE_API_MINOR= 2 # Bumped often. ;-) -ARCHIVE_API_REVISION= 36 +ARCHIVE_API_REVISION= 37 # Full libarchive version combines the above three numbers. VERSION= ${ARCHIVE_API_MAJOR}.${ARCHIVE_API_MINOR}.${ARCHIVE_API_REVISION} @@ -68,6 +68,7 @@ BASE_SRCS= archive_check_magic.c \ archive_read_support_format_cpio.c \ archive_read_support_format_iso9660.c \ archive_read_support_format_tar.c \ + archive_read_support_format_tp.c \ archive_read_support_format_zip.c \ archive_string.c \ archive_string_sprintf.c \ diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in index 666917c..a7427b2 100644 --- a/lib/libarchive/archive.h.in +++ b/lib/libarchive/archive.h.in @@ -180,6 +180,7 @@ int archive_read_support_format_cpio(struct archive *); int archive_read_support_format_gnutar(struct archive *); int archive_read_support_format_iso9660(struct archive *); int archive_read_support_format_tar(struct archive *); +int archive_read_support_format_tp(struct archive *); int archive_read_support_format_zip(struct archive *); diff --git a/lib/libarchive/archive_read_support_format_tp.c b/lib/libarchive/archive_read_support_format_tp.c new file mode 100644 index 0000000..e3f63c3 --- /dev/null +++ b/lib/libarchive/archive_read_support_format_tp.c @@ -0,0 +1,618 @@ +/*- + * Copyright (c) 2003-2005 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> + +#include <errno.h> +/* #include <stdint.h> */ /* See archive_platform.h */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" +#include "archive_string.h" + +/* + * 'tp' was the common archiving format for Fourth Edition through + * Sixth Edition Unix. It was replaced by 'tar' in Seventh Edition. + * (First through Third Edition used the 'tap' archiver.) + * + * The format has a 512-byte boot block, followed by a table of + * contents listing all of the files in the archive, followed by + * the file data. Like 'tar', it is block-oriented; file data is + * padded to a whole number of blocks. + * + * There are three different variants with slightly different TOC + * formats: + * Original tp: 64-byte TOC entries with 32-byte pathnames. + * Ian Johnson's AGSM 'itp': 64-byte TOC entries with 48-byte pathnames + * 'dtp' ???: 128-byte TOC entries with 114-byte pathnames. + * + * All variants store similar metadata: 16-bit mode, 8-bit uid/gid, + * 24-bit size, 32-bit timestamp. (The later 'tar' format extended + * these fields and added link support. The earlier 'tap' format used + * narrower 8-bit mode and 16-bit size.) + */ + +/* + * The support code here reads the entire TOC into memory + * up front. The following structure is used to store + * a single TOC record in memory. + */ +struct file_info { + unsigned int offset; /* Offset in archive. */ + unsigned int size; /* File size in bytes. */ + time_t mtime; /* File last modified time. */ + mode_t mode; + uid_t uid; + gid_t gid; + char *name; /* Null-terminated filename. */ +}; + +/* + * Format-specific data. + */ +struct tp { + int bid; /* If non-zero, return this as our bid. */ + + struct file_info **pending_files; + int pending_files_allocated; + int pending_files_used; + + uint64_t current_position; + int64_t entry_bytes_remaining; + int64_t entry_sparse_offset; + int fake_inode; + int fake_dev; + + /* + * Pointer to a function to parse the dir entry for + * the selected format. + */ + struct file_info *(*parse_file_info)(struct archive *, const void *); + ssize_t toc_size; + int toc_read; /* True if we've already read the TOC. */ +}; + +static void add_entry(struct tp *tp, struct file_info *file); +static int archive_read_format_tp_bid(struct archive *); +static int archive_read_format_tp_cleanup(struct archive *); +static int archive_read_format_tp_read_data(struct archive *, + const void **, size_t *, off_t *); +static int archive_read_format_tp_read_header(struct archive *, + struct archive_entry *); +static struct file_info *next_entry(struct tp *); +static int next_entry_seek(struct archive *a, struct tp *tp, + struct file_info **pfile); +static struct file_info *parse_file_info_tp(struct archive *, const void *); +static struct file_info *parse_file_info_itp(struct archive *, const void *); +static void release_file(struct tp *, struct file_info *); +static int toi(const void *p, int n); + +int +archive_read_support_format_tp(struct archive *a) +{ + struct tp *tp; + int r; + + tp = malloc(sizeof(*tp)); + if (tp == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate tp data"); + return (ARCHIVE_FATAL); + } + memset(tp, 0, sizeof(*tp)); + tp->bid = -1; /* We haven't yet bid. */ + + r = __archive_read_register_format(a, + tp, + archive_read_format_tp_bid, + archive_read_format_tp_read_header, + archive_read_format_tp_read_data, + NULL, + archive_read_format_tp_cleanup); + + if (r != ARCHIVE_OK) { + free(tp); + return (r); + } + return (ARCHIVE_OK); +} + +static int +archive_read_format_tp_bid(struct archive *a) +{ + struct tp *tp; + ssize_t bytes_read; + const void *h; + const char *p; + int toc_count; + + tp = *(a->pformat_data); + + if (tp->bid >= 0) + return (tp->bid); + + /* Read a large initial block and inspect it to see + * if it looks like a tp TOC. */ + bytes_read = (a->compression_read_ahead)(a, &h, 8192); + if (bytes_read < 1024) + return (tp->bid = 0); + + p = (const char *)h; + + /* Skip the 512-byte boot block. */ + bytes_read -= 512; + p += 512; + + /* + * Check that there is something that looks like a tp TOC + * entry located every 64 bytes. + */ + tp->parse_file_info = parse_file_info_tp; + tp->toc_size = 64; + toc_count = 0; + while (bytes_read > 64 && p[0] != '\0') { + /* Null-terminated ASCII pathname starts at beginning + * of block and is no more than 32 characters long for + * tp format, 48 for 'itp' format. */ + const char *pn = p; + while (*pn >= 0x20 && *pn <= 0x7e && pn < p + 64) { + /* backslash is illegal in filenames */ + if (*pn == '\\') + return (tp->bid = 0); + pn++; + } + if (pn > p + 48) /* String longer than 48 chars? */ + return (tp->bid = 0); + /* Must be Ian Johnson's AGSM extended version. */ + if (pn > p + 32) + tp->parse_file_info = parse_file_info_itp; + if (*pn != '\0') /* Has non-ASCII character. */ + return (tp->bid = 0); + /* We've checked ~1 bit for each character. */ + tp->bid += pn - p; + + /* + * TODO: sanity-test the mode field; the upper bits + * of the mode should have only one of a small number + * of valid file types. + */ + toc_count++; + p += tp->toc_size; + } + + /* + * We now know how many TOC entries we have in memory. + * Read the offset/size values into memory, sort, and verify + * that they define non-overlapping blocks in the archive. + */ + { + struct block_info { uint64_t offset; uint64_t size; } *blocks; + struct block_info t; + int i, not_sorted; + + blocks = malloc(sizeof(*blocks) * toc_count); + memset(blocks, 0, sizeof(*blocks) * toc_count); + p = (const char *)h; + p += 512; + for (i = 0; i < toc_count; i++) { + /* TODO: If this is itp, use different offsets. */ + blocks[i].size = toi(p + 37, 3); + blocks[i].offset = toi(p + 44, 2) * 512; + p += 64; + /* TODO: If this is dtp, use different offsets and stride. */ + } + + /* + * Sort blocks by offset, just in case the entries + * aren't already in sorted order. Because we expect + * the entries to already be sorted, a bubble sort is + * actually appropriate: it's O(n) on already-sorted + * data, compared to O(n log n) for quicksort or merge + * sort and O(n^2) for insertion sort. + */ + do { + not_sorted = 0; + for (i = 0; i < toc_count - 1; i++) { + if (blocks[i].offset > blocks[i + 1].offset) { + t = blocks[i]; + blocks[i] = blocks[i + 1]; + blocks[i + 1] = t; + not_sorted = 1; + } + } + } while (not_sorted); + + /* Check that blocks don't overlap. */ + for (i = 0; i < toc_count - 1; i++) { + if (blocks[i].offset + blocks[i].size + > blocks[i + 1].offset) + { + free(blocks); + return (tp->bid = 0); + } + } + } + + return (tp->bid); +} + +static int +archive_read_format_tp_read_header(struct archive *a, + struct archive_entry *entry) +{ + struct stat st; + struct tp *tp; + struct file_info *file; + const char *p; + ssize_t bytes_read; + int r; + + tp = *(a->pformat_data); + + /* Read the entire TOC first. */ + if (!tp->toc_read) { + /* Skip the initial block. */ + bytes_read = (a->compression_read_ahead)(a, + (const void **)&p, 512); + if (bytes_read < 512) + return (ARCHIVE_FATAL); + bytes_read = 512; + tp->current_position += bytes_read; + (a->compression_read_consume)(a, bytes_read); + + /* Consume TOC entries. */ + do { + bytes_read = (a->compression_read_ahead)(a, + (const void **)&p, tp->toc_size); + if (bytes_read < tp->toc_size) + return (ARCHIVE_FATAL); + bytes_read = tp->toc_size; + tp->current_position += bytes_read; + (a->compression_read_consume)(a, bytes_read); + file = (*tp->parse_file_info)(a, p); + if (file != NULL) + add_entry(tp, file); + else if (p[0] != '\0') + /* NULL is okay if this is the sentinel. */ + return (ARCHIVE_FATAL); + } while (p[0] != '\0'); + + tp->toc_read = 1; + } + + /* Get the next entry that appears after the current offset. */ + r = next_entry_seek(a, tp, &file); + if (r != ARCHIVE_OK) + return (r); + + tp->entry_bytes_remaining = file->size; + tp->entry_sparse_offset = 0; /* Offset for sparse-file-aware clients */ + + /* Set up the entry structure with information about this entry. */ + memset(&st, 0, sizeof(st)); + st.st_mode = file->mode; + st.st_uid = file->uid; + st.st_gid = file->gid; + st.st_nlink = 1; + if (++tp->fake_inode > 0xfff0) { + tp->fake_inode = 1; + tp->fake_dev++; + } + st.st_ino = tp->fake_inode; + st.st_dev = tp->fake_dev; + st.st_mtime = file->mtime; + st.st_ctime = file->mtime; + st.st_atime = file->mtime; + st.st_size = tp->entry_bytes_remaining; + archive_entry_copy_stat(entry, &st); + archive_entry_set_pathname(entry, file->name); + + release_file(tp, file); + return (ARCHIVE_OK); +} + +static int +archive_read_format_tp_read_data(struct archive *a, + const void **buff, size_t *size, off_t *offset) +{ + ssize_t bytes_read; + struct tp *tp; + + tp = *(a->pformat_data); + if (tp->entry_bytes_remaining <= 0) { + *buff = NULL; + *size = 0; + *offset = tp->entry_sparse_offset; + return (ARCHIVE_EOF); + } + + bytes_read = (a->compression_read_ahead)(a, buff, 1); + if (bytes_read == 0) + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Truncated input file"); + if (bytes_read <= 0) + return (ARCHIVE_FATAL); + if (bytes_read > tp->entry_bytes_remaining) + bytes_read = tp->entry_bytes_remaining; + *size = bytes_read; + *offset = tp->entry_sparse_offset; + tp->entry_sparse_offset += bytes_read; + tp->entry_bytes_remaining -= bytes_read; + tp->current_position += bytes_read; + (a->compression_read_consume)(a, bytes_read); + return (ARCHIVE_OK); +} + +static int +archive_read_format_tp_cleanup(struct archive *a) +{ + struct tp *tp; + struct file_info *file; + + tp = *(a->pformat_data); + while ((file = next_entry(tp)) != NULL) + release_file(tp, file); + free(tp); + *(a->pformat_data) = NULL; + return (ARCHIVE_OK); +} + +/* + * This routine parses a single directory record. + */ +static struct file_info * +parse_file_info_tp(struct archive *a, const void *dir_p) +{ + struct file_info *file; + const struct tpdir { + char name[32]; + char mode[2]; + char uid[1]; + char gid[1]; + char unused[1]; + char size[3]; + char modtime[4]; + char tapeaddr[2]; + char unused2[16]; + char checksum[2]; + } *p = dir_p; + + (void)a; /* UNUSED */ + + /* Create a new file entry and copy data from the dir record. */ + file = malloc(sizeof(*file)); + if (file == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate TOC record"); + return (NULL); + } + memset(file, 0, sizeof(*file)); + + file->name = malloc(sizeof(p->name) + 1); + if (file->name == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate TOC name"); + free(file); + return (NULL); + } + memcpy(file->name, p->name, sizeof(p->name)); + file->name[sizeof(p->name)] = '\0'; + /* If name wasn't null-terminated, then it's not valid. */ + if (strlen(file->name) == sizeof(p->name) || strlen(file->name) == 0) { + archive_set_error(a, ENOMEM, "Damaged tp archive; invalid TOC"); + free(file->name); + free(file); + return (NULL); + } + file->offset = toi(p->tapeaddr, sizeof(p->tapeaddr)) * 512; + file->size = toi(p->size, sizeof(p->size)); + file->mtime = toi(p->modtime, sizeof(p->modtime)); + file->mode = toi(p->mode, sizeof(p->mode)); + file->uid = toi(p->uid, sizeof(p->uid)); + file->gid = toi(p->gid, sizeof(p->gid)); + return (file); +} + + +/* + * Ian Johnson's extended tp for AGSM eliminated the 16 pad bytes and + * extnded the name field, allowing for 48 byte names. + */ +static struct file_info * +parse_file_info_itp(struct archive *a, const void *dir_p) +{ + struct file_info *file; + const struct itpdir { + char name[48]; + char mode[2]; + char uid[1]; + char gid[1]; + char unused[1]; + char size[3]; + char modtime[4]; + char tapeaddr[2]; + char checksum[2]; + } *p = dir_p; + + (void)a; /* UNUSED */ + + /* Create a new file entry and copy data from the dir record. */ + file = malloc(sizeof(*file)); + if (file == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate TOC record"); + return (NULL); + } + memset(file, 0, sizeof(*file)); + + file->name = malloc(sizeof(p->name) + 1); + if (file->name == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate TOC name"); + free(file); + return (NULL); + } + memcpy(file->name, p->name, sizeof(p->name)); + file->name[sizeof(p->name)] = '\0'; + /* If name wasn't null-terminated, then it's not valid. */ + if (strlen(file->name) == sizeof(p->name) || strlen(file->name) == 0) { + archive_set_error(a, ENOMEM, "Damaged tp archive; invalid TOC"); + free(file->name); + free(file); + return (NULL); + } + file->offset = toi(p->tapeaddr, sizeof(p->tapeaddr)) * 512; + file->size = toi(p->size, sizeof(p->size)); + file->mtime = toi(p->modtime, sizeof(p->modtime)); + file->mode = toi(p->mode, sizeof(p->mode)); + file->uid = toi(p->uid, sizeof(p->uid)); + file->gid = toi(p->gid, sizeof(p->gid)); + return (file); +} + +static void +add_entry(struct tp *tp, struct file_info *file) +{ + /* Expand our pending files list as necessary. */ + if (tp->pending_files_used >= tp->pending_files_allocated) { + struct file_info **new_pending_files; + int new_size = tp->pending_files_allocated * 2; + + if (new_size < 1024) + new_size = 1024; + new_pending_files = malloc(new_size * sizeof(new_pending_files[0])); + if (new_pending_files == NULL) + __archive_errx(1, "Out of memory"); + memcpy(new_pending_files, tp->pending_files, + tp->pending_files_allocated * sizeof(new_pending_files[0])); + if (tp->pending_files != NULL) + free(tp->pending_files); + tp->pending_files = new_pending_files; + tp->pending_files_allocated = new_size; + } + + tp->pending_files[tp->pending_files_used++] = file; +} + +static void +release_file(struct tp *tp, struct file_info *file) +{ + (void)tp; /* UNUSED */ + if (file->name) + free(file->name); + free(file); +} + +static int +next_entry_seek(struct archive *a, struct tp *tp, + struct file_info **pfile) +{ + struct file_info *file; + uint64_t offset; + + *pfile = NULL; + for (;;) { + *pfile = file = next_entry(tp); + if (file == NULL) + return (ARCHIVE_EOF); + offset = file->offset; + + /* Seek forward to the start of the entry. */ + while (tp->current_position < offset) { + ssize_t step = offset - tp->current_position; + ssize_t bytes_read; + const void *buff; + + if (step > 512) + step = 512; + bytes_read = (a->compression_read_ahead)(a, &buff, step); + if (bytes_read <= 0) { + release_file(tp, file); + return (ARCHIVE_FATAL); + } + if (bytes_read > step) + bytes_read = step; + tp->current_position += bytes_read; + (a->compression_read_consume)(a, bytes_read); + } + + /* We found body of file; handle it now. */ + if (offset == file->offset) + return (ARCHIVE_OK); + } +} + +static struct file_info * +next_entry(struct tp *tp) +{ + int least_index; + uint64_t least_offset; + int i; + struct file_info *r; + + if (tp->pending_files_used < 1) + return (NULL); + + /* Assume the first file in the list is the earliest on disk. */ + least_index = 0; + least_offset = tp->pending_files[0]->offset; + + /* Now, try to find an earlier one. */ + for(i = 0; i < tp->pending_files_used; i++) { + uint64_t offset = tp->pending_files[i]->offset; + if (least_offset > offset) { + least_index = i; + least_offset = offset; + } + } + r = tp->pending_files[least_index]; + tp->pending_files[least_index] + = tp->pending_files[--tp->pending_files_used]; + return (r); +} + +/* + * 'tp' format was developed for PDP-11, so it uses the screwy PDP-11 + * byte order, which is big-endian words, little-endian bytes within a + * word. In particular, the 32-bit value 0x44332211 gets stored as + * four bytes: 0x33 0x44 0x11 0x22 + */ +static int +toi(const void *p, int n) +{ + const unsigned char *v = (const unsigned char *)p; + switch(n) { + case 1: return (v[0]); + case 2: return (v[0] + v[1] * 0x100); + case 3: return (v[0] * 0x10000 + toi(v + 1, 2)); + case 4: return (toi(v, 2) * 0x10000 + toi(v + 2, 2)); + default: return (0); + } +} diff --git a/lib/libarchive/libarchive-formats.5 b/lib/libarchive/libarchive-formats.5 index 7ed18da..8dc2d2a 100644 --- a/lib/libarchive/libarchive-formats.5 +++ b/lib/libarchive/libarchive-formats.5 @@ -235,6 +235,16 @@ compressed with the .Dq deflate algorithm. Older zip compression algorithms are not supported. +.Ss Tp Formats +The libarchive library has experimental support for tp format, +which was used in Fourth Edition through Sixth Edition Unix. +(It was supplanted by tar in Seventh Edition Unix.) +There were several distinct variants of this format; libarchive +supports the original tp format and the itp variant. +Currently, tp format support is not enabled by +.Fn archive_read_support_format_all , +it must be explicitly enabled by calling +.Fn archive_read_support_format_tp . .Sh SEE ALSO .Xr cpio 1 , .Xr mkisofs 1 , |