diff options
Diffstat (limited to 'bin/pax')
-rw-r--r-- | bin/pax/Makefile | 32 | ||||
-rw-r--r-- | bin/pax/ar_io.c | 1288 | ||||
-rw-r--r-- | bin/pax/ar_subs.c | 1238 | ||||
-rw-r--r-- | bin/pax/buf_subs.c | 1083 | ||||
-rw-r--r-- | bin/pax/cache.c | 479 | ||||
-rw-r--r-- | bin/pax/cache.h | 74 | ||||
-rw-r--r-- | bin/pax/cpio.c | 1278 | ||||
-rw-r--r-- | bin/pax/cpio.h | 151 | ||||
-rw-r--r-- | bin/pax/extern.h | 285 | ||||
-rw-r--r-- | bin/pax/file_subs.c | 1055 | ||||
-rw-r--r-- | bin/pax/ftree.c | 543 | ||||
-rw-r--r-- | bin/pax/ftree.h | 50 | ||||
-rw-r--r-- | bin/pax/gen_subs.c | 487 | ||||
-rw-r--r-- | bin/pax/options.c | 1140 | ||||
-rw-r--r-- | bin/pax/options.h | 113 | ||||
-rw-r--r-- | bin/pax/pat_rep.c | 1196 | ||||
-rw-r--r-- | bin/pax/pat_rep.h | 54 | ||||
-rw-r--r-- | bin/pax/pax.1 | 1169 | ||||
-rw-r--r-- | bin/pax/pax.c | 405 | ||||
-rw-r--r-- | bin/pax/pax.h | 238 | ||||
-rw-r--r-- | bin/pax/sel_subs.c | 657 | ||||
-rw-r--r-- | bin/pax/sel_subs.h | 73 | ||||
-rw-r--r-- | bin/pax/tables.c | 1426 | ||||
-rw-r--r-- | bin/pax/tables.h | 172 | ||||
-rw-r--r-- | bin/pax/tar.c | 1192 | ||||
-rw-r--r-- | bin/pax/tar.h | 148 | ||||
-rw-r--r-- | bin/pax/tty_subs.c | 245 |
27 files changed, 16271 insertions, 0 deletions
diff --git a/bin/pax/Makefile b/bin/pax/Makefile new file mode 100644 index 0000000..578f65a --- /dev/null +++ b/bin/pax/Makefile @@ -0,0 +1,32 @@ +# @(#)Makefile 8.1 (Berkeley) 5/31/93 + +# To install on versions prior to BSD 4.4 the following may have to be +# defined with CFLAGS += +# +# -DNET2_STAT Use NET2 or older stat structure. The version of the +# stat structure is easily determined by looking at the +# basic type of an off_t (often defined in the file: +# /usr/include/sys/types.h). If off_t is a long (and is +# NOT A quad) then you must define NET2_STAT. +# This define is important, as if you do have a quad_t +# off_t and define NET2_STAT, pax will compile but will +# NOT RUN PROPERLY. +# +# -DNET2_FTS Use the older NET2 fts. To identify the version, +# examine the file: /usr/include/fts.h. If FTS_COMFOLLOW +# is not defined then you must define NET2_FTS. +# Pax may not compile if this not (un)defined properly. +# +# -DNET2_REGEX Use the older regexp.h not regex.h. The regex version +# is determined by looking at the value returned by +# regexec() (man 3 regexec). If regexec return a 1 for +# success (and NOT a 0 for success) you have the older +# regex routines and must define NET2_REGEX. +# Pax may not compile if this not (un)defined properly. + +PROG= pax +SRCS= ar_io.c ar_subs.c buf_subs.c cache.c cpio.c file_subs.c ftree.c\ + gen_subs.c options.c pat_rep.c pax.c sel_subs.c tables.c tar.c\ + tty_subs.c + +.include <bsd.prog.mk> diff --git a/bin/pax/ar_io.c b/bin/pax/ar_io.c new file mode 100644 index 0000000..07eb77f --- /dev/null +++ b/bin/pax/ar_io.c @@ -0,0 +1,1288 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)ar_io.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/mtio.h> +#include <sys/param.h> +#include <signal.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include "pax.h" +#include "extern.h" + +/* + * Routines which deal directly with the archive I/O device/file. + */ + +#define DMOD 0666 /* default mode of created archives */ +#define EXT_MODE O_RDONLY /* open mode for list/extract */ +#define AR_MODE (O_WRONLY | O_CREAT | O_TRUNC) /* mode for archive */ +#define APP_MODE O_RDWR /* mode for append */ +#define STDO "<STDOUT>" /* psuedo name for stdout */ +#define STDN "<STDIN>" /* psuedo name for stdin */ +static int arfd = -1; /* archive file descriptor */ +static int artyp = ISREG; /* archive type: file/FIFO/tape */ +static int arvol = 1; /* archive volume number */ +static int lstrval = -1; /* return value from last i/o */ +static int io_ok; /* i/o worked on volume after resync */ +static int did_io; /* did i/o ever occur on volume? */ +static int done; /* set via tty termination */ +static struct stat arsb; /* stat of archive device at open */ +static int invld_rec; /* tape has out of spec record size */ +static int wr_trail = 1; /* trailer was rewritten in append */ +static int can_unlnk = 0; /* do we unlink null archives? */ +char *arcname; /* printable name of archive */ + +static int get_phys __P((void)); +extern sigset_t s_mask; + +/* + * ar_open() + * Opens the next archive volume. Determines the type of the device and + * sets up block sizes as required by the archive device and the format. + * Note: we may be called with name == NULL on the first open only. + * Return: + * -1 on failure, 0 otherwise + */ + +#if __STDC__ +int +ar_open(char *name) +#else +int +ar_open(name) + char *name; +#endif +{ + struct mtget mb; + + if (arfd != -1) + (void)close(arfd); + arfd = -1; + can_unlnk = did_io = io_ok = invld_rec = 0; + artyp = ISREG; + flcnt = 0; + + /* + * open based on overall operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + if (name == NULL) { + arfd = STDIN_FILENO; + arcname = STDN; + } else if ((arfd = open(name, EXT_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to read on %s", name); + break; + case ARCHIVE: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, AR_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to write on %s", name); + else + can_unlnk = 1; + break; + case APPND: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, APP_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to read/write on %s", + name); + break; + case COPY: + /* + * arfd not used in COPY mode + */ + arcname = "<NONE>"; + lstrval = 1; + return(0); + } + if (arfd < 0) + return(-1); + + /* + * set up is based on device type + */ + if (fstat(arfd, &arsb) < 0) { + syswarn(0, errno, "Failed stat on %s", arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + if (S_ISDIR(arsb.st_mode)) { + warn(0, "Cannot write an archive on top of a directory %s", + arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + + if (S_ISCHR(arsb.st_mode)) + artyp = ioctl(arfd, MTIOCGET, &mb) ? ISCHR : ISTAPE; + else if (S_ISBLK(arsb.st_mode)) + artyp = ISBLK; + else if ((lseek(arfd, (off_t)0L, SEEK_CUR) == -1) && (errno == ESPIPE)) + artyp = ISPIPE; + else + artyp = ISREG; + + /* + * make sure we beyond any doubt that we only can unlink regular files + * we created + */ + if (artyp != ISREG) + can_unlnk = 0; + /* + * if we are writing, we are done + */ + if (act == ARCHIVE) { + blksz = rdblksz = wrblksz; + lstrval = 1; + return(0); + } + + /* + * set default blksz on read. APPNDs writes rdblksz on the last volume + * On all new archive volumes, we shift to wrblksz (if the user + * specified one, otherwize we will continue to use rdblksz). We + * must to set blocksize based on what kind of device the archive is + * stored. + */ + switch(artyp) { + case ISTAPE: + /* + * Tape drives come in at least two flavors. Those that support + * variable sized records and those that have fixed sized + * records. They must be treated differently. For tape drives + * that support variable sized records, we must make large + * reads to make sure we get the entire record, otherwise we + * will just get the first part of the record (up to size we + * asked). Tapes with fixed sized records may or may not return + * multiple records in a single read. We really do not care + * what the physical record size is UNLESS we are going to + * append. (We will need the physical block size to rewrite + * the trailer). Only when we are appending do we go to the + * effort to figure out the true PHYSICAL record size. + */ + blksz = rdblksz = MAXBLK; + break; + case ISPIPE: + case ISBLK: + case ISCHR: + /* + * Blocksize is not a major issue with these devices (but must + * be kept a multiple of 512). If the user specified a write + * block size, we use that to read. Under append, we must + * always keep blksz == rdblksz. Otherwise we go ahead and use + * the device optimal blocksize as (and if) returned by stat + * and if it is within pax specs. + */ + if ((act == APPND) && wrblksz) { + blksz = rdblksz = wrblksz; + break; + } + + if ((arsb.st_blksize > 0) && (arsb.st_blksize < MAXBLK) && + ((arsb.st_blksize % BLKMULT) == 0)) + rdblksz = arsb.st_blksize; + else + rdblksz = DEVBLK; + /* + * For performance go for large reads when we can without harm + */ + if ((act == APPND) || (artyp == ISCHR)) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + case ISREG: + /* + * if the user specified wrblksz works, use it. Under appends + * we must always keep blksz == rdblksz + */ + if ((act == APPND) && wrblksz && ((arsb.st_size%wrblksz)==0)){ + blksz = rdblksz = wrblksz; + break; + } + /* + * See if we can find the blocking factor from the file size + */ + for (rdblksz = MAXBLK; rdblksz > 0; rdblksz -= BLKMULT) + if ((arsb.st_size % rdblksz) == 0) + break; + /* + * When we cannont find a match, we may have a flawed archive. + */ + if (rdblksz <= 0) + rdblksz = FILEBLK; + /* + * for performance go for large reads when we can + */ + if (act == APPND) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + default: + /* + * should never happen, worse case, slow... + */ + blksz = rdblksz = BLKMULT; + break; + } + lstrval = 1; + return(0); +} + +/* + * ar_close() + * closes archive device, increments volume number, and prints i/o summary + */ +#if __STDC__ +void +ar_close(void) +#else +void +ar_close() +#endif +{ + FILE *outf; + + if (arfd < 0) { + did_io = io_ok = flcnt = 0; + return; + } + + if (act == LIST) + outf = stdout; + else + outf = stderr; + + /* + * Close archive file. This may take a LONG while on tapes (we may be + * forced to wait for the rewind to complete) so tell the user what is + * going on (this avoids the user hitting control-c thinking pax is + * broken). + */ + if (vflag && (artyp == ISTAPE)) { + if (vfpart) + (void)putc('\n', outf); + (void)fprintf(outf, + "%s: Waiting for tape drive close to complete...", + argv0); + (void)fflush(outf); + } + + /* + * if nothing was written to the archive (and we created it), we remove + * it + */ + if (can_unlnk && (fstat(arfd, &arsb) == 0) && (S_ISREG(arsb.st_mode)) && + (arsb.st_size == 0)) { + (void)unlink(arcname); + can_unlnk = 0; + } + + (void)close(arfd); + + if (vflag && (artyp == ISTAPE)) { + (void)fputs("done.\n", outf); + vfpart = 0; + (void)fflush(outf); + } + arfd = -1; + + if (!io_ok && !did_io) { + flcnt = 0; + return; + } + did_io = io_ok = 0; + + /* + * The volume number is only increased when the last device has data + * and we have already determined the archive format. + */ + if (frmt != NULL) + ++arvol; + + if (!vflag) { + flcnt = 0; + return; + } + + /* + * Print out a summary of I/O for this archive volume. + */ + if (vfpart) { + (void)putc('\n', outf); + vfpart = 0; + } + + /* + * If we have not determined the format yet, we just say how many bytes + * we have skipped over looking for a header to id. there is no way we + * could have written anything yet. + */ + if (frmt == NULL) { +# ifdef NET2_STAT + (void)fprintf(outf, "%s: unknown format, %lu bytes skipped.\n", +# else + (void)fprintf(outf, "%s: unknown format, %qu bytes skipped.\n", +# endif + argv0, rdcnt); + (void)fflush(outf); + flcnt = 0; + return; + } + + (void)fprintf(outf, +# ifdef NET2_STAT + "%s: %s vol %d, %lu files, %lu bytes read, %lu bytes written.\n", +# else + "%s: %s vol %d, %lu files, %qu bytes read, %qu bytes written.\n", +# endif + argv0, frmt->name, arvol-1, flcnt, rdcnt, wrcnt); + (void)fflush(outf); + flcnt = 0; +} + +/* + * ar_drain() + * drain any archive format independent padding from an archive read + * from a socket or a pipe. This is to prevent the process on the + * other side of the pipe from getting a SIGPIPE (pax will stop + * reading an archive once a format dependent trailer is detected). + */ +#if __STDC__ +void +ar_drain(void) +#else +void +ar_drain() +#endif +{ + register int res; + char drbuf[MAXBLK]; + + /* + * we only drain from a pipe/socket. Other devices can be closed + * without reading up to end of file. We sure hope that pipe is closed + * on the other side so we will get an EOF. + */ + if ((artyp != ISPIPE) || (lstrval <= 0)) + return; + + /* + * keep reading until pipe is drained + */ + while ((res = read(arfd, drbuf, sizeof(drbuf))) > 0) + ; + lstrval = res; +} + +/* + * ar_set_wr() + * Set up device right before switching from read to write in an append. + * device dependent code (if required) to do this should be added here. + * For all archive devices we are already positioned at the place we want + * to start writing when this routine is called. + * Return: + * 0 if all ready to write, -1 otherwise + */ + +#if __STDC__ +int +ar_set_wr(void) +#else +int +ar_set_wr() +#endif +{ + off_t cpos; + + /* + * we must make sure the trailer is rewritten on append, ar_next() + * will stop us if the archive containing the trailer was not written + */ + wr_trail = 0; + + /* + * Add any device dependent code as required here + */ + if (artyp != ISREG) + return(0); + /* + * Ok we have an archive in a regular file. If we were rewriting a + * file, we must get rid of all the stuff after the current offset + * (it was not written by pax). + */ + if (((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) || + (ftruncate(arfd, cpos) < 0)) { + syswarn(1, errno, "Unable to truncate archive file"); + return(-1); + } + return(0); +} + +/* + * ar_app_ok() + * check if the last volume in the archive allows appends. We cannot check + * this until we are ready to write since there is no spec that says all + * volumes in a single archive have to be of the same type... + * Return: + * 0 if we can append, -1 otherwise. + */ + +#if __STDC__ +int +ar_app_ok(void) +#else +int +ar_app_ok() +#endif +{ + if (artyp == ISPIPE) { + warn(1, "Cannot append to an archive obtained from a pipe."); + return(-1); + } + + if (!invld_rec) + return(0); + warn(1,"Cannot append, device record size %d does not support %s spec", + rdblksz, argv0); + return(-1); +} + +/* + * ar_read() + * read up to a specified number of bytes from the archive into the + * supplied buffer. When dealing with tapes we may not always be able to + * read what we want. + * Return: + * Number of bytes in buffer. 0 for end of file, -1 for a read error. + */ + +#if __STDC__ +int +ar_read(register char *buf, register int cnt) +#else +int +ar_read(buf, cnt) + register char *buf; + register int cnt; +#endif +{ + register int res = 0; + + /* + * if last i/o was in error, no more reads until reset or new volume + */ + if (lstrval <= 0) + return(lstrval); + + /* + * how we read must be based on device type + */ + switch (artyp) { + case ISTAPE: + if ((res = read(arfd, buf, cnt)) > 0) { + /* + * CAUTION: tape systems may not always return the same + * sized records so we leave blksz == MAXBLK. The + * physical record size that a tape drive supports is + * very hard to determine in a uniform and portable + * manner. + */ + io_ok = 1; + if (res != rdblksz) { + /* + * Record size changed. If this is happens on + * any record after the first, we probably have + * a tape drive which has a fixed record size + * we are getting multiple records in a single + * read). Watch out for record blocking that + * violates pax spec (must be a multiple of + * BLKMULT). + */ + rdblksz = res; + if (rdblksz % BLKMULT) + invld_rec = 1; + } + return(res); + } + break; + case ISREG: + case ISBLK: + case ISCHR: + case ISPIPE: + default: + /* + * Files are so easy to deal with. These other things cannot + * be trusted at all. So when we are dealing with character + * devices and pipes we just take what they have ready for us + * and return. Trying to do anything else with them runs the + * risk of failure. + */ + if ((res = read(arfd, buf, cnt)) > 0) { + io_ok = 1; + return(res); + } + break; + } + + /* + * We are in trouble at this point, something is broken... + */ + lstrval = res; + if (res < 0) + syswarn(1, errno, "Failed read on archive volume %d", arvol); + else + warn(0, "End of archive volume %d reached", arvol); + return(res); +} + +/* + * ar_write() + * Write a specified number of bytes in supplied buffer to the archive + * device so it appears as a single "block". Deals with errors and tries + * to recover when faced with short writes. + * Return: + * Number of bytes written. 0 indicates end of volume reached and with no + * flaws (as best that can be detected). A -1 indicates an unrecoverable + * error in the archive occured. + */ + +#if __STDC__ +int +ar_write(register char *buf, register int bsz) +#else +int +ar_write(buf, bsz) + register char *buf; + register int bsz; +#endif +{ + register int res; + off_t cpos; + + /* + * do not allow pax to create a "bad" archive. Once a write fails on + * an archive volume prevent further writes to it. + */ + if (lstrval <= 0) + return(lstrval); + + if ((res = write(arfd, buf, bsz)) == bsz) { + wr_trail = 1; + io_ok = 1; + return(bsz); + } + /* + * write broke, see what we can do with it. We try to send any partial + * writes that may violate pax spec to the next archive volume. + */ + if (res < 0) + lstrval = res; + else + lstrval = 0; + + switch (artyp) { + case ISREG: + if ((res > 0) && (res % BLKMULT)) { + /* + * try to fix up partial writes which are not BLKMULT + * in size by forcing the runt record to next archive + * volume + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) + break; + cpos -= (off_t)res; + if (ftruncate(arfd, cpos) < 0) + break; + res = lstrval = 0; + break; + } + if (res >= 0) + break; + /* + * if file is out of space, handle it like a return of 0 + */ + if ((errno == ENOSPC) || (errno == EFBIG) || (errno == EDQUOT)) + res = lstrval = 0; + break; + case ISTAPE: + case ISCHR: + case ISBLK: + if (res >= 0) + break; + if (errno == EACCES) { + warn(0, "Write failed, archive is write protected."); + res = lstrval = 0; + return(0); + } + /* + * see if we reached the end of media, if so force a change to + * the next volume + */ + if ((errno == ENOSPC) || (errno == EIO) || (errno == ENXIO)) + res = lstrval = 0; + break; + case ISPIPE: + default: + /* + * we cannot fix errors to these devices + */ + break; + } + + /* + * Better tell the user the bad news... + * if this is a block aligned archive format, we may have a bad archive + * if the format wants the header to start at a BLKMULT boundry. While + * we can deal with the mis-aligned data, it violates spec and other + * archive readers will likely fail. if the format is not block + * aligned, the user may be lucky (and the archive is ok). + */ + if (res >= 0) { + if (res > 0) + wr_trail = 1; + io_ok = 1; + } + + /* + * If we were trying to rewrite the trailer and it didn't work, we + * must quit right away. + */ + if (!wr_trail && (res <= 0)) { + warn(1,"Unable to append, trailer re-write failed. Quitting."); + return(res); + } + + if (res == 0) + warn(0, "End of archive volume %d reached", arvol); + else if (res < 0) + syswarn(1, errno, "Failed write to archive volume: %d", arvol); + else if (!frmt->blkalgn || ((res % frmt->blkalgn) == 0)) + warn(0,"WARNING: partial archive write. Archive MAY BE FLAWED"); + else + warn(1,"WARNING: partial archive write. Archive IS FLAWED"); + return(res); +} + +/* + * ar_rdsync() + * Try to move past a bad spot on a flawed archive as needed to continue + * I/O. Clears error flags to allow I/O to continue. + * Return: + * 0 when ok to try i/o again, -1 otherwise. + */ + +#if __STDC__ +int +ar_rdsync(void) +#else +int +ar_rdsync() +#endif +{ + long fsbz; + off_t cpos; + off_t mpos; + struct mtop mb; + + /* + * Fail resync attempts at user request (done) or this is going to be + * an update/append to a existing archive. if last i/o hit media end, + * we need to go to the next volume not try a resync + */ + if ((done > 0) || (lstrval == 0)) + return(-1); + + if ((act == APPND) || (act == ARCHIVE)) { + warn(1, "Cannot allow updates to an archive with flaws."); + return(-1); + } + if (io_ok) + did_io = 1; + + switch(artyp) { + case ISTAPE: + /* + * if the last i/o was a successful data transfer, we assume + * the fault is just a bad record on the tape that we are now + * past. If we did not get any data since the last resync try + * to move the tape foward one PHYSICAL record past any + * damaged tape section. Some tape drives are stubborn and need + * to be pushed. + */ + if (io_ok) { + io_ok = 0; + lstrval = 1; + break; + } + mb.mt_op = MTFSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) + break; + lstrval = 1; + break; + case ISREG: + case ISCHR: + case ISBLK: + /* + * try to step over the bad part of the device. + */ + io_ok = 0; + if (((fsbz = arsb.st_blksize) <= 0) || (artyp != ISREG)) + fsbz = BLKMULT; + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) + break; + mpos = fsbz - (cpos % (off_t)fsbz); + if (lseek(arfd, mpos, SEEK_CUR) < 0) + break; + lstrval = 1; + break; + case ISPIPE: + default: + /* + * cannot recover on these archive device types + */ + io_ok = 0; + break; + } + if (lstrval <= 0) { + warn(1, "Unable to recover from an archive read failure."); + return(-1); + } + warn(0, "Attempting to recover from an archive read failure."); + return(0); +} + +/* + * ar_fow() + * Move the I/O position within the archive foward the specified number of + * bytes as supported by the device. If we cannot move the requested + * number of bytes, return the actual number of bytes moved in skipped. + * Return: + * 0 if moved the requested distance, -1 on complete failure, 1 on + * partial move (the amount moved is in skipped) + */ + +#if __STDC__ +int +ar_fow(off_t sksz, off_t *skipped) +#else +int +ar_fow(sksz, skipped) + off_t sksz; + off_t *skipped; +#endif +{ + off_t cpos; + off_t mpos; + + *skipped = 0; + if (sksz <= 0) + return(0); + + /* + * we cannot move foward at EOF or error + */ + if (lstrval <= 0) + return(lstrval); + + /* + * Safer to read forward on devices where it is hard to find the end of + * the media without reading to it. With tapes we cannot be sure of the + * number of physical blocks to skip (we do not know physical block + * size at this point), so we must only read foward on tapes! + */ + if (artyp != ISREG) + return(0); + + /* + * figure out where we are in the archive + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) >= 0) { + /* + * we can be asked to move farther than there are bytes in this + * volume, if so, just go to file end and let normal buf_fill() + * deal with the end of file (it will go to next volume by + * itself) + */ + if ((mpos = cpos + sksz) > arsb.st_size) { + *skipped = arsb.st_size - cpos; + mpos = arsb.st_size; + } else + *skipped = sksz; + if (lseek(arfd, mpos, SEEK_SET) >= 0) + return(0); + } + syswarn(1, errno, "Foward positioning operation on archive failed"); + lstrval = -1; + return(-1); +} + +/* + * ar_rev() + * move the i/o position within the archive backwards the specified byte + * count as supported by the device. With tapes drives we RESET rdblksz to + * the PHYSICAL blocksize. + * NOTE: We should only be called to move backwards so we can rewrite the + * last records (the trailer) of an archive (APPEND). + * Return: + * 0 if moved the requested distance, -1 on complete failure + */ + +#if __STDC__ +int +ar_rev(off_t sksz) +#else +int +ar_rev(sksz) + off_t sksz; +#endif +{ + off_t cpos; + struct mtop mb; + register int phyblk; + + /* + * make sure we do not have try to reverse on a flawed archive + */ + if (lstrval < 0) + return(lstrval); + + switch(artyp) { + case ISPIPE: + if (sksz <= 0) + break; + /* + * cannot go backwards on these critters + */ + warn(1, "Reverse positioning on pipes is not supported."); + lstrval = -1; + return(-1); + case ISREG: + case ISBLK: + case ISCHR: + default: + if (sksz <= 0) + break; + + /* + * For things other than files, backwards movement has a very + * high probability of failure as we really do not know the + * true attributes of the device we are talking to (the device + * may not even have the ability to lseek() in any direction). + * First we figure out where we are in the archive. + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) { + syswarn(1, errno, + "Unable to obtain current archive byte offset"); + lstrval = -1; + return(-1); + } + + /* + * we may try to go backwards past the start when the archive + * is only a single record. If this hapens and we are on a + * multi volume archive, we need to go to the end of the + * previous volume and continue our movement backwards from + * there. + */ + if ((cpos -= sksz) < (off_t)0L) { + if (arvol > 1) { + /* + * this should never happen + */ + warn(1,"Reverse position on previous volume."); + lstrval = -1; + return(-1); + } + cpos = (off_t)0L; + } + if (lseek(arfd, cpos, SEEK_SET) < 0) { + syswarn(1, errno, "Unable to seek archive backwards"); + lstrval = -1; + return(-1); + } + break; + case ISTAPE: + /* + * Calculate and move the proper number of PHYSICAL tape + * blocks. If the sksz is not an even multiple of the physical + * tape size, we cannot do the move (this should never happen). + * (We also cannot handler trailers spread over two vols). + * get_phys() also makes sure we are in front of the filemark. + */ + if ((phyblk = get_phys()) <= 0) { + lstrval = -1; + return(-1); + } + + /* + * make sure future tape reads only go by physical tape block + * size (set rdblksz to the real size). + */ + rdblksz = phyblk; + + /* + * if no movement is required, just return (we must be after + * get_phys() so the physical blocksize is properly set) + */ + if (sksz <= 0) + break; + + /* + * ok we have to move. Make sure the tape drive can do it. + */ + if (sksz % phyblk) { + warn(1, + "Tape drive unable to backspace requested amount"); + lstrval = -1; + return(-1); + } + + /* + * move backwards the requested number of bytes + */ + mb.mt_op = MTBSR; + mb.mt_count = sksz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1,errno, "Unable to backspace tape %d blocks.", + mb.mt_count); + lstrval = -1; + return(-1); + } + break; + } + lstrval = 1; + return(0); +} + +/* + * get_phys() + * Determine the physical block size on a tape drive. We need the physical + * block size so we know how many bytes we skip over when we move with + * mtio commands. We also make sure we are BEFORE THE TAPE FILEMARK when + * return. + * This is one really SLOW routine... + * Return: + * physical block size if ok (ok > 0), -1 otherwise + */ + +#if __STDC__ +static int +get_phys(void) +#else +static int +get_phys() +#endif +{ + register int padsz = 0; + register int res; + register int phyblk; + struct mtop mb; + char scbuf[MAXBLK]; + + /* + * move to the file mark, and then back up one record and read it. + * this should tell us the physical record size the tape is using. + */ + if (lstrval == 1) { + /* + * we know we are at file mark when we get back a 0 from + * read() + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + padsz += res; + if (res < 0) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + } + + /* + * move backwards over the file mark so we are at the end of the + * last record. + */ + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * move backwards so we are in front of the last record and read it to + * get physical tape blocksize. + */ + mb.mt_op = MTBSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1, errno, "Unable to backspace over last tape block."); + return(-1); + } + if ((phyblk = read(arfd, scbuf, sizeof(scbuf))) <= 0) { + syswarn(1, errno, "Cannot determine archive tape blocksize."); + return(-1); + } + + /* + * read foward to the file mark, then back up in front of the filemark + * (this is a bit paranoid, but should be safe to do). + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + ; + if (res < 0) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * set lstrval so we know that the filemark has not been seen + */ + lstrval = 1; + + /* + * return if there was no padding + */ + if (padsz == 0) + return(phyblk); + + /* + * make sure we can move backwards over the padding. (this should + * never fail). + */ + if (padsz % phyblk) { + warn(1, "Tape drive unable to backspace requested amount"); + return(-1); + } + + /* + * move backwards over the padding so the head is where it was when + * we were first called (if required). + */ + mb.mt_op = MTBSR; + mb.mt_count = padsz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1,errno,"Unable to backspace tape over %d pad blocks", + mb.mt_count); + return(-1); + } + return(phyblk); +} + +/* + * ar_next() + * prompts the user for the next volume in this archive. For some devices + * we may allow the media to be changed. Otherwise a new archive is + * prompted for. By pax spec, if there is no controlling tty or an eof is + * read on tty input, we must quit pax. + * Return: + * 0 when ready to continue, -1 when all done + */ + +#if __STDC__ +int +ar_next(void) +#else +int +ar_next() +#endif +{ + char buf[PAXPATHLEN+2]; + static int freeit = 0; + sigset_t o_mask; + + /* + * WE MUST CLOSE THE DEVICE. A lot of devices must see last close, (so + * things like writing EOF etc will be done) (Watch out ar_close() can + * also be called via a signal handler, so we must prevent a race. + */ + if (sigprocmask(SIG_BLOCK, &s_mask, &o_mask) < 0) + syswarn(0, errno, "Unable to set signal mask"); + ar_close(); + if (sigprocmask(SIG_SETMASK, &o_mask, (sigset_t *)NULL) < 0) + syswarn(0, errno, "Unable to restore signal mask"); + + if (done || !wr_trail) + return(-1); + + tty_prnt("\nATTENTION! %s archive volume change required.\n", argv0); + + /* + * if i/o is on stdin or stdout, we cannot reopen it (we do not know + * the name), the user will be forced to type it in. + */ + if (strcmp(arcname, STDO) && strcmp(arcname, STDN) && (artyp != ISREG) + && (artyp != ISPIPE)) { + if (artyp == ISTAPE) { + tty_prnt("%s ready for archive tape volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT TAPE on the tape drive"); + } else { + tty_prnt("%s ready for archive volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT STORAGE MEDIA (if required)"); + } + + if ((act == ARCHIVE) || (act == APPND)) + tty_prnt(" and make sure it is WRITE ENABLED.\n"); + else + tty_prnt("\n"); + + for(;;) { + tty_prnt("Type \"y\" to continue, \".\" to quit %s,", + argv0); + tty_prnt(" or \"s\" to switch to new device.\nIf you"); + tty_prnt(" cannot change storage media, type \"s\"\n"); + tty_prnt("Is the device ready and online? > "); + + if ((tty_read(buf,sizeof(buf))<0) || !strcmp(buf,".")){ + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + + if ((buf[0] == '\0') || (buf[1] != '\0')) { + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + + switch (buf[0]) { + case 'y': + case 'Y': + /* + * we are to continue with the same device + */ + if (ar_open(arcname) >= 0) + return(0); + tty_prnt("Cannot re-open %s, try again\n", + arcname); + continue; + case 's': + case 'S': + /* + * user wants to open a different device + */ + tty_prnt("Switching to a different archive\n"); + break; + default: + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + break; + } + } else + tty_prnt("Ready for archive volume: %d\n", arvol); + + /* + * have to go to a different archive + */ + for (;;) { + tty_prnt("Input archive name or \".\" to quit %s.\n", argv0); + tty_prnt("Archive name > "); + + if ((tty_read(buf, sizeof(buf)) < 0) || !strcmp(buf, ".")) { + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + if (buf[0] == '\0') { + tty_prnt("Empty file name, try again\n"); + continue; + } + if (!strcmp(buf, "..")) { + tty_prnt("Illegal file name: .. try again\n"); + continue; + } + if (strlen(buf) > PAXPATHLEN) { + tty_prnt("File name too long, try again\n"); + continue; + } + + /* + * try to open new archive + */ + if (ar_open(buf) >= 0) { + if (freeit) { + (void)free(arcname); + freeit = 0; + } + if ((arcname = strdup(buf)) == NULL) { + done = 1; + lstrval = -1; + warn(0, "Cannot save archive name."); + return(-1); + } + freeit = 1; + break; + } + tty_prnt("Cannot open %s, try again\n", buf); + continue; + } + return(0); +} diff --git a/bin/pax/ar_subs.c b/bin/pax/ar_subs.c new file mode 100644 index 0000000..5d6d3c6 --- /dev/null +++ b/bin/pax/ar_subs.c @@ -0,0 +1,1238 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)ar_subs.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <signal.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include "pax.h" +#include "extern.h" + +static void wr_archive __P((register ARCHD *, int is_app)); +static int get_arc __P((void)); +static int next_head __P((register ARCHD *)); +extern sigset_t s_mask; + +/* + * Routines which control the overall operation modes of pax as specified by + * the user: list, append, read ... + */ + +static char hdbuf[BLKMULT]; /* space for archive header on read */ +u_long flcnt; /* number of files processed */ + +/* + * list() + * list the contents of an archive which match user supplied pattern(s) + * (no pattern matches all). + */ + +#if __STDC__ +void +list(void) +#else +void +list() +#endif +{ + register ARCHD *arcn; + register int res; + ARCHD archd; + time_t now; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine. We + * also save current time for ls_list() so we do not make a system + * call for each file we need to print. If verbose (vflag) start up + * the name and group caches. + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0)) + return; + + if (vflag && ((uidtb_start() < 0) || (gidtb_start() < 0))) + return; + + now = time((time_t *)NULL); + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + /* + * check for pattern, and user specified options match. + * When all patterns are matched we are done. + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res == 0) && (sel_chk(arcn) == 0)) { + /* + * pattern resulted in a selected file + */ + if (pat_sel(arcn) < 0) + break; + + /* + * modify the name as requested by the user if name + * survives modification, do a listing of the file + */ + if ((res = mod_name(arcn)) < 0) + break; + if (res == 0) + ls_list(arcn, now); + } + + /* + * skip to next archive format header using values calculated + * by the format header read routine + */ + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + } + + /* + * all done, let format have a chance to cleanup, and make sure that + * the patterns supplied by the user were all matched + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); + ar_close(); + pat_chk(); +} + +/* + * extract() + * extract the member(s) of an archive as specified by user supplied + * pattern(s) (no patterns extracts all members) + */ + +#if __STDC__ +void +extract(void) +#else +void +extract() +#endif +{ + register ARCHD *arcn; + register int res; + off_t cnt; + ARCHD archd; + struct stat sb; + int fd; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine; + * start up the directory modification time and access mode database + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + /* + * step through each entry on the archive until the format read routine + * says it is done + */ + while (next_head(arcn) == 0) { + + /* + * check for pattern, and user specified options match. When + * all the patterns are matched we are done + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res > 0) || (sel_chk(arcn) != 0)) { + /* + * file is not selected. skip past any file data and + * padding and go back for the next archive member + */ + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * with -u or -D only extract when the archive member is newer + * than the file with the same name in the file system (nos + * test of being the same type is required). + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this operation can be confusing to the + * user who might expect the test to be done on an existing + * file AFTER the name mod. In honesty the pax spec is probably + * flawed in this respect. + */ + if ((uflag || Dflag) && ((lstat(arcn->name, &sb) == 0))) { + if (uflag && Dflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (Dflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (arcn->sb.st_mtime <= sb.st_mtime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } + + /* + * this archive member is now been selected. modify the name. + */ + if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if (res > 0) { + /* + * a bad name mod, skip and purge name from link table + */ + purg_lnk(arcn); + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * Non standard -Y and -Z flag. When the exisiting file is + * same age or newer skip + */ + if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { + if (Yflag && Zflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (Yflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (arcn->sb.st_mtime <= sb.st_mtime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } + + if (vflag) { + (void)fputs(arcn->name, stderr); + vfpart = 1; + } + + /* + * all ok, extract this member based on type + */ + if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { + /* + * process archive members that are not regular files. + * throw out padding and any data that might follow the + * header (as determined by the format). + */ + if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + + (void)rd_skip(arcn->skip + arcn->pad); + if (res < 0) + purg_lnk(arcn); + + if (vflag && vfpart) { + (void)putc('\n', stderr); + vfpart = 0; + } + continue; + } + /* + * we have a file with data here. If we can not create it, skip + * over the data and purge the name from hard link table + */ + if ((fd = file_creat(arcn)) < 0) { + (void)rd_skip(arcn->skip + arcn->pad); + purg_lnk(arcn); + continue; + } + /* + * extract the file from the archive and skip over padding and + * any unprocessed data + */ + res = (*frmt->rd_data)(arcn, fd, &cnt); + file_close(arcn, fd); + if (vflag && vfpart) { + (void)putc('\n', stderr); + vfpart = 0; + } + if (!res) + (void)rd_skip(cnt + arcn->pad); + } + + /* + * all done, restore directory modes and times as required; make sure + * all patterns supplied by the user were matched; block off signals + * to avoid chance for multiple entry into the cleanup code. + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); + ar_close(); + proc_dir(); + pat_chk(); +} + +/* + * wr_archive() + * Write an archive. used in both creating a new archive and appends on + * previously written archive. + */ + +#if __STDC__ +static void +wr_archive(register ARCHD *arcn, int is_app) +#else +static void +wr_archive(arcn, is_app) + register ARCHD *arcn; + int is_app; +#endif +{ + register int res; + register int hlk; + register int wr_one; + off_t cnt; + int (*wrf)(); + int fd = -1; + + /* + * if this format supports hard link storage, start up the database + * that detects them. + */ + if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) + return; + + /* + * start up the file traversal code and format specific write + */ + if ((ftree_start() < 0) || ((*frmt->st_wr)() < 0)) + return; + wrf = frmt->wr; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + /* + * if this not append, and there are no files, we do no write a trailer + */ + wr_one = is_app; + + /* + * while there are files to archive, process them one at at time + */ + while (next_file(arcn) == 0) { + /* + * check if this file meets user specified options match. + */ + if (sel_chk(arcn) != 0) + continue; + fd = -1; + if (uflag) { + /* + * only archive if this file is newer than a file with + * the same name that is already stored on the archive + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) + continue; + } + + /* + * this file is considered selected now. see if this is a hard + * link to a file already stored + */ + ftree_sel(arcn); + if (hlk && (chk_lnk(arcn) < 0)) + break; + + if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) || + (arcn->type == PAX_CTG)) { + /* + * we will have to read this file. by opening it now we + * can avoid writing a header to the archive for a file + * we were later unable to read (we also purge it from + * the link table). + */ + if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1,errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + } + + /* + * Now modify the name as requested by the user + */ + if ((res = mod_name(arcn)) < 0) { + /* + * name modification says to skip this file, close the + * file and purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + break; + } + + if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { + /* + * unable to obtain the crc we need, close the file, + * purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + continue; + } + + if (vflag) { + (void)fputs(arcn->name, stderr); + vfpart = 1; + } + ++flcnt; + + /* + * looks safe to store the file, have the format specific + * routine write routine store the file header on the archive + */ + if ((res = (*wrf)(arcn)) < 0) { + rdfile_close(arcn, &fd); + break; + } + wr_one = 1; + if (res > 0) { + /* + * format write says no file data needs to be stored + * so we are done messing with this file + */ + if (vflag && vfpart) { + (void)putc('\n', stderr); + vfpart = 0; + } + rdfile_close(arcn, &fd); + continue; + } + + /* + * Add file data to the archive, quit on write error. if we + * cannot write the entire file contents to the archive we + * must pad the archive to replace the missing file data + * (otherwise during an extract the file header for the file + * which FOLLOWS this one will not be where we expect it to + * be). + */ + res = (*frmt->wr_data)(arcn, fd, &cnt); + rdfile_close(arcn, &fd); + if (vflag && vfpart) { + (void)putc('\n', stderr); + vfpart = 0; + } + if (res < 0) + break; + + /* + * pad as required, cnt is number of bytes not written + */ + if (((cnt > 0) && (wr_skip(cnt) < 0)) || + ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) + break; + } + + /* + * tell format to write trailer; pad to block boundry; reset directory + * mode/access times, and check if all patterns supplied by the user + * were matched. block off signals to avoid chance for multiple entry + * into the cleanup code + */ + if (wr_one) { + (*frmt->end_wr)(); + wr_fin(); + } + (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); + ar_close(); + if (tflag) + proc_dir(); + ftree_chk(); +} + +/* + * append() + * Add file to previously written archive. Archive format specified by the + * user must agree with archive. The archive is read first to collect + * modification times (if -u) and locate the archive trailer. The archive + * is positioned in front of the record with the trailer and wr_archive() + * is called to add the new members. + * PAX IMPLEMENTATION DETAIL NOTE: + * -u is implemented by adding the new members to the end of the archive. + * Care is taken so that these do not end up as links to the older + * version of the same file already stored in the archive. It is expected + * when extraction occurs these newer versions will over-write the older + * ones stored "earlier" in the archive (this may be a bad assumption as + * it depends on the implementation of the program doing the extraction). + * It is really difficult to splice in members without either re-writing + * the entire archive (from the point were the old version was), or having + * assistance of the format specification in terms of a special update + * header that invalidates a previous archive record. The posix spec left + * the method used to implement -u unspecified. This pax is able to + * over write existing files that it creates. + */ + +#if __STDC__ +void +append(void) +#else +void +append() +#endif +{ + register ARCHD *arcn; + register int res; + ARCHD archd; + FSUB *orgfrmt; + int udev; + off_t tlen; + + arcn = &archd; + orgfrmt = frmt; + + /* + * Do not allow an append operation if the actual archive is of a + * different format than the user specified foramt. + */ + if (get_arc() < 0) + return; + if ((orgfrmt != NULL) && (orgfrmt != frmt)) { + warn(1, "Cannot mix current archive format %s with %s", + frmt->name, orgfrmt->name); + return; + } + + /* + * pass the format any options and start up format + */ + if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) + return; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files we see. + */ + if (uflag && (ftime_start() < 0)) + return; + + /* + * some archive formats encode hard links by recording the device and + * file serial number (inode) but copy the file anyway (multiple times) + * to the archive. When we append, we run the risk that newly added + * files may have the same device and inode numbers as those recorded + * on the archive but during a previous run. If this happens, when the + * archive is extracted we get INCORRECT hard links. We avoid this by + * remapping the device numbers so that newly added files will never + * use the same device number as one found on the archive. remapping + * allows new members to safely have links among themselves. remapping + * also avoids problems with file inode (serial number) truncations + * when the inode number is larger than storage space in the archive + * header. See the remap routines for more details. + */ + if ((udev = frmt->udev) && (dev_start() < 0)) + return; + + /* + * reading the archive may take a long time. If verbose tell the user + */ + if (vflag) { + (void)fprintf(stderr, + "%s: Reading archive to position at the end...", argv0); + vfpart = 1; + } + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + /* + * check if this file meets user specified options. + */ + if (sel_chk(arcn) != 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + + if (uflag) { + /* + * see if this is the newest version of this file has + * already been seen, if so skip. + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + } + + /* + * Store this device number. Device numbers seen during the + * read phase of append will cause newly appended files with a + * device number seen in the old part of the archive to be + * remapped to an unused device number. + */ + if ((udev && (add_dev(arcn) < 0)) || + (rd_skip(arcn->skip + arcn->pad) == 1)) + break; + } + + /* + * done, finish up read and get the number of bytes to back up so we + * can add new members. The format might have used the hard link table, + * purge it. + */ + tlen = (*frmt->end_rd)(); + lnk_end(); + + /* + * try to postion for write, if this fails quit. if any error occurs, + * we will refuse to write + */ + if (appnd_start(tlen) < 0) + return; + + /* + * tell the user we are done reading. + */ + if (vflag && vfpart) { + (void)fputs("done.\n", stderr); + vfpart = 0; + } + + /* + * go to the writing phase to add the new members + */ + wr_archive(arcn, 1); +} + +/* + * archive() + * write a new archive + */ + +#if __STDC__ +void +archive(void) +#else +void +archive() +#endif +{ + ARCHD archd; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files; set up for writing; pass the format any + * options write the archive + */ + if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) + return; + if ((*frmt->options)() < 0) + return; + + wr_archive(&archd, 0); +} + +/* + * copy() + * copy files from one part of the file system to another. this does not + * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an + * archive was written and then extracted in the destination directory + * (except the files are forced to be under the destination directory). + */ + +#if __STDC__ +void +copy(void) +#else +void +copy() +#endif +{ + register ARCHD *arcn; + register int res; + register int fddest; + register char *dest_pt; + register int dlen; + register int drem; + int fdsrc = -1; + struct stat sb; + ARCHD archd; + char dirbuf[PAXPATHLEN+1]; + + arcn = &archd; + /* + * set up the destination dir path and make sure it is a directory. We + * make sure we have a trailing / on the destination + */ + dlen = l_strncpy(dirbuf, dirptr, PAXPATHLEN); + dest_pt = dirbuf + dlen; + if (*(dest_pt-1) != '/') { + *dest_pt++ = '/'; + ++dlen; + } + *dest_pt = '\0'; + drem = PAXPATHLEN - dlen; + + if (stat(dirptr, &sb) < 0) { + syswarn(1, errno, "Cannot access destination directory %s", + dirptr); + return; + } + if (!S_ISDIR(sb.st_mode)) { + warn(1, "Destination is not a directory %s", dirptr); + return; + } + + /* + * start up the hard link table; file traversal routines and the + * modification time and access mode database + */ + if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + /* + * set up to cp file trees + */ + cp_start(); + + /* + * while there are files to archive, process them + */ + while (next_file(arcn) == 0) { + fdsrc = -1; + + /* + * check if this file meets user specified options + */ + if (sel_chk(arcn) != 0) + continue; + + /* + * if there is already a file in the destination directory with + * the same name and it is newer, skip the one stored on the + * archive. + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this can be confusing to the user who + * might expect the test to be done on an existing file AFTER + * the name mod. In honesty the pax spec is probably flawed in + * this respect + */ + if (uflag || Dflag) { + /* + * create the destination name + */ + if (*(arcn->name) == '/') + res = 1; + else + res = 0; + if ((arcn->nlen - res) > drem) { + warn(1, "Destination pathname too long %s", + arcn->name); + continue; + } + (void)strncpy(dest_pt, arcn->name + res, drem); + dirbuf[PAXPATHLEN] = '\0'; + + /* + * if existing file is same age or newer skip + */ + res = lstat(dirbuf, &sb); + *dest_pt = '\0'; + + if (res == 0) { + if (uflag && Dflag) { + if ((arcn->sb.st_mtime<=sb.st_mtime) && + (arcn->sb.st_ctime<=sb.st_ctime)) + continue; + } else if (Dflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) + continue; + } else if (arcn->sb.st_mtime <= sb.st_mtime) + continue; + } + } + + /* + * this file is considered selected. See if this is a hard link + * to a previous file; modify the name as requested by the + * user; set the final destination. + */ + ftree_sel(arcn); + if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { + /* + * skip file, purge from link table + */ + purg_lnk(arcn); + continue; + } + + /* + * Non standard -Y and -Z flag. When the exisiting file is + * same age or newer skip + */ + if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { + if (Yflag && Zflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) + continue; + } else if (Yflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) + continue; + } else if (arcn->sb.st_mtime <= sb.st_mtime) + continue; + } + + if (vflag) { + (void)fputs(arcn->name, stderr); + vfpart = 1; + } + ++flcnt; + + /* + * try to create a hard link to the src file if requested + * but make sure we are not trying to overwrite ourselves. + */ + if (lflag) + res = cross_lnk(arcn); + else + res = chk_same(arcn); + if (res <= 0) { + if (vflag && vfpart) { + (void)putc('\n', stderr); + vfpart = 0; + } + continue; + } + + /* + * have to create a new file + */ + if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { + /* + * create a link or special file + */ + if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + if (res < 0) + purg_lnk(arcn); + if (vflag && vfpart) { + (void)putc('\n', stderr); + vfpart = 0; + } + continue; + } + + /* + * have to copy a regular file to the destination directory. + * first open source file and then create the destination file + */ + if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1, errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + if ((fddest = file_creat(arcn)) < 0) { + rdfile_close(arcn, &fdsrc); + purg_lnk(arcn); + continue; + } + + /* + * copy source file data to the destination file + */ + cp_file(arcn, fdsrc, fddest); + file_close(arcn, fddest); + rdfile_close(arcn, &fdsrc); + + if (vflag && vfpart) { + (void)putc('\n', stderr); + vfpart = 0; + } + } + + /* + * restore directory modes and times as required; make sure all + * patterns were selected block off signals to avoid chance for + * multiple entry into the cleanup code. + */ + (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); + ar_close(); + proc_dir(); + ftree_chk(); +} + +/* + * next_head() + * try to find a valid header in the archive. Uses format specific + * routines to extract the header and id the trailer. Trailers may be + * located within a valid header or in an invalid header (the location + * is format specific. The inhead field from the option table tells us + * where to look for the trailer). + * We keep reading (and resyncing) until we get enough contiguous data + * to check for a header. If we cannot find one, we shift by a byte + * add a new byte from the archive to the end of the buffer and try again. + * If we get a read error, we throw out what we have (as we must have + * contiguous data) and start over again. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if we got a header, -1 if we are unable to ever find another one + * (we reached the end of input, or we reached the limit on retries. see + * the specs for rd_wrbuf() for more details) + */ + +#if __STDC__ +static int +next_head(register ARCHD *arcn) +#else +static int +next_head(arcn) + register ARCHD *arcn; +#endif +{ + register int ret; + register char *hdend; + register int res; + register int shftsz; + register int hsz; + register int in_resync = 0; /* set when we are in resync mode */ + int cnt = 0; /* counter for trailer function */ + + /* + * set up initial conditions, we want a whole frmt->hsz block as we + * have no data yet. + */ + res = hsz = frmt->hsz; + hdend = hdbuf; + shftsz = hsz - 1; + for(;;) { + /* + * keep looping until we get a contiguous FULL buffer + * (frmt->hsz is the proper size) + */ + for (;;) { + if ((ret = rd_wrbuf(hdend, res)) == res) + break; + + /* + * some kind of archive read problem, try to resync the + * storage device, better give the user the bad news. + */ + if ((ret == 0) || (rd_sync() < 0)) { + warn(1,"Premature end of file on archive read"); + return(-1); + } + if (!in_resync) { + if (act == APPND) { + warn(1, + "Archive I/O error, cannot continue"); + return(-1); + } + warn(1,"Archive I/O error. Trying to recover."); + ++in_resync; + } + + /* + * oh well, throw it all out and start over + */ + res = hsz; + hdend = hdbuf; + } + + /* + * ok we have a contiguous buffer of the right size. Call the + * format read routine. If this was not a valid header and this + * format stores trailers outside of the header, call the + * format specific trailer routine to check for a trailer. We + * have to watch out that we do not mis-identify file data or + * block padding as a header or trailer. Format specific + * trailer functions must NOT check for the trailer while we + * are running in resync mode. Some trailer functions may tell + * us that this block cannot contain a valid header either, so + * we then throw out the entire block and start over. + */ + if ((*frmt->rd)(arcn, hdbuf) == 0) + break; + + if (!frmt->inhead) { + /* + * this format has trailers outside of valid headers + */ + if ((ret = (*frmt->trail)(hdbuf,in_resync,&cnt)) == 0){ + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + if (ret == 1) { + /* + * we are in resync and we were told to throw + * the whole block out because none of the + * bytes in this block can be used to form a + * valid header + */ + res = hsz; + hdend = hdbuf; + continue; + } + } + + /* + * Brute force section. + * not a valid header. We may be able to find a header yet. So + * we shift over by one byte, and set up to read one byte at a + * time from the archive and place it at the end of the buffer. + * We will keep moving byte at a time until we find a header or + * get a read error and have to start over. + */ + if (!in_resync) { + if (act == APPND) { + warn(1,"Unable to append, archive header flaw"); + return(-1); + } + warn(1,"Invalid header, starting valid header search."); + ++in_resync; + } + bcopy(hdbuf+1, hdbuf, shftsz); + res = 1; + hdend = hdbuf + shftsz; + } + + /* + * ok got a valid header, check for trailer if format encodes it in the + * the header. NOTE: the parameters are different than trailer routines + * which encode trailers outside of the header! + */ + if (frmt->inhead && ((*frmt->trail)(arcn) == 0)) { + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + ++flcnt; + return(0); +} + +/* + * get_arc() + * Figure out what format an archive is. Handles archive with flaws by + * brute force searches for a legal header in any supported format. The + * format id routines have to be careful to NOT mis-identify a format. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if archive found -1 otherwise + */ + +#if __STDC__ +static int +get_arc(void) +#else +static int +get_arc() +#endif +{ + register int i; + register int hdsz = 0; + register int res; + register int minhd = BLKMULT; + char *hdend; + int notice = 0; + + /* + * find the smallest header size in all archive formats and then set up + * to read the archive. + */ + for (i = 0; ford[i] >= 0; ++i) { + if (fsub[ford[i]].hsz < minhd) + minhd = fsub[ford[i]].hsz; + } + if (rd_start() < 0) + return(-1); + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + for(;;) { + for (;;) { + /* + * fill the buffer with at least the smallest header + */ + i = rd_wrbuf(hdend, res); + if (i > 0) + hdsz += i; + if (hdsz >= minhd) + break; + + /* + * if we cannot recover from a read error quit + */ + if ((i == 0) || (rd_sync() < 0)) + goto out; + + /* + * when we get an error none of the data we already + * have can be used to create a legal header (we just + * got an error in the middle), so we throw it all out + * and refill the buffer with fresh data. + */ + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + if (!notice) { + if (act == APPND) + return(-1); + warn(1,"Cannot identify format. Searching..."); + ++notice; + } + } + + /* + * we have at least the size of the smallest header in any + * archive format. Look to see if we have a match. The array + * ford[] is used to specify the header id order to reduce the + * chance of incorrectly id'ing a valid header (some formats + * may be subsets of each other and the order would then be + * important). + */ + for (i = 0; ford[i] >= 0; ++i) { + if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0) + continue; + frmt = &(fsub[ford[i]]); + /* + * yuck, to avoid slow special case code in the extract + * routines, just push this header back as if it was + * not seen. We have left extra space at start of the + * buffer for this purpose. This is a bit ugly, but + * adding all the special case code is far worse. + */ + pback(hdbuf, hdsz); + return(0); + } + + /* + * We have a flawed archive, no match. we start searching, but + * we never allow additions to flawed archives + */ + if (!notice) { + if (act == APPND) + return(-1); + warn(1, "Cannot identify format. Searching..."); + ++notice; + } + + /* + * brute force search for a header that we can id. + * we shift through byte at a time. this is slow, but we cannot + * determine the nature of the flaw in the archive in a + * portable manner + */ + if (--hdsz > 0) { + bcopy(hdbuf+1, hdbuf, hdsz); + res = BLKMULT - hdsz; + hdend = hdbuf + hdsz; + } else { + res = BLKMULT; + hdend = hdbuf; + hdsz = 0; + } + } + + out: + /* + * we cannot find a header, bow, apologize and quit + */ + warn(1, "Sorry, unable to determine archive format."); + return(-1); +} diff --git a/bin/pax/buf_subs.c b/bin/pax/buf_subs.c new file mode 100644 index 0000000..a4ec446 --- /dev/null +++ b/bin/pax/buf_subs.c @@ -0,0 +1,1083 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)buf_subs.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include "pax.h" +#include "extern.h" + +/* + * routines which implement archive and file buffering + */ + +#define MINFBSZ 512 /* default block size for hole detect */ +#define MAXFLT 10 /* default media read error limit */ + +/* + * Need to change bufmem to dynamic allocation when the upper + * limit on blocking size is removed (though that will violate pax spec) + * MAXBLK define and tests will also need to be updated. + */ +static char bufmem[MAXBLK+BLKMULT]; /* i/o buffer + pushback id space */ +static char *buf; /* normal start of i/o buffer */ +static char *bufend; /* end or last char in i/o buffer */ +static char *bufpt; /* read/write point in i/o buffer */ +int blksz = MAXBLK; /* block input/output size in bytes */ +int wrblksz; /* user spec output size in bytes */ +int maxflt = MAXFLT; /* MAX consecutive media errors */ +int rdblksz; /* first read blksize (tapes only) */ +off_t wrlimit; /* # of bytes written per archive vol */ +off_t wrcnt; /* # of bytes written on current vol */ +off_t rdcnt; /* # of bytes read on current vol */ + +/* + * wr_start() + * set up the buffering system to operate in a write mode + * Return: + * 0 if ok, -1 if the user specified write block size violates pax spec + */ + +#if __STDC__ +int +wr_start(void) +#else +int +wr_start() +#endif +{ + buf = &(bufmem[BLKMULT]); + /* + * Check to make sure the write block size meets pax specs. If the user + * does not specify a blocksize, we use the format default blocksize. + * We must be picky on writes, so we do not allow the user to create an + * archive that might be hard to read elsewhere. If all ok, we then + * open the first archive volume + */ + if (!wrblksz) + wrblksz = frmt->bsz; + if (wrblksz > MAXBLK) { + warn(1, "Write block size of %d too large, maximium is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + warn(1, "Write block size of %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + + /* + * we only allow wrblksz to be used with all archive operations + */ + blksz = rdblksz = wrblksz; + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + wrcnt = 0; + bufend = buf + wrblksz; + bufpt = buf; + return(0); +} + +/* + * rd_start() + * set up buffering system to read an archive + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +rd_start(void) +#else +int +rd_start() +#endif +{ + /* + * leave space for the header pushback (see get_arc()). If we are + * going to append and user specified a write block size, check it + * right away + */ + buf = &(bufmem[BLKMULT]); + if ((act == APPND) && wrblksz) { + if (wrblksz > MAXBLK) { + warn(1,"Write block size %d too large, maximium is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + warn(1, "Write block size %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + } + + /* + * open the archive + */ + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + bufend = buf + rdblksz; + bufpt = bufend; + rdcnt = 0; + return(0); +} + +/* + * cp_start() + * set up buffer system for copying within the file system + */ + +#if __STDC__ +void +cp_start(void) +#else +void +cp_start() +#endif +{ + buf = &(bufmem[BLKMULT]); + rdblksz = blksz = MAXBLK; +} + +/* + * appnd_start() + * Set up the buffering system to append new members to an archive that + * was just read. The last block(s) of an archive may contain a format + * specific trailer. To append a new member, this trailer has to be + * removed from the archive. The first byte of the trailer is replaced by + * the start of the header of the first file added to the archive. The + * format specific end read function tells us how many bytes to move + * backwards in the archive to be positioned BEFORE the trailer. Two + * different postions have to be adjusted, the O.S. file offset (e.g. the + * position of the tape head) and the write point within the data we have + * stored in the read (soon to become write) buffer. We may have to move + * back several records (the number depends on the size of the archive + * record and the size of the format trailer) to read up the record where + * the first byte of the trailer is recorded. Trailers may span (and + * overlap) record boundries. + * We first calculate which record has the first byte of the trailer. We + * move the OS file offset back to the start of this record and read it + * up. We set the buffer write pointer to be at this byte (the byte where + * the trailer starts). We then move the OS file pointer back to the + * start of this record so a flush of this buffer will replace the record + * in the archive. + * A major problem is rewriting this last record. For archives stored + * on disk files, this is trival. However, many devices are really picky + * about the conditions under which they will allow a write to occur. + * Often devices restrict the conditions where writes can be made writes, + * so it may not be feasable to append archives stored on all types of + * devices. + * Return: + * 0 for success, -1 for failure + */ + +#if __STDC__ +int +appnd_start(off_t skcnt) +#else +int +appnd_start(skcnt) + off_t skcnt; +#endif +{ + register int res; + off_t cnt; + + if (exit_val != 0) { + warn(0, "Cannot append to an archive that may have flaws."); + return(-1); + } + /* + * if the user did not specify a write blocksize, inherit the size used + * in the last archive volume read. (If a is set we still use rdblksz + * until next volume, cannot shift sizes within a single volume). + */ + if (!wrblksz) + wrblksz = blksz = rdblksz; + else + blksz = rdblksz; + + /* + * make sure that this volume allows appends + */ + if (ar_app_ok() < 0) + return(-1); + + /* + * Calculate bytes to move back and move in front of record where we + * need to start writing from. Remember we have to add in any padding + * that might be in the buffer after the trailer in the last block. We + * travel skcnt + padding ROUNDED UP to blksize. + */ + skcnt += bufend - bufpt; + if ((cnt = (skcnt/blksz) * blksz) < skcnt) + cnt += blksz; + if (ar_rev((off_t)cnt) < 0) + goto out; + + /* + * We may have gone too far if there is valid data in the block we are + * now in front of, read up the block and position the pointer after + * the valid data. + */ + if ((cnt -= skcnt) > 0) { + /* + * watch out for stupid tape drives. ar_rev() will set rdblksz + * to be real physical blocksize so we must loop until we get + * the old rdblksz (now in blksz). If ar_rev() fouls up the + * determination of the physical block size, we will fail. + */ + bufpt = buf; + bufend = buf + blksz; + while (bufpt < bufend) { + if ((res = ar_read(bufpt, rdblksz)) <= 0) + goto out; + bufpt += res; + } + if (ar_rev((off_t)(bufpt - buf)) < 0) + goto out; + bufpt = buf + cnt; + bufend = buf + blksz; + } else { + /* + * buffer is empty + */ + bufend = buf + blksz; + bufpt = buf; + } + rdblksz = blksz; + rdcnt -= skcnt; + wrcnt = 0; + + /* + * At this point we are ready to write. If the device requires special + * handling to write at a point were previously recorded data resides, + * that is handled in ar_set_wr(). From now on we operate under normal + * ARCHIVE mode (write) conditions + */ + if (ar_set_wr() < 0) + return(-1); + act = ARCHIVE; + return(0); + + out: + warn(1, "Unable to rewrite archive trailer, cannot append."); + return(-1); +} + +/* + * rd_sync() + * A read error occurred on this archive volume. Resync the buffer and + * try to reset the device (if possible) so we can continue to read. Keep + * trying to do this until we get a valid read, or we reach the limit on + * consecutive read faults (at which point we give up). The user can + * adjust the read error limit through a command line option. + * Returns: + * 0 on success, and -1 on failure + */ + +#if __STDC__ +int +rd_sync(void) +#else +int +rd_sync() +#endif +{ + register int errcnt = 0; + register int res; + + /* + * if the user says bail out on first fault, we are out of here... + */ + if (maxflt == 0) + return(-1); + if (act == APPND) { + warn(1, "Unable to append when there are archive read errors."); + return(-1); + } + + /* + * poke at device and try to get past media error + */ + if (ar_rdsync() < 0) { + if (ar_next() < 0) + return(-1); + else + rdcnt = 0; + } + + for (;;) { + if ((res = ar_read(buf, blksz)) > 0) { + /* + * All right! got some data, fill that buffer + */ + bufpt = buf; + bufend = buf + res; + rdcnt += res; + return(0); + } + + /* + * Oh well, yet another failed read... + * if error limit reached, ditch. o.w. poke device to move past + * bad media and try again. if media is badly damaged, we ask + * the poor (and upset user at this point) for the next archive + * volume. remember the goal on reads is to get the most we + * can extract out of the archive. + */ + if ((maxflt > 0) && (++errcnt > maxflt)) + warn(0,"Archive read error limit (%d) reached",maxflt); + else if (ar_rdsync() == 0) + continue; + if (ar_next() < 0) + break; + rdcnt = 0; + errcnt = 0; + } + return(-1); +} + +/* + * pback() + * push the data used during the archive id phase back into the I/O + * buffer. This is required as we cannot be sure that the header does NOT + * overlap a block boundry (as in the case we are trying to recover a + * flawed archived). This was not designed to be used for any other + * purpose. (What software engineering, HA!) + * WARNING: do not even THINK of pback greater than BLKMULT, unless the + * pback space is increased. + */ + +#if __STDC__ +void +pback(char *pt, int cnt) +#else +void +pback(pt, cnt) + char *pt; + int cnt; +#endif +{ + bufpt -= cnt; + bcopy(pt, bufpt, cnt); + return; +} + +/* + * rd_skip() + * skip foward in the archive during a archive read. Used to get quickly + * past file data and padding for files the user did NOT select. + * Return: + * 0 if ok, -1 failure, and 1 when EOF on the archive volume was detected. + */ + +#if __STDC__ +int +rd_skip(off_t skcnt) +#else +int +rd_skip(skcnt) + off_t skcnt; +#endif +{ + off_t res; + off_t cnt; + off_t skipped = 0; + + /* + * consume what data we have in the buffer. If we have to move foward + * whole records, we call the low level skip function to see if we can + * move within the archive without doing the expensive reads on data we + * do not want. + */ + if (skcnt == 0) + return(0); + res = MIN((bufend - bufpt), skcnt); + bufpt += res; + skcnt -= res; + + /* + * if skcnt is now 0, then no additional i/o is needed + */ + if (skcnt == 0) + return(0); + + /* + * We have to read more, calculate complete and partial record reads + * based on rdblksz. we skip over "cnt" complete records + */ + res = skcnt%rdblksz; + cnt = (skcnt/rdblksz) * rdblksz; + + /* + * if the skip fails, we will have to resync. ar_fow will tell us + * how much it can skip over. We will have to read the rest. + */ + if (ar_fow(cnt, &skipped) < 0) + return(-1); + res += cnt - skipped; + rdcnt += skipped; + + /* + * what is left we have to read (which may be the whole thing if + * ar_fow() told us the device can only read to skip records); + */ + while (res > 0L) { + cnt = bufend - bufpt; + /* + * if the read fails, we will have to resync + */ + if ((cnt <= 0) && ((cnt = buf_fill()) < 0)) + return(-1); + if (cnt == 0) + return(1); + cnt = MIN(cnt, res); + bufpt += cnt; + res -= cnt; + } + return(0); +} + +/* + * wr_fin() + * flush out any data (and pad if required) the last block. We always pad + * with zero (even though we do not have to). Padding with 0 makes it a + * lot easier to recover if the archive is damaged. zero paddding SHOULD + * BE a requirement.... + */ + +#if __STDC__ +void +wr_fin(void) +#else +void +wr_fin() +#endif +{ + if (bufpt > buf) { + bzero(bufpt, bufend - bufpt); + bufpt = bufend; + (void)buf_flush(blksz); + } +} + +/* + * wr_rdbuf() + * fill the write buffer from data passed to it in a buffer (usually used + * by format specific write routines to pass a file header). On failure we + * punt. We do not allow the user to continue to write flawed archives. + * We assume these headers are not very large (the memory copy we use is + * a bit expensive). + * Return: + * 0 if buffer was filled ok, -1 o.w. (buffer flush failure) + */ + +#if __STDC__ +int +wr_rdbuf(register char *out, register int outcnt) +#else +int +wr_rdbuf(out, outcnt) + register char *out; + register int outcnt; +#endif +{ + register int cnt; + + /* + * while there is data to copy copy into the write buffer. when the + * write buffer fills, flush it to the archive and continue + */ + while (outcnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + /* + * only move what we have space for + */ + cnt = MIN(cnt, outcnt); + bcopy(out, bufpt, cnt); + bufpt += cnt; + out += cnt; + outcnt -= cnt; + } + return(0); +} + +/* + * rd_wrbuf() + * copy from the read buffer into a supplied buffer a specified number of + * bytes. If the read buffer is empty fill it and continue to copy. + * usually used to obtain a file header for processing by a format + * specific read routine. + * Return + * number of bytes copied to the buffer, 0 indicates EOF on archive volume, + * -1 is a read error + */ + +#if __STDC__ +int +rd_wrbuf(register char *in, register int cpcnt) +#else +int +rd_wrbuf(in, cpcnt) + register char *in; + register int cpcnt; +#endif +{ + register int res; + register int cnt; + register int incnt = cpcnt; + + /* + * loop until we fill the buffer with the requested number of bytes + */ + while (incnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) { + /* + * read error, return what we got (or the error if + * no data was copied). The caller must know that an + * error occured and has the best knowledge what to + * do with it + */ + if ((res = cpcnt - incnt) > 0) + return(res); + return(cnt); + } + + /* + * calculate how much data to copy based on whats left and + * state of buffer + */ + cnt = MIN(cnt, incnt); + bcopy(bufpt, in, cnt); + bufpt += cnt; + incnt -= cnt; + in += cnt; + } + return(cpcnt); +} + +/* + * wr_skip() + * skip foward during a write. In other words add padding to the file. + * we add zero filled padding as it makes flawed archives much easier to + * recover from. the caller tells us how many bytes of padding to add + * This routine was not designed to add HUGE amount of padding, just small + * amounts (a few 512 byte blocks at most) + * Return: + * 0 if ok, -1 if there was a buf_flush failure + */ + +#if __STDC__ +int +wr_skip(off_t skcnt) +#else +int +wr_skip(skcnt) + off_t skcnt; +#endif +{ + register int cnt; + + /* + * loop while there is more padding to add + */ + while (skcnt > 0L) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + cnt = MIN(cnt, skcnt); + bzero(bufpt, cnt); + bufpt += cnt; + skcnt -= cnt; + } + return(0); +} + +/* + * wr_rdfile() + * fill write buffer with the contents of a file. We are passed an open + * file descriptor to the file an the archive structure that describes the + * file we are storing. The variable "left" is modified to contain the + * number of bytes of the file we were NOT able to write to the archive. + * it is important that we always write EXACTLY the number of bytes that + * the format specific write routine told us to. The file can also get + * bigger, so reading to the end of file would create an improper archive, + * we just detect this case and warn the user. We never create a bad + * archive if we can avoid it. Of course trying to archive files that are + * active is asking for trouble. It we fail, we pass back how much we + * could NOT copy and let the caller deal with it. + * Return: + * 0 ok, -1 if archive write failure. a short read of the file returns a + * 0, but "left" is set to be greater than zero. + */ + +#if __STDC__ +int +wr_rdfile(ARCHD *arcn, int ifd, off_t *left) +#else +int +wr_rdfile(arcn, ifd, left) + ARCHD *arcn; + int ifd; + off_t *left; +#endif +{ + register int cnt; + register int res = 0; + register off_t size = arcn->sb.st_size; + struct stat sb; + + /* + * while there are more bytes to write + */ + while (size > 0L) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) { + *left = size; + return(-1); + } + cnt = MIN(cnt, size); + if ((res = read(ifd, bufpt, cnt)) <= 0) + break; + size -= res; + bufpt += res; + } + + /* + * better check the file did not change during this operation + * or the file read failed. + */ + if (res < 0) + syswarn(1, errno, "Read fault on %s", arcn->org_name); + else if (size != 0L) + warn(1, "File changed size during read %s", arcn->org_name); + else if (fstat(ifd, &sb) < 0) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (arcn->sb.st_mtime != sb.st_mtime) + warn(1, "File %s was modified during copy to archive", + arcn->org_name); + *left = size; + return(0); +} + +/* + * rd_wrfile() + * extract the contents of a file from the archive. If we are unable to + * extract the entire file (due to failure to write the file) we return + * the numbers of bytes we did NOT process. This way the caller knows how + * many bytes to skip past to find the next archive header. If the failure + * was due to an archive read, we will catch that when we try to skip. If + * the format supplies a file data crc value, we calculate the actual crc + * so that it can be compared to the value stored in the header + * NOTE: + * We call a special function to write the file. This function attempts to + * restore file holes (blocks of zeros) into the file. When files are + * sparse this saves space, and is a LOT faster. For non sparse files + * the performance hit is small. As of this writing, no archive supports + * information on where the file holes are. + * Return: + * 0 ok, -1 if archive read failure. if we cannot write the entire file, + * we return a 0 but "left" is set to be the amount unwritten + */ + +#if __STDC__ +int +rd_wrfile(ARCHD *arcn, int ofd, off_t *left) +#else +int +rd_wrfile(arcn, ofd, left) + ARCHD *arcn; + int ofd; + off_t *left; +#endif +{ + register int cnt = 0; + register off_t size = arcn->sb.st_size; + register int res = 0; + register char *fnm = arcn->name; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + u_long crc = 0L; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(ofd, &sb) == 0) { + if (sb.st_blksize > 0) + sz = (int)sb.st_blksize; + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + *left = 0L; + + /* + * Copy the archive to the file the number of bytes specified. We have + * to assume that we want to recover file holes as none of the archive + * formats can record the location of file holes. + */ + while (size > 0L) { + cnt = bufend - bufpt; + /* + * if we get a read error, we do not want to skip, as we may + * miss a header, so we do not set left, but if we get a write + * error, we do want to skip over the unprocessed data. + */ + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) + break; + cnt = MIN(cnt, size); + if ((res = file_write(ofd,bufpt,cnt,&rem,&isem,sz,fnm)) <= 0) { + *left = size; + break; + } + + if (docrc) { + /* + * update the actual crc value + */ + cnt = res; + while (--cnt >= 0) + crc += *bufpt++ & 0xff; + } else + bufpt += res; + size -= res; + } + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved foward may not put + * a hole at the end of the file. + */ + if (isem && (arcn->sb.st_size > 0L)) + file_flush(ofd, fnm, isem); + + /* + * if we failed from archive read, we do not want to skip + */ + if ((size > 0L) && (*left == 0L)) + return(-1); + + /* + * some formats record a crc on file data. If so, then we compare the + * calculated crc to the crc stored in the archive + */ + if (docrc && (size == 0L) && (arcn->crc != crc)) + warn(1,"Actual crc does not match expected crc %s",arcn->name); + return(0); +} + +/* + * cp_file() + * copy the contents of one file to another. used during -rw phase of pax + * just as in rd_wrfile() we use a special write function to write the + * destination file so we can properly copy files with holes. + */ + +#if __STDC__ +void +cp_file(ARCHD *arcn, int fd1, int fd2) +#else +void +cp_file(arcn, fd1, fd2) + ARCHD *arcn; + int fd1; + int fd2; +#endif +{ + register int cnt; + register off_t cpcnt = 0L; + register int res = 0; + register char *fnm = arcn->name; + register int no_hole = 0; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + + /* + * check for holes in the source file. If none, we will use regular + * write instead of file write. + */ + if (((off_t)(arcn->sb.st_blocks * BLKMULT)) >= arcn->sb.st_size) + ++no_hole; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(fd2, &sb) == 0) { + if (sb.st_blksize > 0) + sz = sb.st_blksize; + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + + /* + * read the source file and copy to destination file until EOF + */ + for(;;) { + if ((cnt = read(fd1, buf, blksz)) <= 0) + break; + if (no_hole) + res = write(fd2, buf, cnt); + else + res = file_write(fd2, buf, cnt, &rem, &isem, sz, fnm); + if (res != cnt) + break; + cpcnt += cnt; + } + + /* + * check to make sure the copy is valid. + */ + if (res < 0) + syswarn(1, errno, "Failed write during copy of %s to %s", + arcn->org_name, arcn->name); + else if (cpcnt != arcn->sb.st_size) + warn(1, "File %s changed size during copy to %s", + arcn->org_name, arcn->name); + else if (fstat(fd1, &sb) < 0) + syswarn(1, errno, "Failed stat of %s", arcn->org_name); + else if (arcn->sb.st_mtime != sb.st_mtime) + warn(1, "File %s was modified during copy to %s", + arcn->org_name, arcn->name); + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved foward may not put + * a hole at the end of the file. + */ + if (!no_hole && isem && (arcn->sb.st_size > 0L)) + file_flush(fd2, fnm, isem); + return; +} + +/* + * buf_fill() + * fill the read buffer with the next record (or what we can get) from + * the archive volume. + * Return: + * Number of bytes of data in the read buffer, -1 for read error, and + * 0 when finished (user specified termination in ar_next()). + */ + +#if __STDC__ +int +buf_fill(void) +#else +int +buf_fill() +#endif +{ + register int cnt; + static int fini = 0; + + if (fini) + return(0); + + for(;;) { + /* + * try to fill the buffer. on error the next archive volume is + * opened and we try again. + */ + if ((cnt = ar_read(buf, blksz)) > 0) { + bufpt = buf; + bufend = buf + cnt; + rdcnt += cnt; + return(cnt); + } + + /* + * errors require resync, EOF goes to next archive + */ + if (cnt < 0) + break; + if (ar_next() < 0) { + fini = 1; + return(0); + } + rdcnt = 0; + } + exit_val = 1; + return(-1); +} + +/* + * buf_flush() + * force the write buffer to the archive. We are passed the number of + * bytes in the buffer at the point of the flush. When we change archives + * the record size might change. (either larger or smaller). + * Return: + * 0 if all is ok, -1 when a write error occurs. + */ + +#if __STDC__ +int +buf_flush(register int bufcnt) +#else +int +buf_flush(bufcnt) + register int bufcnt; +#endif +{ + register int cnt; + register int push = 0; + register int totcnt = 0; + + /* + * if we have reached the user specified byte count for each archive + * volume, prompt for the next volume. (The non-standrad -R flag). + * NOTE: If the wrlimit is smaller than wrcnt, we will always write + * at least one record. We always round limit UP to next blocksize. + */ + if ((wrlimit > 0) && (wrcnt > wrlimit)) { + warn(0, "User specified archive volume byte limit reached."); + if (ar_next() < 0) { + wrcnt = 0; + exit_val = 1; + return(-1); + } + wrcnt = 0; + + /* + * The new archive volume might have changed the size of the + * write blocksize. if so we figure out if we need to write + * (one or more times), or if there is now free space left in + * the buffer (it is no longer full). bufcnt has the number of + * bytes in the buffer, (the blocksize, at the point we were + * CALLED). Push has the amount of "extra" data in the buffer + * if the block size has shrunk from a volume change. + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * We have enough data to write at least one archive block + */ + for (;;) { + /* + * write a block and check if it all went out ok + */ + cnt = ar_write(buf, blksz); + if (cnt == blksz) { + /* + * the write went ok + */ + wrcnt += cnt; + totcnt += cnt; + if (push > 0) { + /* we have extra data to push to the front. + * check for more than 1 block of push, and if + * so we loop back to write again + */ + bcopy(bufend, buf, push); + bufpt = buf + push; + if (push >= blksz) { + push -= blksz; + continue; + } + } else + bufpt = buf; + return(totcnt); + } else if (cnt > 0) { + /* + * Oh drat we got a partial write! + * if format doesnt care about alignment let it go, + * we warned the user in ar_write().... but this means + * the last record on this volume violates pax spec.... + */ + totcnt += cnt; + wrcnt += cnt; + bufpt = buf + cnt; + cnt = bufcnt - cnt; + bcopy(bufpt, buf, cnt); + bufpt = buf + cnt; + if (!frmt->blkalgn || ((cnt % frmt->blkalgn) == 0)) + return(totcnt); + break; + } + + /* + * All done, go to next archive + */ + wrcnt = 0; + if (ar_next() < 0) + break; + + /* + * The new archive volume might also have changed the block + * size. if so, figure out if we have too much or too little + * data for using the new block size + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * write failed, stop pax. we must not create a bad archive! + */ + exit_val = 1; + return(-1); +} diff --git a/bin/pax/cache.c b/bin/pax/cache.c new file mode 100644 index 0000000..2224e4d --- /dev/null +++ b/bin/pax/cache.c @@ -0,0 +1,479 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)cache.c 8.1 (Berkeley) 5/31/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <pwd.h> +#include <grp.h> +#include <unistd.h> +#include <stdlib.h> +#include "pax.h" +#include "cache.h" +#include "extern.h" + +/* + * routines that control user, group, uid and gid caches (for the archive + * member print routine). + * IMPORTANT: + * these routines cache BOTH hits and misses, a major performance improvement + */ + +static int pwopn = 0; /* is password file open */ +static int gropn = 0; /* is group file open */ +static UIDC **uidtb = NULL; /* uid to name cache */ +static GIDC **gidtb = NULL; /* gid to name cache */ +static UIDC **usrtb = NULL; /* user name to uid cache */ +static GIDC **grptb = NULL; /* group name to gid cache */ + +/* + * uidtb_start + * creates an an empty uidtb + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +uidtb_start(void) +#else +int +uidtb_start() +#endif +{ + static int fail = 0; + + if (uidtb != NULL) + return(0); + if (fail) + return(-1); + if ((uidtb = (UIDC **)calloc(UID_SZ, sizeof(UIDC *))) == NULL) { + ++fail; + warn(1, "Unable to allocate memory for user id cache table"); + return(-1); + } + return(0); +} + +/* + * gidtb_start + * creates an an empty gidtb + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +gidtb_start(void) +#else +int +gidtb_start() +#endif +{ + static int fail = 0; + + if (gidtb != NULL) + return(0); + if (fail) + return(-1); + if ((gidtb = (GIDC **)calloc(GID_SZ, sizeof(GIDC *))) == NULL) { + ++fail; + warn(1, "Unable to allocate memory for group id cache table"); + return(-1); + } + return(0); +} + +/* + * usrtb_start + * creates an an empty usrtb + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +usrtb_start(void) +#else +int +usrtb_start() +#endif +{ + static int fail = 0; + + if (usrtb != NULL) + return(0); + if (fail) + return(-1); + if ((usrtb = (UIDC **)calloc(UNM_SZ, sizeof(UIDC *))) == NULL) { + ++fail; + warn(1, "Unable to allocate memory for user name cache table"); + return(-1); + } + return(0); +} + +/* + * grptb_start + * creates an an empty grptb + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +grptb_start(void) +#else +int +grptb_start() +#endif +{ + static int fail = 0; + + if (grptb != NULL) + return(0); + if (fail) + return(-1); + if ((grptb = (GIDC **)calloc(GNM_SZ, sizeof(GIDC *))) == NULL) { + ++fail; + warn(1,"Unable to allocate memory for group name cache table"); + return(-1); + } + return(0); +} + +/* + * name_uid() + * caches the name (if any) for the uid. If frc set, we always return the + * the stored name (if valid or invalid match). We use a simple hash table. + * Return + * Pointer to stored name (or a empty string) + */ + +#if __STDC__ +char * +name_uid(uid_t uid, int frc) +#else +char * +name_uid(uid, frc) + uid_t uid; + int frc; +#endif +{ + register struct passwd *pw; + register UIDC *ptr; + + if ((uidtb == NULL) && (uidtb_start() < 0)) + return(""); + + /* + * see if we have this uid cached + */ + ptr = uidtb[uid % UID_SZ]; + if ((ptr != NULL) && (ptr->valid > 0) && (ptr->uid == uid)) { + /* + * have an entry for this uid + */ + if (frc || (ptr->valid == VALID)) + return(ptr->name); + return(""); + } + + /* + * No entry for this uid, we will add it + */ + if (!pwopn) { + setpassent(1); + ++pwopn; + } + if (ptr == NULL) + ptr = (UIDC *)malloc(sizeof(UIDC)); + + if ((pw = getpwuid(uid)) == NULL) { + /* + * no match for this uid in the local password file + * a string that is the uid in numberic format + */ + if (ptr == NULL) + return(""); + ptr->uid = uid; + ptr->valid = INVALID; +# ifdef NET2_STAT + (void)sprintf(ptr->name, "%u", uid); +# else + (void)sprintf(ptr->name, "%lu", uid); +# endif + if (frc == 0) + return(""); + } else { + /* + * there is an entry for this uid in the password file + */ + if (ptr == NULL) + return(pw->pw_name); + ptr->uid = uid; + (void)strncpy(ptr->name, pw->pw_name, UNMLEN); + ptr->name[UNMLEN-1] = '\0'; + ptr->valid = VALID; + } + return(ptr->name); +} + +/* + * name_gid() + * caches the name (if any) for the gid. If frc set, we always return the + * the stored name (if valid or invalid match). We use a simple hash table. + * Return + * Pointer to stored name (or a empty string) + */ + +#if __STDC__ +char * +name_gid(gid_t gid, int frc) +#else +char * +name_gid(gid, frc) + gid_t gid; + int frc; +#endif +{ + register struct group *gr; + register GIDC *ptr; + + if ((gidtb == NULL) && (gidtb_start() < 0)) + return(""); + + /* + * see if we have this gid cached + */ + ptr = gidtb[gid % GID_SZ]; + if ((ptr != NULL) && (ptr->valid > 0) && (ptr->gid == gid)) { + /* + * have an entry for this gid + */ + if (frc || (ptr->valid == VALID)) + return(ptr->name); + return(""); + } + + /* + * No entry for this gid, we will add it + */ + if (!gropn) { + setgroupent(1); + ++gropn; + } + if (ptr == NULL) + ptr = (GIDC *)malloc(sizeof(GIDC)); + + if ((gr = getgrgid(gid)) == NULL) { + /* + * no match for this gid in the local group file, put in + * a string that is the gid in numberic format + */ + if (ptr == NULL) + return(""); + ptr->gid = gid; + ptr->valid = INVALID; +# ifdef NET2_STAT + (void)sprintf(ptr->name, "%u", gid); +# else + (void)sprintf(ptr->name, "%lu", gid); +# endif + if (frc == 0) + return(""); + } else { + /* + * there is an entry for this group in the group file + */ + if (ptr == NULL) + return(gr->gr_name); + ptr->gid = gid; + (void)strncpy(ptr->name, gr->gr_name, GNMLEN); + ptr->name[GNMLEN-1] = '\0'; + ptr->valid = VALID; + } + return(ptr->name); +} + +/* + * uid_name() + * caches the uid for a given user name. We use a simple hash table. + * Return + * the uid (if any) for a user name, or a -1 if no match can be found + */ + +#if __STDC__ +int +uid_name(char *name, uid_t *uid) +#else +int +uid_name(name, uid) + char *name; + uid_t *uid; +#endif +{ + register struct passwd *pw; + register UIDC *ptr; + register int namelen; + + /* + * return -1 for mangled names + */ + if (((namelen = strlen(name)) == 0) || (name[0] == '\0')) + return(-1); + if ((usrtb == NULL) && (usrtb_start() < 0)) + return(-1); + + /* + * look up in hash table, if found and valid return the uid, + * if found and invalid, return a -1 + */ + ptr = usrtb[st_hash(name, namelen, UNM_SZ)]; + if ((ptr != NULL) && (ptr->valid > 0) && !strcmp(name, ptr->name)) { + if (ptr->valid == INVALID) + return(-1); + *uid = ptr->uid; + return(0); + } + + if (!pwopn) { + setpassent(1); + ++pwopn; + } + + if (ptr == NULL) + ptr = (UIDC *)malloc(sizeof(UIDC)); + + /* + * no match, look it up, if no match store it as an invalid entry, + * or store the matching uid + */ + if (ptr == NULL) { + if ((pw = getpwnam(name)) == NULL) + return(-1); + *uid = pw->pw_uid; + return(0); + } + (void)strncpy(ptr->name, name, UNMLEN); + ptr->name[UNMLEN-1] = '\0'; + if ((pw = getpwnam(name)) == NULL) { + ptr->valid = INVALID; + return(-1); + } + ptr->valid = VALID; + *uid = ptr->uid = pw->pw_uid; + return(0); +} + +/* + * gid_name() + * caches the gid for a given group name. We use a simple hash table. + * Return + * the gid (if any) for a group name, or a -1 if no match can be found + */ + +#if __STDC__ +int +gid_name(char *name, gid_t *gid) +#else +int +gid_name(name, gid) + char *name; + gid_t *gid; +#endif +{ + register struct group *gr; + register GIDC *ptr; + register int namelen; + + /* + * return -1 for mangled names + */ + if (((namelen = strlen(name)) == 0) || (name[0] == '\0')) + return(-1); + if ((grptb == NULL) && (grptb_start() < 0)) + return(-1); + + /* + * look up in hash table, if found and valid return the uid, + * if found and invalid, return a -1 + */ + ptr = grptb[st_hash(name, namelen, GID_SZ)]; + if ((ptr != NULL) && (ptr->valid > 0) && !strcmp(name, ptr->name)) { + if (ptr->valid == INVALID) + return(-1); + *gid = ptr->gid; + return(0); + } + + if (!gropn) { + setgroupent(1); + ++gropn; + } + if (ptr == NULL) + ptr = (GIDC *)malloc(sizeof(GIDC)); + + /* + * no match, look it up, if no match store it as an invalid entry, + * or store the matching gid + */ + if (ptr == NULL) { + if ((gr = getgrnam(name)) == NULL) + return(-1); + *gid = gr->gr_gid; + return(0); + } + + (void)strncpy(ptr->name, name, GNMLEN); + ptr->name[GNMLEN-1] = '\0'; + if ((gr = getgrnam(name)) == NULL) { + ptr->valid = INVALID; + return(-1); + } + ptr->valid = VALID; + *gid = ptr->gid = gr->gr_gid; + return(0); +} diff --git a/bin/pax/cache.h b/bin/pax/cache.h new file mode 100644 index 0000000..b4bac4f --- /dev/null +++ b/bin/pax/cache.h @@ -0,0 +1,74 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cache.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * Constants and data structures used to implement group and password file + * caches. Traditional passwd/group cache routines perform quite poorly with + * archives. The chances of hitting a valid lookup with an archive is quite a + * bit worse than with files already resident on the file system. These misses + * create a MAJOR performance cost. To adress this problem, these routines + * cache both hits and misses. + * + * NOTE: name lengths must be as large as those stored in ANY PROTOCOL and + * as stored in the passwd and group files. CACHE SIZES MUST BE PRIME + */ +#define UNMLEN 32 /* >= user name found in any protocol */ +#define GNMLEN 32 /* >= group name found in any protocol */ +#define UID_SZ 317 /* size of user_name/uid cache */ +#define UNM_SZ 317 /* size of user_name/uid cache */ +#define GID_SZ 251 /* size of gid cache */ +#define GNM_SZ 317 /* size of group name cache */ +#define VALID 1 /* entry and name are valid */ +#define INVALID 2 /* entry valid, name NOT valid */ + +/* + * Node structures used in the user, group, uid, and gid caches. + */ + +typedef struct uidc { + int valid; /* is this a valid or a miss entry */ + char name[UNMLEN]; /* uid name */ + uid_t uid; /* cached uid */ +} UIDC; + +typedef struct gidc { + int valid; /* is this a valid or a miss entry */ + char name[GNMLEN]; /* gid name */ + gid_t gid; /* cached gid */ +} GIDC; diff --git a/bin/pax/cpio.c b/bin/pax/cpio.c new file mode 100644 index 0000000..d494c9a --- /dev/null +++ b/bin/pax/cpio.c @@ -0,0 +1,1278 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)cpio.c 8.1 (Berkeley) 5/31/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include "pax.h" +#include "cpio.h" +#include "extern.h" + +static int rd_nm __P((register ARCHD *, int)); +static int rd_ln_nm __P((register ARCHD *)); +static int com_rd __P((register ARCHD *)); + +/* + * Routines which support the different cpio versions + */ + +static int swp_head; /* binary cpio header byte swap */ + +/* + * Routines common to all versions of cpio + */ + +/* + * cpio_strd() + * Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +#if __STDC__ +int +cpio_strd(void) +#else +int +cpio_strd() +#endif +{ + return(lnk_start()); +} + +/* + * cpio_trail() + * Called to determine if a header block is a valid trailer. We are + * passed the block, the in_sync flag (which tells us we are in resync + * mode; looking for a valid header), and cnt (which starts at zero) + * which is used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, + */ + +#if __STDC__ +int +cpio_trail(register ARCHD *arcn) +#else +int +cpio_trail(arcn) + register ARCHD *arcn; +#endif +{ + /* + * look for trailer id in file we are about to process + */ + if ((strcmp(arcn->name, TRAILER) == 0) && (arcn->sb.st_size == 0)) + return(0); + return(-1); +} + +/* + * com_rd() + * operations common to all cpio read functions. + * Return: + * 0 + */ + +#if __STDC__ +static int +com_rd(register ARCHD *arcn) +#else +static int +com_rd(arcn) + register ARCHD *arcn; +#endif +{ + arcn->skip = 0; + arcn->pat = NULL; + arcn->org_name = arcn->name; + switch(arcn->sb.st_mode & C_IFMT) { + case C_ISFIFO: + arcn->type = PAX_FIF; + break; + case C_ISDIR: + arcn->type = PAX_DIR; + break; + case C_ISBLK: + arcn->type = PAX_BLK; + break; + case C_ISCHR: + arcn->type = PAX_CHR; + break; + case C_ISLNK: + arcn->type = PAX_SLK; + break; + case C_ISOCK: + arcn->type = PAX_SCK; + break; + case C_ISCTG: + case C_ISREG: + default: + /* + * we have file data, set up skip (pad is set in the format + * specific sections) + */ + arcn->sb.st_mode = (arcn->sb.st_mode & 0xfff) | C_ISREG; + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + } + if (chk_lnk(arcn) < 0) + return(-1); + return(0); +} + +/* + * cpio_end_wr() + * write the special file with the name trailer in the proper format + * Return: + * result of the write of the trailer from the cpio specific write func + */ + +#if __STDC__ +int +cpio_endwr(void) +#else +int +cpio_endwr() +#endif +{ + ARCHD last; + + /* + * create a trailer request and call the proper format write function + */ + bzero((char *)&last, sizeof(last)); + last.nlen = sizeof(TRAILER) - 1; + last.type = PAX_REG; + last.sb.st_nlink = 1; + (void)strcpy(last.name, TRAILER); + return((*frmt->wr)(&last)); +} + +/* + * rd_nam() + * read in the file name which follows the cpio header + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +static int +rd_nm(register ARCHD *arcn, int nsz) +#else +static int +rd_nm(arcn, nsz) + register ARCHD *arcn; + int nsz; +#endif +{ + /* + * do not even try bogus values + */ + if ((nsz == 0) || (nsz > sizeof(arcn->name))) { + warn(1, "Cpio file name length %d is out of range", nsz); + return(-1); + } + + /* + * read the name and make sure it is not empty and is \0 terminated + */ + if ((rd_wrbuf(arcn->name,nsz) != nsz) || (arcn->name[nsz-1] != '\0') || + (arcn->name[0] == '\0')) { + warn(1, "Cpio file name in header is corrupted"); + return(-1); + } + return(0); +} + +/* + * rd_ln_nm() + * read in the link name for a file with links. The link name is stored + * like file data (and is NOT \0 terminated!) + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +static int +rd_ln_nm(register ARCHD *arcn) +#else +static int +rd_ln_nm(arcn) + register ARCHD *arcn; +#endif +{ + /* + * check the length specified for bogus values + */ + if ((arcn->sb.st_size == 0) || + (arcn->sb.st_size >= sizeof(arcn->ln_name))) { +# ifdef NET2_STAT + warn(1, "Cpio link name length is invalid: %lu", + arcn->sb.st_size); +# else + warn(1, "Cpio link name length is invalid: %qu", + arcn->sb.st_size); +# endif + return(-1); + } + + /* + * read in the link name and \0 terminate it + */ + if (rd_wrbuf(arcn->ln_name, (int)arcn->sb.st_size) != + (int)arcn->sb.st_size) { + warn(1, "Cpio link name read error"); + return(-1); + } + arcn->ln_nlen = arcn->sb.st_size; + arcn->ln_name[arcn->ln_nlen] = '\0'; + + /* + * watch out for those empty link names + */ + if (arcn->ln_name[0] == '\0') { + warn(1, "Cpio link name is corrupt"); + return(-1); + } + return(0); +} + +/* + * Routines common to the extended byte oriented cpio format + */ + +/* + * cpio_id() + * determine if a block given to us is a valid extended byte oriented + * cpio header + * Return: + * 0 if a valid header, -1 otherwise + */ + +#if __STDC__ +int +cpio_id(char *blk, int size) +#else +int +cpio_id(blk, size) + char *blk; + int size; +#endif +{ + if ((size < sizeof(HD_CPIO)) || + (strncmp(blk, AMAGIC, sizeof(AMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * cpio_rd() + * determine if a buffer is a byte oriented extended cpio archive entry. + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +#if __STDC__ +int +cpio_rd(register ARCHD *arcn, register char *buf) +#else +int +cpio_rd(arcn, buf) + register ARCHD *arcn; + register char *buf; +#endif +{ + register int nsz; + register HD_CPIO *hd; + + /* + * check that this is a valid header, if not return -1 + */ + if (cpio_id(buf, sizeof(HD_CPIO)) < 0) + return(-1); + hd = (HD_CPIO *)buf; + + /* + * byte oriented cpio (posix) does not have padding! extract the octal + * ascii fields from the header + */ + arcn->pad = 0L; + arcn->sb.st_dev = (dev_t)asc_ul(hd->c_dev, sizeof(hd->c_dev), OCT); + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), OCT); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), OCT); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), OCT); + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + OCT); + arcn->sb.st_rdev = (dev_t)asc_ul(hd->c_rdev, sizeof(hd->c_rdev), OCT); + arcn->sb.st_mtime = (time_t)asc_ul(hd->c_mtime, sizeof(hd->c_mtime), + OCT); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; +# ifdef NET2_STAT + arcn->sb.st_size = (off_t)asc_ul(hd->c_filesize,sizeof(hd->c_filesize), + OCT); +# else + arcn->sb.st_size = (off_t)asc_uqd(hd->c_filesize,sizeof(hd->c_filesize), + OCT); +# endif + + /* + * check name size and if valid, read in the name of this entry (name + * follows header in the archive) + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),OCT)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * no link name to read for this file + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + return(com_rd(arcn)); + } + + /* + * check link name size and read in the link name. Link names are + * stored like file data. + */ + if (rd_ln_nm(arcn) < 0) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * cpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +#if __STDC__ +off_t +cpio_endrd(void) +#else +off_t +cpio_endrd() +#endif +{ + return((off_t)(sizeof(HD_CPIO) + sizeof(TRAILER))); +} + +/* + * cpio_stwr() + * start up the device mapping table + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +#if __STDC__ +int +cpio_stwr(void) +#else +int +cpio_stwr() +#endif +{ + return(dev_start()); +} + +/* + * cpio_wr() + * copy the data in the ARCHD to buffer in extended byte oriented cpio + * format. + * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +#if __STDC__ +int +cpio_wr(register ARCHD *arcn) +#else +int +cpio_wr(arcn) + register ARCHD *arcn; +#endif +{ + register HD_CPIO *hd; + register int nsz; + char hdblk[sizeof(HD_CPIO)]; + + /* + * check and repair truncated device and inode fields in the header + */ + if (map_dev(arcn, (u_long)CPIO_MASK, (u_long)CPIO_MASK) < 0) + return(-1); + + arcn->pad = 0L; + nsz = arcn->nlen + 1; + hd = (HD_CPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * set data size for file data + */ +# ifdef NET2_STAT + if (ul_asc((u_long)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) { +# else + if (uqd_asc((u_quad_t)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) { +# endif + warn(1,"File is too large for cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * set data size to hold link name + */ + if (ul_asc((u_long)arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) + goto out; + break; + default: + /* + * all other file types have no file data + */ + if (ul_asc((u_long)0, hd->c_filesize, sizeof(hd->c_filesize), + OCT)) + goto out; + break; + } + + /* + * copy the values to the header using octal ascii + */ + if (ul_asc((u_long)MAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc((u_long)arcn->sb.st_dev, hd->c_dev, sizeof(hd->c_dev), + OCT) || + ul_asc((u_long)arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), + OCT) || + ul_asc((u_long)arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), + OCT) || + ul_asc((u_long)arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), + OCT) || + ul_asc((u_long)arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), + OCT) || + ul_asc((u_long)arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), + OCT) || + ul_asc((u_long)arcn->sb.st_rdev, hd->c_rdev, sizeof(hd->c_rdev), + OCT) || + ul_asc((u_long)arcn->sb.st_mtime,hd->c_mtime,sizeof(hd->c_mtime), + OCT) || + ul_asc((u_long)nsz, hd->c_namesize, sizeof(hd->c_namesize), OCT)) + goto out; + + /* + * write the file name to the archive + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_CPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0)) { + warn(1, "Unable to write cpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if this file has data, we are done. The caller will write the file + * data, if we are link tell caller we are done, go to next file + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return(0); + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name to the archive, tell the caller to go to the + * next file as we are done. + */ + if (wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) { + warn(1,"Unable to write cpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + warn(1, "Cpio header field is too small to store file %s", + arcn->org_name); + return(1); +} + +/* + * Routines common to the system VR4 version of cpio (with/without file CRC) + */ + +/* + * vcpio_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITHOUT crc. WATCH it the magic cookies are in OCTAL, the header + * uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +#if __STDC__ +int +vcpio_id(char *blk, int size) +#else +int +vcpio_id(blk, size) + char *blk; + int size; +#endif +{ + if ((size < sizeof(HD_VCPIO)) || + (strncmp(blk, AVMAGIC, sizeof(AVMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITH crc. WATCH it the magic cookies are in OCTAL the header uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +#if __STDC__ +int +crc_id(char *blk, int size) +#else +int +crc_id(blk, size) + char *blk; + int size; +#endif +{ + if ((size < sizeof(HD_VCPIO)) || + (strncmp(blk, AVCMAGIC, sizeof(AVCMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_strd() + w set file data CRC calculations. Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +#if __STDC__ +int +crc_strd(void) +#else +int +crc_strd() +#endif +{ + docrc = 1; + return(lnk_start()); +} + +/* + * vcpio_rd() + * determine if a buffer is a system VR4 archive entry. (with/without CRC) + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +#if __STDC__ +int +vcpio_rd(register ARCHD *arcn, register char *buf) +#else +int +vcpio_rd(arcn, buf) + register ARCHD *arcn; + register char *buf; +#endif +{ + register HD_VCPIO *hd; + dev_t devminor; + dev_t devmajor; + register int nsz; + + /* + * during the id phase it was determined if we were using CRC, use the + * proper id routine. + */ + if (docrc) { + if (crc_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } else { + if (vcpio_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } + + hd = (HD_VCPIO *)buf; + arcn->pad = 0L; + + /* + * extract the hex ascii fields from the header + */ + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), HEX); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), HEX); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), HEX); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), HEX); + arcn->sb.st_mtime = (time_t)asc_ul(hd->c_mtime,sizeof(hd->c_mtime),HEX); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; +# ifdef NET2_STAT + arcn->sb.st_size = (off_t)asc_ul(hd->c_filesize, + sizeof(hd->c_filesize), HEX); +# else + arcn->sb.st_size = (off_t)asc_uqd(hd->c_filesize, + sizeof(hd->c_filesize), HEX); +# endif + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + HEX); + devmajor = (dev_t)asc_ul(hd->c_maj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_min, sizeof(hd->c_min), HEX); + arcn->sb.st_dev = TODEV(devmajor, devminor); + devmajor = (dev_t)asc_ul(hd->c_rmaj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_rmin, sizeof(hd->c_min), HEX); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + arcn->crc = asc_ul(hd->c_chksum, sizeof(hd->c_chksum), HEX); + + /* + * check the length of the file name, if ok read it in, return -1 if + * bogus + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),HEX)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * skip padding. header + filename is aligned to 4 byte boundries + */ + if (rd_skip((off_t)(VCPIO_PAD(sizeof(HD_VCPIO) + nsz))) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + /* + * read in the link name and skip over the padding + */ + if ((rd_ln_nm(arcn) < 0) || + (rd_skip((off_t)(VCPIO_PAD(arcn->sb.st_size))) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * vcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +#if __STDC__ +off_t +vcpio_endrd(void) +#else +off_t +vcpio_endrd() +#endif +{ + return((off_t)(sizeof(HD_VCPIO) + sizeof(TRAILER) + + (VCPIO_PAD(sizeof(HD_VCPIO) + sizeof(TRAILER))))); +} + +/* + * crc_stwr() + * start up the device mapping table, enable crc file calculation + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +#if __STDC__ +int +crc_stwr(void) +#else +int +crc_stwr() +#endif +{ + docrc = 1; + return(dev_start()); +} + +/* + * vcpio_wr() + * copy the data in the ARCHD to buffer in system VR4 cpio + * (with/without crc) format. + * Return + * 0 if file has data to be written after the header, 1 if file has + * NO data to write after the header, -1 if archive write failed + */ + +#if __STDC__ +int +vcpio_wr(register ARCHD *arcn) +#else +int +vcpio_wr(arcn) + register ARCHD *arcn; +#endif +{ + register HD_VCPIO *hd; + unsigned int nsz; + char hdblk[sizeof(HD_VCPIO)]; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, (u_long)VCPIO_MASK, (u_long)VCPIO_MASK) < 0) + return(-1); + nsz = arcn->nlen + 1; + hd = (HD_VCPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + /* + * add the proper magic value depending whether we were asked for + * file data crc's, and the crc if needed. + */ + if (docrc) { + if (ul_asc((u_long)VCMAGIC, hd->c_magic, sizeof(hd->c_magic), + OCT) || + ul_asc((u_long)arcn->crc,hd->c_chksum,sizeof(hd->c_chksum), + HEX)) + goto out; + } else { + if (ul_asc((u_long)VMAGIC, hd->c_magic, sizeof(hd->c_magic), + OCT) || + ul_asc((u_long)0L, hd->c_chksum, sizeof(hd->c_chksum),HEX)) + goto out; + } + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. + */ + arcn->pad = VCPIO_PAD(arcn->sb.st_size); +# ifdef NET2_STAT + if (ul_asc((u_long)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) { +# else + if (uqd_asc((u_quad_t)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) { +# endif + warn(1,"File is too large for sv4cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0L; + if (ul_asc((u_long)arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0L; + if (ul_asc((u_long)0L, hd->c_filesize, sizeof(hd->c_filesize), + HEX)) + goto out; + break; + } + + /* + * set the other fields in the header + */ + if (ul_asc((u_long)arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), + HEX) || + ul_asc((u_long)arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), + HEX) || + ul_asc((u_long)arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), + HEX) || + ul_asc((u_long)arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), + HEX) || + ul_asc((u_long)arcn->sb.st_mtime, hd->c_mtime, sizeof(hd->c_mtime), + HEX) || + ul_asc((u_long)arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), + HEX) || + ul_asc((u_long)MAJOR(arcn->sb.st_dev),hd->c_maj, sizeof(hd->c_maj), + HEX) || + ul_asc((u_long)MINOR(arcn->sb.st_dev),hd->c_min, sizeof(hd->c_min), + HEX) || + ul_asc((u_long)MAJOR(arcn->sb.st_rdev),hd->c_rmaj,sizeof(hd->c_maj), + HEX) || + ul_asc((u_long)MINOR(arcn->sb.st_rdev),hd->c_rmin,sizeof(hd->c_min), + HEX) || + ul_asc((u_long)nsz, hd->c_namesize, sizeof(hd->c_namesize), HEX)) + goto out; + + /* + * write the header, the file name and padding as required. + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_VCPIO)) < 0) || + (wr_rdbuf(arcn->name, (int)nsz) < 0) || + (wr_skip((off_t)(VCPIO_PAD(sizeof(HD_VCPIO) + nsz))) < 0)) { + warn(1,"Could not write sv4cpio header for %s",arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done, copy the file + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip((off_t)(VCPIO_PAD(arcn->ln_nlen))) < 0)) { + warn(1,"Could not write sv4cpio link name for %s", + arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + warn(1,"Sv4cpio header field is too small for file %s",arcn->org_name); + return(1); +} + +/* + * Routines common to the old binary header cpio + */ + +/* + * bcpio_id() + * determine if a block given to us is a old binary cpio header + * (with/without header byte swapping) + * Return: + * 0 if a valid header, -1 otherwise + */ + +#if __STDC__ +int +bcpio_id(char *blk, int size) +#else +int +bcpio_id(blk, size) + char *blk; + int size; +#endif +{ + if (size < sizeof(HD_BCPIO)) + return(-1); + + /* + * check both normal and byte swapped magic cookies + */ + if (((u_short)SHRT_EXT(blk)) == MAGIC) + return(0); + if (((u_short)RSHRT_EXT(blk)) == MAGIC) { + if (!swp_head) + ++swp_head; + return(0); + } + return(-1); +} + +/* + * bcpio_rd() + * determine if a buffer is a old binary archive entry. (it may have byte + * swapped header) convert and store the values in the ARCHD parameter. + * This is a very old header format and should not really be used. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +#if __STDC__ +int +bcpio_rd(register ARCHD *arcn, register char *buf) +#else +int +bcpio_rd(arcn, buf) + register ARCHD *arcn; + register char *buf; +#endif +{ + register HD_BCPIO *hd; + register int nsz; + + /* + * check the header + */ + if (bcpio_id(buf, sizeof(HD_BCPIO)) < 0) + return(-1); + + arcn->pad = 0L; + hd = (HD_BCPIO *)buf; + if (swp_head) { + /* + * header has swapped bytes on 16 bit boundries + */ + arcn->sb.st_dev = (dev_t)(RSHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(RSHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(RSHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(RSHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(RSHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(RSHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(RSHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(RSHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(RSHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(RSHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(RSHRT_EXT(hd->h_filesize_2))); + nsz = (int)(RSHRT_EXT(hd->h_namesize)); + } else { + arcn->sb.st_dev = (dev_t)(SHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(SHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(SHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(SHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(SHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(SHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(SHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(SHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(SHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(SHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(SHRT_EXT(hd->h_filesize_2))); + nsz = (int)(SHRT_EXT(hd->h_namesize)); + } + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * check the file name size, if bogus give up. otherwise read the file + * name + */ + if (nsz < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * header + file name are aligned to 2 byte boundries, skip if needed + */ + if (rd_skip((off_t)(BCPIO_PAD(sizeof(HD_BCPIO) + nsz))) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode & C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)){ + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + if ((rd_ln_nm(arcn) < 0) || + (rd_skip((off_t)(BCPIO_PAD(arcn->sb.st_size))) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * bcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +#if __STDC__ +off_t +bcpio_endrd(void) +#else +off_t +bcpio_endrd() +#endif +{ + return((off_t)(sizeof(HD_BCPIO) + sizeof(TRAILER) + + (BCPIO_PAD(sizeof(HD_BCPIO) + sizeof(TRAILER))))); +} + +/* + * bcpio_wr() + * copy the data in the ARCHD to buffer in old binary cpio format + * There is a real chance of field overflow with this critter. So we + * always check the conversion is ok. nobody in his their right mind + * should write an achive in this format... + * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +#if __STDC__ +int +bcpio_wr(register ARCHD *arcn) +#else +int +bcpio_wr(arcn) + register ARCHD *arcn; +#endif +{ + register HD_BCPIO *hd; + register int nsz; + char hdblk[sizeof(HD_BCPIO)]; + off_t t_offt; + int t_int; + time_t t_timet; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, (u_long)BCPIO_MASK, (u_long)BCPIO_MASK) < 0) + return(-1); + + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + hd = (HD_BCPIO *)hdblk; + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. + */ + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + hd->h_filesize_1[0] = CHR_WR_0(arcn->sb.st_size); + hd->h_filesize_1[1] = CHR_WR_1(arcn->sb.st_size); + hd->h_filesize_2[0] = CHR_WR_2(arcn->sb.st_size); + hd->h_filesize_2[1] = CHR_WR_3(arcn->sb.st_size); + t_offt = (off_t)(SHRT_EXT(hd->h_filesize_1)); + t_offt = (t_offt<<16) | ((off_t)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->sb.st_size != t_offt) { + warn(1,"File is too large for bcpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0L; + hd->h_filesize_1[0] = CHR_WR_0(arcn->ln_nlen); + hd->h_filesize_1[1] = CHR_WR_1(arcn->ln_nlen); + hd->h_filesize_2[0] = CHR_WR_2(arcn->ln_nlen); + hd->h_filesize_2[1] = CHR_WR_3(arcn->ln_nlen); + t_int = (int)(SHRT_EXT(hd->h_filesize_1)); + t_int = (t_int << 16) | ((int)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->ln_nlen != t_int) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0L; + hd->h_filesize_1[0] = (char)0; + hd->h_filesize_1[1] = (char)0; + hd->h_filesize_2[0] = (char)0; + hd->h_filesize_2[1] = (char)0; + break; + } + + /* + * build up the rest of the fields + */ + hd->h_magic[0] = CHR_WR_2(MAGIC); + hd->h_magic[1] = CHR_WR_3(MAGIC); + hd->h_dev[0] = CHR_WR_2(arcn->sb.st_dev); + hd->h_dev[1] = CHR_WR_3(arcn->sb.st_dev); + if (arcn->sb.st_dev != (dev_t)(SHRT_EXT(hd->h_dev))) + goto out; + hd->h_ino[0] = CHR_WR_2(arcn->sb.st_ino); + hd->h_ino[1] = CHR_WR_3(arcn->sb.st_ino); + if (arcn->sb.st_ino != (ino_t)(SHRT_EXT(hd->h_ino))) + goto out; + hd->h_mode[0] = CHR_WR_2(arcn->sb.st_mode); + hd->h_mode[1] = CHR_WR_3(arcn->sb.st_mode); + if (arcn->sb.st_mode != (mode_t)(SHRT_EXT(hd->h_mode))) + goto out; + hd->h_uid[0] = CHR_WR_2(arcn->sb.st_uid); + hd->h_uid[1] = CHR_WR_3(arcn->sb.st_uid); + if (arcn->sb.st_uid != (uid_t)(SHRT_EXT(hd->h_uid))) + goto out; + hd->h_gid[0] = CHR_WR_2(arcn->sb.st_gid); + hd->h_gid[1] = CHR_WR_3(arcn->sb.st_gid); + if (arcn->sb.st_gid != (gid_t)(SHRT_EXT(hd->h_gid))) + goto out; + hd->h_nlink[0] = CHR_WR_2(arcn->sb.st_nlink); + hd->h_nlink[1] = CHR_WR_3(arcn->sb.st_nlink); + if (arcn->sb.st_nlink != (nlink_t)(SHRT_EXT(hd->h_nlink))) + goto out; + hd->h_rdev[0] = CHR_WR_2(arcn->sb.st_rdev); + hd->h_rdev[1] = CHR_WR_3(arcn->sb.st_rdev); + if (arcn->sb.st_rdev != (dev_t)(SHRT_EXT(hd->h_rdev))) + goto out; + hd->h_mtime_1[0] = CHR_WR_0(arcn->sb.st_mtime); + hd->h_mtime_1[1] = CHR_WR_1(arcn->sb.st_mtime); + hd->h_mtime_2[0] = CHR_WR_2(arcn->sb.st_mtime); + hd->h_mtime_2[1] = CHR_WR_3(arcn->sb.st_mtime); + t_timet = (time_t)(SHRT_EXT(hd->h_mtime_1)); + t_timet = (t_timet << 16) | ((time_t)(SHRT_EXT(hd->h_mtime_2))); + if (arcn->sb.st_mtime != t_timet) + goto out; + nsz = arcn->nlen + 1; + hd->h_namesize[0] = CHR_WR_2(nsz); + hd->h_namesize[1] = CHR_WR_3(nsz); + if (nsz != (int)(SHRT_EXT(hd->h_namesize))) + goto out; + + /* + * write the header, the file name and padding as required. + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_BCPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0) || + (wr_skip((off_t)(BCPIO_PAD(sizeof(HD_BCPIO) + nsz))) < 0)) { + warn(1, "Could not write bcpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip((off_t)(BCPIO_PAD(arcn->ln_nlen))) < 0)) { + warn(1,"Could not write bcpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + warn(1,"Bcpio header field is too small for file %s", arcn->org_name); + return(1); +} diff --git a/bin/pax/cpio.h b/bin/pax/cpio.h new file mode 100644 index 0000000..73429f9 --- /dev/null +++ b/bin/pax/cpio.h @@ -0,0 +1,151 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cpio.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * Defines common to all versions of cpio + */ +#define TRAILER "TRAILER!!!" /* name in last archive record */ + +/* + * Header encoding of the different file types + */ +#define C_ISDIR 040000 /* Directory */ +#define C_ISFIFO 010000 /* FIFO */ +#define C_ISREG 0100000 /* Regular file */ +#define C_ISBLK 060000 /* Block special file */ +#define C_ISCHR 020000 /* Character special file */ +#define C_ISCTG 0110000 /* Reserved for contiguous files */ +#define C_ISLNK 0120000 /* Reserved for symbolic links */ +#define C_ISOCK 0140000 /* Reserved for sockets */ +#define C_IFMT 0170000 /* type of file */ + +/* + * Data Interchange Format - Extended cpio header format - POSIX 1003.1-1990 + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_dev[6]; /* device number */ + char c_ino[6]; /* inode number */ + char c_mode[6]; /* file type/access */ + char c_uid[6]; /* owners uid */ + char c_gid[6]; /* owners gid */ + char c_nlink[6]; /* # of links at archive creation */ + char c_rdev[6]; /* block/char major/minor # */ + char c_mtime[11]; /* modification time */ + char c_namesize[6]; /* length of pathname */ + char c_filesize[11]; /* length of file in bytes */ +} HD_CPIO; + +#define MAGIC 070707 /* transportable archive id */ + +#ifdef _PAX_ +#define AMAGIC "070707" /* ascii equivalent string of MAGIC */ +#define CPIO_MASK 0x3ffff /* bits valid in the dev/ino fields */ + /* used for dev/inode remaps */ +#endif /* _PAX_ */ + +/* + * Binary cpio header structure + * + * CAUTION! CAUTION! CAUTION! + * Each field really represents a 16 bit short (NOT ASCII). Described as + * an array of chars in an attempt to improve portability!! + */ +typedef struct { + u_char h_magic[2]; + u_char h_dev[2]; + u_char h_ino[2]; + u_char h_mode[2]; + u_char h_uid[2]; + u_char h_gid[2]; + u_char h_nlink[2]; + u_char h_rdev[2]; + u_char h_mtime_1[2]; + u_char h_mtime_2[2]; + u_char h_namesize[2]; + u_char h_filesize_1[2]; + u_char h_filesize_2[2]; +} HD_BCPIO; + +#ifdef _PAX_ +/* + * extraction and creation macros for binary cpio + */ +#define SHRT_EXT(ch) ((((unsigned)(ch)[0])<<8) | (((unsigned)(ch)[1])&0xff)) +#define RSHRT_EXT(ch) ((((unsigned)(ch)[1])<<8) | (((unsigned)(ch)[0])&0xff)) +#define CHR_WR_0(val) ((char)(((val) >> 24) & 0xff)) +#define CHR_WR_1(val) ((char)(((val) >> 16) & 0xff)) +#define CHR_WR_2(val) ((char)(((val) >> 8) & 0xff)) +#define CHR_WR_3(val) ((char)((val) & 0xff)) + +/* + * binary cpio masks and pads + */ +#define BCPIO_PAD(x) ((2 - ((x) & 1)) & 1) /* pad to next 2 byte word */ +#define BCPIO_MASK 0xffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ + +/* + * System VR4 cpio header structure (with/without file data crc) + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_ino[8]; /* inode number */ + char c_mode[8]; /* file type/access */ + char c_uid[8]; /* owners uid */ + char c_gid[8]; /* owners gid */ + char c_nlink[8]; /* # of links at archive creation */ + char c_mtime[8]; /* modification time */ + char c_filesize[8]; /* length of file in bytes */ + char c_maj[8]; /* block/char major # */ + char c_min[8]; /* block/char minor # */ + char c_rmaj[8]; /* special file major # */ + char c_rmin[8]; /* special file minor # */ + char c_namesize[8]; /* length of pathname */ + char c_chksum[8]; /* 0 OR CRC of bytes of FILE data */ +} HD_VCPIO; + +#define VMAGIC 070701 /* sVr4 new portable archive id */ +#define VCMAGIC 070702 /* sVr4 new portable archive id CRC */ +#ifdef _PAX_ +#define AVMAGIC "070701" /* ascii string of above */ +#define AVCMAGIC "070702" /* ascii string of above */ +#define VCPIO_PAD(x) ((4 - ((x) & 3)) & 3) /* pad to next 4 byte word */ +#define VCPIO_MASK 0xffffffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ diff --git a/bin/pax/extern.h b/bin/pax/extern.h new file mode 100644 index 0000000..a1a10d0 --- /dev/null +++ b/bin/pax/extern.h @@ -0,0 +1,285 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * External references from each source file + */ + +#include <sys/cdefs.h> + +/* + * ar_io.c + */ +extern char *arcname; +int ar_open __P((char *)); +void ar_close __P((void)); +void ar_drain __P((void)); +int ar_set_wr __P((void)); +int ar_app_ok __P((void)); +int ar_read __P((register char *, register int)); +int ar_write __P((register char *, register int)); +int ar_rdsync __P((void)); +int ar_fow __P((off_t, off_t *)); +int ar_rev __P((off_t )); +int ar_next __P((void)); + +/* + * ar_subs.c + */ +extern u_long flcnt; +void list __P((void)); +void extract __P((void)); +void append __P((void)); +void archive __P((void)); +void copy __P((void)); + +/* + * buf_subs.c + */ +extern int blksz; +extern int wrblksz; +extern int maxflt; +extern int rdblksz; +extern off_t wrlimit; +extern off_t rdcnt; +extern off_t wrcnt; +int wr_start __P((void)); +int rd_start __P((void)); +void cp_start __P((void)); +int appnd_start __P((off_t)); +int rd_sync __P((void)); +void pback __P((char *, int)); +int rd_skip __P((off_t)); +void wr_fin __P((void)); +int wr_rdbuf __P((register char *, register int)); +int rd_wrbuf __P((register char *, register int)); +int wr_skip __P((off_t)); +int wr_rdfile __P((ARCHD *, int, off_t *)); +int rd_wrfile __P((ARCHD *, int, off_t *)); +void cp_file __P((ARCHD *, int, int)); +int buf_fill __P((void)); +int buf_flush __P((register int)); + +/* + * cache.c + */ +int uidtb_start __P((void)); +int gidtb_start __P((void)); +int usrtb_start __P((void)); +int grptb_start __P((void)); +char * name_uid __P((uid_t, int)); +char * name_gid __P((gid_t, int)); +int uid_name __P((char *, uid_t *)); +int gid_name __P((char *, gid_t *)); + +/* + * cpio.c + */ +int cpio_strd __P((void)); +int cpio_trail __P((register ARCHD *)); +int cpio_endwr __P((void)); +int cpio_id __P((char *, int)); +int cpio_rd __P((register ARCHD *, register char *)); +off_t cpio_endrd __P((void)); +int cpio_stwr __P((void)); +int cpio_wr __P((register ARCHD *)); +int vcpio_id __P((char *, int)); +int crc_id __P((char *, int)); +int crc_strd __P((void)); +int vcpio_rd __P((register ARCHD *, register char *)); +off_t vcpio_endrd __P((void)); +int crc_stwr __P((void)); +int vcpio_wr __P((register ARCHD *)); +int bcpio_id __P((char *, int)); +int bcpio_rd __P((register ARCHD *, register char *)); +off_t bcpio_endrd __P((void)); +int bcpio_wr __P((register ARCHD *)); + +/* + * file_subs.c + */ +int file_creat __P((register ARCHD *)); +void file_close __P((register ARCHD *, int)); +int lnk_creat __P((register ARCHD *)); +int cross_lnk __P((register ARCHD *)); +int chk_same __P((register ARCHD *)); +int node_creat __P((register ARCHD *)); +int unlnk_exist __P((register char *, register int)); +int chk_path __P((register char *, uid_t, gid_t)); +void set_ftime __P((char *fnm, time_t mtime, time_t atime, int frc)); +int set_ids __P((char *, uid_t, gid_t)); +void set_pmode __P((char *, mode_t)); +int file_write __P((int, char *, register int, int *, int *, int, char *)); +void file_flush __P((int, char *, int)); +void rdfile_close __P((register ARCHD *, register int *)); +int set_crc __P((register ARCHD *, register int)); + +/* + * ftree.c + */ +int ftree_start __P((void)); +int ftree_add __P((register char *)); +void ftree_sel __P((register ARCHD *)); +void ftree_chk __P((void)); +int next_file __P((register ARCHD *)); + +/* + * gen_subs.c + */ +void ls_list __P((register ARCHD *, time_t)); +void ls_tty __P((register ARCHD *)); +void zf_strncpy __P((register char *, register char *, int)); +int l_strncpy __P((register char *, register char *, int)); +u_long asc_ul __P((register char *, int, register int)); +int ul_asc __P((u_long, register char *, register int, register int)); +#ifndef NET2_STAT +u_quad_t asc_uqd __P((register char *, int, register int)); +int uqd_asc __P((u_quad_t, register char *, register int, register int)); +#endif + +/* + * options.c + */ +extern FSUB fsub[]; +extern int ford[]; +void options __P((register int, register char **)); +OPLIST * opt_next __P((void)); +int opt_add __P((register char *)); +int bad_opt __P((void)); + +/* + * pat_rep.c + */ +int rep_add __P((register char *)); +int pat_add __P((char *)); +void pat_chk __P((void)); +int pat_sel __P((register ARCHD *)); +int pat_match __P((register ARCHD *)); +int mod_name __P((register ARCHD *)); +int set_dest __P((register ARCHD *, char *, int)); + +/* + * pax.c + */ +extern int act; +extern FSUB *frmt; +extern int cflag; +extern int dflag; +extern int iflag; +extern int kflag; +extern int lflag; +extern int nflag; +extern int tflag; +extern int uflag; +extern int vflag; +extern int Dflag; +extern int Hflag; +extern int Lflag; +extern int Xflag; +extern int Yflag; +extern int Zflag; +extern int vfpart; +extern int patime; +extern int pmtime; +extern int pmode; +extern int pids; +extern int exit_val; +extern int docrc; +extern char *dirptr; +extern char *ltmfrmt; +extern char *argv0; +int main __P((int, char **)); +void sig_cleanup __P((int)); + +/* + * sel_subs.c + */ +int sel_chk __P((register ARCHD *)); +int grp_add __P((register char *)); +int usr_add __P((register char *)); +int trng_add __P((register char *)); + +/* + * tables.c + */ +int lnk_start __P((void)); +int chk_lnk __P((register ARCHD *)); +void purg_lnk __P((register ARCHD *)); +void lnk_end __P((void)); +int ftime_start __P((void)); +int chk_ftime __P((register ARCHD *)); +int name_start __P((void)); +int add_name __P((register char *, int, char *)); +void sub_name __P((register char *, int *)); +int dev_start __P((void)); +int add_dev __P((register ARCHD *)); +int map_dev __P((register ARCHD *, u_long, u_long)); +int atdir_start __P((void)); +void atdir_end __P((void)); +void add_atdir __P((char *, dev_t, ino_t, time_t, time_t)); +int get_atdir __P((dev_t, ino_t, time_t *, time_t *)); +int dir_start __P((void)); +void add_dir __P((char *, int, struct stat *, int)); +void proc_dir __P((void)); +u_int st_hash __P((char *, int, int)); + +/* + * tar.c + */ +int tar_endwr __P((void)); +off_t tar_endrd __P((void)); +int tar_trail __P((register char *, register int, register int *)); +int tar_id __P((register char *, int)); +int tar_opt __P((void)); +int tar_rd __P((register ARCHD *, register char *)); +int tar_wr __P((register ARCHD *)); +int ustar_strd __P((void)); +int ustar_stwr __P((void)); +int ustar_id __P((char *, int)); +int ustar_rd __P((register ARCHD *, register char *)); +int ustar_wr __P((register ARCHD *)); + +/* + * tty_subs.c + */ +int tty_init __P((void)); +void tty_prnt __P((char *, ...)); +int tty_read __P((char *, int)); +void warn __P((int, char *, ...)); +void syswarn __P((int, int, char *, ...)); diff --git a/bin/pax/file_subs.c b/bin/pax/file_subs.c new file mode 100644 index 0000000..f394707 --- /dev/null +++ b/bin/pax/file_subs.c @@ -0,0 +1,1055 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)file_subs.c 8.1 (Berkeley) 5/31/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <unistd.h> +#include <sys/param.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <sys/uio.h> +#include <stdlib.h> +#include "pax.h" +#include "extern.h" + +static int +mk_link __P((register char *,register struct stat *,register char *, int)); + +/* + * routines that deal with file operations such as: creating, removing; + * and setting access modes, uid/gid and times of files + */ + +#define FILEBITS (S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO) +#define SETBITS (S_ISUID | S_ISGID) +#define ABITS (FILEBITS | SETBITS) + +/* + * file_creat() + * Create and open a file. + * Return: + * file descriptor or -1 for failure + */ + +#if __STDC__ +int +file_creat(register ARCHD *arcn) +#else +int +file_creat(arcn) + register ARCHD *arcn; +#endif +{ + int fd = -1; + mode_t file_mode; + int oerrno; + + /* + * assume file doesn't exist, so just try to create it, most times this + * works. We have to take special handling when the file does exist. To + * detect this, we use O_EXCL. For example when trying to create a + * file and a character device or fifo exists with the same name, we + * can accidently open the device by mistake (or block waiting to open) + * If we find that the open has failed, then figure spend the effore to + * figure out why. This strategy was found to have better average + * performance in common use than checking the file (and the path) + * first with lstat. + */ + file_mode = arcn->sb.st_mode & FILEBITS; + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, + file_mode)) >= 0) + return(fd); + + /* + * the file seems to exist. First we try to get rid of it (found to be + * the second most common failure when traced). If this fails, only + * then we go to the expense to check and create the path to the file + */ + if (unlnk_exist(arcn->name, arcn->type) != 0) + return(-1); + + for (;;) { + /* + * try to open it again, if this fails, check all the nodes in + * the path and give it a final try. if chk_path() finds that + * it cannot fix anything, we will skip the last attempt + */ + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC, + file_mode)) >= 0) + break; + oerrno = errno; + if (chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid) < 0) { + syswarn(1, oerrno, "Unable to create %s", arcn->name); + return(-1); + } + } + return(fd); +} + +/* + * file_close() + * Close file descriptor to a file just created by pax. Sets modes, + * ownership and times as required. + * Return: + * 0 for success, -1 for failure + */ + +#if __STDC__ +void +file_close(register ARCHD *arcn, int fd) +#else +void +file_close(arcn, fd) + register ARCHD *arcn; + int fd; +#endif +{ + int res = 0; + + if (fd < 0) + return; + if (close(fd) < 0) + syswarn(0, errno, "Unable to close file descriptor on %s", + arcn->name); + + /* + * set owner/groups first as this may strip off mode bits we want + * then set file permission modes. Then set file access and + * modification times. + */ + if (pids) + res = set_ids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid); + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode) + set_pmode(arcn->name, arcn->sb.st_mode); + if (patime || pmtime) + set_ftime(arcn->name, arcn->sb.st_mtime, arcn->sb.st_atime, 0); +} + +/* + * lnk_creat() + * Create a hard link to arcn->ln_name from arcn->name. arcn->ln_name + * must exist; + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +lnk_creat(register ARCHD *arcn) +#else +int +lnk_creat(arcn) + register ARCHD *arcn; +#endif +{ + struct stat sb; + + /* + * we may be running as root, so we have to be sure that link target + * is not a directory, so we lstat and check + */ + if (lstat(arcn->ln_name, &sb) < 0) { + syswarn(1,errno,"Unable to link to %s from %s", arcn->ln_name, + arcn->name); + return(-1); + } + + if (S_ISDIR(sb.st_mode)) { + warn(1, "A hard link to the directory %s is not allowed", + arcn->ln_name); + return(-1); + } + + return(mk_link(arcn->ln_name, &sb, arcn->name, 0)); +} + +/* + * cross_lnk() + * Create a hard link to arcn->org_name from arcn->name. Only used in copy + * with the -l flag. No warning or error if this does not succeed (we will + * then just create the file) + * Return: + * 1 if copy() should try to create this file node + * 0 if cross_lnk() ok, -1 for fatal flaw (like linking to self). + */ + +#if __STDC__ +int +cross_lnk(register ARCHD *arcn) +#else +int +cross_lnk(arcn) + register ARCHD *arcn; +#endif +{ + /* + * try to make a link to orginal file (-l flag in copy mode). make sure + * we do not try to link to directories in case we are running as root + * (and it might succeed). + */ + if (arcn->type == PAX_DIR) + return(1); + return(mk_link(arcn->org_name, &(arcn->sb), arcn->name, 1)); +} + +/* + * chk_same() + * In copy mode if we are not trying to make hard links between the src + * and destinations, make sure we are not going to overwrite ourselves by + * accident. This slows things down a little, but we have to protect all + * those people who make typing errors. + * Return: + * 1 the target does not exist, go ahead and copy + * 0 skip it file exists (-k) or may be the same as source file + */ + +#if __STDC__ +int +chk_same(register ARCHD *arcn) +#else +int +chk_same(arcn) + register ARCHD *arcn; +#endif +{ + struct stat sb; + + /* + * if file does not exist, return. if file exists and -k, skip it + * quietly + */ + if (lstat(arcn->name, &sb) < 0) + return(1); + if (kflag) + return(0); + + /* + * better make sure the user does not have src == dest by mistake + */ + if ((arcn->sb.st_dev == sb.st_dev) && (arcn->sb.st_ino == sb.st_ino)) { + warn(1, "Unable to copy %s, file would overwrite itself", + arcn->name); + return(0); + } + return(1); +} + +/* + * mk_link() + * try to make a hard link between two files. if ign set, we do not + * complain. + * Return: + * 0 if successful (or we are done with this file but no error, such as + * finding the from file exists and the user has set -k). + * 1 when ign was set to indicates we could not make the link but we + * should try to copy/extract the file as that might work (and is an + * allowed option). -1 an error occurred. + */ + +#if __STDC__ +static int +mk_link(register char *to, register struct stat *to_sb, register char *from, + int ign) +#else +static int +mk_link(to, to_sb, from, ign) + register char *to; + register struct stat *to_sb; + register char *from; + int ign; +#endif +{ + struct stat sb; + int oerrno; + + /* + * if from file exists, it has to be unlinked to make the link. If the + * file exists and -k is set, skip it quietly + */ + if (lstat(from, &sb) == 0) { + if (kflag) + return(0); + + /* + * make sure it is not the same file, protect the user + */ + if ((to_sb->st_dev==sb.st_dev)&&(to_sb->st_ino == sb.st_ino)) { + warn(1, "Unable to link file %s to itself", to); + return(-1);; + } + + /* + * try to get rid of the file, based on the type + */ + if (S_ISDIR(sb.st_mode)) { + if (rmdir(from) < 0) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + } else if (unlink(from) < 0) { + if (!ign) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + return(1); + } + } + + /* + * from file is gone (or did not exist), try to make the hard link. + * if it fails, check the path and try it again (if chk_path() says to + * try again) + */ + for (;;) { + if (link(to, from) == 0) + break; + oerrno = errno; + if (chk_path(from, to_sb->st_uid, to_sb->st_gid) == 0) + continue; + if (!ign) { + syswarn(1, oerrno, "Could not link to %s from %s", to, + from); + return(-1); + } + return(1); + } + + /* + * all right the link was made + */ + return(0); +} + +/* + * node_creat() + * create an entry in the file system (other than a file or hard link). + * If successful, sets uid/gid modes and times as required. + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +node_creat(register ARCHD *arcn) +#else +int +node_creat(arcn) + register ARCHD *arcn; +#endif +{ + register int res; + register int ign = 0; + register int oerrno; + register int pass = 0; + mode_t file_mode; + struct stat sb; + + /* + * create node based on type, if that fails try to unlink the node and + * try again. finally check the path and try again. As noted in the + * file and link creation routines, this method seems to exhibit the + * best performance in general use workloads. + */ + file_mode = arcn->sb.st_mode & FILEBITS; + + for (;;) { + switch(arcn->type) { + case PAX_DIR: + res = mkdir(arcn->name, file_mode); + if (ign) + res = 0; + break; + case PAX_CHR: + file_mode |= S_IFCHR; + res = mknod(arcn->name, file_mode, arcn->sb.st_rdev); + break; + case PAX_BLK: + file_mode |= S_IFBLK; + res = mknod(arcn->name, file_mode, arcn->sb.st_rdev); + break; + case PAX_FIF: + res = mkfifo(arcn->name, file_mode); + break; + case PAX_SCK: + /* + * Skip sockets, operation has no meaning under BSD + */ + warn(0, + "%s skipped. Sockets cannot be copied or extracted", + arcn->name); + return(-1); + case PAX_SLK: + if ((res = symlink(arcn->ln_name, arcn->name)) == 0) + return(0); + break; + case PAX_CTG: + case PAX_HLK: + case PAX_HRG: + case PAX_REG: + default: + /* + * we should never get here + */ + warn(0, "%s has an unknown file type, skipping", + arcn->name); + return(-1); + } + + /* + * if we were able to create the node break out of the loop, + * otherwise try to unlink the node and try again. if that + * fails check the full path and try a final time. + */ + if (res == 0) + break; + + /* + * we failed to make the node + */ + oerrno = errno; + if ((ign = unlnk_exist(arcn->name, arcn->type)) < 0) + return(-1); + + if (++pass <= 1) + continue; + + if (chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid) < 0) { + syswarn(1, oerrno, "Could not create: %s", arcn->name); + return(-1); + } + } + + /* + * we were able to create the node. set uid/gid, modes and times + */ + if (pids) + res = set_ids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid); + else + res = 0; + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT any + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode) + set_pmode(arcn->name, arcn->sb.st_mode); + + if (arcn->type == PAX_DIR) { + /* + * Dirs must be processed again at end of extract to set times + * and modes to agree with those stored in the archive. However + * to allow extract to continue, we may have to also set owner + * rights. This allows nodes in the archive that are children + * of this directory to be extracted without failure. Both time + * and modes will be fixed after the entire archive is read and + * before pax exits. + */ + if (access(arcn->name, R_OK | W_OK | X_OK) < 0) { + if (lstat(arcn->name, &sb) < 0) { + syswarn(0, errno,"Could not access %s (stat)", + arcn->name); + set_pmode(arcn->name,file_mode | S_IRWXU); + } else { + /* + * We have to add rights to the dir, so we make + * sure to restore the mode. The mode must be + * restored AS CREATED and not as stored if + * pmode is not set. + */ + set_pmode(arcn->name, + ((sb.st_mode & FILEBITS) | S_IRWXU)); + if (!pmode) + arcn->sb.st_mode = sb.st_mode; + } + + /* + * we have to force the mode to what was set here, + * since we changed it from the default as created. + */ + add_dir(arcn->name, arcn->nlen, &(arcn->sb), 1); + } else if (pmode || patime || pmtime) + add_dir(arcn->name, arcn->nlen, &(arcn->sb), 0); + } + + if (patime || pmtime) + set_ftime(arcn->name, arcn->sb.st_mtime, arcn->sb.st_atime, 0); + return(0); +} + +/* + * unlnk_exist() + * Remove node from file system with the specified name. We pass the type + * of the node that is going to replace it. When we try to create a + * directory and find that it already exists, we allow processing to + * continue as proper modes etc will always be set for it later on. + * Return: + * 0 is ok to proceed, no file with the specified name exists + * -1 we were unable to remove the node, or we should not remove it (-k) + * 1 we found a directory and we were going to create a directory. + */ + +#if __STDC__ +int +unlnk_exist(register char *name, register int type) +#else +int +unlnk_exist(name, type) + register char *name; + register int type; +#endif +{ + struct stat sb; + + /* + * the file does not exist, or -k we are done + */ + if (lstat(name, &sb) < 0) + return(0); + if (kflag) + return(-1); + + if (S_ISDIR(sb.st_mode)) { + /* + * try to remove a directory, if it fails and we were going to + * create a directory anyway, tell the caller (return a 1) + */ + if (rmdir(name) < 0) { + if (type == PAX_DIR) + return(1); + syswarn(1,errno,"Unable to remove directory %s", name); + return(-1); + } + return(0); + } + + /* + * try to get rid of all non-directory type nodes + */ + if (unlink(name) < 0) { + syswarn(1, errno, "Could not unlink %s", name); + return(-1); + } + return(0); +} + +/* + * chk_path() + * We were trying to create some kind of node in the file system and it + * failed. chk_path() makes sure the path up to the node exists and is + * writeable. When we have to create a directory that is missing along the + * path somewhere, the directory we create will be set to the same + * uid/gid as the file has (when uid and gid are being preserved). + * NOTE: this routine is a real performance loss. It is only used as a + * last resort when trying to create entries in the file system. + * Return: + * -1 when it could find nothing it is allowed to fix. + * 0 otherwise + */ + +#if __STDC__ +int +chk_path( register char *name, uid_t st_uid, gid_t st_gid) +#else +int +chk_path(name, st_uid, st_gid) + register char *name; + uid_t st_uid; + gid_t st_gid; +#endif +{ + register char *spt = name; + struct stat sb; + int retval = -1; + + /* + * watch out for paths with nodes stored directly in / (e.g. /bozo) + */ + if (*spt == '/') + ++spt; + + for(;;) { + /* + * work foward from the first / and check each part of the path + */ + spt = strchr(spt, '/'); + if (spt == NULL) + break; + *spt = '\0'; + + /* + * if it exists we assume it is a directory, it is not within + * the spec (at least it seems to read that way) to alter the + * file system for nodes NOT EXPLICITLY stored on the archive. + * If that assumption is changed, you would test the node here + * and figure out how to get rid of it (probably like some + * recursive unlink()) or fix up the directory permissions if + * required (do an access()). + */ + if (lstat(name, &sb) == 0) { + *(spt++) = '/'; + continue; + } + + /* + * the path fails at this point, see if we can create the + * needed directory and continue on + */ + if (mkdir(name, S_IRWXU | S_IRWXG | S_IRWXO) < 0) { + *spt = '/'; + retval = -1; + break; + } + + /* + * we were able to create the directory. We will tell the + * caller that we found something to fix, and it is ok to try + * and create the node again. + */ + retval = 0; + if (pids) + (void)set_ids(name, st_uid, st_gid); + + /* + * make sure the user doen't have some strange umask that + * causes this newly created directory to be unusable. We fix + * the modes and restore them back to the creation default at + * the end of pax + */ + if ((access(name, R_OK | W_OK | X_OK) < 0) && + (lstat(name, &sb) == 0)) { + set_pmode(name, ((sb.st_mode & FILEBITS) | S_IRWXU)); + add_dir(name, spt - name, &sb, 1); + } + *(spt++) = '/'; + continue; + } + return(retval); +} + +/* + * set_ftime() + * Set the access time and modification time for a named file. If frc is + * non-zero we force these times to be set even if the the user did not + * request access and/or modification time preservation (this is also + * used by -t to reset access times). + * When ign is zero, only those times the user has asked for are set, the + * other ones are left alone. We do not assume the un-documented feature + * of many utimes() implementations that consider a 0 time value as a do + * not set request. + */ + +#if __STDC__ +void +set_ftime(char *fnm, time_t mtime, time_t atime, int frc) +#else +void +set_ftime(fnm, mtime, atime, frc) + char *fnm; + time_t mtime; + time_t atime; + int frc; +#endif +{ + static struct timeval tv[2] = {{0L, 0L}, {0L, 0L}}; + struct stat sb; + + tv[0].tv_sec = (long)atime; + tv[1].tv_sec = (long)mtime; + if (!frc && (!patime || !pmtime)) { + /* + * if we are not forcing, only set those times the user wants + * set. We get the current values of the times if we need them. + */ + if (lstat(fnm, &sb) == 0) { + if (!patime) + tv[0].tv_sec = (long)sb.st_atime; + if (!pmtime) + tv[1].tv_sec = (long)sb.st_mtime; + } else + syswarn(0,errno,"Unable to obtain file stats %s", fnm); + } + + /* + * set the times + */ + if (utimes(fnm, tv) < 0) + syswarn(1, errno, "Access/modification time set failed on: %s", + fnm); + return; +} + +/* + * set_ids() + * set the uid and gid of a file system node + * Return: + * 0 when set, -1 on failure + */ + +#if __STDC__ +int +set_ids(char *fnm, uid_t uid, gid_t gid) +#else +int +set_ids(fnm, uid, gid) + char *fnm; + uid_t uid; + gid_t gid; +#endif +{ + if (chown(fnm, uid, gid) < 0) { + syswarn(1, errno, "Unable to set file uid/gid of %s", fnm); + return(-1); + } + return(0); +} + +/* + * set_pmode() + * Set file access mode + */ + +#if __STDC__ +void +set_pmode(char *fnm, mode_t mode) +#else +void +set_pmode(fnm, mode) + char *fnm; + mode_t mode; +#endif +{ + mode &= ABITS; + if (chmod(fnm, mode) < 0) + syswarn(1, errno, "Could not set permissions on %s", fnm); + return; +} + +/* + * file_write() + * Write/copy a file (during copy or archive extract). This routine knows + * how to copy files with lseek holes in it. (Which are read as file + * blocks containing all 0's but do not have any file blocks associated + * with the data). Typical examples of these are files created by dbm + * variants (.pag files). While the file size of these files are huge, the + * actual storage is quite small (the files are sparse). The problem is + * the holes read as all zeros so are probably stored on the archive that + * way (there is no way to determine if the file block is really a hole, + * we only know that a file block of all zero's can be a hole). + * At this writing, no major archive format knows how to archive files + * with holes. However, on extraction (or during copy, -rw) we have to + * deal with these files. Without detecting the holes, the files can + * consume a lot of file space if just written to disk. This replacement + * for write when passed the basic allocation size of a file system block, + * uses lseek whenever it detects the input data is all 0 within that + * file block. In more detail, the strategy is as follows: + * While the input is all zero keep doing an lseek. Keep track of when we + * pass over file block boundries. Only write when we hit a non zero + * input. once we have written a file block, we continue to write it to + * the end (we stop looking at the input). When we reach the start of the + * next file block, start checking for zero blocks again. Working on file + * block boundries significantly reduces the overhead when copying files + * that are NOT very sparse. This overhead (when compared to a write) is + * almost below the measurement resolution on many systems. Without it, + * files with holes cannot be safely copied. It does has a side effect as + * it can put holes into files that did not have them before, but that is + * not a problem since the file contents are unchanged (in fact it saves + * file space). (Except on paging files for diskless clients. But since we + * cannot determine one of those file from here, we ignore them). If this + * ever ends up on a system where CTG files are supported and the holes + * are not desired, just do a conditional test in those routines that + * call file_write() and have it call write() instead. BEFORE CLOSING THE + * FILE, make sure to call file_flush() when the last write finishes with + * an empty block. A lot of file systems will not create an lseek hole at + * the end. In this case we drop a single 0 at the end to force the + * trailing 0's in the file. + * ---Parameters--- + * rem: how many bytes left in this file system block + * isempt: have we written to the file block yet (is it empty) + * sz: basic file block allocation size + * cnt: number of bytes on this write + * str: buffer to write + * Return: + * number of bytes written, -1 on write (or lseek) error. + */ + +#if __STDC__ +int +file_write(int fd, char *str, register int cnt, int *rem, int *isempt, int sz, + char *name) +#else +int +file_write(fd, str, cnt, rem, isempt, sz, name) + int fd; + char *str; + register int cnt; + int *rem; + int *isempt; + int sz; + char *name; +#endif +{ + register char *pt; + register char *end; + register int wcnt; + register char *st = str; + + /* + * while we have data to process + */ + while (cnt) { + if (!*rem) { + /* + * We are now at the start of file system block again + * (or what we think one is...). start looking for + * empty blocks again + */ + *isempt = 1; + *rem = sz; + } + + /* + * only examine up to the end of the current file block or + * remaining characters to write, whatever is smaller + */ + wcnt = MIN(cnt, *rem); + cnt -= wcnt; + *rem -= wcnt; + if (*isempt) { + /* + * have not written to this block yet, so we keep + * looking for zero's + */ + pt = st; + end = st + wcnt; + + /* + * look for a zero filled buffer + */ + while ((pt < end) && (*pt == '\0')) + ++pt; + + if (pt == end) { + /* + * skip, buf is empty so far + */ + if (lseek(fd, (off_t)wcnt, SEEK_CUR) < 0) { + syswarn(1,errno,"File seek on %s", + name); + return(-1); + } + st = pt; + continue; + } + /* + * drat, the buf is not zero filled + */ + *isempt = 0; + } + + /* + * have non-zero data in this file system block, have to write + */ + if (write(fd, st, wcnt) != wcnt) { + syswarn(1, errno, "Failed write to file %s", name); + return(-1); + } + st += wcnt; + } + return(st - str); +} + +/* + * file_flush() + * when the last file block in a file is zero, many file systems will not + * let us create a hole at the end. To get the last block with zeros, we + * write the last BYTE with a zero (back up one byte and write a zero). + */ + +#if __STDC__ +void +file_flush(int fd, char *fname, int isempt) +#else +void +file_flush(fd, fname, isempt) + int fd; + char *fname; + int isempt; +#endif +{ + static char blnk[] = "\0"; + + /* + * silly test, but make sure we are only called when the last block is + * filled with all zeros. + */ + if (!isempt) + return; + + /* + * move back one byte and write a zero + */ + if (lseek(fd, (off_t)-1, SEEK_CUR) < 0) { + syswarn(1, errno, "Failed seek on file %s", fname); + return; + } + + if (write(fd, blnk, 1) < 0) + syswarn(1, errno, "Failed write to file %s", fname); + return; +} + +/* + * rdfile_close() + * close a file we have beed reading (to copy or archive). If we have to + * reset access time (tflag) do so (the times are stored in arcn). + */ + +#if __STDC__ +void +rdfile_close(register ARCHD *arcn, register int *fd) +#else +void +rdfile_close(arcn, fd) + register ARCHD *arcn; + register int *fd; +#endif +{ + /* + * make sure the file is open + */ + if (*fd < 0) + return; + + (void)close(*fd); + *fd = -1; + if (!tflag) + return; + + /* + * user wants last access time reset + */ + set_ftime(arcn->org_name, arcn->sb.st_mtime, arcn->sb.st_atime, 1); + return; +} + +/* + * set_crc() + * read a file to calculate its crc. This is a real drag. Archive formats + * that have this, end up reading the file twice (we have to write the + * header WITH the crc before writing the file contents. Oh well... + * Return: + * 0 if was able to calculate the crc, -1 otherwise + */ + +#if __STDC__ +int +set_crc(register ARCHD *arcn, register int fd) +#else +int +set_crc(arcn, fd) + register ARCHD *arcn; + register int fd; +#endif +{ + register int i; + register int res; + off_t cpcnt = 0L; + u_long size; + unsigned long crc = 0L; + char tbuf[FILEBLK]; + struct stat sb; + + if (fd < 0) { + /* + * hmm, no fd, should never happen. well no crc then. + */ + arcn->crc = 0L; + return(0); + } + + if ((size = (u_long)arcn->sb.st_blksize) > (u_long)sizeof(tbuf)) + size = (u_long)sizeof(tbuf); + + /* + * read all the bytes we think that there are in the file. If the user + * is trying to archive an active file, forget this file. + */ + for(;;) { + if ((res = read(fd, tbuf, size)) <= 0) + break; + cpcnt += res; + for (i = 0; i < res; ++i) + crc += (tbuf[i] & 0xff); + } + + /* + * safety check. we want to avoid archiving files that are active as + * they can create inconsistant archive copies. + */ + if (cpcnt != arcn->sb.st_size) + warn(1, "File changed size %s", arcn->org_name); + else if (fstat(fd, &sb) < 0) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (arcn->sb.st_mtime != sb.st_mtime) + warn(1, "File %s was modified during read", arcn->org_name); + else if (lseek(fd, (off_t)0L, SEEK_SET) < 0) + syswarn(1, errno, "File rewind failed on: %s", arcn->org_name); + else { + arcn->crc = crc; + return(0); + } + return(-1); +} diff --git a/bin/pax/ftree.c b/bin/pax/ftree.c new file mode 100644 index 0000000..62dd94d --- /dev/null +++ b/bin/pax/ftree.c @@ -0,0 +1,543 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)ftree.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include <fts.h> +#include "pax.h" +#include "ftree.h" +#include "extern.h" + +/* + * routines to interface with the fts library function. + * + * file args supplied to pax are stored on a single linked list (of type FTREE) + * and given to fts to be processed one at a time. pax "selects" files from + * the expansion of each arg into the corresponding file tree (if the arg is a + * directory, otherwise the node itself is just passed to pax). The selection + * is modified by the -n and -u flags. The user is informed when a specific + * file arg does not generate any selected files. -n keeps expanding the file + * tree arg until one of its files is selected, then skips to the next file + * arg. when the user does not supply the file trees as command line args to + * pax, they are read from stdin + */ + +static FTS *ftsp = NULL; /* curent FTS handle */ +static int ftsopts; /* options to be used on fts_open */ +static char *farray[2]; /* array for passing each arg to fts */ +static FTREE *fthead = NULL; /* head of linked list of file args */ +static FTREE *fttail = NULL; /* tail of linked list of file args */ +static FTREE *ftcur = NULL; /* current file arg being processed */ +static FTSENT *ftent = NULL; /* current file tree entry */ +static int ftree_skip; /* when set skip to next file arg */ + +static int ftree_arg __P((void)); + +/* + * ftree_start() + * initialize the options passed to fts_open() during this run of pax + * options are based on the selection of pax options by the user + * fts_start() also calls fts_arg() to open the first valid file arg. We + * also attempt to reset directory access times when -t (tflag) is set. + * Return: + * 0 if there is at least one valid file arg to process, -1 otherwise + */ + +#if __STDC__ +int +ftree_start(void) +#else +int +ftree_start() +#endif +{ + /* + * set up the operation mode of fts, open the first file arg. We must + * use FTS_NOCHDIR, as the user may have to open multiple archives and + * if fts did a chdir off into the boondocks, we may create an archive + * volume in an place where the user did not expect to. + */ + ftsopts = FTS_NOCHDIR; + + /* + * optional user flags that effect file traversal + * -H command line symlink follow only (half follow) + * -L follow sylinks (logical) + * -P do not follow sylinks (physical). This is the default. + * -X do not cross over mount points + * -t preserve access times on files read. + * -n select only the first member of a file tree when a match is found + * -d do not extract subtrees rooted at a directory arg. + */ + if (Lflag) + ftsopts |= FTS_LOGICAL; + else + ftsopts |= FTS_PHYSICAL; + if (Hflag) +# ifdef NET2_FTS + warn(0, "The -H flag is not supported on this version"); +# else + ftsopts |= FTS_COMFOLLOW; +# endif + if (Xflag) + ftsopts |= FTS_XDEV; + + if ((fthead == NULL) && ((farray[0] = malloc(PAXPATHLEN+2)) == NULL)) { + warn(1, "Unable to allocate memory for file name buffer"); + return(-1); + } + + if (ftree_arg() < 0) + return(-1); + if (tflag && (atdir_start() < 0)) + return(-1); + return(0); +} + +/* + * ftree_add() + * add the arg to the linked list of files to process. Each will be + * processed by fts one at a time + * Return: + * 0 if added to the linked list, -1 if failed + */ + +#if __STDC__ +int +ftree_add(register char *str) +#else +int +ftree_add(str) + register char *str; +#endif +{ + register FTREE *ft; + register int len; + + /* + * simple check for bad args + */ + if ((str == NULL) || (*str == '\0')) { + warn(0, "Invalid file name arguement"); + return(-1); + } + + /* + * allocate FTREE node and add to the end of the linked list (args are + * processed in the same order they were passed to pax). Get rid of any + * trailing / the user may pass us. (watch out for / by itself). + */ + if ((ft = (FTREE *)malloc(sizeof(FTREE))) == NULL) { + warn(0, "Unable to allocate memory for filename"); + return(-1); + } + + if (((len = strlen(str) - 1) > 0) && (str[len] == '/')) + str[len] = '\0'; + ft->fname = str; + ft->refcnt = 0; + ft->fow = NULL; + if (fthead == NULL) { + fttail = fthead = ft; + return(0); + } + fttail->fow = ft; + fttail = ft; + return(0); +} + +/* + * ftree_sel() + * this entry has been selected by pax. bump up reference count and handle + * -n and -d processing. + */ + +#if __STDC__ +void +ftree_sel(register ARCHD *arcn) +#else +void +ftree_sel(arcn) + register ARCHD *arcn; +#endif +{ + /* + * set reference bit for this pattern. This linked list is only used + * when file trees are supplied pax as args. The list is not used when + * the trees are read from stdin. + */ + if (ftcur != NULL) + ftcur->refcnt = 1; + + /* + * if -n we are done with this arg, force a skip to the next arg when + * pax asks for the next file in next_file(). + * if -d we tell fts only to match the directory (if the arg is a dir) + * and not the entire file tree rooted at that point. + */ + if (nflag) + ftree_skip = 1; + + if (!dflag || (arcn->type != PAX_DIR)) + return; + + if (ftent != NULL) + (void)fts_set(ftsp, ftent, FTS_SKIP); +} + +/* + * ftree_chk() + * called at end on pax execution. Prints all those file args that did not + * have a selected member (reference count still 0) + */ + +#if __STDC__ +void +ftree_chk(void) +#else +void +ftree_chk() +#endif +{ + register FTREE *ft; + register int wban = 0; + + /* + * make sure all dir access times were reset. + */ + if (tflag) + atdir_end(); + + /* + * walk down list and check reference count. Print out those members + * that never had a match + */ + for (ft = fthead; ft != NULL; ft = ft->fow) { + if (ft->refcnt > 0) + continue; + if (wban == 0) { + warn(1,"WARNING! These file names were not selected:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", ft->fname); + } +} + +/* + * ftree_arg() + * Get the next file arg for fts to process. Can be from either the linked + * list or read from stdin when the user did not them as args to pax. Each + * arg is processed until the first successful fts_open(). + * Return: + * 0 when the next arg is ready to go, -1 if out of file args (or EOF on + * stdin). + */ + +#if __STDC__ +static int +ftree_arg(void) +#else +static int +ftree_arg() +#endif +{ + register char *pt; + + /* + * close off the current file tree + */ + if (ftsp != NULL) { + (void)fts_close(ftsp); + ftsp = NULL; + } + + /* + * keep looping until we get a valid file tree to process. Stop when we + * reach the end of the list (or get an eof on stdin) + */ + for(;;) { + if (fthead == NULL) { + /* + * the user didn't supply any args, get the file trees + * to process from stdin; + */ + if (fgets(farray[0], PAXPATHLEN+1, stdin) == NULL) + return(-1); + if ((pt = strchr(farray[0], '\n')) != NULL) + *pt = '\0'; + } else { + /* + * the user supplied the file args as arguements to pax + */ + if (ftcur == NULL) + ftcur = fthead; + else if ((ftcur = ftcur->fow) == NULL) + return(-1); + farray[0] = ftcur->fname; + } + + /* + * watch it, fts wants the file arg stored in a array of char + * ptrs, with the last one a null. we use a two element array + * and set farray[0] to point at the buffer with the file name + * in it. We cannnot pass all the file args to fts at one shot + * as we need to keep a handle on which file arg generates what + * files (the -n and -d flags need this). If the open is + * successful, return a 0. + */ + if ((ftsp = fts_open(farray, ftsopts, NULL)) != NULL) + break; + } + return(0); +} + +/* + * next_file() + * supplies the next file to process in the supplied archd structure. + * Return: + * 0 when contents of arcn have been set with the next file, -1 when done. + */ + +#if __STDC__ +int +next_file(register ARCHD *arcn) +#else +int +next_file(arcn) + register ARCHD *arcn; +#endif +{ + register int cnt; + time_t atime; + time_t mtime; + + /* + * ftree_sel() might have set the ftree_skip flag if the user has the + * -n option and a file was selected from this file arg tree. (-n says + * only one member is matched for each pattern) ftree_skip being 1 + * forces us to go to the next arg now. + */ + if (ftree_skip) { + /* + * clear and go to next arg + */ + ftree_skip = 0; + if (ftree_arg() < 0) + return(-1); + } + + /* + * loop until we get a valid file to process + */ + for(;;) { + if ((ftent = fts_read(ftsp)) == NULL) { + /* + * out of files in this tree, go to next arg, if none + * we are done + */ + if (ftree_arg() < 0) + return(-1); + continue; + } + + /* + * handle each type of fts_read() flag + */ + switch(ftent->fts_info) { + case FTS_D: + case FTS_DEFAULT: + case FTS_F: + case FTS_SL: + case FTS_SLNONE: + /* + * these are all ok + */ + break; + case FTS_DP: + /* + * already saw this directory. If the user wants file + * access times reset, we use this to restore the + * access time for this directory since this is the + * last time we will see it in this file subtree + * remember to force the time (this is -t on a read + * directory, not a created directory). + */ +# ifdef NET2_FTS + if (!tflag || (get_atdir(ftent->fts_statb.st_dev, + ftent->fts_statb.st_ino, &mtime, &atime) < 0)) +# else + if (!tflag || (get_atdir(ftent->fts_statp->st_dev, + ftent->fts_statp->st_ino, &mtime, &atime) < 0)) +# endif + continue; + set_ftime(ftent->fts_path, mtime, atime, 1); + continue; + case FTS_DC: + /* + * fts claims a file system cycle + */ + warn(1,"File system cycle found at %s",ftent->fts_path); + continue; + case FTS_DNR: +# ifdef NET2_FTS + syswarn(1, errno, +# else + syswarn(1, ftent->fts_errno, +# endif + "Unable to read directory %s", ftent->fts_path); + continue; + case FTS_ERR: +# ifdef NET2_FTS + syswarn(1, errno, +# else + syswarn(1, ftent->fts_errno, +# endif + "File system traversal error"); + continue; + case FTS_NS: + case FTS_NSOK: +# ifdef NET2_FTS + syswarn(1, errno, +# else + syswarn(1, ftent->fts_errno, +# endif + "Unable to access %s", ftent->fts_path); + continue; + } + + /* + * ok got a file tree node to process. copy info into arcn + * structure (initialize as required) + */ + arcn->skip = 0; + arcn->pad = 0; + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; +# ifdef NET2_FTS + arcn->sb = ftent->fts_statb; +# else + arcn->sb = *(ftent->fts_statp); +# endif + + /* + * file type based set up and copy into the arcn struct + * SIDE NOTE: + * we try to reset the access time on all files and directories + * we may read when the -t flag is specified. files are reset + * when we close them after copying. we reset the directories + * when we are done with their file tree (we also clean up at + * end in case we cut short a file tree traversal). However + * there is no way to reset access times on symlinks. + */ + switch(S_IFMT & arcn->sb.st_mode) { + case S_IFDIR: + arcn->type = PAX_DIR; + if (!tflag) + break; + add_atdir(ftent->fts_path, arcn->sb.st_dev, + arcn->sb.st_ino, arcn->sb.st_mtime, + arcn->sb.st_atime); + break; + case S_IFCHR: + arcn->type = PAX_CHR; + break; + case S_IFBLK: + arcn->type = PAX_BLK; + break; + case S_IFREG: + /* + * only regular files with have data to store on the + * archive. all others will store a zero length skip. + * the skip field is used by pax for actual data it has + * to read (or skip over). + */ + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + case S_IFLNK: + arcn->type = PAX_SLK; + /* + * have to read the symlink path from the file + */ + if ((cnt = readlink(ftent->fts_path, arcn->ln_name, + PAXPATHLEN)) < 0) { + syswarn(1, errno, "Unable to read symlink %s", + ftent->fts_path); + continue; + } + /* + * set link name length, watch out readlink does not + * allways null terminate the link path + */ + arcn->ln_name[cnt] = '\0'; + arcn->ln_nlen = cnt; + break; + case S_IFSOCK: + /* + * under BSD storing a socket is senseless but we will + * let the format specific write function make the + * decision of what to do with it. + */ + arcn->type = PAX_SCK; + break; + case S_IFIFO: + arcn->type = PAX_FIF; + break; + } + break; + } + + /* + * copy file name, set file name length + */ + arcn->nlen = l_strncpy(arcn->name, ftent->fts_path, PAXPATHLEN+1); + arcn->name[arcn->nlen] = '\0'; + arcn->org_name = ftent->fts_path; + return(0); +} diff --git a/bin/pax/ftree.h b/bin/pax/ftree.h new file mode 100644 index 0000000..f93eda6 --- /dev/null +++ b/bin/pax/ftree.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ftree.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * Data structure used by the ftree.c routines to store the file args to be + * handed to fts(). It keeps a reference count of which args generated a + * "selected" member + */ + +typedef struct ftree { + char *fname; /* file tree name */ + int refcnt; /* has tree had a selected file? */ + struct ftree *fow; /* pointer to next entry on list */ +} FTREE; diff --git a/bin/pax/gen_subs.c b/bin/pax/gen_subs.c new file mode 100644 index 0000000..390e5bd --- /dev/null +++ b/bin/pax/gen_subs.c @@ -0,0 +1,487 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)gen_subs.c 8.1 (Berkeley) 5/31/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <stdio.h> +#include <ctype.h> +#include <tzfile.h> +#include <utmp.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include "pax.h" +#include "extern.h" + +/* + * a collection of general purpose subroutines used by pax + */ + +/* + * constants used by ls_list() when printing out archive members + */ +#define MODELEN 20 +#define DATELEN 64 +#define SIXMONTHS ((DAYSPERNYEAR / 2) * SECSPERDAY) +#define CURFRMT "%b %e %H:%M" +#define OLDFRMT "%b %e %Y" +#ifndef UT_NAMESIZE +#define UT_NAMESIZE 8 +#endif +#define UT_GRPSIZE 6 + +/* + * ls_list() + * list the members of an archive in ls format + */ + +#if __STDC__ +void +ls_list(register ARCHD *arcn, time_t now) +#else +void +ls_list(arcn, now) + register ARCHD *arcn; + time_t now; +#endif +{ + register struct stat *sbp; + char f_mode[MODELEN]; + char f_date[DATELEN]; + char *timefrmt; + + /* + * if not verbose, just print the file name + */ + if (!vflag) { + (void)printf("%s\n", arcn->name); + (void)fflush(stdout); + return; + } + + /* + * user wants long mode + */ + sbp = &(arcn->sb); + strmode(sbp->st_mode, f_mode); + + if (ltmfrmt == NULL) { + /* + * no locale specified format. time format based on age + * compared to the time pax was started. + */ + if ((sbp->st_mtime + SIXMONTHS) <= now) + timefrmt = OLDFRMT; + else + timefrmt = CURFRMT; + } else + timefrmt = ltmfrmt; + + /* + * print file mode, link count, uid, gid and time + */ + if (strftime(f_date,DATELEN,timefrmt,localtime(&(sbp->st_mtime))) == 0) + f_date[0] = '\0'; + (void)printf("%s%2u %-*s %-*s ", f_mode, sbp->st_nlink, UT_NAMESIZE, + name_uid(sbp->st_uid, 1), UT_GRPSIZE, + name_gid(sbp->st_gid, 1)); + + /* + * print device id's for devices, or sizes for other nodes + */ + if ((arcn->type == PAX_CHR) || (arcn->type == PAX_BLK)) +# ifdef NET2_STAT + (void)printf("%4u,%4u ", MAJOR(sbp->st_rdev), +# else + (void)printf("%4lu,%4lu ", MAJOR(sbp->st_rdev), +# endif + MINOR(sbp->st_rdev)); + else { +# ifdef NET2_STAT + (void)printf("%9lu ", sbp->st_size); +# else + (void)printf("%9qu ", sbp->st_size); +# endif + } + + /* + * print name and link info for hard and soft links + */ + (void)printf("%s %s", f_date, arcn->name); + if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + (void)printf(" == %s\n", arcn->ln_name); + else if (arcn->type == PAX_SLK) + (void)printf(" => %s\n", arcn->ln_name); + else + (void)putchar('\n'); + (void)fflush(stdout); + return; +} + +/* + * tty_ls() + * print a short summary of file to tty. + */ + +#if __STDC__ +void +ls_tty(register ARCHD *arcn) +#else +void +ls_tty(arcn) + register ARCHD *arcn; +#endif +{ + char f_date[DATELEN]; + char f_mode[MODELEN]; + char *timefrmt; + + if (ltmfrmt == NULL) { + /* + * no locale specified format + */ + if ((arcn->sb.st_mtime + SIXMONTHS) <= time((time_t *)NULL)) + timefrmt = OLDFRMT; + else + timefrmt = CURFRMT; + } else + timefrmt = ltmfrmt; + + /* + * convert time to string, and print + */ + if (strftime(f_date, DATELEN, timefrmt, + localtime(&(arcn->sb.st_mtime))) == 0) + f_date[0] = '\0'; + strmode(arcn->sb.st_mode, f_mode); + tty_prnt("%s%s %s\n", f_mode, f_date, arcn->name); + return; +} + +/* + * zf_strncpy() + * copy src to dest up to len chars (stopping at first '\0'), when src is + * shorter than len, pads to len with '\0'. big performance win (and + * a lot easier to code) over strncpy(), then a strlen() then a + * bzero(). (or doing the bzero() first). + */ + +#if __STDC__ +void +zf_strncpy(register char *dest, register char *src, int len) +#else +void +zf_strncpy(dest, src, len) + register char *dest; + register char *src; + int len; +#endif +{ + register char *stop; + + stop = dest + len; + while ((dest < stop) && (*src != '\0')) + *dest++ = *src++; + while (dest < stop) + *dest++ = '\0'; + return; +} + +/* + * l_strncpy() + * copy src to dest up to len chars (stopping at first '\0') + * Return: + * number of chars copied. (Note this is a real performance win over + * doing a strncpy() then a strlen() + */ + +#if __STDC__ +int +l_strncpy(register char *dest, register char *src, int len) +#else +int +l_strncpy(dest, src, len) + register char *dest; + register char *src; + int len; +#endif +{ + register char *stop; + register char *start; + + stop = dest + len; + start = dest; + while ((dest < stop) && (*src != '\0')) + *dest++ = *src++; + if (dest < stop) + *dest = '\0'; + return(dest - start); +} + +/* + * asc_ul() + * convert hex/octal character string into a u_long. We do not have to + * check for overflow! (the headers in all supported formats are not large + * enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * unsigned long value + */ + +#if __STDC__ +u_long +asc_ul(register char *str, int len, register int base) +#else +u_long +asc_ul(str, len, base) + register char *str; + int len; + register int base; +#endif +{ + register char *stop; + u_long tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * ul_asc() + * convert an unsigned long into an hex/oct ascii string. pads with LEADING + * ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +#if __STDC__ +int +ul_asc(u_long val, register char *str, register int len, register int base) +#else +int +ul_asc(val, str, len, base) + u_long val; + register char *str; + register int len; + register int base; +#endif +{ + register char *pt; + u_long digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + if ((val = (val >> 4)) == (u_long)0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = (val >> 3)) == (u_long)0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != (u_long)0) + return(-1); + return(0); +} + +#ifndef NET2_STAT +/* + * asc_uqd() + * convert hex/octal character string into a u_quad_t. We do not have to + * check for overflow! (the headers in all supported formats are not large + * enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * u_quad_t value + */ + +#if __STDC__ +u_quad_t +asc_uqd(register char *str, int len, register int base) +#else +u_quad_t +asc_uqd(str, len, base) + register char *str; + int len; + register int base; +#endif +{ + register char *stop; + u_quad_t tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * uqd_asc() + * convert an u_quad_t into a hex/oct ascii string. pads with LEADING + * ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +#if __STDC__ +int +uqd_asc(u_quad_t val, register char *str, register int len, register int base) +#else +int +uqd_asc(val, str, len, base) + u_quad_t val; + register char *str; + register int len; + register int base; +#endif +{ + register char *pt; + u_quad_t digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + if ((val = (val >> 4)) == (u_quad_t)0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = (val >> 3)) == (u_quad_t)0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != (u_quad_t)0) + return(-1); + return(0); +} +#endif diff --git a/bin/pax/options.c b/bin/pax/options.c new file mode 100644 index 0000000..a7233fb --- /dev/null +++ b/bin/pax/options.c @@ -0,0 +1,1140 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)options.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/mtio.h> +#include <sys/param.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <limits.h> +#include "pax.h" +#include "options.h" +#include "cpio.h" +#include "tar.h" +#include "extern.h" + +/* + * Routines which handle command line options + */ + +static char flgch[] = FLGCH; /* list of all possible flags */ +static OPLIST *ophead = NULL; /* head for format specific options -x */ +static OPLIST *optail = NULL; /* option tail */ + +static int no_op __P((void)); +static void printflg __P((unsigned int)); +static int c_frmt __P((const void *, const void *)); +static off_t str_offt __P((char *)); +static void pax_options __P((register int, register char **)); +static void pax_usage __P((void)); +static void tar_options __P((register int, register char **)); +static void tar_usage __P((void)); +#ifdef notdef +static void cpio_options __P((register int, register char **)); +static void cpio_usage __P((void)); +#endif + +/* + * Format specific routine table - MUST BE IN SORTED ORDER BY NAME + * (see pax.h for description of each function) + * + * name, blksz, hdsz, udev, hlk, blkagn, inhead, id, st_read, + * read, end_read, st_write, write, end_write, trail, + * rd_data, wr_data, options + */ + +FSUB fsub[] = { +/* 0: OLD BINARY CPIO */ + "bcpio", 5120, sizeof(HD_BCPIO), 1, 0, 0, 1, bcpio_id, cpio_strd, + bcpio_rd, bcpio_endrd, cpio_stwr, bcpio_wr, cpio_endwr, cpio_trail, + rd_wrfile, wr_rdfile, bad_opt, + +/* 1: OLD OCTAL CHARACTER CPIO */ + "cpio", 5120, sizeof(HD_CPIO), 1, 0, 0, 1, cpio_id, cpio_strd, + cpio_rd, cpio_endrd, cpio_stwr, cpio_wr, cpio_endwr, cpio_trail, + rd_wrfile, wr_rdfile, bad_opt, + +/* 2: SVR4 HEX CPIO */ + "sv4cpio", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, vcpio_id, cpio_strd, + vcpio_rd, vcpio_endrd, cpio_stwr, vcpio_wr, cpio_endwr, cpio_trail, + rd_wrfile, wr_rdfile, bad_opt, + +/* 3: SVR4 HEX CPIO WITH CRC */ + "sv4crc", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, crc_id, crc_strd, + vcpio_rd, vcpio_endrd, crc_stwr, vcpio_wr, cpio_endwr, cpio_trail, + rd_wrfile, wr_rdfile, bad_opt, + +/* 4: OLD TAR */ + "tar", 10240, BLKMULT, 0, 1, BLKMULT, 0, tar_id, no_op, + tar_rd, tar_endrd, no_op, tar_wr, tar_endwr, tar_trail, + rd_wrfile, wr_rdfile, tar_opt, + +/* 5: POSIX USTAR */ + "ustar", 10240, BLKMULT, 0, 1, BLKMULT, 0, ustar_id, ustar_strd, + ustar_rd, tar_endrd, ustar_stwr, ustar_wr, tar_endwr, tar_trail, + rd_wrfile, wr_rdfile, bad_opt, +}; +#define F_TAR 4 /* format when called as tar */ +#define DEFLT 5 /* default write format from list above */ + +/* + * ford is the archive search order used by get_arc() to determine what kind + * of archive we are dealing with. This helps to properly id archive formats + * some formats may be subsets of others.... + */ +int ford[] = {5, 4, 3, 2, 1, 0, -1 }; + +/* + * options() + * figure out if we are pax, tar or cpio. Call the appropriate options + * parser + */ + +#if __STDC__ +void +options(register int argc, register char **argv) +#else +void +options(argc, argv) + register int argc; + register char **argv; +#endif +{ + + /* + * Are we acting like pax, tar or cpio (based on argv[0]) + */ + if ((argv0 = strrchr(argv[0], '/')) != NULL) + argv0++; + else + argv0 = argv[0]; + + if (strcmp(NM_TAR, argv0) == 0) + return(tar_options(argc, argv)); +# ifdef notdef + else if (strcmp(NM_CPIO, argv0) == 0) + return(cpio_options(argc, argv)); +# endif + /* + * assume pax as the default + */ + argv0 = NM_PAX; + return(pax_options(argc, argv)); +} + +/* + * pax_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +#if __STDC__ +static void +pax_options(register int argc, register char **argv) +#else +static void +pax_options(argc, argv) + register int argc; + register char **argv; +#endif +{ + register int c; + register int i; + unsigned int flg = 0; + unsigned int bflg = 0; + register char *pt; + FSUB tmp; + extern char *optarg; + extern int optind; + + /* + * process option flags + */ + while ((c=getopt(argc,argv,"ab:cdf:iklno:p:rs:tuvwx:B:DE:G:HLPT:U:XYZ")) + != EOF) { + switch (c) { + case 'a': + /* + * append + */ + flg |= AF; + break; + case 'b': + /* + * specify blocksize + */ + flg |= BF; + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + warn(1, "Invalid block size %s", optarg); + pax_usage(); + } + break; + case 'c': + /* + * inverse match on patterns + */ + cflag = 1; + flg |= CF; + break; + case 'd': + /* + * match only dir on extract, not the subtree at dir + */ + dflag = 1; + flg |= DF; + break; + case 'f': + /* + * filename where the archive is stored + */ + arcname = optarg; + flg |= FF; + break; + case 'i': + /* + * interactive file rename + */ + iflag = 1; + flg |= IF; + break; + case 'k': + /* + * do not clobber files that exist + */ + kflag = 1; + flg |= KF; + break; + case 'l': + /* + * try to link src to dest with copy (-rw) + */ + lflag = 1; + flg |= LF; + break; + case 'n': + /* + * select first match for a pattern only + */ + nflag = 1; + flg |= NF; + break; + case 'o': + /* + * pass format specific options + */ + flg |= OF; + if (opt_add(optarg) < 0) + pax_usage(); + break; + case 'p': + /* + * specify file characteristic options + */ + for (pt = optarg; *pt != '\0'; ++pt) { + switch(*pt) { + case 'a': + /* + * do not preserve access time + */ + patime = 0; + break; + case 'e': + /* + * preserve user id, group id, file + * mode, access/modification times + */ + pids = 1; + pmode = 1; + patime = 1; + pmtime = 1; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + /* + * preserve uid/gid + */ + pids = 1; + break; + case 'p': + /* + * preserver file mode bits + */ + pmode = 1; + break; + default: + warn(1, "Invalid -p string: %c", *pt); + pax_usage(); + break; + } + } + flg |= PF; + break; + case 'r': + /* + * read the archive + */ + flg |= RF; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= SF; + break; + case 't': + /* + * preserve access time on filesystem nodes we read + */ + tflag = 1; + flg |= TF; + break; + case 'u': + /* + * ignore those older files + */ + uflag = 1; + flg |= UF; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + flg |= VF; + break; + case 'w': + /* + * write an archive + */ + flg |= WF; + break; + case 'x': + /* + * specify an archive format on write + */ + tmp.name = optarg; + if (frmt = (FSUB *)bsearch((void *)&tmp, (void *)fsub, + sizeof(fsub)/sizeof(FSUB), sizeof(FSUB), c_frmt)) { + flg |= XF; + break; + } + warn(1, "Unknown -x format: %s", optarg); + (void)fputs("pax: Known -x formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + (void)fprintf(stderr, " %s", fsub[i].name); + (void)fputs("\n\n", stderr); + pax_usage(); + break; + case 'B': + /* + * non-standard option on number of bytes written on a + * single archive volume. + */ + if ((wrlimit = str_offt(optarg)) <= 0) { + warn(1, "Invalid write limit %s", optarg); + pax_usage(); + } + if (wrlimit % BLKMULT) { + warn(1, "Write limit is not a %d byte multiple", + BLKMULT); + pax_usage(); + } + flg |= CBF; + break; + case 'D': + /* + * On extraction check file inode change time before the + * modification of the file name. Non standard option. + */ + Dflag = 1; + flg |= CDF; + break; + case 'E': + /* + * non-standard limit on read faults + * 0 indicates stop after first error, values + * indicate a limit, "NONE" try forever + */ + flg |= CEF; + if (strcmp(NONE, optarg) == 0) + maxflt = -1; + else if ((maxflt = atoi(optarg)) < 0) { + warn(1, "Error count value must be positive"); + pax_usage(); + } + break; + case 'G': + /* + * non-standard option for selecting files within an + * archive by group (gid or name) + */ + if (grp_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CGF; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + flg |= CHF; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + flg |= CLF; + break; + case 'P': + /* + * do NOT follow symlinks (default) + */ + Lflag = 0; + flg |= CPF; + break; + case 'T': + /* + * non-standard option for selecting files within an + * archive by modification time range (lower,upper) + */ + if (trng_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CTF; + break; + case 'U': + /* + * non-standard option for selecting files within an + * archive by user (uid or name) + */ + if (usr_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CUF; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + flg |= CXF; + break; + case 'Y': + /* + * On extraction check file inode change time after the + * modification of the file name. Non standard option. + */ + Yflag = 1; + flg |= CYF; + break; + case 'Z': + /* + * On extraction check modification time after the + * modification of the file name. Non standard option. + */ + Zflag = 1; + flg |= CZF; + break; + case '?': + default: + pax_usage(); + break; + } + } + + /* + * figure out the operation mode of pax read,write,extract,copy,append + * or list. check that we have not been given a bogus set of flags + * for the operation mode. + */ + if (ISLIST(flg)) { + act = LIST; + bflg = flg & BDLIST; + } else if (ISEXTRACT(flg)) { + act = EXTRACT; + bflg = flg & BDEXTR; + } else if (ISARCHIVE(flg)) { + act = ARCHIVE; + bflg = flg & BDARCH; + } else if (ISAPPND(flg)) { + act = APPND; + bflg = flg & BDARCH; + } else if (ISCOPY(flg)) { + act = COPY; + bflg = flg & BDCOPY; + } else + pax_usage(); + if (bflg) { + printflg(flg); + pax_usage(); + } + + /* + * if we are writing (ARCHIVE) we use the default format if the user + * did not specify a format. when we write during an APPEND, we will + * adopt the format of the existing archive if none was supplied. + */ + if (!(flg & XF) && (act == ARCHIVE)) + frmt = &(fsub[DEFLT]); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + for (; optind < argc; optind++) + if (pat_add(argv[optind]) < 0) + pax_usage(); + break; + case COPY: + if (optind >= argc) { + warn(0, "Destination directory was not supplied"); + pax_usage(); + } + --argc; + dirptr = argv[argc]; + /* FALL THROUGH */ + case ARCHIVE: + case APPND: + for (; optind < argc; optind++) + if (ftree_add(argv[optind]) < 0) + pax_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } +} + + +/* + * tar_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +#if __STDC__ +static void +tar_options(register int argc, register char **argv) +#else +static void +tar_options(argc, argv) + register int argc; + register char **argv; +#endif +{ + register char *cp; + int fstdin = 0; + + if (argc < 2) + tar_usage(); + /* + * process option flags + */ + ++argv; + for (cp = *argv++; *cp != '\0'; ++cp) { + switch (*cp) { + case '-': + /* + * skip over - + */ + break; + case 'b': + /* + * specify blocksize + */ + if (*argv == (char *)NULL) { + warn(1,"blocksize must be specified with 'b'"); + tar_usage(); + } + if ((wrblksz = (int)str_offt(*argv)) <= 0) { + warn(1, "Invalid block size %s", *argv); + tar_usage(); + } + ++argv; + break; + case 'c': + /* + * create an archive + */ + act = ARCHIVE; + break; + case 'e': + /* + * stop after first error + */ + maxflt = 0; + break; + case 'f': + /* + * filename where the archive is stored + */ + if (*argv == (char *)NULL) { + warn(1, "filename must be specified with 'f'"); + tar_usage(); + } + if ((argv[0][0] == '-') && (argv[0][1]== '\0')) { + /* + * treat a - as stdin + */ + ++argv; + ++fstdin; + arcname = (char *)0; + break; + } + fstdin = 0; + arcname = *argv++; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + if (opt_add("write_opt=nodir") < 0) + tar_usage(); + break; + case 'p': + /* + * preserve user id, group id, file + * mode, access/modification times + */ + pids = 1; + pmode = 1; + patime = 1; + pmtime = 1; + break; + case 'r': + case 'u': + /* + * append to the archive + */ + act = APPND; + break; + case 't': + /* + * list contents of the tape + */ + act = LIST; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + break; + case 'w': + /* + * interactive file rename + */ + iflag = 1; + break; + case 'x': + /* + * write an archive + */ + act = EXTRACT; + break; + case 'B': + /* + * Nothing to do here, this is pax default + */ + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'P': + /* + * do not follow symlinks + */ + Lflag = 0; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + break; + case '0': + arcname = DEV_0; + break; + case '1': + arcname = DEV_1; + break; + case '4': + arcname = DEV_4; + break; + case '5': + arcname = DEV_5; + break; + case '7': + arcname = DEV_7; + break; + case '8': + arcname = DEV_8; + break; + default: + tar_usage(); + break; + } + } + + /* + * if we are writing (ARCHIVE) specify tar, otherwise run like pax + */ + if (act == ARCHIVE) + frmt = &(fsub[F_TAR]); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + default: + while (*argv != (char *)NULL) + if (pat_add(*argv++) < 0) + tar_usage(); + break; + case ARCHIVE: + case APPND: + while (*argv != (char *)NULL) + if (ftree_add(*argv++) < 0) + tar_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } + if (!fstdin && ((arcname == (char *)NULL) || (*arcname == '\0'))) { + arcname = getenv("TAPE"); + if ((arcname == (char *)NULL) || (*arcname == '\0')) + arcname = DEV_8; + } +} + +#ifdef notdef +/* + * cpio_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +#if __STDC__ +static void +cpio_options(register int argc, register char **argv) +#else +static void +cpio_options(argc, argv) + register int argc; + register char **argv; +#endif +{ +} +#endif + +/* + * printflg() + * print out those invalid flag sets found to the user + */ + +#if __STDC__ +static void +printflg(unsigned int flg) +#else +static void +printflg(flg) + unsigned int flg; +#endif +{ + int nxt; + int pos = 0; + + (void)fprintf(stderr,"%s: Invalid combination of options:", argv0); + while (nxt = ffs(flg)) { + flg = flg >> nxt; + pos += nxt; + (void)fprintf(stderr, " -%c", flgch[pos-1]); + } + (void)putc('\n', stderr); +} + +/* + * c_frmt() + * comparison routine used by bsearch to find the format specified + * by the user + */ + +#if __STDC__ +static int +c_frmt(const void *a, const void *b) +#else +static int +c_frmt(a, b) + void *a; + void *b; +#endif +{ + return(strcmp(((FSUB *)a)->name, ((FSUB *)b)->name)); +} + +/* + * opt_next() + * called by format specific options routines to get each format specific + * flag and value specified with -o + * Return: + * pointer to next OPLIST entry or NULL (end of list). + */ + +#if __STDC__ +OPLIST * +opt_next(void) +#else +OPLIST * +opt_next() +#endif +{ + OPLIST *opt; + + if ((opt = ophead) != NULL) + ophead = ophead->fow; + return(opt); +} + +/* + * bad_opt() + * generic routine used to complain about a format specific options + * when the format does not support options. + */ + +#if __STDC__ +int +bad_opt(void) +#else +int +bad_opt() +#endif +{ + register OPLIST *opt; + + if (ophead == NULL) + return(0); + /* + * print all we were given + */ + warn(1,"These format options are not supported"); + while ((opt = opt_next()) != NULL) + (void)fprintf(stderr, "\t%s = %s\n", opt->name, opt->value); + pax_usage(); + return(0); +} + +/* + * opt_add() + * breaks the value supplied to -o into a option name and value. options + * are given to -o in the form -o name-value,name=value + * mulltiple -o may be specified. + * Return: + * 0 if format in name=value format, -1 if -o is passed junk + */ + +#if __STDC__ +int +opt_add(register char *str) +#else +int +opt_add(str) + register char *str; +#endif +{ + register OPLIST *opt; + register char *frpt; + register char *pt; + register char *endpt; + + if ((str == NULL) || (*str == '\0')) { + warn(0, "Invalid option name"); + return(-1); + } + frpt = endpt = str; + + /* + * break into name and values pieces and stuff each one into a + * OPLIST structure. When we know the format, the format specific + * option function will go through this list + */ + while ((frpt != NULL) && (*frpt != '\0')) { + if ((endpt = strchr(frpt, ',')) != NULL) + *endpt = '\0'; + if ((pt = strchr(frpt, '=')) == NULL) { + warn(0, "Invalid options format"); + return(-1); + } + if ((opt = (OPLIST *)malloc(sizeof(OPLIST))) == NULL) { + warn(0, "Unable to allocate space for option list"); + return(-1); + } + *pt++ = '\0'; + opt->name = frpt; + opt->value = pt; + opt->fow = NULL; + if (endpt != NULL) + frpt = endpt + 1; + else + frpt = NULL; + if (ophead == NULL) { + optail = ophead = opt; + continue; + } + optail->fow = opt; + optail = opt; + } + return(0); +} + +/* + * str_offt() + * Convert an expression of the following forms to an off_t > 0. + * 1) A positive decimal number. + * 2) A positive decimal number followed by a b (mult by 512). + * 3) A positive decimal number followed by a k (mult by 1024). + * 4) A positive decimal number followed by a m (mult by 512). + * 5) A positive decimal number followed by a w (mult by sizeof int) + * 6) Two or more positive decimal numbers (with/without k,b or w). + * seperated by x (also * for backwards compatibility), specifying + * the product of the indicated values. + * Return: + * 0 for an error, a positive value o.w. + */ + +#if __STDC__ +static off_t +str_offt(char *val) +#else +static off_t +str_offt(val) + char *val; +#endif +{ + char *expr; + off_t num, t; + +# ifdef NET2_STAT + num = strtol(val, &expr, 0); + if ((num == LONG_MAX) || (num <= 0) || (expr == val)) +# else + num = strtoq(val, &expr, 0); + if ((num == QUAD_MAX) || (num <= 0) || (expr == val)) +# endif + return(0); + + switch(*expr) { + case 'b': + t = num; + num *= 512; + if (t > num) + return(0); + ++expr; + break; + case 'k': + t = num; + num *= 1024; + if (t > num) + return(0); + ++expr; + break; + case 'm': + t = num; + num *= 1048576; + if (t > num) + return(0); + ++expr; + break; + case 'w': + t = num; + num *= sizeof(int); + if (t > num) + return(0); + ++expr; + break; + } + + switch(*expr) { + case '\0': + break; + case '*': + case 'x': + t = num; + num *= str_offt(expr + 1); + if (t > num) + return(0); + break; + default: + return(0); + } + return(num); +} + +/* + * no_op() + * for those option functions where the archive format has nothing to do. + * Return: + * 0 + */ + +#if __STDC__ +static int +no_op(void) +#else +static int +no_op() +#endif +{ + return(0); +} + +/* + * pax_usage() + * print the usage summary to the user + */ + +#if __STDC__ +void +pax_usage(void) +#else +void +pax_usage() +#endif +{ + (void)fputs("usage: pax [-cdnv] [-E limit] [-f archive] ", stderr); + (void)fputs("[-s replstr] ... [-U user] ...", stderr); + (void)fputs("\n [-G group] ... ", stderr); + (void)fputs("[-T [from_date][,to_date]] ... ", stderr); + (void)fputs("[pattern ...]\n", stderr); + (void)fputs(" pax -r [-cdiknuvDYZ] [-E limit] ", stderr); + (void)fputs("[-f archive] [-o options] ... \n", stderr); + (void)fputs(" [-p string] ... [-s replstr] ... ", stderr); + (void)fputs("[-U user] ... [-G group] ...\n ", stderr); + (void)fputs("[-T [from_date][,to_date]] ... ", stderr); + (void)fputs(" [pattern ...]\n", stderr); + (void)fputs(" pax -w [-dituvHLPX] [-b blocksize] ", stderr); + (void)fputs("[ [-a] [-f archive] ] [-x format] \n", stderr); + (void)fputs(" [-B bytes] [-s replstr] ... ", stderr); + (void)fputs("[-o options] ... [-U user] ...", stderr); + (void)fputs("\n [-G group] ... ", stderr); + (void)fputs("[-T [from_date][,to_date][/[c][m]]] ... ", stderr); + (void)fputs("[file ...]\n", stderr); + (void)fputs(" pax -r -w [-diklntuvDHLPXYZ] ", stderr); + (void)fputs("[-p string] ... [-s replstr] ...", stderr); + (void)fputs("\n [-U user] ... [-G group] ... ", stderr); + (void)fputs("[-T [from_date][,to_date][/[c][m]]] ... ", stderr); + (void)fputs("\n [file ...] directory\n", stderr); + exit(1); +} + +/* + * tar_usage() + * print the usage summary to the user + */ + +#if __STDC__ +void +tar_usage(void) +#else +void +tar_usage() +#endif +{ + (void)fputs("usage: tar -{txru}[cevfbmopwBHLPX014578] [tapefile] ", + stderr); + (void)fputs("[blocksize] file1 file2...\n", stderr); + exit(1); +} + +#ifdef notdef +/* + * cpio_usage() + * print the usage summary to the user + */ + +#if __STDC__ +void +cpio_usage(void) +#else +void +cpio_usage() +#endif +{ + exit(1); +} +#endif diff --git a/bin/pax/options.h b/bin/pax/options.h new file mode 100644 index 0000000..5dc855b --- /dev/null +++ b/bin/pax/options.h @@ -0,0 +1,113 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)options.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * argv[0] names. Used for tar and cpio emulation + */ + +#define NM_TAR "tar" +#define NM_CPIO "cpio" +#define NM_PAX "pax" + +/* + * Constants used to specify the legal sets of flags in pax. For each major + * operation mode of pax, a set of illegal flags is defined. If any one of + * those illegal flags are found set, we scream and exit + */ +#define NONE "none" + +/* + * flags (one for each option). + */ +#define AF 0x00000001 +#define BF 0x00000002 +#define CF 0x00000004 +#define DF 0x00000008 +#define FF 0x00000010 +#define IF 0x00000020 +#define KF 0x00000040 +#define LF 0x00000080 +#define NF 0x00000100 +#define OF 0x00000200 +#define PF 0x00000400 +#define RF 0x00000800 +#define SF 0x00001000 +#define TF 0x00002000 +#define UF 0x00004000 +#define VF 0x00008000 +#define WF 0x00010000 +#define XF 0x00020000 +#define CBF 0x00040000 /* nonstandard extension */ +#define CDF 0x00080000 /* nonstandard extension */ +#define CEF 0x00100000 /* nonstandard extension */ +#define CGF 0x00200000 /* nonstandard extension */ +#define CHF 0x00400000 /* nonstandard extension */ +#define CLF 0x00800000 /* nonstandard extension */ +#define CPF 0x01000000 /* nonstandard extension */ +#define CTF 0x02000000 /* nonstandard extension */ +#define CUF 0x04000000 /* nonstandard extension */ +#define CXF 0x08000000 +#define CYF 0x10000000 /* nonstandard extension */ +#define CZF 0x20000000 /* nonstandard extension */ + +/* + * ascii string indexed by bit position above (alter the above and you must + * alter this string) used to tell the user what flags caused us to complain + */ +#define FLGCH "abcdfiklnoprstuvwxBDEGHLPTUXYZ" + +/* + * legal pax operation bit patterns + */ + +#define ISLIST(x) (((x) & (RF|WF)) == 0) +#define ISEXTRACT(x) (((x) & (RF|WF)) == RF) +#define ISARCHIVE(x) (((x) & (AF|RF|WF)) == WF) +#define ISAPPND(x) (((x) & (AF|RF|WF)) == (AF|WF)) +#define ISCOPY(x) (((x) & (RF|WF)) == (RF|WF)) +#define ISWRITE(x) (((x) & (RF|WF)) == WF) + +/* + * Illegal option flag subsets based on pax operation + */ + +#define BDEXTR (AF|BF|LF|TF|WF|XF|CBF|CHF|CLF|CPF|CXF) +#define BDARCH (CF|KF|LF|NF|PF|RF|CDF|CEF|CYF|CZF) +#define BDCOPY (AF|BF|FF|OF|XF|CBF|CEF) +#define BDLIST (AF|BF|IF|KF|LF|OF|PF|RF|TF|UF|WF|XF|CBF|CDF|CHF|CLF|CPF|CXF|CYF|CZF) diff --git a/bin/pax/pat_rep.c b/bin/pax/pat_rep.c new file mode 100644 index 0000000..2da5827 --- /dev/null +++ b/bin/pax/pat_rep.c @@ -0,0 +1,1196 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#ifdef NET2_REGEX +#include <regexp.h> +#else +#include <regex.h> +#endif +#include "pax.h" +#include "pat_rep.h" +#include "extern.h" + +/* + * routines to handle pattern matching, name modification (regular expression + * substitution and interactive renames), and destination name modification for + * copy (-rw). Both file name and link names are adjusted as required in these + * routines. + */ + +#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ +static PATTERN *pathead = NULL; /* file pattern match list head */ +static PATTERN *pattail = NULL; /* file pattern match list tail */ +static REPLACE *rephead = NULL; /* replacement string list head */ +static REPLACE *reptail = NULL; /* replacement string list tail */ + +static int rep_name __P((char *, int *, int)); +static int tty_rename __P((register ARCHD *)); +static int fix_path __P((char *, int *, char *, int)); +static int fn_match __P((register char *, register char *, char **)); +static char * range_match __P((register char *, register int)); +#ifdef NET2_REGEX +static int resub __P((regexp *, char *, char *, register char *)); +#else +static int resub __P((regex_t *, regmatch_t *, char *, char *, char *)); +#endif + +/* + * rep_add() + * parses the -s replacement string; compiles the regular expression + * and stores the compiled value and it's replacement string together in + * replacement string list. Input to this function is of the form: + * /old/new/pg + * The first char in the string specifies the delimiter used by this + * replacement string. "Old" is a regular expression in "ed" format which + * is compiled by regcomp() and is applied to filenames. "new" is the + * substitution string; p and g are options flags for printing and global + * replacement (over the single filename) + * Return: + * 0 if a proper replacement string and regular expression was added to + * the list of replacement patterns; -1 otherwise. + */ + +#if __STDC__ +int +rep_add(register char *str) +#else +int +rep_add(str) + register char *str; +#endif +{ + register char *pt1; + register char *pt2; + register REPLACE *rep; +# ifndef NET2_REGEX + register int res; + char rebuf[BUFSIZ]; +# endif + + /* + * throw out the bad parameters + */ + if ((str == NULL) || (*str == '\0')) { + warn(1, "Empty replacement string"); + return(-1); + } + + /* + * first character in the string specifies what the delimiter is for + * this expression + */ + if ((pt1 = strchr(str+1, *str)) == NULL) { + warn(1, "Invalid replacement string %s", str); + return(-1); + } + + /* + * allocate space for the node that handles this replacement pattern + * and split out the regular expression and try to compile it + */ + if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { + warn(1, "Unable to allocate memory for replacement string"); + return(-1); + } + + *pt1 = '\0'; +# ifdef NET2_REGEX + if ((rep->rcmp = regcomp(str+1)) == NULL) { +# else + if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { + regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); + warn(1, "%s while compiling regular expression %s", rebuf, str); +# endif + (void)free((char *)rep); + return(-1); + } + + /* + * put the delimiter back in case we need an error message and + * locate the delimiter at the end of the replacement string + * we then point the node at the new substitution string + */ + *pt1++ = *str; + if ((pt2 = strchr(pt1, *str)) == NULL) { +# ifdef NET2_REGEX + (void)free((char *)rep->rcmp); +# else + regfree(&(rep->rcmp)); +# endif + (void)free((char *)rep); + warn(1, "Invalid replacement string %s", str); + return(-1); + } + + *pt2 = '\0'; + rep->nstr = pt1; + pt1 = pt2++; + rep->flgs = 0; + + /* + * set the options if any + */ + while (*pt2 != '\0') { + switch(*pt2) { + case 'g': + case 'G': + rep->flgs |= GLOB; + break; + case 'p': + case 'P': + rep->flgs |= PRNT; + break; + default: +# ifdef NET2_REGEX + (void)free((char *)rep->rcmp); +# else + regfree(&(rep->rcmp)); +# endif + (void)free((char *)rep); + *pt1 = *str; + warn(1, "Invalid replacement string option %s", str); + return(-1); + } + ++pt2; + } + + /* + * all done, link it in at the end + */ + rep->fow = NULL; + if (rephead == NULL) { + reptail = rephead = rep; + return(0); + } + reptail->fow = rep; + reptail = rep; + return(0); +} + +/* + * pat_add() + * add a pattern match to the pattern match list. Pattern matches are used + * to select which archive members are extracted. (They appear as + * arguments to pax in the list and read modes). If no patterns are + * supplied to pax, all members in the archive will be selected (and the + * pattern match list is empty). + * Return: + * 0 if the pattern was added to the list, -1 otherwise + */ + +#if __STDC__ +int +pat_add(char *str) +#else +int +pat_add(str) + char *str; +#endif +{ + register PATTERN *pt; + + /* + * throw out the junk + */ + if ((str == NULL) || (*str == '\0')) { + warn(1, "Empty pattern string"); + return(-1); + } + + /* + * allocate space for the pattern and store the pattern. the pattern is + * part of argv so do not bother to copy it, just point at it. Add the + * node to the end of the pattern list + */ + if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { + warn(1, "Unable to allocate memory for pattern string"); + return(-1); + } + + pt->pstr = str; + pt->pend = NULL; + pt->plen = strlen(str); + pt->fow = NULL; + pt->flgs = 0; + if (pathead == NULL) { + pattail = pathead = pt; + return(0); + } + pattail->fow = pt; + pattail = pt; + return(0); +} + +/* + * pat_chk() + * complain if any the user supplied pattern did not result in a match to + * a selected archive member. + */ + +#if __STDC__ +void +pat_chk(void) +#else +void +pat_chk() +#endif +{ + register PATTERN *pt; + register int wban = 0; + + /* + * walk down the list checking the flags to make sure MTCH was set, + * if not complain + */ + for (pt = pathead; pt != NULL; pt = pt->fow) { + if (pt->flgs & MTCH) + continue; + if (!wban) { + warn(1, "WARNING! These patterns were not matched:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", pt->pstr); + } +} + +/* + * pat_sel() + * the archive member which matches a pattern was selected. Mark the + * pattern as having selected an archive member. arcn->pat points at the + * pattern that was matched. arcn->pat is set in pat_match() + * + * NOTE: When the -c option is used, we are called when there was no match + * by pat_match() (that means we did match before the inverted sense of + * the logic). Now this seems really strange at first, but with -c we + * need to keep track of those patterns that cause a archive member to NOT + * be selected (it found an archive member with a specified pattern) + * Return: + * 0 if the pattern pointed at by arcn->pat was tagged as creating a + * match, -1 otherwise. + */ + +#if __STDC__ +int +pat_sel(register ARCHD *arcn) +#else +int +pat_sel(arcn) + register ARCHD *arcn; +#endif +{ + register PATTERN *pt; + register PATTERN **ppt; + register int len; + + /* + * if no patterns just return + */ + if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) + return(0); + + /* + * when we are NOT limited to a single match per pattern mark the + * pattern and return + */ + if (!nflag) { + pt->flgs |= MTCH; + return(0); + } + + /* + * we reach this point only when we allow a single selected match per + * pattern, if the pattern matches a directory and we do not have -d + * (dflag) we are done with this pattern. We may also be handed a file + * in the subtree of a directory. in that case when we are operating + * with -d, this pattern was already selected and we are done + */ + if (pt->flgs & DIR_MTCH) + return(0); + + if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { + /* + * ok we matched a directory and we are allowing + * subtree matches but because of the -n only its children will + * match. This is tagged as a DIR_MTCH type. + * WATCH IT, the code assumes that pt->pend points + * into arcn->name and arcn->name has not been modified. + * If not we will have a big mess. Yup this is another kludge + */ + + /* + * if this was a prefix match, remove trailing part of path + * so we can copy it. Future matches will be exact prefix match + */ + if (pt->pend != NULL) + *pt->pend = '\0'; + + if ((pt->pstr = strdup(arcn->name)) == NULL) { + warn(1, "Pattern select out of memory"); + if (pt->pend != NULL) + *pt->pend = '/'; + pt->pend = NULL; + return(-1); + } + + /* + * put the trailing / back in the source string + */ + if (pt->pend != NULL) { + *pt->pend = '/'; + pt->pend = NULL; + } + pt->plen = strlen(pt->pstr); + + /* + * strip off any trailing /, this should really never happen + */ + len = pt->plen - 1; + if (*(pt->pstr + len) == '/') { + *(pt->pstr + len) = '\0'; + pt->plen = len; + } + pt->flgs = DIR_MTCH | MTCH; + arcn->pat = pt; + return(0); + } + + /* + * we are then done with this pattern, so we delete it from the list + * because it can never be used for another match. + * Seems kind of strange to do for a -c, but the pax spec is really + * vague on the interaction of -c -n and -d. We assume that when -c + * and the pattern rejects a member (i.e. it matched it) it is done. + * In effect we place the order of the flags as having -c last. + */ + pt = pathead; + ppt = &pathead; + while ((pt != NULL) && (pt != arcn->pat)) { + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt == NULL) { + /* + * should never happen.... + */ + warn(1, "Pattern list inconsistant"); + return(-1); + } + *ppt = pt->fow; + (void)free((char *)pt); + arcn->pat = NULL; + return(0); +} + +/* + * pat_match() + * see if this archive member matches any supplied pattern, if a match + * is found, arcn->pat is set to point at the potential pattern. Later if + * this archive member is "selected" we process and mark the pattern as + * one which matched a selected archive member (see pat_sel()) + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + */ + +#if __STDC__ +int +pat_match(register ARCHD *arcn) +#else +int +pat_match(arcn) + register ARCHD *arcn; +#endif +{ + register PATTERN *pt; + + arcn->pat = NULL; + + /* + * if there are no more patterns and we have -n (and not -c) we are + * done. otherwise with no patterns to match, matches all + */ + if (pathead == NULL) { + if (nflag && !cflag) + return(-1); + return(0); + } + + /* + * have to search down the list one at a time looking for a match. + */ + pt = pathead; + while (pt != NULL) { + /* + * check for a file name match unless we have DIR_MTCH set in + * this pattern then we want a prefix match + */ + if (pt->flgs & DIR_MTCH) { + /* + * this pattern was matched before to a directory + * as we must have -n set for this (but not -d). We can + * only match CHILDREN of that directory so we must use + * an exact prefix match (no wildcards). + */ + if ((arcn->name[pt->plen] == '/') && + (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) + break; + } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) + break; + pt = pt->fow; + } + + /* + * return the result, remember that cflag (-c) inverts the sense of a + * match + */ + if (pt == NULL) + return(cflag ? 0 : 1); + + /* + * we had a match, now when we invert the sense (-c) we reject this + * member. However we have to tag the pattern a being successful, (in a + * match, not in selecting a archive member) so we call pat_sel() here. + */ + arcn->pat = pt; + if (!cflag) + return(0); + + if (pat_sel(arcn) < 0) + return(-1); + arcn->pat = NULL; + return(1); +} + +/* + * fn_match() + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + * Note: *pend may be changed to show where the prefix ends. + */ + +#if __STDC__ +static int +fn_match(register char *pattern, register char *string, char **pend) +#else +static int +fn_match(pattern, string, pend) + register char *pattern; + register char *string; + char **pend; +#endif +{ + register char c; + char test; + + *pend = NULL; + for (;;) { + switch (c = *pattern++) { + case '\0': + /* + * Ok we found an exact match + */ + if (*string == '\0') + return(0); + + /* + * Check if it is a prefix match + */ + if ((dflag == 1) || (*string != '/')) + return(-1); + + /* + * It is a prefix match, remember where the trailing + * / is located + */ + *pend = string; + return(0); + case '?': + if ((test = *string++) == '\0') + return (-1); + break; + case '*': + c = *pattern; + /* + * Collapse multiple *'s. + */ + while (c == '*') + c = *++pattern; + + /* + * Optimized hack for pattern with a * at the end + */ + if (c == '\0') + return (0); + + /* + * General case, use recursion. + */ + while ((test = *string) != '\0') { + if (!fn_match(pattern, string, pend)) + return (0); + ++string; + } + return (-1); + case '[': + /* + * range match + */ + if (((test = *string++) == '\0') || + ((pattern = range_match(pattern, test)) == NULL)) + return (-1); + break; + case '\\': + default: + if (c != *string++) + return (-1); + break; + } + } + /* NOTREACHED */ +} + +#ifdef __STDC__ +static char * +range_match(register char *pattern, register int test) +#else +static char * +range_match(pattern, test) + register char *pattern; + register int test; +#endif +{ + register char c; + register char c2; + int negate; + int ok = 0; + + if (negate = (*pattern == '!')) + ++pattern; + + while ((c = *pattern++) != ']') { + /* + * Illegal pattern + */ + if (c == '\0') + return (NULL); + + if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && + (c2 != ']')) { + if ((c <= test) && (test <= c2)) + ok = 1; + pattern += 2; + } else if (c == test) + ok = 1; + } + return (ok == negate ? NULL : pattern); +} + +/* + * mod_name() + * modify a selected file name. first attempt to apply replacement string + * expressions, then apply interactive file rename. We apply replacement + * string expressions to both filenames and file links (if we didn't the + * links would point to the wrong place, and we could never be able to + * move an archive that has a file link in it). When we rename files + * interactively, we store that mapping (old name to user input name) so + * if we spot any file links to the old file name in the future, we will + * know exactly how to fix the file link. + * Return: + * 0 continue to process file, 1 skip this file, -1 pax is finished + */ + +#if __STDC__ +int +mod_name(register ARCHD *arcn) +#else +int +mod_name(arcn) + register ARCHD *arcn; +#endif +{ + register int res = 0; + + /* + * IMPORTANT: We have a problem. what do we do with symlinks? + * Modifying a hard link name makes sense, as we know the file it + * points at should have been seen already in the archive (and if it + * wasn't seen because of a read error or a bad archive, we lose + * anyway). But there are no such requirements for symlinks. On one + * hand the symlink that refers to a file in the archive will have to + * be modified to so it will still work at its new location in the + * file system. On the other hand a symlink that points elsewhere (and + * should continue to do so) should not be modified. There is clearly + * no perfect solution here. So we handle them like hardlinks. Clearly + * a replacement made by the interactive rename mapping is very likely + * to be correct since it applies to a single file and is an exact + * match. The regular expression replacements are a little harder to + * justify though. We claim that the symlink name is only likely + * to be replaced when it points within the file tree being moved and + * in that case it should be modified. what we really need to do is to + * call an oracle here. :) + */ + if (rephead != NULL) { + /* + * we have replacement strings, modify the name and the link + * name if any. + */ + if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0) + return(res); + + if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) && + ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0)) + return(res); + } + + if (iflag) { + /* + * perform interactive file rename, then map the link if any + */ + if ((res = tty_rename(arcn)) != 0) + return(res); + if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) + sub_name(arcn->ln_name, &(arcn->ln_nlen)); + } + return(res); +} + +/* + * tty_rename() + * Prompt the user for a replacement file name. A "." keeps the old name, + * a empty line skips the file, and an EOF on reading the tty, will cause + * pax to stop processing and exit. Otherwise the file name input, replaces + * the old one. + * Return: + * 0 process this file, 1 skip this file, -1 we need to exit pax + */ + +#if __STDC__ +static int +tty_rename(register ARCHD *arcn) +#else +static int +tty_rename(arcn) + register ARCHD *arcn; +#endif +{ + char tmpname[PAXPATHLEN+2]; + int res; + + /* + * prompt user for the replacement name for a file, keep trying until + * we get some reasonable input. Archives may have more than one file + * on them with the same name (from updates etc). We print verbose info + * on the file so the user knows what is up. + */ + tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); + + for (;;) { + ls_tty(arcn); + tty_prnt("Input new name, or a \".\" to keep the old name, "); + tty_prnt("or a \"return\" to skip this file.\n"); + tty_prnt("Input > "); + if (tty_read(tmpname, sizeof(tmpname)) < 0) + return(-1); + if (strcmp(tmpname, "..") == 0) { + tty_prnt("Try again, illegal file name: ..\n"); + continue; + } + if (strlen(tmpname) > PAXPATHLEN) { + tty_prnt("Try again, file name too long\n"); + continue; + } + break; + } + + /* + * empty file name, skips this file. a "." leaves it alone + */ + if (tmpname[0] == '\0') { + tty_prnt("Skipping file.\n"); + return(1); + } + if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { + tty_prnt("Processing continues, name unchanged.\n"); + return(0); + } + + /* + * ok the name changed. We may run into links that point at this + * file later. we have to remember where the user sent the file + * in order to repair any links. + */ + tty_prnt("Processing continues, name changed to: %s\n", tmpname); + res = add_name(arcn->name, arcn->nlen, tmpname); + arcn->nlen = l_strncpy(arcn->name, tmpname, PAXPATHLEN+1); + if (res < 0) + return(-1); + return(0); +} + +/* + * set_dest() + * fix up the file name and the link name (if any) so this file will land + * in the destination directory (used during copy() -rw). + * Return: + * 0 if ok, -1 if failure (name too long) + */ + +#if __STDC__ +int +set_dest(register ARCHD *arcn, char *dest_dir, int dir_len) +#else +int +set_dest(arcn, dest_dir, dir_len) + register ARCHD *arcn; + char *dest_dir; + int dir_len; +#endif +{ + if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) + return(-1); + + /* + * It is really hard to deal with symlinks here, we cannot be sure + * if the name they point was moved (or will be moved). It is best to + * leave them alone. + */ + if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) + return(0); + + if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) + return(-1); + return(0); +} + +/* + * fix_path + * concatenate dir_name and or_name and store the result in or_name (if + * it fits). This is one ugly function. + * Return: + * 0 if ok, -1 if the final name is too long + */ + +#if __STDC__ +static int +fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) +#else +static int +fix_path(or_name, or_len, dir_name, dir_len) + char *or_name; + int *or_len; + char *dir_name; + int dir_len; +#endif +{ + register char *src; + register char *dest; + register char *start; + int len; + + /* + * we shift the or_name to the right enough to tack in the dir_name + * at the front. We make sure we have enough space for it all before + * we start. since dest always ends in a slash, we skip of or_name + * if it also starts with one. + */ + start = or_name; + src = start + *or_len; + dest = src + dir_len; + if (*start == '/') { + ++start; + --dest; + } + if ((len = dest - or_name) > PAXPATHLEN) { + warn(1, "File name %s/%s, too long", dir_name, start); + return(-1); + } + *or_len = len; + + /* + * enough space, shift + */ + while (src >= start) + *dest-- = *src--; + src = dir_name + dir_len - 1; + + /* + * splice in the destination directory name + */ + while (src >= dir_name) + *dest-- = *src--; + + *(or_name + len) = '\0'; + return(0); +} + +/* + * rep_name() + * walk down the list of replacement strings applying each one in order. + * when we find one with a successful substitution, we modify the name + * as specified. if required, we print the results. if the resulting name + * is empty, we will skip this archive member. We use the regexp(3) + * routines (regexp() ought to win a prize as having the most cryptic + * library function manual page). + * --Parameters-- + * name is the file name we are going to apply the regular expressions to + * (and may be modified) + * nlen is the length of this name (and is modified to hold the length of + * the final string). + * prnt is a flag that says whether to print the final result. + * Return: + * 0 if substitution was successful, 1 if we are to skip the file (the name + * ended up empty) + */ + +#if __STDC__ +static int +rep_name(char *name, int *nlen, int prnt) +#else +static int +rep_name(name, nlen, prnt) + char *name; + int *nlen; + int prnt; +#endif +{ + register REPLACE *pt; + register char *inpt; + register char *outpt; + register char *endpt; + register char *rpt; + register int found = 0; + register int res; +# ifndef NET2_REGEX + regmatch_t pm[MAXSUBEXP]; +# endif + char nname[PAXPATHLEN+1]; /* final result of all replacements */ + char buf1[PAXPATHLEN+1]; /* where we work on the name */ + + /* + * copy the name into buf1, where we will work on it. We need to keep + * the orig string around so we can print out the result of the final + * replacement. We build up the final result in nname. inpt points at + * the string we apply the regular expression to. prnt is used to + * suppress printing when we handle replacements on the link field + * (the user already saw that substitution go by) + */ + pt = rephead; + (void)strcpy(buf1, name); + inpt = buf1; + outpt = nname; + endpt = outpt + PAXPATHLEN; + + /* + * try each replacement string in order + */ + while (pt != NULL) { + do { + /* + * check for a successful substitution, if not go to + * the next pattern, or cleanup if we were global + */ +# ifdef NET2_REGEX + if (regexec(pt->rcmp, inpt) == 0) +# else + if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) +# endif + break; + + /* + * ok we found one. We have three parts, the prefix + * which did not match, the section that did and the + * tail (that also did not match). Copy the prefix to + * the final output buffer (watching to make sure we + * do not create a string too long). + */ + found = 1; +# ifdef NET2_REGEX + rpt = pt->rcmp->startp[0]; +# else + rpt = inpt + pm[0].rm_so; +# endif + + while ((inpt < rpt) && (outpt < endpt)) + *outpt++ = *inpt++; + if (outpt == endpt) + break; + + /* + * for the second part (which matched the regular + * expression) apply the substitution using the + * replacement string and place it the prefix in the + * final output. If we have problems, skip it. + */ +# ifdef NET2_REGEX + if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) { +# else + if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt)) + < 0) { +# endif + if (prnt) + warn(1, "Replacement name error %s", + name); + return(1); + } + outpt += res; + + /* + * we set up to look again starting at the first + * character in the tail (of the input string right + * after the last character matched by the regular + * expression (inpt always points at the first char in + * the string to process). If we are not doing a global + * substitution, we will use inpt to copy the tail to + * the final result. Make sure we do not overrun the + * output buffer + */ +# ifdef NET2_REGEX + inpt = pt->rcmp->endp[0]; +# else + inpt += pm[0].rm_eo; +# endif + + if ((outpt == endpt) || (*inpt == '\0')) + break; + + /* + * if the user wants global we keep trying to + * substitute until it fails, then we are done. + */ + } while (pt->flgs & GLOB); + + if (found) + break; + + /* + * a successful substitution did NOT occur, try the next one + */ + pt = pt->fow; + } + + if (found) { + /* + * we had a substitution, copy the last tail piece (if there is + * room) to the final result + */ + while ((outpt < endpt) && (*inpt != '\0')) + *outpt++ = *inpt++; + + *outpt = '\0'; + if ((outpt == endpt) && (*inpt != '\0')) { + if (prnt) + warn(1,"Replacement name too long %s >> %s", + name, nname); + return(1); + } + + /* + * inform the user of the result if wanted + */ + if (prnt && (pt->flgs & PRNT)) { + if (*nname == '\0') + (void)fprintf(stderr,"%s >> <empty string>\n", + name); + else + (void)fprintf(stderr,"%s >> %s\n", name, nname); + } + + /* + * if empty inform the caller this file is to be skipped + * otherwise copy the new name over the orig name and return + */ + if (*nname == '\0') + return(1); + *nlen = l_strncpy(name, nname, PAXPATHLEN + 1); + } + return(0); +} + +#ifdef NET2_REGEX +/* + * resub() + * apply the replacement to the matched expression. expand out the old + * style ed(1) subexpression expansion. + * Return: + * -1 if error, or the number of characters added to the destination. + */ + +#if __STDC__ +static int +resub(regexp *prog, char *src, char *dest, register char *destend) +#else +static int +resub(prog, src, dest, destend) + regexp *prog; + char *src; + char *dest; + register char *destend; +#endif +{ + register char *spt; + register char *dpt; + register char c; + register int no; + register int len; + + spt = src; + dpt = dest; + while ((dpt < destend) && ((c = *spt++) != '\0')) { + if (c == '&') + no = 0; + else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) + no = *spt++ - '0'; + else { + if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) + c = *spt++; + *dpt++ = c; + continue; + } + if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) || + ((len = prog->endp[no] - prog->startp[no]) <= 0)) + continue; + + /* + * copy the subexpression to the destination. + * fail if we run out of space or the match string is damaged + */ + if (len > (destend - dpt)) + len = destend - dpt; + if (l_strncpy(dpt, prog->startp[no], len) != len) + return(-1); + dpt += len; + } + return(dpt - dest); +} + +#else + +/* + * resub() + * apply the replacement to the matched expression. expand out the old + * style ed(1) subexpression expansion. + * Return: + * -1 if error, or the number of characters added to the destination. + */ + +#if __STDC__ +static int +resub(regex_t *rp, register regmatch_t *pm, char *src, char *dest, + register char *destend) +#else +static int +resub(rp, pm, src, dest, destend) + regex_t *rp; + register regmatch_t *pm; + char *src; + char *dest; + register char *destend; +#endif +{ + register char *spt; + register char *dpt; + register char c; + register regmatch_t *pmpt; + register int len; + int subexcnt; + + spt = src; + dpt = dest; + subexcnt = rp->re_nsub; + while ((dpt < destend) && ((c = *spt++) != '\0')) { + /* + * see if we just have an ordinary replacement character + * or we refer to a subexpression. + */ + if (c == '&') { + pmpt = pm; + } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { + /* + * make sure there is a subexpression as specified + */ + if ((len = *spt++ - '0') > subexcnt) + return(-1); + pmpt = pm + len; + } else { + /* + * Ordinary character, just copy it + */ + if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) + c = *spt++; + *dpt++ = c; + continue; + } + + /* + * continue if the subexpression is bogus + */ + if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || + ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) + continue; + + /* + * copy the subexpression to the destination. + * fail if we run out of space or the match string is damaged + */ + if (len > (destend - dpt)) + len = destend - dpt; + if (l_strncpy(dpt, src + pmpt->rm_so, len) != len) + return(-1); + dpt += len; + } + return(dpt - dest); +} +#endif diff --git a/bin/pax/pat_rep.h b/bin/pax/pat_rep.h new file mode 100644 index 0000000..8d22cf0 --- /dev/null +++ b/bin/pax/pat_rep.h @@ -0,0 +1,54 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pat_rep.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * data structure for storing user supplied replacement strings (-s) + */ +typedef struct replace { + char *nstr; /* the new string we will substitute with */ +# ifdef NET2_REGEX + regexp *rcmp; /* compiled regular expression used to match */ +# else + regex_t rcmp; /* compiled regular expression used to match */ +# endif + int flgs; /* print conversions? global in operation? */ +#define PRNT 0x1 +#define GLOB 0x2 + struct replace *fow; /* pointer to next pattern */ +} REPLACE; diff --git a/bin/pax/pax.1 b/bin/pax/pax.1 new file mode 100644 index 0000000..a1d7565 --- /dev/null +++ b/bin/pax/pax.1 @@ -0,0 +1,1169 @@ +.\" Copyright (c) 1992 Keith Muller. +.\" Copyright (c) 1992, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Keith Muller of the University of California, San Diego. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)pax.1 8.4 (Berkeley) 4/18/94 +.\" +.Dd April 18, 1994 +.Dt PAX 1 +.Os BSD 4.4 +.Sh NAME +.Nm pax +.Nd read and write file archives and copy directory hierarchies +.Sh SYNOPSIS +.Nm pax +.Op Fl cdnv +.Bk -words +.Op Fl f Ar archive +.Ek +.Bk -words +.Op Fl s Ar replstr +.Ar ... +.Ek +.Bk -words +.Op Fl U Ar user +.Ar ... +.Ek +.Bk -words +.Op Fl G Ar group +.Ar ... +.Ek +.Bk -words +.Oo +.Fl T +.Op Ar from_date +.Op Ar ,to_date +.Oc +.Ar ... +.Ek +.Op Ar pattern ... +.Nm pax +.Fl r +.Op Fl cdiknuvDYZ +.Bk -words +.Op Fl f Ar archive +.Ek +.Bk -words +.Op Fl o Ar options +.Ar ... +.Ek +.Bk -words +.Op Fl p Ar string +.Ar ... +.Ek +.Bk -words +.Op Fl s Ar replstr +.Ar ... +.Ek +.Op Fl E Ar limit +.Bk -words +.Op Fl U Ar user +.Ar ... +.Ek +.Bk -words +.Op Fl G Ar group +.Ar ... +.Ek +.Bk -words +.Oo +.Fl T +.Op Ar from_date +.Op Ar ,to_date +.Oc +.Ar ... +.Ek +.Op Ar pattern ... +.Nm pax +.Fl w +.Op Fl dituvHLPX +.Bk -words +.Op Fl b Ar blocksize +.Ek +.Oo +.Op Fl a +.Op Fl f Ar archive +.Oc +.Bk -words +.Op Fl x Ar format +.Ek +.Bk -words +.Op Fl s Ar replstr +.Ar ... +.Ek +.Bk -words +.Op Fl o Ar options +.Ar ... +.Ek +.Bk -words +.Op Fl U Ar user +.Ar ... +.Ek +.Bk -words +.Op Fl G Ar group +.Ar ... +.Ek +.Bk -words +.Op Fl B Ar bytes +.Ek +.Bk -words +.Oo +.Fl T +.Op Ar from_date +.Op Ar ,to_date +.Op Ar /[c][m] +.Oc +.Ar ... +.Ek +.Op Ar file ... +.Nm pax +.Fl r +.Fl w +.Op Fl diklntuvDHLPXYZ +.Bk -words +.Op Fl p Ar string +.Ar ... +.Ek +.Bk -words +.Op Fl s Ar replstr +.Ar ... +.Ek +.Bk -words +.Op Fl U Ar user +.Ar ... +.Ek +.Bk -words +.Op Fl G Ar group +.Ar ... +.Ek +.Bk -words +.Oo +.Fl T +.Op Ar from_date +.Op Ar ,to_date +.Op Ar /[c][m] +.Oc +.Ar ... +.Ek +.Op Ar file ... +.Ar directory +.Sh DESCRIPTION +.Nm Pax +will read, write, and list the members of an archive file, +and will copy directory hierarchies. +.Nm Pax +operation is independent of the specific archive format, +and supports a wide variety of different archive formats. +A list of supported archive formats can be found under the description of the +.Fl x +option. +.Pp +The presence of the +.Fl r +and the +.Fl w +options specifies which of the following functional modes +.Nm pax +will operate under: +.Em list , read , write , +and +.Em copy. +.Bl -tag -width 6n +.It <none> +.Em List . +.Nm Pax +will write to +.Dv standard output +a table of contents of the members of the archive file read from +.Dv standard input , +whose pathnames match the specified +.Ar patterns. +The table of contents contains one filename per line +and is written using single line buffering. +.It Fl r +.Em Read . +.Nm Pax +extracts the members of the archive file read from the +.Dv standard input , +with pathnames matching the specified +.Ar patterns. +The archive format and blocking is automatically determined on input. +When an extracted file is a directory, the entire file hierarchy +rooted at that directory is extracted. +All extracted files are created relative to the current file hierarchy. +The setting of ownership, access and modification times, and file mode of +the extracted files are discussed in more detail under the +.Fl p +option. +.It Fl w +.Em Write . +.Nm Pax +writes an archive containing the +.Ar file +operands to +.Dv standard output +using the specified archive format. +When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +.Dv standard input . +When a +.Ar file +operand is also a directory, the entire file hierarchy rooted +at that directory will be included. +.It Fl r Fl w +.Em Copy . +.Nm Pax +copies the +.Ar file +operands to the destination +.Ar directory . +When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +the +.Dv standard input . +When a +.Ar file +operand is also a directory the entire file +hierarchy rooted at that directory will be included. +The effect of the +.Em copy +is as if the copied files were written to an archive file and then +subsequently extracted, except that there may be hard links between +the original and the copied files (see the +.Fl l +option below). +.Pp +.Em Warning : +The destination +.Ar directory +must not be one of the +.Ar file +operands or a member of a file hierarchy rooted at one of the +.Ar file +operands. +The result of a +.Em copy +under these conditions is unpredictable. +.El +.Pp +While processing a damaged archive during a +.Em read +or +.Em list +operation, +.Nm pax +will attempt to recover from media defects and will search through the archive +to locate and process the largest number of archive members possible (see the +.Fl E +option for more details on error handling). +.Sh OPERANDS +.Pp +The +.Ar directory +operand specifies a destination directory pathname. +If the +.Ar directory +operand does not exist, or it is not writable by the user, +or it is not of type directory, +.Nm Pax +will exit with a non-zero exit status. +.Pp +The +.Ar pattern +operand is used to select one or more pathnames of archive members. +Archive members are selected using the pattern matching notation described +by +.Xr fnmatch 3 . +When the +.Ar pattern +operand is not supplied, all members of the archive will be selected. +When a +.Ar pattern +matches a directory, the entire file hierarchy rooted at that directory will +be selected. +When a +.Ar pattern +operand does not select at least one archive member, +.Nm pax +will write these +.Ar pattern +operands in a diagnostic message to +.Dv standard error +and then exit with a non-zero exit status. +.Pp +The +.Ar file +operand specifies the pathname of a file to be copied or archived. +When a +.Ar file +operand does not select at least one archive member, +.Nm pax +will write these +.Ar file +operand pathnames in a diagnostic message to +.Dv standard error +and then exit with a non-zero exit status. +.Sh OPTIONS +.Pp +The following options are supported: +.Bl -tag -width 4n +.It Fl r +Read an archive file from +.Dv standard input +and extract the specified +.Ar files . +If any intermediate directories are needed in order to extract an archive +member, these directories will be created as if +.Xr mkdir 2 +was called with the bitwise inclusive +.Dv OR +of +.Dv S_IRWXU , S_IRWXG , +and +.Dv S_IRWXO +as the mode argument. +When the selected archive format supports the specification of linked +files and these files cannot be linked while the archive is being extracted, +.Nm pax +will write a diagnostic message to +.Dv standard error +and exit with a non-zero exit status at the completion of operation. +.It Fl w +Write files to the +.Dv standard output +in the specified archive format. +When no +.Ar file +operands are specified, +.Dv standard input +is read for a list of pathnames with one per line without any leading or +trailing +.Aq blanks . +.It Fl a +Append +.Ar files +to the end of an archive that was previously written. +If an archive format is not specified with a +.Fl x +option, the format currently being used in the archive will be selected. +Any attempt to append to an archive in a format different from the +format already used in the archive will cause +.Nm pax +to exit immediately +with a non-zero exit status. +The blocking size used in the archive volume where writing starts +will continue to be used for the remainder of that archive volume. +.Pp +.Em Warning : +Many storage devices are not able to support the operations necessary +to perform an append operation. +Any attempt to append to an archive stored on such a device may damage the +archive or have other unpredictable results. +Tape drives in particular are more likely to not support an append operation. +An archive stored in a regular file system file or on a disk device will +usually support an append operation. +.It Fl b Ar blocksize +When +.Em writing +an archive, +block the output at a positive decimal integer number of +bytes per write to the archive file. +The +.Ar blocksize +must be a multiple of 512 bytes with a maximum of 32256 bytes. +A +.Ar blocksize +can end with +.Li k +or +.Li b +to specify multiplication by 1024 (1K) or 512, respectively. +A pair of +.Ar blocksizes +can be separated by +.Li x +to indicate a product. +A specific archive device may impose additional restrictions on the size +of blocking it will support. +When blocking is not specified, the default +.Ar blocksize +is dependent on the specific archive format being used (see the +.Fl x +option). +.It Fl c +Match all file or archive members +.Em except +those specified by the +.Ar pattern +and +.Ar file +operands. +.It Fl d +Cause files of type directory being copied or archived, or archive members of +type directory being extracted, to match only the directory file or archive +member and not the file hierarchy rooted at the directory. +.It Fl f Ar archive +Specify +.Ar archive +as the pathname of the input or output archive, overriding the default +.Dv standard input +(for +.Em list +and +.Em read ) +or +.Dv standard output +(for +.Em write ) . +A single archive may span multiple files and different archive devices. +When required, +.Nm pax +will prompt for the pathname of the file or device of the next volume in the +archive. +.It Fl i +Interactively rename files or archive members. +For each archive member matching a +.Ar pattern +operand or each file matching a +.Ar file +operand, +.Nm pax +will prompt to +.Pa /dev/tty +giving the name of the file, its file mode and its modification time. +.Nm Pax +will then read a line from +.Pa /dev/tty . +If this line is blank, the file or archive member is skipped. +If this line consists of a single period, the +file or archive member is processed with no modification to its name. +Otherwise, its name is replaced with the contents of the line. +.Nm Pax +will immediately exit with a non-zero exit status if +.Dv <EOF> +is encountered when reading a response or if +.Pa /dev/tty +cannot be opened for reading and writing. +.It Fl k +Do not overwrite existing files. +.It Fl l +Link files. (The letter ell). +In the +.Em copy +mode ( +.Fl r +.Fl w ) , +hard links are made between the source and destination file hierarchies +whenever possible. +.It Fl n +Select the first archive member that matches each +.Ar pattern +operand. +No more than one archive member is matched for each +.Ar pattern . +When members of type directory are matched, the file hierarchy rooted at that +directory is also matched (unless +.Fl d +is also specified). +.It Fl o Ar options +Information to modify the algorithm for extracting or writing archive files +which is specific to the archive format specified by +.Fl x . +In general, +.Ar options +take the form: +.Cm name=value +.It Fl p Ar string +Specify one or more file characteristic options (privileges). +The +.Ar string +option-argument is a string specifying file characteristics to be retained or +discarded on extraction. +The string consists of the specification characters +.Cm a , e , m , o , +and +.Cm p . +Multiple characteristics can be concatenated within the same string +and multiple +.Fl p +options can be specified. +The meaning of the specification characters are as follows: +.Bl -tag -width 2n +.It Cm a +Do not preserve file access times. +By default, file access times are preserved whenever possible. +.It Cm e +.Sq Preserve everything , +the user ID, group ID, file mode bits, +file access time, and file modification time. +This is intended to be used by +.Em root , +someone with all the appropriate privileges, in order to preserve all +aspects of the files as they are recorded in the archive. +The +.Cm e +flag is the sum of the +.Cm o +and +.Cm p +flags. +.It Cm m +Do not preserve file modification times. +By default, file modification times are preserved whenever possible. +.It Cm o +Preserve the user ID and group ID. +.It Cm p +.Sq Preserve +the file mode bits. +This intended to be used by a +.Em user +with regular privileges who wants to preserve all aspects of the file other +than the ownership. +The file times are preserved by default, but two other flags are offered to +disable this and use the time of extraction instead. +.El +.Pp +In the preceding list, +.Sq preserve +indicates that an attribute stored in the archive is given to the +extracted file, subject to the permissions of the invoking +process. +Otherwise the attribute of the extracted file is determined as +part of the normal file creation action. +If neither the +.Cm e +nor the +.Cm o +specification character is specified, or the user ID and group ID are not +preserved for any reason, +.Nm pax +will not set the +.Dv S_ISUID +.Em ( setuid ) +and +.Dv S_ISGID +.Em ( setgid ) +bits of the file mode. +If the preservation of any of these items fails for any reason, +.Nm pax +will write a diagnostic message to +.Dv standard error . +Failure to preserve these items will affect the final exit status, +but will not cause the extracted file to be deleted. +If the file characteristic letters in any of the string option-arguments are +duplicated or conflict with each other, the one(s) given last will take +precedence. +For example, if +.Dl Fl p Ar eme +is specified, file modification times are still preserved. +.It Fl s Ar replstr +Modify the file or archive member names specified by the +.Ar pattern +or +.Ar file +operands according to the substitution expression +.Ar replstr , +using the syntax of the +.Xr ed 1 +utility regular expressions. +The format of these regular expressions are: +.Dl /old/new/[gp] +As in +.Xr ed 1 , +.Cm old +is a basic regular expression and +.Cm new +can contain an ampersand (&), \\n (where n is a digit) back-references, +or subexpression matching. +The +.Cm old +string may also contain +.Dv <newline> +characters. +Any non-null character can be used as a delimiter (/ is shown here). +Multiple +.Fl s +expressions can be specified. +The expressions are applied in the order they are specified on the +command line, terminating with the first successful substitution. +The optional trailing +.Cm g +continues to apply the substitution expression to the pathname substring +which starts with the first character following the end of the last successful +substitution. The first unsuccessful substitution stops the operation of the +.Cm g +option. +The optional trailing +.Cm p +will cause the final result of a successful substitution to be written to +.Dv standard error +in the following format: +.Dl <original pathname> >> <new pathname> +File or archive member names that substitute to the empty string +are not selected and will be skipped. +.It Fl t +Reset the access times of any file or directory read or accessed by +.Nm pax +to be the same as they were before being read or accessed by +.Nm pax . +.It Fl u +Ignore files that are older (having a less recent file modification time) +than a pre-existing file or archive member with the same name. +During +.Em read , +an archive member with the same name as a file in the file system will be +extracted if the archive member is newer than the file. +During +.Em write , +a file system member with the same name as an archive member will be +written to the archive if it is newer than the archive member. +During +.Em copy , +the file in the destination hierarchy is replaced by the file in the source +hierarchy or by a link to the file in the source hierarchy if the file in +the source hierarchy is newer. +.It Fl v +During a +.Em list +operation, produce a verbose table of contents using the format of the +.Xr ls 1 +utility with the +.Fl l +option. +For pathnames representing a hard link to a previous member of the archive, +the output has the format: +.Dl <ls -l listing> == <link name> +For pathnames representing a symbolic link, the output has the format: +.Dl <ls -l listing> => <link name> +Where <ls -l listing> is the output format specified by the +.Xr ls 1 +utility when used with the +.Fl l +option. +Otherwise for all the other operational modes ( +.Em read , write , +and +.Em copy ) , +pathnames are written and flushed to +.Dv standard error +without a trailing +.Dv <newline> +as soon as processing begins on that file or +archive member. +The trailing +.Dv <newline> , +is not buffered, and is written only after the file has been read or written. +.It Fl x Ar format +Specify the output archive format, with the default format being +.Ar ustar . +.Nm Pax +currently supports the following formats: +.Bl -tag -width "sv4cpio" +.It Ar cpio +The extended cpio interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm pax +and is repaired. +.It Ar bcpio +The old binary cpio format. +The default blocksize for this format is 5120 bytes. +This format is not very portable and should not be used when other formats +are available. +Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm pax +and is repaired. +.It Ar sv4cpio +The System V release 4 cpio. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm pax +and is repaired. +.It Ar sv4crc +The System V release 4 cpio with file crc checksums. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm pax +and is repaired. +.It Ar tar +The old BSD tar format as found in BSD4.3. +The default blocksize for this format is 10240 bytes. +Pathnames stored by this format must be 100 characters or less in length. +Only +.Em regular +files, +.Em hard links , soft links , +and +.Em directories +will be archived (other file system types are not supported). +For backwards compatibility with even older tar formats, a +.Fl o +option can be used when writing an archive to omit the storage of directories. +This option takes the form: +.Dl Fl o Cm write_opt=nodir +.It Ar ustar +The extended tar interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 10240 bytes. +Pathnames stored by this format must be 250 characters or less in length. +.El +.Pp +.Nm Pax +will detect and report any file that it is unable to store or extract +as the result of any specific archive format restrictions. +The individual archive formats may impose additional restrictions on use. +Typical archive format restrictions include (but are not limited to): +file pathname length, file size, link pathname length and the type of the file. +.It Fl B Ar bytes +Limit the number of bytes written to a single archive volume to +.Ar bytes . +The +.Ar bytes +limit can end with +.Li m , +.Li k , +or +.Li b +to specify multiplication by 1048576 (1M), 1024 (1K) or 512, respectively. +A pair of +.Ar bytes +limits can be separated by +.Li x +to indicate a product. +.Pp +.Em Warning : +Only use this option when writing an archive to a device which supports +an end of file read condition based on last (or largest) write offset +(such as a regular file or a tape drive). +The use of this option with a floppy or hard disk is not recommended. +.It Fl D +This option is the same as the +.Fl u +option, except that the file inode change time is checked instead of the +file modification time. +The file inode change time can be used to select files whose inode information +(e.g. uid, gid, etc.) is newer than a copy of the file in the destination +.Ar directory . +.It Fl E Ar limit +Limit the number of consecutive read faults while trying to read a flawed +archives to +.Ar limit . +With a positive +.Ar limit , +.Nm pax +will attempt to recover from an archive read error and will +continue processing starting with the next file stored in the archive. +A +.Ar limit +of 0 will cause +.Nm pax +to stop operation after the first read error is detected on an archive volume. +A +.Ar limit +of +.Li NONE +will cause +.Nm pax +to attempt to recover from read errors forever. +The default +.Ar limit +is a small positive number of retries. +.Pp +.Em Warning: +Using this option with +.Li NONE +should be used with extreme caution as +.Nm pax +may get stuck in an infinite loop on a very badly flawed archive. +.It Fl G Ar group +Select a file based on its +.Ar group +name, or when starting with a +.Cm # , +a numeric gid. +A '\\' can be used to escape the +.Cm # . +Multiple +.Fl G +options may be supplied and checking stops with the first match. +.It Fl H +Follow only command line symbolic links while performing a physical file +system traversal. +.It Fl L +Follow all symbolic links to perform a logical file system traversal. +.It Fl P +Do not follow symbolic links, perform a physical file system traversal. +This is the default mode. +.It Fl T Ar [from_date][,to_date][/[c][m]] +Allow files to be selected based on a file modification or inode change +time falling within a specified time range of +.Ar from_date +to +.Ar to_date +(the dates are inclusive). +If only a +.Ar from_date +is supplied, all files with a modification or inode change time +equal to or younger are selected. +If only a +.Ar to_date +is supplied, all files with a modification or inode change time +equal to or older will be selected. +When the +.Ar from_date +is equal to the +.Ar to_date , +only files with a modification or inode change time of exactly that +time will be selected. +.Pp +When +.Nm pax +is in the +.Em write +or +.Em copy +mode, the optional trailing field +.Ar [c][m] +can be used to determine which file time (inode change, file modification or +both) are used in the comparison. +If neither is specified, the default is to use file modification time only. +The +.Ar m +specifies the comparison of file modification time (the time when +the file was last written). +The +.Ar c +specifies the comparison of inode change time (the time when the file +inode was last changed; e.g. a change of owner, group, mode, etc). +When +.Ar c +and +.Ar m +are both specified, then the modification and inode change times are +both compared. +The inode change time comparison is useful in selecting files whose +attributes were recently changed or selecting files which were recently +created and had their modification time reset to an older time (as what +happens when a file is extracted from an archive and the modification time +is preserved). +Time comparisons using both file times is useful when +.Nm pax +is used to create a time based incremental archive (only files that were +changed during a specified time range will be archived). +.Pp +A time range is made up of six different fields and each field must contain two +digits. +The format is: +.Dl [yy[mm[dd[hh]]]]mm[.ss] +Where +.Cm yy +is the last two digits of the year, +the first +.Cm mm +is the month (from 01 to 12), +.Cm dd +is the day of the month (from 01 to 31), +.Cm hh +is the hour of the day (from 00 to 23), +the second +.Cm mm +is the minute (from 00 to 59), +and +.Cm ss +is the seconds (from 00 to 59). +The minute field +.Cm mm +is required, while the other fields are optional and must be added in the +following order: +.Dl Cm hh , dd , mm , yy . +The +.Cm ss +field may be added independently of the other fields. +Time ranges are relative to the current time, so +.Dl Fl T Ar 1234/cm +would select all files with a modification or inode change time +of 12:34 PM today or later. +Multiple +.Fl T +time range can be supplied and checking stops with the first match. +.It Fl U Ar user +Select a file based on its +.Ar user +name, or when starting with a +.Cm # , +a numeric uid. +A '\\' can be used to escape the +.Cm # . +Multiple +.Fl U +options may be supplied and checking stops with the first match. +.It Fl X +When traversing the file hierarchy specified by a pathname, +do not descend into directories that have a different device ID. +See the +.Li st_dev +field as described in +.Xr stat 2 +for more information about device ID's. +.It Fl Y +This option is the same as the +.Fl D +option, except that the inode change time is checked using the +pathname created after all the file name modifications have completed. +.It Fl Z +This option is the same as the +.Fl u +option, except that the modification time is checked using the +pathname created after all the file name modifications have completed. +.El +.Pp +The options that operate on the names of files or archive members ( +.Fl c , +.Fl i , +.Fl n , +.Fl s , +.Fl u , +.Fl v , +.Fl D , +.Fl G , +.Fl T , +.Fl U , +.Fl Y , +and +.Fl Z ) +interact as follows. +.Pp +When extracting files during a +.Em read +operation, archive members are +.Sq selected , +based only on the user specified pattern operands as modified by the +.Fl c , +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +.Fl U +options. +Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then the +.Fl Y +and +.Fl Z +options will be applied based on the final pathname. +Finally the +.Fl v +option will write the names resulting from these modifications. +.Pp +When archiving files during a +.Em write +operation, or copying files during a +.Em copy +operation, archive members are +.Sq selected , +based only on the user specified pathnames as modified by the +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +and +.Fl U +options (the +.Fl D +option only applies during a copy operation). +Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then during a +.Em copy +operation the +.Fl Y +and the +.Fl Z +options will be applied based on the final pathname. +Finally the +.Fl v +option will write the names resulting from these modifications. +.Pp +When one or both of the +.Fl u +or +.Fl D +options are specified along with the +.Fl n +option, a file is not considered selected unless it is newer +than the file to which it is compared. +.Sh EXAMPLES +The command: +.Dl pax -w -f /dev/rst0 .\ +copies the contents of the current directory to the device +.Pa /dev/rst0 . +.Pp +The command: +.Dl pax -r -v -f filename +gives the verbose table of contents for an archive stored in +.Pa filename . +.Pp +The following commands: +.Dl mkdir newdir +.Dl cd olddir +.Dl pax -rw .\ newdir +will copy the entire +.Pa olddir +directory hierarchy to +.Pa newdir . +.Pp +The command: +.Dl pax -r -s ',^//*usr//*,,' -f a.pax +reads the archive +.Pa a.pax , +with all files rooted in ``/usr'' into the archive extracted relative to the +current directory. +.Pp +The command: +.Dl pax -rw -i .\ dest_dir +can be used to interactively select the files to copy from the current +directory to +.Pa dest_dir . +.Pp +The command: +.Dl pax -r -pe -U root -G bin -f a.pax +will extract all files from the archive +.Pa a.pax +which are owned by +.Em root +with group +.Em bin +and will preserve all file permissions. +.Pp +The command: +.Dl pax -r -w -v -Y -Z home /backup +will update (and list) only those files in the destination directory +.Pa /backup +which are older (less recent inode change or file modification times) than +files with the same name found in the source file tree +.Pa home . +.Sh STANDARDS +The +.Nm pax +utility is a superset of the +.St -p1003.2 +standard. +The options +.Fl B , +.Fl D , +.Fl E , +.Fl G , +.Fl H , +.Fl L , +.Fl P , +.Fl T , +.Fl U , +.Fl Y , +.Fl Z , +the archive formats +.Ar bcpio , +.Ar sv4cpio , +.Ar sv4crc , +.Ar tar , +and the flawed archive handling during +.Ar list +and +.Ar read +operations are extensions to the +.Tn POSIX +standard. +.Sh AUTHOR +Keith Muller at the University of California, San Diego +.Sh ERRORS +.Nm pax +will exit with one of the following values: +.Bl -tag -width 2n +.It 0 +All files were processed successfully. +.It 1 +An error occurred. +.El +.Pp +Whenever +.Nm pax +cannot create a file or a link when reading an archive or cannot +find a file when writing an archive, or cannot preserve the user ID, +group ID, or file mode when the +.Fl p +option is specified, a diagnostic message is written to +.Dv standard error +and a non-zero exit status will be returned, but processing will continue. +In the case where pax cannot create a link to a file, +.Nm pax +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated by +a signal or error, +.Nm pax +may have only partially extracted a file the user wanted. +Additionally, the file modes of extracted files and directories +may have incorrect file bits, and the modification and access times may be +wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal or error, +.Nm pax +may have only partially created the archive which may violate the specific +archive format specification. +.Pp +If while doing a +.Em copy , +.Nm pax +detects a file is about to overwrite itself, the file is not copied, +a diagnostic message is written to +.Dv standard error +and when +.Nm pax +completes it will exit with a non-zero exit status. diff --git a/bin/pax/pax.c b/bin/pax/pax.c new file mode 100644 index 0000000..e8b6626 --- /dev/null +++ b/bin/pax/pax.c @@ -0,0 +1,405 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1992, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)pax.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <stdio.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <signal.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include "pax.h" +#include "extern.h" +static int gen_init __P((void)); + +/* + * PAX main routines, general globals and some simple start up routines + */ + +/* + * Variables that can be accessed by any routine within pax + */ +int act = DEFOP; /* read/write/append/copy */ +FSUB *frmt = NULL; /* archive format type */ +int cflag; /* match all EXCEPT pattern/file */ +int dflag; /* directory member match only */ +int iflag; /* interactive file/archive rename */ +int kflag; /* do not overwrite existing files */ +int lflag; /* use hard links when possible */ +int nflag; /* select first archive member match */ +int tflag; /* restore access time after read */ +int uflag; /* ignore older modification time files */ +int vflag; /* produce verbose output */ +int Dflag; /* same as uflag except inode change time */ +int Hflag; /* follow command line symlinks (write only) */ +int Lflag; /* follow symlinks when writing */ +int Xflag; /* archive files with same device id only */ +int Yflag; /* same as Dflg except after name mode */ +int Zflag; /* same as uflg except after name mode */ +int vfpart; /* is partial verbose output in progress */ +int patime = 1; /* preserve file access time */ +int pmtime = 1; /* preserve file modification times */ +int pmode; /* preserve file mode bits */ +int pids; /* preserve file uid/gid */ +int exit_val; /* exit value */ +int docrc; /* check/create file crc */ +char *dirptr; /* destination dir in a copy */ +char *ltmfrmt; /* -v locale time format (if any) */ +char *argv0; /* root of argv[0] */ +sigset_t s_mask; /* signal mask for cleanup critical sect */ + +/* + * PAX - Portable Archive Interchange + * + * A utility to read, write, and write lists of the members of archive + * files and copy directory hierarchies. A variety of archive formats + * are supported (some are described in POSIX 1003.1 10.1): + * + * ustar - 10.1.1 extended tar interchange format + * cpio - 10.1.2 extended cpio interchange format + * tar - old BSD 4.3 tar format + * binary cpio - old cpio with binary header format + * sysVR4 cpio - with and without CRC + * + * This version is a superset of IEEE Std 1003.2b-d3 + * + * Summary of Extensions to the IEEE Standard: + * + * 1 READ ENHANCEMENTS + * 1.1 Operations which read archives will continue to operate even when + * processing archives which may be damaged, truncated, or fail to meet + * format specs in several different ways. Damaged sections of archives + * are detected and avoided if possible. Attempts will be made to resync + * archive read operations even with badly damaged media. + * 1.2 Blocksize requirements are not strictly enforced on archive read. + * Tapes which have variable sized records can be read without errors. + * 1.3 The user can specify via the non-standard option flag -E if error + * resync operation should stop on a media error, try a specified number + * of times to correct, or try to correct forever. + * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks + * of all zeros will be restored with holes appropriate for the target + * filesystem + * 1.5 The user is notified whenever something is found during archive + * read operations which violates spec (but the read will continue). + * 1.6 Multiple archive volumes can be read and may span over different + * archive devices + * 1.7 Rigidly restores all file attributes exactly as they are stored on the + * archive. + * 1.8 Modification change time ranges can be specified via multiple -T + * options. These allow a user to select files whose modification time + * lies within a specific time range. + * 1.9 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 1.10 Files can be selected based on group (group name or gid) via one o + * more -G options. + * 1.11 File modification time can be checked against exisiting file after + * name modification (-Z) + * + * 2 WRITE ENHANCEMENTS + * 2.1 Write operation will stop instead of allowing a user to create a flawed + * flawed archive (due to any problem). + * 2.2 Archives writtens by pax are forced to strictly conform to both the + * archive and pax the spceific format specifications. + * 2.3 Blocking size and format is rigidly enforced on writes. + * 2.4 Formats which may exhibit header overflow problems (they have fields + * too small for large file systems, such as inode number storage), use + * routines designed to repair this problem. These techniques still + * conform to both pax and format specifications, but no longer truncate + * these fields. This removes any restrictions on using these archive + * formats on large file systems. + * 2.5 Multiple archive volumes can be written and may span over different + * archive devices + * 2.6 A archive volume record limit allows the user to specify the number + * of bytes stored on an archive volume. When reached the user is + * prompted for the next archive volume. This is specified with the + * non-standard -B flag. THe limit is rounded up to the next blocksize. + * 2.7 All archive padding during write use zero filled sections. This makes + * it much easier to pull data out of flawed archive during read + * operations. + * 2.8 Access time reset with the -t applies to all file nodes (including + * directories). + * 2.9 Symbolic links can be followed with -L (optional in the spec). + * 2.10 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 2.11 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 2.12 Files can be selected based on group (group name or gid) via one o + * more -G options. + * 2.13 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * + * 3 COPY ENHANCEMENTS + * 3.1 Sparse files (lseek holes) can be copied without expanding the holes + * into zero filled blocks. The file copy is created with holes which are + * appropriate for the target filesystem + * 3.2 Access time as well as modification time on copied file trees can be + * preserved with the appropriate -p options. + * 3.3 Access time reset with the -t applies to all file nodes (including + * directories). + * 3.4 Symbolic links can be followed with -L (optional in the spec). + * 3.5 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 3.6 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 3.7 Files can be selected based on group (group name or gid) via one o + * more -G options. + * 3.8 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * 3.9 File inode change time can be checked against exisiting file before + * name modification (-D) + * 3.10 File inode change time can be checked against exisiting file after + * name modification (-Y) + * 3.11 File modification time can be checked against exisiting file after + * name modification (-Z) + * + * 4 GENERAL ENHANCEMENTS + * 4.1 Internal structure is designed to isolate format dependent and + * independent functions. Formats are selected via a format driver table. + * This encourages the addition of new archive formats by only having to + * write those routines which id, read and write the archive header. + */ + +/* + * main() + * parse options, set up and operate as specified by the user. + * any operational flaw will set exit_val to non-zero + * Return: 0 if ok, 1 otherwise + */ + +#if __STDC__ +int +main(int argc, char **argv) +#else +int +main(argc, argv) + int argc; + char **argv; +#endif +{ + /* + * parse options, determine operational mode, general init + */ + options(argc, argv); + if ((gen_init() < 0) || (tty_init() < 0)) + return(exit_val); + + /* + * select a primary operation mode + */ + switch(act) { + case EXTRACT: + extract(); + break; + case ARCHIVE: + archive(); + break; + case APPND: + append(); + break; + case COPY: + copy(); + break; + default: + case LIST: + list(); + break; + } + return(exit_val); +} + +/* + * sig_cleanup() + * when interrupted we try to do whatever delayed processing we can. + * This is not critical, but we really ought to limit our damage when we + * are aborted by the user. + * Return: + * never.... + */ + +#if __STDC__ +void +sig_cleanup(int which_sig) +#else +void +sig_cleanup(which_sig) + int which_sig; +#endif +{ + /* + * restore modes and times for any dirs we may have created + * or any dirs we may have read. Set vflag and vfpart so the user + * will clearly see the message on a line by itself. + */ + vflag = vfpart = 1; + if (which_sig == SIGXCPU) + warn(0, "Cpu time limit reached, cleaning up."); + else + warn(0, "Signal caught, cleaning up."); + + ar_close(); + proc_dir(); + if (tflag) + atdir_end(); + exit(1); +} + +/* + * gen_init() + * general setup routines. Not all are required, but they really help + * when dealing with a medium to large sized archives. + */ + +#if __STDC__ +static int +gen_init(void) +#else +static int +gen_init() +#endif +{ + struct rlimit reslimit; + struct sigaction n_hand; + struct sigaction o_hand; + + /* + * Really needed to handle large archives. We can run out of memory for + * internal tables really fast when we have a whole lot of files... + */ + if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_DATA , &reslimit); + } + + /* + * should file size limits be waived? if the os limits us, this is + * needed if we want to write a large archive + */ + if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_FSIZE , &reslimit); + } + + /* + * increase the size the stack can grow to + */ + if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_STACK , &reslimit); + } + + /* + * not really needed, but doesn't hurt + */ + if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_RSS , &reslimit); + } + + /* + * Handle posix locale + * + * set user defines time printing format for -v option + */ + ltmfrmt = getenv("LC_TIME"); + + /* + * signal handling to reset stored directory times and modes. Since + * we deal with broken pipes via failed writes we ignore it. We also + * deal with any file size limit thorugh failed writes. Cpu time + * limits are caught and a cleanup is forced. + */ + if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || + (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || + (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) || + (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) { + warn(1, "Unable to set up signal mask"); + return(-1); + } + n_hand.sa_mask = s_mask; + n_hand.sa_flags = 0; + n_hand.sa_handler = sig_cleanup; + + if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGHUP, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGTERM, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGINT, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGQUIT, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGXCPU, &o_hand, &o_hand) < 0)) + goto out; + + n_hand.sa_handler = SIG_IGN; + if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) || + (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0)) + goto out; + return(0); + + out: + syswarn(1, errno, "Unable to set up signal handler"); + return(-1); +} diff --git a/bin/pax/pax.h b/bin/pax/pax.h new file mode 100644 index 0000000..5658d76 --- /dev/null +++ b/bin/pax/pax.h @@ -0,0 +1,238 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pax.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * BSD PAX global data structures and constants. + */ + +#define MAXBLK 32256 /* MAX blocksize supported (posix SPEC) */ + /* WARNING: increasing MAXBLK past 32256 */ + /* will violate posix spec. */ +#define BLKMULT 512 /* blocksize must be even mult of 512 bytes */ + /* Don't even think of changing this */ +#define DEVBLK 8192 /* default read blksize for devices */ +#define FILEBLK 10240 /* default read blksize for files */ +#define PAXPATHLEN 3072 /* maximium path length for pax. MUST be */ + /* longer than the system MAXPATHLEN */ + +/* + * Pax modes of operation + */ +#define LIST 0 /* List the file in an archive */ +#define EXTRACT 1 /* extract the files in an archive */ +#define ARCHIVE 2 /* write a new archive */ +#define APPND 3 /* append to the end of an archive */ +#define COPY 4 /* copy files to destination dir */ +#define DEFOP LIST /* if no flags default is to LIST */ + +/* + * Device type of the current archive volume + */ +#define ISREG 0 /* regular file */ +#define ISCHR 1 /* character device */ +#define ISBLK 2 /* block device */ +#define ISTAPE 3 /* tape drive */ +#define ISPIPE 4 /* pipe/socket */ + +/* + * Format Specific Routine Table + * + * The format specific routine table allows new archive formats to be quickly + * added. Overall pax operation is independent of the actual format used to + * form the archive. Only those routines which deal directly with the archive + * are tailored to the oddities of the specifc format. All other routines are + * independent of the archive format. Data flow in and out of the format + * dependent routines pass pointers to ARCHD structure (described below). + */ +typedef struct { + char *name; /* name of format, this is the name the user */ + /* gives to -x option to select it. */ + int bsz; /* default block size. used when the user */ + /* does not specify a blocksize for writing */ + /* Appends continue to with the blocksize */ + /* the archive is currently using.*/ + int hsz; /* Header size in bytes. this is the size of */ + /* the smallest header this format supports. */ + /* Headers are assumed to fit in a BLKMULT. */ + /* If they are bigger, get_head() and */ + /* get_arc() must be adjusted */ + int udev; /* does append require unique dev/ino? some */ + /* formats use the device and inode fields */ + /* to specify hard links. when members in */ + /* the archive have the same inode/dev they */ + /* are assumed to be hard links. During */ + /* append we may have to generate unique ids */ + /* to avoid creating incorrect hard links */ + int hlk; /* does archive store hard links info? if */ + /* not, we do not bother to look for them */ + /* during archive write operations */ + int blkalgn; /* writes must be aligned to blkalgn boundry */ + int inhead; /* is the trailer encoded in a valid header? */ + /* if not, trailers are assumed to be found */ + /* in invalid headers (i.e like tar) */ + int (*id)(); /* checks if a buffer is a valid header */ + /* returns 1 if it is, o.w. returns a 0 */ + int (*st_rd)(); /* initialize routine for read. so format */ + /* can set up tables etc before it starts */ + /* reading an archive */ + int (*rd)(); /* read header routine. passed a pointer to */ + /* ARCHD. It must extract the info from the */ + /* format and store it in the ARCHD struct. */ + /* This routine is expected to fill all the */ + /* fields in the ARCHD (including stat buf) */ + /* 0 is returned when a valid header is */ + /* found. -1 when not valid. This routine */ + /* set the skip and pad fields so the format */ + /* independent routines know the amount of */ + /* padding and the number of bytes of data */ + /* which follow the header. This info is */ + /* used skip to the next file header */ + off_t (*end_rd)(); /* read cleanup. Allows format to clean up */ + /* and MUST RETURN THE LENGTH OF THE TRAILER */ + /* RECORD (so append knows how many bytes */ + /* to move back to rewrite the trailer) */ + int (*st_wr)(); /* initialize routine for write operations */ + int (*wr)(); /* write archive header. Passed an ARCHD */ + /* filled with the specs on the next file to */ + /* archived. Returns a 1 if no file data is */ + /* is to be stored; 0 if file data is to be */ + /* added. A -1 is returned if a write */ + /* operation to the archive failed. this */ + /* function sets the skip and pad fields so */ + /* the proper padding can be added after */ + /* file data. This routine must NEVER write */ + /* a flawed archive header. */ + int (*end_wr)(); /* end write. write the trailer and do any */ + /* other format specific functions needed */ + /* at the ecnd of a archive write */ + int (*trail)(); /* returns 0 if a valid trailer, -1 if not */ + /* For formats which encode the trailer */ + /* outside of a valid header, a return value */ + /* of 1 indicates that the block passed to */ + /* it can never contain a valid header (skip */ + /* this block, no point in looking at it) */ + /* CAUTION: parameters to this function are */ + /* different for trailers inside or outside */ + /* of headers. See get_head() for details */ + int (*rd_data)(); /* read/process file data from the archive */ + int (*wr_data)(); /* write/process file data to the archive */ + int (*options)(); /* process format specific options (-o) */ +} FSUB; + +/* + * Pattern matching structure + * + * Used to store command line patterns + */ +typedef struct pattern { + char *pstr; /* pattern to match, user supplied */ + char *pend; /* end of a prefix match */ + int plen; /* length of pstr */ + int flgs; /* processing/state flags */ +#define MTCH 0x1 /* pattern has been matched */ +#define DIR_MTCH 0x2 /* pattern matched a directory */ + struct pattern *fow; /* next pattern */ +} PATTERN; + +/* + * General Archive Structure (used internal to pax) + * + * This structure is used to pass information about archive members between + * the format independent routines and the format specific routines. When + * new archive formats are added, they must accept requests and supply info + * encoded in a structure of this type. The name fields are declared statically + * here, as there is only ONE of these floating around, size is not a major + * consideration. Eventually converting the name fields to a dynamic length + * may be required if and when the supporting operating system removes all + * restrictions on the length of pathnames it will resolve. + */ +typedef struct { + int nlen; /* file name length */ + char name[PAXPATHLEN+1]; /* file name */ + int ln_nlen; /* link name length */ + char ln_name[PAXPATHLEN+1]; /* name to link to (if any) */ + char *org_name; /* orig name in file system */ + PATTERN *pat; /* ptr to pattern match (if any) */ + struct stat sb; /* stat buffer see stat(2) */ + off_t pad; /* bytes of padding after file xfer */ + off_t skip; /* bytes of real data after header */ + /* IMPORTANT. The st_size field does */ + /* not always indicate the amount of */ + /* data following the header. */ + u_long crc; /* file crc */ + int type; /* type of file node */ +#define PAX_DIR 1 /* directory */ +#define PAX_CHR 2 /* character device */ +#define PAX_BLK 3 /* block device */ +#define PAX_REG 4 /* regular file */ +#define PAX_SLK 5 /* symbolic link */ +#define PAX_SCK 6 /* socket */ +#define PAX_FIF 7 /* fifo */ +#define PAX_HLK 8 /* hard link */ +#define PAX_HRG 9 /* hard link to a regular file */ +#define PAX_CTG 10 /* high performance file */ +} ARCHD; + +/* + * Format Specific Options List + * + * Used to pass format options to the format options handler + */ +typedef struct oplist { + char *name; /* option variable name e.g. name= */ + char *value; /* value for option variable */ + struct oplist *fow; /* next option */ +} OPLIST; + +/* + * General Macros + */ +#ifndef MIN +#define MIN(a,b) (((a)<(b))?(a):(b)) +#endif +#define MAJOR(x) (((unsigned)(x) >> 8) & 0xff) +#define MINOR(x) ((x) & 0xff) +#define TODEV(x, y) (((unsigned)(x) << 8) | ((unsigned)(y))) + +/* + * General Defines + */ +#define HEX 16 +#define OCT 8 +#define _PAX_ 1 diff --git a/bin/pax/sel_subs.c b/bin/pax/sel_subs.c new file mode 100644 index 0000000..fa7cb85 --- /dev/null +++ b/bin/pax/sel_subs.c @@ -0,0 +1,657 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)sel_subs.c 8.1 (Berkeley) 5/31/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <pwd.h> +#include <grp.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> +#include <stdlib.h> +#include "pax.h" +#include "sel_subs.h" +#include "extern.h" + +static int str_sec __P((register char *, time_t *)); +static int usr_match __P((register ARCHD *)); +static int grp_match __P((register ARCHD *)); +static int trng_match __P((register ARCHD *)); + +static TIME_RNG *trhead = NULL; /* time range list head */ +static TIME_RNG *trtail = NULL; /* time range list tail */ +static USRT **usrtb = NULL; /* user selection table */ +static GRPT **grptb = NULL; /* group selection table */ + +/* + * Routines for selection of archive members + */ + +/* + * sel_chk() + * check if this file matches a specfied uid, gid or time range + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +#if __STDC__ +int +sel_chk(register ARCHD *arcn) +#else +int +sel_chk(arcn) + register ARCHD *arcn; +#endif +{ + if (((usrtb != NULL) && usr_match(arcn)) || + ((grptb != NULL) && grp_match(arcn)) || + ((trhead != NULL) && trng_match(arcn))) + return(1); + return(0); +} + +/* + * User/group selection routines + * + * Routines to handle user selection of files based on the file uid/gid. To + * add an entry, the user supplies either then name or the uid/gid starting with + * a # on the command line. A \# will eascape the #. + */ + +/* + * usr_add() + * add a user match to the user match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +#if __STDC__ +int +usr_add(register char *str) +#else +int +usr_add(str) + register char *str; +#endif +{ + register u_int indx; + register USRT *pt; + register struct passwd *pw; + register uid_t uid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((usrtb == NULL) && + ((usrtb = (USRT **)calloc(USR_TB_SZ, sizeof(USRT *))) == NULL)) { + warn(1, "Unable to allocate memory for user selection table"); + return(-1); + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a user name, \# escapes # as first char in user name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if ((pw = getpwnam(str)) == NULL) { + warn(1, "Unable to find uid for user: %s", str); + return(-1); + } + uid = (uid_t)pw->pw_uid; + } else +# ifdef NET2_STAT + uid = (uid_t)atoi(str+1); +# else + uid = (uid_t)strtoul(str+1, (char **)NULL, 10); +# endif + endpwent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)uid) % USR_TB_SZ; + if ((pt = usrtb[indx]) != NULL) { + while (pt != NULL) { + if (pt->uid == uid) + return(0); + pt = pt->fow; + } + } + + /* + * uid is not yet in the table, add it to the front of the chain + */ + if ((pt = (USRT *)malloc(sizeof(USRT))) != NULL) { + pt->uid = uid; + pt->fow = usrtb[indx]; + usrtb[indx] = pt; + return(0); + } + warn(1, "User selection table out of memory"); + return(-1); +} + +/* + * usr_match() + * check if this files uid matches a selected uid. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +#if __STDC__ +static int +usr_match(register ARCHD *arcn) +#else +static int +usr_match(arcn) + register ARCHD *arcn; +#endif +{ + register USRT *pt; + + /* + * hash and look for it in the table + */ + pt = usrtb[((unsigned)arcn->sb.st_uid) % USR_TB_SZ]; + while (pt != NULL) { + if (pt->uid == arcn->sb.st_uid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * grp_add() + * add a group match to the group match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +#if __STDC__ +int +grp_add(register char *str) +#else +int +grp_add(str) + register char *str; +#endif +{ + register u_int indx; + register GRPT *pt; + register struct group *gr; + register gid_t gid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((grptb == NULL) && + ((grptb = (GRPT **)calloc(GRP_TB_SZ, sizeof(GRPT *))) == NULL)) { + warn(1, "Unable to allocate memory fo group selection table"); + return(-1); + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a group name, \# escapes # as first char in group name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if ((gr = getgrnam(str)) == NULL) { + warn(1,"Cannot determine gid for group name: %s", str); + return(-1); + } + gid = (gid_t)gr->gr_gid; + } else +# ifdef NET2_STAT + gid = (gid_t)atoi(str+1); +# else + gid = (gid_t)strtoul(str+1, (char **)NULL, 10); +# endif + endgrent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)gid) % GRP_TB_SZ; + if ((pt = grptb[indx]) != NULL) { + while (pt != NULL) { + if (pt->gid == gid) + return(0); + pt = pt->fow; + } + } + + /* + * gid not in the table, add it to the front of the chain + */ + if ((pt = (GRPT *)malloc(sizeof(GRPT))) != NULL) { + pt->gid = gid; + pt->fow = grptb[indx]; + grptb[indx] = pt; + return(0); + } + warn(1, "Group selection table out of memory"); + return(-1); +} + +/* + * grp_match() + * check if this files gid matches a selected gid. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +#if __STDC__ +static int +grp_match(register ARCHD *arcn) +#else +static int +grp_match(arcn) + register ARCHD *arcn; +#endif +{ + register GRPT *pt; + + /* + * hash and look for it in the table + */ + pt = grptb[((unsigned)arcn->sb.st_gid) % GRP_TB_SZ]; + while (pt != NULL) { + if (pt->gid == arcn->sb.st_gid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * Time range selection routines + * + * Routines to handle user selection of files based on the modification and/or + * inode change time falling within a specified time range (the non-standard + * -T flag). The user may specify any number of different file time ranges. + * Time ranges are checked one at a time until a match is found (if at all). + * If the file has a mtime (and/or ctime) which lies within one of the time + * ranges, the file is selected. Time ranges may have a lower and/or a upper + * value. These ranges are inclusive. When no time ranges are supplied to pax + * with the -T option, all members in the archive will be selected by the time + * range routines. When only a lower range is supplied, only files with a + * mtime (and/or ctime) equal to or younger are selected. When only a upper + * range is supplied, only files with a mtime (and/or ctime) equal to or older + * are selected. When the lower time range is equal to the upper time range, + * only files with a mtime (or ctime) of exactly that time are selected. + */ + +/* + * trng_add() + * add a time range match to the time range list. + * This is a non-standard pax option. Lower and upper ranges are in the + * format: [yy[mm[dd[hh]]]]mm[.ss] and are comma separated. + * Time ranges are based on current time, so 1234 would specify a time of + * 12:34 today. + * Return: + * 0 if the time range was added to the list, -1 otherwise + */ + +#if __STDC__ +int +trng_add(register char *str) +#else +int +trng_add(str) + register char *str; +#endif +{ + register TIME_RNG *pt; + register char *up_pt = NULL; + register char *stpt; + register char *flgpt; + register int dot = 0; + + /* + * throw out the badly formed time ranges + */ + if ((str == NULL) || (*str == '\0')) { + warn(1, "Empty time range string"); + return(-1); + } + + /* + * locate optional flags suffix /{cm}. + */ + if ((flgpt = rindex(str, '/')) != NULL) + *flgpt++ = '\0'; + + for (stpt = str; *stpt != '\0'; ++stpt) { + if ((*stpt >= '0') && (*stpt <= '9')) + continue; + if ((*stpt == ',') && (up_pt == NULL)) { + *stpt = '\0'; + up_pt = stpt + 1; + dot = 0; + continue; + } + + /* + * allow only one dot per range (secs) + */ + if ((*stpt == '.') && (!dot)) { + ++dot; + continue; + } + warn(1, "Improperly specified time range: %s", str); + goto out; + } + + /* + * allocate space for the time range and store the limits + */ + if ((pt = (TIME_RNG *)malloc(sizeof(TIME_RNG))) == NULL) { + warn(1, "Unable to allocate memory for time range"); + return(-1); + } + + /* + * by default we only will check file mtime, but usee can specify + * mtime, ctime (inode change time) or both. + */ + if ((flgpt == NULL) || (*flgpt == '\0')) + pt->flgs = CMPMTME; + else { + pt->flgs = 0; + while (*flgpt != '\0') { + switch(*flgpt) { + case 'M': + case 'm': + pt->flgs |= CMPMTME; + break; + case 'C': + case 'c': + pt->flgs |= CMPCTME; + break; + default: + warn(1, "Bad option %c with time range %s", + *flgpt, str); + goto out; + } + ++flgpt; + } + } + + /* + * start off with the current time + */ + pt->low_time = pt->high_time = time((time_t *)NULL); + if (*str != '\0') { + /* + * add lower limit + */ + if (str_sec(str, &(pt->low_time)) < 0) { + warn(1, "Illegal lower time range %s", str); + (void)free((char *)pt); + goto out; + } + pt->flgs |= HASLOW; + } + + if ((up_pt != NULL) && (*up_pt != '\0')) { + /* + * add upper limit + */ + if (str_sec(up_pt, &(pt->high_time)) < 0) { + warn(1, "Illegal upper time range %s", up_pt); + (void)free((char *)pt); + goto out; + } + pt->flgs |= HASHIGH; + + /* + * check that the upper and lower do not overlap + */ + if (pt->flgs & HASLOW) { + if (pt->low_time > pt->high_time) { + warn(1, "Upper %s and lower %s time overlap", + up_pt, str); + (void)free((char *)pt); + return(-1); + } + } + } + + pt->fow = NULL; + if (trhead == NULL) { + trtail = trhead = pt; + return(0); + } + trtail->fow = pt; + trtail = pt; + return(0); + + out: + warn(1, "Time range format is: [yy[mm[dd[hh]]]]mm[.ss][/[c][m]]"); + return(-1); +} + +/* + * trng_match() + * check if this files mtime/ctime falls within any supplied time range. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +#if __STDC__ +static int +trng_match(register ARCHD *arcn) +#else +static int +trng_match(arcn) + register ARCHD *arcn; +#endif +{ + register TIME_RNG *pt; + + /* + * have to search down the list one at a time looking for a match. + * remember time range limits are inclusive. + */ + pt = trhead; + while (pt != NULL) { + switch(pt->flgs & CMPBOTH) { + case CMPBOTH: + /* + * user wants both mtime and ctime checked for this + * time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPCTME: + /* + * user wants only ctime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPMTME: + default: + /* + * user wants only mtime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + } + break; + } + + if (pt == NULL) + return(1); + return(0); +} + +/* + * str_sec() + * Convert a time string in the format of [yy[mm[dd[hh]]]]mm[.ss] to gmt + * seconds. Tval already has current time loaded into it at entry. + * Return: + * 0 if converted ok, -1 otherwise + */ + +#if __STDC__ +static int +str_sec(register char *str, time_t *tval) +#else +static int +str_sec(str, tval) + register char *str; + time_t *tval; +#endif +{ + register struct tm *lt; + register char *dot = NULL; + + lt = localtime(tval); + if ((dot = index(str, '.')) != NULL) { + /* + * seconds (.ss) + */ + *dot++ = '\0'; + if (strlen(dot) != 2) + return(-1); + if ((lt->tm_sec = ATOI2(dot)) > 61) + return(-1); + } else + lt->tm_sec = 0; + + switch (strlen(str)) { + case 10: + /* + * year (yy) + * watch out for year 2000 + */ + if ((lt->tm_year = ATOI2(str)) < 69) + lt->tm_year += 100; + str += 2; + /* FALLTHROUGH */ + case 8: + /* + * month (mm) + * watch out months are from 0 - 11 internally + */ + if ((lt->tm_mon = ATOI2(str)) > 12) + return(-1); + --lt->tm_mon; + str += 2; + /* FALLTHROUGH */ + case 6: + /* + * day (dd) + */ + if ((lt->tm_mday = ATOI2(str)) > 31) + return(-1); + str += 2; + /* FALLTHROUGH */ + case 4: + /* + * hour (hh) + */ + if ((lt->tm_hour = ATOI2(str)) > 23) + return(-1); + str += 2; + /* FALLTHROUGH */ + case 2: + /* + * minute (mm) + */ + if ((lt->tm_min = ATOI2(str)) > 59) + return(-1); + break; + default: + return(-1); + } + /* + * convert broken-down time to GMT clock time seconds + */ + if ((*tval = mktime(lt)) == -1) + return(-1); + return(0); +} diff --git a/bin/pax/sel_subs.h b/bin/pax/sel_subs.h new file mode 100644 index 0000000..5040c73 --- /dev/null +++ b/bin/pax/sel_subs.h @@ -0,0 +1,73 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)sel_subs.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * data structure for storing uid/grp selects (-U, -G non standard options) + */ + +#define USR_TB_SZ 317 /* user selection table size */ +#define GRP_TB_SZ 317 /* user selection table size */ + +typedef struct usrt { + uid_t uid; + struct usrt *fow; /* next uid */ +} USRT; + +typedef struct grpt { + gid_t gid; + struct grpt *fow; /* next gid */ +} GRPT; + +/* + * data structure for storing user supplied time ranges (-T option) + */ + +#define ATOI2(s) ((((s)[0] - '0') * 10) + ((s)[1] - '0')) + +typedef struct time_rng { + time_t low_time; /* lower inclusive time limit */ + time_t high_time; /* higher inclusive time limit */ + int flgs; /* option flags */ +#define HASLOW 0x01 /* has lower time limit */ +#define HASHIGH 0x02 /* has higher time limit */ +#define CMPMTME 0x04 /* compare file modification time */ +#define CMPCTME 0x08 /* compare inode change time */ +#define CMPBOTH (CMPMTME|CMPCTME) /* compare inode and mod time */ + struct time_rng *fow; /* next pattern */ +} TIME_RNG; diff --git a/bin/pax/tables.c b/bin/pax/tables.c new file mode 100644 index 0000000..3ac1c41 --- /dev/null +++ b/bin/pax/tables.c @@ -0,0 +1,1426 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)tables.c 8.1 (Berkeley) 5/31/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <sys/fcntl.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <stdlib.h> +#include "pax.h" +#include "tables.h" +#include "extern.h" + +/* + * Routines for controlling the contents of all the different databases pax + * keeps. Tables are dynamically created only when they are needed. The + * goal was speed and the ability to work with HUGE archives. The databases + * were kept simple, but do have complex rules for when the contents change. + * As of this writing, the posix library functions were more complex than + * needed for this application (pax databases have very short lifetimes and + * do not survive after pax is finished). Pax is required to handle very + * large archives. These database routines carefully combine memory usage and + * temporary file storage in ways which will not significantly impact runtime + * performance while allowing the largest possible archives to be handled. + * Trying to force the fit to the posix databases routines was not considered + * time well spent. + */ + +static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ +static FTM **ftab = NULL; /* file time table for updating arch */ +static NAMT **ntab = NULL; /* interactive rename storage table */ +static DEVT **dtab = NULL; /* device/inode mapping tables */ +static ATDIR **atab = NULL; /* file tree directory time reset table */ +static int dirfd = -1; /* storage for setting created dir time/mode */ +static u_long dircnt; /* entries in dir time/mode storage */ +static int ffd = -1; /* tmp file for file time table name storage */ + +static DEVT *chk_dev __P((dev_t, int)); + +/* + * hard link table routines + * + * The hard link table tries to detect hard links to files using the device and + * inode values. We do this when writing an archive, so we can tell the format + * write routine that this file is a hard link to another file. The format + * write routine then can store this file in whatever way it wants (as a hard + * link if the format supports that like tar, or ignore this info like cpio). + * (Actually a field in the format driver table tells us if the format wants + * hard link info. if not, we do not waste time looking for them). We also use + * the same table when reading an archive. In that situation, this table is + * used by the format read routine to detect hard links from stored dev and + * inode numbers (like cpio). This will allow pax to create a link when one + * can be detected by the archive format. + */ + +/* + * lnk_start + * Creates the hard link table. + * Return: + * 0 if created, -1 if failure + */ + +#if __STDC__ +int +lnk_start(void) +#else +int +lnk_start() +#endif +{ + if (ltab != NULL) + return(0); + if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) { + warn(1, "Cannot allocate memory for hard link table"); + return(-1); + } + return(0); +} + +/* + * chk_lnk() + * Looks up entry in hard link hash table. If found, it copies the name + * of the file it is linked to (we already saw that file) into ln_name. + * lnkcnt is decremented and if goes to 1 the node is deleted from the + * database. (We have seen all the links to this file). If not found, + * we add the file to the database if it has the potential for having + * hard links to other files we may process (it has a link count > 1) + * Return: + * if found returns 1; if not found returns 0; -1 on error + */ + +#if __STDC__ +int +chk_lnk(register ARCHD *arcn) +#else +int +chk_lnk(arcn) + register ARCHD *arcn; +#endif +{ + register HRDLNK *pt; + register HRDLNK **ppt; + register u_int indx; + + if (ltab == NULL) + return(-1); + /* + * ignore those nodes that cannot have hard links + */ + if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1)) + return(0); + + /* + * hash inode number and look for this file + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) != NULL) { + /* + * it's hash chain in not empty, walk down looking for it + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found a link. set the node type and copy in the + * name of the file it is to link to. we need to + * handle hardlinks to regular files differently than + * other links. + */ + arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name, + PAXPATHLEN+1); + if (arcn->type == PAX_REG) + arcn->type = PAX_HRG; + else + arcn->type = PAX_HLK; + + /* + * if we have found all the links to this file, remove + * it from the database + */ + if (--pt->nlink <= 1) { + *ppt = pt->fow; + (void)free((char *)pt->name); + (void)free((char *)pt); + } + return(1); + } + } + + /* + * we never saw this file before. It has links so we add it to the + * front of this hash chain + */ + if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) { + if ((pt->name = strdup(arcn->name)) != NULL) { + pt->dev = arcn->sb.st_dev; + pt->ino = arcn->sb.st_ino; + pt->nlink = arcn->sb.st_nlink; + pt->fow = ltab[indx]; + ltab[indx] = pt; + return(0); + } + (void)free((char *)pt); + } + + warn(1, "Hard link table out of memory"); + return(-1); +} + +/* + * purg_lnk + * remove reference for a file that we may have added to the data base as + * a potential source for hard links. We ended up not using the file, so + * we do not want to accidently point another file at it later on. + */ + +#if __STDC__ +void +purg_lnk(register ARCHD *arcn) +#else +void +purg_lnk(arcn) + register ARCHD *arcn; +#endif +{ + register HRDLNK *pt; + register HRDLNK **ppt; + register u_int indx; + + if (ltab == NULL) + return; + /* + * do not bother to look if it could not be in the database + */ + if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) || + (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + return; + + /* + * find the hash chain for this inode value, if empty return + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) == NULL) + return; + + /* + * walk down the list looking for the inode/dev pair, unlink and + * free if found + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + if (pt == NULL) + return; + + /* + * remove and free it + */ + *ppt = pt->fow; + (void)free((char *)pt->name); + (void)free((char *)pt); +} + +/* + * lnk_end() + * pull apart a existing link table so we can reuse it. We do this between + * read and write phases of append with update. (The format may have + * used the link table, and we need to start with a fresh table for the + * write phase + */ + +#if __STDC__ +void +lnk_end(void) +#else +void +lnk_end() +#endif +{ + register int i; + register HRDLNK *pt; + register HRDLNK *ppt; + + if (ltab == NULL) + return; + + for (i = 0; i < L_TAB_SZ; ++i) { + if (ltab[i] == NULL) + continue; + pt = ltab[i]; + ltab[i] = NULL; + + /* + * free up each entry on this chain + */ + while (pt != NULL) { + ppt = pt; + pt = ppt->fow; + (void)free((char *)ppt->name); + (void)free((char *)ppt); + } + } + return; +} + +/* + * modification time table routines + * + * The modification time table keeps track of last modification times for all + * files stored in an archive during a write phase when -u is set. We only + * add a file to the archive if it is newer than a file with the same name + * already stored on the archive (if there is no other file with the same + * name on the archive it is added). This applies to writes and appends. + * An append with an -u must read the archive and store the modification time + * for every file on that archive before starting the write phase. It is clear + * that this is one HUGE database. To save memory space, the actual file names + * are stored in a scatch file and indexed by an in memory hash table. The + * hash table is indexed by hashing the file path. The nodes in the table store + * the length of the filename and the lseek offset within the scratch file + * where the actual name is stored. Since there are never any deletions to this + * table, fragmentation of the scratch file is never a issue. Lookups seem to + * not exhibit any locality at all (files in the database are rarely + * looked up more than once...). So caching is just a waste of memory. The + * only limitation is the amount of scatch file space available to store the + * path names. + */ + +/* + * ftime_start() + * create the file time hash table and open for read/write the scratch + * file. (after created it is unlinked, so when we exit we leave + * no witnesses). + * Return: + * 0 if the table and file was created ok, -1 otherwise + */ + +#if __STDC__ +int +ftime_start(void) +#else +int +ftime_start() +#endif +{ + char *pt; + + if (ftab != NULL) + return(0); + if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) { + warn(1, "Cannot allocate memory for file time table"); + return(-1); + } + + /* + * get random name and create temporary scratch file, unlink name + * so it will get removed on exit + */ + if ((pt = tempnam((char *)NULL, (char *)NULL)) == NULL) + return(-1); + (void)unlink(pt); + + if ((ffd = open(pt, O_RDWR | O_CREAT, S_IRWXU)) < 0) { + syswarn(1, errno, "Unable to open temporary file: %s", pt); + return(-1); + } + + (void)unlink(pt); + return(0); +} + +/* + * chk_ftime() + * looks up entry in file time hash table. If not found, the file is + * added to the hash table and the file named stored in the scratch file. + * If a file with the same name is found, the file times are compared and + * the most recent file time is retained. If the new file was younger (or + * was not in the database) the new file is selected for storage. + * Return: + * 0 if file should be added to the archive, 1 if it should be skipped, + * -1 on error + */ + +#if __STDC__ +int +chk_ftime(register ARCHD *arcn) +#else +int +chk_ftime(arcn) + register ARCHD *arcn; +#endif +{ + register FTM *pt; + register int namelen; + register u_int indx; + char ckname[PAXPATHLEN+1]; + + /* + * no info, go ahead and add to archive + */ + if (ftab == NULL) + return(0); + + /* + * hash the pathname and look up in table + */ + namelen = arcn->nlen; + indx = st_hash(arcn->name, namelen, F_TAB_SZ); + if ((pt = ftab[indx]) != NULL) { + /* + * the hash chain is not empty, walk down looking for match + * only read up the path names if the lengths match, speeds + * up the search a lot + */ + while (pt != NULL) { + if (pt->namelen == namelen) { + /* + * potential match, have to read the name + * from the scratch file. + */ + if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) { + syswarn(1, errno, + "Failed ftime table seek"); + return(-1); + } + if (read(ffd, ckname, namelen) != namelen) { + syswarn(1, errno, + "Failed ftime table read"); + return(-1); + } + + /* + * if the names match, we are done + */ + if (!strncmp(ckname, arcn->name, namelen)) + break; + } + + /* + * try the next entry on the chain + */ + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found the file, compare the times, save the newer + */ + if (arcn->sb.st_mtime > pt->mtime) { + /* + * file is newer + */ + pt->mtime = arcn->sb.st_mtime; + return(0); + } + /* + * file is older + */ + return(1); + } + } + + /* + * not in table, add it + */ + if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) { + /* + * add the name at the end of the scratch file, saving the + * offset. add the file to the head of the hash chain + */ + if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) { + if (write(ffd, arcn->name, namelen) == namelen) { + pt->mtime = arcn->sb.st_mtime; + pt->namelen = namelen; + pt->fow = ftab[indx]; + ftab[indx] = pt; + return(0); + } + syswarn(1, errno, "Failed write to file time table"); + } else + syswarn(1, errno, "Failed seek on file time table"); + } else + warn(1, "File time table ran out of memory"); + + if (pt != NULL) + (void)free((char *)pt); + return(-1); +} + +/* + * Interactive rename table routines + * + * The interactive rename table keeps track of the new names that the user + * assignes to files from tty input. Since this map is unique for each file + * we must store it in case there is a reference to the file later in archive + * (a link). Otherwise we will be unable to find the file we know was + * extracted. The remapping of these files is stored in a memory based hash + * table (it is assumed since input must come from /dev/tty, it is unlikely to + * be a very large table). + */ + +/* + * name_start() + * create the interactive rename table + * Return: + * 0 if successful, -1 otherwise + */ + +#if __STDC__ +int +name_start(void) +#else +int +name_start() +#endif +{ + if (ntab != NULL) + return(0); + if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { + warn(1, "Cannot allocate memory for interactive rename table"); + return(-1); + } + return(0); +} + +/* + * add_name() + * add the new name to old name mapping just created by the user. + * If an old name mapping is found (there may be duplicate names on an + * archive) only the most recent is kept. + * Return: + * 0 if added, -1 otherwise + */ + +#if __STDC__ +int +add_name(register char *oname, int onamelen, char *nname) +#else +int +add_name(oname, onamelen, nname) + register char *oname; + int onamelen; + char *nname; +#endif +{ + register NAMT *pt; + register u_int indx; + + if (ntab == NULL) { + /* + * should never happen + */ + warn(0, "No interactive rename table, links may fail\n"); + return(0); + } + + /* + * look to see if we have already mapped this file, if so we + * will update it + */ + indx = st_hash(oname, onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) != NULL) { + /* + * look down the has chain for the file + */ + while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) + pt = pt->fow; + + if (pt != NULL) { + /* + * found an old mapping, replace it with the new one + * the user just input (if it is different) + */ + if (strcmp(nname, pt->nname) == 0) + return(0); + + (void)free((char *)pt->nname); + if ((pt->nname = strdup(nname)) == NULL) { + warn(1, "Cannot update rename table"); + return(-1); + } + return(0); + } + } + + /* + * this is a new mapping, add it to the table + */ + if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) { + if ((pt->oname = strdup(oname)) != NULL) { + if ((pt->nname = strdup(nname)) != NULL) { + pt->fow = ntab[indx]; + ntab[indx] = pt; + return(0); + } + (void)free((char *)pt->oname); + } + (void)free((char *)pt); + } + warn(1, "Interactive rename table out of memory"); + return(-1); +} + +/* + * sub_name() + * look up a link name to see if it points at a file that has been + * remapped by the user. If found, the link is adjusted to contain the + * new name (oname is the link to name) + */ + +#if __STDC__ +void +sub_name(register char *oname, int *onamelen) +#else +void +sub_name(oname, onamelen) + register char *oname; + int *onamelen; +#endif +{ + register NAMT *pt; + register u_int indx; + + if (ntab == NULL) + return; + /* + * look the name up in the hash table + */ + indx = st_hash(oname, *onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) == NULL) + return; + + while (pt != NULL) { + /* + * walk down the hash cahin looking for a match + */ + if (strcmp(oname, pt->oname) == 0) { + /* + * found it, replace it with the new name + * and return (we know that oname has enough space) + */ + *onamelen = l_strncpy(oname, pt->nname, PAXPATHLEN+1); + return; + } + pt = pt->fow; + } + + /* + * no match, just return + */ + return; +} + +/* + * device/inode mapping table routines + * (used with formats that store device and inodes fields) + * + * device/inode mapping tables remap the device field in a archive header. The + * device/inode fields are used to determine when files are hard links to each + * other. However these values have very little meaning outside of that. This + * database is used to solve one of two different problems. + * + * 1) when files are appended to an archive, while the new files may have hard + * links to each other, you cannot determine if they have hard links to any + * file already stored on the archive from a prior run of pax. We must assume + * that these inode/device pairs are unique only within a SINGLE run of pax + * (which adds a set of files to an archive). So we have to make sure the + * inode/dev pairs we add each time are always unique. We do this by observing + * while the inode field is very dense, the use of the dev field is fairly + * sparse. Within each run of pax, we remap any device number of a new archive + * member that has a device number used in a prior run and already stored in a + * file on the archive. During the read phase of the append, we store the + * device numbers used and mark them to not be used by any file during the + * write phase. If during write we go to use one of those old device numbers, + * we remap it to a new value. + * + * 2) Often the fields in the archive header used to store these values are + * too small to store the entire value. The result is an inode or device value + * which can be truncated. This really can foul up an archive. With truncation + * we end up creating links between files that are really not links (after + * truncation the inodes are the same value). We address that by detecting + * truncation and forcing a remap of the device field to split truncated + * inodes away from each other. Each truncation creates a pattern of bits that + * are removed. We use this pattern of truncated bits to partition the inodes + * on a single device to many different devices (each one represented by the + * truncated bit pattern). All inodes on the same device that have the same + * truncation pattern are mapped to the same new device. Two inodes that + * truncate to the same value clearly will always have different truncation + * bit patterns, so they will be split from away each other. When we spot + * device truncation we remap the device number to a non truncated value. + * (for more info see table.h for the data structures involved). + */ + +/* + * dev_start() + * create the device mapping table + * Return: + * 0 if successful, -1 otherwise + */ + +#if __STDC__ +int +dev_start(void) +#else +int +dev_start() +#endif +{ + if (dtab != NULL) + return(0); + if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) { + warn(1, "Cannot allocate memory for device mapping table"); + return(-1); + } + return(0); +} + +/* + * add_dev() + * add a device number to the table. this will force the device to be + * remapped to a new value if it be used during a write phase. This + * function is called during the read phase of an append to prohibit the + * use of any device number already in the archive. + * Return: + * 0 if added ok, -1 otherwise + */ + +#if __STDC__ +int +add_dev(register ARCHD *arcn) +#else +int +add_dev(arcn) + register ARCHD *arcn; +#endif +{ + if (chk_dev(arcn->sb.st_dev, 1) == NULL) + return(-1); + return(0); +} + +/* + * chk_dev() + * check for a device value in the device table. If not found and the add + * flag is set, it is added. This does NOT assign any mapping values, just + * adds the device number as one that need to be remapped. If this device + * is alread mapped, just return with a pointer to that entry. + * Return: + * pointer to the entry for this device in the device map table. Null + * if the add flag is not set and the device is not in the table (it is + * not been seen yet). If add is set and the device cannot be added, null + * is returned (indicates an error). + */ + +#if __STDC__ +static DEVT * +chk_dev(dev_t dev, int add) +#else +static DEVT * +chk_dev(dev, add) + dev_t dev; + int add; +#endif +{ + register DEVT *pt; + register u_int indx; + + if (dtab == NULL) + return(NULL); + /* + * look to see if this device is already in the table + */ + indx = ((unsigned)dev) % D_TAB_SZ; + if ((pt = dtab[indx]) != NULL) { + while ((pt != NULL) && (pt->dev != dev)) + pt = pt->fow; + + /* + * found it, return a pointer to it + */ + if (pt != NULL) + return(pt); + } + + /* + * not in table, we add it only if told to as this may just be a check + * to see if a device number is being used. + */ + if (add == 0) + return(NULL); + + /* + * allocate a node for this device and add it to the front of the hash + * chain. Note we do not assign remaps values here, so the pt->list + * list must be NULL. + */ + if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) { + warn(1, "Device map table out of memory"); + return(NULL); + } + pt->dev = dev; + pt->list = NULL; + pt->fow = dtab[indx]; + dtab[indx] = pt; + return(pt); +} +/* + * map_dev() + * given an inode and device storage mask (the mask has a 1 for each bit + * the archive format is able to store in a header), we check for inode + * and device truncation and remap the device as required. Device mapping + * can also occur when during the read phase of append a device number was + * seen (and was marked as do not use during the write phase). WE ASSUME + * that unsigned longs are the same size or bigger than the fields used + * for ino_t and dev_t. If not the types will have to be changed. + * Return: + * 0 if all ok, -1 otherwise. + */ + +#if __STDC__ +int +map_dev(register ARCHD *arcn, u_long dev_mask, u_long ino_mask) +#else +int +map_dev(arcn, dev_mask, ino_mask) + register ARCHD *arcn; + u_long dev_mask; + u_long ino_mask; +#endif +{ + register DEVT *pt; + register DLIST *dpt; + static dev_t lastdev = 0; /* next device number to try */ + int trc_ino = 0; + int trc_dev = 0; + ino_t trunc_bits = 0; + ino_t nino; + + if (dtab == NULL) + return(0); + /* + * check for device and inode truncation, and extract the truncated + * bit pattern. + */ + if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) + ++trc_dev; + if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { + ++trc_ino; + trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); + } + + /* + * see if this device is already being mapped, look up the device + * then find the truncation bit pattern which applies + */ + if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) { + /* + * this device is already marked to be remapped + */ + for (dpt = pt->list; dpt != NULL; dpt = dpt->fow) + if (dpt->trunc_bits == trunc_bits) + break; + + if (dpt != NULL) { + /* + * we are being remapped for this device and pattern + * change the device number to be stored and return + */ + arcn->sb.st_dev = dpt->dev; + arcn->sb.st_ino = nino; + return(0); + } + } else { + /* + * this device is not being remapped YET. if we do not have any + * form of truncation, we do not need a remap + */ + if (!trc_ino && !trc_dev) + return(0); + + /* + * we have truncation, have to add this as a device to remap + */ + if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL) + goto bad; + + /* + * if we just have a truncated inode, we have to make sure that + * all future inodes that do not truncate (they have the + * truncation pattern of all 0's) continue to map to the same + * device number. We probably have already written inodes with + * this device number to the archive with the truncation + * pattern of all 0's. So we add the mapping for all 0's to the + * same device number. + */ + if (!trc_dev && (trunc_bits != 0)) { + if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL) + goto bad; + dpt->trunc_bits = 0; + dpt->dev = arcn->sb.st_dev; + dpt->fow = pt->list; + pt->list = dpt; + } + } + + /* + * look for a device number not being used. We must watch for wrap + * around on lastdev (so we do not get stuck looking forever!) + */ + while (++lastdev > 0) { + if (chk_dev(lastdev, 0) != NULL) + continue; + /* + * found an unused value. If we have reached truncation point + * for this format we are hosed, so we give up. Otherwise we + * mark it as being used. + */ + if (((lastdev & ((dev_t)dev_mask)) != lastdev) || + (chk_dev(lastdev, 1) == NULL)) + goto bad; + break; + } + + if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)) + goto bad; + + /* + * got a new device number, store it under this truncation pattern. + * change the device number this file is being stored with. + */ + dpt->trunc_bits = trunc_bits; + dpt->dev = lastdev; + dpt->fow = pt->list; + pt->list = dpt; + arcn->sb.st_dev = lastdev; + arcn->sb.st_ino = nino; + return(0); + + bad: + warn(1, "Unable to fix truncated inode/device field when storing %s", + arcn->name); + warn(0, "Archive may create improper hard links when extracted"); + return(0); +} + +/* + * directory access/mod time reset table routines (for directories READ by pax) + * + * The pax -t flag requires that access times of archive files to be the same + * before being read by pax. For regular files, access time is restored after + * the file has been copied. This database provides the same functionality for + * directories read during file tree traversal. Restoring directory access time + * is more complex than files since directories may be read several times until + * all the descendants in their subtree are visited by fts. Directory access + * and modification times are stored during the fts pre-order visit (done + * before any descendants in the subtree is visited) and restored after the + * fts post-order visit (after all the descendants have been visited). In the + * case of premature exit from a subtree (like from the effects of -n), any + * directory entries left in this database are reset during final cleanup + * operations of pax. Entries are hashed by inode number for fast lookup. + */ + +/* + * atdir_start() + * create the directory access time database for directories READ by pax. + * Return: + * 0 is created ok, -1 otherwise. + */ + +#if __STDC__ +int +atdir_start(void) +#else +int +atdir_start() +#endif +{ + if (atab != NULL) + return(0); + if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { + warn(1,"Cannot allocate space for directory access time table"); + return(-1); + } + return(0); +} + + +/* + * atdir_end() + * walk through the directory access time table and reset the access time + * of any directory who still has an entry left in the database. These + * entries are for directories READ by pax + */ + +#if __STDC__ +void +atdir_end(void) +#else +void +atdir_end() +#endif +{ + register ATDIR *pt; + register int i; + + if (atab == NULL) + return; + /* + * for each non-empty hash table entry reset all the directories + * chained there. + */ + for (i = 0; i < A_TAB_SZ; ++i) { + if ((pt = atab[i]) == NULL) + continue; + /* + * remember to force the times, set_ftime() looks at pmtime + * and patime, which only applies to things CREATED by pax, + * not read by pax. Read time reset is controlled by -t. + */ + for (; pt != NULL; pt = pt->fow) + set_ftime(pt->name, pt->mtime, pt->atime, 1); + } +} + +/* + * add_atdir() + * add a directory to the directory access time table. Table is hashed + * and chained by inode number. This is for directories READ by pax + */ + +#if __STDC__ +void +add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime) +#else +void +add_atdir(fname, dev, ino, mtime, atime) + char *fname; + dev_t dev; + ino_t ino; + time_t mtime; + time_t atime; +#endif +{ + register ATDIR *pt; + register u_int indx; + + if (atab == NULL) + return; + + /* + * make sure this directory is not already in the table, if so just + * return (the older entry always has the correct time). The only + * way this will happen is when the same subtree can be traversed by + * different args to pax and the -n option is aborting fts out of a + * subtree before all the post-order visits have been made). + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) != NULL) { + while (pt != NULL) { + if ((pt->ino == ino) && (pt->dev == dev)) + break; + pt = pt->fow; + } + + /* + * oops, already there. Leave it alone. + */ + if (pt != NULL) + return; + } + + /* + * add it to the front of the hash chain + */ + if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) { + if ((pt->name = strdup(fname)) != NULL) { + pt->dev = dev; + pt->ino = ino; + pt->mtime = mtime; + pt->atime = atime; + pt->fow = atab[indx]; + atab[indx] = pt; + return; + } + (void)free((char *)pt); + } + + warn(1, "Directory access time reset table ran out of memory"); + return; +} + +/* + * get_atdir() + * look up a directory by inode and device number to obtain the access + * and modification time you want to set to. If found, the modification + * and access time parameters are set and the entry is removed from the + * table (as it is no longer needed). These are for directories READ by + * pax + * Return: + * 0 if found, -1 if not found. + */ + +#if __STDC__ +int +get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime) +#else +int +get_atdir(dev, ino, mtime, atime) + dev_t dev; + ino_t ino; + time_t *mtime; + time_t *atime; +#endif +{ + register ATDIR *pt; + register ATDIR **ppt; + register u_int indx; + + if (atab == NULL) + return(-1); + /* + * hash by inode and search the chain for an inode and device match + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) == NULL) + return(-1); + + ppt = &(atab[indx]); + while (pt != NULL) { + if ((pt->ino == ino) && (pt->dev == dev)) + break; + /* + * no match, go to next one + */ + ppt = &(pt->fow); + pt = pt->fow; + } + + /* + * return if we did not find it. + */ + if (pt == NULL) + return(-1); + + /* + * found it. return the times and remove the entry from the table. + */ + *ppt = pt->fow; + *mtime = pt->mtime; + *atime = pt->atime; + (void)free((char *)pt->name); + (void)free((char *)pt); + return(0); +} + +/* + * directory access mode and time storage routines (for directories CREATED + * by pax). + * + * Pax requires that extracted directories, by default, have their access/mod + * times and permissions set to the values specified in the archive. During the + * actions of extracting (and creating the destination subtree during -rw copy) + * directories extracted may be modified after being created. Even worse is + * that these directories may have been created with file permissions which + * prohibits any descendants of these directories from being extracted. When + * directories are created by pax, access rights may be added to permit the + * creation of files in their subtree. Every time pax creates a directory, the + * times and file permissions specified by the archive are stored. After all + * files have been extracted (or copied), these directories have their times + * and file modes reset to the stored values. The directory info is restored in + * reverse order as entries were added to the data file from root to leaf. To + * restore atime properly, we must go backwards. The data file consists of + * records with two parts, the file name followed by a DIRDATA trailer. The + * fixed sized trailer contains the size of the name plus the off_t location in + * the file. To restore we work backwards through the file reading the trailer + * then the file name. + */ + +/* + * dir_start() + * set up the directory time and file mode storage for directories CREATED + * by pax. + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +dir_start(void) +#else +int +dir_start() +#endif +{ + char *pt; + + if (dirfd != -1) + return(0); + if ((pt = tempnam((char *)NULL, (char *)NULL)) == NULL) + return(-1); + + /* + * unlink the file so it goes away at termination by itself + */ + (void)unlink(pt); + if ((dirfd = open(pt, O_RDWR|O_CREAT, 0600)) >= 0) { + (void)unlink(pt); + return(0); + } + warn(1, "Unable to create temporary file for directory times: %s", pt); + return(-1); +} + +/* + * add_dir() + * add the mode and times for a newly CREATED directory + * name is name of the directory, psb the stat buffer with the data in it, + * frc_mode is a flag that says whether to force the setting of the mode + * (ignoring the user set values for preserving file mode). Frc_mode is + * for the case where we created a file and found that the resulting + * directory was not writeable and the user asked for file modes to NOT + * be preserved. (we have to preserve what was created by default, so we + * have to force the setting at the end. this is stated explicitly in the + * pax spec) + */ + +#if __STDC__ +void +add_dir(char *name, int nlen, struct stat *psb, int frc_mode) +#else +void +add_dir(name, nlen, psb, frc_mode) + char *name; + int nlen; + struct stat *psb; + int frc_mode; +#endif +{ + DIRDATA dblk; + + if (dirfd < 0) + return; + + /* + * get current position (where file name will start) so we can store it + * in the trailer + */ + if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) { + warn(1,"Unable to store mode and times for directory: %s",name); + return; + } + + /* + * write the file name followed by the trailer + */ + dblk.nlen = nlen + 1; + dblk.mode = psb->st_mode & 0xffff; + dblk.mtime = psb->st_mtime; + dblk.atime = psb->st_atime; + dblk.frc_mode = frc_mode; + if ((write(dirfd, name, dblk.nlen) == dblk.nlen) && + (write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) { + ++dircnt; + return; + } + + warn(1,"Unable to store mode and times for created directory: %s",name); + return; +} + +/* + * proc_dir() + * process all file modes and times stored for directories CREATED + * by pax + */ + +#if __STDC__ +void +proc_dir(void) +#else +void +proc_dir() +#endif +{ + char name[PAXPATHLEN+1]; + DIRDATA dblk; + u_long cnt; + + if (dirfd < 0) + return; + /* + * read backwards through the file and process each directory + */ + for (cnt = 0; cnt < dircnt; ++cnt) { + /* + * read the trailer, then the file name, if this fails + * just give up. + */ + if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0) + break; + if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk)) + break; + if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) + break; + if (read(dirfd, name, dblk.nlen) != dblk.nlen) + break; + if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) + break; + + /* + * frc_mode set, make sure we set the file modes even if + * the user didn't ask for it (see file_subs.c for more info) + */ + if (pmode || dblk.frc_mode) + set_pmode(name, dblk.mode); + if (patime || pmtime) + set_ftime(name, dblk.mtime, dblk.atime, 0); + } + + (void)close(dirfd); + dirfd = -1; + if (cnt != dircnt) + warn(1,"Unable to set mode and times for created directories"); + return; +} + +/* + * database independent routines + */ + +/* + * st_hash() + * hashes filenames to a u_int for hashing into a table. Looks at the tail + * end of file, as this provides far better distribution than any other + * part of the name. For performance reasons we only care about the last + * MAXKEYLEN chars (should be at LEAST large enough to pick off the file + * name). Was tested on 500,000 name file tree traversal from the root + * and gave almost a perfectly uniform distribution of keys when used with + * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int) + * chars at a time and pads with 0 for last addition. + * Return: + * the hash value of the string MOD (%) the table size. + */ + +#if __STDC__ +u_int +st_hash(char *name, int len, int tabsz) +#else +u_int +st_hash(name, len, tabsz) + char *name; + int len; + int tabsz; +#endif +{ + register char *pt; + register char *dest; + register char *end; + register int i; + register u_int key = 0; + register int steps; + register int res; + u_int val; + + /* + * only look at the tail up to MAXKEYLEN, we do not need to waste + * time here (remember these are pathnames, the tail is what will + * spread out the keys) + */ + if (len > MAXKEYLEN) { + pt = &(name[len - MAXKEYLEN]); + len = MAXKEYLEN; + } else + pt = name; + + /* + * calculate the number of u_int size steps in the string and if + * there is a runt to deal with + */ + steps = len/sizeof(u_int); + res = len % sizeof(u_int); + + /* + * add up the value of the string in unsigned integer sized pieces + * too bad we cannot have unsigned int aligned strings, then we + * could avoid the expensive copy. + */ + for (i = 0; i < steps; ++i) { + end = pt + sizeof(u_int); + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * add in the runt padded with zero to the right + */ + if (res) { + val = 0; + end = pt + res; + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * return the result mod the table size + */ + return(key % tabsz); +} diff --git a/bin/pax/tables.h b/bin/pax/tables.h new file mode 100644 index 0000000..a992613 --- /dev/null +++ b/bin/pax/tables.h @@ -0,0 +1,172 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tables.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * data structures and constants used by the different databases kept by pax + */ + +/* + * Hash Table Sizes MUST BE PRIME, if set too small performance suffers. + * Probably safe to expect 500000 inodes per tape. Assuming good key + * distribution (inodes) chains of under 50 long (worse case) is ok. + */ +#define L_TAB_SZ 2503 /* hard link hash table size */ +#define F_TAB_SZ 50503 /* file time hash table size */ +#define N_TAB_SZ 541 /* interactive rename hash table */ +#define D_TAB_SZ 317 /* unique device mapping table */ +#define A_TAB_SZ 317 /* ftree dir access time reset table */ +#define MAXKEYLEN 64 /* max number of chars for hash */ + +/* + * file hard link structure (hashed by dev/ino and chained) used to find the + * hard links in a file system or with some archive formats (cpio) + */ +typedef struct hrdlnk { + char *name; /* name of first file seen with this ino/dev */ + dev_t dev; /* files device number */ + ino_t ino; /* files inode number */ + u_long nlink; /* expected link count */ + struct hrdlnk *fow; +} HRDLNK; + +/* + * Archive write update file time table (the -u, -C flag), hashed by filename. + * Filenames are stored in a scratch file at seek offset into the file. The + * file time (mod time) and the file name length (for a quick check) are + * stored in a hash table node. We were forced to use a scratch file because + * with -u, the mtime for every node in the archive must always be available + * to compare against (and this data can get REALLY large with big archives). + * By being careful to read only when we have a good chance of a match, the + * performance loss is not measurable (and the size of the archive we can + * handle is greatly increased). + */ +typedef struct ftm { + int namelen; /* file name length */ + time_t mtime; /* files last modification time */ + off_t seek; /* loacation in scratch file */ + struct ftm *fow; +} FTM; + +/* + * Interactive rename table (-i flag), hashed by orig filename. + * We assume this will not be a large table as this mapping data can only be + * obtained through interactive input by the user. Nobody is going to type in + * changes for 500000 files? We use chaining to resolve collisions. + */ + +typedef struct namt { + char *oname; /* old name */ + char *nname; /* new name typed in by the user */ + struct namt *fow; +} NAMT; + +/* + * Unique device mapping tables. Some protocols (e.g. cpio) require that the + * <c_dev,c_ino> pair will uniquely identify a file in an archive unless they + * are links to the same file. Appending to archives can break this. For those + * protocols that have this requirement we map c_dev to a unique value not seen + * in the archive when we append. We also try to handle inode truncation with + * this table. (When the inode field in the archive header are too small, we + * remap the dev on writes to remove accidental collisions). + * + * The list is hashed by device number using chain collision resolution. Off of + * each DEVT are linked the various remaps for this device based on those bits + * in the inode which were truncated. For example if we are just remapping to + * avoid a device number during an update append, off the DEVT we would have + * only a single DLIST that has a truncation id of 0 (no inode bits were + * stripped for this device so far). When we spot inode truncation we create + * a new mapping based on the set of bits in the inode which were stripped off. + * so if the top four bits of the inode are stripped and they have a pattern of + * 0110...... (where . are those bits not truncated) we would have a mapping + * assigned for all inodes that has the same 0110.... pattern (with this dev + * number of course). This keeps the mapping sparse and should be able to store + * close to the limit of files which can be represented by the optimal + * combination of dev and inode bits, and without creating a fouled up archive. + * Note we also remap truncated devs in the same way (an exercise for the + * dedicated reader; always wanted to say that...:) + */ + +typedef struct devt { + dev_t dev; /* the orig device number we now have to map */ + struct devt *fow; /* new device map list */ + struct dlist *list; /* map list based on inode truncation bits */ +} DEVT; + +typedef struct dlist { + ino_t trunc_bits; /* truncation pattern for a specific map */ + dev_t dev; /* the new device id we use */ + struct dlist *fow; +} DLIST; + +/* + * ftree directory access time reset table. When we are done with with a + * subtree we reset the access and mod time of the directory when the tflag is + * set. Not really explicitly specified in the pax spec, but easy and fast to + * do (and this may have even been intended in the spec, it is not clear). + * table is hashed by inode with chaining. + */ + +typedef struct atdir { + char *name; /* name of directory to reset */ + dev_t dev; /* dev and inode for fast lookup */ + ino_t ino; + time_t mtime; /* access and mod time to reset to */ + time_t atime; + struct atdir *fow; +} ATDIR; + +/* + * created directory time and mode storage entry. After pax is finished during + * extraction or copy, we must reset directory access modes and times that + * may have been modified after creation (they no longer have the specified + * times and/or modes). We must reset time in the reverse order of creation, + * because entries are added from the top of the file tree to the bottom. + * We MUST reset times from leaf to root (it will not work the other + * direction). Entries are recorded into a spool file to make reverse + * reading faster. + */ + +typedef struct dirdata { + int nlen; /* length of the directory name (includes \0) */ + off_t npos; /* position in file where this dir name starts */ + mode_t mode; /* file mode to restore */ + time_t mtime; /* mtime to set */ + time_t atime; /* atime to set */ + int frc_mode; /* do we force mode settings? */ +} DIRDATA; diff --git a/bin/pax/tar.c b/bin/pax/tar.c new file mode 100644 index 0000000..246ae8e --- /dev/null +++ b/bin/pax/tar.c @@ -0,0 +1,1192 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <unistd.h> +#include <stdlib.h> +#include "pax.h" +#include "extern.h" +#include "tar.h" + +/* + * Routines for reading, writing and header identify of various versions of tar + */ + +static u_long tar_chksm __P((register char *, register int)); +static char *name_split __P((register char *, register int)); +static int ul_oct __P((u_long, register char *, register int, int)); +#ifndef NET2_STAT +static int uqd_oct __P((u_quad_t, register char *, register int, int)); +#endif + +/* + * Routines common to all versions of tar + */ + +static int tar_nodir; /* do not write dirs under old tar */ + +/* + * tar_endwr() + * add the tar trailer of two null blocks + * Return: + * 0 if ok, -1 otherwise (what wr_skip returns) + */ + +#if __STDC__ +int +tar_endwr(void) +#else +int +tar_endwr() +#endif +{ + return(wr_skip((off_t)(NULLCNT*BLKMULT))); +} + +/* + * tar_endrd() + * no cleanup needed here, just return size of trailer (for append) + * Return: + * size of trailer (2 * BLKMULT) + */ + +#if __STDC__ +off_t +tar_endrd(void) +#else +off_t +tar_endrd() +#endif +{ + return((off_t)(NULLCNT*BLKMULT)); +} + +/* + * tar_trail() + * Called to determine if a header block is a valid trailer. We are passed + * the block, the in_sync flag (which tells us we are in resync mode; + * looking for a valid header), and cnt (which starts at zero) which is + * used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block + * could never contain a header. + */ + +#if __STDC__ +int +tar_trail(register char *buf, register int in_resync, register int *cnt) +#else +int +tar_trail(buf, in_resync, cnt) + register char *buf; + register int in_resync; + register int *cnt; +#endif +{ + register int i; + + /* + * look for all zero, trailer is two consecutive blocks of zero + */ + for (i = 0; i < BLKMULT; ++i) { + if (buf[i] != '\0') + break; + } + + /* + * if not all zero it is not a trailer, but MIGHT be a header. + */ + if (i != BLKMULT) + return(-1); + + /* + * When given a zero block, we must be careful! + * If we are not in resync mode, check for the trailer. Have to watch + * out that we do not mis-identify file data as the trailer, so we do + * NOT try to id a trailer during resync mode. During resync mode we + * might as well throw this block out since a valid header can NEVER be + * a block of all 0 (we must have a valid file name). + */ + if (!in_resync && (++*cnt >= NULLCNT)) + return(0); + return(1); +} + +/* + * ul_oct() + * convert an unsigned long to an octal string. many oddball field + * termination characters are used by the various versions of tar in the + * different fields. term selects which kind to use. str is BLANK padded + * at the front to len. we are unable to use only one format as many old + * tar readers are very cranky about this. + * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +#if __STDC__ +static int +ul_oct(u_long val, register char *str, register int len, int term) +#else +static int +ul_oct(val, str, len, term) + u_long val; + register char *str; + register int len; + int term; +#endif +{ + register char *pt; + + /* + * term selects the appropriate character(s) for the end of the string + */ + pt = str + len - 1; + switch(term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert and blank pad if there is space + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = val >> 3) == (u_long)0) + break; + } + + while (pt >= str) + *pt-- = ' '; + if (val != (u_long)0) + return(-1); + return(0); +} + +#ifndef NET2_STAT +/* + * uqd_oct() + * convert an u_quad_t to an octal string. one of many oddball field + * termination characters are used by the various versions of tar in the + * different fields. term selects which kind to use. str is BLANK padded + * at the front to len. we are unable to use only one format as many old + * tar readers are very cranky about this. + * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +#if __STDC__ +static int +uqd_oct(u_quad_t val, register char *str, register int len, int term) +#else +static int +uqd_oct(val, str, len, term) + u_quad_t val; + register char *str; + register int len; + int term; +#endif +{ + register char *pt; + + /* + * term selects the appropriate character(s) for the end of the string + */ + pt = str + len - 1; + switch(term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert and blank pad if there is space + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = val >> 3) == 0) + break; + } + + while (pt >= str) + *pt-- = ' '; + if (val != (u_quad_t)0) + return(-1); + return(0); +} +#endif + +/* + * tar_chksm() + * calculate the checksum for a tar block counting the checksum field as + * all blanks (BLNKSUM is that value pre-calculated, the sume of 8 blanks). + * NOTE: we use len to short circuit summing 0's on write since we ALWAYS + * pad headers with 0. + * Return: + * unsigned long checksum + */ + +#if __STDC__ +static u_long +tar_chksm(register char *blk, register int len) +#else +static u_long +tar_chksm(blk, len) + register char *blk; + register int len; +#endif +{ + register char *stop; + register char *pt; + u_long chksm = BLNKSUM; /* inital value is checksum field sum */ + + /* + * add the part of the block before the checksum field + */ + pt = blk; + stop = blk + CHK_OFFSET; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + /* + * move past the checksum field and keep going, spec counts the + * checksum field as the sum of 8 blanks (which is pre-computed as + * BLNKSUM). + * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding + * starts, no point in summing zero's) + */ + pt += CHK_LEN; + stop = blk + len; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + return(chksm); +} + +/* + * Routines for old BSD style tar (also made portable to sysV tar) + */ + +/* + * tar_id() + * determine if a block given to us is a valid tar header (and not a USTAR + * header). We have to be on the lookout for those pesky blocks of all + * zero's. + * Return: + * 0 if a tar header, -1 otherwise + */ + +#if __STDC__ +int +tar_id(register char *blk, int size) +#else +int +tar_id(blk, size) + register char *blk; + int size; +#endif +{ + register HD_TAR *hd; + register HD_USTAR *uhd; + + if (size < BLKMULT) + return(-1); + hd = (HD_TAR *)blk; + uhd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test, then make + * sure this is not a ustar header by looking for the ustar magic + * cookie. We should use TMAGLEN, but some USTAR archive programs are + * wrong and create archives missing the \0. Last we check the + * checksum. If this is ok we have to assume it is a valid header. + */ + if (hd->name[0] == '\0') + return(-1); + if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + return(0); +} + +/* + * tar_opt() + * handle tar format specific -o options + * Return: + * 0 if ok -1 otherwise + */ + +#if __STDC__ +int +tar_opt(void) +#else +int +tar_opt() +#endif +{ + OPLIST *opt; + + while ((opt = opt_next()) != NULL) { + if (strcmp(opt->name, TAR_OPTION) || + strcmp(opt->value, TAR_NODIR)) { + warn(1, "Unknown tar format -o option/value pair %s=%s", + opt->name, opt->value); + warn(1,"%s=%s is the only supported tar format option", + TAR_OPTION, TAR_NODIR); + return(-1); + } + + /* + * we only support one option, and only when writing + */ + if ((act != APPND) && (act != ARCHIVE)) { + warn(1, "%s=%s is only supported when writing.", + opt->name, opt->value); + return(-1); + } + tar_nodir = 1; + } + return(0); +} + + +/* + * tar_rd() + * extract the values out of block already determined to be a tar header. + * store the values in the ARCHD parameter. + * Return: + * 0 + */ + +#if __STDC__ +int +tar_rd(register ARCHD *arcn, register char *buf) +#else +int +tar_rd(arcn, buf) + register ARCHD *arcn; + register char *buf; +#endif +{ + register HD_TAR *hd; + register char *pt; + + /* + * we only get proper sized buffers passed to us + */ + if (tar_id(buf, BLKMULT) < 0) + return(-1); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + arcn->pat = NULL; + + /* + * copy out the name and values in the stat buffer + */ + hd = (HD_TAR *)buf; + arcn->nlen = l_strncpy(arcn->name, hd->name, sizeof(hd->name)); + arcn->name[arcn->nlen] = '\0'; + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & + 0xfff); + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); + arcn->sb.st_size = (size_t)asc_ul(hd->size, sizeof(hd->size), OCT); + arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * have to look at the last character, it may be a '/' and that is used + * to encode this as a directory + */ + pt = &(arcn->name[arcn->nlen - 1]); + arcn->pad = 0; + arcn->skip = 0; + switch(hd->linkflag) { + case SYMTYPE: + /* + * symbolic link, need to get the link name and set the type in + * the st_mode so -v printing will look correct. + */ + arcn->type = PAX_SLK; + arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname, + sizeof(hd->linkname)); + arcn->ln_name[arcn->ln_nlen] = '\0'; + arcn->sb.st_mode |= S_IFLNK; + break; + case LNKTYPE: + /* + * hard link, need to get the link name, set the type in the + * st_mode and st_nlink so -v printing will look better. + */ + arcn->type = PAX_HLK; + arcn->sb.st_nlink = 2; + arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname, + sizeof(hd->linkname)); + arcn->ln_name[arcn->ln_nlen] = '\0'; + + /* + * no idea of what type this thing really points at, but + * we set something for printing only. + */ + arcn->sb.st_mode |= S_IFREG; + break; + case AREGTYPE: + case REGTYPE: + default: + /* + * If we have a trailing / this is a directory and NOT a file. + */ + arcn->ln_name[0] = '\0'; + arcn->ln_nlen = 0; + if (*pt == '/') { + /* + * it is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + } else { + /* + * have a file that will be followed by data. Set the + * skip value to the size field and caluculate the size + * of the padding. + */ + arcn->type = PAX_REG; + arcn->sb.st_mode |= S_IFREG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + } + break; + } + + /* + * strip off any trailing slash. + */ + if (*pt == '/') { + *pt = '\0'; + --arcn->nlen; + } + return(0); +} + +/* + * tar_wr() + * write a tar header for the file specified in the ARCHD to the archive. + * Have to check for file types that cannot be stored and file names that + * are too long. Be careful of the term (last arg) to ul_oct, each field + * of tar has it own spec for the termination character(s). + * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +#if __STDC__ +int +tar_wr(register ARCHD *arcn) +#else +int +tar_wr(arcn) + register ARCHD *arcn; +#endif +{ + register HD_TAR *hd; + int len; + char hdblk[sizeof(HD_TAR)]; + + /* + * check for those file system types which tar cannot store + */ + switch(arcn->type) { + case PAX_DIR: + /* + * user asked that dirs not be written to the archive + */ + if (tar_nodir) + return(1); + break; + case PAX_CHR: + warn(1, "Tar cannot archive a character device %s", + arcn->org_name); + return(1); + case PAX_BLK: + warn(1, "Tar cannot archive a block device %s", arcn->org_name); + return(1); + case PAX_SCK: + warn(1, "Tar cannot archive a socket %s", arcn->org_name); + return(1); + case PAX_FIF: + warn(1, "Tar cannot archive a fifo %s", arcn->org_name); + return(1); + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if (arcn->ln_nlen > sizeof(hd->linkname)) { + warn(1,"Link name too long for tar %s", arcn->ln_name); + return(1); + } + break; + case PAX_REG: + case PAX_CTG: + default: + break; + } + + /* + * check file name len, remember extra char for dirs (the / at the end) + */ + len = arcn->nlen; + if (arcn->type == PAX_DIR) + ++len; + if (len > sizeof(hd->name)) { + warn(1, "File name too long for tar %s", arcn->name); + return(1); + } + + /* + * copy the data out of the ARCHD into the tar header based on the type + * of the file. Remember many tar readers want the unused fields to be + * padded with zero. We set the linkflag field (type), the linkname + * (or zero if not used),the size, and set the padding (if any) to be + * added after the file data (0 for all other types, as they only have + * a header) + */ + hd = (HD_TAR *)hdblk; + zf_strncpy(hd->name, arcn->name, sizeof(hd->name)); + arcn->pad = 0; + + if (arcn->type == PAX_DIR) { + /* + * directories are the same as files, except have a filename + * that ends with a /, we add the slash here. No data follows, + * dirs, so no pad. + */ + hd->linkflag = AREGTYPE; + bzero(hd->linkname, sizeof(hd->linkname)); + hd->name[len-1] = '/'; + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else if (arcn->type == PAX_SLK) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = SYMTYPE; + zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = LNKTYPE; + zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else { + /* + * data follows this file, so set the pad + */ + hd->linkflag = AREGTYPE; + bzero(hd->linkname, sizeof(hd->linkname)); +# ifdef NET2_STAT + if (ul_oct((u_long)arcn->sb.st_size, hd->size, + sizeof(hd->size), 1)) { +# else + if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size, + sizeof(hd->size), 1)) { +# endif + warn(1,"File is too large for tar %s", arcn->org_name); + return(1); + } + arcn->pad = TAR_PAD(arcn->sb.st_size); + } + + /* + * copy those fields that are independent of the type + */ + if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || + ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || + ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || + ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) + goto out; + + /* + * calculate and add the checksum, then write the header. A return of + * 0 tells the caller to now write the file data, 1 says no data needs + * to be written + */ + if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, + sizeof(hd->chksum), 2)) + goto out; + if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) + return(-1); + if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) + return(-1); + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) + return(0); + return(1); + + out: + /* + * header field is out of range + */ + warn(1, "Tar header field is too small for %s", arcn->org_name); + return(1); +} + +/* + * Routines for POSIX ustar + */ + +/* + * ustar_strd() + * initialization for ustar read + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +ustar_strd(void) +#else +int +ustar_strd() +#endif +{ + if ((usrtb_start() < 0) || (grptb_start() < 0)) + return(-1); + return(0); +} + +/* + * ustar_stwr() + * initialization for ustar write + * Return: + * 0 if ok, -1 otherwise + */ + +#if __STDC__ +int +ustar_stwr(void) +#else +int +ustar_stwr() +#endif +{ + if ((uidtb_start() < 0) || (gidtb_start() < 0)) + return(-1); + return(0); +} + +/* + * ustar_id() + * determine if a block given to us is a valid ustar header. We have to + * be on the lookout for those pesky blocks of all zero's + * Return: + * 0 if a ustar header, -1 otherwise + */ + +#if __STDC__ +int +ustar_id(char *blk, int size) +#else +int +ustar_id(blk, size) + char *blk; + int size; +#endif +{ + register HD_USTAR *hd; + + if (size < BLKMULT) + return(-1); + hd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test then check + * ustar magic cookie. We should use TMAGLEN, but some USTAR archive + * programs are fouled up and create archives missing the \0. Last we + * check the checksum. If ok we have to assume it is a valid header. + */ + if (hd->name[0] == '\0') + return(-1); + if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + return(0); +} + +/* + * ustar_rd() + * extract the values out of block already determined to be a ustar header. + * store the values in the ARCHD parameter. + * Return: + * 0 + */ + +#if __STDC__ +int +ustar_rd(register ARCHD *arcn, register char *buf) +#else +int +ustar_rd(arcn, buf) + register ARCHD *arcn; + register char *buf; +#endif +{ + register HD_USTAR *hd; + register char *dest; + register int cnt = 0; + dev_t devmajor; + dev_t devminor; + + /* + * we only get proper sized buffers + */ + if (ustar_id(buf, BLKMULT) < 0) + return(-1); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + arcn->pat = NULL; + hd = (HD_USTAR *)buf; + + /* + * see if the filename is split into two parts. if, so joint the parts. + * we copy the prefix first and add a / between the prefix and name. + */ + dest = arcn->name; + if (*(hd->prefix) != '\0') { + cnt = l_strncpy(arcn->name, hd->prefix, sizeof(hd->prefix)); + dest = arcn->name + arcn->nlen; + *dest++ = '/'; + } + arcn->nlen = l_strncpy(dest, hd->name, sizeof(hd->name)); + arcn->nlen += cnt; + arcn->name[arcn->nlen] = '\0'; + + /* + * follow the spec to the letter. we should only have mode bits, strip + * off all other crud we may be passed. + */ + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & + 0xfff); + arcn->sb.st_size = (size_t)asc_ul(hd->size, sizeof(hd->size), OCT); + arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * If we can find the ascii names for gname and uname in the password + * and group files we will use the uid's and gid they bind. Otherwise + * we use the uid and gid values stored in the header. (This is what + * the posix spec wants). + */ + hd->gname[sizeof(hd->gname) - 1] = '\0'; + if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0) + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); + hd->uname[sizeof(hd->uname) - 1] = '\0'; + if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0) + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + + /* + * set the defaults, these may be changed depending on the file type + */ + arcn->ln_name[0] = '\0'; + arcn->ln_nlen = 0; + arcn->pad = 0; + arcn->skip = 0; + arcn->sb.st_rdev = (dev_t)0; + + /* + * set the mode and PAX type according to the typeflag in the header + */ + switch(hd->typeflag) { + case FIFOTYPE: + arcn->type = PAX_FIF; + arcn->sb.st_mode |= S_IFIFO; + break; + case DIRTYPE: + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + + /* + * Some programs that create ustar archives append a '/' + * to the pathname for directories. This clearly violates + * ustar specs, but we will silently strip it off anyway. + */ + if (arcn->name[arcn->nlen - 1] == '/') + arcn->name[--arcn->nlen] = '\0'; + break; + case BLKTYPE: + case CHRTYPE: + /* + * this type requires the rdev field to be set. + */ + if (hd->typeflag == BLKTYPE) { + arcn->type = PAX_BLK; + arcn->sb.st_mode |= S_IFBLK; + } else { + arcn->type = PAX_CHR; + arcn->sb.st_mode |= S_IFCHR; + } + devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); + devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + break; + case SYMTYPE: + case LNKTYPE: + if (hd->typeflag == SYMTYPE) { + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + } else { + arcn->type = PAX_HLK; + /* + * so printing looks better + */ + arcn->sb.st_mode |= S_IFREG; + arcn->sb.st_nlink = 2; + } + /* + * copy the link name + */ + arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname, + sizeof(hd->linkname)); + arcn->ln_name[arcn->ln_nlen] = '\0'; + break; + case CONTTYPE: + case AREGTYPE: + case REGTYPE: + default: + /* + * these types have file data that follows. Set the skip and + * pad fields. + */ + arcn->type = PAX_REG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + arcn->sb.st_mode |= S_IFREG; + break; + } + return(0); +} + +/* + * ustar_wr() + * write a ustar header for the file specified in the ARCHD to the archive + * Have to check for file types that cannot be stored and file names that + * are too long. Be careful of the term (last arg) to ul_oct, we only use + * '\0' for the termination character (this is different than picky tar) + * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +#if __STDC__ +int +ustar_wr(register ARCHD *arcn) +#else +int +ustar_wr(arcn) + register ARCHD *arcn; +#endif +{ + register HD_USTAR *hd; + register char *pt; + char hdblk[sizeof(HD_USTAR)]; + + /* + * check for those file system types ustar cannot store + */ + if (arcn->type == PAX_SCK) { + warn(1, "Ustar cannot archive a socket %s", arcn->org_name); + return(1); + } + + /* + * check the length of the linkname + */ + if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){ + warn(1, "Link name too long for ustar %s", arcn->ln_name); + return(1); + } + + /* + * split the path name into prefix and name fields (if needed). if + * pt != arcn->name, the name has to be split + */ + if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { + warn(1, "File name too long for ustar %s", arcn->name); + return(1); + } + hd = (HD_USTAR *)hdblk; + arcn->pad = 0L; + + /* + * split the name, or zero out the prefix + */ + if (pt != arcn->name) { + /* + * name was split, pt points at the / where the split is to + * occur, we remove the / and copy the first part to the prefix + */ + *pt = '\0'; + zf_strncpy(hd->prefix, arcn->name, sizeof(hd->prefix)); + *pt++ = '/'; + } else + bzero(hd->prefix, sizeof(hd->prefix)); + + /* + * copy the name part. this may be the whole path or the part after + * the prefix + */ + zf_strncpy(hd->name, pt, sizeof(hd->name)); + + /* + * set the fields in the header that are type dependent + */ + switch(arcn->type) { + case PAX_DIR: + hd->typeflag = DIRTYPE; + bzero(hd->linkname, sizeof(hd->linkname)); + bzero(hd->devmajor, sizeof(hd->devmajor)); + bzero(hd->devminor, sizeof(hd->devminor)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_CHR: + case PAX_BLK: + if (arcn->type == PAX_CHR) + hd->typeflag = CHRTYPE; + else + hd->typeflag = BLKTYPE; + bzero(hd->linkname, sizeof(hd->linkname)); + if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, + sizeof(hd->devmajor), 3) || + ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, + sizeof(hd->devminor), 3) || + ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_FIF: + hd->typeflag = FIFOTYPE; + bzero(hd->linkname, sizeof(hd->linkname)); + bzero(hd->devmajor, sizeof(hd->devmajor)); + bzero(hd->devminor, sizeof(hd->devminor)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if (arcn->type == PAX_SLK) + hd->typeflag = SYMTYPE; + else + hd->typeflag = LNKTYPE; + zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname)); + bzero(hd->devmajor, sizeof(hd->devmajor)); + bzero(hd->devminor, sizeof(hd->devminor)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_REG: + case PAX_CTG: + default: + /* + * file data with this type, set the padding + */ + if (arcn->type == PAX_CTG) + hd->typeflag = CONTTYPE; + else + hd->typeflag = REGTYPE; + bzero(hd->linkname, sizeof(hd->linkname)); + bzero(hd->devmajor, sizeof(hd->devmajor)); + bzero(hd->devminor, sizeof(hd->devminor)); + arcn->pad = TAR_PAD(arcn->sb.st_size); +# ifdef NET2_STAT + if (ul_oct((u_long)arcn->sb.st_size, hd->size, + sizeof(hd->size), 3)) { +# else + if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size, + sizeof(hd->size), 3)) { +# endif + warn(1,"File is too long for ustar %s",arcn->org_name); + return(1); + } + break; + } + + zf_strncpy(hd->magic, TMAGIC, TMAGLEN); + zf_strncpy(hd->version, TVERSION, TVERSLEN); + + /* + * set the remaining fields. Some versions want all 16 bits of mode + * we better humor them (they really do not meet spec though).... + */ + if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || + ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || + ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || + ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) + goto out; + zf_strncpy(hd->uname,name_uid(arcn->sb.st_uid, 0),sizeof(hd->uname)); + zf_strncpy(hd->gname,name_gid(arcn->sb.st_gid, 0),sizeof(hd->gname)); + + /* + * calculate and store the checksum write the header to the archive + * return 0 tells the caller to now write the file data, 1 says no data + * needs to be written + */ + if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, + sizeof(hd->chksum), 3)) + goto out; + if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) + return(-1); + if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) + return(-1); + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) + return(0); + return(1); + + out: + /* + * header field is out of range + */ + warn(1, "Ustar header field is too small for %s", arcn->org_name); + return(1); +} + +/* + * name_split() + * see if the name has to be split for storage in a ustar header. We try + * to fit the entire name in the name field without splitting if we can. + * The split point is always at a / + * Return + * character pointer to split point (always the / that is to be removed + * if the split is not needed, the points is set to the start of the file + * name (it would violate the spec to split there). A NULL is returned if + * the file name is too long + */ + +#if __STDC__ +static char * +name_split(register char *name, register int len) +#else +static char * +name_split(name, len) + register char *name; + register int len; +#endif +{ + register char *start; + + /* + * check to see if the file name is small enough to fit in the name + * field. if so just return a pointer to the name. + */ + if (len <= TNMSZ) + return(name); + if (len > (TPFSZ + TNMSZ + 1)) + return(NULL); + + /* + * we start looking at the biggest sized piece that fits in the name + * field. We walk foward looking for a slash to split at. The idea is + * to find the biggest piece to fit in the name field (or the smallest + * prefix we can find) (the -1 is correct the biggest piece would + * include the slash between the two parts that gets thrown away) + */ + start = name + len - TNMSZ - 1; + while ((*start != '\0') && (*start != '/')) + ++start; + + /* + * if we hit the end of the string, this name cannot be split, so we + * cannot store this file. + */ + if (*start == '\0') + return(NULL); + len = start - name; + + /* + * NOTE: /str where the length of str == TNMSZ can not be stored under + * the p1003.1-1990 spec for ustar. We could force a prefix of / and + * the file would then expand on extract to //str. The len == 0 below + * makes this special case follow the spec to the letter. + */ + if ((len > TPFSZ) || (len == 0)) + return(NULL); + + /* + * ok have a split point, return it to the caller + */ + return(start); +} diff --git a/bin/pax/tar.h b/bin/pax/tar.h new file mode 100644 index 0000000..dad2d77 --- /dev/null +++ b/bin/pax/tar.h @@ -0,0 +1,148 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tar.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * defines and data structures common to all tar formats + */ +#define CHK_LEN 8 /* length of checksum field */ +#define TNMSZ 100 /* size of name field */ +#ifdef _PAX_ +#define NULLCNT 2 /* number of null blocks in trailer */ +#define CHK_OFFSET 148 /* start of chksum field */ +#define BLNKSUM 256L /* sum of checksum field using ' ' */ +#endif /* _PAX_ */ + +/* + * Values used in typeflag field in all tar formats + * (only REGTYPE, LNKTYPE and SYMTYPE are used in old bsd tar headers) + */ +#define REGTYPE '0' /* Regular File */ +#define AREGTYPE '\0' /* Regular File */ +#define LNKTYPE '1' /* Link */ +#define SYMTYPE '2' /* Symlink */ +#define CHRTYPE '3' /* Character Special File */ +#define BLKTYPE '4' /* Block Special File */ +#define DIRTYPE '5' /* Directory */ +#define FIFOTYPE '6' /* FIFO */ +#define CONTTYPE '7' /* high perf file */ + +/* + * Mode field encoding of the different file types - values in octal + */ +#define TSUID 04000 /* Set UID on execution */ +#define TSGID 02000 /* Set GID on execution */ +#define TSVTX 01000 /* Reserved */ +#define TUREAD 00400 /* Read by owner */ +#define TUWRITE 00200 /* Write by owner */ +#define TUEXEC 00100 /* Execute/Search by owner */ +#define TGREAD 00040 /* Read by group */ +#define TGWRITE 00020 /* Write by group */ +#define TGEXEC 00010 /* Execute/Search by group */ +#define TOREAD 00004 /* Read by other */ +#define TOWRITE 00002 /* Write by other */ +#define TOEXEC 00001 /* Execute/Search by other */ + +#ifdef _PAX_ +/* + * Pad with a bit mask, much faster than doing a mod but only works on powers + * of 2. Macro below is for block of 512 bytes. + */ +#define TAR_PAD(x) ((512 - ((x) & 511)) & 511) +#endif /* _PAX_ */ + +/* + * structure of an old tar header as it appeared in BSD releases + */ +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char linkflag; /* norm, hard, or sym. */ + char linkname[TNMSZ]; /* linked to name */ +} HD_TAR; + +#ifdef _PAX_ +/* + * -o options for BSD tar to not write directories to the archive + */ +#define TAR_NODIR "nodir" +#define TAR_OPTION "write_opt" + +/* + * default device names + */ +#define DEV_0 "/dev/rmt0" +#define DEV_1 "/dev/rmt1" +#define DEV_4 "/dev/rmt4" +#define DEV_5 "/dev/rmt5" +#define DEV_7 "/dev/rmt7" +#define DEV_8 "/dev/rmt8" +#endif /* _PAX_ */ + +/* + * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990 + */ +#define TPFSZ 155 +#define TMAGIC "ustar" /* ustar and a null */ +#define TMAGLEN 6 +#define TVERSION "00" /* 00 and no null */ +#define TVERSLEN 2 + +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char typeflag; /* type of file. */ + char linkname[TNMSZ]; /* linked to name */ + char magic[TMAGLEN]; /* magic cookie */ + char version[TVERSLEN]; /* version */ + char uname[32]; /* ascii owner name */ + char gname[32]; /* ascii group name */ + char devmajor[8]; /* major device number */ + char devminor[8]; /* minor device number */ + char prefix[TPFSZ]; /* linked to name */ +} HD_USTAR; diff --git a/bin/pax/tty_subs.c b/bin/pax/tty_subs.c new file mode 100644 index 0000000..0c7e80d --- /dev/null +++ b/bin/pax/tty_subs.c @@ -0,0 +1,245 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)tty_subs.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <fcntl.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include "pax.h" +#include "extern.h" +#if __STDC__ +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +/* + * routines that deal with I/O to and from the user + */ + +#define DEVTTY "/dev/tty" /* device for interactive i/o */ +static FILE *ttyoutf = NULL; /* output pointing at control tty */ +static FILE *ttyinf = NULL; /* input pointing at control tty */ + +/* + * tty_init() + * try to open the controlling termina (if any) for this process. if the + * open fails, future ops that require user input will get an EOF + */ + +#if __STDC__ +int +tty_init(void) +#else +int +tty_init() +#endif +{ + int ttyfd; + + if ((ttyfd = open(DEVTTY, O_RDWR)) >= 0) { + if ((ttyoutf = fdopen(ttyfd, "w")) != NULL) { + if ((ttyinf = fdopen(ttyfd, "r")) != NULL) + return(0); + (void)fclose(ttyoutf); + } + (void)close(ttyfd); + } + + if (iflag) { + warn(1, "Fatal error, cannot open %s", DEVTTY); + return(-1); + } + return(0); +} + +/* + * tty_prnt() + * print a message using the specified format to the controlling tty + * if there is no controlling terminal, just return. + */ + +#if __STDC__ +void +tty_prnt(char *fmt, ...) +#else +void +tty_prnt(fmt, va_alist) + char *fmt; + va_dcl +#endif +{ + va_list ap; +# if __STDC__ + va_start(ap, fmt); +# else + va_start(ap); +# endif + if (ttyoutf == NULL) + return; + (void)vfprintf(ttyoutf, fmt, ap); + va_end(ap); + (void)fflush(ttyoutf); +} + +/* + * tty_read() + * read a string from the controlling terminal if it is open into the + * supplied buffer + * Return: + * 0 if data was read, -1 otherwise. + */ + +#if __STDC__ +int +tty_read(char *str, int len) +#else +int +tty_read(str, len) + char *str; + int len; +#endif +{ + register char *pt; + + if ((--len <= 0) || (ttyinf == NULL) || (fgets(str,len,ttyinf) == NULL)) + return(-1); + *(str + len) = '\0'; + + /* + * strip off that trailing newline + */ + if ((pt = strchr(str, '\n')) != NULL) + *pt = '\0'; + return(0); +} + +/* + * warn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +#if __STDC__ +void +warn(int set, char *fmt, ...) +#else +void +warn(set, fmt, va_alist) + int set; + char *fmt; + va_dcl +#endif +{ + va_list ap; +# if __STDC__ + va_start(ap, fmt); +# else + va_start(ap); +# endif + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fputc('\n', stderr); +} + +/* + * syswarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +#if __STDC__ +void +syswarn(int set, int errnum, char *fmt, ...) +#else +void +syswarn(set, errnum, fmt, va_alist) + int set; + int errnum; + char *fmt; + va_dcl +#endif +{ + va_list ap; +# if __STDC__ + va_start(ap, fmt); +# else + va_start(ap); +# endif + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + + /* + * format and print the errno + */ + if (errnum > 0) + (void)fprintf(stderr, " <%s>", sys_errlist[errnum]); + (void)fputc('\n', stderr); +} |