diff options
author | des <des@FreeBSD.org> | 2000-06-28 16:55:15 +0000 |
---|---|---|
committer | des <des@FreeBSD.org> | 2000-06-28 16:55:15 +0000 |
commit | 9b9369d90eea0c073368daad0cf0e48083929081 (patch) | |
tree | da1f3b0d451d9e9d665ada998ddbcffca1dfe487 /usr.bin | |
parent | b112ea1321e952999620553fdcca688a1d21d2c3 (diff) | |
download | FreeBSD-src-9b9369d90eea0c073368daad0cf0e48083929081.zip FreeBSD-src-9b9369d90eea0c073368daad0cf0e48083929081.tar.gz |
New libfetch-based fetch.
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/fetch/Makefile | 13 | ||||
-rw-r--r-- | usr.bin/fetch/fetch.1 | 499 | ||||
-rw-r--r-- | usr.bin/fetch/fetch.c | 644 | ||||
-rw-r--r-- | usr.bin/fetch/fetch.h | 94 | ||||
-rw-r--r-- | usr.bin/fetch/file.c | 168 | ||||
-rw-r--r-- | usr.bin/fetch/ftp.c | 521 | ||||
-rw-r--r-- | usr.bin/fetch/http.c | 1846 | ||||
-rw-r--r-- | usr.bin/fetch/main.c | 402 | ||||
-rw-r--r-- | usr.bin/fetch/uri.c | 122 | ||||
-rw-r--r-- | usr.bin/fetch/util.c | 334 |
10 files changed, 798 insertions, 3845 deletions
diff --git a/usr.bin/fetch/Makefile b/usr.bin/fetch/Makefile index 31479bd..a34c5e8 100644 --- a/usr.bin/fetch/Makefile +++ b/usr.bin/fetch/Makefile @@ -1,9 +1,10 @@ -PROG = fetch -SRCS = file.c ftp.c http.c main.c util.c uri.c +# $FreeBSD$ -CFLAGS+= -Wall -Wwrite-strings -Wmissing-prototypes - -DPADD= ${LIBFTPIO} ${LIBMD} -LDADD= -lftpio -lmd +MAINTAINER= des@freebsd.org +PROG= fetch +CFLAGS+= -Wall -pedantic +SRCS= fetch.c +DPADD= ${LIBFETCH} +LDADD= -lfetch .include <bsd.prog.mk> diff --git a/usr.bin/fetch/fetch.1 b/usr.bin/fetch/fetch.1 index b2b2ea0..8f59f68 100644 --- a/usr.bin/fetch/fetch.1 +++ b/usr.bin/fetch/fetch.1 @@ -1,378 +1,209 @@ -.\" $FreeBSD$ -.Dd February 22, 1999 +.\"- +.\" Copyright (c) 2000 Dag-Erling Coïdan Smørgrav +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer +.\" in this position and unchanged. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. The name of the author may not be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd June 28, 2000 .Dt FETCH 1 -.Os FreeBSD 4.0 +.Os .Sh NAME .Nm fetch .Nd retrieve a file by Uniform Resource Locator .Sh SYNOPSIS -.Nm fetch -.Op Fl AFMPablmnpqrtv -.Op Fl S Ar size -.Op Fl T Ar timeout +.Nm +.Op Fl 146AFHMPRalmnpqrsv +.Op Fl B Ar bytes +.Op Fl S Ar bytes +.Op Fl T Ar seconds .Op Fl o Ar file +.Op Fl w Ar seconds .Ar URL .Op Ar ... -.Nm fetch -.Op Fl FMPRlmnpqrv -.Op Fl S Ar size -.Op Fl o Ar file -.Op Fl c Ar dir -.Fl f Ar file -.Fl h Ar host .Sh DESCRIPTION -.Nm fetch -allows a user to transfer files from a remote network site using -either the -.Tn FTP -or the -.Tn HTTP -protocol. -In the first form of the command, the -.Ar URL -may be of the form -.Li http://site.domain/path/to/the/file -or -.Li ftp://site.domain/path/to/the/file . -To denote a local filename to be copied or linked to (see the -.Fl l -flag below), the -.Em file:/path/to/the/file -URL form is used. See URL SYNTAX, below. -.Pp -The second form of the command can be used to get a file using the -.Tn FTP -protocol, specifying the file name and the remote host with the -.Fl h -and the -.Fl f -flags. +.Nm Fetch +provides a command-line interface to the +.Xr fetch 3 +library. +Its purpose is to retrieve the file(s) pointed to by the URL(s) on the +command line. .Pp The following options are available: .Bl -tag -width Fl +.It Fl \&1 +Stop and return exit code 0 at the first successfully retrieved file. 
+.It Fl 4 +Forces +.Nm +to use IPv4 addresses only. +.It Fl 6 +Forces +.Nm +to use IPv6 addresses only. .It Fl A -Do not automatically follow ``temporary'' (302) redirects. Some -broken Web sites will return a redirect instead of a not-found error -when the requested object does not exist. +Do not automatically follow ``temporary'' (302) redirects. +Some broken Web sites will return a redirect instead of a not-found +error when the requested object does not exist. .It Fl a Automatically retry the transfer upon soft failures. -.It Fl b -Work around a bug in some -.Tn HTTP -servers which fail to correctly implement the -.Tn TCP -protocol. +.It Fl B Ar bytes +Specify the read buffer size in bytes. +The default is 4096 bytes. +Attempts to set a buffer size lower than this will be silently +ignored. +The number of reads actually performed is reported at verbosity level +two or higher (see the +.Fl v +flag). .It Fl c Ar dir The file to retrieve is in directory .Ar dir on the remote host. .It Fl F -Force restart without checking for the local file's date matching -that of the remote file. Use this with +In combination with the .Fl r -to restart a transfer of a partial file where the modification -time on the local file has been changed and you are sure that the -remote file is still the same, as this will prevent retrieval from -starting anew. +flag, forces a restart even if the local and remote files have +different modification times. .It Fl f Ar file The file to retrieve is named .Ar file on the remote host. +.It Fl H +When using passive FTP, allocate a high port for the data connection. +See +.Xr ip 4 +for details on how to specify which port range this corresponds to. .It Fl h Ar host The file to retrieve is located on the host .Ar host . .It Fl l -If target is a -.Ar file:/ -style of URL, make a link to the target rather than trying -to copy it. +If the target is a file-scheme URL, make a symbolic link to the target +rather than trying to copy it. 
.It Fl M .It Fl m -Mirror mode: Set the modification time of the file so that it is -identical to the modification time of the file at the remote host. -If the file already exists on the local host and is identical (as -gauged by size and modification time), no transfer is done. +Mirror mode: set the modification time of the local file to that of +the remote file. +If the file already exists locally and has the same size and +modification time as the remote file, it will not be fetched. .It Fl n -Don't preserve the modtime of the transferred file, use the current time. +Don't preserve the modtime of the transferred file, use the current +time. .It Fl o Ar file Set the output file name to .Ar file . By default, a ``pathname'' is extracted from the specified URI, and -its basename is used as the name of the output file. A +its basename is used as the name of the output file. +A .Ar file argument of .Sq Li \&- indicates that results are to be directed to the standard output. .It Fl P .It Fl p -Use the passive mode of the -.Tn FTP -protocol. This is useful for crossing certain sorts of firewalls. +Use passive FTP. +This is useful if you are behind a firewall which blocks incoming +connections. +Try this flag if +.Nm +seems to hang when retrieving FTP URLs. .It Fl q Quiet mode. -Do not report transfer progress on the terminal. +This works by setting the verbosity level to zero; see the +.Fl v +option. .It Fl R -The filenames specified are ``precious'', and should not be deleted -under any circumstances, even if the transfer failed or was incomplete. +The output files are precious, and should not be deleted under any +circumstances, even if the transfer failed or was incomplete. .It Fl r Restart a previously interrupted transfer. .It Fl S Ar bytes -Require the file size reported by -.Tn FTP -or -.Tn HTTP -server to match the value specified with this option. -On mismatch, a message is printed and the file will not be fetched. 
-If the server does not support reporting of file sizes, the option -will be ignored and the file will be retrieved anyway. -This option is useful to prevent -.Nm fetch -from downloading a file that is either incomplete or the wrong version, -given the correct size of the file in advance. +Require the file size reported by the server to match the specified +value. +If it does not, a message is printed and the file is not fetched. +If the server does not support reporting file sizes, this option is +ignored and the file is fetched unconditionally. .It Fl s -Ask server for size of file in bytes and print it to stdout. -Do not -actually fetch the file. +Print the size in bytes of each requested file, without fetching it. .It Fl T Ar seconds Set timeout value to .Ar seconds. Overrides the environment variables .Ev FTP_TIMEOUT -for ftp transfers or +for FTP transfers or .Ev HTTP_TIMEOUT -for http transfers if set. +for HTTP transfers if set. .It Fl t Work around a different set of buggy .Tn TCP implementations. .It Fl v -Increase verbosity. More -.Fl v Ns \&'s -result in more information. -.El -.Pp -Many options are also controlled solely by the environment (this is a -bug). -.Sh URL SYNTAX -.Nm -accepts -.Tn http -and -.Tn ftp -URL's, as described in RFC1738. For -.Tn ftp -URL's, a username and password may be specified, using the syntax -.Li ftp://user:password@host/. -If the path is to be absolute, as opposed to relative to the user's -home directory, it must start with %2F, as in -.Li ftp://root:mypass@localhost/%2Fetc/passwd . -.Nm Fetch -condenses multiple slashes in an -.Tn ftp -URL into a single slash; literal multiple slashes translate to an -.Tn ftp -protocol error. -.Sh PROXY SERVERS -Many sites use application gateways (``proxy servers'') in their -firewalls in order to allow communication across the firewall using a -trusted protocol. The -.Nm fetch -program can use both the -.Tn FTP -and the -.Tn HTTP -protocol with a proxy server. 
-.Tn FTP -proxy servers can only relay -.Tn FTP -requests; -.Tn HTTP -proxy servers can relay both -.Tn FTP -and -.Tn HTTP -requests. -A proxy server can be configured by defining an environment variable -named -.Dq Va PROTO Ns Ev _PROXY , -where -.Va PROTO -is the name of the protocol in upper case. The value of the -environment variable specifies a hostname, optionally followed by a -colon and a port number. -.Pp -The -.Tn FTP -proxy client passes the remote username, host and port as the -.Tn FTP -session's username, in the form -.Do Va remoteuser Ns Li \&@ Ns Va remotehost -.Op Li \&@ Ns Va port -.Dc . -The -.Tn HTTP -proxy client simply passes the originally-requested URI to the remote -server in an -.Tn HTTP -.Dq Li GET -request. HTTP proxy authentication is not yet implemented. -.Sh HTTP AUTHENTICATION -The -.Tn HTTP -protocol includes support for various methods of authentication. -Currently, the -.Dq basic -method, which provides no security from packet-sniffing or -man-in-the-middle attacks, is the only method supported in -.Nm fetch . -Authentication is enabled by the -.Ev HTTP_AUTH -and -.Ev HTTP_PROXY_AUTH -environment variables. Both variables have the same format, which -consists of space-separated list of parameter settings, where each -setting consists of a colon-separated list of parameters. The first -two parameters are always the (case-insensitive) authentication scheme -name and the realm in which authentication is to be performed. If the -realm is specified as -.Sq Li \&* , -then it will match all realms not specified otherwise. -.Pp -The -.Li basic -authentication scheme uses two additional optional parameters; the -first is a user name, and the second is the password associated with -it. If either the password or both parameters are not specified in -the environment, and the standard input of -.Nm -is connected to a terminal, then -.Nm -will prompt the user to enter the missing parameters. 
Thus, if the -user is known as -.Dq Li jane -in the -.Dq Li WallyWorld -realm, and has a password of -.Dq Li QghiLx79 -there, then she might set her -.Ev HTTP_AUTH -variable to: -.Bl -enum -offset indent -.It -.Dq Li basic:WallyWorld:jane:QghiLx79 -.It -.Dq Li basic:WallyWorld:jane , -or -.It -.Dq Li basic:WallyWorld +Each instance of this flag increases the verbosity level by one. +Level one (the default) only gives a summary after each file; level +two shows a running count during the transfer, provided that the +standard output goes to a terminal; level three enables messages from +the +.Xr fetch 3 +library. +.It Fl w Ar seconds +When the +.Fl a +flag is specified, wait this many seconds between successive retries. .El -.Pp -and -.Nm -will prompt for any missing information when it is required. She might -also specify a realm of -.Dq Li \&* -instead of -.Dq Li WallyWorld -to indicate that the parameters can be applied to any realm. (This is -most commonly used in a construction such as -.Dq Li basic:* , -which indicates to -.Nm -that it may offer to do -.Li basic -authentication for any realm. -.Sh ERRORS +.Sh DIAGNOSTICS The .Nm -command returns zero on success, or a non-zero value from -.Aq Pa sysexits.h -on failure. If multiple URIs are given for retrieval, +command returns zero on success, or one on failure. +If multiple URLs are listed on the command line, .Nm -will attempt all of them and return zero only if all succeeded -(otherwise it will return the error from the last failure). +will attempt to retrieve each of them in turn, and return zero +only if they were all successfully retrieved. .Sh ENVIRONMENT .Bl -tag -width FTP_PASSIVE_MODE -offset indent .It Ev FTP_TIMEOUT maximum time, in seconds, to wait before aborting an .Tn FTP connection. 
-.It Ev FTP_LOGIN -the login name used for -.Tn FTP -transfers (default -.Dq Li anonymous ) -.It Ev FTP_PASSIVE_MODE -force the use of passive mode FTP -.It Ev FTP_PASSWORD -the password used for -.Tn FTP -transfers (default -.Dq Va yourname Ns Li \&@ Ns Va yourhost ) -.It Ev FTP_PROXY -the address (in the form -.Do Va hostname Ns -.Op Li : Ns Va port -.Dc ) -of a proxy server which understands -.Tn FTP -.It Ev HTTP_AUTH -defines authentication parameters for -.Tn HTTP -.It Ev HTTP_PROXY -the address (in the form -.Do Va hostname Ns -.Op Li : Ns Va port -.Dc ) -of a proxy server which understands -.Tn HTTP -.It Ev HTTP_PROXY_AUTH -defines authentication parameters for -.Tn HTTP -proxy servers .It Ev HTTP_TIMEOUT maximum time, in seconds, to wait before aborting an .Tn HTTP connection. +.El +.Pp +All environment variables mentioned in the documentation for the +.Xr fetch 3 +library are supported. .Sh SEE ALSO -.Xr ftp 1 , -.Xr tftp 1 -.Rs -.%A R. Fielding -.%A J. Gettys -.%A J. Mogul -.%A H. Frystyk -.%A T. Berners-Lee -.%T "Hypertext Transfer Protocol \-\- HTTP/1.1" -.%O RFC 2068 -.%D January 1997 -.Re -.Rs -.%A T. Berners-Lee -.%A L. Masinter -.%A M. McCahill -.%T "Uniform Resource Locators (URL)" -.%O RFC 1738 -.%D December 1994 -.Re -.Rs -.%A J. Postel -.%A J.K. Reynolds -.%T "File Transfer Protocol" -.%O RFC 959 / STD 9 -.%D October 1985 -.Re -.Rs -.%A M.R. Horton -.%T "Standard for interchange of USENET messages." -.%O RFC 850 -.%D June 1983 -.Re +.Xr fetch 3 .Sh HISTORY The -.Nm fetch +.Nm command appeared in .Fx 2.1.5 . +This implementation first appeared in +.Fx 4.1 . .Sh AUTHORS The original implementation of .Nm @@ -381,63 +212,27 @@ was done by It was extensively re-worked for .Fx 2.2 by -.An Garrett Wollman . -.Sh BUGS -There are too many environment variables and command-line options. -.Pp +.An Garrett Wollman , +and later completely rewritten to use the +.Xr fetch 3 +library by +.An Dag-Erling Smørgrav . 
+.Sh NOTES The -.Fl a -option is only implemented for certain kinds of -.Tn HTTP -failures, and no -.Tn FTP -failures. -.Pp -Only the -.Dq basic -authentication mode is implemented for -.Tn HTTP . -This should be replaced by digest authentication. -.Pp -Some -.Tn TCP -implementations (other than -.Tn FreeBSD ) -fail to correctly implement cases where the -.Dv SYN -and/or -.Dv FIN -control flags are specified in packets which also contain data. -The -.Sq Fl t -flag works around the latter deficiency and the -.Sq Fl b -flag works around the former. Since these are errors of the server's -.Tn TCP -stack, the best we can do is provide these workarounds. Given a correct -server, an optimal -.Tn HTTP -transfer without -.Fl t -and .Fl b -involves a minimum of two round trips (for small replies), one less than -other implementations. +and +.Fl t +options are no longer supported and will generate warnings. +They were workarounds for bugs in other OSes which this implementation +does not trigger. .Pp The -.Tn HTTP -standard requires interpretation of the -.Tn RFC 850 -date format, which does not provide a century indication. Versions of -.Nm fetch -prior to -.Fx 3.1 -would interpret all such dates as being in the 1900s. This version of -.Nm fetch -interprets such dates according to the rule given in -.Tn RFC 2068 : -.Bd -literal -offset indent - o HTTP/1.1 clients and caches should assume that an RFC-850 date - which appears to be more than 50 years in the future is in fact - in the past (this helps solve the "year 2000" problem). -.Ed +.Fl f +and +.Fl h +options (used for specifying a file to fetch and a host to fetch +from) are no longer supported and will generate errors. +Use URLs. +RFC1738 is your friend. +.Xr fetch 3 +library. diff --git a/usr.bin/fetch/fetch.c b/usr.bin/fetch/fetch.c new file mode 100644 index 0000000..d063100 --- /dev/null +++ b/usr.bin/fetch/fetch.c @@ -0,0 +1,644 @@ +/*- + * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/socket.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> + +#include <fetch.h> + +#define MINBUFSIZE 4096 + +/* Option flags */ +int A_flag; /* -A: do not follow 302 redirects */ +int a_flag; /* -a: auto retry */ +size_t B_size; /* -B: buffer size */ +int b_flag; /*! 
-b: workaround TCP bug */ +int d_flag; /* -d: direct connection */ +int F_flag; /* -F: restart without checking mtime */ +char *f_filename; /* -f: file to fetch */ +int H_flag; /* -H: use high port */ +char *h_hostname; /* -h: host to fetch from */ +int l_flag; /* -l: link rather than copy file: URLs */ +int m_flag; /* -[Mm]: set local timestamp to remote timestamp */ +int o_flag; /* -o: specify output file */ +int o_directory; /* output file is a directory */ +char *o_filename; /* name of output file */ +int o_stdout; /* output file is stdout */ +int once_flag; /* -1: stop at first successful file */ +int p_flag = 1; /* -[Pp]: use passive FTP */ +int R_flag; /* -R: don't delete partially transferred files */ +int r_flag; /* -r: restart previously interrupted transfer */ +u_int T_secs = 0; /* -T: transfer timeout in seconds */ +int s_flag; /* -s: show size, don't fetch */ +off_t S_size; /* -S: require size to match */ +int t_flag; /*! -t: workaround TCP bug */ +int v_level = 1; /* -v: verbosity level */ +int v_tty; /* stdout is a tty */ +u_int w_secs; /* -w: retry delay */ +int family = PF_UNSPEC; /* -[46]: address family to use */ + + +u_int ftp_timeout; /* default timeout for FTP transfers */ +u_int http_timeout; /* default timeout for HTTP transfers */ +u_char *buf; /* transfer buffer */ + + +void +sig_handler(int sig) +{ + errx(1, "Transfer timed out"); +} + +struct xferstat { + char name[40]; + struct timeval start; + struct timeval end; + struct timeval last; + off_t size; + off_t offset; + off_t rcvd; +}; + +void +stat_start(struct xferstat *xs, char *name, off_t size, off_t offset) +{ + snprintf(xs->name, sizeof xs->name, "%s", name); + xs->size = size; + xs->offset = offset; + if (v_level) { + fprintf(stderr, "Receiving %s", xs->name); + if (xs->size != -1) + fprintf(stderr, " (%lld bytes)", xs->size - xs->offset); + } + gettimeofday(&xs->start, NULL); + xs->last = xs->start; +} + +void +stat_update(struct xferstat *xs, off_t rcvd) +{ + struct timeval now; 
+ + xs->rcvd = rcvd; + + if (v_level <= 1 || !v_tty) + return; + + gettimeofday(&now, NULL); + if (now.tv_sec <= xs->last.tv_sec) + return; + xs->last = now; + + fprintf(stderr, "\rReceiving %s", xs->name); + if (xs->size == -1) + fprintf(stderr, ": %lld bytes", xs->rcvd - xs->offset); + else + fprintf(stderr, " (%lld bytes): %d%%", xs->size - xs->offset, + (int)((100.0 * xs->rcvd) / (xs->size - xs->offset))); +} + +void +stat_end(struct xferstat *xs) +{ + double delta; + double bps; + + gettimeofday(&xs->end, NULL); + + if (!v_level) + return; + + fputc('\n', stderr); + delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6)) + - (xs->start.tv_sec + (xs->start.tv_usec / 1.e6)); + fprintf(stderr, "%lld bytes transferred in %.1f seconds ", + xs->size - xs->offset, delta); + bps = (xs->size - xs->offset) / delta; + if (bps > 1024*1024) + fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024)); + else if (bps > 1024) + fprintf(stderr, "(%.2f kBps)\n", bps / 1024); + else + fprintf(stderr, "(%.2f Bps)\n", bps); +} + +int +fetch(char *URL, char *path) +{ + struct url *url; + struct url_stat us; + struct stat sb; + struct xferstat xs; + FILE *f, *of; + size_t size; + off_t count; + char flags[8]; + int ch, n, r; + u_int timeout; + + f = of = NULL; + + /* parse URL */ + if ((url = fetchParseURL(URL)) == NULL) { + warnx("%s: parse error", URL); + goto failure; + } + + timeout = 0; + *flags = 0; + + /* common flags */ + if (v_level > 2) + strcat(flags, "v"); + switch (family) { + case PF_INET: + strcat(flags, "4"); + break; + case PF_INET6: + strcat(flags, "6"); + break; + } + + /* FTP specific flags */ + if (strcmp(url->scheme, "ftp") == 0) { + if (p_flag) + strcat(flags, "p"); + if (d_flag) + strcat(flags, "d"); + if (H_flag) + strcat(flags, "h"); + timeout = T_secs ? T_secs : ftp_timeout; + } + + /* HTTP specific flags */ + if (strcmp(url->scheme, "http") == 0) { + if (d_flag) + strcat(flags, "d"); + if (A_flag) + strcat(flags, "A"); + timeout = T_secs ? 
T_secs : http_timeout; + } + + /* + * Set the protocol timeout. + * This currently only works for FTP, so we still use + * alarm(timeout) further down. + */ + fetchTimeout = timeout; + + /* stat remote file */ + alarm(timeout); + if (fetchStat(url, &us, flags) == -1) + warnx("%s: size not known", path); + alarm(timeout); + + /* just print size */ + if (s_flag) { + if (us.size == -1) + printf("Unknown\n"); + else + printf("%lld\n", us.size); + goto success; + } + + /* check that size is as expected */ + if (S_size && us.size != -1 && us.size != S_size) { + warnx("%s: size mismatch: expected %lld, actual %lld", + path, S_size, us.size); + goto failure; + } + + /* symlink instead of copy */ + if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) { + if (symlink(url->doc, path) == -1) { + warn("%s: symlink()", path); + goto failure; + } + goto success; + } + + if (o_stdout) { + /* output to stdout */ + of = stdout; + } else if (r_flag && us.size != -1 && stat(path, &sb) != -1 + && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) { + /* output to file, restart aborted transfer */ + if (us.size == sb.st_size) + goto success; + else if (sb.st_size > us.size && truncate(path, us.size) == -1) { + warn("%s: truncate()", path); + goto failure; + } + if ((of = fopen(path, "a")) == NULL) { + warn("%s: open()", path); + goto failure; + } + url->offset = sb.st_size; + } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) { + /* output to file, mirror mode */ + warnx(" local: %lld bytes, mtime %ld", sb.st_size, sb.st_mtime); + warnx("remote: %lld bytes, mtime %ld", us.size, us.mtime); + if (sb.st_size == us.size && sb.st_mtime == us.mtime) + return 0; + if ((of = fopen(path, "w")) == NULL) { + warn("%s: open()", path); + goto failure; + } + } else { + /* output to file, all other cases */ + if ((of = fopen(path, "w")) == NULL) { + warn("%s: open()", path); + goto failure; + } + } + count = url->offset; + + /* start the transfer */ + if ((f = fetchGet(url, flags)) 
== NULL) { + warnx("%s", fetchLastErrString); + goto failure; + } + + /* start the counter */ + stat_start(&xs, path, us.size, count); + + n = 0; + + if (us.size == -1) { + /* + * We have no idea how much data to expect, so do it byte by + * byte. This is incredibly inefficient, but there's not much + * we can do about it... :( + */ + while (1) { + if (timeout) + alarm(timeout); +#ifdef STDIO_HACK + /* + * This is a non-portable hack, but it makes things go + * faster. Basically, if there is data in the input file's + * buffer, write it out; then fall through to the fgetc() + * which forces a refill. It saves a memcpy() and reduces + * the number of iterations, i.e the number of calls to + * alarm(). Empirical evidence shows this can cut user + * time by up to 90%. There may be better (even portable) + * ways to do this. + */ + if (f->_r && (f->_ub._base == NULL)) { + if (fwrite(f->_p, f->_r, 1, of) < 1) + break; + count += f->_r; + f->_p += f->_r; + f->_r = 0; + } +#endif + if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF) + break; + stat_update(&xs, count++); + n++; + } + } else { + /* we know exactly how much to transfer, so do it efficiently */ + for (size = B_size; count != us.size; n++) { + if (us.size - count < B_size) + size = us.size - count; + if (timeout) + alarm(timeout); + if (fread(buf, size, 1, f) != 1 || fwrite(buf, size, 1, of) != 1) + break; + stat_update(&xs, count += size); + } + } + + if (timeout) + alarm(0); + + stat_end(&xs); + + /* check the status of our files */ + if (ferror(f)) + warn("%s", URL); + if (ferror(of)) + warn("%s", path); + if (ferror(f) || ferror(of)) { + if (!R_flag && !o_stdout) + unlink(path); + goto failure; + } + + /* need to close the file before setting mtime */ + if (of != stdout) { + fclose(of); + of = NULL; + } + + /* Set mtime of local file */ + if (m_flag && us.size != -1 && !o_stdout) { + struct timeval tv[2]; + + tv[0].tv_sec = (long)us.atime; + tv[1].tv_sec = (long)us.mtime; + tv[0].tv_usec = tv[1].tv_usec = 
0; + if (utimes(path, tv)) + warn("%s: utimes()", path); + } + + success: + r = 0; + goto done; + failure: + r = -1; + goto done; + done: + if (f) + fclose(f); + if (of && of != stdout) + fclose(of); + fetchFreeURL(url); + return r; +} + +void +usage(void) +{ + /* XXX badly out of synch */ + fprintf(stderr, + "Usage: fetch [-1AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n" + " [-B bytes] [-T seconds] [-w seconds]\n" + " [-f file -h host [-c dir] | URL ...]\n" + ); +} + + +#define PARSENUM(NAME, TYPE) \ +int \ +NAME(char *s, TYPE *v) \ +{ \ + *v = 0; \ + for (*v = 0; *s; s++) \ + if (isdigit(*s)) \ + *v = *v * 10 + *s - '0'; \ + else \ + return -1; \ + return 0; \ +} + +PARSENUM(parseint, u_int) +PARSENUM(parsesize, size_t) +PARSENUM(parseoff, off_t) + +int +main(int argc, char *argv[]) +{ + struct stat sb; + char *p, *q, *s; + int c, e, r; + + while ((c = getopt(argc, argv, + "146AaB:bdFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF) + switch (c) { + case '1': + once_flag = 1; + break; + case '4': + family = PF_INET; + break; + case '6': + family = PF_INET6; + break; + case 'A': + A_flag = 1; + break; + case 'a': + a_flag = 1; + break; + case 'B': + if (parsesize(optarg, &B_size) == -1) + errx(1, "invalid buffer size"); + break; + case 'b': + warnx("warning: the -b option is deprecated"); + b_flag = 1; + break; + case 'd': + d_flag = 1; + break; + case 'F': + F_flag = 1; + break; + case 'f': + f_filename = optarg; + break; + case 'H': + H_flag = 1; + break; + case 'h': + h_hostname = optarg; + break; + case 'l': + l_flag = 1; + break; + case 'o': + o_flag = 1; + o_filename = optarg; + break; + case 'M': + case 'm': + m_flag = 1; + break; + case 'n': + m_flag = 0; + break; + case 'P': + case 'p': + p_flag = 1; + break; + case 'q': + v_level = 0; + break; + case 'R': + R_flag = 1; + break; + case 'r': + r_flag = 1; + break; + case 'S': + if (parseoff(optarg, &S_size) == -1) + errx(1, "invalid size"); + break; + case 's': + s_flag = 1; + break; + case 'T': + if 
(parseint(optarg, &T_secs) == -1) + errx(1, "invalid timeout"); + break; + case 't': + t_flag = 1; + warnx("warning: the -t option is deprecated"); + break; + case 'v': + v_level++; + break; + case 'w': + a_flag = 1; + if (parseint(optarg, &w_secs) == -1) + errx(1, "invalid delay"); + break; + default: + usage(); + exit(EX_USAGE); + } + + argc -= optind; + argv += optind; + + if (h_hostname || f_filename) { + if (!h_hostname || !f_filename || argc) { + usage(); + exit(EX_USAGE); + } + /* XXX this is a hack. */ + if (strcspn(h_hostname, "@:/") != strlen(h_hostname)) + errx(1, "invalid hostname"); + if (asprintf(argv, "ftp://%s/%s", h_hostname, f_filename) == -1) + errx(1, strerror(ENOMEM)); + argc++; + } + + if (!argc) { + usage(); + exit(EX_USAGE); + } + + /* allocate buffer */ + if (B_size < MINBUFSIZE) + B_size = MINBUFSIZE; + if ((buf = malloc(B_size)) == NULL) + errx(1, strerror(ENOMEM)); + + /* timeout handling */ + signal(SIGALRM, sig_handler); + if ((s = getenv("FTP_TIMEOUT")) != NULL) { + if (parseint(s, &ftp_timeout) == -1) { + warnx("FTP_TIMEOUT is not a positive integer"); + ftp_timeout = 0; + } + } + if ((s = getenv("HTTP_TIMEOUT")) != NULL) { + if (parseint(s, &http_timeout) == -1) { + warnx("HTTP_TIMEOUT is not a positive integer"); + http_timeout = 0; + } + } + + /* output file */ + if (o_flag) { + if (strcmp(o_filename, "-") == 0) { + o_stdout = 1; + } else if (stat(o_filename, &sb) == -1) { + if (errno == ENOENT) { + if (argc > 1) + errx(EX_USAGE, "%s is not a directory", o_filename); + } else { + err(EX_IOERR, "%s", o_filename); + } + } else { + if (sb.st_mode & S_IFDIR) + o_directory = 1; + } + } + + /* check if output is to a tty (for progress report) */ + v_tty = isatty(STDOUT_FILENO); + r = 0; + + while (argc) { + if ((p = strrchr(*argv, '/')) == NULL) + p = *argv; + else + p++; + + if (!*p) + p = "fetch.out"; + + fetchLastErrCode = 0; + + if (o_flag) { + if (o_stdout) { + e = fetch(*argv, "-"); + } else if (o_directory) { + asprintf(&q, 
"%s/%s", o_filename, p); + e = fetch(*argv, q); + free(q); + } else { + e = fetch(*argv, o_filename); + } + } else { + e = fetch(*argv, p); + } + + if (e == 0 && once_flag) + exit(0); + + if (e) { + r = 1; + if ((fetchLastErrCode + && fetchLastErrCode != FETCH_UNAVAIL + && fetchLastErrCode != FETCH_MOVED + && fetchLastErrCode != FETCH_URL + && fetchLastErrCode != FETCH_RESOLV + && fetchLastErrCode != FETCH_UNKNOWN)) { + if (w_secs) { + if (v_level) + fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs); + sleep(w_secs); + } + if (a_flag) + continue; + fprintf(stderr, "Skipping %s\n", *argv); + } + } + + argc--, argv++; + } + + exit(r); +} diff --git a/usr.bin/fetch/fetch.h b/usr.bin/fetch/fetch.h deleted file mode 100644 index d54a343..0000000 --- a/usr.bin/fetch/fetch.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 1997 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef fetch_h -#define fetch_h 1 - - -#define BUFFER_SIZE 1024 -#define FETCH_VERSION "fetch/1.0" -#define PATH_CP "/bin/cp" - -struct fetch_state { - const char *fs_status; - const char *fs_outputfile; - int fs_verbose; /* -q, -v option */ - int fs_newtime; /* -n option */ - int fs_mirror; /* -m option */ - int fs_restart; /* -r option */ - int fs_timeout; /* -T option */ - int fs_passive_mode; /* -p option */ - int fs_linkfile; /* -l option */ - int fs_precious; /* -R option */ - int fs_auto_retry; /* -a option */ - int fs_linux_bug; /* -b option */ - int fs_use_connect; /* -t option */ - off_t fs_expectedsize; /* -S option */ - int fs_reportsize; /* -s option */ - int fs_forcerestart; /* -F option */ - time_t fs_modtime; - void *fs_proto; - int (*fs_retrieve)(struct fetch_state *); - int (*fs_close)(struct fetch_state *); -}; - -struct uri_scheme { - const char *sc_name; /* name of the scheme, <32 characters */ - int (*sc_parse)(struct fetch_state *, const char *); - /* routine to parse a URI and build state */ - int (*sc_proxy_parse)(struct fetch_state *, const char *); - /* same, but for proxy case */ - const char *sc_proxy_envar; /* envar used to determine proxy */ - const char *sc_proxy_by; /* list of protos which can proxy us */ - - /* The rest is filled in dynamically... 
*/ - int sc_can_proxy; - struct uri_scheme *sc_proxyproto; -}; - -extern struct uri_scheme file_scheme, ftp_scheme, http_scheme; - -void adjmodtime(struct fetch_state *fs); -void catchsig(int signo); -int display(struct fetch_state *fs, off_t total, ssize_t thisincr); -void init_schemes(void); -void rm(struct fetch_state *fs); -void setup_sigalrm(void); -void unsetup_sigalrm(void); -void *safe_malloc(size_t len); -char *percent_decode(const char *orig); -char *safe_strdup(const char *orig); -char *safe_strndup(const char *orig, size_t len); -char *to_base64(const unsigned char *buf, size_t len); -int from_base64(const char *orig, unsigned char *buf, size_t *lenp); -int parse_host_port(const char *str, char **hostname, int *port); -int parse_uri(struct fetch_state *fs, const char *uri); -#endif /* ! fetch_h */ diff --git a/usr.bin/fetch/file.c b/usr.bin/fetch/file.c deleted file mode 100644 index 24dfd75..0000000 --- a/usr.bin/fetch/file.c +++ /dev/null @@ -1,168 +0,0 @@ -/*- - * Copyright 1997 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/types.h> - -#include <err.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sysexits.h> -#include <unistd.h> - -#include <sys/stat.h> -#include <sys/wait.h> - -#include "fetch.h" - -static int file_retrieve(struct fetch_state *fs); -static int file_close(struct fetch_state *fs); -static int file_parse(struct fetch_state *fs, const char *uri); - -struct uri_scheme file_scheme = - { "file", file_parse, 0, 0, 0 }; - -/* - * Again, we slightly misinterpret the slash after the hostname as - * being the start of the pathname rather than merely a separator. 
- */ -static int -file_parse(struct fetch_state *fs, const char *uri) -{ - const char *p; - - p = uri + 5; /* skip past `file:' */ - if (p[0] == '/' && p[1] == '/') { - /* skip past `//localhost', if any */ - p += 2; - while (*p && *p != '/') - p++; - } - - if (p[0] != '/') { - warnx("`%s': expected absolute pathname in `file' URL", uri); - return EX_USAGE; - } - - fs->fs_proto = percent_decode(p); - /* guaranteed to succeed because of above test */ - p = strrchr(fs->fs_proto, '/'); - if (fs->fs_outputfile == 0) /* only set if not overridden by user */ - fs->fs_outputfile = p + 1; - fs->fs_retrieve = file_retrieve; - fs->fs_close = file_close; - return 0; -} - -static int -file_close(struct fetch_state *fs) -{ - free(fs->fs_proto); - fs->fs_proto = 0; - fs->fs_outputfile = 0; - fs->fs_status = "free"; - return 0; -} - -static int -file_retrieve(struct fetch_state *fs) -{ - struct stat sb; - - /* XXX - this seems bogus to me! */ - if (access(fs->fs_outputfile, F_OK) == 0) { - errno = EEXIST; - warn("%s", fs->fs_outputfile); - return EX_USAGE; - } - - if (fs->fs_linkfile) { - fs->fs_status = "checking path"; - if (stat(fs->fs_proto, &sb) == -1) { - warn("%s", (char *)fs->fs_proto); - return EX_NOINPUT; - } - fs->fs_status = "symlink"; - if (symlink(fs->fs_proto, fs->fs_outputfile) == -1) { - warn("symlink"); - return EX_OSERR; - } - fs->fs_status = "done"; - } else if (strcmp(fs->fs_outputfile, "-") == 0) { - FILE *f; - int ch; - - if ((f = fopen(fs->fs_proto, "r")) == NULL) { - warn("fopen"); - return EX_OSERR; - } - while ((ch = fgetc(f)) != EOF) - fputc(ch, stdout); - if (ferror(f)) { - warn("fgetc"); - fclose(f); - return EX_OSERR; - } - fclose(f); - } else { - pid_t pid; - int status; - - fflush(stderr); - pid = fork(); - if (pid < 0) { - warn("fork"); - return EX_TEMPFAIL; - } else if (pid == 0) { - execl(PATH_CP, "cp", "-p", fs->fs_proto, - fs->fs_outputfile, (char *)0); - warn("execl: " PATH_CP); - fflush(stderr); - _exit(EX_OSERR); - } else { - fs->fs_status 
= "copying"; - if (waitpid(pid, &status, 0) < 0) { - warn("waitpid(%ld)", (long)pid); - return EX_OSERR; - } - if (WIFEXITED(status)) - return WEXITSTATUS(status); - if (WIFSIGNALED(status)) - warn(PATH_CP " exited on signal: %s", - sys_signame[WTERMSIG(status)]); - return EX_OSERR; - } - } - return 0; -} - diff --git a/usr.bin/fetch/ftp.c b/usr.bin/fetch/ftp.c deleted file mode 100644 index 7abab35..0000000 --- a/usr.bin/fetch/ftp.c +++ /dev/null @@ -1,521 +0,0 @@ -/*- - * Copyright 1997 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/types.h> - -#include <err.h> -#include <errno.h> -#include <ftpio.h> -#include <limits.h> -#include <netdb.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sysexits.h> -#include <unistd.h> - -#include <sys/param.h> -#include <sys/stat.h> - -#include "fetch.h" - -struct ftp_state { - char *ftp_hostname; - char *ftp_user; - char *ftp_password; - char *ftp_remote_file; - char **ftp_remote_dirs; - int ftp_remote_ndirs; - char *ftp_remote_path; - char *ftp_type; - unsigned ftp_port; -}; - -static int ftp_close(struct fetch_state *fs); -static int ftp_retrieve(struct fetch_state *fs); -static int ftp_parse(struct fetch_state *fs, const char *uri); -static int ftp_proxy_parse(struct fetch_state *fs, const char *uri); - -struct uri_scheme ftp_scheme = - { "ftp", ftp_parse, ftp_proxy_parse, "FTP_PROXY", "ftp,http" }; - -static int -ftp_parse(struct fetch_state *fs, const char *uri) -{ - const char *p, *slash, *q; - char *hostname, *atsign, *colon, *path, *r, *s, **dp; - unsigned port; - struct ftp_state *ftps; - - p = uri + 4; - port = 0; - - if (p[0] != '/' || p[1] != '/') { - warnx("`%s': invalid `ftp' URL", uri); - return EX_USAGE; - } - - p += 2; - slash = strchr(p, '/'); - if (slash == 0) { - warnx("`%s': malformed `ftp' URL", uri); - return EX_USAGE; - } - hostname = alloca(slash - p + 1); - hostname[0] = '\0'; - strncat(hostname, p, slash - p); - - if ((atsign = strrchr(hostname, '@')) == 0) - q = hostname; - else - q = atsign + 1; - - if ((colon = strchr(q, ':')) != 0) - *colon = '\0'; - - if (colon && *(colon + 1)) { - unsigned long ul; - char *ep; - - errno = 0; - ul = strtoul(colon + 1, &ep, 10); - if (*ep || errno != 0 || ul < 1 || ul > 65534) { - if (errno) - warn("`%s': invalid port in URL", uri); - else - warnx("`%s': invalid port in URL", uri); - return EX_USAGE; - } - - port = ul; - } else { - port = 21; - } - - p = slash + 1; - - ftps = safe_malloc(sizeof *ftps); - ftps->ftp_password = 0; - 
ftps->ftp_user = 0; - - /* - * Now, we have a copy of the hostname in hostname, the specified port - * (or the default value) in port, and p points to the filename part - * of the URI. We just need to check for a user in the hostname, - * and then save all the bits in our state. - */ - if (atsign) { - if (atsign[1] == '\0') { - warnx("`%s': malformed `ftp' hostname", hostname); - free(ftps); - return EX_USAGE; - } - - *atsign = '\0'; - if ((colon = strchr(hostname, ':')) != 0) - *colon = '\0'; - if (hostname[0] == '\0') { - warnx("`%s': malformed `ftp' user", atsign + 1); - free(ftps); - return EX_USAGE; - } - if (colon != 0) - ftps->ftp_password = percent_decode(colon + 1); - ftps->ftp_user = percent_decode(hostname); - ftps->ftp_hostname = safe_strdup(atsign + 1); - } else - ftps->ftp_hostname = safe_strdup(hostname); - ftps->ftp_port = port; - - /* Save the full path for error messages. */ - ftps->ftp_remote_path = percent_decode(p); - - /* Build a list of directory components plus the filename. */ - ftps->ftp_remote_ndirs = 0; - q = p; - while ((q = strchr(q, '/')) != 0) { - q++; - ftps->ftp_remote_ndirs++; - } - path = safe_strdup(p); - if (ftps->ftp_remote_ndirs != 0) { - ftps->ftp_remote_dirs = safe_malloc(ftps->ftp_remote_ndirs * - sizeof(char *)); - r = s = path = safe_strdup(p); - dp = ftps->ftp_remote_dirs; - while ((s = strchr(s, '/')) != 0) { - *s++ = '\0'; - /* - * Skip double-slashes. According to RFC1738, - * double-slashes mean "send 'CWD '", which is - * a syntax error to most FTP servers. Instead, - * we just pretend that multiple slashes are a - * single slash. 
- */ - if (*r == '\0') { - warnx("skipping double slash in FTP URL; see man page or RFC1738."); - ftps->ftp_remote_ndirs--; - } else - *dp++ = percent_decode(r); - r = s; - } - } else { - ftps->ftp_remote_dirs = 0; - r = path; - } - if ((s = strchr(r, ';')) != 0 && strncmp(s, ";type=", 6) == 0) { - *s = '\0'; - ftps->ftp_type = percent_decode(s+6); - } else - ftps->ftp_type = 0; - ftps->ftp_remote_file = percent_decode(r); - free(path); - - if (fs->fs_outputfile == 0) { - fs->fs_outputfile = ftps->ftp_remote_file; - } - - if (ftps->ftp_password == 0) - ftps->ftp_password = getenv("FTP_PASSWORD"); - if (ftps->ftp_password != 0) { - ftps->ftp_password = safe_strdup(ftps->ftp_password); - } else { - char *pw; - const char *logname; - char localhost[MAXHOSTNAMELEN]; - - logname = getlogin(); - if (logname == 0) - logname = "root"; - gethostname(localhost, sizeof localhost); - pw = safe_malloc(strlen(logname) + 1 + strlen(localhost) + 1); - strcpy(pw, logname); - strcat(pw, "@"); - strcat(pw, localhost); - ftps->ftp_password = pw; - setenv("FTP_PASSWORD", pw, 0); /* cache the result */ - } - - if (ftps->ftp_user == 0) - ftps->ftp_user = getenv("FTP_LOGIN"); - if (ftps->ftp_user != 0) - ftps->ftp_user = safe_strdup(ftps->ftp_user); - - fs->fs_proto = ftps; - fs->fs_close = ftp_close; - fs->fs_retrieve = ftp_retrieve; - return 0; -} - -/* - * The only URIs we can handle in the FTP proxy are FTP URLs. - * This makes it possible to take a few short cuts. - */ -static int -ftp_proxy_parse(struct fetch_state *fs, const char *uri) -{ - int rv; - char *hostname; - char *port; - const char *user; - char *newuser; - unsigned portno; - struct ftp_state *ftps; - - hostname = getenv("FTP_PROXY"); - port = strchr(hostname, ':'); - if (port == 0) { - portno = 21; - } else { - unsigned long ul; - char *ep; - - /* All this to avoid modifying the environment. 
*/ - ep = alloca(strlen(hostname) + 1); - strcpy(ep, hostname); - port = ep + (port - hostname); - hostname = ep; - - *port++ = '\0'; - errno = 0; - ul = strtoul(port, &ep, 0); - if (*ep || !*port || errno != 0 || ul < 1 || ul > 65534) { - warnx("`%s': invalid port specification for FTP proxy", - port); - return EX_USAGE; - } - portno = ul; - } - - /* ftp_parse() does most of the work; we can just fix things up */ - rv = ftp_parse(fs, uri); - if (rv) - return rv; - /* Oops.. it got turned into a file: */ - if (fs->fs_retrieve != ftp_retrieve) { - return 0; - } - - ftps = fs->fs_proto; - - user = ftps->ftp_user ? ftps->ftp_user : "anonymous"; - /* user @ hostname [ @port ] \0 */ - newuser = safe_malloc(strlen(user) + 1 + strlen(ftps->ftp_hostname) - + ((ftps->ftp_port != 21) ? 6 : 0) + 1); - - strcpy(newuser, user); - strcat(newuser, "@"); - strcat(newuser, ftps->ftp_hostname); - if (ftps->ftp_port != 21) { - char numbuf[6]; - - snprintf(numbuf, sizeof(numbuf), "%d", ftps->ftp_port); - numbuf[sizeof(numbuf)-1] = '\0'; - strcat(newuser, "@"); - strcat(newuser, numbuf); - } - - ftps->ftp_port = portno; - free(ftps->ftp_hostname); - ftps->ftp_hostname = safe_strdup(hostname); - free(ftps->ftp_user); - ftps->ftp_user = newuser; - return 0; -} - -static int -ftp_close(struct fetch_state *fs) -{ - struct ftp_state *ftps = fs->fs_proto; - int i; - char **dp; - - if (ftps->ftp_user) - free(ftps->ftp_user); - free(ftps->ftp_hostname); - free(ftps->ftp_password); - free(ftps->ftp_remote_file); - for (i = 0, dp = ftps->ftp_remote_dirs; i < ftps->ftp_remote_ndirs; i++, dp++) - free(*dp); - if (ftps->ftp_remote_dirs) - free(ftps->ftp_remote_dirs); - free(ftps->ftp_remote_path); - if (ftps->ftp_type) - free(ftps->ftp_type); - free(ftps); - fs->fs_proto = 0; - fs->fs_outputfile = 0; - return 0; -} - -static int -ftp_retrieve(struct fetch_state *fs) -{ - struct ftp_state *ftps = fs->fs_proto; - FILE *ftp, *remote, *local; - char **dp; - int i, status; - off_t size; - off_t seekloc, 
wehave; - time_t modtime; - size_t readresult, writeresult; - - fs->fs_status = "logging in to FTP server"; - ftp = ftpLogin(ftps->ftp_hostname, - (char *)(ftps->ftp_user ? ftps->ftp_user : "anonymous"), - /* XXX ^^^^ bad API */ - ftps->ftp_password, ftps->ftp_port, fs->fs_verbose > 1, - &status); - if (ftp == 0) { - warnx("%s: %s", ftps->ftp_hostname, - status ? ftpErrString(status) : hstrerror(h_errno)); - return EX_IOERR; - } - fs->fs_status = "preparing for FTP transfer"; - if (ftps->ftp_type && strcasecmp(ftps->ftp_type, "i") != 0) { - if (strcasecmp(ftps->ftp_type, "a") == 0) - ftpAscii(ftp); - else { - warnx("unknown or unsupported type %s", ftps->ftp_type); - return EX_USAGE; - } - } else - ftpBinary(ftp); - ftpPassive(ftp, fs->fs_passive_mode); - for (i = 0, dp = ftps->ftp_remote_dirs; i < ftps->ftp_remote_ndirs; i++, dp++) { - if ((status = ftpChdir(ftp, *dp)) != 0) { - warnx("%s: %s: %s", ftps->ftp_hostname, - *dp, ftpErrString(status)); - return EX_IOERR; - } - } - size = ftpGetSize(ftp, ftps->ftp_remote_file); - - if (fs->fs_reportsize) { - fclose(ftp); - if (size == -1) { - warnx("%s: size not known\n", fs->fs_outputfile); - printf("Unknown\n"); - return 1; - } - else { - printf("%qd\n", (quad_t)size); - return 0; - } - } - - if (size > 0 && fs->fs_expectedsize != -1 && size != fs->fs_expectedsize) { - warnx("%s: size mismatch, expected=%lu / actual=%lu", - ftps->ftp_remote_path, - (unsigned long)fs->fs_expectedsize, - (unsigned long)size); - return EX_DATAERR; - } - modtime = ftpGetModtime(ftp, ftps->ftp_remote_file); - if (modtime <= 0) { /* xxx */ - warnx("%s: cannot get remote modification time", - ftps->ftp_remote_path); - modtime = -1; - } - fs->fs_modtime = modtime; - seekloc = wehave = 0; - if (fs->fs_restart || fs->fs_mirror) { - struct stat stab; - - if (fs->fs_outputfile[0] == '-' - && fs->fs_outputfile[1] == '\0') - status = fstat(STDOUT_FILENO, &stab); - else - status = stat(fs->fs_outputfile, &stab); - if (status < 0) { - stab.st_mtime = 
-1; - stab.st_size = 0; - } - if (status == 0 && !S_ISREG(stab.st_mode)) { - fs->fs_restart = 0; - fs->fs_mirror = 0; - } - if (fs->fs_mirror && stab.st_size == size - && modtime <= stab.st_mtime) { - fclose(ftp); - return 0; - } - if (fs->fs_restart) { - if (stab.st_size != 0 && stab.st_size < size) - seekloc = wehave = stab.st_size; - } - } - - fs->fs_status = "retrieving file from FTP server"; - remote = ftpGet(ftp, ftps->ftp_remote_file, &seekloc); - if (remote == 0) { - if (ftpErrno(ftp)) { - warnx("ftp://%s/%s: FTP error:", - ftps->ftp_hostname, ftps->ftp_remote_path); - warnx("%s", ftpErrString(ftpErrno(ftp))); - fclose(ftp); - return EX_IOERR; - } else { - warn("ftpGet"); - return EX_OSERR; - } - } - - if (fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0') - local = fopen("/dev/stdout", wehave ? "a" : "w"); - else - local = fopen(fs->fs_outputfile, wehave ? "a" : "w"); - if (local == 0) { - warn("%s", fs->fs_outputfile); - fclose(remote); - fclose(ftp); - return EX_OSERR; - } - - if (fs->fs_timeout) { - char buf[sizeof("18446744073709551616")]; /* 2**64 */ - snprintf(buf, sizeof buf, "%d", fs->fs_timeout); - setenv("FTP_TIMEOUT", buf, 1); - } else { - char *env = getenv("FTP_TIMEOUT"); - char *ep; - unsigned long ul; - - if (env) { - errno = 0; - ul = strtoul(env, &ep, 0); - if (*env && *ep == '\0' && errno == 0 && ul <= INT_MAX) - fs->fs_timeout = ul; - else - warnx("`%s': invalid FTP timeout", env); - } - } - - display(fs, size, wehave); - setup_sigalrm(); - - do { - char buf[BUFFER_SIZE]; - - alarm(fs->fs_timeout); - readresult = fread(buf, 1, sizeof buf, remote); - alarm(0); - if (readresult == 0) - break; - display(fs, size, readresult); - writeresult = fwrite(buf, 1, readresult, local); - } while (writeresult == readresult); - unsetup_sigalrm(); - - if (ferror(remote)) { - warn("reading remote file from %s", ftps->ftp_hostname); - fclose(local); - fclose(remote); - fclose(ftp); - rm(fs); - return EX_IOERR; - } else if(ferror(local)) { - 
warn("%s", fs->fs_outputfile); - fclose(local); - fclose(remote); - fclose(ftp); - rm(fs); - return EX_IOERR; - } - - fclose(local); - fclose(remote); - fclose(ftp); - if (display(fs, size, -1) != 0) - return EX_PROTOCOL; - adjmodtime(fs); - return 0; -} diff --git a/usr.bin/fetch/http.c b/usr.bin/fetch/http.c deleted file mode 100644 index e7d5f8b..0000000 --- a/usr.bin/fetch/http.c +++ /dev/null @@ -1,1846 +0,0 @@ -/*- - * Copyright 1997 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/types.h> - -#include <ctype.h> -#include <err.h> -#include <errno.h> -#include <limits.h> -#include <md5.h> -#include <netdb.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sysexits.h> -#include <time.h> -#include <unistd.h> - -#include <sys/param.h> /* for MAXHOSTNAMELEN */ -#include <sys/queue.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/sysctl.h> -#include <sys/uio.h> - -#include <netinet/in.h> -#include <arpa/inet.h> - -#include "fetch.h" - -struct http_state { - char *http_hostname; - char *http_remote_request; - char *http_decoded_file; - char *http_host_header; - char *http_authentication; - char *http_proxy_authentication; - unsigned http_port; - int http_redirected; -}; - -struct http_auth { - TAILQ_ENTRY(http_auth) ha_link; - char *ha_scheme; - char *ha_realm; - char *ha_params; - const struct http_auth_method *ha_ham; -}; -TAILQ_HEAD(http_auth_head, http_auth); - -static int http_parse(struct fetch_state *fs, const char *uri); -static int http_proxy_parse(struct fetch_state *fs, const char *uri); -static int http_close(struct fetch_state *fs); -static int http_retrieve(struct fetch_state *fs); -static int basic_doauth(struct fetch_state *fs, struct http_auth *ha, int prx); - -struct uri_scheme http_scheme = - { "http", http_parse, http_proxy_parse, "HTTP_PROXY", "http" }; - -struct http_auth_head http_auth, http_proxy_auth; - -struct http_auth_method { - const char *ham_scheme; - int (*ham_doauth)(struct fetch_state *, struct http_auth *, int); -} http_auth_methods[] = { - { "basic", basic_doauth }, - { 0, 0 } -}; - -/* We are only concerned with headers we might receive. 
*/ -enum http_header { - ht_accept_ranges, ht_age, ht_allow, ht_cache_control, ht_connection, - ht_content_base, ht_content_encoding, ht_content_language, - ht_content_length, ht_content_location, ht_content_md5, - ht_content_range, ht_content_type, ht_date, ht_etag, ht_expires, - ht_last_modified, ht_location, ht_pragma, ht_proxy_authenticate, - ht_public, ht_retry_after, ht_server, ht_transfer_encoding, - ht_upgrade, ht_vary, ht_via, ht_www_authenticate, ht_warning, - /* unusual cases */ - ht_syntax_error, ht_unknown, ht_end_of_header -}; - -static char *format_http_date(time_t when); -static char *format_http_user_agent(void); -static enum http_header http_parse_header(char *line, char **valuep); -static int check_md5(FILE *fp, char *base64ofmd5); -static int http_first_line(const char *line); -static int http_suck(struct fetch_state *fs, FILE *remote, FILE *local, - off_t total_length, int timo); -static int http_suck_chunked(struct fetch_state *fs, FILE *remote, FILE *local, - off_t total_length, int timo); -static int parse_http_content_range(char *orig, off_t *first, off_t *total); -static int process_http_auth(struct fetch_state *fs, char *hdr, int autherr); -static struct http_auth *find_http_auth(struct http_auth_head *list, - const char *scheme, const char *realm); -static time_t parse_http_date(char *datestring); -static void setup_http_auth(void); - -static int -http_parse(struct fetch_state *fs, const char *u) -{ - const char *p, *colon, *slash, *q; - char *hostname, *hosthdr, *trimmed_name, *uri, *ques, saveq = 0; - unsigned port; - struct http_state *https; - - uri = alloca(strlen(u) + 1); - strcpy(uri, u); - - p = uri + 5; - port = 0; - - if (p[0] != '/' || p[1] != '/') { - warnx("`%s': malformed `http' URL", uri); - return EX_USAGE; - } - - p += 2; - - if ((ques = strpbrk(p, "?#")) != NULL) { - saveq = *ques; - *ques = '\0'; - } - - colon = strchr(p, ':'); - slash = strchr(p, '/'); - if (colon && slash && colon < slash) - q = colon; - else - q = 
slash; - if (q == 0) { - warnx("`%s': malformed `http' URL", uri); - return EX_USAGE; - } - hostname = alloca(q - p + 1); - hostname[0] = '\0'; - strncat(hostname, p, q - p); - p = slash; - - if (q == colon && colon + 1 != slash) { - unsigned long ul; - char *ep; - - errno = 0; - ul = strtoul(colon + 1, &ep, 10); - if (ep != slash || ep == colon + 1 || errno != 0 - || ul < 1 || ul > 65534) { - warn("`%s': invalid port in URL", uri); - return EX_USAGE; - } - - port = ul; - } else { - port = 80; - } - - p = slash; - - /* parsing finished, restore parm part */ - if (ques != NULL) - *ques = saveq; - - https = safe_malloc(sizeof *https); - - /* - * Now, we have a copy of the hostname in hostname, the specified port - * (or the default value) in port, and p points to the filename part - * of the URI. - */ - https->http_hostname = safe_strdup(hostname); - https->http_port = port; - hosthdr = alloca(sizeof("Host: :\r\n") + 5 + strlen(hostname)); - sprintf(hosthdr, "Host: %s:%d\r\n", hostname, port); - https->http_host_header = safe_strdup(hosthdr); - - /* - * NB: HTTP/1.1 servers MUST also accept a full URI. - * However, HTTP/1.0 servers will ONLY accept a trimmed URI. 
- */ - https->http_remote_request = safe_strdup(p); - p++; - if (ques) { - trimmed_name = safe_strndup(p, ques - p); - } else { - trimmed_name = safe_strdup(p); - } - https->http_decoded_file = percent_decode(trimmed_name); - free(trimmed_name); - p = https->http_decoded_file; - /* now p is the decoded version, so we can extract the basename */ - - if (fs->fs_outputfile == 0) { - slash = strrchr(p, '/'); - if (slash) - fs->fs_outputfile = slash + 1; - else - fs->fs_outputfile = p; - } - https->http_redirected = 0; - https->http_authentication = https->http_proxy_authentication = 0; - - fs->fs_proto = https; - fs->fs_close = http_close; - fs->fs_retrieve = http_retrieve; - return 0; -} - -/* - * An HTTP proxy works by accepting a complete URI in a GET request, - * retrieving that object, and then forwarding it back to us. Because - * it can conceivably handle any URI, we have to do a bit more work - * in the parsing of it. - */ -static int -http_proxy_parse(struct fetch_state *fs, const char *uri) -{ - struct http_state *https; - const char *env, *slash, *ques; - char *file; - int rv; - - https = safe_malloc(sizeof *https); - https->http_remote_request = safe_strdup(uri); - - env = getenv("HTTP_PROXY"); - rv = parse_host_port(env, &https->http_hostname, &https->http_port); - if (rv) { -out: - free(https->http_remote_request); - free(https); - return rv; - } - - if (strncmp(uri, "http://", 7) == 0 || strncmp(uri, "ftp://", 6) == 0) { - char *hosthdr; - slash = strchr(uri + 7, '/'); - if (slash == 0) { - warnx("`%s': malformed `http' URL", uri); - rv = EX_USAGE; - free(https->http_hostname); - goto out; - } - ques = strpbrk(slash, "?#"); - if (ques == 0) - file = safe_strdup(slash); - else - file = safe_strndup(slash, ques - slash); - hosthdr = alloca(sizeof("Host: \r\n") + slash - uri - 7); - strcpy(hosthdr, "Host: "); - strncat(hosthdr, uri + 7, slash - uri - 7); - strcat(hosthdr, "\r\n"); - https->http_host_header = safe_strdup(hosthdr); - } else { - slash = uri; - 
while (*slash && *slash != ':') - slash++; - if (*slash) - slash++; - if (slash[0] == '/' && slash[1] == '/') { - slash += 2; - while (*slash && *slash != '/') - slash++; - } - file = safe_strdup(slash); - https->http_host_header = safe_strdup(""); - } - https->http_decoded_file = percent_decode(file); - https->http_redirected = 0; - https->http_authentication = https->http_proxy_authentication = 0; - free(file); - if (fs->fs_outputfile == 0) { - slash = strrchr(https->http_decoded_file, '/'); - /* NB: we are not guaranteed to find one... */ - fs->fs_outputfile = slash ? slash + 1 - : https->http_decoded_file; - } - - fs->fs_proto = https; - fs->fs_close = http_close; - fs->fs_retrieve = http_retrieve; - return 0; -} - -static int -http_close(struct fetch_state *fs) -{ - struct http_state *https = fs->fs_proto; - - free(https->http_hostname); - free(https->http_remote_request); - free(https->http_decoded_file); - free(https->http_host_header); - if (https->http_authentication) - free(https->http_authentication); - if (https->http_proxy_authentication) - free(https->http_proxy_authentication); - free(https); - fs->fs_outputfile = 0; - return 0; -} - -static int -nullclose(struct fetch_state *fs) -{ - return 0; -} - -/* - * Process a redirection. This has a small memory leak. - */ -static int -http_redirect(struct fetch_state *fs, char *new, int permanent) -{ - struct http_state *https = fs->fs_proto; - int num_redirects = https->http_redirected + 1; - char *out = safe_strdup(fs->fs_outputfile); - int rv; - - if (num_redirects > 5) { - warnx("%s: HTTP redirection limit exceeded", out); - return EX_PROTOCOL; - } - - free(https->http_hostname); - free(https->http_remote_request); - free(https->http_decoded_file); - free(https); - warnx("%s: resource has moved %s to `%s'", out, - permanent ? "permanently" : "temporarily", new); - rv = http_parse(fs, new); - if (rv != 0) { - fs->fs_close = nullclose; /* XXX rethink interface? 
*/ - return rv; - } - https = fs->fs_proto; - https->http_redirected = num_redirects; - /* - * This ensures that the output file name doesn't suddenly change - * under the user's feet. Unfortunately, this results in a small - * memory leak. I wish C had garbage collection... - */ - fs->fs_outputfile = out; - rv = http_retrieve(fs); - return rv; -} - -/* - * Read HTML-formatted data from remote and display it on stderr. - * This is extremely incomplete, as all it does is delete anything - * between angle brackets. However, this is usually good enough for - * error messages. - */ -static void -html_display(FILE *remote) -{ - char *line; - size_t linelen; - int inbracket = 0; - - - while ((line = fgetln(remote, &linelen)) != 0) { - char *end = line + linelen; - char *p; - int content = 0; - - for (p = line; p < end; p++) { - if (*p == '<' && !inbracket) { - fwrite(line, 1, (p - line), - stderr); - inbracket = 1; - } - if (!inbracket && !content && - *p != '\n' && *p != '\r') - content = 1; - if (*p == '>' && inbracket) { - line = p + 1; - inbracket = 0; - } - } - if (content && line < end) - fwrite(line, 1, (end - line), stderr); - } -} - -/* - * Get a file using HTTP. We will try to implement HTTP/1.1 eventually. - * This subroutine makes heavy use of the 4.4-Lite standard I/O library, - * in particular the `fgetln' which allows us to slurp an entire `line' - * (an arbitrary string of non-NUL characters ending in a newline) directly - * out of the stdio buffer. This makes interpreting the HTTP headers much - * easier, since they are all guaranteed to end in `\r\n' and we can just - * ignore the `\r'. 
- */ -static int -http_retrieve(struct fetch_state *fs) -{ - struct http_state *https; - FILE *remote, *local; - int s; - struct sockaddr_in sin; - struct msghdr msg; -#define NIOV 16 /* max is currently 14 */ - struct iovec iov[NIOV]; - int n, status; - const char *env; - int timo; - char *line, *new_location; - char *errstr = 0; - size_t linelen, writeresult; - off_t total_length, restart_from; - time_t last_modified, when_to_retry; - char *base64ofmd5; - int to_stdout, restarting, redirection, retrying, autherror, chunked; - char rangebuf[sizeof("Range: bytes=18446744073709551616-\r\n")]; - int tried_head; - - setup_http_auth(); - - https = fs->fs_proto; - to_stdout = (strcmp(fs->fs_outputfile, "-") == 0); - restarting = fs->fs_restart; - redirection = 0; - retrying = 0; - tried_head = 0; - - /* - * Figure out the timeout. Prefer the -T command-line value, - * otherwise the HTTP_TIMEOUT envar, or else don't time out at all. - */ - if (fs->fs_timeout) { - timo = fs->fs_timeout; - } else if ((env = getenv("HTTP_TIMEOUT")) != 0) { - char *ep; - unsigned long ul; - - errno = 0; - ul = strtoul(env, &ep, 0); - if (*ep != '\0' || *env == '\0' || errno != 0 - || ul > INT_MAX) { - warnx("`%s': invalid timeout", env); - return EX_USAGE; - } - timo = ul; - } else { - timo = 0; - } - - memset(&sin, 0, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_len = sizeof sin; - sin.sin_port = htons(https->http_port); - - fs->fs_status = "looking up hostname"; - if (inet_aton(https->http_hostname, &sin.sin_addr) == 0) { - struct hostent *hp; - - /* XXX - do timeouts for name resolution? 
*/ - hp = gethostbyname2(https->http_hostname, AF_INET); - if (hp == 0) { - warnx("`%s': cannot resolve: %s", https->http_hostname, - hstrerror(h_errno)); - return EX_NOHOST; - } - memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof sin.sin_addr); - } - - fs->fs_status = "creating request message"; - msg.msg_name = (caddr_t)&sin; - msg.msg_namelen = sizeof sin; - msg.msg_iov = iov; - n = 0; - msg.msg_control = 0; - msg.msg_controllen = 0; - msg.msg_flags = fs->fs_linux_bug ? 0 : MSG_EOF; - -#define addstr(Iov, N, Str) \ - do { \ - Iov[N].iov_base = (void *)Str; \ - Iov[N].iov_len = strlen(Iov[n].iov_base); \ - N++; \ - } while(0) - -retry: - if (fs->fs_reportsize && !tried_head) { - addstr(iov, n, "HEAD "); - tried_head = 1; - } - else { - addstr(iov, n, "GET "); - tried_head = 0; - } - addstr(iov, n, https->http_remote_request); - addstr(iov, n, " HTTP/1.1\r\n"); - /* - * The choice of HTTP/1.1 may be a bit controversial. The - * specification says that implementations which are not at - * least conditionally compliant MUST NOT call themselves - * HTTP/1.1. We choose not to comply with that requirement. - * (Eventually we will support the full HTTP/1.1, at which - * time this comment will not apply. But it's amusing how - * specifications attempt to define behavior for implementations - * which aren't obeying the spec in the first place...) 
- */ - addstr(iov, n, format_http_user_agent()); - /* do content negotiation here */ - addstr(iov, n, "Accept: */*\r\n"); - addstr(iov, n, https->http_host_header); - addstr(iov, n, "Connection: close\r\n"); - if (https->http_proxy_authentication) - addstr(iov, n, https->http_proxy_authentication); - if (https->http_authentication) - addstr(iov, n, https->http_authentication); - if (fs->fs_mirror) { - struct stat stab; - - errno = 0; - if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0) - || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0)) - && S_ISREG(stab.st_mode)) { - addstr(iov, n, "If-Modified-Since: "); - addstr(iov, n, format_http_date(stab.st_mtime)); - addstr(iov, n, "\r\n"); - } else if (errno != 0 || !S_ISREG(stab.st_mode)) { - if (errno != 0) - warn("%s", fs->fs_outputfile); - else - warnx("%s: not a regular file", - fs->fs_outputfile); - warnx("cannot mirror; will retrieve anew"); - } - } - if (restarting) { - struct stat stab; - - errno = 0; - if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0) - || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0)) - && S_ISREG(stab.st_mode)) { - if (!fs->fs_forcerestart) { - addstr(iov, n, "If-Range: "); - addstr(iov, n, format_http_date(stab.st_mtime)); - addstr(iov, n, "\r\n"); - } - sprintf(rangebuf, "Range: bytes=%qd-\r\n", - (long long)stab.st_size); - addstr(iov, n, rangebuf); - } else if (errno != 0 || !S_ISREG(stab.st_mode)) { - if (errno != 0) - warn("%s", fs->fs_outputfile); - else - warnx("%s: not a regular file", - fs->fs_outputfile); - restarting = 0; - warnx("cannot restart; will retrieve anew"); - } - } - addstr(iov, n, "\r\n"); - msg.msg_iovlen = n; - - if (n >= NIOV) - err(EX_SOFTWARE, "request vector length exceeded: %d", n); - - s = socket(PF_INET, SOCK_STREAM, 0); - if (s < 0) { - warn("socket"); - return EX_OSERR; - } - - remote = fdopen(s, "r"); - if (remote == 0) { - warn("fdopen"); - close(s); - return EX_OSERR; - } - - fs->fs_status = "sending request message"; - setup_sigalrm(); - 
alarm(timo); - - /* - * Some hosts do not correctly handle data in SYN segments. - * If no connect(2) is done, the TCP stack will send our - * initial request as such a segment. fs_use_connect works - * around these broken server TCPs by avoiding this case. - * It is not the default because we want to exercise this - * code path, and in any case the majority of hosts handle - * our default correctly. - */ - if (fs->fs_use_connect && (connect(s, (struct sockaddr *)&sin, - sizeof(struct sockaddr_in)) < 0)) { - warn("connect: %s", https->http_hostname); - fclose(remote); - return EX_OSERR; - } - - if (sendmsg(s, &msg, fs->fs_linux_bug ? 0 : MSG_EOF) < 0) { - warn("sendmsg: %s", https->http_hostname); - fclose(remote); - return EX_OSERR; - } - -got100reply: - fs->fs_status = "reading reply status"; - alarm(timo); - line = fgetln(remote, &linelen); - alarm(0); - if (line == 0) { - if (ferror(remote)) { - warn("reading reply from %s", https->http_hostname); - fclose(remote); - unsetup_sigalrm(); - return EX_OSERR; - } else { - warnx("empty reply from %s", https->http_hostname); - fclose(remote); - unsetup_sigalrm(); - return EX_PROTOCOL; - } - } - /* - * If the other end is HTTP 0.9, then we just suck their - * response over; can't do anything fancy. We assume that - * the file is a text file, so it is safe to use fgetln() - * to suck the entire file. (It had better be, since - * we used it to grab the first line.) 
- */ - if (linelen < 5 || strncasecmp(line, "http", 4) != 0) { - if (to_stdout) - local = fopen("/dev/stdout", "w"); - else - local = fopen(fs->fs_outputfile, "w"); - if (local == 0) { - warn("%s: fopen", fs->fs_outputfile); - fclose(remote); - unsetup_sigalrm(); - return EX_OSERR; - } - fs->fs_status = "retrieving file from HTTP/0.9 server"; - display(fs, -1, 0); - - do { - writeresult = fwrite(line, 1, linelen, local); - display(fs, -1, writeresult); - if (writeresult != linelen) - break; - alarm(timo); - line = fgetln(remote, &linelen); - alarm(0); - } while(line != 0); - unsetup_sigalrm(); - - if (ferror(local)) { - warn("%s", fs->fs_outputfile); - fclose(local); - fclose(remote); - rm(fs); - return EX_OSERR; - } else if(ferror(remote)) { - warn("%s", https->http_hostname); - if (errno == ECONNRESET) - warnx("(maybe try -b or -t)"); - fclose(local); - fclose(remote); - rm(fs); - return EX_OSERR; - } - fclose(local); - fclose(remote); - display(fs, -1, -1); - return 0; - } - /* - * OK. The other end is doing HTTP 1.0 at the very least. - * This means that some of the fancy stuff is at least possible. - */ - autherror = 0; - line[linelen - 1] = '\0'; /* turn line into a string */ - status = http_first_line(line); - - switch(status) { - case 100: /* Continue */ - goto got100reply; - case 200: /* Here come results */ - case 203: /* Non-Authoritative Information */ - restarting = 0; - break; - case 206: /* Here come partial results */ - /* can only happen when restarting */ - break; - case 301: /* Resource has moved permanently */ - if (fs->fs_auto_retry < 1) - errstr = safe_strdup(line); - else - redirection = 301; - break; - case 302: /* Resource has moved temporarily */ - /* - * We formerly didn't test fs->fs_auto_retry here, - * so that this sort of redirection would be transparent - * to the user. 
Unfortunately, there are a lot of idiots - * out there running Web sites, and some of them have - * decided to implement the following stupidity: rather - * than returning the correct `404 Not Found' error - * when something is not found, they instead return - * a 302 redirect, giving the erroneous impression that - * the requested resource actually exists. This - * breaks any client which expects a non-existent resource - * to elicit a 40x response. Grrr. - */ - if (fs->fs_auto_retry < 0) /* -A flag */ - errstr = safe_strdup(line); - else - redirection = 302; - break; - case 304: /* Object is unmodified */ - if (fs->fs_mirror) { - fclose(remote); - unsetup_sigalrm(); - return 0; - } - errstr = safe_strdup(line); - break; - case 401: /* Unauthorized */ - if (https->http_authentication) - errstr = safe_strdup(line); - else - autherror = 401; - break; - case 407: /* Proxy Authentication Required */ - if (https->http_proxy_authentication) - errstr = safe_strdup(line); - else - autherror = 407; - break; - case 501: /* Not Implemented */ - /* If we tried HEAD, retry with GET */ - if (tried_head) { - n = 0; - goto retry; - } - else { - errstr = safe_strdup(line); - break; - } - case 503: /* Service Unavailable */ - if (!fs->fs_auto_retry) - errstr = safe_strdup(line); - else - retrying = 503; - break; - - default: - errstr = safe_strdup(line); - break; - } - - total_length = -1; /* -1 means ``don't know'' */ - last_modified = when_to_retry = -1; - base64ofmd5 = 0; - new_location = 0; - restart_from = 0; - chunked = 0; - fs->fs_status = "parsing reply headers"; - - while((line = fgetln(remote, &linelen)) != 0) { - char *value, *ep; - enum http_header header; - unsigned long ul; - - line[linelen - 1] = '\0'; - header = http_parse_header(line, &value); - - if (header == ht_end_of_header) - break; - - switch(header) { - case ht_content_length: - errno = 0; - ul = strtoul(value, &ep, 10); - if (errno != 0 || *ep) - warnx("invalid Content-Length: `%s'", value); - if 
(!restarting) - total_length = ul; - break; - - case ht_last_modified: - last_modified = parse_http_date(value); - if (last_modified == -1 && fs->fs_verbose > 0) - warnx("invalid Last-Modified: `%s'", value); - break; - - case ht_content_md5: - base64ofmd5 = safe_strdup(value); - break; - - case ht_content_range: - if (!restarting) /* XXX protocol error */ - break; - - /* NB: we might have to restart from farther back - than we asked. */ - status = parse_http_content_range(value, &restart_from, - &total_length); - /* If we couldn't understand the reply, get the whole - thing. */ - if (status) { - restarting = 0; -doretry: - fclose(remote); - if (base64ofmd5) - free(base64ofmd5); - if (new_location) - free(new_location); - restart_from = 0; - n = 0; - goto retry; - } - break; - - case ht_location: - if (redirection) { - char *s = value; - while (*s && !isspace(*s)) - s++; - new_location = safe_strndup(value, s - value); - } - break; - - case ht_transfer_encoding: - if (strncasecmp(value, "chunked", 7) == 0) { - chunked = 1; - break; - } - warnx("%s: %s specified Transfer-Encoding `%s'", - fs->fs_outputfile, https->http_hostname, - value); - warnx("%s: output file may be uninterpretable", - fs->fs_outputfile); - break; - - case ht_retry_after: - if (!retrying) - break; - - errno = 0; - ul = strtoul(value, &ep, 10); - if (errno != 0 || (*ep && !isspace(*ep))) { - time_t when; - when = parse_http_date(value); - if (when == -1) - break; - when_to_retry = when; - } else { - when_to_retry = time(0) + ul; - } - break; - - case ht_www_authenticate: - if (autherror != 401) - break; - - status = process_http_auth(fs, value, autherror); - if (status != 0) - goto cantauth; - break; - - case ht_proxy_authenticate: - if (autherror != 407) - break; - status = process_http_auth(fs, value, autherror); - if (status != 0) - goto cantauth; - break; - - default: - break; - } - } - if (autherror == 401 && https->http_authentication) - goto doretry; - if (autherror == 407 && 
https->http_proxy_authentication) - goto doretry; - if (autherror) { - goto spewerror; - } - - if (retrying) { - int howlong; - - if (when_to_retry == -1) { - errstr = safe_strdup("HTTP/1.1 503 Service Unavailable"); - goto spewerror; - } - howlong = when_to_retry - time(0); - if (howlong < 30) - howlong = 30; - - warnx("%s: service unavailable; retrying in %d seconds", - https->http_hostname, howlong); - fs->fs_status = "waiting to retry"; - sleep(howlong); - goto doretry; - } - - if (errstr != 0) { -spewerror: - warnx("%s: %s: HTTP server returned error code %d", - fs->fs_outputfile, https->http_hostname, status); - if (fs->fs_verbose > 1) { - fputs(errstr, stderr); - fputc('\n', stderr); - html_display(remote); - } - free(errstr); - fclose(remote); - unsetup_sigalrm(); - return EX_UNAVAILABLE; - } - - if (redirection && new_location) { - fclose(remote); - if (base64ofmd5) - free(base64ofmd5); - fs->fs_status = "processing redirection"; - status = http_redirect(fs, new_location, redirection == 301); - free(new_location); - return status; - } else if (redirection) { - warnx("%s: redirection but no new location", - fs->fs_outputfile); - fclose(remote); - if (base64ofmd5) - free(base64ofmd5); - return EX_PROTOCOL; - } - - if (total_length > 0 && fs->fs_expectedsize != -1 - && total_length != fs->fs_expectedsize) { - warnx("%s: size mismatch, expected=%lu / actual=%lu", - fs->fs_outputfile, - (unsigned long)fs->fs_expectedsize, - (unsigned long)total_length); - fclose(remote); - if (base64ofmd5) - free(base64ofmd5); - unsetup_sigalrm(); - return EX_DATAERR; - } - - fs->fs_status = "retrieving file from HTTP/1.x server"; - - if (fs->fs_reportsize) { - if (total_length == -1) { - warnx("%s: size not known\n", - fs->fs_outputfile); - printf("Unknown\n"); - status = 1; - } - else { - printf("%qd\n", (quad_t)total_length); - status = 0; - } - fclose(remote); - unsetup_sigalrm(); - return status; - } - - - /* - * OK, if we got here, then we have finished parsing the header 
- * and have read the `\r\n' line which denotes the end of same. - * We may or may not have a good idea of the length of the file - * or its modtime. At this point we will have to deal with - * any special byte-range, content-negotiation, redirection, - * or authentication, and probably jump back up to the top, - * once we implement those features. So, all we have left to - * do is open up the output file and copy data from input to - * output until EOF. - */ - if (to_stdout) - local = fopen("/dev/stdout", restarting ? "a" : "w"); - else - local = fopen(fs->fs_outputfile, restarting ? "a+" : "w+"); - if (local == 0) { - warn("%s: fopen", fs->fs_outputfile); - fclose(remote); - unsetup_sigalrm(); - return EX_OSERR; - } - - fs->fs_modtime = last_modified; - fseek(local, restart_from, SEEK_SET); /* XXX truncation off_t->long */ - display(fs, total_length, restart_from); /* XXX truncation */ - - if (chunked) - status = http_suck_chunked(fs, remote, local, total_length, - timo); - else - status = http_suck(fs, remote, local, total_length, timo); - if (status) - goto out; - - status = errno; /* save errno for warn(), below, if needed */ - if (display(fs, total_length, -1) != 0) { - /* Check for truncated file */ - errno = status; - status = EX_PROTOCOL; - goto out; - } - errno = status; - - if (ferror(remote)) { - warn("reading remote file from %s", https->http_hostname); - if (errno == ECONNRESET) - warnx("(maybe try -b or -t)"); - status = EX_OSERR; - } else if(ferror(local)) { - warn("`%s': fwrite", fs->fs_outputfile); - status = EX_OSERR; - } else { - status = 0; - } - if (base64ofmd5) { - /* - * Ack. When restarting, the MD5 only covers the parts - * we are getting, not the whole thing. 
- */ - fseek(local, restart_from, SEEK_SET); - fs->fs_status = "computing MD5 message digest"; - if (!to_stdout) - status = check_md5(local, base64ofmd5); - else - warnx("can't check md5 digest on stdout: %s", - base64ofmd5); - free(base64ofmd5); - } - - fclose(local); -out: - unsetup_sigalrm(); - fclose(remote); - - if (status != 0) - rm(fs); - else - adjmodtime(fs); - - return status; -#undef addstr - -cantauth: - warnx("%s: cannot authenticate with %s %s", - fs->fs_outputfile, - (autherror == 401) ? "server" : "proxy", - https->http_hostname); - status = EX_NOPERM; - goto out; -} - -/* - * Suck over an HTTP body in standard form. - */ -static int -http_suck(struct fetch_state *fs, FILE *remote, FILE *local, - off_t total_length, int timo) -{ - static char buf[BUFFER_SIZE]; - ssize_t readresult, writeresult; - off_t remain = total_length; - - if (total_length == -1) - remain = 1; /*XXX*/ - - do { - alarm(timo); - readresult = fread(buf, 1, sizeof buf, remote); - alarm(0); - - /* - * If know the content-length, ignore anything more the - * the server chooses to send us. - */ - if (total_length != -1 && ((remain -= readresult) < 0)) - readresult += remain; - - if (readresult == 0) - return 0; - display(fs, total_length, readresult); - - writeresult = fwrite(buf, 1, readresult, local); - } while (writeresult == readresult && remain > 0); - return 0; -} - -/* - * Suck over an HTTP body in chunked form. Ick. - * Note that the return value convention here is a bit strange. - * A zero return does not necessarily mean success; rather, it means - * that this routine has already taken care of error reporting and - * just wants to exit. 
- */ -static int -http_suck_chunked(struct fetch_state *fs, FILE *remote, FILE *local, - off_t total_length, int timo) -{ - static char buf[BUFFER_SIZE]; - ssize_t readresult, writeresult; - size_t linelen; - u_long chunklen; - char *line, *ep; - - for (;;) { - alarm(timo); - line = fgetln(remote, &linelen); - alarm(0); - if (line == 0) { - warnx("%s: error processing chunked encoding: " - "missing length", fs->fs_outputfile); - return EX_PROTOCOL; - } - line[--linelen] = '\0'; - for (; linelen > 0; linelen--) { - if (isspace(line[linelen - 1])) - line[linelen - 1] = '\0'; - } - errno = 0; - chunklen = strtoul(line, &ep, 16); - if (errno || *line == 0 - || (*ep && !isspace(*ep) && *ep != ';')) { - warnx("%s: error processing chunked encoding: " - "uninterpretable length: %s", fs->fs_outputfile, - line); - return EX_PROTOCOL; - } - if (chunklen == 0) - break; - -#ifndef MIN -#define MIN(a,b) ((a)>(b)?(b):(a)) -#endif - while (chunklen > 0) { - alarm(timo); - readresult = fread(buf, 1, MIN(sizeof buf, chunklen), - remote); - alarm(0); - if (readresult == 0) { - warnx("%s: EOF with %lu left in chunk", - fs->fs_outputfile, chunklen); - return EX_PROTOCOL; - } - display(fs, total_length, readresult); - chunklen -= readresult; - - writeresult = fwrite(buf, 1, readresult, local); - if (writeresult != readresult) - return 0; /* main code will diagnose */ - } - /* - * Read the bogus CRLF after the chunk's body. - */ - alarm(timo); - fread(buf, 1, 2, remote); - alarm(0); - } - /* - * If we got here, then we successfully read every chunk and got - * the end-of-chunks indicator. Now we have to ignore any trailer - * lines which come across---or we would if we cared about keeping - * the connection open. Since we are just going to close it anyway, - * we won't bother with that here. If ever something important is - * defined for the trailer, we will have to revisit that decision. 
- */ - return 0; -} - - -/* - * The format of the response line for an HTTP request is: - * HTTP/V.vv{WS}999{WS}Explanatory text for humans to read\r\n - * Old pre-HTTP/1.0 servers can return - * HTTP{WS}999{WS}Explanatory text for humans to read\r\n - * Where {WS} represents whitespace (spaces and/or tabs) and 999 - * is a machine-interprable result code. We return the integer value - * of that result code, or the impossible value `0' if we are unable to - * parse the result. - */ -static int -http_first_line(const char *line) -{ - char *ep; - unsigned long ul; - - if (strncasecmp(line, "http", 4) != 0) - return 0; - - line += 4; - while (*line && !isspace(*line)) /* skip non-whitespace */ - line++; - while (*line && isspace(*line)) /* skip first whitespace */ - line++; - - errno = 0; - ul = strtoul(line, &ep, 10); - if (errno != 0 || ul > 999 || ul < 100 || !isspace(*ep)) - return 0; - return ul; -} - -/* - * The format of a header line for an HTTP request is: - * Header-Name: header-value (with comments in parens)\r\n - * This would be a nice application for gperf(1), except that the - * names are case-insensitive and gperf can't handle that. - */ -static enum http_header -http_parse_header(char *line, char **valuep) -{ - char *colon, *value; - - if (*line == '\0' /* protocol error! */ - || (line[0] == '\r' && line[1] == '\0')) - return ht_end_of_header; - - colon = strchr(line, ':'); - if (colon == 0) - return ht_syntax_error; - *colon = '\0'; - - for (value = colon + 1; *value && isspace(*value); value++) - ; /* do nothing */ - - /* Trim trailing whitespace (including \r). 
*/ - *valuep = value; - value += strlen(value) - 1; - while (value > *valuep && isspace(*value)) - value--; - *++value = '\0'; - -#define cmp(name, num) do { if (!strcasecmp(line, name)) return num; } while(0) - cmp("Accept-Ranges", ht_accept_ranges); - cmp("Age", ht_age); - cmp("Allow", ht_allow); - cmp("Cache-Control", ht_cache_control); - cmp("Connection", ht_connection); - cmp("Content-Base", ht_content_base); - cmp("Content-Encoding", ht_content_encoding); - cmp("Content-Language", ht_content_language); - cmp("Content-Length", ht_content_length); - cmp("Content-Location", ht_content_location); - cmp("Content-MD5", ht_content_md5); - cmp("Content-Range", ht_content_range); - cmp("Content-Type", ht_content_type); - cmp("Date", ht_date); - cmp("ETag", ht_etag); - cmp("Expires", ht_expires); - cmp("Last-Modified", ht_last_modified); - cmp("Location", ht_location); - cmp("Pragma", ht_pragma); - cmp("Proxy-Authenticate", ht_proxy_authenticate); - cmp("Public", ht_public); - cmp("Retry-After", ht_retry_after); - cmp("Server", ht_server); - cmp("Transfer-Encoding", ht_transfer_encoding); - cmp("Upgrade", ht_upgrade); - cmp("Vary", ht_vary); - cmp("Via", ht_via); - cmp("WWW-Authenticate", ht_www_authenticate); - cmp("Warning", ht_warning); -#undef cmp - return ht_unknown; -} - -/* - * Compute the RSA Data Security, Inc., MD5 Message Digest of the file - * given in `fp', see if it matches the one given in base64 encoding by - * `base64ofmd5'. Warn and return an error if it doesn't. 
- */ -static int -check_md5(FILE *fp, char *base64ofmd5) { - MD5_CTX ctx; - unsigned char digest[16]; - char buf[512]; - size_t len; - char *ourval; - - MD5Init(&ctx); - while ((len = fread(buf, 1, sizeof buf, fp)) != 0) { - MD5Update(&ctx, buf, len); - } - MD5Final(digest, &ctx); - ourval = to_base64(digest, 16); - if (strcmp(ourval, base64ofmd5) != 0) { - warnx("MD5 digest mismatch: %s, should be %s", ourval, - base64ofmd5); - free(ourval); - return EX_DATAERR; - } - free(ourval); - return 0; -} - -static const char *wkdays[] = { - "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" -}; -static const char *months[] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", - "Nov", "Dec" -}; - -/* - * Interpret one of the three possible formats for an HTTP date. - * All of them are really bogus; HTTP should use either ISO 8601 - * or NTP timestamps. We make some attempt to accept a subset of 8601 - * format. The three standard formats are all fixed-length subsets of their - * respective standards (except 8601, which puts all of the stuff we - * care about up front). - */ -static time_t -parse_http_date(char *string) -{ - static struct tm tm; /* get good initialization */ - time_t rv; - const char *tz; - int i; - - /* 8601 has the shortest minimum length */ - if (strlen(string) < 15) - return -1; - - if (isdigit(*string)) { - /* ISO 8601: 19970127T134551stuffwedon'tcareabout */ - for (i = 0; i < 15; i++) { - if (i != 8 && !isdigit(string[i])) - break; - } - if (i < 15) - return -1; -#define digit(x) (string[x] - '0') - tm.tm_year = (digit(0) * 1000 - + digit(1) * 100 - + digit(2) * 10 - + digit(3)) - 1900; - tm.tm_mon = digit(4) * 10 + digit(5) - 1; - tm.tm_mday = digit(6) * 10 + digit(7); - if (string[8] != 'T' && string[8] != 't' && string[8] != ' ') - return -1; - tm.tm_hour = digit(9) * 10 + digit(10); - tm.tm_min = digit(11) * 10 + digit(12); - tm.tm_sec = digit(13) * 10 + digit(14); - /* We don't care about the rest of the stuff after the secs. 
*/ - } else if (string[3] == ',') { - /* Mon, 27 Jan 1997 14:24:35 stuffwedon'tcareabout */ - if (strlen(string) < 25) - return -1; - string += 5; /* skip over day-of-week */ - if (!(isdigit(string[0]) && isdigit(string[1]))) - return -1; - tm.tm_mday = digit(0) * 10 + digit(1); - for (i = 0; i < 12; i++) { - if (strncasecmp(months[i], &string[3], 3) == 0) - break; - } - if (i >= 12) - return -1; - tm.tm_mon = i; - - if (sscanf(&string[7], "%d %d:%d:%d", &i, &tm.tm_hour, - &tm.tm_min, &tm.tm_sec) != 4) - return -1; - tm.tm_year = i - 1900; - - } else if (string[3] == ' ') { - /* Mon Jan 27 14:25:20 1997 */ - if (strlen(string) < 24) - return -1; - string += 4; - for (i = 0; i < 12; i++) { - if (strncasecmp(string, months[i], 3) == 0) - break; - } - if (i >= 12) - return -1; - tm.tm_mon = i; - if (sscanf(&string[4], "%d %d:%d:%d %u", &tm.tm_mday, - &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &i) - != 5) - return -1; - tm.tm_year = i - 1900; - } else { - /* Monday, 27-Jan-97 14:31:09 stuffwedon'tcareabout */ - /* Quoth RFC 2068: - o HTTP/1.1 clients and caches should assume that an RFC-850 date - which appears to be more than 50 years in the future is in fact - in the past (this helps solve the "year 2000" problem). - */ - time_t now; - struct tm *tmnow; - int this2dyear; - char *comma = strchr(string, ','); - char mname[4]; - - if (comma == 0) - return -1; - string = comma + 1; - if (strlen(string) < 19) - return -1; - string++; - mname[4] = '\0'; - if (sscanf(string, "%d-%c%c%c-%d %d:%d:%d", &tm.tm_mday, - mname, mname + 1, mname + 2, &tm.tm_year, - &tm.tm_hour, &tm.tm_min, &tm.tm_sec) != 8) - return -1; - for (i = 0; i < 12; i++) { - if (strcasecmp(months[i], mname)) - break; - } - if (i >= 12) - return -1; - tm.tm_mon = i; - /* - * RFC 2068 year interpretation. 
- */ - time(&now); - tmnow = gmtime(&now); - this2dyear = tmnow->tm_year % 100; - tm.tm_year += tmnow->tm_year - this2dyear; - if (tm.tm_year - tmnow->tm_year >= 50) - tm.tm_year -= 100; - } -#undef digit - - if (tm.tm_sec > 60 || tm.tm_min > 59 || tm.tm_hour > 23 - || tm.tm_mday > 31 || tm.tm_mon > 11) - return -1; - if (tm.tm_sec < 0 || tm.tm_min < 0 || tm.tm_hour < 0 - || tm.tm_mday < 0 || tm.tm_mon < 0 || tm.tm_year < 0) - return -1; - - tz = getenv("TZ"); - setenv("TZ", "UTC0", 1); - tzset(); - rv = mktime(&tm); - if (tz) - setenv("TZ", tz, 1); - else - unsetenv("TZ"); - return rv; -} - -static char * -format_http_date(time_t when) -{ - struct tm *tm; - static char buf[30]; - - tm = gmtime(&when); - if (tm == 0) - return 0; -#ifndef HTTP_DATE_ISO_8601 - sprintf(buf, "%s, %02d %s %04d %02d:%02d:%02d GMT", - wkdays[tm->tm_wday], tm->tm_mday, months[tm->tm_mon], - tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec); -#else /* ISO 8601 */ - sprintf(buf, "%04d%02d%02dT%02d%02d%02d+0000", - tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec); -#endif - return buf; -} - -static char * -format_http_user_agent(void) -{ - static char buf[128]; - static int inited; - - if (!inited) { - int mib[2]; - char ostype[128], osrelease[128], machine[128]; - size_t len; - - mib[0] = CTL_KERN; - mib[1] = KERN_OSTYPE; - len = sizeof ostype; - if (sysctl(mib, 2, ostype, &len, 0, 0) < 0) { - warn("sysctl"); - ostype[0] = '\0'; - } - mib[1] = KERN_OSRELEASE; - len = sizeof osrelease; - if (sysctl(mib, 2, osrelease, &len, 0, 0) < 0) { - warn("sysctl"); - osrelease[0] = '\0'; - } - mib[0] = CTL_HW; - mib[1] = HW_MACHINE; - len = sizeof machine; - if (sysctl(mib, 2, machine, &len, 0, 0) < 0) { - warn("sysctl"); - machine[0] = '\0'; - } - - snprintf(buf, sizeof buf, - "User-Agent: " FETCH_VERSION " %s/%s (%s)\r\n", - ostype, osrelease, machine); - } - return buf; -} - -/* - * Parse a Content-Range return header from the server. 
RFC 2066 defines - * this header to have the format: - * Content-Range: bytes 12345-67890/123456 - * Since we always ask for the whole rest of the file, we consider it an - * error if the reply doesn't claim to give it to us. - */ -static int -parse_http_content_range(char *orig, off_t *restart_from, off_t *total_length) -{ - u_quad_t first, last, total; - char *ep; - - if (strncasecmp(orig, "bytes", 5) != 0) { - warnx("unknown Content-Range unit: `%s'", orig); - return EX_PROTOCOL; - } - - orig += 5; - while (*orig && isspace(*orig)) - orig++; - - errno = 0; - first = strtouq(orig, &ep, 10); - if (errno != 0 || *ep != '-') { - warnx("invalid Content-Range: `%s'", orig); - return EX_PROTOCOL; - } - last = strtouq(ep + 1, &ep, 10); - if (errno != 0 || *ep != '/' || last < first) { - warnx("invalid Content-Range: `%s'", orig); - return EX_PROTOCOL; - } - total = strtouq(ep + 1, &ep, 10); - if (errno != 0 || !(*ep == '\0' || isspace(*ep))) { - warnx("invalid Content-Range: `%s'", orig); - return EX_PROTOCOL; - } - - if (last + 1 != total) { - warnx("HTTP server did not return requested Content-Range"); - return EX_PROTOCOL; - } - - *restart_from = first; - *total_length = last; - return 0; -} - -/* - * Do HTTP authentication. We only do ``basic'' right now, but - * MD5 ought to be fairly easy. The hard part is actually teasing - * apart the header, which is fairly badly designed (so what else is - * new?). - */ -static char * -getauthparam(char *params, const char *name) -{ - char *rv; - enum state { normal, quoted } state; - while (*params) { - if (strncasecmp(params, name, strlen(name)) == 0 - && params[strlen(name)] == '=') - break; - state = normal; - while (*params) { - if (state == normal && *params == ',') - break; - if (*params == '\"') - state = (state == quoted) ? 
normal : quoted; - if (*params == '\\' && params[1] != '\0') - params++; - params++; - } - } - - if (*params == '\0') - return 0; - params += strlen(name) + 1; - rv = params; - state = normal; - while (*params) { - if (state == normal && *params == ',') - break; - if (*params == '\"') - state = (state == quoted) ? normal : quoted; - if (*params == '\\' && params[1] != '\0') - params++; - params++; - } - if (params[-1] == '\"') - params[-1] = '\0'; - else - params[0] = '\0'; - - if (*rv == '\"') - rv++; - return rv; -} - -static int -process_http_auth(struct fetch_state *fs, char *hdr, int autherr) -{ - enum state { normal, quoted } state; - char *scheme, *params, *nscheme, *realm; - struct http_auth *ha; - - do { - scheme = params = hdr; - /* Look for end of scheme name. */ - while (*params && !isspace(*params)) - params++; - - if (*params == '\0') - return EX_PROTOCOL; - - /* Null-terminate scheme and skip whitespace. */ - while (*params && isspace(*params)) - *params++ = '\0'; - - /* Semi-parse parameters to find their end. */ - nscheme = params; - state = normal; - while (*nscheme) { - if (state == normal && isspace(*nscheme)) - break; - if (*nscheme == '\"') - state = (state == quoted) ? normal : quoted; - if (*nscheme == '\\' && nscheme[1] != '\0') - nscheme++; - nscheme++; - } - - /* Null-terminate parameters and skip whitespace. 
*/ - while (*nscheme && isspace(*nscheme)) - *nscheme++ = '\0'; - - realm = getauthparam(params, "realm"); - if (realm == 0) { - scheme = nscheme; - continue; - } - - if (autherr == 401) - ha = find_http_auth(&http_auth, scheme, realm); - else - ha = find_http_auth(&http_proxy_auth, scheme, realm); - - if (ha) - return ha->ha_ham->ham_doauth(fs, ha, autherr == 407); - } while (*scheme); - return EX_NOPERM; -} - -static void -parse_http_auth_env(const char *env, struct http_auth_head *ha_tqh) -{ - char *nenv, *p, *scheme, *realm, *params; - struct http_auth *ha; - struct http_auth_method *ham; - - nenv = alloca(strlen(env) + 1); - strcpy(nenv, env); - - while ((p = strsep(&nenv, " \t")) != 0) { - scheme = strsep(&p, ":"); - if (scheme == 0 || *scheme == '\0') - continue; - realm = strsep(&p, ":"); - if (realm == 0 || *realm == '\0') - continue; - params = (p && *p) ? p : 0; - for (ham = http_auth_methods; ham->ham_scheme; ham++) { - if (strcasecmp(scheme, ham->ham_scheme) == 0) - break; - } - if (ham == 0) - continue; - ha = safe_malloc(sizeof *ha); - ha->ha_scheme = safe_strdup(scheme); - ha->ha_realm = safe_strdup(realm); - ha->ha_params = params ? safe_strdup(params) : 0; - ha->ha_ham = ham; - TAILQ_INSERT_TAIL(ha_tqh, ha, ha_link); - } -} - -/* - * Look up an authentication method. Automatically clone wildcards - * into fully-specified entries. 
- */ -static struct http_auth * -find_http_auth(struct http_auth_head *tqh, const char *scm, const char *realm) -{ - struct http_auth *ha; - - for (ha = tqh->tqh_first; ha; ha = ha->ha_link.tqe_next) { - if (strcasecmp(ha->ha_scheme, scm) == 0 - && strcasecmp(ha->ha_realm, realm) == 0) - return ha; - } - - for (ha = tqh->tqh_first; ha; ha = ha->ha_link.tqe_next) { - if (strcasecmp(ha->ha_scheme, scm) == 0 - && strcmp(ha->ha_realm, "*") == 0) - break; - } - if (ha != 0) { - struct http_auth *ha2; - - ha2 = safe_malloc(sizeof *ha2); - ha2->ha_scheme = safe_strdup(scm); - ha2->ha_realm = safe_strdup(realm); - ha2->ha_params = ha->ha_params ? safe_strdup(ha->ha_params) :0; - ha2->ha_ham = ha->ha_ham; - TAILQ_INSERT_TAIL(tqh, ha2, ha_link); - ha = ha2; - } - - return ha; -} - -static void -setup_http_auth(void) -{ - const char *envar; - static int once; - - if (once) - return; - once = 1; - - TAILQ_INIT(&http_auth); - TAILQ_INIT(&http_proxy_auth); - envar = getenv("HTTP_AUTH"); - if (envar) - parse_http_auth_env(envar, &http_auth); - - envar = getenv("HTTP_PROXY_AUTH"); - if (envar) - parse_http_auth_env(envar, &http_proxy_auth); -} - -static int -basic_doauth(struct fetch_state *fs, struct http_auth *ha, int isproxy) -{ - struct http_state *https = fs->fs_proto; - char *user; - char *pass; - char *enc; - char **hdr; - size_t userlen; - FILE *fp; - - if (!isatty(0) && - (ha->ha_params == 0 || strchr(ha->ha_params, ':') == 0)) - return EX_NOPERM; - - if (ha->ha_params == 0) { - fp = fopen("/dev/tty", "r+"); - if (fp == 0) { - warn("opening /dev/tty"); - return EX_OSERR; - } - fprintf(fp, "Enter `basic' user name for realm `%s': ", - ha->ha_realm); - fflush(fp); - user = fgetln(fp, &userlen); - if (user == 0 || userlen < 1) { /* longer name? 
*/ - return EX_NOPERM; - } - if (user[userlen - 1] == '\n') - user[userlen - 1] = '\0'; - else - user[userlen] = '\0'; - user = safe_strdup(user); - fclose(fp); - pass = 0; - } else if ((pass = strchr(ha->ha_params, ':')) == 0) { - user = safe_strdup(ha->ha_params); - free(ha->ha_params); - } - - if (pass == 0) { - pass = getpass("Password: "); - ha->ha_params = safe_malloc(strlen(user) + 2 + strlen(pass)); - strcpy(ha->ha_params, user); - strcat(ha->ha_params, ":"); - strcat(ha->ha_params, pass); - } - - enc = to_base64(ha->ha_params, strlen(ha->ha_params)); - - hdr = isproxy ? &https->http_proxy_authentication - : &https->http_authentication; - if (*hdr) - free(*hdr); - *hdr = safe_malloc(sizeof("Proxy-Authorization: basic \r\n") - + strlen(enc)); - if (isproxy) - strcpy(*hdr, "Proxy-Authorization"); - else - strcpy(*hdr, "Authorization"); - strcat(*hdr, ": Basic "); - strcat(*hdr, enc); - strcat(*hdr, "\r\n"); - free(enc); - return 0; -} diff --git a/usr.bin/fetch/main.c b/usr.bin/fetch/main.c deleted file mode 100644 index 1df192c..0000000 --- a/usr.bin/fetch/main.c +++ /dev/null @@ -1,402 +0,0 @@ -/*- - * Copyright (c) 1996 - * Jean-Marc Zucconi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* $FreeBSD$ */ - -#include <sys/types.h> - -#include <err.h> -#include <errno.h> -#include <limits.h> /* needed for INT_MAX */ -#include <setjmp.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sysexits.h> -#include <unistd.h> - -#include <sys/param.h> /* for MAXHOSTNAMELEN */ -#include <sys/time.h> /* for struct timeval, gettimeofday */ - -#include "fetch.h" - -static struct fetch_state clean_fetch_state; -static sigjmp_buf sigbuf; -static int get(struct fetch_state *volatile fs); - -static void -usage(void) -{ - fprintf(stderr, - "usage: fetch [-ADHILMNPRTVablFmnpqrstv] [-o outputfile] " - "[-S bytes]\n" - " [-f file -h host [-c dir] | URL]\n"); - exit(EX_USAGE); -} - - -int -main(int argc, char *const *argv) -{ - int c; - char *ep; - struct fetch_state fs; - const char *change_to_dir, *file_to_get, *hostname; - int error, rv; - unsigned long l; - - init_schemes(); - fs = clean_fetch_state; - fs.fs_verbose = 1; - fs.fs_reportsize = 0; - fs.fs_expectedsize = -1; - change_to_dir = file_to_get = hostname = 0; - -#define OPT_STRING "Aabc:D:Ff:h:HIlLmMnNo:pPqRrS:stT:vV:" - while ((c = getopt(argc, argv, OPT_STRING)) != -1) { - switch (c) { - case 'A': - fs.fs_auto_retry = -1; - break; - - case 'D': case 'H': case 'I': case 'L': case 'N': case 'V': - break; /* ncftp compatibility */ - - case 'F': - fs.fs_forcerestart = 1; - break; - - case 'a': - fs.fs_auto_retry = 1; - break; - - case 
'b': - fs.fs_linux_bug = 1; - break; - - case 'c': - change_to_dir = optarg; - break; - - case 'f': - file_to_get = optarg; - break; - - case 'h': - hostname = optarg; - break; - - case 'l': - fs.fs_linkfile = 1; - break; - - case 'm': case 'M': - fs.fs_mirror = 1; - break; - - case 'n': - fs.fs_newtime = 1; - break; - - case 'o': - fs.fs_outputfile = optarg; - break; - - case 'p': case 'P': - fs.fs_passive_mode = 1; - break; - - case 'q': - fs.fs_verbose = 0; - break; - - case 'r': - fs.fs_restart = 1; - break; - - case 'R': - fs.fs_precious = 1; - break; - - case 't': - fs.fs_use_connect = 1; - break; - - case 's': - fs.fs_reportsize = 1; - break; - - case 'S': - /* strtol sets errno to ERANGE in the case of overflow */ - errno = 0; - l = strtoul(optarg, &ep, 0); - if (!optarg[0] || *ep || errno != 0 || l > INT_MAX) - errx(EX_USAGE, "invalid size value: `%s'", - optarg); - fs.fs_expectedsize = l; - break; - - case 'T': - /* strtol sets errno to ERANGE in the case of overflow */ - errno = 0; - l = strtoul(optarg, &ep, 0); - if (!optarg[0] || *ep || errno != 0 || l > INT_MAX) - errx(EX_USAGE, "invalid timeout value: `%s'", - optarg); - fs.fs_timeout = l; - break; - - case 'v': - if (fs.fs_verbose < 2) - fs.fs_verbose = 2; - else - fs.fs_verbose++; - break; - - default: - case '?': - usage(); - } - } - - clean_fetch_state = fs; /* preserve option settings */ - - if (argv[optind] && (hostname || change_to_dir || file_to_get)) { - warnx("cannot use -h, -c, or -f with a URI argument"); - usage(); - } - - if (fs.fs_mirror && fs.fs_restart) - errx(EX_USAGE, "-m and -r are mutually exclusive."); - - if (argv[optind] == 0) { - char *uri; - - if (hostname == 0) hostname = "localhost"; - if (change_to_dir == 0) change_to_dir = ""; - if (file_to_get == 0) { - usage(); - } - - uri = alloca(sizeof("ftp://") + strlen(hostname) + - strlen(change_to_dir) + 5 + strlen(file_to_get)); - strcpy(uri, "ftp://"); - strcat(uri, hostname); - strcat(uri, "/"); - if (change_to_dir[0] == '/') 
{ - strcat(uri, "%2f"); - strcat(uri, change_to_dir+1); - } - else strcat(uri, change_to_dir); - if (file_to_get[0] != '/' && uri[strlen(uri) - 1] != '/') - strcat(uri, "/"); - strcat(uri, file_to_get); - - error = parse_uri(&fs, uri); - if (error) - return error; - return get(&fs); - } - - for (rv = 0; argv[optind] != 0; optind++) { - error = parse_uri(&fs, argv[optind]); - if (error) { - rv = error; - continue; - } - - error = get(&fs); - if (error) { - rv = error; - } - fs = clean_fetch_state; - } - return rv; -} - -/* - * The signal handling is probably more complex than it needs to be, - * but it doesn't cost a lot, so we'll be extra-careful. Using - * siglongjmp() to get out of the signal handler allows us to - * call rm() without having to store the state variable in some global - * spot where the signal handler can get at it. We also obviate the need - * for a separate timeout signal handler. - */ -static int -get(struct fetch_state *volatile fs) -{ - volatile int error; - struct sigaction oldhup, oldint, oldquit, oldterm; - struct sigaction catch; - volatile sigset_t omask; - - sigemptyset(&catch.sa_mask); - sigaddset(&catch.sa_mask, SIGHUP); - sigaddset(&catch.sa_mask, SIGINT); - sigaddset(&catch.sa_mask, SIGQUIT); - sigaddset(&catch.sa_mask, SIGTERM); - sigaddset(&catch.sa_mask, SIGALRM); - catch.sa_handler = catchsig; - catch.sa_flags = 0; - - sigprocmask(SIG_BLOCK, &catch.sa_mask, (sigset_t *)&omask); - sigaction(SIGHUP, &catch, &oldhup); - sigaction(SIGINT, &catch, &oldint); - sigaction(SIGQUIT, &catch, &oldquit); - sigaction(SIGTERM, &catch, &oldterm); - - error = sigsetjmp(sigbuf, 0); - if (error == SIGALRM) { - rm(fs); - unsetup_sigalrm(); - fprintf(stderr, "\n"); /* just in case */ - warnx("%s: %s: timed out", fs->fs_outputfile, fs->fs_status); - goto close; - } else if (error) { - rm(fs); - fprintf(stderr, "\n"); /* just in case */ - warnx("%s: interrupted by signal: %s", fs->fs_status, - sys_signame[error]); - sigdelset(&omask, error); - 
signal(error, SIG_DFL); - sigprocmask(SIG_SETMASK, (sigset_t *)&omask, 0); - raise(error); /* so that it gets reported as such */ - } - - sigprocmask(SIG_SETMASK, (sigset_t *)&omask, 0); - error = fs->fs_retrieve(fs); - -close: - sigaction(SIGHUP, &oldhup, 0); - sigaction(SIGINT, &oldint, 0); - sigaction(SIGQUIT, &oldquit, 0); - sigaction(SIGTERM, &oldterm, 0); - fs->fs_close(fs); - - return error; -} - - -/* - * Utility functions - */ - -/* - * Handle all signals by jumping back into get(). - */ -void -catchsig(int sig) -{ - siglongjmp(sigbuf, sig); -} - -/* - * Used to generate the progress display when not in quiet mode. - * Return != 0 when the file appears to be truncated. - */ -int -display(struct fetch_state *fs, off_t size, ssize_t n) -{ - static off_t bytes; - static off_t bytestart; - static int pr, stdoutatty, init = 0; - static struct timeval t0, t_start; - static char *s; - struct timezone tz; - struct timeval t; - float d; - int truncated; - - if (size != -1 && n == -1 && bytes != size) { - truncated = 1; - } else - truncated = 0; - if (init == 0) { - init = 1; - gettimeofday(&t0, &tz); - t_start = t0; - bytes = pr = 0; - stdoutatty = isatty(STDOUT_FILENO); - if (size > 0) - asprintf (&s, "Receiving %s (%qd bytes)%s", fs->fs_outputfile, - (quad_t)size, - size ? 
"" : " [appending]"); - else - asprintf (&s, "Receiving %s", fs->fs_outputfile); - if (fs->fs_verbose) - fprintf (stderr, "%s", s); - bytestart = bytes = n; - goto out; - } - gettimeofday(&t, &tz); - if (n == -1) { - if(stdoutatty && fs->fs_verbose) { - if (size > 0) - fprintf (stderr, "\r%s: 100%%", s); - else - fprintf (stderr, "\r%s: %qd Kbytes", s, (long long)bytes/1024); - } - bytes -= bytestart; - d = t.tv_sec + t.tv_usec/1.e6 - t_start.tv_sec - t_start.tv_usec/1.e6; - if (fs->fs_verbose) - fprintf (stderr, "\n%qd bytes transferred in %.1f seconds", - (long long)bytes, d); - d = bytes/d; - if (fs->fs_verbose) { - if (d < 1000) - fprintf (stderr, " (%.0f bytes/s)\n", d); - else { - d /=1024; - fprintf (stderr, " (%.2f Kbytes/s)\n", d); - } - } - free(s); - init = 0; - goto out; - } - bytes += n; - d = t.tv_sec + t.tv_usec/1.e6 - t0.tv_sec - t0.tv_usec/1.e6; - if (d < 5) /* display every 5 sec. */ - goto out; - t0 = t; - pr++; - if(stdoutatty && fs->fs_verbose) { - if (size > 1000000) - fprintf (stderr, "\r%s: %2qd%%", s, (long long)(bytes/(size/100))); - else if (size > 0) - fprintf (stderr, "\r%s: %2qd%%", s, (long long)(100*bytes/size)); - else - fprintf (stderr, "\r%s: %qd Kbytes", s, (long long)(bytes/1024)); - } -out: - if (truncated != 0) - fprintf(stderr, "WARNING: File %s appears to be truncated: " - "%qd/%qd bytes\n", - fs->fs_outputfile, - (quad_t)bytes, (quad_t)size); - return truncated; -} diff --git a/usr.bin/fetch/uri.c b/usr.bin/fetch/uri.c deleted file mode 100644 index 3aeb0d1..0000000 --- a/usr.bin/fetch/uri.c +++ /dev/null @@ -1,122 +0,0 @@ -/*- - * Copyright 1997 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting 
documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/types.h> - -#include <err.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sysexits.h> -#include <unistd.h> - -#include "fetch.h" - -struct uri_scheme *schemes[] = { - &http_scheme, &ftp_scheme, &file_scheme, 0 -}; - -static struct uri_scheme * -find_scheme(const char *name) -{ - int i; - - for (i = 0; schemes[i]; i++) { - if (strcasecmp(schemes[i]->sc_name, name) == 0) - return schemes[i]; - } - return 0; -} - -void -init_schemes(void) -{ - int i; - char schemebuf[32]; - const char *s, *t; - struct uri_scheme *scp; - - for (i = 0; schemes[i]; i++) { - if (getenv(schemes[i]->sc_proxy_envar) != 0) - schemes[i]->sc_can_proxy = 1; - } - - for (i = 0; schemes[i]; i++) { - s = schemes[i]->sc_proxy_by; - while (s && *s) { - t = strchr(s, ','); - if (t) { - schemebuf[0] = '\0'; - strncat(schemebuf, s, t - s); - s = t + 1; - } else { - strcpy(schemebuf, s); - s = 0; - } - scp = find_scheme(schemebuf); - if (scp && scp->sc_can_proxy) { - schemes[i]->sc_proxyproto = scp; - break; - } - } - } -} - -int -parse_uri(struct fetch_state *fs, const char *uri) -{ - const char *colon, *slash; - char *scheme; - struct uri_scheme *scp; - - fs->fs_status = "parsing URI"; - colon = strchr(uri, ':'); - slash = strchr(uri, '/'); - if (!colon || !slash || slash < colon) { - warnx("%s: an absolute URI is required", uri); - return EX_USAGE; - } - - scheme = alloca(colon - uri + 1); - scheme[0] = '\0'; - strncat(scheme, uri, colon - uri); - scp = find_scheme(scheme); - - if (scp == 0) { - warnx("%s: unknown URI scheme", scheme); - return EX_USAGE; - } - if (scp->sc_proxyproto) - return scp->sc_proxyproto->sc_proxy_parse(fs, uri); - else - return scp->sc_parse(fs, uri); -} - diff --git a/usr.bin/fetch/util.c b/usr.bin/fetch/util.c deleted file mode 100644 index bac316d..0000000 --- a/usr.bin/fetch/util.c +++ /dev/null @@ -1,334 +0,0 @@ -/*- - * Copyright 1997 Massachusetts Institute of Technology - * - * Permission to use, copy, 
modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/types.h> - -#include <ctype.h> -#include <err.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sysexits.h> -#include <time.h> /* for time() */ -#include <unistd.h> - -#include <sys/time.h> /* for struct timeval */ - -#include "fetch.h" - - -/* Signal handling functions */ - -/* - * If this were Scheme we could make this variable private to just these two - * functions... 
- */ -static struct sigaction oldalrm; - -void -setup_sigalrm(void) -{ - struct sigaction catch; - - sigemptyset(&catch.sa_mask); - sigaddset(&catch.sa_mask, SIGHUP); - sigaddset(&catch.sa_mask, SIGINT); - sigaddset(&catch.sa_mask, SIGQUIT); - sigaddset(&catch.sa_mask, SIGTERM); - sigaddset(&catch.sa_mask, SIGALRM); - catch.sa_handler = catchsig; - catch.sa_flags = 0; - - sigaction(SIGALRM, &catch, &oldalrm); -} - -void -unsetup_sigalrm(void) -{ - sigaction(SIGALRM, &oldalrm, 0); -} - - -/* File-handling functions */ - -/* - * Set the last-modified time of the output file to be that returned by - * the server. - */ -void -adjmodtime(struct fetch_state *fs) -{ - struct timeval tv[2]; - time_t tt; - - /* XXX - not strictly correct, since (time_t)-1 does not have to be - > 0. This also catches some of the other routines which erroneously - return 0 for invalid times rather than -1. */ - if (!fs->fs_newtime && fs->fs_modtime > 0) { - tv[0].tv_usec = tv[1].tv_usec = 0; - time(&tt); - tv[0].tv_sec = tt; - tv[1].tv_sec = fs->fs_modtime; - utimes(fs->fs_outputfile, tv); - } -} - -/* - * Delete the file when exiting on error, if it is not `precious'. - */ -void -rm(struct fetch_state *fs) -{ - if (!(fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0')) { - if (!fs->fs_restart && !fs->fs_mirror && !fs->fs_precious) - unlink(fs->fs_outputfile); - else - adjmodtime(fs); - } -} - - -/* String-handling and -parsing functions */ - -/* - * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/'). This - * must be done after the URI is parsed, since the principal purpose of - * the encoding is to hide characters which would otherwise be significant - * to the parser (like `/'). 
- */ -char * -percent_decode(const char *uri) -{ - char *rv, *s; - - rv = s = safe_malloc(strlen(uri) + 1); - - while (*uri) { - if (*uri == '%' && uri[1] - && isxdigit(uri[1]) && isxdigit(uri[2])) { - int c; - static char buf[] = "xx"; - - buf[0] = uri[1]; - buf[1] = uri[2]; - sscanf(buf, "%x", &c); - uri += 3; - *s++ = c; - } else { - *s++ = *uri++; - } - } - *s = '\0'; - return rv; -} - -/* - * Decode a standard host:port string into its constituents, allocating - * memory for a new copy of the host part. - */ -int -parse_host_port(const char *s, char **hostname, int *port) -{ - const char *colon; - char *ep; - unsigned long ul; - - colon = strchr(s, ':'); - if (colon != 0) { - errno = 0; - ul = strtoul(colon + 1, &ep, 10); - if (*ep != '\0' || colon[1] == '\0' || errno != 0 - || ul < 1 || ul > 65534) { - warnx("`%s': invalid port number", colon + 1); - return EX_USAGE; - } - - *hostname = safe_strndup(s, colon - s); - *port = ul; - } else { - *hostname = safe_strdup(s); - } - return 0; -} - -/* - * safe_malloc is like malloc, but aborts on error. - */ -void * -safe_malloc(size_t len) -{ - void *rv; - - rv = malloc(len); - if (rv == 0) - err(EX_OSERR, "malloc(%qu)", (u_quad_t)len); - return rv; -} - -/* - * safe_strdup is like strdup, but aborts on error. - */ -char * -safe_strdup(const char *orig) -{ - char *s; - - s = safe_malloc(strlen(orig) + 1); - strcpy(s, orig); - return s; -} - -/* - * safe_strndup is like safe_strdup, but copies at most `len' - * characters from `orig'. - */ -char * -safe_strndup(const char *orig, size_t len) -{ - char *s; - - s = safe_malloc(len + 1); - s[0] = '\0'; - strncat(s, orig, len); - return s; -} - -/* - * Implement the `base64' encoding as described in RFC 1521. 
- */ -static const char base64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -char * -to_base64(const unsigned char *buf, size_t len) -{ - char *s, *rv; - unsigned tmp; - - s = safe_malloc((4 * (len + 1)) / 3 + 1); - - rv = s; - while (len >= 3) { - tmp = buf[0] << 16 | buf[1] << 8 | buf[2]; - s[0] = base64[tmp >> 18]; - s[1] = base64[(tmp >> 12) & 077]; - s[2] = base64[(tmp >> 6) & 077]; - s[3] = base64[tmp & 077]; - len -= 3; - buf += 3; - s += 4; - } - - /* RFC 1521 enumerates these three possibilities... */ - switch(len) { - case 2: - tmp = buf[0] << 16 | buf[1] << 8; - s[0] = base64[(tmp >> 18) & 077]; - s[1] = base64[(tmp >> 12) & 077]; - s[2] = base64[(tmp >> 6) & 077]; - s[3] = '='; - s[4] = '\0'; - break; - case 1: - tmp = buf[0] << 16; - s[0] = base64[(tmp >> 18) & 077]; - s[1] = base64[(tmp >> 12) & 077]; - s[2] = s[3] = '='; - s[4] = '\0'; - break; - case 0: - s[0] = '\0'; - break; - } - - return rv; -} - -int -from_base64(const char *orig, unsigned char *buf, size_t *lenp) -{ - int len, len2; - const char *equals; - unsigned tmp; - - len = strlen(orig); - while (isspace(orig[len - 1])) - len--; - - if (len % 4) - return -1; - - len2 = 3 * (len / 4); - equals = strchr(orig, '='); - if (equals != 0) { - if (equals[1] == '=') - len2 -= 2; - else - len2 -= 1; - } - - /* Now the length is len2 is the actual length of the original. */ - if (len2 > *lenp) - return -1; - *lenp = len2; - - while (len > 0) { - int i; - const char *off; - int forget; - - tmp = 0; - forget = 0; - for (i = 0; i < 4; i++) { - if (orig[i] == '=') { - off = base64; - forget++; - } else { - off = strchr(base64, orig[i]); - } - if (off == 0) - return -1; - tmp = (tmp << 6) | (off - base64); - } - - buf[0] = (tmp >> 16) & 0xff; - if (forget < 2) - buf[1] = (tmp >> 8) & 0xff; - if (forget < 1) - buf[2] = (tmp >> 8) & 0xff; - len -= 4; - orig += 4; - buf += 3 - forget; - } - return 0; -} |