diff options
author | wollman <wollman@FreeBSD.org> | 1997-01-30 21:43:44 +0000 |
---|---|---|
committer | wollman <wollman@FreeBSD.org> | 1997-01-30 21:43:44 +0000 |
commit | 444cbb04eefcb9a4da4e2a366541c4d02d74938d (patch) | |
tree | 8697935e80816a6c94b292ebeb8af3ef6994b10b | |
parent | 067667ae976ea0d2540166d4637720d70a012cf7 (diff) | |
download | FreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.zip FreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.tar.gz |
Here is my long-threatened revamping of fetch. Jean-Marc probably won't
recognize it any more. This makes the following significant changes:
- The main body of the program doesn't know a thing about URIs,
HTTP, or FTP. This makes it possible to easily plug in other
protocols. (The next revision will probably be able to dynamically
add new recognizers.)
- There are no longer arbitrary timeouts for the protocols. If you want
to set one for yourself, use the environment variables.
- FTP proxies are now supported (if I implemented it right).
- The HTTP implementation is much more complete, and can now do restarts,
preserve modtimes, and run in mirror mode. It's not yet up to 1.1,
but it's getting there.
- Transaction TCP is now used for sending HTTP requests. The HTTP/1.1 syntax
for requesting that the connection be closed after one request is
implemented.
In all of this, I have doubtless broken somebody. Please test it and tell me
about the bugs.
-rw-r--r-- | usr.bin/fetch/Makefile | 8 | ||||
-rw-r--r-- | usr.bin/fetch/fetch.1 | 195 | ||||
-rw-r--r-- | usr.bin/fetch/fetch.h | 87 | ||||
-rw-r--r-- | usr.bin/fetch/file.c | 144 | ||||
-rw-r--r-- | usr.bin/fetch/ftp.c | 420 | ||||
-rw-r--r-- | usr.bin/fetch/http.c | 976 | ||||
-rw-r--r-- | usr.bin/fetch/main.c | 915 | ||||
-rw-r--r-- | usr.bin/fetch/uri.c | 122 | ||||
-rw-r--r-- | usr.bin/fetch/util.c | 322 |
9 files changed, 2428 insertions, 761 deletions
diff --git a/usr.bin/fetch/Makefile b/usr.bin/fetch/Makefile index 6e86642..31479bd 100644 --- a/usr.bin/fetch/Makefile +++ b/usr.bin/fetch/Makefile @@ -1,9 +1,9 @@ PROG = fetch -SRCS = main.c +SRCS = file.c ftp.c http.c main.c util.c uri.c -CFLAGS+= -Wall +CFLAGS+= -Wall -Wwrite-strings -Wmissing-prototypes -DPADD= ${LIBFTPIO} -LDADD= -lftpio +DPADD= ${LIBFTPIO} ${LIBMD} +LDADD= -lftpio -lmd .include <bsd.prog.mk> diff --git a/usr.bin/fetch/fetch.1 b/usr.bin/fetch/fetch.1 index 1f3ae01..6955250 100644 --- a/usr.bin/fetch/fetch.1 +++ b/usr.bin/fetch/fetch.1 @@ -11,7 +11,7 @@ .Op Fl o Ar file .Ar URL .Nm fetch -.Op Fl MPmnpqr +.Op Fl MPRmnpqr .Op Fl o Ar file .Op Fl c Ar dir .Fl f Ar file @@ -26,22 +26,17 @@ or the protocol. In the first form of the command, the .Ar URL may be of the form -.Em http://site.domain/path/to/the/file +.Li http://site.domain/path/to/the/file or -.Em ftp://site.domain/path/to/the/file. -For compatibility with -.Xr tftp 1 -the form -.Em site.domain:/path/to/the/file -is also accepted. -To denote a local filename to be copied or linked to (see +.Li ftp://site.domain/path/to/the/file. +To denote a local filename to be copied or linked to (see the .Fl l -flag), the +flag below), the .Em file:/path/to/the/file URL form is used. - +.Pp The second form of the command can be used to get a file using the -.Em ftp +.Tn FTP protocol, specifying the file name and the remote host with the .Fl h and the @@ -50,34 +45,51 @@ flags. .Pp The following options are available: .Bl -tag -width Fl -compact -.It Fl M -.It Fl m -Mirror mode: Set the modification time of the file so that it is -identical to the modification time of the file at the remote host. -If the file already exists on the local host and is identical (as -gauged by size and modification time), no transfer is done. -.It Fl n -Don't preserve the modtime of the transfered file, use the current time. -.It Fl P -.It Fl p -Use passive mode if you are behind a firewall. 
.It Fl c Ar dir -Change to directory +The file to retrieve is in directory .Ar dir -at remote host before starting the transfer. +on the remote host. .It Fl f Ar file -Retrieve +The file to retrieve is named .Ar file on the remote host. .It Fl h Ar host -Set the -.Ar host -for transfer. +The file to retrieve is located on the host +.Ar host . .It Fl l If target is a .Ar file:/ style of URL, make a link to the target rather than trying to copy it. +.It Fl M +.It Fl m +Mirror mode: Set the modification time of the file so that it is +identical to the modification time of the file at the remote host. +If the file already exists on the local host and is identical (as +gauged by size and modification time), no transfer is done. +.It Fl n +Don't preserve the modtime of the transfered file, use the current time. +.It Fl o Ar file +Set the output file name to +.Ar file . +By default, a ``pathname'' is extracted from the specified URI, and +its basename is used as the name of the output file. A +.Ar file +argument of +.Sq Li \&- +indicates that results are to be directed to the standard output. +.It Fl P +.It Fl p +Use the passive mode of the +.Tn FTP +protocol. This is useful for crossing certain sorts of firewalls. +.It Fl q +Quiet mode. Do not report transfer progress on the terminal. +.It Fl R +The filenames specified are ``precious'', and should not be deleted +under any circumstances, even if the transfer failed or was incomplete. +.It Fl r +Restart a previously interrupted transfer. .It Fl T Ar seconds Set timeout value to .Ar seconds. @@ -86,47 +98,90 @@ Overrides the environment variables for ftp transfers or .Ev HTTP_TIMEOUT for http transfers if set. -.It Fl q -Quiet mode. Do not report transfer progress on the terminal. .It Fl v -Verbose mode - display FTP connection information in painful detail. -.It Fl r -Reget. Use this flag to restart an interrupted transfer. -.It Fl o Ar file -Set the output file name to -.Ar file +Increase verbosity. 
More +.Fl v Ns \&'s +result in more information. .El +.Pp +Many options are also controlled solely by the environment (this is a +bug). +.Sh PROXY SERVERS +Many sites use application gateways (``proxy servers'') in their +firewalls in order to allow communication across the firewall using a +trusted protocol. The +.Nm fetch +program can use both the +.Tn FTP +and the +.Tn HTTP +protocol with a proxy server. +.Tn FTP +proxy servers can only relay +.Tn FTP +requests; +.Tn HTTP +proxy servers can relay both +.Tn FTP +and +.Tn HTTP +requests. +A proxy server can be configured by defining an environment variable +named +.Dq Va PROTO Ns Ev _PROXY , +where +.Va PROTO +is the name of the protocol in upper case. The value of the +environment variable specifies a hostname, optionally followed by a +colon and a port number. +.Pp +The +.Tn FTP +proxy client specifies +.Dq anonymous +as its user name, and passes the remote user name and host as the +.Tn FTP +session's password, in the form +.Dq Va remoteuser Ns Li \&@ Va remotehost . +The +.Tn HTTP +proxy client simply passes the originally-requested URI to the remote +server in an +.Tn HTTP +.Dq Li GET +request. HTTP proxy authentication is not yet implemented. +When multiple proxy protcols are configured, +.Nm +will prefer +.Tn HTTP . .Sh ENVIRONMENT -A transfer using the -.Em ftp -protocol will be aborted after the delay specified by the -.Ev FTP_TIMEOUT -variable. The default is 300 (seconds) - -A transfer using the -.Em http -protocol will be aborted after the delay specified by the -.Ev HTTP_TIMEOUT -variable. The default is 300 (seconds) - -.Ev FTP_LOGIN -is the login name for the remote host. Default is -.Em anonymous - -.Ev FTP_PASSWORD -is the password for the remote host. Default is -.Em <yourname>@ - -.Ev FTP_PASSIVE_MODE -will force the use of passive mode FTP for firewalls. - -If -.Ev HTTP_PROXY -is set to a value of the form -.Em host:port -it specifies the address of a http proxy. 
The proxy will be used -for all ftp and http requests. This is useful if you are behind -an application firewall. +.Bl -tag -width FTP_PASSIVE_MODE -offset indent +.It Ev FTP_TIMEOUT +maximum time, in seconds, to wait before aborting an +.Tn FTP +connection. +.It Ev HTTP_TIMEOUT +maximum time, in seconds, to wait before aborting an +.Tn HTTP +connection. +.It Ev FTP_LOGIN +the login name used for +.Tn FTP +transfers (default +.Dq Li anonymous ) +.It Ev FTP_PASSWORD +the password used for +.Tn FTP +transfers (default +.Dq Va yourname Ns Li \&@ Ns Va yourhost ) +.It Ev FTP_PASSIVE_MODE +force the use of passive mode FTP +.It Ev HTTP_PROXY +the address of a proxy server which understands +.Tn HTTP +.It Ev FTP_PROXY +the address of a proxy server which understands +.Tn FTP .Sh SEE ALSO .Xr ftp 1 , .Xr tftp 1 @@ -135,3 +190,9 @@ The .Nm fetch command appeared in .Fx 2.1.5 . +.Sh AUTHORS +The original implementation of +.Nm +was done by Jean-Marc Zucconi. It was extensively re-worked for +.Fx 3.0 +by Garrett Wollman. diff --git a/usr.bin/fetch/fetch.h b/usr.bin/fetch/fetch.h new file mode 100644 index 0000000..daa010e --- /dev/null +++ b/usr.bin/fetch/fetch.h @@ -0,0 +1,87 @@ +/* + * Copyright 1997 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. 
DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#ifndef fetch_h +#define fetch_h 1 + + +#define BUFFER_SIZE 1024 +#define FETCH_VERSION "fetch/1.0" +#define PATH_CP "/bin/cp" + +struct fetch_state { + const char *fs_status; + const char *fs_outputfile; + int fs_verbose; /* -q, -v option */ + int fs_newtime; /* -n option */ + int fs_mirror; /* -m option */ + int fs_restart; /* -r option */ + int fs_timeout; /* -T option */ + int fs_passive_mode; /* -p option */ + int fs_linkfile; /* -l option */ + int fs_precious; /* -R option */ + time_t fs_modtime; + void *fs_proto; + int (*fs_retrieve)(struct fetch_state *); + int (*fs_close)(struct fetch_state *); +}; + +struct uri_scheme { + const char *sc_name; /* name of the scheme, <32 characters */ + int (*sc_parse)(struct fetch_state *, const char *); + /* routine to parse a URI and build state */ + int (*sc_proxy_parse)(struct fetch_state *, const char *); + /* same, but for proxy case */ + const char *sc_proxy_envar; /* envar used to determine proxy */ + const char *sc_proxy_by; /* list of protos which can proxy us */ + + /* The rest is filled in dynamically... 
*/ + int sc_can_proxy; + struct uri_scheme *sc_proxyproto; +}; + +extern struct uri_scheme file_scheme, ftp_scheme, http_scheme; + +void adjmodtime(struct fetch_state *fs); +void catchsig(int signo); +void display(struct fetch_state *fs, off_t total, ssize_t thisincr); +void init_schemes(void); +void rm(struct fetch_state *fs); +void setup_sigalrm(void); +void unsetup_sigalrm(void); +char *percent_decode(const char *orig); +char *safe_strdup(const char *orig); +char *safe_strndup(const char *orig, size_t len); +char *to_base64(const unsigned char *buf, size_t len); +int from_base64(const char *orig, unsigned char *buf, size_t *lenp); +int parse_host_port(const char *str, char **hostname, int *port); +int parse_uri(struct fetch_state *fs, const char *uri); +#endif /* ! fetch_h */ diff --git a/usr.bin/fetch/file.c b/usr.bin/fetch/file.c new file mode 100644 index 0000000..091639c --- /dev/null +++ b/usr.bin/fetch/file.c @@ -0,0 +1,144 @@ +/*- + * Copyright 1997 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include <sys/types.h> + +#include <err.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> + +#include <sys/wait.h> + +#include "fetch.h" + +static int file_retrieve(struct fetch_state *fs); +static int file_close(struct fetch_state *fs); +static int file_parse(struct fetch_state *fs, const char *uri); + +struct uri_scheme file_scheme = + { "file", file_parse, 0, 0, 0 }; + +/* + * Again, we slightly misinterpret the slash after the hostname as + * being the start of the pathname rather than merely a separator. 
+ */ +static int +file_parse(struct fetch_state *fs, const char *uri) +{ + const char *p; + + p = uri + 5; /* skip past `file:' */ + if (p[0] == '/' && p[1] == '/') { + /* skip past `//localhost', if any */ + p += 2; + while (*p && *p != '/') + p++; + } + + if (p[0] != '/') { + warnx("`%s': expected absolute pathname in `file' URL", uri); + return EX_USAGE; + } + + fs->fs_proto = percent_decode(p); + /* guaranteed to succeed because of above test */ + p = strrchr(fs->fs_proto, '/'); + if (fs->fs_outputfile == 0) /* only set if not overridden by user */ + fs->fs_outputfile = p + 1; + fs->fs_retrieve = file_retrieve; + fs->fs_close = file_close; + return 0; +} + +static int +file_close(struct fetch_state *fs) +{ + free(fs->fs_proto); + fs->fs_proto = 0; + fs->fs_outputfile = 0; + fs->fs_status = "free"; + return 0; +} + +static int +file_retrieve(struct fetch_state *fs) +{ + /* XXX - this seems bogus to me! */ + if (access(fs->fs_outputfile, F_OK) == 0) { + errno = EEXIST; + warn("%s", fs->fs_outputfile); + return EX_USAGE; + } + + if (fs->fs_linkfile) { + fs->fs_status = "symlink"; + if (symlink(fs->fs_proto, fs->fs_outputfile) == -1) { + warn("symlink"); + return EX_OSERR; + } + fs->fs_status = "done"; + } else { + pid_t pid; + int status; + + fflush(stderr); + pid = fork(); + if (pid < 0) { + warn("fork"); + return EX_TEMPFAIL; + } else if (pid == 0) { + execl(PATH_CP, "cp", "-p", fs->fs_proto, + fs->fs_outputfile, (char *)0); + warn("execl: " PATH_CP); + fflush(stderr); + _exit(EX_OSERR); + } else { + fs->fs_status = "copying"; + if (waitpid(pid, &status, 0) < 0) { + warn("waitpid(%ld)", (long)pid); + return EX_OSERR; + } + if (WIFEXITED(status)) + return WEXITSTATUS(status); + if (WIFSIGNALED(status)) + warn(PATH_CP " exited on signal: %s", + sys_signame[WTERMSIG(status)]); + return EX_OSERR; + } + } + return 0; +} + diff --git a/usr.bin/fetch/ftp.c b/usr.bin/fetch/ftp.c new file mode 100644 index 0000000..34caeb0 --- /dev/null +++ b/usr.bin/fetch/ftp.c @@ -0,0 
+1,420 @@ +/*- + * Copyright 1997 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#include <sys/types.h> + +#include <err.h> +#include <errno.h> +#include <ftpio.h> +#include <limits.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> + +#include <sys/param.h> +#include <sys/stat.h> + +#include "fetch.h" + +struct ftp_state { + char *ftp_hostname; + char *ftp_user; + char *ftp_password; + char *ftp_remote_file; + unsigned ftp_port; +}; + +static int ftp_close(struct fetch_state *fs); +static int ftp_retrieve(struct fetch_state *fs); +static int ftp_parse(struct fetch_state *fs, const char *uri); +static int ftp_proxy_parse(struct fetch_state *fs, const char *uri); + +struct uri_scheme ftp_scheme = + { "ftp", ftp_parse, ftp_proxy_parse, "FTP_PROXY", "ftp,http" }; + +static int +ftp_parse(struct fetch_state *fs, const char *uri) +{ + const char *p, *colon, *slash, *q; + char *hostname, *atsign; + unsigned port; + struct ftp_state *ftps; + + p = uri + 4; + port = 0; + + if (p[0] != '/' || p[1] != '/') { + warnx("`%s': invalid `ftp' URL", uri); + return EX_USAGE; + } + + p += 2; + colon = strchr(p, ':'); + slash = strchr(p, '/'); + if (colon && slash && colon < slash) + q = colon; + else + q = slash; + if (q == 0) { + warnx("`%s': malformed `ftp' URL", uri); + return EX_USAGE; + } + hostname = alloca(q - p + 1); + hostname[0] = '\0'; + strncat(hostname, p, q - p); + p = slash; + + if (colon && colon + 1 != slash) { + unsigned long ul; + char *ep; + + errno = 0; + ul = strtoul(colon + 1, &ep, 10); + if (ep != slash || ep == colon + 1 || errno != 0 + || ul < 1 || ul > 65534) { + warn("`%s': invalid port in URL", uri); + return EX_USAGE; + } + + port = ul; + } else { + port = 21; + } + + p = slash + 1; + + ftps = malloc(sizeof *ftps); + if (ftps == 0) + err(EX_OSERR, "malloc"); + + /* + * Now, we have a copy of the hostname in hostname, the specified port + * (or the default value) in port, and p points to the filename part + * of the URI. 
We just need to check for a user in the hostname, + * and then save all the bits in our state. + */ + atsign = strrchr(hostname, '@'); + if (atsign) { + if (atsign[1] == '\0') { + warnx("`%s': malformed `ftp' hostname", hostname); + free(ftps); + return EX_USAGE; + } + + *atsign = '\0'; + ftps->ftp_user = percent_decode(hostname); + ftps->ftp_hostname = safe_strdup(atsign + 1); + } else { + ftps->ftp_user = 0; + ftps->ftp_hostname = safe_strdup(hostname); + ftps->ftp_port = port; + } + + p = ftps->ftp_remote_file = percent_decode(p); + /* now p is the decoded version */ + + if (fs->fs_outputfile == 0) { + slash = strrchr(p, '/'); + fs->fs_outputfile = slash + 1; + } + + ftps->ftp_password = getenv("FTP_PASSWORD"); + if (ftps->ftp_password != 0) { + ftps->ftp_password = safe_strdup(ftps->ftp_password); + } else { + char *pw; + const char *logname; + char localhost[MAXHOSTNAMELEN]; + + logname = getlogin(); + if (logname == 0) + logname = "root"; + gethostname(localhost, sizeof localhost); + pw = malloc(strlen(logname) + 1 + strlen(localhost) + 1); + if (pw == 0) + err(EX_OSERR, "malloc"); + strcpy(pw, logname); + strcat(pw, "@"); + strcat(pw, localhost); + ftps->ftp_password = pw; + setenv("FTP_PASSWORD", pw, 0); /* cache the result */ + } + + if (ftps->ftp_user == 0) { + const char *user = getenv("FTP_LOGIN"); + if (user != 0) + ftps->ftp_user = safe_strdup(user); + } + + fs->fs_proto = ftps; + fs->fs_close = ftp_close; + fs->fs_retrieve = ftp_retrieve; + return 0; +} + +/* + * The only URIs we can handle in the FTP proxy are FTP URLs. + * This makes it possible to take a few short cuts. 
+ */ +static int +ftp_proxy_parse(struct fetch_state *fs, const char *uri) +{ + int rv; + char *hostname; + char *port; + const char *user; + char *newpass; + unsigned portno; + struct ftp_state *ftps; + + hostname = getenv("FTP_PROXY"); + port = strchr(hostname, ':'); + if (port == 0) { + portno = 21; + } else { + unsigned long ul; + char *ep; + + /* All this to avoid modifying the environment. */ + ep = alloca(strlen(hostname) + 1); + strcpy(ep, hostname); + port = ep + (port - hostname); + hostname = ep; + + *port++ = '\0'; + errno = 0; + ul = strtoul(port, &ep, 0); + if (*ep || !*port || errno != 0 || ul < 1 || ul > 65534) { + warnx("`%s': invalid port specification for FTP proxy", + port); + return EX_USAGE; + } + portno = ul; + } + + /* ftp_parse() does most of the work; we can just fix things up */ + rv = ftp_parse(fs, uri); + if (rv) + return rv; + /* Oops.. it got turned into a file: */ + if (fs->fs_retrieve != ftp_retrieve) { + return 0; + } + + ftps = fs->fs_proto; + if (ftps->ftp_port != 21) { + ftp_close(fs); + warnx("`%s': FTP proxy requires the use of the standard port", + uri); + return EX_USAGE; + } + + ftps->ftp_port = portno; + user = ftps->ftp_user ? ftps->ftp_user : "anonymous"; + newpass = malloc(strlen(ftps->ftp_user ? 
ftps->ftp_user : "anonymous") + + 1 + strlen(ftps->ftp_hostname) + 1); + if (newpass == 0) + err(EX_OSERR, "malloc"); + + strcpy(newpass, user); + strcat(newpass, "@"); + strcpy(newpass, ftps->ftp_hostname); + free(ftps->ftp_hostname); + ftps->ftp_hostname = safe_strdup(hostname); + free(ftps->ftp_password); + ftps->ftp_password = newpass; + free(ftps->ftp_user); + ftps->ftp_user = getenv("FTP_PROXY_USER"); + if (ftps->ftp_user) + ftps->ftp_user = safe_strdup(ftps->ftp_user); + return 0; +} + +static int +ftp_close(struct fetch_state *fs) +{ + struct ftp_state *ftps = fs->fs_proto; + + if (ftps->ftp_user) + free(ftps->ftp_user); + free(ftps->ftp_hostname); + free(ftps->ftp_password); + free(ftps->ftp_remote_file); + free(ftps); + fs->fs_proto = 0; + fs->fs_outputfile = 0; + return 0; +} + +static int +ftp_retrieve(struct fetch_state *fs) +{ + struct ftp_state *ftps = fs->fs_proto; + FILE *ftp, *remote, *local; + int status; + off_t size; + off_t seekloc, wehave; + time_t modtime; + size_t readresult, writeresult; + + ftp = ftpLogin(ftps->ftp_hostname, + (char *)(ftps->ftp_user ? ftps->ftp_user : "anonymous"), + /* XXX ^^^^ bad API */ + ftps->ftp_password, 0, fs->fs_verbose > 1, + &status); + if (ftp == 0) { + warnx("%s: %s", ftps->ftp_hostname, + status ? 
ftpErrString(status) : hstrerror(h_errno)); + return EX_IOERR; + } + ftpBinary(ftp); + ftpPassive(ftp, fs->fs_passive_mode); + size = ftpGetSize(ftp, ftps->ftp_remote_file); + modtime = ftpGetModtime(ftp, ftps->ftp_remote_file); + if (modtime <= 0) { /* xxx */ + warnx("%s: cannot get remote modification time", + ftps->ftp_remote_file); + modtime = -1; + } + fs->fs_modtime = modtime; + seekloc = wehave = 0; + if (fs->fs_restart || fs->fs_mirror) { + struct stat stab; + + if (fs->fs_outputfile[0] == '-' + && fs->fs_outputfile[1] == '\0') + status = fstat(STDOUT_FILENO, &stab); + else + status = stat(fs->fs_outputfile, &stab); + if (status < 0) { + stab.st_mtime = -1; + stab.st_size = 0; + } + if (status == 0 && !S_ISREG(stab.st_mode)) { + fs->fs_restart = 0; + fs->fs_mirror = 0; + } + if (fs->fs_mirror && stab.st_size == size + && modtime <= stab.st_mtime) { + fclose(ftp); + return 0; + } + if (fs->fs_restart) { + if (stab.st_size != 0 && stab.st_size < size) + seekloc = wehave = size; + } + } + + remote = ftpGet(ftp, ftps->ftp_remote_file, &seekloc); + if (remote == 0) { + if (ftpErrno(ftp)) { + warnx("%s: %s", ftps->ftp_hostname, + ftpErrString(ftpErrno(ftp))); + fclose(ftp); + return EX_IOERR; + } else { + warn("ftpGet"); + return EX_OSERR; + } + } + + if (fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0') + local = fopen("/dev/stdout", wehave ? "a" : "w"); + else + local = fopen(fs->fs_outputfile, wehave ? 
"a" : "w"); + if (local == 0) { + warn("%s", fs->fs_outputfile); + fclose(remote); + fclose(ftp); + return EX_OSERR; + } + + if (fs->fs_timeout) { + char buf[sizeof("18446744073709551616")]; /* 2**64 */ + snprintf(buf, sizeof buf, "%d", fs->fs_timeout); + setenv("FTP_TIMEOUT", buf, 1); + } else { + char *env = getenv("FTP_TIMEOUT"); + char *ep; + unsigned long ul; + + if (env) { + errno = 0; + ul = strtoul(env, &ep, 0); + if (*env && *ep && errno == 0 && ul <= INT_MAX) + fs->fs_timeout = ul; + else + warnx("`%s': invalid FTP timeout", env); + } + } + + display(fs, size, wehave); + setup_sigalrm(); + + do { + char buf[BUFFER_SIZE]; + + alarm(fs->fs_timeout); + readresult = fread(buf, 1, sizeof buf, remote); + alarm(0); + if (readresult == 0) + break; + display(fs, size, readresult); + writeresult = fwrite(buf, 1, readresult, local); + } while (writeresult == readresult); + unsetup_sigalrm(); + + if (ferror(remote)) { + warn("reading remote file from %s", ftps->ftp_hostname); + fclose(local); + fclose(remote); + fclose(ftp); + rm(fs); + return EX_IOERR; + } else if(ferror(local)) { + warn("%s", fs->fs_outputfile); + fclose(local); + fclose(remote); + fclose(ftp); + rm(fs); + return EX_IOERR; + } + + fclose(local); + fclose(remote); + fclose(ftp); + display(fs, size, -1); + adjmodtime(fs); + return 0; +} diff --git a/usr.bin/fetch/http.c b/usr.bin/fetch/http.c new file mode 100644 index 0000000..425476d --- /dev/null +++ b/usr.bin/fetch/http.c @@ -0,0 +1,976 @@ +/*- + * Copyright 1997 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. 
not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#include <sys/types.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <md5.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <time.h> +#include <unistd.h> + +#include <sys/param.h> /* for MAXHOSTNAMELEN */ +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/uio.h> + +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "fetch.h" + +static int http_parse(struct fetch_state *fs, const char *uri); +static int http_proxy_parse(struct fetch_state *fs, const char *uri); +static int http_close(struct fetch_state *fs); +static int http_retrieve(struct fetch_state *fs); + +struct uri_scheme http_scheme = + { "http", http_parse, http_proxy_parse, "HTTP_PROXY", "http" }; + +struct http_state { + char *http_hostname; + char *http_remote_request; + char *http_decoded_file; + unsigned http_port; +}; + +/* We are only concerned with headers we might receive. 
*/ +enum http_header { + ht_content_length, ht_last_modified, ht_content_md5, ht_content_type, + ht_transfer_encoding, ht_content_range, ht_warning, + /* unusual cases */ + ht_syntax_error, ht_unknown, ht_end_of_header +}; + +static char *format_http_date(time_t when); +static char *format_http_user_agent(void); +static enum http_header http_parse_header(char *line, char **valuep); +static int check_md5(FILE *fp, char *base64ofmd5); +static int http_first_line(const char *line); +static int parse_http_content_range(char *orig, off_t *first, off_t *total); +static time_t parse_http_date(char *datestring); + +static int +http_parse(struct fetch_state *fs, const char *uri) +{ + const char *p, *colon, *slash, *ques, *q; + char *hostname; + unsigned port; + struct http_state *https; + + p = uri + 5; + port = 0; + + if (p[0] != '/' || p[1] != '/') { + warnx("`%s': malformed `http' URL", uri); + return EX_USAGE; + } + + p += 2; + colon = strchr(p, ':'); + slash = strchr(p, '/'); + if (colon && slash && colon < slash) + q = colon; + else + q = slash; + if (q == 0) { + warnx("`%s': malformed `http' URL", uri); + return EX_USAGE; + } + hostname = alloca(q - p + 1); + hostname[0] = '\0'; + strncat(hostname, p, q - p); + p = slash; + + if (colon && colon + 1 != slash) { + unsigned long ul; + char *ep; + + errno = 0; + ul = strtoul(colon + 1, &ep, 10); + if (ep != slash || ep == colon + 1 || errno != 0 + || ul < 1 || ul > 65534) { + warn("`%s': invalid port in URL", uri); + return EX_USAGE; + } + + port = ul; + } else { + port = 80; + } + + p = slash + 1; + + https = malloc(sizeof *https); + if (https == 0) + err(EX_OSERR, "malloc"); + + /* + * Now, we have a copy of the hostname in hostname, the specified port + * (or the default value) in port, and p points to the filename part + * of the URI. 
+ */ + https->http_hostname = safe_strdup(hostname); + https->http_port = port; + + ques = strpbrk(p, "?#"); + if (ques) { + https->http_remote_request = safe_strndup(p, ques - p); + } else { + https->http_remote_request = safe_strdup(p); + } + p = https->http_decoded_file = percent_decode(p); + /* now p is the decoded version, so we can extract the basename */ + + if (fs->fs_outputfile == 0) { + slash = strrchr(p, '/'); + if (slash) + fs->fs_outputfile = slash + 1; + else + fs->fs_outputfile = p; + } + + fs->fs_proto = https; + fs->fs_close = http_close; + fs->fs_retrieve = http_retrieve; + return 0; +} + +/* + * An HTTP proxy works by accepting a complete URI in a GET request, + * retrieving that object, and then forwarding it back to us. Because + * it can conceivably handle any URI, we have to do a bit more work + * in the parsing of it. + */ +static int +http_proxy_parse(struct fetch_state *fs, const char *uri) +{ + struct http_state *https; + const char *env, *slash, *ques; + char *file; + int rv; + + https = malloc(sizeof *https); + https->http_remote_request = safe_strdup(uri); + + env = getenv("HTTP_PROXY"); + rv = parse_host_port(env, &https->http_hostname, &https->http_port); + if (rv) { +out: + free(https->http_remote_request); + free(https); + return rv; + } + + if (strncmp(uri, "http://", 7) == 0) { + slash = strchr(uri + 7, '/'); + if (slash == 0) { + warnx("`%s': malformed `http' URL", uri); + rv = EX_USAGE; + free(https->http_hostname); + goto out; + } + ques = strpbrk(slash, "?#"); + if (ques == 0) + file = safe_strdup(slash); + else + file = safe_strndup(slash, ques - slash); + } else { + slash = uri; + while (*slash && *slash != ':') + slash++; + if (*slash) + slash++; + if (slash[0] == '/' && slash[1] == '/') { + slash += 2; + while (*slash && *slash != '/') + slash++; + } + file = safe_strdup(slash); + } + https->http_decoded_file = percent_decode(file); + free(file); + if (fs->fs_outputfile == 0) { + slash = strrchr(https->http_decoded_file, 
'/'); + /* NB: we are not guaranteed to find one... */ + fs->fs_outputfile = slash ? slash + 1 + : https->http_decoded_file; + } + + fs->fs_proto = https; + fs->fs_close = http_close; + fs->fs_retrieve = http_retrieve; + return 0; +} + +static int +http_close(struct fetch_state *fs) +{ + struct http_state *https = fs->fs_proto; + + free(https->http_hostname); + free(https->http_remote_request); + free(https->http_decoded_file); + free(https); + fs->fs_outputfile = 0; + return 0; +} + +/* + * Get a file using HTTP. We will try to implement HTTP/1.1 eventually. + * This subroutine makes heavy use of the 4.4-Lite standard I/O library, + * in particular the `fgetln' which allows us to slurp an entire `line' + * (an arbitrary string of non-NUL characters ending in a newline) directly + * out of the stdio buffer. This makes interpreting the HTTP headers much + * easier, since they are all guaranteed to end in `\r\n' and we can just + * ignore the `\r'. + */ +static int +http_retrieve(struct fetch_state *fs) +{ + struct http_state *https; + FILE *remote, *local; + int s; + struct sockaddr_in sin; + struct msghdr msg; + struct iovec iov[16]; /* XXX count precisely */ + int n, status; + const char *env; + int timo; + char *line; + size_t linelen, readresult, writeresult; + off_t total_length, restart_from; + time_t last_modified; + char *base64ofmd5; + static char buf[BUFFER_SIZE]; + int to_stdout; + char rangebuf[sizeof("Range: bytes=18446744073709551616-\r\n")]; + + https = fs->fs_proto; + to_stdout = (strcmp(fs->fs_outputfile, "-") == 0); + + if (fs->fs_timeout) { + timo = fs->fs_timeout; + } else if ((env = getenv("HTTP_TIMEOUT")) != 0) { + char *ep; + unsigned long ul; + + errno = 0; + ul = strtoul(env, &ep, 0); + if (*ep != '\0' || *env == '\0' || errno != 0 + || ul > INT_MAX) { + warnx("`%s': invalid timeout", env); + return EX_USAGE; + } + timo = ul; + } else { + timo = 0; + } + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_len = sizeof sin; + 
sin.sin_port = htons(https->http_port); + + if (inet_aton(https->http_hostname, &sin.sin_addr) == 0) { + struct hostent *hp; + + /* XXX - do timeouts for name resolution? */ + hp = gethostbyname2(https->http_hostname, AF_INET); + if (hp == 0) { + warnx("`%s': cannot resolve: %s", https->http_hostname, + hstrerror(h_errno)); + return EX_NOHOST; + } + memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof sin.sin_addr); + } + + msg.msg_name = (caddr_t)&sin; + msg.msg_namelen = sizeof sin; + msg.msg_iov = iov; + n = 0; + msg.msg_control = 0; + msg.msg_controllen = 0; + msg.msg_flags = MSG_EOF; + +#define addstr(Iov, N, Str) \ + do { \ + Iov[N].iov_base = (void *)Str; \ + Iov[N].iov_len = strlen(Iov[n].iov_base); \ + N++; \ + } while(0) + +retry: + addstr(iov, n, "GET /"); + addstr(iov, n, https->http_remote_request); + addstr(iov, n, " HTTP/1.0\r\n"); + addstr(iov, n, format_http_user_agent()); + /* do content negotiation here */ + addstr(iov, n, "Accept: */*\r\n"); + if (fs->fs_mirror) { + struct stat stab; + + errno = 0; + if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0) + || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0)) + && S_ISREG(stab.st_mode)) { + addstr(iov, n, "If-Modified-Since: "); + addstr(iov, n, format_http_date(stab.st_mtime)); + addstr(iov, n, "\r\n"); + } else if (errno != 0) { + warn("%s: cannot mirror; will retrieve anew", + fs->fs_outputfile); + } + } + if (fs->fs_restart) { + struct stat stab; + + errno = 0; + if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0) + || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0)) + && S_ISREG(stab.st_mode)) { + addstr(iov, n, "If-Range: "); + addstr(iov, n, format_http_date(stab.st_mtime)); + addstr(iov, n, "\r\n"); + sprintf(rangebuf, "Range: bytes=%qd-\r\n", + (quad_t)stab.st_size); + addstr(iov, n, rangebuf); + } else if (errno != 0) { + warn("%s: cannot restart; will retrieve anew", + fs->fs_outputfile); + } + } + addstr(iov, n, "Connection: close\r\n"); + addstr(iov, n, "\r\n"); + msg.msg_iovlen = 
n; + + s = socket(PF_INET, SOCK_STREAM, 0); + if (s < 0) { + warn("socket"); + return EX_OSERR; + } + + remote = fdopen(s, "r"); + if (remote == 0) { + warn("fdopen"); + close(s); + return EX_OSERR; + } + + setup_sigalrm(); + alarm(timo); + if (sendmsg(s, &msg, MSG_EOF) < 0) { + warn("%s", https->http_hostname); + fclose(remote); + return EX_OSERR; + } + + alarm(timo); + line = fgetln(remote, &linelen); + alarm(0); + if (line == 0) { + if (ferror(remote)) { + warn("reading reply from %s", https->http_hostname); + fclose(remote); + unsetup_sigalrm(); + return EX_OSERR; + } else { + warnx("empty reply from %s", https->http_hostname); + fclose(remote); + unsetup_sigalrm(); + return EX_PROTOCOL; + } + } + /* + * If the other end is HTTP 0.9, then we just suck their + * response over; can't do anything fancy. We assume that + * the file is a text file, so it is safe to use fgetln() + * to suck the entire file. (It had better be, since + * we used it to grab the first line.) + */ + if (linelen < 5 || strncasecmp(line, "http/", 5) != 0) { + if (to_stdout) + local = fopen("/dev/stdout", "w"); + else + local = fopen(fs->fs_outputfile, "w"); + if (local == 0) { + warn("%s: fopen", fs->fs_outputfile); + fclose(remote); + unsetup_sigalrm(); + return EX_OSERR; + } + display(fs, -1, 0); + + do { + writeresult = fwrite(line, 1, linelen, local); + display(fs, -1, writeresult); + if (writeresult != linelen) + break; + alarm(timo); + line = fgetln(remote, &linelen); + alarm(0); + } while(line != 0); + unsetup_sigalrm(); + + if (ferror(local)) { + warn("%s", fs->fs_outputfile); + fclose(local); + fclose(remote); + rm(fs); + return EX_OSERR; + } else if(ferror(remote)) { + warn("%s", https->http_hostname); + fclose(local); + fclose(remote); + rm(fs); + return EX_OSERR; + } + fclose(local); + fclose(remote); + display(fs, -1, -1); + return 0; + } + /* + * OK. The other end is doing HTTP 1.0 at the very least. + * This means that some of the fancy stuff is at least possible. 
+ */ + line[linelen - 1] = '\0'; /* turn line into a string */ + status = http_first_line(line); + + /* In the future, we might handle redirection and other responses. */ + switch(status) { + case 200: /* Here come results */ + case 206: /* Here come partial results */ + break; + + case 304: /* Object is unmodified */ + if (fs->fs_mirror) { + fclose(remote); + unsetup_sigalrm(); + return 0; + } + /* otherwise, fall through */ + default: + warnx("%s: %s: HTTP server returned error code %d", + fs->fs_outputfile, https->http_hostname, status); + if (fs->fs_verbose > 1) { + fputs(line, stderr); + fputc('\n', stderr); + while ((line = fgetln(remote, &linelen)) != 0) + fwrite(line, 1, linelen, stderr); + } + fclose(remote); + unsetup_sigalrm(); + return EX_UNAVAILABLE; + } + + total_length = -1; /* -1 means ``don't know'' */ + last_modified = -1; + base64ofmd5 = 0; + restart_from = 0; + + while((line = fgetln(remote, &linelen)) != 0) { + char *value, *ep; + enum http_header header; + unsigned long ul; + + line[linelen - 1] = '\0'; + header = http_parse_header(line, &value); + + if (header == ht_end_of_header) + break; + + switch(header) { + case ht_content_length: + errno = 0; + ul = strtoul(value, &ep, 10); + if (errno != 0 || *ep != '\r') + warnx("invalid Content-Length: `%s'", value); + if (!fs->fs_restart) + total_length = ul; + break; + + case ht_last_modified: + last_modified = parse_http_date(value); + if (last_modified == -1 && fs->fs_verbose > 0) + warnx("invalid Last-Modified: `%s'", value); + break; + + case ht_content_md5: + base64ofmd5 = safe_strdup(value); + break; + + case ht_content_range: + /* NB: we might have to restart from farther back + than we asked. */ + status = parse_http_content_range(value, &restart_from, + &total_length); + /* If we couldn't understand the reply, get the whole + thing. 
*/ + if (status) { + fs->fs_restart = 0; +/*doretry:*/ + fclose(remote); + if (base64ofmd5) + free(base64ofmd5); + restart_from = 0; + n = 0; + goto retry; + } + break; + + default: + break; + } + } + + /* + * OK, if we got here, then we have finished parsing the header + * and have read the `\r\n' line which denotes the end of same. + * We may or may not have a good idea of the length of the file + * or its modtime. At this point we will have to deal with + * any special byte-range, content-negotiation, redirection, + * or authentication, and probably jump back up to the top, + * once we implement those features. So, all we have left to + * do is open up the output file and copy data from input to + * output until EOF. + */ + if (to_stdout) + local = fopen("/dev/stdout", "w"); + else + local = fopen(fs->fs_outputfile, "w"); + if (local == 0) { + warn("%s: fopen", fs->fs_outputfile); + fclose(remote); + unsetup_sigalrm(); + return EX_OSERR; + } + + fs->fs_modtime = last_modified; + fseek(local, restart_from, SEEK_SET); /* XXX truncation off_t->long */ + display(fs, total_length, restart_from); /* XXX truncation */ + + do { + alarm(timo); + readresult = fread(buf, 1, sizeof buf, remote); + alarm(0); + + if (readresult == 0) + break; + display(fs, total_length, readresult); + + writeresult = fwrite(buf, 1, sizeof buf, local); + } while (writeresult == readresult); + + status = errno; /* save errno for warn(), below, if needed */ + display(fs, total_length, -1); /* do here in case we have to warn */ + errno = status; + + if (ferror(remote)) { + warn("reading remote file from %s", https->http_hostname); + status = EX_OSERR; + } else if(ferror(local)) { + warn("`%s': fwrite", fs->fs_outputfile); + status = EX_OSERR; + } else { + status = 0; + } + if (base64ofmd5) { + /* + * Ack. When restarting, the MD5 only covers the parts + * we are getting, not the whole thing. 
+ */ + fseek(local, restart_from, SEEK_SET); + status = check_md5(local, base64ofmd5); + free(base64ofmd5); + } + + unsetup_sigalrm(); + fclose(local); + fclose(remote); + + if (status != 0) + rm(fs); + else + adjmodtime(fs); + + return status; +#undef addstr +} + +/* + * The format of the response line for an HTTP request is: + * HTTP/V.vv{WS}999{WS}Explanatory text for humans to read\r\n + * Where {WS} represents whitespace (spaces and/or tabs) and 999 + * is a machine-interprable result code. We return the integer value + * of that result code, or the impossible value `0' if we are unable to + * parse the result. + */ +static int +http_first_line(const char *line) +{ + char *ep; + unsigned long ul; + + if (strncasecmp(line, "http/", 5) != 0) + return 0; + + line += 5; + while (*line && isdigit(*line)) /* skip major version number */ + line++; + if (*line++ != '.') /* skip period */ + return 0; + while (*line && isdigit(*line)) /* skip minor version number */ + line++; + while (*line && isspace(*line)) /* skip first whitespace */ + line++; + + errno = 0; + ul = strtoul(line, &ep, 10); + if (errno != 0 || ul > 999 || ul < 100 || !isspace(*ep)) + return 0; + return ul; +} + +/* + * The format of a header line for an HTTP request is: + * Header-Name: header-value (with comments in parens)\r\n + * This would be a nice application for gperf(1), except that the + * names are case-insensitive and gperf can't handle that. + */ +static enum http_header +http_parse_header(char *line, char **valuep) +{ + char *colon, *value; + + if (*line == '\0' /* protocol error! */ + || (line[0] == '\r' && line[1] == '\0')) + return ht_end_of_header; + + colon = strchr(line, ':'); + if (colon == 0) + return ht_syntax_error; + *colon = '\0'; + + for (value = colon + 1; *value && isspace(*value); value++) + ; /* do nothing */ + + /* XXX - strip comments? 
*/ + *valuep = value; + +#define cmp(name, num) do { if (!strcasecmp(line, name)) return num; } while(0) + cmp("Content-Length", ht_content_length); + cmp("Last-Modified", ht_last_modified); + cmp("Content-MD5", ht_content_md5); + cmp("Content-Range", ht_content_range); + cmp("Content-Type", ht_content_type); + cmp("Transfer-Encoding", ht_transfer_encoding); + cmp("Warning", ht_warning); +#undef cmp + return ht_unknown; +} + +/* + * Compute the RSA Data Security, Inc., MD5 Message Digest of the file + * given in `fp', see if it matches the one given in base64 encoding by + * `base64ofmd5'. Warn and return an error if it doesn't. + */ +static int +check_md5(FILE *fp, char *base64ofmd5) { + MD5_CTX ctx; + unsigned char digest[16]; + char buf[512]; + size_t len; + char *ourval; + + MD5Init(&ctx); + while ((len = fread(buf, 1, sizeof buf, fp)) != 0) { + MD5Update(&ctx, buf, len); + } + MD5Final(digest, &ctx); + ourval = to_base64(digest, 16); + if (strcmp(ourval, base64ofmd5) != 0) { + warnx("MD5 digest mismatch: %s, should be %s", ourval, + base64ofmd5); + free(ourval); + return EX_DATAERR; + } + free(ourval); + return 0; +} + +static const char *wkdays[] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" +}; +static const char *months[] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", + "Nov", "Dec" +}; + +/* + * Interpret one of the three possible formats for an HTTP date. + * All of them are really bogus; HTTP should use either ISO 8601 + * or NTP timestamps. We make some attempt to accept a subset of 8601 + * format. The three standard formats are all fixed-length subsets of their + * respective standards (except 8601, which puts all of the stuff we + * care about up front). 
+ */ +static time_t +parse_http_date(char *string) +{ + static struct tm tm; /* get good initialization */ + time_t rv; + const char *tz; + int i; + + /* 8601 has the shortest minimum length */ + if (strlen(string) < 15) + return -1; + + if (isdigit(*string)) { + /* ISO 8601: 19970127T134551stuffwedon'tcareabout */ + for (i = 0; i < 15; i++) { + if (i != 8 && !isdigit(string[i])) + break; + } + if (i < 15) + return -1; +#define digit(x) (string[x] - '0') + tm.tm_year = (digit(0) * 1000 + + digit(1) * 100 + + digit(2) * 10 + + digit(3)) - 1900; + tm.tm_mon = digit(4) * 10 + digit(5) - 1; + tm.tm_mday = digit(6) * 10 + digit(7); + if (string[8] != 'T' && string[8] != 't' && string[8] != ' ') + return -1; + tm.tm_hour = digit(9) * 10 + digit(10); + tm.tm_min = digit(11) * 10 + digit(12); + tm.tm_sec = digit(13) * 10 + digit(14); + /* We don't care about the rest of the stuff after the secs. */ + } else if (string[3] == ',') { + /* Mon, 27 Jan 1997 14:24:35 stuffwedon'tcareabout */ + if (strlen(string) < 25) + return -1; + string += 5; /* skip over day-of-week */ + if (!(isdigit(string[0]) && isdigit(string[1]))) + return -1; + tm.tm_mday = digit(0) * 10 + digit(1); + for (i = 0; i < 12; i++) { + if (strncasecmp(months[i], &string[3], 3) == 0) + break; + } + if (i >= 12) + return -1; + tm.tm_mon = i; + + if (sscanf(&string[7], "%d %d:%d:%d", &i, &tm.tm_hour, + &tm.tm_min, &tm.tm_sec) != 4) + return -1; + tm.tm_year = i - 1900; + + } else if (string[3] == ' ') { + /* Mon Jan 27 14:25:20 1997 */ + if (strlen(string) < 25) + return -1; + string += 4; + for (i = 0; i < 12; i++) { + if (strncasecmp(string, months[i], 3) == 0) + break; + } + if (i >= 12) + return -1; + tm.tm_mon = i; + if (sscanf(&string[4], "%d %d:%d:%d %u", &tm.tm_mday, + &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &i) + != 5) + return -1; + tm.tm_year = i - 1900; + } else { + /* Monday, 27-Jan-97 14:31:09 stuffwedon'tcareabout */ + char *comma = strchr(string, ','); + char mname[4]; + + if (comma == 0) + return 
-1; + string = comma + 1; + if (strlen(string) < 19) + return -1; + string++; + mname[4] = '\0'; + if (sscanf(string, "%d-%c%c%c-%d %d:%d:%d", &tm.tm_mday, + mname, mname + 1, mname + 2, &tm.tm_year, + &tm.tm_hour, &tm.tm_min, &tm.tm_sec) != 8) + return -1; + for (i = 0; i < 12; i++) { + if (strcasecmp(months[i], mname)) + break; + } + if (i >= 12) + return -1; + tm.tm_mon = i; + } +#undef digit + + if (tm.tm_sec > 60 || tm.tm_min > 59 || tm.tm_hour > 23 + || tm.tm_mday > 31 || tm.tm_mon > 11) + return -1; + if (tm.tm_sec < 0 || tm.tm_min < 0 || tm.tm_hour < 0 + || tm.tm_mday < 0 || tm.tm_mon < 0 || tm.tm_year < 0) + return -1; + + tz = getenv("TZ"); + setenv("TZ", "UTC0", 1); + tzset(); + rv = mktime(&tm); + if (tz) + setenv("TZ", tz, 1); + else + unsetenv("TZ"); + return rv; +} + +static char * +format_http_date(time_t when) +{ + struct tm *tm; + static char buf[30]; + + tm = gmtime(&when); + if (tm == 0) + return 0; +#ifndef HTTP_DATE_ISO_8601 + sprintf(buf, "%s, %02d %s %04d %02d:%02d:%02d GMT", + wkdays[tm->tm_wday], tm->tm_mday, months[tm->tm_mon], + tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec); +#else /* ISO 8601 */ + sprintf(buf, "%04d%02d%02dT%02d%02d%02d+0000", + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); +#endif + return buf; +} + +static char * +format_http_user_agent(void) +{ + static char buf[128]; + static int inited; + + if (!inited) { + int mib[2]; + char ostype[128], osrelease[128], machine[128]; + size_t len; + + mib[0] = CTL_KERN; + mib[1] = KERN_OSTYPE; + len = sizeof ostype; + if (sysctl(mib, 2, ostype, &len, 0, 0) < 0) { + warn("sysctl"); + ostype[0] = '\0'; + } + mib[1] = KERN_OSRELEASE; + len = sizeof osrelease; + if (sysctl(mib, 2, osrelease, &len, 0, 0) < 0) { + warn("sysctl"); + osrelease[0] = '\0'; + } + mib[0] = CTL_HW; + mib[1] = HW_MACHINE; + len = sizeof machine; + if (sysctl(mib, 2, machine, &len, 0, 0) < 0) { + warn("sysctl"); + machine[0] = '\0'; + } + + snprintf(buf, 
/*
 * Parse a Content-Range return header from the server.  RFC 2068 defines
 * this header to have the format:
 *	Content-Range: bytes 12345-67890/123456
 * Since we always ask for the whole rest of the file, we consider it an
 * error if the reply doesn't claim to give it to us.
 *
 * `orig' is the header value (text after the colon); trailing whitespace
 * such as a `\r' is tolerated.  On success, stores the first byte offset
 * in *restart_from and the total entity length in *total_length and
 * returns 0.  On failure, warns, leaves both outputs untouched and returns
 * EX_PROTOCOL.
 */
static int
parse_http_content_range(char *orig, off_t *restart_from, off_t *total_length)
{
	unsigned long long first, last, total;
	char *ep;

	/* Compare only the unit token, not the whole header value. */
	if (strncasecmp(orig, "bytes", 5) != 0
	    || (orig[5] != '\0' && !isspace((unsigned char)orig[5]))) {
		warnx("unknown Content-Range unit: `%s'", orig);
		return EX_PROTOCOL;
	}

	orig += 5;
	while (isspace((unsigned char)*orig))
		orig++;

	/*
	 * strtoull() would accept leading blanks and a sign, so insist
	 * on a digit at the start of every number.
	 */
	errno = 0;
	if (!isdigit((unsigned char)*orig))
		goto invalid;
	first = strtoull(orig, &ep, 10);
	if (errno != 0 || *ep != '-' || !isdigit((unsigned char)ep[1]))
		goto invalid;
	last = strtoull(ep + 1, &ep, 10);
	if (errno != 0 || *ep != '/' || last < first
	    || !isdigit((unsigned char)ep[1]))
		goto invalid;
	total = strtoull(ep + 1, &ep, 10);
	if (errno != 0 || !(*ep == '\0' || isspace((unsigned char)*ep)))
		goto invalid;
	/* Everything must be representable as a non-negative off_t. */
	if ((off_t)total < 0 || (unsigned long long)(off_t)total != total)
		goto invalid;

	/* The range must run to the very end of the entity. */
	if (last >= total || last + 1 != total) {
		warnx("HTTP server did not return requested Content-Range");
		return EX_PROTOCOL;
	}

	*restart_from = (off_t)first;
	*total_length = (off_t)total;
	return 0;

invalid:
	warnx("invalid Content-Range: `%s'", orig);
	return EX_PROTOCOL;
}
<stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sysexits.h> #include <unistd.h> -#include <ftpio.h> - -#define BUFFER_SIZE 1024 -#define HTTP_TIMEOUT 300 /* seconds */ -#define FTP_TIMEOUT 300 /* seconds */ - -char buffer[BUFFER_SIZE]; - -extern char *__progname; /* from crt0.o */ - -int verbose = 1; -int ftp_verbose = 0; -int linkfile = 0; -char *outputfile = 0; -char *change_to_dir = 0; -char *host = 0; -int passive_mode = 0; -char *file_to_get = 0; -int ftp = 0; -int http_proxy = 0; -int http = 0; -int http_port = 80; -int mirror = 0; -int newtime = 0; -int restart = 0; -time_t modtime; -FILE *file = 0; -int timeout_ival = 0; - -void usage(void), die(int), rm(void), timeout(int), ftpget(void), - httpget(void), fileget(void), - display(int, int), parse(char *), output_file_name(void), - f_size(char *, off_t *, time_t *), ftperr(FILE* ftp, char *, ...), - filter(unsigned char *, int), - setup_http_proxy(void); - -int match(char *, char *), http_open(void); +#include <sys/param.h> /* for MAXHOSTNAMELEN */ +#include <sys/time.h> /* for struct timeval, gettimeofday */ -void -usage() -{ - fprintf(stderr, "usage: %s [-DHINPMTVLqlmnprv] [-o outputfile] <-f file -h host [-c dir]| URL>\n", __progname); - exit(1); -} +#include "fetch.h" -void -die(int sig) -{ - int e = errno; - - rm(); - if (!sig) - fprintf (stderr, "%s: %s\n", __progname, strerror(e)); - else - warnx ("Interrupted by signal %d", sig); - exit(1); -} +static struct fetch_state clean_fetch_state; +static sigjmp_buf sigbuf; +static int get(struct fetch_state *volatile fs); -void -adjmodtime() +static void +usage(const char *argv0) { - struct timeval tv[2]; - - if (!newtime) { - tv[0].tv_usec = tv[1].tv_usec = 0; - tv[0].tv_sec = time(0); - tv[1].tv_sec = modtime; - utimes (outputfile, tv); - } + fprintf(stderr, + "%s: usage:\n\t%s [-DHINPMTVLqlmnprv] [-o outputfile] " + "[-f file -h host [-c dir] | URL]\n", argv0, argv0); + exit(EX_USAGE); } -void -rm() -{ - if (file) { - 
fclose(file); - if (file != stdout) { - if (!restart && !mirror) - remove(outputfile); - adjmodtime(); - } - } -} int -main(int argc, char **argv) +main(int argc, char *const *argv) { int c; - char *s; - - while ((c = getopt (argc, argv, "D:HINPMT:V:Lqc:f:h:o:plmnrv")) != -1) { - switch (c) { - case 'D': case 'H': case 'I': case 'N': case 'L': case 'V': - break; /* ncftp compatibility */ - - case 'q': - verbose = 0; - - case 'c': - change_to_dir = optarg; - break; + char *ep; + struct fetch_state fs; + const char *change_to_dir, *file_to_get, *hostname; + int error, rv; + unsigned long l; + + init_schemes(); + fs = clean_fetch_state; + fs.fs_verbose = 1; + change_to_dir = file_to_get = hostname = 0; + + while ((c = getopt(argc, argv, "D:HINPMT:V:Lqc:f:h:o:plmnrv")) != -1) { + switch (c) { + case 'D': case 'H': case 'I': case 'N': case 'L': case 'V': + break; /* ncftp compatibility */ - case 'f': - file_to_get = optarg; - break; + case 'q': + fs.fs_verbose = 0; + + case 'c': + change_to_dir = optarg; + break; - case 'h': - host = optarg; - ftp = 1; - break; - - case 'l': - linkfile = 1; - break; - - case 'o': - outputfile = optarg; - break; + case 'f': + file_to_get = optarg; + break; - case 'p': case 'P': - passive_mode = 1; - break; + case 'h': + hostname = optarg; + break; + + case 'l': + fs.fs_linkfile = 1; + break; + + case 'o': + fs.fs_outputfile = optarg; + break; + + case 'p': case 'P': + fs.fs_passive_mode = 1; + break; - case 'm': case 'M': - mirror = 1; - break; + case 'm': case 'M': + fs.fs_mirror = 1; + break; - case 'n': - newtime = 1; - break; + case 'n': + fs.fs_newtime = 1; + break; - case 'r': - restart = 1; - break; + case 'r': + fs.fs_restart = 1; + break; + + case 'R': + fs.fs_precious = 1; + break; - case 'v': - ftp_verbose = 1; - break; + case 'v': + if (fs.fs_verbose < 2) + fs.fs_verbose = 2; + else + fs.fs_verbose++; + break; + + case 'T': + /* strtol sets errno to ERANGE in the case of overflow */ + errno = 0; + l = strtoul(optarg, &ep, 0); 
+ if (!optarg[0] || *ep || errno != 0 || l > INT_MAX) + errx(EX_USAGE, "invalid timeout value: `%s'", + optarg); + fs.fs_timeout = l; + break; + + default: + case '?': + usage(argv[0]); + } + } - case 'T': - timeout_ival = atoi(optarg); - break; + clean_fetch_state = fs; /* preserve option settings */ - default: - case '?': - usage(); - } + if (argv[optind] && (hostname || change_to_dir || file_to_get)) { + warnx("cannot use -h, -c, or -f with a URI argument"); + usage(argv[0]); } - argc -= optind; - argv += optind; - if (argv[0]) { - if (host || change_to_dir || file_to_get) - usage(); - s = strdup(argv[0]); - if (s == NULL) - s = argv[0]; /* optomistic, I know.. malloc just failed. */ - parse(s); - } else { - if (!host || !file_to_get) - usage(); - } - - if (mirror && restart) - errx(1, "-m and -r are mutually exclusive."); - - output_file_name(); - - signal(SIGHUP, die); - signal(SIGINT, die); - signal(SIGQUIT, die); - signal(SIGTERM, die); + + if (fs.fs_mirror && fs.fs_restart) + errx(EX_USAGE, "-m and -r are mutually exclusive."); - setup_http_proxy(); + if (argv[optind] == 0) { + char *uri; - if (http) - httpget(); - else if (ftp) - ftpget(); - else - fileget(); - exit(0); -} + if (hostname == 0) hostname = "localhost"; + if (change_to_dir == 0) change_to_dir = ""; + if (file_to_get == 0) { + usage(argv[0]); + } -void -timeout(int sig) -{ - fprintf (stderr, "\n%s: Timeout\n", __progname); - rm(); - exit(1); -} + uri = alloca(sizeof("ftp://") + strlen(hostname) + + strlen(change_to_dir) + 2 + strlen(file_to_get)); + strcpy(uri, "ftp://"); + strcat(uri, hostname); + /* + * XXX - we should %-map a leading `/' into `%2f', but for + * anonymous FTP it is unlikely to matter. Still, it would + * be better to follow the spec. 
+ */ + if (change_to_dir[0] != '/') + strcat(uri, "/"); + strcat(uri, change_to_dir); + if (file_to_get[0] != '/' && uri[strlen(uri) - 1] != '/') + strcat(uri, "/"); + strcat(uri, file_to_get); + + error = parse_uri(&fs, uri); + if (error) + return error; + return get(&fs); + } -void -fileget() -{ - char *basename; + for (rv = 0; argv[optind] != 0; optind++) { + error = parse_uri(&fs, argv[optind]); + if (error) { + rv = error; + continue; + } - if (access(file_to_get, R_OK)) { - fprintf(stderr, "unable to access local file `%s'\n", file_to_get); - exit(1); - } - basename = strrchr(file_to_get, '/'); - if (!basename) { - fprintf(stderr, "malformed filename `%s' - expected full path.\n", - file_to_get); - exit(1); - } - ++basename; /* move over the / */ - if (!access(basename, F_OK)) { - fprintf(stderr, "%s: file already exists.\n", basename); - exit(1); + error = get(&fs); + if (error) { + rv = error; + } + fs = clean_fetch_state; } - if (linkfile) { - if (symlink(file_to_get, basename) == -1) { - perror("symlink"); - exit(1); + return rv; +} + +/* + * The signal handling is probably more complex than it needs to be, + * but it doesn't cost a lot, so we'll be extra-careful. Using + * siglongjmp() to get out of the signal handler allows us to + * call rm() without having to store the state variable in some global + * spot where the signal handler can get at it. We also obviate the need + * for a separate timeout signal handler. 
+ */ +static int +get(struct fetch_state *volatile fs) +{ + volatile int error; + struct sigaction oldhup, oldint, oldquit, oldterm; + struct sigaction catch; + volatile sigset_t omask; + + sigemptyset(&catch.sa_mask); + sigaddset(&catch.sa_mask, SIGHUP); + sigaddset(&catch.sa_mask, SIGINT); + sigaddset(&catch.sa_mask, SIGQUIT); + sigaddset(&catch.sa_mask, SIGTERM); + sigaddset(&catch.sa_mask, SIGALRM); + catch.sa_handler = catchsig; + catch.sa_flags = 0; + + sigprocmask(SIG_BLOCK, &catch.sa_mask, (sigset_t *)&omask); + sigaction(SIGHUP, &catch, &oldhup); + sigaction(SIGINT, &catch, &oldint); + sigaction(SIGQUIT, &catch, &oldquit); + sigaction(SIGTERM, &catch, &oldterm); + + error = sigsetjmp(sigbuf, 0); + if (error == SIGALRM) { + rm(fs); + unsetup_sigalrm(); + fprintf(stderr, "\n"); /* just in case */ + warnx("%s: %s: timed out", fs->fs_outputfile, fs->fs_status); + goto close; + } else if (error) { + rm(fs); + fprintf(stderr, "\n"); /* just in case */ + warnx("%s: interrupted by signal: %s", fs->fs_status, + sys_signame[error]); + sigdelset(&omask, error); + signal(error, SIG_DFL); + sigprocmask(SIG_SETMASK, (sigset_t *)&omask, 0); + raise(error); /* so that it gets reported as such */ } - } - else { - char *buf = alloca(strlen(file_to_get) + strlen(basename) + 15); - sprintf(buf, "/bin/cp -p %s %s", file_to_get, basename); - if (system(buf)) { - fprintf(stderr, "failed to copy %s successfully.", file_to_get); - exit(1); - } - } -} + sigprocmask(SIG_SETMASK, (sigset_t *)&omask, 0); + error = fs->fs_retrieve(fs); -void -ftpget() -{ - FILE *ftp, *fp; - char *cp, *lp; - int status, n; - off_t size, size0, seekloc; - char ftp_pw[200]; - time_t t; - - if ((cp = getenv("FTP_PASSWORD")) != NULL) - strcpy(ftp_pw, cp); - else { - sprintf (ftp_pw, "%s@", getpwuid (getuid ())->pw_name); - n = strlen (ftp_pw); - gethostname (ftp_pw + n, 200 - n); - } - if ((lp = getenv("FTP_LOGIN")) == NULL) - lp = "anonymous"; - ftp = ftpLogin(host, lp, ftp_pw, 0, ftp_verbose, &status); - 
if (!ftp) { - if (status) - errx(1, "%s: %s", host, ftpErrString(status)); - else - errx(1, "couldn't open FTP connection to %s: %s", - host, hstrerror(h_errno)); - } +close: + sigaction(SIGHUP, &oldhup, 0); + sigaction(SIGINT, &oldint, 0); + sigaction(SIGQUIT, &oldquit, 0); + sigaction(SIGTERM, &oldterm, 0); + fs->fs_close(fs); - /* Time to set our defaults */ - ftpBinary (ftp); - ftpPassive (ftp, passive_mode); - - if (change_to_dir) { - status = ftpChdir (ftp, change_to_dir); - if (status) - ftperr (ftp, "couldn't cd to %s: ", change_to_dir); - } - size = ftpGetSize (ftp, file_to_get); - modtime = ftpGetModtime (ftp, file_to_get); - if (modtime <= 0) { - warnx ("Couldn't get file time for %s - using current time", file_to_get); - modtime = (time_t) -1; - } - - if (!strcmp (outputfile, "-")) - restart = 0; - if (restart || mirror) { - f_size (outputfile, &size0, &t); - if (mirror && size0 == size && modtime <= t) { - fclose(ftp); - return; - } - else if (restart) { - if (size0 && size0 < size) - seekloc = size0; - else - seekloc = size0 = 0; - } - } - else if (!restart) - seekloc = size0 = 0; - - fp = ftpGet (ftp, file_to_get, &seekloc); - if (fp == NULL) - if (ftpErrno(ftp)) - ftperr (ftp, NULL); - else - die(0); - if (size0 && !seekloc) - size0 = 0; - - if (strcmp (outputfile, "-")) { - file = fopen (outputfile, size0 ? 
"a" : "w"); - if (!file) - err (1, "could not open output file %s.", outputfile); - } else - file = stdout; - - if (timeout_ival) { - char env[80]; - /* Override any environment variable */ - snprintf(env, sizeof env - 1, "FTP_TIMEOUT=%d", timeout_ival); - putenv(env); - } - else { - char *cp = getenv("FTP_TIMEOUT"); + return error; +} - if (!cp || !(timeout_ival = atoi(cp))) - timeout_ival = FTP_TIMEOUT; - } + +/* + * Utility functions + */ - display (size, size0); - while (1) { - struct sigaction act; - - act.sa_handler = timeout; - act.sa_mask = 0; - act.sa_flags = 0; - sigaction(SIGALRM, &act, NULL); - alarm(timeout_ival); - n = status = fread (buffer, 1, BUFFER_SIZE, fp); - alarm(0); - act.sa_handler = SIG_DFL; - sigaction(SIGALRM, &act, NULL); - if (status <= 0) - break; - display (size, n); - status = fwrite (buffer, 1, n, file); - if (status != n) - break; - } - if (status < 0) - die(0); - fclose(fp); - fclose(file); - display (size, -1); - if (file != stdout) - adjmodtime(); - exit (0); +/* + * Handle all signals by jumping back into get(). + */ +void +catchsig(int sig) +{ + siglongjmp(sigbuf, sig); } +/* Used to generate the progress display when not in quiet mode. */ void -display (int size, int n) +display(struct fetch_state *fs, off_t size, ssize_t n) { - static int bytes, pr, init = 0; + static off_t bytes; + static int pr, init = 0; static struct timeval t0, t_start; static char *s; struct timezone tz; struct timeval t; float d; - if (!verbose) + if (!fs->fs_verbose) return; if (init == 0) { init = 1; gettimeofday(&t0, &tz); t_start = t0; bytes = pr = 0; - s = (char *) malloc (strlen(outputfile) + 50); + s = malloc(strlen(fs->fs_outputfile) + 50); if (size > 0) - sprintf (s, "Receiving %s (%d bytes)%s", outputfile, size, + sprintf (s, "Receiving %s (%qd bytes)%s", fs->fs_outputfile, + (quad_t)size, size ? 
"" : " [appending]"); else - sprintf (s, "Receiving %s", outputfile); + sprintf (s, "Receiving %s", fs->fs_outputfile); printf ("%s", s); fflush (stdout); bytes = n; @@ -434,16 +313,18 @@ display (int size, int n) if (size > 0) printf ("\r%s: 100%%", s); else - printf ("\r%s: %d Kbytes", s, bytes/1024); + printf ("\r%s: %qd Kbytes", s, (quad_t)bytes/1024); d = t.tv_sec + t.tv_usec/1.e6 - t_start.tv_sec - t_start.tv_usec/1.e6; - printf ("\n%d bytes transfered in %.1f seconds", bytes, d); + printf ("\n%qd bytes transfered in %.1f seconds", (quad_t)bytes, d); d = bytes/d; if (d < 1000) - printf (" (%d Bytes/s)\n", (int)d); + printf (" (%.0f bytes/s)\n", d); else { d /=1024; - printf (" (%.2f K/s)\n", d); + printf (" (%.2f kB/s)\n", d); } + free(s); + init = 0; return; } bytes += n; @@ -453,357 +334,11 @@ display (int size, int n) t0 = t; pr++; if (size > 1000000) - printf ("\r%s: %2d%%", s, bytes/(size/100)); + printf ("\r%s: %2qd%%", s, (quad_t)bytes/(size/100)); else if (size > 0) - printf ("\r%s: %2d%%", s, 100*bytes/size); + printf ("\r%s: %2qd%%", s, (quad_t)100*bytes/size); else - printf ("\r%s: %d Kbytes", s, bytes/1024); + printf ("\r%s: %qd kB", s, (quad_t)bytes/1024); fflush (stdout); } -void -parse (char *s) -{ - char *p; - - if (strncasecmp (s, "file:", 5) == 0) { - /* file:filename */ - s += 4; - *s++ = '\0'; - host = NULL; - ftp = http = 0; - file_to_get = s; - return; - } - else if (strncasecmp (s, "ftp://", 6) == 0) { - /* ftp://host.name/file/name */ - s += 6; - p = strchr(s, '/'); - if (!p) { - warnx("no filename??"); - usage(); - } - } - else if (strncasecmp (s, "http://", 7) == 0) { - /* http://host.name/file/name */ - char *q; - s += 7; - p = strchr(s, '/'); - if (!p) { - warnx ("no filename??"); - usage (); - } - *p++ = 0; - q = strchr (s, ':'); - if (q && q < p) { - *q++ = 0; - http_port = atoi (q); - } - host = s; - file_to_get = p; - http = 1; - return; - } - else { - /* assume host.name:/file/name */ - p = strchr (s, ':'); - if (!p) { - /* 
assume /file/name */ - host = NULL; - ftp = http = 0; - file_to_get = s; - return; - } - } - ftp = 1; - *p++ = 0; - host = s; - s = strrchr (p, '/'); - if (s) { - *s++ = 0; - change_to_dir = p; - file_to_get = s; - } else { - change_to_dir = 0; - file_to_get = p; - } -} - -void -output_file_name () -{ - char *p; - - if (!outputfile) { - p = strrchr (file_to_get, '/'); - if (!p || (!ftp && !http)) - p = file_to_get; - else - p++; - outputfile = strdup (p); - } -} - -void -f_size (char *name, off_t *size, time_t *time) -{ - struct stat s; - - *size = 0; - - if (stat (name, &s)) - return; - *size = s.st_size; - *time = s.st_mtime; -} - -void -ftperr (FILE* ftp, char *fmt, ...) -{ - va_list ap; - va_start (ap, fmt); - - if (fmt) - vfprintf(stderr, fmt, ap); - if(ftp) { - const char *str = ftpErrString(ftpErrno(ftp)); - - if (str) - fprintf(stderr, "%s\n", str); - } - rm (); - exit (1); -} - -void -httpget () -{ - char *cp, str[1000]; - struct timeval tv; - time_t tout; - fd_set fdset; - int i, s; - - restart = 0; - - s = http_open (); - sprintf (str, "GET %s%s HTTP/1.0\r\n\r\n", - http_proxy? 
"" : "/", file_to_get); - i = strlen (str); - if (i != write (s, str, i)) - err (1, "could not send GET command to HTTP server."); - - FD_ZERO (&fdset); - FD_SET (s, &fdset); - if (timeout_ival) - tout = timeout_ival; - else if ((cp = getenv("HTTP_TIMEOUT")) != NULL) - tout = atoi(cp); - else - tout = HTTP_TIMEOUT; - - if (strcmp (outputfile, "-")) { - file = fopen (outputfile, "w"); - if (!file) - err (1, "could not open output file %s.", outputfile); - } else { - file = stdout; - verbose = 0; - } - - while (1) { - tv.tv_sec = tout; - tv.tv_usec = 0; - i = select (s+1, &fdset, 0, 0, &tv); - switch (i) { - case 0: - warnx ("Timeout"); - rm (); - exit (1); - case 1: - i = read (s, buffer, sizeof (buffer)); - filter (buffer, i); - if (i == 0) - exit (0); - break; - default: - err (1, "communication error with HTTP server."); - } - } -} - -int -match (char *pat, char *s) -{ - regex_t preg; - regmatch_t pmatch[2]; - - regcomp (&preg, pat, REG_EXTENDED|REG_ICASE); - if (regexec(&preg, s, 2, pmatch, 0)) - return 0; - return pmatch[1].rm_so ? 
pmatch[1].rm_so : -1; -} - -void -filter (unsigned char *p, int len) -{ -#define S 512 - static unsigned char s[S+2]; - static int header_len = 0, size = -1, n; - int i = len; - unsigned char *q = p; - - if (header_len < S) { - while (header_len < S && i--) - s[header_len++] = *q++; - s[header_len] = 0; - if (len && (header_len < S)) - return; - if (match ("^HTTP/[0-9]+\\.[0-9]+[ \t]+200[^0-9]", s) == 0) { - /* maybe not found, or document w/o header */ - if (match ("^HTTP/[0-9]+\\.[0-9]+[ \t]+[0-9]+", s)) { - fprintf (stderr, "%s fetching failed, header so far:\n%s\n", file_to_get, s); - rm (); - exit (1); - } - /* assume no header */ - /* write s */ - display (size, 0); - i = fwrite (s, 1, header_len, file); - if (i != header_len) - die(0); - display (size, header_len); - /* then p */ - if (p+len > q) { - i = fwrite (q, 1, p+len-q, file); - if (i != p+len-q) - die(0); - display (size, i); - } - } else { - unsigned char *t; - /* document begins with a success line. try to get size */ - i = match ("content-length:[ \t]*([0-9]+)", s); - if (i > 0) - size = atoi (s+i); - /* assume that the file to get begins after an empty line */ - i = match ("(\n\n|\r\n\r\n)", s); - if (i > 0) { - if (s[i] == '\r') - t = s+i+4; - else - t = s+i+2; - } else { - fprintf (stderr, "Can't decode the header!\n"); - rm (); - exit (1); - } - display (size, 0); - n = (s-t)+header_len; - i = fwrite (t, 1, n, file); - if (i != n) - die(0); - display (size, n); - if (p+len > q) { - n = p+len-q; - i = fwrite (q, 1, n, file); - if (i != n) - die(0); - display (size, n); - } - } - } else { - i = fwrite (p, 1, len, file); - if (i != len) - die(0); - if (len) - display (size, i); - } - if (len == 0) - display (size, -1); -} - -int -http_open() -{ - unsigned long a; - struct sockaddr_in sin, sin2; - struct hostent *h; - int s; - - a = inet_addr (host); - if (a != INADDR_NONE) { - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = a; - } else { - h = gethostbyname (host); - if (!h) - err (1, "could 
not lookup host %s.", host); - sin.sin_family = h->h_addrtype; - bcopy(h->h_addr, (char *)&sin.sin_addr, h->h_length); - } - sin.sin_port = htons (http_port); - if ((s = socket (sin.sin_family, SOCK_STREAM, 0)) < 0) - err (1, "socket"); - bzero ((char *)&sin2, sizeof (sin2)); - sin2.sin_family = AF_INET; - sin2.sin_port = 0; - sin2.sin_addr.s_addr = htonl (INADDR_ANY); - if (bind (s, (struct sockaddr *)&sin2, sizeof (sin2))) - err (1, "could not bind to socket."); - - if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) - err (1, "connection failed"); - return s; -} - -int -isDebug () -{ - return 0; -} - -void msgDebug (char *p) -{ - printf ("%s", p); -} - -void -setup_http_proxy() -{ - char *e; - char *p; - char *url; - unsigned short port; - - if (!(e = getenv("HTTP_PROXY")) - || !(p = strchr(e, ':')) - || (port = atoi(p+1)) == 0) - return; - - if (!(url = (char *) malloc (strlen(file_to_get) - + strlen(host) - + (change_to_dir ? strlen(change_to_dir) : 0) - + 50))) - return; - - if (http) { - sprintf(url, "http://%s:%d/%s", - host, http_port, file_to_get); - } else { - if (change_to_dir) { - sprintf(url, "ftp://%s/%s/%s", - host, change_to_dir, file_to_get); - } else { - sprintf(url, "ftp://%s/%s", host, file_to_get); - } - } - file_to_get = url; - - *p = 0; - host = strdup(e); - http_port = port; - http = 1; - http_proxy = 1; -} - diff --git a/usr.bin/fetch/uri.c b/usr.bin/fetch/uri.c new file mode 100644 index 0000000..95d4c91 --- /dev/null +++ b/usr.bin/fetch/uri.c @@ -0,0 +1,122 @@ +/*- + * Copyright 1997 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. 
not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#include <sys/types.h> + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> + +#include "fetch.h" +/* Zero-terminated table of the URI schemes this file searches; find_scheme() and init_schemes() stop at the 0 entry. */ +struct uri_scheme *schemes[] = { + &http_scheme, &ftp_scheme, &file_scheme, 0 +}; +/* Return the schemes[] entry whose sc_name matches name (compared with strcasecmp), or 0 if no entry matches. */ +static struct uri_scheme * +find_scheme(const char *name) +{ + int i; + + for (i = 0; schemes[i]; i++) { + if (strcasecmp(schemes[i]->sc_name, name) == 0) + return schemes[i]; + } + return 0; +} +/* First pass: set sc_can_proxy on every scheme whose sc_proxy_envar is present in the environment. Second pass: walk each scheme's comma-separated sc_proxy_by list and store the first listed scheme that can proxy in sc_proxyproto. */ +void +init_schemes(void) +{ + int i; + char schemebuf[32]; /* NOTE(review): the strncat()/strcpy() below are not limited to this size - confirm sc_proxy_by names stay under 32 bytes */ + const char *s, *t; + struct uri_scheme *scp; + + for (i = 0; schemes[i]; i++) { + if (getenv(schemes[i]->sc_proxy_envar) != 0) + schemes[i]->sc_can_proxy = 1; + } + + for (i = 0; schemes[i]; i++) { + s = schemes[i]->sc_proxy_by; + while (s && *s) { + t = strchr(s, ','); + if (t) { /* copy only the name before the comma, then step past the comma */ + schemebuf[0] = '\0'; + strncat(schemebuf, s, t - s); + s = t + 1; + } else { /* no comma left: this is the last name in the list */ + strcpy(schemebuf, s); + s = 0; + } + scp = find_scheme(schemebuf); + if (scp && scp->sc_can_proxy) { + schemes[i]->sc_proxyproto = scp; + break; /* the first listed scheme that can proxy wins */ + } + } + } +} +/* Choose a scheme from the text before the first colon in uri and pass the whole URI to that scheme's sc_parse, or to its proxy's sc_proxy_parse when sc_proxyproto is set. Returns EX_USAGE, after a warnx() message, when uri lacks a colon or a slash, when the first slash precedes the first colon, or when the scheme name is not in schemes[]; otherwise returns whatever the chosen parser returns. */ +int +parse_uri(struct fetch_state *fs, const char *uri) +{ + const char *colon, *slash; + char *scheme; + struct uri_scheme *scp; + + fs->fs_status = "parsing URI"; + colon = strchr(uri, ':'); + slash = strchr(uri, '/'); + if (!colon || !slash || slash < colon) { + warnx("%s: an absolute URI is required", uri); + return EX_USAGE; + } + + scheme = alloca(colon - uri + 1); /* stack buffer for the scheme name plus its terminator */ + scheme[0] = '\0'; + strncat(scheme, uri, colon - uri); + scp = find_scheme(scheme); + + if (scp == 0) { + warnx("%s: unknown URI scheme", scheme); + return EX_USAGE; + } + if (scp->sc_proxyproto) + return scp->sc_proxyproto->sc_proxy_parse(fs, uri); + else + return scp->sc_parse(fs, uri); +} + diff --git a/usr.bin/fetch/util.c b/usr.bin/fetch/util.c new file mode 100644 index 0000000..08103a0 --- /dev/null +++ b/usr.bin/fetch/util.c @@ -0,0 +1,322 @@ +/*- + * Copyright 1997 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, 
and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include <sys/types.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <time.h> /* for time() */ +#include <unistd.h> + +#include <sys/time.h> /* for struct timeval */ + +#include "fetch.h" + + +/* Signal handling functions */ + +/* + * If this were Scheme we could make this variable private to just these two + * functions... 
+ */ +static struct sigaction oldalrm; +/* Install catchsig as the SIGALRM handler (SIGHUP, SIGINT, SIGQUIT, SIGTERM and SIGALRM are added to its sa_mask) and save the previous disposition in oldalrm. */ +void +setup_sigalrm(void) +{ + struct sigaction catch; + + sigemptyset(&catch.sa_mask); + sigaddset(&catch.sa_mask, SIGHUP); + sigaddset(&catch.sa_mask, SIGINT); + sigaddset(&catch.sa_mask, SIGQUIT); + sigaddset(&catch.sa_mask, SIGTERM); + sigaddset(&catch.sa_mask, SIGALRM); + catch.sa_handler = catchsig; + catch.sa_flags = 0; + + sigaction(SIGALRM, &catch, &oldalrm); +} +/* Put back the SIGALRM disposition that setup_sigalrm() saved in oldalrm. */ +void +unsetup_sigalrm(void) +{ + sigaction(SIGALRM, &oldalrm, 0); +} + + +/* File-handling functions */ + +/* + * Set the last-modified time of the output file to be that returned by + * the server. Does nothing when fs_newtime is set or fs_modtime is not + * greater than zero. + */ +void +adjmodtime(struct fetch_state *fs) +{ + struct timeval tv[2]; + + /* XXX - not strictly correct, since (time_t)-1 does not have to be + > 0. This also catches some of the other routines which erroneously + return 0 for invalid times rather than -1. */ + if (!fs->fs_newtime && fs->fs_modtime > 0) { + tv[0].tv_usec = tv[1].tv_usec = 0; + time(&tv[0].tv_sec); /* tv[0], the access time passed to utimes(), is set to the current time */ + tv[1].tv_sec = fs->fs_modtime; /* tv[1], the modification time passed to utimes(), comes from fs_modtime */ + utimes(fs->fs_outputfile, tv); /* the result is not checked */ + } +} + +/* + * Delete the file when exiting on error, if it is not `precious'. + * The file is kept, and its timestamp adjusted through adjmodtime(), + * when fs_restart, fs_mirror or fs_precious is set. An output name + * consisting of a single dash is left alone entirely. + */ +void +rm(struct fetch_state *fs) +{ + if (!(fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0')) { + if (!fs->fs_restart && !fs->fs_mirror && !fs->fs_precious) + unlink(fs->fs_outputfile); + else + adjmodtime(fs); + } +} + + +/* String-handling and -parsing functions */ + +/* + * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/'). This + * must be done after the URI is parsed, since the principal purpose of + * the encoding is to hide characters which would otherwise be significant + * to the parser (like `/'). 
/*
 * percent_decode() returns a malloc()ed, NUL-terminated copy of `uri' in
 * which every `%' followed by two hexadecimal digits has been replaced by
 * the single byte those digits encode.  A `%' that is not followed by two
 * hexadecimal digits is copied through unchanged.  The caller owns the
 * returned string.  Exits through err() if memory cannot be obtained.
 */
char *
percent_decode(const char *uri)
{
	char *rv, *s;
	char hex[3];

	/* Decoding never lengthens the text, so strlen(uri) + 1 suffices. */
	rv = s = malloc(strlen(uri) + 1);
	if (rv == NULL)
		err(EX_OSERR, "malloc");

	while (*uri != '\0') {
		/*
		 * The && chain stops at the terminating NUL, so uri[2] is
		 * only examined once uri[1] is known to be a hex digit.
		 * The casts keep <ctype.h> away from negative char values.
		 */
		if (uri[0] == '%' && isxdigit((unsigned char)uri[1])
		    && isxdigit((unsigned char)uri[2])) {
			hex[0] = uri[1];
			hex[1] = uri[2];
			hex[2] = '\0';
			*s++ = (char)strtol(hex, NULL, 16);
			uri += 3;
		} else {
			*s++ = *uri++;
		}
	}
	*s = '\0';	/* terminate the copy so it is usable as a C string */
	return rv;
}

/*
 * safe_strdup is like strdup, but aborts on error.
 */
char *
safe_strdup(const char *orig)
{
	size_t size;
	char *s;

	size = strlen(orig) + 1;
	s = malloc(size);
	if (s == NULL)
		err(EX_OSERR, "malloc");
	memcpy(s, orig, size);
	return s;
}

/*
 * safe_strndup is like safe_strdup, but copies at most `len'
 * characters from `orig'.  The result is always NUL-terminated.
 */
char *
safe_strndup(const char *orig, size_t len)
{
	const char *nul;
	char *s;

	/* Stop early if `orig' ends before `len' characters. */
	nul = memchr(orig, '\0', len);
	if (nul != NULL)
		len = (size_t)(nul - orig);

	s = malloc(len + 1);
	if (s == NULL)
		err(EX_OSERR, "malloc");
	memcpy(s, orig, len);
	s[len] = '\0';
	return s;
}

/*
 * Decode a standard host:port string into its constituents, allocating
 * memory for a new copy of the host part.
 *
 * On success returns 0 and stores the newly allocated host name in
 * `*hostname'.  `*port' is written only when `s' contains a colon, so the
 * caller can preload it with a default.  The port must be a plain decimal
 * number from 1 to 65534; otherwise a warning is printed, EX_USAGE is
 * returned and neither output is touched.
 */
int
parse_host_port(const char *s, char **hostname, int *port)
{
	const char *colon, *digits;
	char *ep;
	unsigned long ul;

	colon = strchr(s, ':');
	if (colon == NULL) {
		*hostname = safe_strdup(s);
		return 0;
	}

	/* The port starts right after the colon. */
	digits = colon + 1;
	errno = 0;
	ul = strtoul(digits, &ep, 10);
	/*
	 * strtoul() itself would accept leading blanks and a sign, hence
	 * the explicit check that the text starts with a digit.
	 */
	if (!isdigit((unsigned char)digits[0]) || *ep != '\0' || errno != 0
	    || ul < 1 || ul > 65534) {
		warnx("`%s': invalid port number", s);
		return EX_USAGE;
	}

	/* The host is everything before the colon, colon excluded. */
	*hostname = safe_strndup(s, (size_t)(colon - s));
	*port = (int)ul;
	return 0;
}

/*
 * Implement the `base64' encoding as described in RFC 1521.
 */
static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Encode the `len' bytes at `buf' and return the result as a malloc()ed,
 * NUL-terminated string which the caller owns.  Output is padded with `='
 * to a multiple of four characters.  Exits through err() if memory cannot
 * be obtained.
 */
char *
to_base64(const unsigned char *buf, size_t len)
{
	char *rv, *s;
	unsigned long tmp;

	/* Four output characters per (possibly partial) 3-byte group. */
	s = malloc(4 * ((len + 2) / 3) + 1);
	if (s == NULL)
		err(EX_OSERR, "malloc");

	rv = s;
	while (len >= 3) {
		tmp = (unsigned long)buf[0] << 16 | (unsigned long)buf[1] << 8
		    | buf[2];
		s[0] = base64[(tmp >> 18) & 077];
		s[1] = base64[(tmp >> 12) & 077];
		s[2] = base64[(tmp >> 6) & 077];
		s[3] = base64[tmp & 077];
		len -= 3;
		buf += 3;
		s += 4;
	}

	/* RFC 1521 enumerates these three possibilities... */
	switch (len) {
	case 2:
		tmp = (unsigned long)buf[0] << 16 | (unsigned long)buf[1] << 8;
		s[0] = base64[(tmp >> 18) & 077];
		s[1] = base64[(tmp >> 12) & 077];
		s[2] = base64[(tmp >> 6) & 077];
		s[3] = '=';
		s += 4;
		break;
	case 1:
		tmp = (unsigned long)buf[0] << 16;
		s[0] = base64[(tmp >> 18) & 077];
		s[1] = base64[(tmp >> 12) & 077];
		s[2] = s[3] = '=';
		s += 4;
		break;
	default:
		break;
	}

	*s = '\0';
	return rv;
}

/*
 * Decode the base64 text in `orig' into `buf'.  Trailing white space in
 * `orig' is ignored.  On entry `*lenp' is the capacity of `buf'; on
 * success it is set to the number of bytes stored and 0 is returned.
 *
 * Returns -1, leaving `buf' and `*lenp' untouched, when the text length is
 * not a multiple of four, when the decoded data would not fit, or when a
 * character outside the alphabet appears.  `=' is accepted only as one or
 * two padding characters at the very end.
 */
int
from_base64(const char *orig, unsigned char *buf, size_t *lenp)
{
	size_t len, outlen, pad, i, j;
	unsigned long tmp;

	len = strlen(orig);
	while (len > 0 && isspace((unsigned char)orig[len - 1]))
		len--;

	if (len % 4 != 0)
		return -1;

	/* len is a multiple of four here, so len > 0 implies len >= 4. */
	pad = 0;
	if (len > 0 && orig[len - 1] == '=')
		pad = (orig[len - 2] == '=') ? 2 : 1;

	outlen = 3 * (len / 4) - pad;
	if (outlen > *lenp)
		return -1;

	/*
	 * Validate first so that nothing is written on failure.  None of
	 * these characters is NUL, so strchr() cannot match the table's
	 * own terminator.
	 */
	for (i = 0; i < len - pad; i++) {
		if (strchr(base64, orig[i]) == NULL)
			return -1;
	}

	for (i = 0; i < len; i += 4) {
		/* Only the final group may carry padding. */
		size_t forget = (i + 4 == len) ? pad : 0;

		tmp = 0;
		for (j = 0; j < 4; j++) {
			unsigned long v = 0;

			if (j < 4 - forget)
				v = (unsigned long)
				    (strchr(base64, orig[i + j]) - base64);
			tmp = (tmp << 6) | v;
		}

		*buf++ = (tmp >> 16) & 0xff;
		if (forget < 2)
			*buf++ = (tmp >> 8) & 0xff;
		if (forget < 1)
			*buf++ = tmp & 0xff;
	}

	*lenp = outlen;
	return 0;
}