summaryrefslogtreecommitdiffstats
path: root/usr.bin/fetch
diff options
context:
space:
mode:
authorwollman <wollman@FreeBSD.org>1997-01-30 21:43:44 +0000
committerwollman <wollman@FreeBSD.org>1997-01-30 21:43:44 +0000
commit444cbb04eefcb9a4da4e2a366541c4d02d74938d (patch)
tree8697935e80816a6c94b292ebeb8af3ef6994b10b /usr.bin/fetch
parent067667ae976ea0d2540166d4637720d70a012cf7 (diff)
downloadFreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.zip
FreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.tar.gz
Here is my long-threatened revamping of fetch. Jean-Marc probably won't
recognize it any more. This makes the following significant changes: - The main body of the program doesn't know a thing about URIs, HTTP, or FTP. This makes it possible to easily plug in other protocols. (The next revision will probably be able to dynamically add new recognizers.) - There are no longer arbitrary timeouts for the protocols. If you want to set one for yourself, use the environment variables. - FTP proxies are now supported (if I implemented it right). - The HTTP implementation is much more complete, and can now do restarts, preserve modtimes, and run in mirror mode. It's not yet up to 1.1, but it's getting there. - Transaction TCP is now used for sending HTTP requests. The HTTP/1.1 syntax for requesting that the connection be closed after one request is implemented. In all of this, I have doubtless broken somebody. Please test it and tell me about the bugs.
Diffstat (limited to 'usr.bin/fetch')
-rw-r--r--usr.bin/fetch/Makefile8
-rw-r--r--usr.bin/fetch/fetch.1195
-rw-r--r--usr.bin/fetch/fetch.h87
-rw-r--r--usr.bin/fetch/file.c144
-rw-r--r--usr.bin/fetch/ftp.c420
-rw-r--r--usr.bin/fetch/http.c976
-rw-r--r--usr.bin/fetch/main.c915
-rw-r--r--usr.bin/fetch/uri.c122
-rw-r--r--usr.bin/fetch/util.c322
9 files changed, 2428 insertions, 761 deletions
diff --git a/usr.bin/fetch/Makefile b/usr.bin/fetch/Makefile
index 6e86642..31479bd 100644
--- a/usr.bin/fetch/Makefile
+++ b/usr.bin/fetch/Makefile
@@ -1,9 +1,9 @@
PROG = fetch
-SRCS = main.c
+SRCS = file.c ftp.c http.c main.c util.c uri.c
-CFLAGS+= -Wall
+CFLAGS+= -Wall -Wwrite-strings -Wmissing-prototypes
-DPADD= ${LIBFTPIO}
-LDADD= -lftpio
+DPADD= ${LIBFTPIO} ${LIBMD}
+LDADD= -lftpio -lmd
.include <bsd.prog.mk>
diff --git a/usr.bin/fetch/fetch.1 b/usr.bin/fetch/fetch.1
index 1f3ae01..6955250 100644
--- a/usr.bin/fetch/fetch.1
+++ b/usr.bin/fetch/fetch.1
@@ -11,7 +11,7 @@
.Op Fl o Ar file
.Ar URL
.Nm fetch
-.Op Fl MPmnpqr
+.Op Fl MPRmnpqr
.Op Fl o Ar file
.Op Fl c Ar dir
.Fl f Ar file
@@ -26,22 +26,17 @@ or the
protocol. In the first form of the command, the
.Ar URL
may be of the form
-.Em http://site.domain/path/to/the/file
+.Li http://site.domain/path/to/the/file
or
-.Em ftp://site.domain/path/to/the/file.
-For compatibility with
-.Xr tftp 1
-the form
-.Em site.domain:/path/to/the/file
-is also accepted.
-To denote a local filename to be copied or linked to (see
+.Li ftp://site.domain/path/to/the/file.
+To denote a local filename to be copied or linked to (see the
.Fl l
-flag), the
+flag below), the
.Em file:/path/to/the/file
URL form is used.
-
+.Pp
The second form of the command can be used to get a file using the
-.Em ftp
+.Tn FTP
protocol, specifying the file name and the remote host with the
.Fl h
and the
@@ -50,34 +45,51 @@ flags.
.Pp
The following options are available:
.Bl -tag -width Fl -compact
-.It Fl M
-.It Fl m
-Mirror mode: Set the modification time of the file so that it is
-identical to the modification time of the file at the remote host.
-If the file already exists on the local host and is identical (as
-gauged by size and modification time), no transfer is done.
-.It Fl n
-Don't preserve the modtime of the transfered file, use the current time.
-.It Fl P
-.It Fl p
-Use passive mode if you are behind a firewall.
.It Fl c Ar dir
-Change to directory
+The file to retrieve is in directory
.Ar dir
-at remote host before starting the transfer.
+on the remote host.
.It Fl f Ar file
-Retrieve
+The file to retrieve is named
.Ar file
on the remote host.
.It Fl h Ar host
-Set the
-.Ar host
-for transfer.
+The file to retrieve is located on the host
+.Ar host .
.It Fl l
If target is a
.Ar file:/
style of URL, make a link to the target rather than trying
to copy it.
+.It Fl M
+.It Fl m
+Mirror mode: Set the modification time of the file so that it is
+identical to the modification time of the file at the remote host.
+If the file already exists on the local host and is identical (as
+gauged by size and modification time), no transfer is done.
+.It Fl n
+Don't preserve the modtime of the transferred file, use the current time.
+.It Fl o Ar file
+Set the output file name to
+.Ar file .
+By default, a ``pathname'' is extracted from the specified URI, and
+its basename is used as the name of the output file. A
+.Ar file
+argument of
+.Sq Li \&-
+indicates that results are to be directed to the standard output.
+.It Fl P
+.It Fl p
+Use the passive mode of the
+.Tn FTP
+protocol. This is useful for crossing certain sorts of firewalls.
+.It Fl q
+Quiet mode. Do not report transfer progress on the terminal.
+.It Fl R
+The filenames specified are ``precious'', and should not be deleted
+under any circumstances, even if the transfer failed or was incomplete.
+.It Fl r
+Restart a previously interrupted transfer.
.It Fl T Ar seconds
Set timeout value to
.Ar seconds.
@@ -86,47 +98,90 @@ Overrides the environment variables
for ftp transfers or
.Ev HTTP_TIMEOUT
for http transfers if set.
-.It Fl q
-Quiet mode. Do not report transfer progress on the terminal.
.It Fl v
-Verbose mode - display FTP connection information in painful detail.
-.It Fl r
-Reget. Use this flag to restart an interrupted transfer.
-.It Fl o Ar file
-Set the output file name to
-.Ar file
+Increase verbosity. More
+.Fl v Ns \&'s
+result in more information.
.El
+.Pp
+Many options are also controlled solely by the environment (this is a
+bug).
+.Sh PROXY SERVERS
+Many sites use application gateways (``proxy servers'') in their
+firewalls in order to allow communication across the firewall using a
+trusted protocol. The
+.Nm fetch
+program can use both the
+.Tn FTP
+and the
+.Tn HTTP
+protocol with a proxy server.
+.Tn FTP
+proxy servers can only relay
+.Tn FTP
+requests;
+.Tn HTTP
+proxy servers can relay both
+.Tn FTP
+and
+.Tn HTTP
+requests.
+A proxy server can be configured by defining an environment variable
+named
+.Dq Va PROTO Ns Ev _PROXY ,
+where
+.Va PROTO
+is the name of the protocol in upper case. The value of the
+environment variable specifies a hostname, optionally followed by a
+colon and a port number.
+.Pp
+The
+.Tn FTP
+proxy client specifies
+.Dq anonymous
+as its user name, and passes the remote user name and host as the
+.Tn FTP
+session's password, in the form
+.Dq Va remoteuser Ns Li \&@ Va remotehost .
+The
+.Tn HTTP
+proxy client simply passes the originally-requested URI to the remote
+server in an
+.Tn HTTP
+.Dq Li GET
+request. HTTP proxy authentication is not yet implemented.
+When multiple proxy protocols are configured,
+.Nm
+will prefer
+.Tn HTTP .
.Sh ENVIRONMENT
-A transfer using the
-.Em ftp
-protocol will be aborted after the delay specified by the
-.Ev FTP_TIMEOUT
-variable. The default is 300 (seconds)
-
-A transfer using the
-.Em http
-protocol will be aborted after the delay specified by the
-.Ev HTTP_TIMEOUT
-variable. The default is 300 (seconds)
-
-.Ev FTP_LOGIN
-is the login name for the remote host. Default is
-.Em anonymous
-
-.Ev FTP_PASSWORD
-is the password for the remote host. Default is
-.Em <yourname>@
-
-.Ev FTP_PASSIVE_MODE
-will force the use of passive mode FTP for firewalls.
-
-If
-.Ev HTTP_PROXY
-is set to a value of the form
-.Em host:port
-it specifies the address of a http proxy. The proxy will be used
-for all ftp and http requests. This is useful if you are behind
-an application firewall.
+.Bl -tag -width FTP_PASSIVE_MODE -offset indent
+.It Ev FTP_TIMEOUT
+maximum time, in seconds, to wait before aborting an
+.Tn FTP
+connection.
+.It Ev HTTP_TIMEOUT
+maximum time, in seconds, to wait before aborting an
+.Tn HTTP
+connection.
+.It Ev FTP_LOGIN
+the login name used for
+.Tn FTP
+transfers (default
+.Dq Li anonymous )
+.It Ev FTP_PASSWORD
+the password used for
+.Tn FTP
+transfers (default
+.Dq Va yourname Ns Li \&@ Ns Va yourhost )
+.It Ev FTP_PASSIVE_MODE
+force the use of passive mode FTP
+.It Ev HTTP_PROXY
+the address of a proxy server which understands
+.Tn HTTP
+.It Ev FTP_PROXY
+the address of a proxy server which understands
+.Tn FTP
.Sh SEE ALSO
.Xr ftp 1 ,
.Xr tftp 1
@@ -135,3 +190,9 @@ The
.Nm fetch
command appeared in
.Fx 2.1.5 .
+.Sh AUTHORS
+The original implementation of
+.Nm
+was done by Jean-Marc Zucconi. It was extensively re-worked for
+.Fx 3.0
+by Garrett Wollman.
diff --git a/usr.bin/fetch/fetch.h b/usr.bin/fetch/fetch.h
new file mode 100644
index 0000000..daa010e
--- /dev/null
+++ b/usr.bin/fetch/fetch.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#ifndef fetch_h
+#define fetch_h 1
+
+
+#define BUFFER_SIZE 1024
+#define FETCH_VERSION "fetch/1.0"
+#define PATH_CP "/bin/cp"
+
+/*
+ * Per-transfer state shared between the main program and the protocol
+ * modules.  A scheme's parse routine fills in fs_proto, fs_retrieve and
+ * fs_close (and fs_outputfile if the user did not choose a name).
+ */
+struct fetch_state {
+ const char *fs_status; /* short progress label, e.g. "copying", "done" */
+ const char *fs_outputfile; /* output file name; "-" means standard output */
+ int fs_verbose; /* -q, -v option */
+ int fs_newtime; /* -n option */
+ int fs_mirror; /* -m option */
+ int fs_restart; /* -r option */
+ int fs_timeout; /* -T option */
+ int fs_passive_mode; /* -p option */
+ int fs_linkfile; /* -l option */
+ int fs_precious; /* -R option */
+ time_t fs_modtime; /* remote modification time; ftp.c stores -1 if unknown */
+ void *fs_proto; /* protocol-private data owned by the scheme module */
+ int (*fs_retrieve)(struct fetch_state *); /* performs the transfer */
+ int (*fs_close)(struct fetch_state *); /* releases fs_proto */
+};
+
+/*
+ * Description of one URI scheme.  Each protocol module defines one of
+ * these (file_scheme, ftp_scheme, http_scheme); the first five members
+ * are given in the static initializer, in this order.
+ */
+struct uri_scheme {
+ const char *sc_name; /* name of the scheme, <32 characters */
+ int (*sc_parse)(struct fetch_state *, const char *);
+ /* routine to parse a URI and build state */
+ int (*sc_proxy_parse)(struct fetch_state *, const char *);
+ /* same, but for proxy case */
+ const char *sc_proxy_envar; /* envar used to determine proxy */
+ const char *sc_proxy_by; /* list of protos which can proxy us */
+
+ /* The rest is filled in dynamically... */
+ /* NOTE(review): presumably set up by init_schemes(); confirm there. */
+ int sc_can_proxy;
+ struct uri_scheme *sc_proxyproto;
+};
+
+/*
+ * Scheme descriptors.  file_scheme and ftp_scheme are defined in file.c
+ * and ftp.c; http_scheme is presumably defined in http.c.
+ */
+extern struct uri_scheme file_scheme, ftp_scheme, http_scheme;
+
+/*
+ * Helpers shared by the protocol modules.  Their definitions are not in
+ * the files that declare them here, so the notes below only describe how
+ * the visible callers use them.
+ */
+void adjmodtime(struct fetch_state *fs); /* called after a completed FTP transfer */
+void catchsig(int signo);
+/* ftp.c passes the expected total and a per-call byte count, and -1 when finished */
+void display(struct fetch_state *fs, off_t total, ssize_t thisincr);
+void init_schemes(void);
+void rm(struct fetch_state *fs); /* called by ftp.c after a failed transfer */
+void setup_sigalrm(void); /* brackets the alarm()-guarded read loop in ftp.c */
+void unsetup_sigalrm(void);
+char *percent_decode(const char *orig); /* result is later passed to free() */
+char *safe_strdup(const char *orig); /* result is later passed to free() */
+char *safe_strndup(const char *orig, size_t len);
+char *to_base64(const unsigned char *buf, size_t len);
+int from_base64(const char *orig, unsigned char *buf, size_t *lenp);
+int parse_host_port(const char *str, char **hostname, int *port);
+int parse_uri(struct fetch_state *fs, const char *uri);
+#endif /* ! fetch_h */
diff --git a/usr.bin/fetch/file.c b/usr.bin/fetch/file.c
new file mode 100644
index 0000000..091639c
--- /dev/null
+++ b/usr.bin/fetch/file.c
@@ -0,0 +1,144 @@
+/*-
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#include <sys/types.h>
+
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include <sys/wait.h>
+
+#include "fetch.h"
+
+static int file_retrieve(struct fetch_state *fs);
+static int file_close(struct fetch_state *fs);
+static int file_parse(struct fetch_state *fs, const char *uri);
+
+/*
+ * Scheme descriptor for `file:' URLs.  The three zero initializers are
+ * sc_proxy_parse, sc_proxy_envar and sc_proxy_by (see struct uri_scheme
+ * in fetch.h): no proxy parser or proxy variable is registered.
+ */
+struct uri_scheme file_scheme =
+ { "file", file_parse, 0, 0, 0 };
+
+/*
+ * Parse a `file:' URL and prepare `fs' for a local copy or symlink.
+ *
+ * An optional `//authority' component is stepped over without being
+ * examined.  The slash that follows it is deliberately treated as the
+ * first character of the pathname rather than as a mere separator.
+ *
+ * On success the result of percent_decode() on the pathname is stored in
+ * fs->fs_proto, the output name defaults to whatever follows the last
+ * slash (unless the user already chose one), and the retrieve/close
+ * hooks are installed.  Returns 0, or EX_USAGE when the URL does not
+ * contain an absolute pathname.
+ */
+static int
+file_parse(struct fetch_state *fs, const char *uri)
+{
+	const char *path;
+	const char *lastslash;
+
+	path = uri + 5;			/* step over the `file:' prefix */
+	if (path[0] == '/' && path[1] == '/') {
+		/* step over `//localhost', if present */
+		for (path += 2; *path != '\0' && *path != '/'; path++)
+			continue;
+	}
+
+	if (path[0] != '/') {
+		warnx("`%s': expected absolute pathname in `file' URL", uri);
+		return EX_USAGE;
+	}
+
+	fs->fs_proto = percent_decode(path);
+	/* a '/' is expected to be present because of the test above */
+	lastslash = strrchr(fs->fs_proto, '/');
+	if (fs->fs_outputfile == 0) {
+		/* the user did not override the name: use the basename */
+		fs->fs_outputfile = lastslash + 1;
+	}
+	fs->fs_close = file_close;
+	fs->fs_retrieve = file_retrieve;
+	return 0;
+}
+
+/*
+ * Release the pathname stored by file_parse() and reset the fields of
+ * `fs' that may refer to it (file_parse() can point fs_outputfile into
+ * that same buffer).  Always returns 0.
+ */
+static int
+file_close(struct fetch_state *fs)
+{
+	void *path = fs->fs_proto;
+
+	fs->fs_status = "free";
+	fs->fs_outputfile = 0;
+	fs->fs_proto = 0;
+	free(path);
+	return 0;
+}
+
+/*
+ * "Retrieve" the local file named by a `file:' URL.
+ *
+ * With the -l option (fs->fs_linkfile) a symbolic link to the source is
+ * created; otherwise the copy is delegated to `cp -p', run as a child
+ * process.  An already existing output file is refused.
+ *
+ * Returns 0 on success, EX_USAGE if the output file already exists,
+ * EX_TEMPFAIL if fork() fails, EX_OSERR if symlink()/waitpid() fail or
+ * cp did not exit normally, and otherwise the exit status of cp.
+ */
+static int
+file_retrieve(struct fetch_state *fs)
+{
+	pid_t pid;
+	int status;
+
+	/* XXX - this seems bogus to me! */
+	if (access(fs->fs_outputfile, F_OK) == 0) {
+		errno = EEXIST;
+		warn("%s", fs->fs_outputfile);
+		return EX_USAGE;
+	}
+
+	if (fs->fs_linkfile) {
+		fs->fs_status = "symlink";
+		if (symlink(fs->fs_proto, fs->fs_outputfile) == -1) {
+			warn("symlink");
+			return EX_OSERR;
+		}
+		fs->fs_status = "done";
+		return 0;
+	}
+
+	/* Flush first so buffered diagnostics are not emitted twice. */
+	fflush(stderr);
+	pid = fork();
+	if (pid < 0) {
+		warn("fork");
+		return EX_TEMPFAIL;
+	}
+	if (pid == 0) {
+		/* child: only returns from execl() on failure */
+		execl(PATH_CP, "cp", "-p", fs->fs_proto,
+		    fs->fs_outputfile, (char *)0);
+		warn("execl: " PATH_CP);
+		fflush(stderr);
+		_exit(EX_OSERR);
+	}
+
+	/* parent: wait for cp and propagate its result */
+	fs->fs_status = "copying";
+	if (waitpid(pid, &status, 0) < 0) {
+		warn("waitpid(%ld)", (long)pid);
+		return EX_OSERR;
+	}
+	if (WIFEXITED(status))
+		return WEXITSTATUS(status);
+	/*
+	 * waitpid() succeeded, so errno holds nothing relevant here:
+	 * use warnx() rather than warn() to avoid a bogus error suffix.
+	 */
+	if (WIFSIGNALED(status))
+		warnx(PATH_CP " exited on signal: %s",
+		    sys_signame[WTERMSIG(status)]);
+	return EX_OSERR;
+}
+
diff --git a/usr.bin/fetch/ftp.c b/usr.bin/fetch/ftp.c
new file mode 100644
index 0000000..34caeb0
--- /dev/null
+++ b/usr.bin/fetch/ftp.c
@@ -0,0 +1,420 @@
+/*-
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#include <sys/types.h>
+
+#include <err.h>
+#include <errno.h>
+#include <ftpio.h>
+#include <limits.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include "fetch.h"
+
+/*
+ * FTP-specific transfer state, stored in fetch_state.fs_proto.
+ * ftp_close() passes every pointer member and the structure itself to
+ * free(), so the strings must be heap allocated.
+ */
+struct ftp_state {
+ char *ftp_hostname; /* server to log in to (the proxy, when proxying) */
+ char *ftp_user; /* login name, or null to use "anonymous" */
+ char *ftp_password; /* password sent at login */
+ char *ftp_remote_file; /* path of the file on the server */
+ unsigned ftp_port; /* port taken from the URL or proxy setting */
+};
+
+static int ftp_close(struct fetch_state *fs);
+static int ftp_retrieve(struct fetch_state *fs);
+static int ftp_parse(struct fetch_state *fs, const char *uri);
+static int ftp_proxy_parse(struct fetch_state *fs, const char *uri);
+
+/*
+ * Scheme descriptor for `ftp:' URLs.  In struct uri_scheme order: scheme
+ * name, direct parser, proxy parser, the environment variable naming
+ * the proxy, and the list of protocols which can proxy FTP requests.
+ */
+struct uri_scheme ftp_scheme =
+ { "ftp", ftp_parse, ftp_proxy_parse, "FTP_PROXY", "ftp,http" };
+
+/*
+ * Parse an `ftp://[user@]host[:port]/path' URL and build the FTP state.
+ *
+ * On success fs->fs_proto points to a freshly allocated struct ftp_state
+ * holding the host, optional user, password, port and remote path; the
+ * output file name defaults to the last component of the remote path
+ * unless the user already chose one; and the retrieve/close hooks are
+ * installed.
+ *
+ * The password comes from FTP_PASSWORD, or is built as `login@hostname'
+ * and cached in the environment.  When the URL names no user, FTP_LOGIN
+ * is consulted.  A colon before the first slash is always taken as the
+ * port separator, so a `user:password@host' form is not understood.
+ *
+ * Returns 0 on success or EX_USAGE for a malformed URL; running out of
+ * memory terminates the program through err().
+ */
+static int
+ftp_parse(struct fetch_state *fs, const char *uri)
+{
+	const char *p, *colon, *slash, *q;
+	char *hostname, *atsign;
+	unsigned port;
+	struct ftp_state *ftps;
+
+	p = uri + 4;			/* skip past `ftp:' */
+
+	if (p[0] != '/' || p[1] != '/') {
+		warnx("`%s': invalid `ftp' URL", uri);
+		return EX_USAGE;
+	}
+
+	p += 2;
+	colon = strchr(p, ':');
+	slash = strchr(p, '/');
+	/* The host part ends at the port colon, if any, else at the slash. */
+	if (colon && slash && colon < slash)
+		q = colon;
+	else
+		q = slash;
+	if (q == 0) {
+		warnx("`%s': malformed `ftp' URL", uri);
+		return EX_USAGE;
+	}
+	hostname = alloca(q - p + 1);
+	hostname[0] = '\0';
+	strncat(hostname, p, q - p);
+
+	/*
+	 * Only a colon inside the host part introduces a port.  A colon
+	 * that first appears in the path (after the slash) is part of the
+	 * file name and must not be parsed as a port number.
+	 */
+	if (q == colon && colon + 1 != slash) {
+		unsigned long ul;
+		char *ep;
+
+		errno = 0;
+		ul = strtoul(colon + 1, &ep, 10);
+		if (ep != slash || ep == colon + 1 || errno != 0
+		    || ul < 1 || ul > 65534) {
+			/* not (necessarily) an errno condition: use warnx */
+			warnx("`%s': invalid port in URL", uri);
+			return EX_USAGE;
+		}
+
+		port = ul;
+	} else {
+		port = 21;
+	}
+
+	p = slash + 1;
+
+	ftps = malloc(sizeof *ftps);
+	if (ftps == 0)
+		err(EX_OSERR, "malloc");
+
+	/*
+	 * Now, we have a copy of the hostname in hostname, the specified port
+	 * (or the default value) in port, and p points to the filename part
+	 * of the URI.  We just need to check for a user in the hostname,
+	 * and then save all the bits in our state.
+	 */
+	atsign = strrchr(hostname, '@');
+	if (atsign) {
+		if (atsign[1] == '\0') {
+			warnx("`%s': malformed `ftp' hostname", hostname);
+			free(ftps);
+			return EX_USAGE;
+		}
+
+		*atsign = '\0';
+		ftps->ftp_user = percent_decode(hostname);
+		ftps->ftp_hostname = safe_strdup(atsign + 1);
+	} else {
+		ftps->ftp_user = 0;
+		ftps->ftp_hostname = safe_strdup(hostname);
+	}
+	/* The port applies whether or not a user name was given. */
+	ftps->ftp_port = port;
+
+	p = ftps->ftp_remote_file = percent_decode(p);
+	/* now p is the decoded version */
+
+	if (fs->fs_outputfile == 0) {
+		/* A path without any slash is its own basename. */
+		slash = strrchr(p, '/');
+		fs->fs_outputfile = slash ? slash + 1 : p;
+	}
+
+	ftps->ftp_password = getenv("FTP_PASSWORD");
+	if (ftps->ftp_password != 0) {
+		ftps->ftp_password = safe_strdup(ftps->ftp_password);
+	} else {
+		char *pw;
+		const char *logname;
+		char localhost[MAXHOSTNAMELEN];
+
+		logname = getlogin();
+		if (logname == 0)
+			logname = "root";
+		gethostname(localhost, sizeof localhost);
+		/* gethostname() may leave a truncated name unterminated */
+		localhost[sizeof localhost - 1] = '\0';
+		pw = malloc(strlen(logname) + 1 + strlen(localhost) + 1);
+		if (pw == 0)
+			err(EX_OSERR, "malloc");
+		strcpy(pw, logname);
+		strcat(pw, "@");
+		strcat(pw, localhost);
+		ftps->ftp_password = pw;
+		setenv("FTP_PASSWORD", pw, 0); /* cache the result */
+	}
+
+	if (ftps->ftp_user == 0) {
+		const char *user = getenv("FTP_LOGIN");
+		if (user != 0)
+			ftps->ftp_user = safe_strdup(user);
+	}
+
+	fs->fs_proto = ftps;
+	fs->fs_close = ftp_close;
+	fs->fs_retrieve = ftp_retrieve;
+	return 0;
+}
+
+/*
+ * Parse an FTP URL that is to be fetched through an FTP proxy.
+ *
+ * The only URIs we can handle in the FTP proxy are FTP URLs.
+ * This makes it possible to take a few short cuts: ftp_parse() builds
+ * the state for a direct connection, which is then rewritten so that
+ *   - the host and port are those named by FTP_PROXY (`host[:port]',
+ *     port 21 when omitted),
+ *   - the password becomes `remoteuser@remotehost',
+ *   - the login name is taken from FTP_PROXY_USER, or left null (which
+ *     ftp_retrieve() turns into "anonymous").
+ *
+ * Returns 0 on success and EX_USAGE when FTP_PROXY is unset or carries
+ * an invalid port, or when the URL asks for a non-standard port;
+ * failures of ftp_parse() are passed through.
+ */
+static int
+ftp_proxy_parse(struct fetch_state *fs, const char *uri)
+{
+	int rv;
+	char *hostname;
+	char *port;
+	const char *user;
+	char *newpass;
+	unsigned portno;
+	struct ftp_state *ftps;
+
+	hostname = getenv("FTP_PROXY");
+	if (hostname == 0) {
+		/* don't hand a null pointer to strchr() below */
+		warnx("`%s': FTP_PROXY is not set", uri);
+		return EX_USAGE;
+	}
+	port = strchr(hostname, ':');
+	if (port == 0) {
+		portno = 21;
+	} else {
+		unsigned long ul;
+		char *ep;
+
+		/* All this to avoid modifying the environment. */
+		ep = alloca(strlen(hostname) + 1);
+		strcpy(ep, hostname);
+		port = ep + (port - hostname);
+		hostname = ep;
+
+		*port++ = '\0';
+		errno = 0;
+		ul = strtoul(port, &ep, 0);
+		if (*ep || !*port || errno != 0 || ul < 1 || ul > 65534) {
+			warnx("`%s': invalid port specification for FTP proxy",
+			    port);
+			return EX_USAGE;
+		}
+		portno = ul;
+	}
+
+	/* ftp_parse() does most of the work; we can just fix things up */
+	rv = ftp_parse(fs, uri);
+	if (rv)
+		return rv;
+	/* Oops.. it got turned into a file: */
+	if (fs->fs_retrieve != ftp_retrieve) {
+		return 0;
+	}
+
+	ftps = fs->fs_proto;
+	if (ftps->ftp_port != 21) {
+		ftp_close(fs);
+		warnx("`%s': FTP proxy requires the use of the standard port",
+		    uri);
+		return EX_USAGE;
+	}
+
+	ftps->ftp_port = portno;
+	user = ftps->ftp_user ? ftps->ftp_user : "anonymous";
+	newpass = malloc(strlen(user) + 1 + strlen(ftps->ftp_hostname) + 1);
+	if (newpass == 0)
+		err(EX_OSERR, "malloc");
+
+	/* Build `remoteuser@remotehost'; every piece must be appended. */
+	strcpy(newpass, user);
+	strcat(newpass, "@");
+	strcat(newpass, ftps->ftp_hostname);
+
+	/* `user' may point at ftp_user, so it is freed only after use. */
+	free(ftps->ftp_hostname);
+	ftps->ftp_hostname = safe_strdup(hostname);
+	free(ftps->ftp_password);
+	ftps->ftp_password = newpass;
+	free(ftps->ftp_user);
+	ftps->ftp_user = getenv("FTP_PROXY_USER");
+	if (ftps->ftp_user)
+		ftps->ftp_user = safe_strdup(ftps->ftp_user);
+	return 0;
+}
+
+/*
+ * Free the FTP state attached to `fs' and clear the fields that may
+ * refer to it (ftp_parse() can point fs_outputfile into the remote file
+ * name).  Always returns 0.
+ */
+static int
+ftp_close(struct fetch_state *fs)
+{
+	struct ftp_state *st = fs->fs_proto;
+
+	fs->fs_outputfile = 0;
+	fs->fs_proto = 0;
+
+	free(st->ftp_remote_file);
+	free(st->ftp_password);
+	free(st->ftp_hostname);
+	free(st->ftp_user);		/* may be null; free() accepts that */
+	free(st);
+	return 0;
+}
+
+/*
+ * Fetch the remote file described by fs->fs_proto over FTP.
+ *
+ * Logs in (as "anonymous" when no user is set), asks for the size and
+ * modification time of the remote file, and then:
+ *   - in mirror mode, skips the transfer when the local file has the
+ *     same size and is at least as new;
+ *   - in restart mode, resumes after the bytes already present in a
+ *     shorter, non-empty local file;
+ *   - otherwise copies the whole file to the output ("-" selects the
+ *     standard output).
+ * Each read is guarded by alarm(fs->fs_timeout); the timeout comes from
+ * the -T option or, failing that, from FTP_TIMEOUT.
+ *
+ * Returns 0 on success (including a skipped mirror transfer), EX_IOERR
+ * for login, protocol or transfer errors, and EX_OSERR when ftpGet()
+ * fails without an FTP error code or the output cannot be opened.
+ */
+static int
+ftp_retrieve(struct fetch_state *fs)
+{
+	struct ftp_state *ftps = fs->fs_proto;
+	FILE *ftp, *remote, *local;
+	int status;
+	off_t size;
+	off_t seekloc, wehave;
+	time_t modtime;
+	size_t readresult, writeresult;
+
+	/*
+	 * NOTE(review): the port argument is passed as 0, so
+	 * ftps->ftp_port is never used for the connection; confirm
+	 * against ftpio(3) whether that is intended.
+	 */
+	ftp = ftpLogin(ftps->ftp_hostname,
+	    (char *)(ftps->ftp_user ? ftps->ftp_user : "anonymous"),
+	    /* XXX ^^^^ bad API */
+	    ftps->ftp_password, 0, fs->fs_verbose > 1,
+	    &status);
+	if (ftp == 0) {
+		warnx("%s: %s", ftps->ftp_hostname,
+		    status ? ftpErrString(status) : hstrerror(h_errno));
+		return EX_IOERR;
+	}
+	ftpBinary(ftp);
+	ftpPassive(ftp, fs->fs_passive_mode);
+	size = ftpGetSize(ftp, ftps->ftp_remote_file);
+	modtime = ftpGetModtime(ftp, ftps->ftp_remote_file);
+	if (modtime <= 0) {	/* xxx */
+		warnx("%s: cannot get remote modification time",
+		    ftps->ftp_remote_file);
+		modtime = -1;
+	}
+	fs->fs_modtime = modtime;
+	seekloc = wehave = 0;
+	if (fs->fs_restart || fs->fs_mirror) {
+		struct stat stab;
+
+		if (fs->fs_outputfile[0] == '-'
+		    && fs->fs_outputfile[1] == '\0')
+			status = fstat(STDOUT_FILENO, &stab);
+		else
+			status = stat(fs->fs_outputfile, &stab);
+		if (status < 0) {
+			/* no local copy: nothing to compare or resume */
+			stab.st_mtime = -1;
+			stab.st_size = 0;
+		}
+		/* Restart and mirror only make sense for regular files. */
+		if (status == 0 && !S_ISREG(stab.st_mode)) {
+			fs->fs_restart = 0;
+			fs->fs_mirror = 0;
+		}
+		if (fs->fs_mirror && stab.st_size == size
+		    && modtime <= stab.st_mtime) {
+			fclose(ftp);
+			return 0;
+		}
+		if (fs->fs_restart) {
+			/*
+			 * Resume after what we already have locally, not
+			 * at the end of the remote file.
+			 */
+			if (stab.st_size != 0 && stab.st_size < size)
+				seekloc = wehave = stab.st_size;
+		}
+	}
+
+	remote = ftpGet(ftp, ftps->ftp_remote_file, &seekloc);
+	if (remote == 0) {
+		if (ftpErrno(ftp)) {
+			warnx("%s: %s", ftps->ftp_hostname,
+			    ftpErrString(ftpErrno(ftp)));
+			fclose(ftp);
+			return EX_IOERR;
+		} else {
+			warn("ftpGet");
+			/* don't leak the control connection */
+			fclose(ftp);
+			return EX_OSERR;
+		}
+	}
+
+	/* Append when resuming, truncate otherwise. */
+	if (fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0')
+		local = fopen("/dev/stdout", wehave ? "a" : "w");
+	else
+		local = fopen(fs->fs_outputfile, wehave ? "a" : "w");
+	if (local == 0) {
+		warn("%s", fs->fs_outputfile);
+		fclose(remote);
+		fclose(ftp);
+		return EX_OSERR;
+	}
+
+	if (fs->fs_timeout) {
+		char buf[sizeof("18446744073709551616")]; /* 2**64 */
+		snprintf(buf, sizeof buf, "%d", fs->fs_timeout);
+		setenv("FTP_TIMEOUT", buf, 1);
+	} else {
+		char *env = getenv("FTP_TIMEOUT");
+		char *ep;
+		unsigned long ul;
+
+		if (env) {
+			errno = 0;
+			ul = strtoul(env, &ep, 0);
+			/*
+			 * Valid only if the string is non-empty and was
+			 * consumed entirely (no trailing junk).
+			 */
+			if (*env && *ep == '\0' && errno == 0
+			    && ul <= INT_MAX)
+				fs->fs_timeout = ul;
+			else
+				warnx("`%s': invalid FTP timeout", env);
+		}
+	}
+
+	display(fs, size, wehave);
+	setup_sigalrm();
+
+	/* Copy until end of file, a read error, or a short write. */
+	do {
+		char buf[BUFFER_SIZE];
+
+		alarm(fs->fs_timeout);
+		readresult = fread(buf, 1, sizeof buf, remote);
+		alarm(0);
+		if (readresult == 0)
+			break;
+		display(fs, size, readresult);
+		writeresult = fwrite(buf, 1, readresult, local);
+	} while (writeresult == readresult);
+	unsetup_sigalrm();
+
+	if (ferror(remote)) {
+		warn("reading remote file from %s", ftps->ftp_hostname);
+		fclose(local);
+		fclose(remote);
+		fclose(ftp);
+		rm(fs);
+		return EX_IOERR;
+	} else if (ferror(local)) {
+		warn("%s", fs->fs_outputfile);
+		fclose(local);
+		fclose(remote);
+		fclose(ftp);
+		rm(fs);
+		return EX_IOERR;
+	}
+
+	fclose(local);
+	fclose(remote);
+	fclose(ftp);
+	display(fs, size, -1);
+	adjmodtime(fs);
+	return 0;
+}
diff --git a/usr.bin/fetch/http.c b/usr.bin/fetch/http.c
new file mode 100644
index 0000000..425476d
--- /dev/null
+++ b/usr.bin/fetch/http.c
@@ -0,0 +1,976 @@
+/*-
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <md5.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <sys/param.h> /* for MAXHOSTNAMELEN */
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "fetch.h"
+
+/*
+ * Handlers for the `http' scheme.  http_parse() and http_proxy_parse()
+ * are referenced from http_scheme below; they in turn install
+ * http_close() and http_retrieve() in the fetch_state they fill in.
+ */
+static int http_parse(struct fetch_state *fs, const char *uri);
+static int http_proxy_parse(struct fetch_state *fs, const char *uri);
+static int http_close(struct fetch_state *fs);
+static int http_retrieve(struct fetch_state *fs);
+
+/*
+ * Scheme descriptor for HTTP.  The layout of struct uri_scheme is
+ * declared in fetch.h; presumably the fields are the scheme name, the
+ * direct and proxy parsers, the proxy environment variable and the
+ * proxy's own scheme -- confirm against fetch.h.
+ */
+struct uri_scheme http_scheme =
+ { "http", http_parse, http_proxy_parse, "HTTP_PROXY", "http" };
+
+/*
+ * HTTP-specific state for one transfer.  It is allocated by
+ * http_parse() or http_proxy_parse(), stored in fetch_state.fs_proto,
+ * and released (together with the three strings) by http_close().
+ */
+struct http_state {
+ char *http_hostname; /* host we connect to (the proxy when proxying) */
+ char *http_remote_request; /* text sent right after "GET /" */
+ char *http_decoded_file; /* percent-decoded path; fs_outputfile may point into it */
+ unsigned http_port; /* TCP port in host byte order */
+};
+
+/*
+ * Classification returned by http_parse_header() for one header line.
+ * We are only concerned with headers we might receive.
+ */
+enum http_header {
+ ht_content_length, ht_last_modified, ht_content_md5, ht_content_type,
+ ht_transfer_encoding, ht_content_range, ht_warning,
+ /*
+ * unusual cases: a line without a colon, a header name we do not
+ * recognize, and the empty line that terminates the header block
+ */
+ ht_syntax_error, ht_unknown, ht_end_of_header
+};
+
+/* Helpers private to this file; each is defined further down. */
+static char *format_http_date(time_t when);
+static char *format_http_user_agent(void);
+static enum http_header http_parse_header(char *line, char **valuep);
+static int check_md5(FILE *fp, char *base64ofmd5);
+static int http_first_line(const char *line);
+static int parse_http_content_range(char *orig, off_t *first, off_t *total);
+static time_t parse_http_date(char *datestring);
+
+/*
+ * Parse a direct (non-proxy) URI of the form `http://host[:port]/path'
+ * and prepare `fs' for http_retrieve().
+ *
+ * `uri' is assumed to begin with the five characters "http:"; the
+ * caller is expected to have matched the scheme already.
+ *
+ * On success a new struct http_state is stored in fs->fs_proto,
+ * fs->fs_close and fs->fs_retrieve are pointed at the HTTP handlers
+ * and, if no output file was chosen yet, fs->fs_outputfile is set to
+ * the last component of the percent-decoded path.  Returns 0 on
+ * success and EX_USAGE for a malformed URI.
+ */
+static int
+http_parse(struct fetch_state *fs, const char *uri)
+{
+	const char *p, *colon, *slash, *ques;
+	char *hostname;
+	size_t hostlen;
+	unsigned port;
+	struct http_state *https;
+
+	p = uri + 5;			/* skip over "http:" */
+
+	if (p[0] != '/' || p[1] != '/') {
+		warnx("`%s': malformed `http' URL", uri);
+		return EX_USAGE;
+	}
+	p += 2;
+
+	/* The host[:port] part ends at the first slash, which must exist. */
+	slash = strchr(p, '/');
+	if (slash == 0) {
+		warnx("`%s': malformed `http' URL", uri);
+		return EX_USAGE;
+	}
+
+	/*
+	 * Only a colon in front of that slash introduces a port number.
+	 * A colon further along is part of the path and must not be
+	 * mistaken for one.
+	 */
+	colon = strchr(p, ':');
+	if (colon != 0 && colon > slash)
+		colon = 0;
+
+	hostlen = (colon != 0 ? colon : slash) - p;
+	hostname = alloca(hostlen + 1);
+	memcpy(hostname, p, hostlen);
+	hostname[hostlen] = '\0';
+
+	if (colon != 0 && colon + 1 != slash) {
+		unsigned long ul;
+		char *ep;
+
+		errno = 0;
+		ul = strtoul(colon + 1, &ep, 10);
+		/* The digits must run right up to the slash. */
+		if (ep != slash || errno != 0 || ul < 1 || ul > 65535) {
+			/* errno says nothing useful here, hence warnx */
+			warnx("`%s': invalid port in URL", uri);
+			return EX_USAGE;
+		}
+
+		port = ul;
+	} else {
+		/* no port, or an empty one as in `host:/' */
+		port = 80;
+	}
+
+	p = slash + 1;
+
+	https = malloc(sizeof *https);
+	if (https == 0)
+		err(EX_OSERR, "malloc");
+
+	/*
+	 * Now, we have a copy of the hostname in hostname, the specified port
+	 * (or the default value) in port, and p points to the filename part
+	 * of the URI.
+	 */
+	https->http_hostname = safe_strdup(hostname);
+	https->http_port = port;
+
+	/* The request stops at the first `?' or `#', if there is one. */
+	ques = strpbrk(p, "?#");
+	if (ques) {
+		https->http_remote_request = safe_strndup(p, ques - p);
+	} else {
+		https->http_remote_request = safe_strdup(p);
+	}
+	p = https->http_decoded_file = percent_decode(p);
+	/* now p is the decoded version, so we can extract the basename */
+
+	if (fs->fs_outputfile == 0) {
+		slash = strrchr(p, '/');
+		if (slash)
+			fs->fs_outputfile = slash + 1;
+		else
+			fs->fs_outputfile = p;
+	}
+
+	fs->fs_proto = https;
+	fs->fs_close = http_close;
+	fs->fs_retrieve = http_retrieve;
+	return 0;
+}
+
+/*
+ * An HTTP proxy works by accepting a complete URI in a GET request,
+ * retrieving that object, and then forwarding it back to us.  Because
+ * it can conceivably handle any URI, we have to do a bit more work
+ * in the parsing of it.
+ *
+ * The host and port to connect to come from the HTTP_PROXY environment
+ * variable (via parse_host_port()); the whole of `uri' becomes the
+ * request.  The path part of `uri' is only used to derive a default
+ * output file name.  Returns 0 on success, the error from
+ * parse_host_port(), or EX_USAGE for a malformed `http' URI.
+ */
+static int
+http_proxy_parse(struct fetch_state *fs, const char *uri)
+{
+	struct http_state *https;
+	const char *env, *slash, *ques;
+	char *file;
+	int rv;
+
+	https = malloc(sizeof *https);
+	if (https == 0)
+		err(EX_OSERR, "malloc");
+	https->http_remote_request = safe_strdup(uri);
+
+	/*
+	 * NOTE(review): env is passed on unchecked; presumably the caller
+	 * only selects the proxy parser when HTTP_PROXY is set -- confirm.
+	 */
+	env = getenv("HTTP_PROXY");
+	rv = parse_host_port(env, &https->http_hostname, &https->http_port);
+	if (rv)
+		goto out;
+
+	if (strncmp(uri, "http://", 7) == 0) {
+		slash = strchr(uri + 7, '/');
+		if (slash == 0) {
+			warnx("`%s': malformed `http' URL", uri);
+			rv = EX_USAGE;
+			goto out_hostname;
+		}
+		/* keep the path but drop any query or fragment */
+		ques = strpbrk(slash, "?#");
+		if (ques == 0)
+			file = safe_strdup(slash);
+		else
+			file = safe_strndup(slash, ques - slash);
+	} else {
+		/*
+		 * Some other scheme: step over `scheme:' and, if present,
+		 * over `//authority', and treat the rest as the path.
+		 */
+		slash = uri;
+		while (*slash && *slash != ':')
+			slash++;
+		if (*slash)
+			slash++;
+		if (slash[0] == '/' && slash[1] == '/') {
+			slash += 2;
+			while (*slash && *slash != '/')
+				slash++;
+		}
+		file = safe_strdup(slash);
+	}
+	https->http_decoded_file = percent_decode(file);
+	free(file);
+	if (fs->fs_outputfile == 0) {
+		slash = strrchr(https->http_decoded_file, '/');
+		/* NB: we are not guaranteed to find one... */
+		fs->fs_outputfile = slash ? slash + 1
+			: https->http_decoded_file;
+	}
+
+	fs->fs_proto = https;
+	fs->fs_close = http_close;
+	fs->fs_retrieve = http_retrieve;
+	return 0;
+
+out_hostname:
+	free(https->http_hostname);
+out:
+	free(https->http_remote_request);
+	free(https);
+	return rv;
+}
+
+/*
+ * Release the HTTP state attached to `fs': the three strings and the
+ * http_state itself.  fs_outputfile is cleared as well, because the
+ * parsers may have pointed it into http_decoded_file.  Always
+ * returns 0.
+ */
+static int
+http_close(struct fetch_state *fs)
+{
+	struct http_state *state;
+
+	state = fs->fs_proto;
+	free(state->http_decoded_file);
+	free(state->http_remote_request);
+	free(state->http_hostname);
+	free(state);
+
+	fs->fs_outputfile = 0;
+	return 0;
+}
+
+/*
+ * Get a file using HTTP.  We will try to implement HTTP/1.1 eventually.
+ * This subroutine makes heavy use of the 4.4-Lite standard I/O library,
+ * in particular the `fgetln' which allows us to slurp an entire `line'
+ * (an arbitrary string of non-NUL characters ending in a newline) directly
+ * out of the stdio buffer.  This makes interpreting the HTTP headers much
+ * easier, since they are all guaranteed to end in `\r\n' and we can just
+ * ignore the `\r'.
+ *
+ * The request is assembled as an iovec and sent with a single sendmsg()
+ * carrying MSG_EOF.  The reply is then read through stdio: the status
+ * line, the headers, and finally the body, which is copied to the output
+ * file (or to /dev/stdout when the output file is "-").
+ *
+ * Returns 0 on success, otherwise one of EX_USAGE (bad HTTP_TIMEOUT),
+ * EX_NOHOST, EX_OSERR, EX_PROTOCOL, EX_UNAVAILABLE (any status other than
+ * 200/206, or 304 outside mirror mode) or the result of check_md5().
+ * When the body transfer or the MD5 check fails, rm(fs) is called.
+ */
+static int
+http_retrieve(struct fetch_state *fs)
+{
+	struct http_state *https;
+	FILE *remote, *local;
+	int s;
+	struct sockaddr_in sin;
+	struct msghdr msg;
+	struct iovec iov[16];	/* XXX count precisely */
+	int n, status;
+	const char *env;
+	int timo;
+	char *line;
+	size_t linelen, readresult, writeresult;
+	off_t total_length, restart_from;
+	time_t last_modified;
+	char *base64ofmd5;
+	static char buf[BUFFER_SIZE];
+	int to_stdout;
+	char rangebuf[sizeof("Range: bytes=18446744073709551616-\r\n")];
+
+	https = fs->fs_proto;
+	to_stdout = (strcmp(fs->fs_outputfile, "-") == 0);
+
+	/* A timeout given on the command line beats HTTP_TIMEOUT. */
+	if (fs->fs_timeout) {
+		timo = fs->fs_timeout;
+	} else if ((env = getenv("HTTP_TIMEOUT")) != 0) {
+		char *ep;
+		unsigned long ul;
+
+		errno = 0;
+		ul = strtoul(env, &ep, 0);
+		if (*ep != '\0' || *env == '\0' || errno != 0
+		    || ul > INT_MAX) {
+			warnx("`%s': invalid timeout", env);
+			return EX_USAGE;
+		}
+		timo = ul;
+	} else {
+		timo = 0;
+	}
+
+	memset(&sin, 0, sizeof sin);
+	sin.sin_family = AF_INET;
+	sin.sin_len = sizeof sin;
+	sin.sin_port = htons(https->http_port);
+
+	/* Try a numeric address first and fall back to the resolver. */
+	if (inet_aton(https->http_hostname, &sin.sin_addr) == 0) {
+		struct hostent *hp;
+
+		/* XXX - do timeouts for name resolution? */
+		hp = gethostbyname2(https->http_hostname, AF_INET);
+		if (hp == 0) {
+			warnx("`%s': cannot resolve: %s", https->http_hostname,
+			      hstrerror(h_errno));
+			return EX_NOHOST;
+		}
+		memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof sin.sin_addr);
+	}
+
+	msg.msg_name = (caddr_t)&sin;
+	msg.msg_namelen = sizeof sin;
+	msg.msg_iov = iov;
+	n = 0;
+	msg.msg_control = 0;
+	msg.msg_controllen = 0;
+	msg.msg_flags = MSG_EOF;
+
+/* Append the string Str to Iov at index N and advance N. */
+#define addstr(Iov, N, Str) \
+	do { \
+		(Iov)[N].iov_base = (void *)(Str); \
+		(Iov)[N].iov_len = strlen((Iov)[N].iov_base); \
+		(N)++; \
+	} while(0)
+
+retry:
+	addstr(iov, n, "GET /");
+	addstr(iov, n, https->http_remote_request);
+	addstr(iov, n, " HTTP/1.0\r\n");
+	addstr(iov, n, format_http_user_agent());
+	/* do content negotiation here */
+	addstr(iov, n, "Accept: */*\r\n");
+	if (fs->fs_mirror) {
+		struct stat stab;
+
+		errno = 0;
+		if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0)
+		     || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0))
+		    && S_ISREG(stab.st_mode)) {
+			addstr(iov, n, "If-Modified-Since: ");
+			addstr(iov, n, format_http_date(stab.st_mtime));
+			addstr(iov, n, "\r\n");
+		} else if (errno != 0) {
+			warn("%s: cannot mirror; will retrieve anew",
+			     fs->fs_outputfile);
+		}
+	}
+	if (fs->fs_restart) {
+		struct stat stab;
+
+		errno = 0;
+		if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0)
+		     || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0))
+		    && S_ISREG(stab.st_mode)) {
+			addstr(iov, n, "If-Range: ");
+			addstr(iov, n, format_http_date(stab.st_mtime));
+			addstr(iov, n, "\r\n");
+			sprintf(rangebuf, "Range: bytes=%qd-\r\n",
+				(quad_t)stab.st_size);
+			addstr(iov, n, rangebuf);
+		} else if (errno != 0) {
+			warn("%s: cannot restart; will retrieve anew",
+			     fs->fs_outputfile);
+		}
+	}
+	addstr(iov, n, "Connection: close\r\n");
+	addstr(iov, n, "\r\n");
+	msg.msg_iovlen = n;
+
+	s = socket(PF_INET, SOCK_STREAM, 0);
+	if (s < 0) {
+		warn("socket");
+		return EX_OSERR;
+	}
+
+	/* From here on closing `remote' also closes the socket. */
+	remote = fdopen(s, "r");
+	if (remote == 0) {
+		warn("fdopen");
+		close(s);
+		return EX_OSERR;
+	}
+
+	setup_sigalrm();
+	alarm(timo);
+	if (sendmsg(s, &msg, MSG_EOF) < 0) {
+		warn("%s", https->http_hostname);
+		/* do not leave the alarm armed or the handler installed */
+		alarm(0);
+		fclose(remote);
+		unsetup_sigalrm();
+		return EX_OSERR;
+	}
+
+	alarm(timo);
+	line = fgetln(remote, &linelen);
+	alarm(0);
+	if (line == 0) {
+		if (ferror(remote)) {
+			warn("reading reply from %s", https->http_hostname);
+			fclose(remote);
+			unsetup_sigalrm();
+			return EX_OSERR;
+		} else {
+			warnx("empty reply from %s", https->http_hostname);
+			fclose(remote);
+			unsetup_sigalrm();
+			return EX_PROTOCOL;
+		}
+	}
+	/*
+	 * If the other end is HTTP 0.9, then we just suck their
+	 * response over; can't do anything fancy.  We assume that
+	 * the file is a text file, so it is safe to use fgetln()
+	 * to suck the entire file.  (It had better be, since
+	 * we used it to grab the first line.)
+	 */
+	if (linelen < 5 || strncasecmp(line, "http/", 5) != 0) {
+		if (to_stdout)
+			local = fopen("/dev/stdout", "w");
+		else
+			local = fopen(fs->fs_outputfile, "w");
+		if (local == 0) {
+			warn("%s: fopen", fs->fs_outputfile);
+			fclose(remote);
+			unsetup_sigalrm();
+			return EX_OSERR;
+		}
+		display(fs, -1, 0);
+
+		do {
+			writeresult = fwrite(line, 1, linelen, local);
+			display(fs, -1, writeresult);
+			if (writeresult != linelen)
+				break;
+			alarm(timo);
+			line = fgetln(remote, &linelen);
+			alarm(0);
+		} while(line != 0);
+		unsetup_sigalrm();
+
+		if (ferror(local)) {
+			warn("%s", fs->fs_outputfile);
+			fclose(local);
+			fclose(remote);
+			rm(fs);
+			return EX_OSERR;
+		} else if(ferror(remote)) {
+			warn("%s", https->http_hostname);
+			fclose(local);
+			fclose(remote);
+			rm(fs);
+			return EX_OSERR;
+		}
+		fclose(local);
+		fclose(remote);
+		display(fs, -1, -1);
+		return 0;
+	}
+	/*
+	 * OK.  The other end is doing HTTP 1.0 at the very least.
+	 * This means that some of the fancy stuff is at least possible.
+	 */
+	line[linelen - 1] = '\0';	/* turn line into a string */
+	status = http_first_line(line);
+
+	/* In the future, we might handle redirection and other responses. */
+	switch(status) {
+	case 200:		/* Here come results */
+	case 206:		/* Here come partial results */
+		break;
+
+	case 304:		/* Object is unmodified */
+		if (fs->fs_mirror) {
+			fclose(remote);
+			unsetup_sigalrm();
+			return 0;
+		}
+		/* otherwise, fall through */
+	default:
+		warnx("%s: %s: HTTP server returned error code %d",
+		      fs->fs_outputfile, https->http_hostname, status);
+		if (fs->fs_verbose > 1) {
+			fputs(line, stderr);
+			fputc('\n', stderr);
+			while ((line = fgetln(remote, &linelen)) != 0)
+				fwrite(line, 1, linelen, stderr);
+		}
+		fclose(remote);
+		unsetup_sigalrm();
+		return EX_UNAVAILABLE;
+	}
+
+	total_length = -1;	/* -1 means ``don't know'' */
+	last_modified = -1;
+	base64ofmd5 = 0;
+	restart_from = 0;
+
+	while((line = fgetln(remote, &linelen)) != 0) {
+		char *value, *ep;
+		enum http_header header;
+		unsigned long ul;
+
+		/* Only the `\n' is replaced; a `\r' stays in the value. */
+		line[linelen - 1] = '\0';
+		header = http_parse_header(line, &value);
+
+		if (header == ht_end_of_header)
+			break;
+
+		switch(header) {
+		case ht_content_length:
+			errno = 0;
+			ul = strtoul(value, &ep, 10);
+			if (errno != 0 || ep == value
+			    || (*ep != '\r' && *ep != '\0'))
+				warnx("invalid Content-Length: `%s'", value);
+			else if (!fs->fs_restart)
+				total_length = ul;
+			break;
+
+		case ht_last_modified:
+			last_modified = parse_http_date(value);
+			if (last_modified == -1 && fs->fs_verbose > 0)
+				warnx("invalid Last-Modified: `%s'", value);
+			break;
+
+		case ht_content_md5:
+			free(base64ofmd5);	/* in case it is repeated */
+			base64ofmd5 = safe_strdup(value);
+			break;
+
+		case ht_content_range:
+			/* NB: we might have to restart from farther back
+			   than we asked. */
+			status = parse_http_content_range(value, &restart_from,
+							  &total_length);
+			/* If we couldn't understand the reply, get the whole
+			   thing. */
+			if (status) {
+				fs->fs_restart = 0;
+/*doretry:*/
+				fclose(remote);
+				/* retry: installs the handler afresh */
+				unsetup_sigalrm();
+				if (base64ofmd5)
+					free(base64ofmd5);
+				restart_from = 0;
+				n = 0;
+				goto retry;
+			}
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/*
+	 * OK, if we got here, then we have finished parsing the header
+	 * and have read the `\r\n' line which denotes the end of same.
+	 * We may or may not have a good idea of the length of the file
+	 * or its modtime.  At this point we will have to deal with
+	 * any special byte-range, content-negotiation, redirection,
+	 * or authentication, and probably jump back up to the top,
+	 * once we implement those features.  So, all we have left to
+	 * do is open up the output file and copy data from input to
+	 * output until EOF.
+	 *
+	 * A named output file is opened for update so that it can be
+	 * read back for the MD5 check, and without truncation when we
+	 * are resuming, so that the part we already have survives.
+	 */
+	if (to_stdout)
+		local = fopen("/dev/stdout", "w");
+	else
+		local = fopen(fs->fs_outputfile, restart_from > 0 ? "r+" : "w+");
+	if (local == 0) {
+		warn("%s: fopen", fs->fs_outputfile);
+		fclose(remote);
+		free(base64ofmd5);
+		unsetup_sigalrm();
+		return EX_OSERR;
+	}
+
+	fs->fs_modtime = last_modified;
+	fseek(local, restart_from, SEEK_SET); /* XXX truncation off_t->long */
+	display(fs, total_length, restart_from); /* XXX truncation */
+
+	do {
+		alarm(timo);
+		readresult = fread(buf, 1, sizeof buf, remote);
+		alarm(0);
+
+		if (readresult == 0)
+			break;
+		display(fs, total_length, readresult);
+
+		/* write exactly what was read, not the whole buffer */
+		writeresult = fwrite(buf, 1, readresult, local);
+	} while (writeresult == readresult);
+
+	status = errno;		/* save errno for warn(), below, if needed */
+	display(fs, total_length, -1); /* do here in case we have to warn */
+	errno = status;
+
+	if (ferror(remote)) {
+		warn("reading remote file from %s", https->http_hostname);
+		status = EX_OSERR;
+	} else if(ferror(local)) {
+		warn("`%s': fwrite", fs->fs_outputfile);
+		status = EX_OSERR;
+	} else {
+		status = 0;
+	}
+	if (base64ofmd5) {
+		/*
+		 * Ack.  When restarting, the MD5 only covers the parts
+		 * we are getting, not the whole thing.
+		 *
+		 * Only verify a transfer that succeeded, so that an I/O
+		 * error is not replaced by the digest result.  Data sent
+		 * to standard output cannot be read back at all.
+		 */
+		if (status == 0 && !to_stdout) {
+			fseek(local, restart_from, SEEK_SET);
+			status = check_md5(local, base64ofmd5);
+		} else if (status == 0 && fs->fs_verbose > 0) {
+			warnx("cannot verify MD5 digest of data written to "
+			      "standard output");
+		}
+		free(base64ofmd5);
+	}
+
+	unsetup_sigalrm();
+	fclose(local);
+	fclose(remote);
+
+	if (status != 0)
+		rm(fs);
+	else
+		adjmodtime(fs);
+
+	return status;
+#undef addstr
+}
+
+/*
+ * The format of the response line for an HTTP request is:
+ *	HTTP/V.vv{WS}999{WS}Explanatory text for humans to read\r\n
+ * Where {WS} represents whitespace (spaces and/or tabs) and 999
+ * is a machine-interpretable result code.  We return the integer value
+ * of that result code, or the impossible value `0' if we are unable to
+ * parse the result.
+ *
+ * `line' must be NUL-terminated.  The code has to lie between 100 and
+ * 999 and be followed either by whitespace or by the end of the string;
+ * the latter covers a server that sends neither a reason phrase nor a
+ * `\r'.
+ */
+static int
+http_first_line(const char *line)
+{
+	char *ep;
+	unsigned long ul;
+
+	if (strncasecmp(line, "http/", 5) != 0)
+		return 0;
+
+	/*
+	 * The <ctype.h> functions are only defined for unsigned char
+	 * values and EOF, so every argument is cast.  None of them is
+	 * true for NUL, so the loops stop at the end of the string.
+	 */
+	line += 5;
+	while (isdigit((unsigned char)*line))	/* skip major version number */
+		line++;
+	if (*line++ != '.')			/* skip period */
+		return 0;
+	while (isdigit((unsigned char)*line))	/* skip minor version number */
+		line++;
+	while (isspace((unsigned char)*line))	/* skip first whitespace */
+		line++;
+
+	errno = 0;
+	ul = strtoul(line, &ep, 10);
+	if (errno != 0 || ul > 999 || ul < 100)
+		return 0;
+	if (*ep != '\0' && !isspace((unsigned char)*ep))
+		return 0;
+	return ul;
+}
+
+/*
+ * The format of a header line for an HTTP request is:
+ *	Header-Name: header-value (with comments in parens)\r\n
+ * This would be a nice application for gperf(1), except that the
+ * names are case-insensitive and gperf can't handle that.
+ *
+ * `line' is a NUL-terminated header line and is modified in place: the
+ * colon is overwritten with a NUL so that the name can be compared.
+ * For every result other than ht_end_of_header and ht_syntax_error,
+ * *valuep is set to the first non-blank character after the colon.
+ * Nothing is trimmed from the end of the value, so a trailing `\r'
+ * is still there.
+ */
+static enum http_header
+http_parse_header(char *line, char **valuep)
+{
+	static const struct {
+		const char *name;
+		enum http_header header;
+	} known[] = {
+		{ "Content-Length",	ht_content_length },
+		{ "Last-Modified",	ht_last_modified },
+		{ "Content-MD5",	ht_content_md5 },
+		{ "Content-Range",	ht_content_range },
+		{ "Content-Type",	ht_content_type },
+		{ "Transfer-Encoding",	ht_transfer_encoding },
+		{ "Warning",		ht_warning },
+	};
+	char *colon, *value;
+	size_t i;
+
+	if (*line == '\0'	/* protocol error! */
+	    || (line[0] == '\r' && line[1] == '\0'))
+		return ht_end_of_header;
+
+	colon = strchr(line, ':');
+	if (colon == 0)
+		return ht_syntax_error;
+	*colon = '\0';
+
+	/*
+	 * The cast matters: the bytes come from the server, and isspace()
+	 * is undefined for negative values other than EOF.
+	 */
+	for (value = colon + 1; isspace((unsigned char)*value); value++)
+		;		/* do nothing */
+
+	/* XXX - strip comments? */
+	*valuep = value;
+
+	for (i = 0; i < sizeof known / sizeof known[0]; i++) {
+		if (strcasecmp(line, known[i].name) == 0)
+			return known[i].header;
+	}
+	return ht_unknown;
+}
+
+/*
+ * Compute the RSA Data Security, Inc., MD5 Message Digest of the file
+ * given in `fp', see if it matches the one given in base64 encoding by
+ * `base64ofmd5'.  Warn and return an error if it doesn't.
+ *
+ * `fp' is read from its current position up to end of file.  Trailing
+ * whitespace in `base64ofmd5' is ignored, because header values reach
+ * us with the `\r' of the line ending still attached.  Returns 0 on a
+ * match, EX_DATAERR on a mismatch and EX_IOERR if `fp' cannot be read.
+ */
+static int
+check_md5(FILE *fp, char *base64ofmd5) {
+	MD5_CTX ctx;
+	unsigned char digest[16];
+	unsigned char buf[512];
+	size_t len, theirlen;
+	char *ourval;
+	int rv;
+
+	MD5Init(&ctx);
+	while ((len = fread(buf, 1, sizeof buf, fp)) != 0) {
+		MD5Update(&ctx, buf, len);
+	}
+	/* fread() returns 0 for errors as well as for end of file */
+	if (ferror(fp)) {
+		warn("reading back data for MD5 check");
+		return EX_IOERR;
+	}
+	MD5Final(digest, &ctx);
+
+	theirlen = strlen(base64ofmd5);
+	while (theirlen > 0
+	       && isspace((unsigned char)base64ofmd5[theirlen - 1]))
+		theirlen--;
+
+	ourval = to_base64(digest, 16);
+	if (strlen(ourval) != theirlen
+	    || strncmp(ourval, base64ofmd5, theirlen) != 0) {
+		warnx("MD5 digest mismatch: %s, should be %.*s", ourval,
+		      (int)theirlen, base64ofmd5);
+		rv = EX_DATAERR;
+	} else {
+		rv = 0;
+	}
+	free(ourval);
+	return rv;
+}
+
+/*
+ * English day and month abbreviations as used in HTTP dates.  wkdays is
+ * indexed by tm_wday and months by tm_mon (see format_http_date());
+ * parse_http_date() searches months to map a name back to tm_mon.
+ */
+static const char *wkdays[] = {
+ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+};
+static const char *months[] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
+ "Nov", "Dec"
+};
+
+/*
+ * Interpret one of the three possible formats for an HTTP date.
+ * All of them are really bogus; HTTP should use either ISO 8601
+ * or NTP timestamps.  We make some attempt to accept a subset of 8601
+ * format.  The three standard formats are all fixed-length subsets of their
+ * respective standards (except 8601, which puts all of the stuff we
+ * care about up front).
+ *
+ * The accepted layouts are
+ *	19970127T134551...			(ISO 8601 subset)
+ *	Mon, 27 Jan 1997 14:24:35 ...		(RFC 1123)
+ *	Mon Jan 27 14:25:20 1997		(asctime)
+ *	Monday, 27-Jan-97 14:31:09 ...		(RFC 850)
+ * and the fields are always taken to be UTC.  Returns the time as a
+ * time_t, or -1 if the string cannot be understood.
+ */
+static time_t
+parse_http_date(char *string)
+{
+	struct tm tm;
+	time_t rv;
+	const char *tz;
+	char *savedtz;
+	int i, year;
+
+#define digit(x) (string[x] - '0')
+
+	/* start from all zeroes on every call */
+	memset(&tm, 0, sizeof tm);
+
+	/* 8601 has the shortest minimum length */
+	if (strlen(string) < 15)
+		return -1;
+
+	if (isdigit((unsigned char)*string)) {
+		/* ISO 8601: 19970127T134551stuffwedon'tcareabout */
+		for (i = 0; i < 15; i++) {
+			if (i != 8 && !isdigit((unsigned char)string[i]))
+				break;
+		}
+		if (i < 15)
+			return -1;
+		tm.tm_year = (digit(0) * 1000
+			      + digit(1) * 100
+			      + digit(2) * 10
+			      + digit(3)) - 1900;
+		tm.tm_mon = digit(4) * 10 + digit(5) - 1;
+		tm.tm_mday = digit(6) * 10 + digit(7);
+		if (string[8] != 'T' && string[8] != 't' && string[8] != ' ')
+			return -1;
+		tm.tm_hour = digit(9) * 10 + digit(10);
+		tm.tm_min = digit(11) * 10 + digit(12);
+		tm.tm_sec = digit(13) * 10 + digit(14);
+		/* We don't care about the rest of the stuff after the secs. */
+	} else if (string[3] == ',') {
+		/* Mon, 27 Jan 1997 14:24:35 stuffwedon'tcareabout */
+		if (strlen(string) < 25)
+			return -1;
+		string += 5;	/* skip over day-of-week */
+		if (!(isdigit((unsigned char)string[0])
+		      && isdigit((unsigned char)string[1])))
+			return -1;
+		tm.tm_mday = digit(0) * 10 + digit(1);
+		for (i = 0; i < 12; i++) {
+			if (strncasecmp(months[i], &string[3], 3) == 0)
+				break;
+		}
+		if (i >= 12)
+			return -1;
+		tm.tm_mon = i;
+
+		if (sscanf(&string[7], "%d %d:%d:%d", &year, &tm.tm_hour,
+			   &tm.tm_min, &tm.tm_sec) != 4)
+			return -1;
+		tm.tm_year = year - 1900;
+
+	} else if (string[3] == ' ') {
+		/* Mon Jan 27 14:25:20 1997 */
+		if (strlen(string) < 25)
+			return -1;
+		string += 4;
+		for (i = 0; i < 12; i++) {
+			if (strncasecmp(string, months[i], 3) == 0)
+				break;
+		}
+		if (i >= 12)
+			return -1;
+		tm.tm_mon = i;
+		if (sscanf(&string[4], "%d %d:%d:%d %d", &tm.tm_mday,
+			   &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &year)
+		    != 5)
+			return -1;
+		tm.tm_year = year - 1900;
+	} else {
+		/* Monday, 27-Jan-97 14:31:09 stuffwedon'tcareabout */
+		char *comma = strchr(string, ',');
+		char mname[4];
+
+		if (comma == 0)
+			return -1;
+		string = comma + 1;
+		if (strlen(string) < 19)
+			return -1;
+		string++;	/* skip the blank after the comma */
+		if (sscanf(string, "%d-%c%c%c-%d %d:%d:%d", &tm.tm_mday,
+			   mname, mname + 1, mname + 2, &year,
+			   &tm.tm_hour, &tm.tm_min, &tm.tm_sec) != 8)
+			return -1;
+		mname[3] = '\0';	/* %c does not terminate */
+		for (i = 0; i < 12; i++) {
+			if (strcasecmp(months[i], mname) == 0)
+				break;
+		}
+		if (i >= 12)
+			return -1;
+		tm.tm_mon = i;
+		/*
+		 * This format normally carries a two-digit year.  Take
+		 * 70-99 as 19xx and 00-69 as 20xx, and cope with a server
+		 * that sends all four digits anyway.
+		 */
+		if (year >= 1900)
+			year -= 1900;
+		else if (year < 70)
+			year += 100;
+		tm.tm_year = year;
+	}
+#undef digit
+
+	if (tm.tm_sec > 60 || tm.tm_min > 59 || tm.tm_hour > 23
+	    || tm.tm_mday > 31 || tm.tm_mon > 11)
+		return -1;
+	if (tm.tm_sec < 0 || tm.tm_min < 0 || tm.tm_hour < 0
+	    || tm.tm_mday < 1 || tm.tm_mon < 0 || tm.tm_year < 0)
+		return -1;
+
+	/*
+	 * mktime() works in local time, so pretend for a moment that the
+	 * local zone is UTC.  The old TZ value is copied first, because
+	 * setenv() is free to overwrite the string getenv() handed us.
+	 */
+	tz = getenv("TZ");
+	savedtz = 0;
+	if (tz != 0) {
+		savedtz = strdup(tz);
+		if (savedtz == 0)
+			return -1;
+	}
+	setenv("TZ", "UTC0", 1);
+	tzset();
+	rv = mktime(&tm);
+	if (savedtz != 0) {
+		setenv("TZ", savedtz, 1);
+		free(savedtz);
+	} else {
+		unsetenv("TZ");
+	}
+	tzset();
+	return rv;
+}
+
+/*
+ * Format `when' as an HTTP date in UTC, for example
+ * "Mon, 27 Jan 1997 14:24:35 GMT" (or the compact ISO 8601 form when
+ * HTTP_DATE_ISO_8601 is defined).  The result lives in a static buffer
+ * that the next call overwrites.  Returns 0 if gmtime() cannot convert
+ * `when'.
+ */
+static char *
+format_http_date(time_t when)
+{
+	struct tm *tm;
+	static char buf[30];
+
+	tm = gmtime(&when);
+	if (tm == 0)
+		return 0;
+	/*
+	 * buf is exactly big enough for a four-digit year; snprintf()
+	 * makes sure an absurd timestamp is cut short instead of running
+	 * off the end.
+	 */
+#ifndef HTTP_DATE_ISO_8601
+	snprintf(buf, sizeof buf, "%s, %02d %s %04d %02d:%02d:%02d GMT",
+		 wkdays[tm->tm_wday], tm->tm_mday, months[tm->tm_mon],
+		 tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec);
+#else /* ISO 8601 */
+	snprintf(buf, sizeof buf, "%04d%02d%02dT%02d%02d%02d+0000",
+		 tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+		 tm->tm_hour, tm->tm_min, tm->tm_sec);
+#endif
+	return buf;
+}
+
+/*
+ * Return the complete `User-Agent:' header line, `\r\n' included.
+ * It is made up of FETCH_VERSION and the kern.ostype, kern.osrelease
+ * and hw.machine sysctl values.  The line is built on the first call
+ * and kept in a static buffer; later calls just return that buffer.
+ * A sysctl that fails is reported and contributes an empty string.
+ */
+static char *
+format_http_user_agent(void)
+{
+	static char buf[128];
+	static int inited;
+
+	if (!inited) {
+		int mib[2];
+		char ostype[128], osrelease[128], machine[128];
+		size_t len;
+
+		mib[0] = CTL_KERN;
+		mib[1] = KERN_OSTYPE;
+		len = sizeof ostype;
+		if (sysctl(mib, 2, ostype, &len, 0, 0) < 0) {
+			warn("sysctl");
+			ostype[0] = '\0';
+		}
+		mib[1] = KERN_OSRELEASE;
+		len = sizeof osrelease;
+		if (sysctl(mib, 2, osrelease, &len, 0, 0) < 0) {
+			warn("sysctl");
+			osrelease[0] = '\0';
+		}
+		mib[0] = CTL_HW;
+		mib[1] = HW_MACHINE;
+		len = sizeof machine;
+		if (sysctl(mib, 2, machine, &len, 0, 0) < 0) {
+			warn("sysctl");
+			machine[0] = '\0';
+		}
+
+		snprintf(buf, sizeof buf,
+			 "User-Agent: " FETCH_VERSION " %s/%s (%s)\r\n",
+			 ostype, osrelease, machine);
+		/* remember that the work is done */
+		inited = 1;
+	}
+	return buf;
+}
+
+/*
+ * Parse a Content-Range return header from the server.  RFC 2068 defines
+ * this header to have the format:
+ *	Content-Range: bytes 12345-67890/123456
+ * Since we always ask for the whole rest of the file, we consider it an
+ * error if the reply doesn't claim to give it to us.
+ *
+ * `orig' is the header value, that is, everything from "bytes" on;
+ * whitespace after the total (such as a left-over `\r') is accepted.
+ * On success *restart_from receives the first byte position,
+ * *total_length the total size of the object, and 0 is returned.
+ * Otherwise a warning is printed, both outputs are left alone and
+ * EX_PROTOCOL is returned.
+ */
+static int
+parse_http_content_range(char *orig, off_t *restart_from, off_t *total_length)
+{
+	u_quad_t first, last, total;
+	char *ep;
+
+	/*
+	 * Compare just the unit token; the numbers follow it in the same
+	 * string, so a comparison of the whole value could never match.
+	 */
+	if (strncasecmp(orig, "bytes", 5) != 0
+	    || !isspace((unsigned char)orig[5])) {
+		warnx("unknown Content-Range unit: `%s'", orig);
+		return EX_PROTOCOL;
+	}
+
+	orig += 5;
+	while (isspace((unsigned char)*orig))
+		orig++;
+
+	errno = 0;
+	first = strtouq(orig, &ep, 10);
+	if (errno != 0 || *ep != '-') {
+		warnx("invalid Content-Range: `%s'", orig);
+		return EX_PROTOCOL;
+	}
+	last = strtouq(ep + 1, &ep, 10);
+	if (errno != 0 || *ep != '/' || last < first) {
+		warnx("invalid Content-Range: `%s'", orig);
+		return EX_PROTOCOL;
+	}
+	total = strtouq(ep + 1, &ep, 10);
+	if (errno != 0 || !(*ep == '\0' || isspace((unsigned char)*ep))) {
+		warnx("invalid Content-Range: `%s'", orig);
+		return EX_PROTOCOL;
+	}
+
+	/* The range has to run through to the last byte of the object. */
+	if (last + 1 != total) {
+		warnx("HTTP server did not return requested Content-Range");
+		return EX_PROTOCOL;
+	}
+
+	*restart_from = first;
+	*total_length = total;
+	return 0;
+}
diff --git a/usr.bin/fetch/main.c b/usr.bin/fetch/main.c
index cc40e32..50845c7 100644
--- a/usr.bin/fetch/main.c
+++ b/usr.bin/fetch/main.c
@@ -27,403 +27,282 @@
/* $FreeBSD$ */
#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pwd.h>
-#include <unistd.h>
-
-#include <netinet/in.h>
-
-#include <arpa/inet.h>
#include <err.h>
#include <errno.h>
-#include <netdb.h>
-#include <pwd.h>
-#include <regex.h>
+#include <limits.h> /* needed for INT_MAX */
+#include <setjmp.h>
#include <signal.h>
-#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <unistd.h>
-#include <ftpio.h>
-
-#define BUFFER_SIZE 1024
-#define HTTP_TIMEOUT 300 /* seconds */
-#define FTP_TIMEOUT 300 /* seconds */
-
-char buffer[BUFFER_SIZE];
-
-extern char *__progname; /* from crt0.o */
-
-int verbose = 1;
-int ftp_verbose = 0;
-int linkfile = 0;
-char *outputfile = 0;
-char *change_to_dir = 0;
-char *host = 0;
-int passive_mode = 0;
-char *file_to_get = 0;
-int ftp = 0;
-int http_proxy = 0;
-int http = 0;
-int http_port = 80;
-int mirror = 0;
-int newtime = 0;
-int restart = 0;
-time_t modtime;
-FILE *file = 0;
-int timeout_ival = 0;
-
-void usage(void), die(int), rm(void), timeout(int), ftpget(void),
- httpget(void), fileget(void),
- display(int, int), parse(char *), output_file_name(void),
- f_size(char *, off_t *, time_t *), ftperr(FILE* ftp, char *, ...),
- filter(unsigned char *, int),
- setup_http_proxy(void);
-
-int match(char *, char *), http_open(void);
+#include <sys/param.h> /* for MAXHOSTNAMELEN */
+#include <sys/time.h> /* for struct timeval, gettimeofday */
-void
-usage()
-{
- fprintf(stderr, "usage: %s [-DHINPMTVLqlmnprv] [-o outputfile] <-f file -h host [-c dir]| URL>\n", __progname);
- exit(1);
-}
+#include "fetch.h"
-void
-die(int sig)
-{
- int e = errno;
-
- rm();
- if (!sig)
- fprintf (stderr, "%s: %s\n", __progname, strerror(e));
- else
- warnx ("Interrupted by signal %d", sig);
- exit(1);
-}
+static struct fetch_state clean_fetch_state;
+static sigjmp_buf sigbuf;
+static int get(struct fetch_state *volatile fs);
-void
-adjmodtime()
+static void
+usage(const char *argv0)
{
- struct timeval tv[2];
-
- if (!newtime) {
- tv[0].tv_usec = tv[1].tv_usec = 0;
- tv[0].tv_sec = time(0);
- tv[1].tv_sec = modtime;
- utimes (outputfile, tv);
- }
+ fprintf(stderr,
+ "%s: usage:\n\t%s [-DHINPMTVLqlmnprv] [-o outputfile] "
+ "[-f file -h host [-c dir] | URL]\n", argv0, argv0);
+ exit(EX_USAGE);
}
-void
-rm()
-{
- if (file) {
- fclose(file);
- if (file != stdout) {
- if (!restart && !mirror)
- remove(outputfile);
- adjmodtime();
- }
- }
-}
int
-main(int argc, char **argv)
+main(int argc, char *const *argv)
{
int c;
- char *s;
-
- while ((c = getopt (argc, argv, "D:HINPMT:V:Lqc:f:h:o:plmnrv")) != -1) {
- switch (c) {
- case 'D': case 'H': case 'I': case 'N': case 'L': case 'V':
- break; /* ncftp compatibility */
-
- case 'q':
- verbose = 0;
-
- case 'c':
- change_to_dir = optarg;
- break;
+ char *ep;
+ struct fetch_state fs;
+ const char *change_to_dir, *file_to_get, *hostname;
+ int error, rv;
+ unsigned long l;
+
+ init_schemes();
+ fs = clean_fetch_state;
+ fs.fs_verbose = 1;
+ change_to_dir = file_to_get = hostname = 0;
+
+ while ((c = getopt(argc, argv, "D:HINPMT:V:Lqc:f:h:o:plmnrv")) != -1) {
+ switch (c) {
+ case 'D': case 'H': case 'I': case 'N': case 'L': case 'V':
+ break; /* ncftp compatibility */
- case 'f':
- file_to_get = optarg;
- break;
+ case 'q':
+ fs.fs_verbose = 0;
+
+ case 'c':
+ change_to_dir = optarg;
+ break;
- case 'h':
- host = optarg;
- ftp = 1;
- break;
-
- case 'l':
- linkfile = 1;
- break;
-
- case 'o':
- outputfile = optarg;
- break;
+ case 'f':
+ file_to_get = optarg;
+ break;
- case 'p': case 'P':
- passive_mode = 1;
- break;
+ case 'h':
+ hostname = optarg;
+ break;
+
+ case 'l':
+ fs.fs_linkfile = 1;
+ break;
+
+ case 'o':
+ fs.fs_outputfile = optarg;
+ break;
+
+ case 'p': case 'P':
+ fs.fs_passive_mode = 1;
+ break;
- case 'm': case 'M':
- mirror = 1;
- break;
+ case 'm': case 'M':
+ fs.fs_mirror = 1;
+ break;
- case 'n':
- newtime = 1;
- break;
+ case 'n':
+ fs.fs_newtime = 1;
+ break;
- case 'r':
- restart = 1;
- break;
+ case 'r':
+ fs.fs_restart = 1;
+ break;
+
+ case 'R':
+ fs.fs_precious = 1;
+ break;
- case 'v':
- ftp_verbose = 1;
- break;
+ case 'v':
+ if (fs.fs_verbose < 2)
+ fs.fs_verbose = 2;
+ else
+ fs.fs_verbose++;
+ break;
+
+ case 'T':
+ /* strtoul sets errno to ERANGE in the case of overflow */
+ errno = 0;
+ l = strtoul(optarg, &ep, 0);
+ if (!optarg[0] || *ep || errno != 0 || l > INT_MAX)
+ errx(EX_USAGE, "invalid timeout value: `%s'",
+ optarg);
+ fs.fs_timeout = l;
+ break;
+
+ default:
+ case '?':
+ usage(argv[0]);
+ }
+ }
- case 'T':
- timeout_ival = atoi(optarg);
- break;
+ clean_fetch_state = fs; /* preserve option settings */
- default:
- case '?':
- usage();
- }
+ if (argv[optind] && (hostname || change_to_dir || file_to_get)) {
+ warnx("cannot use -h, -c, or -f with a URI argument");
+ usage(argv[0]);
}
- argc -= optind;
- argv += optind;
- if (argv[0]) {
- if (host || change_to_dir || file_to_get)
- usage();
- s = strdup(argv[0]);
- if (s == NULL)
- s = argv[0]; /* optomistic, I know.. malloc just failed. */
- parse(s);
- } else {
- if (!host || !file_to_get)
- usage();
- }
-
- if (mirror && restart)
- errx(1, "-m and -r are mutually exclusive.");
-
- output_file_name();
-
- signal(SIGHUP, die);
- signal(SIGINT, die);
- signal(SIGQUIT, die);
- signal(SIGTERM, die);
+
+ if (fs.fs_mirror && fs.fs_restart)
+ errx(EX_USAGE, "-m and -r are mutually exclusive.");
- setup_http_proxy();
+ if (argv[optind] == 0) {
+ char *uri;
- if (http)
- httpget();
- else if (ftp)
- ftpget();
- else
- fileget();
- exit(0);
-}
+ if (hostname == 0) hostname = "localhost";
+ if (change_to_dir == 0) change_to_dir = "";
+ if (file_to_get == 0) {
+ usage(argv[0]);
+ }
-void
-timeout(int sig)
-{
- fprintf (stderr, "\n%s: Timeout\n", __progname);
- rm();
- exit(1);
-}
+ uri = alloca(sizeof("ftp://") + strlen(hostname) +
+ strlen(change_to_dir) + 2 + strlen(file_to_get));
+ strcpy(uri, "ftp://");
+ strcat(uri, hostname);
+ /*
+ * XXX - we should %-map a leading `/' into `%2f', but for
+ * anonymous FTP it is unlikely to matter. Still, it would
+ * be better to follow the spec.
+ */
+ if (change_to_dir[0] != '/')
+ strcat(uri, "/");
+ strcat(uri, change_to_dir);
+ if (file_to_get[0] != '/' && uri[strlen(uri) - 1] != '/')
+ strcat(uri, "/");
+ strcat(uri, file_to_get);
+
+ error = parse_uri(&fs, uri);
+ if (error)
+ return error;
+ return get(&fs);
+ }
-void
-fileget()
-{
- char *basename;
+ for (rv = 0; argv[optind] != 0; optind++) {
+ error = parse_uri(&fs, argv[optind]);
+ if (error) {
+ rv = error;
+ continue;
+ }
- if (access(file_to_get, R_OK)) {
- fprintf(stderr, "unable to access local file `%s'\n", file_to_get);
- exit(1);
- }
- basename = strrchr(file_to_get, '/');
- if (!basename) {
- fprintf(stderr, "malformed filename `%s' - expected full path.\n",
- file_to_get);
- exit(1);
- }
- ++basename; /* move over the / */
- if (!access(basename, F_OK)) {
- fprintf(stderr, "%s: file already exists.\n", basename);
- exit(1);
+ error = get(&fs);
+ if (error) {
+ rv = error;
+ }
+ fs = clean_fetch_state;
}
- if (linkfile) {
- if (symlink(file_to_get, basename) == -1) {
- perror("symlink");
- exit(1);
+ return rv;
+}
+
+/*
+ * The signal handling is probably more complex than it needs to be,
+ * but it doesn't cost a lot, so we'll be extra-careful. Using
+ * siglongjmp() to get out of the signal handler allows us to
+ * call rm() without having to store the state variable in some global
+ * spot where the signal handler can get at it. We also obviate the need
+ * for a separate timeout signal handler.
+ */
+static int
+get(struct fetch_state *volatile fs)
+{
+ volatile int error;
+ struct sigaction oldhup, oldint, oldquit, oldterm;
+ struct sigaction catch;
+ volatile sigset_t omask;
+
+ sigemptyset(&catch.sa_mask);
+ sigaddset(&catch.sa_mask, SIGHUP);
+ sigaddset(&catch.sa_mask, SIGINT);
+ sigaddset(&catch.sa_mask, SIGQUIT);
+ sigaddset(&catch.sa_mask, SIGTERM);
+ sigaddset(&catch.sa_mask, SIGALRM);
+ catch.sa_handler = catchsig;
+ catch.sa_flags = 0;
+
+ sigprocmask(SIG_BLOCK, &catch.sa_mask, (sigset_t *)&omask);
+ sigaction(SIGHUP, &catch, &oldhup);
+ sigaction(SIGINT, &catch, &oldint);
+ sigaction(SIGQUIT, &catch, &oldquit);
+ sigaction(SIGTERM, &catch, &oldterm);
+
+ error = sigsetjmp(sigbuf, 0);
+ if (error == SIGALRM) {
+ rm(fs);
+ unsetup_sigalrm();
+ fprintf(stderr, "\n"); /* just in case */
+ warnx("%s: %s: timed out", fs->fs_outputfile, fs->fs_status);
+ goto close;
+ } else if (error) {
+ rm(fs);
+ fprintf(stderr, "\n"); /* just in case */
+ warnx("%s: interrupted by signal: %s", fs->fs_status,
+ sys_signame[error]);
+ sigdelset(&omask, error);
+ signal(error, SIG_DFL);
+ sigprocmask(SIG_SETMASK, (sigset_t *)&omask, 0);
+ raise(error); /* so that it gets reported as such */
}
- }
- else {
- char *buf = alloca(strlen(file_to_get) + strlen(basename) + 15);
- sprintf(buf, "/bin/cp -p %s %s", file_to_get, basename);
- if (system(buf)) {
- fprintf(stderr, "failed to copy %s successfully.", file_to_get);
- exit(1);
- }
- }
-}
+ sigprocmask(SIG_SETMASK, (sigset_t *)&omask, 0);
+ error = fs->fs_retrieve(fs);
-void
-ftpget()
-{
- FILE *ftp, *fp;
- char *cp, *lp;
- int status, n;
- off_t size, size0, seekloc;
- char ftp_pw[200];
- time_t t;
-
- if ((cp = getenv("FTP_PASSWORD")) != NULL)
- strcpy(ftp_pw, cp);
- else {
- sprintf (ftp_pw, "%s@", getpwuid (getuid ())->pw_name);
- n = strlen (ftp_pw);
- gethostname (ftp_pw + n, 200 - n);
- }
- if ((lp = getenv("FTP_LOGIN")) == NULL)
- lp = "anonymous";
- ftp = ftpLogin(host, lp, ftp_pw, 0, ftp_verbose, &status);
- if (!ftp) {
- if (status)
- errx(1, "%s: %s", host, ftpErrString(status));
- else
- errx(1, "couldn't open FTP connection to %s: %s",
- host, hstrerror(h_errno));
- }
+close:
+ sigaction(SIGHUP, &oldhup, 0);
+ sigaction(SIGINT, &oldint, 0);
+ sigaction(SIGQUIT, &oldquit, 0);
+ sigaction(SIGTERM, &oldterm, 0);
+ fs->fs_close(fs);
- /* Time to set our defaults */
- ftpBinary (ftp);
- ftpPassive (ftp, passive_mode);
-
- if (change_to_dir) {
- status = ftpChdir (ftp, change_to_dir);
- if (status)
- ftperr (ftp, "couldn't cd to %s: ", change_to_dir);
- }
- size = ftpGetSize (ftp, file_to_get);
- modtime = ftpGetModtime (ftp, file_to_get);
- if (modtime <= 0) {
- warnx ("Couldn't get file time for %s - using current time", file_to_get);
- modtime = (time_t) -1;
- }
-
- if (!strcmp (outputfile, "-"))
- restart = 0;
- if (restart || mirror) {
- f_size (outputfile, &size0, &t);
- if (mirror && size0 == size && modtime <= t) {
- fclose(ftp);
- return;
- }
- else if (restart) {
- if (size0 && size0 < size)
- seekloc = size0;
- else
- seekloc = size0 = 0;
- }
- }
- else if (!restart)
- seekloc = size0 = 0;
-
- fp = ftpGet (ftp, file_to_get, &seekloc);
- if (fp == NULL)
- if (ftpErrno(ftp))
- ftperr (ftp, NULL);
- else
- die(0);
- if (size0 && !seekloc)
- size0 = 0;
-
- if (strcmp (outputfile, "-")) {
- file = fopen (outputfile, size0 ? "a" : "w");
- if (!file)
- err (1, "could not open output file %s.", outputfile);
- } else
- file = stdout;
-
- if (timeout_ival) {
- char env[80];
- /* Override any environment variable */
- snprintf(env, sizeof env - 1, "FTP_TIMEOUT=%d", timeout_ival);
- putenv(env);
- }
- else {
- char *cp = getenv("FTP_TIMEOUT");
+ return error;
+}
- if (!cp || !(timeout_ival = atoi(cp)))
- timeout_ival = FTP_TIMEOUT;
- }
+
+/*
+ * Utility functions
+ */
- display (size, size0);
- while (1) {
- struct sigaction act;
-
- act.sa_handler = timeout;
- act.sa_mask = 0;
- act.sa_flags = 0;
- sigaction(SIGALRM, &act, NULL);
- alarm(timeout_ival);
- n = status = fread (buffer, 1, BUFFER_SIZE, fp);
- alarm(0);
- act.sa_handler = SIG_DFL;
- sigaction(SIGALRM, &act, NULL);
- if (status <= 0)
- break;
- display (size, n);
- status = fwrite (buffer, 1, n, file);
- if (status != n)
- break;
- }
- if (status < 0)
- die(0);
- fclose(fp);
- fclose(file);
- display (size, -1);
- if (file != stdout)
- adjmodtime();
- exit (0);
+/*
+ * Handle all signals by jumping back into get().
+ */
+void
+catchsig(int sig)
+{
+ siglongjmp(sigbuf, sig);
}
+/* Used to generate the progress display when not in quiet mode. */
void
-display (int size, int n)
+display(struct fetch_state *fs, off_t size, ssize_t n)
{
- static int bytes, pr, init = 0;
+ static off_t bytes;
+ static int pr, init = 0;
static struct timeval t0, t_start;
static char *s;
struct timezone tz;
struct timeval t;
float d;
- if (!verbose)
+ if (!fs->fs_verbose)
return;
if (init == 0) {
init = 1;
gettimeofday(&t0, &tz);
t_start = t0;
bytes = pr = 0;
- s = (char *) malloc (strlen(outputfile) + 50);
+ s = malloc(strlen(fs->fs_outputfile) + 50);
if (size > 0)
- sprintf (s, "Receiving %s (%d bytes)%s", outputfile, size,
+ sprintf (s, "Receiving %s (%qd bytes)%s", fs->fs_outputfile,
+ (quad_t)size,
size ? "" : " [appending]");
else
- sprintf (s, "Receiving %s", outputfile);
+ sprintf (s, "Receiving %s", fs->fs_outputfile);
printf ("%s", s);
fflush (stdout);
bytes = n;
@@ -434,16 +313,18 @@ display (int size, int n)
if (size > 0)
printf ("\r%s: 100%%", s);
else
- printf ("\r%s: %d Kbytes", s, bytes/1024);
+ printf ("\r%s: %qd Kbytes", s, (quad_t)bytes/1024);
d = t.tv_sec + t.tv_usec/1.e6 - t_start.tv_sec - t_start.tv_usec/1.e6;
- printf ("\n%d bytes transfered in %.1f seconds", bytes, d);
+ printf ("\n%qd bytes transfered in %.1f seconds", (quad_t)bytes, d);
d = bytes/d;
if (d < 1000)
- printf (" (%d Bytes/s)\n", (int)d);
+ printf (" (%.0f bytes/s)\n", d);
else {
d /=1024;
- printf (" (%.2f K/s)\n", d);
+ printf (" (%.2f kB/s)\n", d);
}
+ free(s);
+ init = 0;
return;
}
bytes += n;
@@ -453,357 +334,11 @@ display (int size, int n)
t0 = t;
pr++;
if (size > 1000000)
- printf ("\r%s: %2d%%", s, bytes/(size/100));
+ printf ("\r%s: %2qd%%", s, (quad_t)bytes/(size/100));
else if (size > 0)
- printf ("\r%s: %2d%%", s, 100*bytes/size);
+ printf ("\r%s: %2qd%%", s, (quad_t)100*bytes/size);
else
- printf ("\r%s: %d Kbytes", s, bytes/1024);
+ printf ("\r%s: %qd kB", s, (quad_t)bytes/1024);
fflush (stdout);
}
-void
-parse (char *s)
-{
- char *p;
-
- if (strncasecmp (s, "file:", 5) == 0) {
- /* file:filename */
- s += 4;
- *s++ = '\0';
- host = NULL;
- ftp = http = 0;
- file_to_get = s;
- return;
- }
- else if (strncasecmp (s, "ftp://", 6) == 0) {
- /* ftp://host.name/file/name */
- s += 6;
- p = strchr(s, '/');
- if (!p) {
- warnx("no filename??");
- usage();
- }
- }
- else if (strncasecmp (s, "http://", 7) == 0) {
- /* http://host.name/file/name */
- char *q;
- s += 7;
- p = strchr(s, '/');
- if (!p) {
- warnx ("no filename??");
- usage ();
- }
- *p++ = 0;
- q = strchr (s, ':');
- if (q && q < p) {
- *q++ = 0;
- http_port = atoi (q);
- }
- host = s;
- file_to_get = p;
- http = 1;
- return;
- }
- else {
- /* assume host.name:/file/name */
- p = strchr (s, ':');
- if (!p) {
- /* assume /file/name */
- host = NULL;
- ftp = http = 0;
- file_to_get = s;
- return;
- }
- }
- ftp = 1;
- *p++ = 0;
- host = s;
- s = strrchr (p, '/');
- if (s) {
- *s++ = 0;
- change_to_dir = p;
- file_to_get = s;
- } else {
- change_to_dir = 0;
- file_to_get = p;
- }
-}
-
-void
-output_file_name ()
-{
- char *p;
-
- if (!outputfile) {
- p = strrchr (file_to_get, '/');
- if (!p || (!ftp && !http))
- p = file_to_get;
- else
- p++;
- outputfile = strdup (p);
- }
-}
-
-void
-f_size (char *name, off_t *size, time_t *time)
-{
- struct stat s;
-
- *size = 0;
-
- if (stat (name, &s))
- return;
- *size = s.st_size;
- *time = s.st_mtime;
-}
-
-void
-ftperr (FILE* ftp, char *fmt, ...)
-{
- va_list ap;
- va_start (ap, fmt);
-
- if (fmt)
- vfprintf(stderr, fmt, ap);
- if(ftp) {
- const char *str = ftpErrString(ftpErrno(ftp));
-
- if (str)
- fprintf(stderr, "%s\n", str);
- }
- rm ();
- exit (1);
-}
-
-void
-httpget ()
-{
- char *cp, str[1000];
- struct timeval tv;
- time_t tout;
- fd_set fdset;
- int i, s;
-
- restart = 0;
-
- s = http_open ();
- sprintf (str, "GET %s%s HTTP/1.0\r\n\r\n",
- http_proxy? "" : "/", file_to_get);
- i = strlen (str);
- if (i != write (s, str, i))
- err (1, "could not send GET command to HTTP server.");
-
- FD_ZERO (&fdset);
- FD_SET (s, &fdset);
- if (timeout_ival)
- tout = timeout_ival;
- else if ((cp = getenv("HTTP_TIMEOUT")) != NULL)
- tout = atoi(cp);
- else
- tout = HTTP_TIMEOUT;
-
- if (strcmp (outputfile, "-")) {
- file = fopen (outputfile, "w");
- if (!file)
- err (1, "could not open output file %s.", outputfile);
- } else {
- file = stdout;
- verbose = 0;
- }
-
- while (1) {
- tv.tv_sec = tout;
- tv.tv_usec = 0;
- i = select (s+1, &fdset, 0, 0, &tv);
- switch (i) {
- case 0:
- warnx ("Timeout");
- rm ();
- exit (1);
- case 1:
- i = read (s, buffer, sizeof (buffer));
- filter (buffer, i);
- if (i == 0)
- exit (0);
- break;
- default:
- err (1, "communication error with HTTP server.");
- }
- }
-}
-
-int
-match (char *pat, char *s)
-{
- regex_t preg;
- regmatch_t pmatch[2];
-
- regcomp (&preg, pat, REG_EXTENDED|REG_ICASE);
- if (regexec(&preg, s, 2, pmatch, 0))
- return 0;
- return pmatch[1].rm_so ? pmatch[1].rm_so : -1;
-}
-
-void
-filter (unsigned char *p, int len)
-{
-#define S 512
- static unsigned char s[S+2];
- static int header_len = 0, size = -1, n;
- int i = len;
- unsigned char *q = p;
-
- if (header_len < S) {
- while (header_len < S && i--)
- s[header_len++] = *q++;
- s[header_len] = 0;
- if (len && (header_len < S))
- return;
- if (match ("^HTTP/[0-9]+\\.[0-9]+[ \t]+200[^0-9]", s) == 0) {
- /* maybe not found, or document w/o header */
- if (match ("^HTTP/[0-9]+\\.[0-9]+[ \t]+[0-9]+", s)) {
- fprintf (stderr, "%s fetching failed, header so far:\n%s\n", file_to_get, s);
- rm ();
- exit (1);
- }
- /* assume no header */
- /* write s */
- display (size, 0);
- i = fwrite (s, 1, header_len, file);
- if (i != header_len)
- die(0);
- display (size, header_len);
- /* then p */
- if (p+len > q) {
- i = fwrite (q, 1, p+len-q, file);
- if (i != p+len-q)
- die(0);
- display (size, i);
- }
- } else {
- unsigned char *t;
- /* document begins with a success line. try to get size */
- i = match ("content-length:[ \t]*([0-9]+)", s);
- if (i > 0)
- size = atoi (s+i);
- /* assume that the file to get begins after an empty line */
- i = match ("(\n\n|\r\n\r\n)", s);
- if (i > 0) {
- if (s[i] == '\r')
- t = s+i+4;
- else
- t = s+i+2;
- } else {
- fprintf (stderr, "Can't decode the header!\n");
- rm ();
- exit (1);
- }
- display (size, 0);
- n = (s-t)+header_len;
- i = fwrite (t, 1, n, file);
- if (i != n)
- die(0);
- display (size, n);
- if (p+len > q) {
- n = p+len-q;
- i = fwrite (q, 1, n, file);
- if (i != n)
- die(0);
- display (size, n);
- }
- }
- } else {
- i = fwrite (p, 1, len, file);
- if (i != len)
- die(0);
- if (len)
- display (size, i);
- }
- if (len == 0)
- display (size, -1);
-}
-
-int
-http_open()
-{
- unsigned long a;
- struct sockaddr_in sin, sin2;
- struct hostent *h;
- int s;
-
- a = inet_addr (host);
- if (a != INADDR_NONE) {
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = a;
- } else {
- h = gethostbyname (host);
- if (!h)
- err (1, "could not lookup host %s.", host);
- sin.sin_family = h->h_addrtype;
- bcopy(h->h_addr, (char *)&sin.sin_addr, h->h_length);
- }
- sin.sin_port = htons (http_port);
- if ((s = socket (sin.sin_family, SOCK_STREAM, 0)) < 0)
- err (1, "socket");
- bzero ((char *)&sin2, sizeof (sin2));
- sin2.sin_family = AF_INET;
- sin2.sin_port = 0;
- sin2.sin_addr.s_addr = htonl (INADDR_ANY);
- if (bind (s, (struct sockaddr *)&sin2, sizeof (sin2)))
- err (1, "could not bind to socket.");
-
- if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
- err (1, "connection failed");
- return s;
-}
-
-int
-isDebug ()
-{
- return 0;
-}
-
-void msgDebug (char *p)
-{
- printf ("%s", p);
-}
-
-void
-setup_http_proxy()
-{
- char *e;
- char *p;
- char *url;
- unsigned short port;
-
- if (!(e = getenv("HTTP_PROXY"))
- || !(p = strchr(e, ':'))
- || (port = atoi(p+1)) == 0)
- return;
-
- if (!(url = (char *) malloc (strlen(file_to_get)
- + strlen(host)
- + (change_to_dir ? strlen(change_to_dir) : 0)
- + 50)))
- return;
-
- if (http) {
- sprintf(url, "http://%s:%d/%s",
- host, http_port, file_to_get);
- } else {
- if (change_to_dir) {
- sprintf(url, "ftp://%s/%s/%s",
- host, change_to_dir, file_to_get);
- } else {
- sprintf(url, "ftp://%s/%s", host, file_to_get);
- }
- }
- file_to_get = url;
-
- *p = 0;
- host = strdup(e);
- http_port = port;
- http = 1;
- http_proxy = 1;
-}
-
diff --git a/usr.bin/fetch/uri.c b/usr.bin/fetch/uri.c
new file mode 100644
index 0000000..95d4c91
--- /dev/null
+++ b/usr.bin/fetch/uri.c
@@ -0,0 +1,122 @@
+/*-
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#include <sys/types.h>
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "fetch.h"
+
+/*
+ * Table of the URI schemes known to the program.  The list is terminated
+ * by a null entry, which is what the lookup loops test for.
+ */
+struct uri_scheme *schemes[] = {
+	&http_scheme,
+	&ftp_scheme,
+	&file_scheme,
+	0
+};
+
+/*
+ * Look up a scheme by name, ignoring case.  Returns the matching entry
+ * of schemes[], or a null pointer if no entry has that name.
+ */
+static struct uri_scheme *
+find_scheme(const char *name)
+{
+	struct uri_scheme **scpp;
+
+	for (scpp = schemes; *scpp != 0; scpp++) {
+		if (strcasecmp((*scpp)->sc_name, name) != 0)
+			continue;
+		return *scpp;
+	}
+	return 0;
+}
+
+/*
+ * Work out which schemes may be reached through a proxy.
+ *
+ * First pass: a scheme whose proxy environment variable (sc_proxy_envar)
+ * is set is marked as able to act as a proxy (sc_can_proxy).
+ *
+ * Second pass: each scheme's sc_proxy_by is read as a comma-separated
+ * list of scheme names; the first one that is known and can proxy is
+ * recorded in sc_proxyproto.
+ */
+void
+init_schemes(void)
+{
+	int i;
+	char schemebuf[32];
+	const char *s, *t;
+	size_t len;
+	struct uri_scheme *scp;
+
+	for (i = 0; schemes[i]; i++) {
+		if (getenv(schemes[i]->sc_proxy_envar) != 0)
+			schemes[i]->sc_can_proxy = 1;
+	}
+
+	for (i = 0; schemes[i]; i++) {
+		s = schemes[i]->sc_proxy_by;
+		while (s && *s) {
+			t = strchr(s, ',');
+			len = t ? (size_t)(t - s) : strlen(s);
+
+			/*
+			 * Copy the name only if it fits, terminator included.
+			 * An over-long name is skipped instead of being
+			 * written past the end of schemebuf.
+			 */
+			if (len < sizeof(schemebuf)) {
+				memcpy(schemebuf, s, len);
+				schemebuf[len] = '\0';
+				scp = find_scheme(schemebuf);
+			} else {
+				scp = 0;
+			}
+
+			/* Step to the next list element, if there is one. */
+			s = t ? t + 1 : 0;
+
+			if (scp && scp->sc_can_proxy) {
+				schemes[i]->sc_proxyproto = scp;
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Split the scheme name off the front of `uri' and hand the whole URI to
+ * that scheme's parser, or to the proxy parser of the scheme recorded in
+ * sc_proxyproto when there is one.  Returns EX_USAGE if the URI has no
+ * `:' followed later by a `/', or if the scheme is not known; otherwise
+ * returns whatever the selected parser returns.
+ */
+int
+parse_uri(struct fetch_state *fs, const char *uri)
+{
+	const char *colon, *slash;
+	char *scheme;
+	size_t namelen;
+	struct uri_scheme *scp;
+
+	fs->fs_status = "parsing URI";
+	colon = strchr(uri, ':');
+	slash = strchr(uri, '/');
+	if (colon == 0 || slash == 0 || slash < colon) {
+		warnx("%s: an absolute URI is required", uri);
+		return EX_USAGE;
+	}
+
+	/* Copy everything before the colon into a terminated scratch buffer. */
+	namelen = colon - uri;
+	scheme = alloca(namelen + 1);
+	memcpy(scheme, uri, namelen);
+	scheme[namelen] = '\0';
+
+	scp = find_scheme(scheme);
+	if (scp == 0) {
+		warnx("%s: unknown URI scheme", scheme);
+		return EX_USAGE;
+	}
+
+	return (scp->sc_proxyproto != 0)
+	    ? scp->sc_proxyproto->sc_proxy_parse(fs, uri)
+	    : scp->sc_parse(fs, uri);
+}
+
diff --git a/usr.bin/fetch/util.c b/usr.bin/fetch/util.c
new file mode 100644
index 0000000..08103a0
--- /dev/null
+++ b/usr.bin/fetch/util.c
@@ -0,0 +1,322 @@
+/*-
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <time.h> /* for time() */
+#include <unistd.h>
+
+#include <sys/time.h> /* for struct timeval */
+
+#include "fetch.h"
+
+
+/* Signal handling functions */
+
+/*
+ * If this were Scheme we could make this variable private to just these two
+ * functions...
+ */
+static struct sigaction oldalrm;
+
+/*
+ * Install catchsig() as the SIGALRM handler, saving the previous
+ * disposition in oldalrm.  SIGHUP, SIGINT, SIGQUIT, SIGTERM and SIGALRM
+ * are placed in the handler's signal mask.
+ */
+void
+setup_sigalrm(void)
+{
+	static const int masked[] = {
+		SIGHUP, SIGINT, SIGQUIT, SIGTERM, SIGALRM
+	};
+	struct sigaction sa;
+	size_t i;
+
+	sigemptyset(&sa.sa_mask);
+	for (i = 0; i < sizeof(masked) / sizeof(masked[0]); i++)
+		sigaddset(&sa.sa_mask, masked[i]);
+	sa.sa_handler = catchsig;
+	sa.sa_flags = 0;
+
+	sigaction(SIGALRM, &sa, &oldalrm);
+}
+
+/* Put back the SIGALRM disposition that was saved in oldalrm. */
+void
+unsetup_sigalrm(void)
+{
+	struct sigaction *const previous = &oldalrm;
+
+	sigaction(SIGALRM, previous, 0);
+}
+
+
+/* File-handling functions */
+
+/*
+ * Give the output file the modification time recorded in fs_modtime (the
+ * time returned by the server); its access time is set to the current
+ * time.  Nothing is done when fs_newtime is set or fs_modtime is not
+ * positive.
+ */
+void
+adjmodtime(struct fetch_state *fs)
+{
+	struct timeval tv[2];
+
+	/*
+	 * XXX - not strictly correct, since (time_t)-1 does not have to be
+	 * > 0.  This also catches some of the other routines which
+	 * erroneously return 0 for invalid times rather than -1.
+	 */
+	if (fs->fs_newtime || fs->fs_modtime <= 0)
+		return;
+
+	/* tv[0] is the access time, tv[1] the modification time. */
+	tv[0].tv_sec = time(0);
+	tv[0].tv_usec = 0;
+	tv[1].tv_sec = fs->fs_modtime;
+	tv[1].tv_usec = 0;
+	utimes(fs->fs_outputfile, tv);
+}
+
+/*
+ * Clean up the output file when exiting on error.  An output name of
+ * exactly "-" is left alone.  Otherwise the file is unlinked, unless the
+ * fetch is a restart, a mirror, or the file is `precious', in which case
+ * only its modification time is adjusted.
+ */
+void
+rm(struct fetch_state *fs)
+{
+	const char *out = fs->fs_outputfile;
+
+	if (out[0] == '-' && out[1] == '\0')
+		return;
+
+	if (fs->fs_restart || fs->fs_mirror || fs->fs_precious) {
+		adjmodtime(fs);
+		return;
+	}
+	unlink(out);
+}
+
+
+/* String-handling and -parsing functions */
+
+/*
+ * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/').  This
+ * must be done after the URI is parsed, since the principal purpose of
+ * the encoding is to hide characters which would otherwise be significant
+ * to the parser (like `/').
+ *
+ * Returns a newly malloc()ed, NUL-terminated string; the process exits
+ * via err() if memory cannot be allocated.  A `%' that is not followed by
+ * two hexadecimal digits is copied through unchanged.  Note that `%00'
+ * decodes to a NUL byte, which ends the result early for callers that
+ * treat it as a C string.
+ */
+char *
+percent_decode(const char *uri)
+{
+	static const char hexdigits[] = "0123456789abcdef";
+	const char *hi, *lo;
+	char *rv, *s;
+
+	/* The decoded form is never longer than the encoded one. */
+	rv = s = malloc(strlen(uri) + 1);
+	if (rv == 0)
+		err(EX_OSERR, "malloc");
+
+	while (*uri) {
+		/*
+		 * isxdigit() is false for the terminating NUL, so the
+		 * short-circuit keeps us from looking past the end of a
+		 * string that finishes with `%' or `%x'.  The casts keep
+		 * negative char values out of the <ctype.h> functions.
+		 */
+		if (uri[0] == '%' && isxdigit((unsigned char)uri[1])
+		    && isxdigit((unsigned char)uri[2])) {
+			hi = strchr(hexdigits, tolower((unsigned char)uri[1]));
+			lo = strchr(hexdigits, tolower((unsigned char)uri[2]));
+			*s++ = (char)(((hi - hexdigits) << 4)
+			    | (lo - hexdigits));
+			uri += 3;
+		} else {
+			*s++ = *uri++;
+		}
+	}
+	*s = '\0';
+	return rv;
+}
+
+/*
+ * Decode a standard host:port string into its constituents, allocating
+ * memory for a new copy of the host part.
+ *
+ * On success returns 0 and stores the new host string in *hostname.
+ * *port is written only when `s' contains a `:'; otherwise it is left
+ * untouched.  If the text after the colon is empty, is not entirely a
+ * decimal number, or is outside 1..65534, a warning is printed and
+ * EX_USAGE is returned without storing anything.
+ */
+int
+parse_host_port(const char *s, char **hostname, int *port)
+{
+	const char *colon;
+	char *ep;
+	unsigned long ul;
+
+	colon = strchr(s, ':');
+	if (colon == 0) {
+		*hostname = safe_strdup(s);
+		return 0;
+	}
+
+	/* The port number starts immediately after the colon. */
+	errno = 0;
+	ul = strtoul(colon + 1, &ep, 10);
+	if (colon[1] == '\0' || *ep != '\0' || errno != 0
+	    || ul < 1 || ul > 65534) {
+		warnx("`%s': invalid port number", s);
+		return EX_USAGE;
+	}
+
+	/* The host is everything before the colon, colon excluded. */
+	*hostname = safe_strndup(s, colon - s);
+	*port = ul;
+	return 0;
+}
+
+/*
+ * Duplicate a string into freshly malloc()ed memory.  Unlike strdup(),
+ * this never returns a null pointer: if the allocation fails the program
+ * exits through err().
+ */
+char *
+safe_strdup(const char *orig)
+{
+	size_t size;
+	char *copy;
+
+	size = strlen(orig) + 1;	/* include the terminator */
+	copy = malloc(size);
+	if (copy == 0)
+		err(EX_OSERR, "malloc");
+	memcpy(copy, orig, size);
+	return copy;
+}
+
+/*
+ * Like safe_strdup(), but copies no more than `len' characters of
+ * `orig'.  The copy stops early at a NUL and is always terminated.
+ * `len' + 1 bytes are allocated whatever the length of `orig'.
+ */
+char *
+safe_strndup(const char *orig, size_t len)
+{
+	size_t n;
+	char *copy;
+
+	copy = malloc(len + 1);
+	if (copy == 0)
+		err(EX_OSERR, "malloc");
+
+	for (n = 0; n < len && orig[n] != '\0'; n++)
+		copy[n] = orig[n];
+	copy[n] = '\0';
+	return copy;
+}
+
+/*
+ * Implement the `base64' encoding as described in RFC 1521.
+ */
+static const char base64[] =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/*
+ * Encode `len' bytes from `buf' as base64.  Returns a newly malloc()ed,
+ * NUL-terminated string, padded with `=' to a multiple of four
+ * characters; the process exits via err() if memory cannot be allocated.
+ */
+char *
+to_base64(const unsigned char *buf, size_t len)
+{
+	/* Four output characters per (possibly partial) group of three. */
+	char *s = malloc(4 * ((len + 2) / 3) + 1), *rv;
+	unsigned tmp;
+
+	if (s == 0)
+		err(EX_OSERR, "malloc");
+
+	rv = s;
+	while (len >= 3) {
+		/* Pack three bytes into 24 bits (bitwise, not logical, OR). */
+		tmp = (unsigned)buf[0] << 16 | (unsigned)buf[1] << 8 | buf[2];
+		s[0] = base64[tmp >> 18];
+		s[1] = base64[(tmp >> 12) & 077];
+		s[2] = base64[(tmp >> 6) & 077];
+		s[3] = base64[tmp & 077];
+		len -= 3;
+		buf += 3;
+		s += 4;
+	}
+
+	/* RFC 1521 enumerates these three possibilities... */
+	switch (len) {
+	case 2:
+		tmp = (unsigned)buf[0] << 16 | (unsigned)buf[1] << 8;
+		s[0] = base64[(tmp >> 18) & 077];
+		s[1] = base64[(tmp >> 12) & 077];
+		s[2] = base64[(tmp >> 6) & 077];
+		s[3] = '=';
+		s += 4;
+		break;
+	case 1:
+		tmp = (unsigned)buf[0] << 16;
+		s[0] = base64[(tmp >> 18) & 077];
+		s[1] = base64[(tmp >> 12) & 077];
+		s[2] = s[3] = '=';
+		s += 4;
+		break;
+	case 0:
+		break;
+	}
+
+	*s = '\0';
+	return rv;
+}
+
+/*
+ * Decode the base64 string `orig' into `buf'.
+ *
+ * On entry *lenp is the capacity of `buf'; on success it is replaced by
+ * the number of bytes decoded and 0 is returned.  Trailing white space in
+ * `orig' is ignored.  Returns -1 if the remaining length is not a
+ * multiple of four, if the decoded data would not fit in `buf', or if a
+ * character outside the base64 alphabet appears (including `=' anywhere
+ * other than as the last one or two characters).  As *lenp is updated
+ * before the characters are checked, it may have been overwritten even
+ * when -1 is returned for a bad character.
+ */
+int
+from_base64(const char *orig, unsigned char *buf, size_t *lenp)
+{
+	size_t len, len2, pad;
+	unsigned tmp;
+
+	/* Guard len > 0 so an empty string never reads orig[-1]. */
+	len = strlen(orig);
+	while (len > 0 && isspace((unsigned char)orig[len - 1]))
+		len--;
+
+	if (len % 4)
+		return -1;
+
+	/* Count the padding: at most two `=' at the very end. */
+	pad = 0;
+	while (pad < 2 && pad < len && orig[len - 1 - pad] == '=')
+		pad++;
+
+	/* Now len2 is the actual length of the original data. */
+	len2 = 3 * (len / 4) - pad;
+	if (len2 > *lenp)
+		return -1;
+	*lenp = len2;
+
+	while (len > 0) {
+		int i;
+		const char *off;
+		int forget;
+
+		/* Only the final group of four may carry padding. */
+		forget = (len == 4) ? (int)pad : 0;
+
+		tmp = 0;
+		for (i = 0; i < 4; i++) {
+			if (i >= 4 - forget) {
+				/* A pad character contributes zero bits. */
+				off = base64;
+			} else {
+				off = strchr(base64, orig[i]);
+				if (off == 0 || *off == '\0')
+					return -1;
+			}
+			tmp = (tmp << 6) | (unsigned)(off - base64);
+		}
+
+		buf[0] = (tmp >> 16) & 0xff;
+		if (forget < 2)
+			buf[1] = (tmp >> 8) & 0xff;
+		if (forget < 1)
+			buf[2] = tmp & 0xff;
+		len -= 4;
+		orig += 4;
+		buf += 3 - forget;
+	}
+	return 0;
+}
OpenPOWER on IntegriCloud